diff --git a/go.mod b/go.mod index c693ce5fd..ef62dfe7e 100644 --- a/go.mod +++ b/go.mod @@ -11,14 +11,14 @@ require ( github.com/fsnotify/fsnotify v1.7.0 github.com/golang/snappy v0.0.4 github.com/google/uuid v1.6.0 - github.com/klauspost/compress v1.17.8 + github.com/klauspost/compress v1.17.11 github.com/klauspost/pgzip v1.2.6 github.com/mongodb/mongo-tools v0.0.0-20240723193119-837c2bc263f4 github.com/pierrec/lz4 v2.6.1+incompatible github.com/pkg/errors v0.9.1 github.com/spf13/cobra v1.8.1 github.com/spf13/viper v1.19.0 - go.mongodb.org/mongo-driver v1.16.0 + go.mongodb.org/mongo-driver v1.17.1 golang.org/x/mod v0.19.0 golang.org/x/sync v0.10.0 gopkg.in/yaml.v2 v2.4.0 @@ -30,6 +30,7 @@ require ( github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9 // indirect github.com/containerd/log v0.1.0 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/distribution/reference v0.5.0 // indirect github.com/docker/go-connections v0.5.0 // indirect github.com/docker/go-units v0.5.0 // indirect @@ -60,7 +61,7 @@ require ( github.com/xdg-go/pbkdf2 v1.0.0 // indirect github.com/xdg-go/scram v1.1.2 // indirect github.com/xdg-go/stringprep v1.0.4 // indirect - github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a // indirect + github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 // indirect go.opentelemetry.io/otel v1.24.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.24.0 // indirect diff --git a/go.sum b/go.sum index 07d1b7623..c4a6cf1f2 100644 --- a/go.sum +++ b/go.sum @@ -82,8 +82,8 @@ github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7 github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU= -github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU= github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -170,13 +170,13 @@ github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY= github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4= github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8= github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM= -github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a h1:fZHgsYlfvtyqToslyjUt3VOPF4J7aK/3MPcK7xp3PDk= -github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a/go.mod h1:ul22v+Nro/R083muKhosV54bj5niojjWZvU8xrevuH4= +github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 h1:ilQV1hzziu+LLM3zUTJ0trRztfwgjqKnBWNtSRkbmwM= +github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78/go.mod h1:aL8wCCfTfSfmXjznFBSZNN13rSJjlIOI1fUNAtF7rmI= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -go.mongodb.org/mongo-driver v1.16.0 h1:tpRsfBJMROVHKpdGyc1BBEzzjDUWjItxbVSZ8Ls4BQ4= -go.mongodb.org/mongo-driver v1.16.0/go.mod h1:oB6AhJQvFQL4LEHyXi6aJzQJtBiTQHiAd83l0GdFaiw= +go.mongodb.org/mongo-driver v1.17.1 h1:Wic5cJIwJgSpBhe3lx3+/RybR5PiYRMpVFgO7cOHyIM= +go.mongodb.org/mongo-driver v1.17.1/go.mod h1:wwWm/+BuOddhcq3n68LKRmgk2wXzmF6s0SFOa0GINL4= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 h1:jq9TW8u3so/bN+JPT166wjOI6/vQPF6Xe7nMNIltagk= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0/go.mod h1:p8pYQP+m5XfbZm9fxtSKAbM6oIllS7s2AfxrChvc7iw= go.opentelemetry.io/otel v1.24.0 h1:0LAOdjNmQeSTzGBzduGe/rU4tZhMwL5rWgtp9Ku5Jfo= @@ -199,7 +199,6 @@ go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI= go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= diff --git a/pbm/oplog/db.go b/pbm/oplog/db.go new file mode 100644 index 000000000..138e98528 --- /dev/null +++ b/pbm/oplog/db.go @@ -0,0 +1,84 @@ +package oplog + +import ( + "context" + "strings" + + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/bson/primitive" + "go.mongodb.org/mongo-driver/mongo" + + "github.com/percona/percona-backup-mongodb/pbm/errors" +) + +// mDB represents MongoDB access functionality. +type mDB struct { + m *mongo.Client +} + +func newMDB(m *mongo.Client) *mDB { + return &mDB{m: m} +} + +// getUUIDForNS ruturns UUID of existing collection. +// When ns doesn't exist, it returns zero value without an error. +// In case of error, it returns zero value for UUID in addition to error. +func (d *mDB) getUUIDForNS(ctx context.Context, ns string) (primitive.Binary, error) { + var uuid primitive.Binary + + db, coll, _ := strings.Cut(ns, ".") + cur, err := d.m.Database(db).ListCollections(ctx, bson.D{{"name", coll}}) + if err != nil { + return uuid, errors.Wrap(err, "list collections") + } + defer cur.Close(ctx) + + for cur.Next(ctx) { + if subtype, data, ok := cur.Current.Lookup("info", "uuid").BinaryOK(); ok { + uuid = primitive.Binary{ + Subtype: subtype, + Data: data, + } + break + } + } + + return uuid, errors.Wrap(cur.Err(), "list collections cursor") +} + +// ensureCollExists ensures that the collection exists before "creating" views or timeseries. +// See PBM-921 for details. +func (d *mDB) ensureCollExists(dbName string) error { + err := d.m.Database(dbName).CreateCollection(context.TODO(), "system.views") + if err != nil { + // MongoDB 5.0 and 6.0 returns NamespaceExists error. + // MongoDB 7.0 and 8.0 does not return error. + // https://github.com/mongodb/mongo/blob/v6.0/src/mongo/base/error_codes.yml#L84 + const NamespaceExists = 48 + var cmdError mongo.CommandError + if !errors.As(err, &cmdError) || cmdError.Code != NamespaceExists { + return errors.Wrapf(err, "ensure %s.system.views collection", dbName) + } + } + + return nil +} + +// applyOps is a wrapper for the applyOps database command, we pass in +// a session to avoid opening a new connection for a few inserts at a time. +func (d *mDB) applyOps(entries []interface{}) error { + singleRes := d.m.Database("admin").RunCommand(context.TODO(), bson.D{{"applyOps", entries}}) + if err := singleRes.Err(); err != nil { + return errors.Wrap(err, "applyOps") + } + res := bson.M{} + err := singleRes.Decode(&res) + if err != nil { + return errors.Wrap(err, "decode singleRes") + } + if isFalsy(res["ok"]) { + return errors.Errorf("applyOps command: %v", res["errmsg"]) + } + + return nil +} diff --git a/pbm/oplog/db_test.go b/pbm/oplog/db_test.go new file mode 100644 index 000000000..6848cbb73 --- /dev/null +++ b/pbm/oplog/db_test.go @@ -0,0 +1,57 @@ +package oplog + +import ( + "context" + "strings" + "testing" + + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/bson/primitive" + "go.mongodb.org/mongo-driver/mongo/integration/mtest" +) + +func TestGetUUIDForNS(t *testing.T) { + mt := mtest.New(t, mtest.NewOptions().ClientType(mtest.Mock)) + + mt.Run("successful response from db", func(mt *mtest.T) { + expectedUUID := primitive.Binary{Subtype: 0xFF, Data: []byte{0x01, 0x02, 0x03}} + listCollRes := bson.D{ + {"name", "c1"}, + {"type", "collection"}, + {"info", bson.D{ + {"readOnly", false}, + {"uuid", expectedUUID}, + }}, + } + mt.AddMockResponses(mtest.CreateCursorResponse(1, "mydb.c1", mtest.FirstBatch, listCollRes)) + + db := newMDB(mt.Client) + uuid, err := db.getUUIDForNS(context.Background(), "mydb.c1") + if err != nil { + t.Errorf("got err=%v", err) + } + primitive.NewObjectID() + + if !uuid.Equal(expectedUUID) { + t.Errorf("wrong uuid for ns: expected=%v, got=%v", expectedUUID, uuid) + } + t.Log(uuid) + }) + + mt.Run("failed response from db", func(mt *mtest.T) { + errRes := mtest.CreateCommandErrorResponse(mtest.CommandError{ + Code: 11601, + Name: "error", + Message: "querying list collections", + }) + mt.AddMockResponses(errRes) + db := newMDB(mt.Client) + _, err := db.getUUIDForNS(context.Background(), "mydb.c1") + if err == nil { + t.Error("expected to get error from getUUIDForNS") + } + if !strings.Contains(err.Error(), "list collections") { + t.Error("wrong err") + } + }) +} diff --git a/pbm/oplog/restore.go b/pbm/oplog/restore.go index ffd28bdde..64c2c040a 100644 --- a/pbm/oplog/restore.go +++ b/pbm/oplog/restore.go @@ -118,9 +118,16 @@ func (c *cloneNS) SetNSPair(nsPair snapshot.CloneNS) { c.toDB, c.toColl = nsPair.SplitToNS() } +// mDBCl represents client interface for MongoDB logic used by OplogRestore +type mDBCl interface { + getUUIDForNS(ctx context.Context, ns string) (primitive.Binary, error) + ensureCollExists(dbName string) error + applyOps(entries []interface{}) error +} + // OplogRestore is the oplog applyer type OplogRestore struct { - dst *mongo.Client + mdb mDBCl ver *db.Version needIdxWorkaround bool preserveUUIDopt bool @@ -157,7 +164,7 @@ const saveLastDistTxns = 100 // NewOplogRestore creates an object for an oplog applying func NewOplogRestore( - dst *mongo.Client, + m *mongo.Client, ic *idx.IndexCatalog, sv *version.MongoVersion, unsafe, @@ -165,7 +172,7 @@ func NewOplogRestore( ctxn chan phys.RestoreTxn, txnErr chan error, ) (*OplogRestore, error) { - m, err := ns.NewMatcher(append(snapshot.ExcludeFromRestore, excludeFromOplog...)) + matcher, err := ns.NewMatcher(append(snapshot.ExcludeFromRestore, excludeFromOplog...)) if err != nil { return nil, errors.Wrap(err, "create matcher for the collections exclude") } @@ -185,13 +192,13 @@ func NewOplogRestore( } ver := &db.Version{v[0], v[1], v[2]} return &OplogRestore{ - dst: dst, + mdb: newMDB(m), ver: ver, preserveUUIDopt: preserveUUID, preserveUUID: preserveUUID, needIdxWorkaround: needsCreateIndexWorkaround(ver), indexCatalog: ic, - excludeNS: m, + excludeNS: matcher, noUUIDns: noUUID, txn: ctxn, txnSyncErr: txnErr, @@ -298,7 +305,7 @@ func (o *OplogRestore) SetCloneNS(ctx context.Context, ns snapshot.CloneNS) erro o.cloneNS.SetNSPair(ns) var err error - o.cloneNS.toUUID, err = getUUIDForNS(ctx, o.dst, o.cloneNS.ToNS) + o.cloneNS.toUUID, err = o.mdb.getUUIDForNS(ctx, o.cloneNS.ToNS) if err != nil { return errors.Wrap(err, "get to ns uuid") } @@ -956,20 +963,14 @@ func (o *OplogRestore) handleNonTxnOp(op db.Oplog) error { } } else if op.Operation == "i" && collName == "system.views" { // PBM-921: ensure the collection exists before "creating" views or timeseries - err := o.dst.Database(dbName).CreateCollection(context.TODO(), "system.views") + err := o.mdb.ensureCollExists(dbName) if err != nil { - // MongoDB 5.0 and 6.0 returns NamespaceExists error. - // MongoDB 7.0 and 8.0 does not return error. - // https://github.com/mongodb/mongo/blob/v6.0/src/mongo/base/error_codes.yml#L84 - const NamespaceExists = 48 - var cmdError mongo.CommandError - if !errors.As(err, &cmdError) || cmdError.Code != NamespaceExists { - return errors.Wrapf(err, "ensure %s.system.views collection", dbName) - } + return err } + } - err = o.applyOps([]interface{}{op}) + err = o.mdb.applyOps([]interface{}{op}) if err != nil { // https://jira.percona.com/browse/PBM-818 if o.unsafe && op.Namespace == "config.chunks" { @@ -1071,25 +1072,6 @@ func extractIndexDocumentFromCommitIndexBuilds(op db.Oplog) (string, []*idx.Inde return collectionName, nil } -// applyOps is a wrapper for the applyOps database command, we pass in -// a session to avoid opening a new connection for a few inserts at a time. -func (o *OplogRestore) applyOps(entries []interface{}) error { - singleRes := o.dst.Database("admin").RunCommand(context.TODO(), bson.D{{"applyOps", entries}}) - if err := singleRes.Err(); err != nil { - return errors.Wrap(err, "applyOps") - } - res := bson.M{} - err := singleRes.Decode(&res) - if err != nil { - return errors.Wrap(err, "decode singleRes") - } - if isFalsy(res["ok"]) { - return errors.Errorf("applyOps command: %v", res["errmsg"]) - } - - return nil -} - // filterUUIDs removes 'ui' entries from ops, including nested applyOps ops. // It also modifies ops that rely on 'ui'. func (o *OplogRestore) filterUUIDs(op db.Oplog) (db.Oplog, error) { @@ -1261,29 +1243,3 @@ func isTruthy(val interface{}) bool { func isFalsy(val interface{}) bool { return !isTruthy(val) } - -// getUUIDForNS ruturns UUID of existing collection. -// When ns doesn't exist, it returns zero value without an error. -// In case of error, it returns zero value for UUID in addition to error. -func getUUIDForNS(ctx context.Context, m *mongo.Client, ns string) (primitive.Binary, error) { - var uuid primitive.Binary - - d, c, _ := strings.Cut(ns, ".") - cur, err := m.Database(d).ListCollections(ctx, bson.D{{"name", c}}) - if err != nil { - return uuid, errors.Wrap(err, "list collections") - } - defer cur.Close(ctx) - - for cur.Next(ctx) { - if subtype, data, ok := cur.Current.Lookup("info", "uuid").BinaryOK(); ok { - uuid = primitive.Binary{ - Subtype: subtype, - Data: data, - } - break - } - } - - return uuid, errors.Wrap(cur.Err(), "list collections cursor") -} diff --git a/pbm/oplog/restore_test.go b/pbm/oplog/restore_test.go new file mode 100644 index 000000000..7f56a5be2 --- /dev/null +++ b/pbm/oplog/restore_test.go @@ -0,0 +1,584 @@ +package oplog + +import ( + "bytes" + "context" + "fmt" + "io" + "os" + "path/filepath" + "testing" + + "github.com/mongodb/mongo-tools/common/db" + "github.com/mongodb/mongo-tools/common/idx" + "github.com/mongodb/mongo-tools/mongorestore/ns" + "github.com/pkg/errors" + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/bson/primitive" + + "github.com/percona/percona-backup-mongodb/pbm/snapshot" +) + +func newOplogRestoreTest(mdb mDBCl) *OplogRestore { + noUUID, _ := ns.NewMatcher(dontPreserveUUID) + matcher, _ := ns.NewMatcher(append(snapshot.ExcludeFromRestore, excludeFromOplog...)) + return &OplogRestore{ + mdb: mdb, + ver: &db.Version{7, 0, 0}, + excludeNS: matcher, + noUUIDns: noUUID, + preserveUUIDopt: true, + preserveUUID: true, + indexCatalog: idx.NewIndexCatalog(), + filter: DefaultOpFilter, + } +} + +type mdbTestClient struct { + applyOpsInv []map[string]string +} + +func newMDBTestClient() *mdbTestClient { + return &mdbTestClient{applyOpsInv: []map[string]string{}} +} + +func (d *mdbTestClient) getUUIDForNS(_ context.Context, _ string) (primitive.Binary, error) { + return primitive.Binary{Subtype: 0x00, Data: []byte{0x01, 0x02, 0x03}}, nil +} + +func (d *mdbTestClient) ensureCollExists(_ string) error { + return nil +} + +func (d *mdbTestClient) applyOps(entries []interface{}) error { + if len(entries) != 1 { + return errors.New("applyOps without single oplog entry") + } + + oe := entries[0].(db.Oplog) + invParams := map[string]string{ + "op": oe.Operation, + "ns": oe.Namespace, + } + if oe.Operation == "c" && oe.Object != nil && len(oe.Object) > 0 { + invParams["cmd"] = oe.Object[0].Key + invParams["coll"] = oe.Object[0].Value.(string) + } + d.applyOpsInv = append(d.applyOpsInv, invParams) + + return nil +} + +func TestIsOpForCloning(t *testing.T) { + oRestore := newOplogRestoreTest(&mdbTestClient{}) + _ = oRestore.SetCloneNS(context.Background(), snapshot.CloneNS{FromNS: "mydb.cloningFrom", ToNS: "mydb.cloningTo"}) + + testCases := []struct { + desc string + entry *db.Oplog + isForCloning bool + }{ + // i op + { + desc: "insert op for cloning ", + entry: createInsertOp(t, "mydb.cloningFrom"), + isForCloning: true, + }, + { + desc: "insert op, collection not for cloning", + entry: createInsertOp(t, "mydb.x"), + isForCloning: false, + }, + { + desc: "insert op, db not for cloning", + entry: createInsertOp(t, "x.cloningFrom"), + isForCloning: false, + }, + + // u op + { + desc: "update op for cloning ", + entry: createUpdateOp(t, "mydb.cloningFrom"), + isForCloning: true, + }, + { + desc: "update op, collection not for cloning", + entry: createUpdateOp(t, "mydb.x"), + isForCloning: false, + }, + { + desc: "update op, db not for cloning", + entry: createUpdateOp(t, "x.cloningFrom"), + isForCloning: false, + }, + + // d op + { + desc: "delete op for cloning ", + entry: createDeleteOp(t, "mydb.cloningFrom"), + isForCloning: true, + }, + { + desc: "delete op, collection not for cloning", + entry: createDeleteOp(t, "mydb.x"), + isForCloning: false, + }, + { + desc: "delete op, db not for cloning", + entry: createDeleteOp(t, "x.cloningFrom"), + isForCloning: false, + }, + } + + for _, tC := range testCases { + t.Run(tC.desc, func(t *testing.T) { + res := oRestore.isOpForCloning(tC.entry) + if res != tC.isForCloning { + t.Errorf("%s: for entry: %+v isOpForCloning is: %t, but it should be opposite", + tC.desc, tC.entry, tC.isForCloning) + } + }) + } +} + +func TestApply(t *testing.T) { + t.Run("collection restore", func(t *testing.T) { + testCases := []struct { + desc string + oplogFile string + resOps []string + resNS []string + resCmd []string + resColl []string + }{ + { + desc: "collection: create-drop-create", + oplogFile: "ops_cmd_create_drop", + resOps: []string{"c", "c", "c", "c", "c"}, + resNS: []string{"mydb.$cmd", "mydb.$cmd", "mydb.$cmd", "mydb.$cmd", "mydb.$cmd"}, + resCmd: []string{"drop", "create", "drop", "drop", "create"}, + resColl: []string{"c1", "c1", "c1", "c2", "c2"}, + }, + // todo: add more cases + } + for _, tC := range testCases { + t.Run(tC.desc, func(t *testing.T) { + db := newMDBTestClient() + oRestore := newOplogRestoreTest(db) + + fr := useTestFile(t, tC.oplogFile) + + _, err := oRestore.Apply(fr) + if err != nil { + t.Fatalf("error while applying oplog: %v", err) + } + + if len(tC.resOps) != len(db.applyOpsInv) { + t.Errorf("wrong number of applyOps invocation, want=%d, got=%d", len(tC.resOps), len(db.applyOpsInv)) + } + for i, wantOp := range tC.resOps { + gotOp := db.applyOpsInv[i]["op"] + if wantOp != gotOp { + t.Errorf("wrong #%d. operation: want=%s, got=%s", i, wantOp, gotOp) + } + } + for i, wantNS := range tC.resNS { + gotNS := db.applyOpsInv[i]["ns"] + if wantNS != gotNS { + t.Errorf("wrong #%d. namespace: want=%s, got=%s", i, wantNS, gotNS) + } + } + for i, wantCmd := range tC.resCmd { + gotCmd := db.applyOpsInv[i]["cmd"] + if wantCmd != gotCmd { + t.Errorf("wrong #%d. command: want=%s, got=%s", i, wantCmd, gotCmd) + } + } + for i, wantColl := range tC.resColl { + gotColl := db.applyOpsInv[i]["coll"] + if wantColl != gotColl { + t.Errorf("wrong #%d. collection: want=%s, got=%s", i, wantColl, gotColl) + } + } + }) + } + }) + + t.Run("selective restore", func(t *testing.T) { + // todo: add tests + }) + + t.Run("index restore", func(t *testing.T) { + testCases := []struct { + desc string + oplogFile string + db string + coll string + idxs bson.D + }{ + { + desc: "index: dropIndexes-createIndexes", + oplogFile: "ops_cmd_createIndexes_dropIndexes", + db: "mydb", + coll: "c1", + idxs: bson.D{{"fieldX", -1}, {"fieldZ", -1}}, + }, + // todo: add more cases + } + for _, tC := range testCases { + t.Run(tC.desc, func(t *testing.T) { + db := newMDBTestClient() + oRestore := newOplogRestoreTest(db) + + fr := useTestFile(t, tC.oplogFile) + + _, err := oRestore.Apply(fr) + if err != nil { + t.Fatalf("error while applying oplog: %v", err) + } + + idxs := oRestore.indexCatalog.GetIndexes(tC.db, tC.coll) + if len(idxs) != len(tC.idxs) { + t.Errorf("wrong number of indexes: want=%d, got=%d", len(tC.idxs), len(idxs)) + } + for i, idx := range idxs { + if idx.Key[0].Key != tC.idxs[i].Key { + t.Errorf("wrong key: want=%v, got=%v", tC.idxs[i], idx.Key[0]) + } + } + }) + } + }) + + t.Run("cloning namespace", func(t *testing.T) { + testCases := []struct { + desc string + oplogFile string + nsFrom string + nsTo string + resOps []string + resNS []string + }{ + { + desc: "clone: insert, update, delete ops", + oplogFile: "ops_i_u_d", + nsFrom: "mydb.c1", + nsTo: "mydb.c1_clone", + resOps: []string{"i", "u", "d"}, + resNS: []string{"mydb.c1_clone", "mydb.c1_clone", "mydb.c1_clone"}, + }, + { + desc: "ignore namespaces not relevant for cloning", + oplogFile: "ops_i_u_d", + nsFrom: "mydb.xyz", + nsTo: "mydb.xyz_clone", + resOps: []string{}, + resNS: []string{}, + }, + { + desc: "ignore noop op", + oplogFile: "ops_n", + nsFrom: "mydb.xyz", + nsTo: "mydb.xyz_clone", + resOps: []string{}, + resNS: []string{}, + }, + // add index creation + } + for _, tC := range testCases { + t.Run(tC.desc, func(t *testing.T) { + db := newMDBTestClient() + oRestore := newOplogRestoreTest(db) + _ = oRestore.SetCloneNS(context.Background(), snapshot.CloneNS{FromNS: tC.nsFrom, ToNS: tC.nsTo}) + + fr := useTestFile(t, tC.oplogFile) + + _, err := oRestore.Apply(fr) + if err != nil { + t.Fatalf("error while applying oplog: %v", err) + } + + if len(tC.resOps) != len(db.applyOpsInv) { + t.Errorf("wrong number of applyOps invocation, want=%d, got=%d", len(tC.resOps), len(db.applyOpsInv)) + } + for i, wantOp := range tC.resOps { + gotOp := db.applyOpsInv[i]["op"] + if wantOp != gotOp { + t.Errorf("wrong #%d. operation: want=%s, got=%s", i, wantOp, gotOp) + } + } + for i, wantNS := range tC.resNS { + gotNS := db.applyOpsInv[i]["ns"] + if wantNS != gotNS { + t.Errorf("wrong #%d. namespace: want=%s, got=%s", i, wantNS, gotNS) + } + } + }) + } + }) +} + +func useTestFile(t *testing.T, testFileName string) io.ReadCloser { + t.Helper() + + f := fmt.Sprintf("%s.json", testFileName) + jsonData, err := os.ReadFile(filepath.Join("./testdata", f)) + if err != nil { + t.Fatalf("failed to read test json file: filename=%s, err=%v", f, err) + } + + var jsonDocs []db.Oplog + err = bson.UnmarshalExtJSON(jsonData, false, &jsonDocs) + if err != nil { + t.Fatalf("failed to parse test json array: filename=%s, err=%v", f, err) + } + + b := &bytes.Buffer{} + for _, jsonDoc := range jsonDocs { + bsonDoc, err := bson.Marshal(jsonDoc) + if err != nil { + t.Fatalf("failed to marshal json to bson: %v", err) + } + _, err = b.Write(bsonDoc) + if err != nil { + t.Fatalf("Failed to write BSON: %v", err) + } + } + + return io.NopCloser(b) +} + +func createInsertOp(t *testing.T, ns string) *db.Oplog { + t.Helper() + iOpJSON := ` + { + "lsid": { + "id": { + "$binary": { + "base64": "YYbkO7kpRt6xFqJqIh+h9g==", + "subType": "04" + } + }, + "uid": { + "$binary": { + "base64": "8L/kOoqHkvDRIRJTrmrrO3wwOr+ToO8WLvmn15Ql7G0=", + "subType": "00" + } + } + }, + "txnNumber": { + "$numberLong": "9" + }, + "op": "i", + "ns": "db.coll", + "ui": { + "$binary": { + "base64": "v+mHa8niRBKG7Z+uqJGARQ==", + "subType": "04" + } + }, + "o": { + "_id": { + "$oid": "6747008178d82a2b1134a2b8" + }, + "d": { + "$numberInt": "6" + }, + "desc": "doc-6" + }, + "o2": { + "_id": { + "$oid": "6747008178d82a2b1134a2b8" + } + }, + "stmtId": { + "$numberInt": "0" + }, + "ts": { + "$timestamp": { + "t": 1732706433, + "i": 1 + } + }, + "t": { + "$numberLong": "2" + }, + "v": { + "$numberLong": "2" + }, + "wall": { + "$date": { + "$numberLong": "1732706433987" + } + }, + "prevOpTime": { + "ts": { + "$timestamp": { + "t": 0, + "i": 0 + } + }, + "t": { + "$numberLong": "-1" + } + } + }` + + return replaceNsWithinOpEntry(t, iOpJSON, ns) +} + +func createUpdateOp(t *testing.T, ns string) *db.Oplog { + t.Helper() + + uOpJSON := ` + { + "lsid": { + "id": { + "$binary": { + "base64": "HxXre7SSRxe8eq+OjOQOhw==", + "subType": "04" + } + }, + "uid": { + "$binary": { + "base64": "Bh/Anp+//gSHltMgOtOX+7sunrF/VwW+VDdA3fRANl0=", + "subType": "00" + } + } + }, + "txnNumber": { + "$numberLong": "5" + }, + "op": "u", + "ns": "db.coll", + "ui": { + "$binary": { + "base64": "f774YvKERIKXSJVH+xCtPw==", + "subType": "04" + } + }, + "o": { + "$v": 2, + "diff": { + "i": { + "city": "split" + } + } + }, + "o2": { + "_id": { + "$oid": "6728e3fcedfb509c06f01307" + } + }, + "stmtId": 0, + "ts": { + "$timestamp": { + "t": 1730733212, + "i": 1 + } + }, + "t": { + "$numberLong": "7" + }, + "v": { + "$numberLong": "2" + }, + "wall": { + "$date": "2024-11-04T15:13:32.123Z" + }, + "prevOpTime": { + "ts": { + "$timestamp": { + "t": 0, + "i": 0 + } + }, + "t": { + "$numberLong": "-1" + } + } + }` + return replaceNsWithinOpEntry(t, uOpJSON, ns) +} + +func createDeleteOp(t *testing.T, ns string) *db.Oplog { + t.Helper() + + dOpJSON := ` + { + "lsid": { + "id": { + "$binary": { + "base64": "HxXre7SSRxe8eq+OjOQOhw==", + "subType": "04" + } + }, + "uid": { + "$binary": { + "base64": "Bh/Anp+//gSHltMgOtOX+7sunrF/VwW+VDdA3fRANl0=", + "subType": "00" + } + } + }, + "txnNumber": { + "$numberLong": "6" + }, + "op": "d", + "ns": "db.coll", + "ui": { + "$binary": { + "base64": "f774YvKERIKXSJVH+xCtPw==", + "subType": "04" + } + }, + "o": { + "_id": { + "$oid": "6728e3fcedfb509c06f01307" + } + }, + "stmtId": 0, + "ts": { + "$timestamp": { + "t": 1730733256, + "i": 1 + } + }, + "t": { + "$numberLong": "7" + }, + "v": { + "$numberLong": "2" + }, + "wall": { + "$date": "2024-11-04T15:14:16.626Z" + }, + "prevOpTime": { + "ts": { + "$timestamp": { + "t": 0, + "i": 0 + } + }, + "t": { + "$numberLong": "-1" + } + } + }` + + return replaceNsWithinOpEntry(t, dOpJSON, ns) +} + +func replaceNsWithinOpEntry(t *testing.T, jsonEntry, ns string) *db.Oplog { + t.Helper() + + oe := db.Oplog{} + err := bson.UnmarshalExtJSON([]byte(jsonEntry), false, &oe) + if err != nil { + t.Errorf("err while unmarshal from json: %v", err) + } + + if ns != "" { + oe.Namespace = ns + } + return &oe +} diff --git a/pbm/oplog/testdata/oplog_test.json b/pbm/oplog/testdata/oplog_test.json new file mode 100644 index 000000000..5e7dc6a60 --- /dev/null +++ b/pbm/oplog/testdata/oplog_test.json @@ -0,0 +1,260 @@ +[ +{ + "op": "c", + "ns": "mydb.$cmd", + "ui": { + "$binary": { + "base64": "f774YvKERIKXSJVH+xCtPw==", + "subType": "04" + } + }, + "o": { + "create": "c5", + "idIndex": { + "v": 2, + "key": { + "_id": 1 + }, + "name": "_id_" + } + }, + "ts": { + "$timestamp": { + "t": 1730733015, + "i": 1 + } + }, + "t": { + "$numberLong": "7" + }, + "v": { + "$numberLong": "2" + }, + "wall": { + "$date": "2024-11-04T15:10:15.564Z" + } +}, +{ + "lsid": { + "id": { + "$binary": { + "base64": "HxXre7SSRxe8eq+OjOQOhw==", + "subType": "04" + } + }, + "uid": { + "$binary": { + "base64": "Bh/Anp+//gSHltMgOtOX+7sunrF/VwW+VDdA3fRANl0=", + "subType": "00" + } + } + }, + "txnNumber": { + "$numberLong": "4" + }, + "op": "i", + "ns": "mydb.c5", + "ui": { + "$binary": { + "base64": "f774YvKERIKXSJVH+xCtPw==", + "subType": "04" + } + }, + "o": { + "_id": { + "$oid": "6728e3fcedfb509c06f01307" + }, + "person": "jure", + "age": 37 + }, + "o2": { + "_id": { + "$oid": "6728e3fcedfb509c06f01307" + } + }, + "stmtId": 0, + "ts": { + "$timestamp": { + "t": 1730733052, + "i": 1 + } + }, + "t": { + "$numberLong": "7" + }, + "v": { + "$numberLong": "2" + }, + "wall": { + "$date": "2024-11-04T15:10:52.775Z" + }, + "prevOpTime": { + "ts": { + "$timestamp": { + "t": 0, + "i": 0 + } + }, + "t": { + "$numberLong": "-1" + } + } +}, +{ + "lsid": { + "id": { + "$binary": { + "base64": "HxXre7SSRxe8eq+OjOQOhw==", + "subType": "04" + } + }, + "uid": { + "$binary": { + "base64": "Bh/Anp+//gSHltMgOtOX+7sunrF/VwW+VDdA3fRANl0=", + "subType": "00" + } + } + }, + "txnNumber": { + "$numberLong": "5" + }, + "op": "u", + "ns": "mydb.c5", + "ui": { + "$binary": { + "base64": "f774YvKERIKXSJVH+xCtPw==", + "subType": "04" + } + }, + "o": { + "$v": 2, + "diff": { + "i": { + "city": "split" + } + } + }, + "o2": { + "_id": { + "$oid": "6728e3fcedfb509c06f01307" + } + }, + "stmtId": 0, + "ts": { + "$timestamp": { + "t": 1730733212, + "i": 1 + } + }, + "t": { + "$numberLong": "7" + }, + "v": { + "$numberLong": "2" + }, + "wall": { + "$date": "2024-11-04T15:13:32.123Z" + }, + "prevOpTime": { + "ts": { + "$timestamp": { + "t": 0, + "i": 0 + } + }, + "t": { + "$numberLong": "-1" + } + } +}, +{ + "lsid": { + "id": { + "$binary": { + "base64": "HxXre7SSRxe8eq+OjOQOhw==", + "subType": "04" + } + }, + "uid": { + "$binary": { + "base64": "Bh/Anp+//gSHltMgOtOX+7sunrF/VwW+VDdA3fRANl0=", + "subType": "00" + } + } + }, + "txnNumber": { + "$numberLong": "6" + }, + "op": "d", + "ns": "mydb.c5", + "ui": { + "$binary": { + "base64": "f774YvKERIKXSJVH+xCtPw==", + "subType": "04" + } + }, + "o": { + "_id": { + "$oid": "6728e3fcedfb509c06f01307" + } + }, + "stmtId": 0, + "ts": { + "$timestamp": { + "t": 1730733256, + "i": 1 + } + }, + "t": { + "$numberLong": "7" + }, + "v": { + "$numberLong": "2" + }, + "wall": { + "$date": "2024-11-04T15:14:16.626Z" + }, + "prevOpTime": { + "ts": { + "$timestamp": { + "t": 0, + "i": 0 + } + }, + "t": { + "$numberLong": "-1" + } + } +}, +{ + "op": "c", + "ns": "mydb.$cmd", + "ui": { + "$binary": { + "base64": "f774YvKERIKXSJVH+xCtPw==", + "subType": "04" + } + }, + "o": { + "drop": "c5" + }, + "o2": { + "numRecords": 0 + }, + "ts": { + "$timestamp": { + "t": 1730733285, + "i": 1 + } + }, + "t": { + "$numberLong": "7" + }, + "v": { + "$numberLong": "2" + }, + "wall": { + "$date": "2024-11-04T15:14:45.192Z" + } +} +] diff --git a/pbm/oplog/testdata/ops_cmd_createIndexes_dropIndexes.json b/pbm/oplog/testdata/ops_cmd_createIndexes_dropIndexes.json new file mode 100644 index 000000000..f66290aad --- /dev/null +++ b/pbm/oplog/testdata/ops_cmd_createIndexes_dropIndexes.json @@ -0,0 +1,137 @@ +[ +{ + "op": "c", + "ns": "mydb.$cmd", + "ui": { + "$binary": { + "base64": "dh2oYeQGTUO5UiXZLQXVsQ==", + "subType": "04" + } + }, + "o": { + "createIndexes": "c1", + "v": 2, + "key": { + "fieldX": -1 + }, + "name": "fieldX_-1" + }, + "ts": { + "$timestamp": { + "t": 1730655388, + "i": 1 + } + }, + "t": { + "$numberLong": "1" + }, + "v": { + "$numberLong": "2" + }, + "wall": { + "$date": "2024-11-03T17:36:28.142Z" + } +}, +{ + "op": "c", + "ns": "mydb.$cmd", + "ui": { + "$binary": { + "base64": "dh2oYeQGTUO5UiXZLQXVsQ==", + "subType": "04" + } + }, + "o": { + "createIndexes": "c1", + "v": 2, + "key": { + "fieldY": 1 + }, + "name": "fieldY_1" + }, + "ts": { + "$timestamp": { + "t": 1730655389, + "i": 1 + } + }, + "t": { + "$numberLong": "1" + }, + "v": { + "$numberLong": "2" + }, + "wall": { + "$date": "2024-11-03T17:36:28.142Z" + } +}, +{ + "op": "c", + "ns": "mydb.$cmd", + "ui": { + "$binary": { + "base64": "dh2oYeQGTUO5UiXZLQXVsQ==", + "subType": "04" + } + }, + "o": { + "dropIndexes": "c1", + "index": "fieldY_1" + }, + "o2": { + "v": 2, + "key": { + "email": -1 + }, + "name": "fieldY_1" + }, + "ts": { + "$timestamp": { + "t": 1730655616, + "i": 1 + } + }, + "t": { + "$numberLong": "1" + }, + "v": { + "$numberLong": "2" + }, + "wall": { + "$date": "2024-11-03T17:40:16.298Z" + } +}, +{ + "op": "c", + "ns": "mydb.$cmd", + "ui": { + "$binary": { + "base64": "dh2oYeQGTUO5UiXZLQXVsQ==", + "subType": "04" + } + }, + "o": { + "createIndexes": "c1", + "v": 2, + "key": { + "fieldZ": 1 + }, + "name": "fieldZ_1" + }, + "ts": { + "$timestamp": { + "t": 1730655389, + "i": 1 + } + }, + "t": { + "$numberLong": "1" + }, + "v": { + "$numberLong": "2" + }, + "wall": { + "$date": "2024-11-03T17:36:28.142Z" + } +} +] diff --git a/pbm/oplog/testdata/ops_cmd_create_drop.json b/pbm/oplog/testdata/ops_cmd_create_drop.json new file mode 100644 index 000000000..30db804ee --- /dev/null +++ b/pbm/oplog/testdata/ops_cmd_create_drop.json @@ -0,0 +1,119 @@ +[ +{ + "op": "c", + "ns": "mydb.$cmd", + "ui": { + "$binary": { + "base64": "lguUWGLtQ7SO+//vs7Rlrw==", + "subType": "04" + } + }, + "o": { + "create": "c1", + "idIndex": { + "v": { + "$numberInt": "2" + }, + "key": { + "_id": { + "$numberInt": "1" + } + }, + "name": "_id_" + } + }, + "ts": { + "$timestamp": { + "t": 1732706704, + "i": 1 + } + }, + "t": { + "$numberLong": "2" + }, + "v": { + "$numberLong": "2" + }, + "wall": { + "$date": { + "$numberLong": "1732706704980" + } + } +}, +{ + "op": "c", + "ns": "mydb.$cmd", + "ui": { + "$binary": { + "base64": "v+mHa8niRBKG7Z+uqJGARQ==", + "subType": "04" + } + }, + "o": { + "drop": "c1" + }, + "o2": { + "numRecords": { + "$numberInt": "6" + } + }, + "ts": { + "$timestamp": { + "t": 1732706615, + "i": 1 + } + }, + "t": { + "$numberLong": "2" + }, + "v": { + "$numberLong": "2" + }, + "wall": { + "$date": { + "$numberLong": "1732706615502" + } + } +}, +{ + "op": "c", + "ns": "mydb.$cmd", + "ui": { + "$binary": { + "base64": "lguUWGLtQ7SO+//vs7Rlrw==", + "subType": "04" + } + }, + "o": { + "create": "c2", + "idIndex": { + "v": { + "$numberInt": "2" + }, + "key": { + "_id": { + "$numberInt": "1" + } + }, + "name": "_id_" + } + }, + "ts": { + "$timestamp": { + "t": 1732706704, + "i": 1 + } + }, + "t": { + "$numberLong": "2" + }, + "v": { + "$numberLong": "2" + }, + "wall": { + "$date": { + "$numberLong": "1732706704980" + } + } +} +] diff --git a/pbm/oplog/testdata/ops_i_u_d.json b/pbm/oplog/testdata/ops_i_u_d.json new file mode 100644 index 000000000..f13522a6d --- /dev/null +++ b/pbm/oplog/testdata/ops_i_u_d.json @@ -0,0 +1,261 @@ +[ +{ + "lsid": { + "id": { + "$binary": { + "base64": "HxXre7SSRxe8eq+OjOQOhw==", + "subType": "04" + } + }, + "uid": { + "$binary": { + "base64": "Bh/Anp+//gSHltMgOtOX+7sunrF/VwW+VDdA3fRANl0=", + "subType": "00" + } + } + }, + "txnNumber": { + "$numberLong": "4" + }, + "op": "i", + "ns": "mydb.c1", + "ui": { + "$binary": { + "base64": "f774YvKERIKXSJVH+xCtPw==", + "subType": "04" + } + }, + "o": { + "_id": { + "$oid": "6728e3fcedfb509c06f01307" + }, + "person": "jure", + "age": 37 + }, + "o2": { + "_id": { + "$oid": "6728e3fcedfb509c06f01307" + } + }, + "stmtId": 0, + "ts": { + "$timestamp": { + "t": 1730733052, + "i": 1 + } + }, + "t": { + "$numberLong": "7" + }, + "v": { + "$numberLong": "2" + }, + "wall": { + "$date": "2024-11-04T15:10:52.775Z" + }, + "prevOpTime": { + "ts": { + "$timestamp": { + "t": 0, + "i": 0 + } + }, + "t": { + "$numberLong": "-1" + } + } +}, +{ + "lsid": { + "id": { + "$binary": { + "base64": "HxXre7SSRxe8eq+OjOQOhw==", + "subType": "04" + } + }, + "uid": { + "$binary": { + "base64": "Bh/Anp+//gSHltMgOtOX+7sunrF/VwW+VDdA3fRANl0=", + "subType": "00" + } + } + }, + "txnNumber": { + "$numberLong": "5" + }, + "op": "u", + "ns": "mydb.c1", + "ui": { + "$binary": { + "base64": "f774YvKERIKXSJVH+xCtPw==", + "subType": "04" + } + }, + "o": { + "$v": 2, + "diff": { + "i": { + "city": "split" + } + } + }, + "o2": { + "_id": { + "$oid": "6728e3fcedfb509c06f01307" + } + }, + "stmtId": 0, + "ts": { + "$timestamp": { + "t": 1730733212, + "i": 1 + } + }, + "t": { + "$numberLong": "7" + }, + "v": { + "$numberLong": "2" + }, + "wall": { + "$date": "2024-11-04T15:13:32.123Z" + }, + "prevOpTime": { + "ts": { + "$timestamp": { + "t": 0, + "i": 0 + } + }, + "t": { + "$numberLong": "-1" + } + } +}, +{ + "lsid": { + "id": { + "$binary": { + "base64": "HxXre7SSRxe8eq+OjOQOhw==", + "subType": "04" + } + }, + "uid": { + "$binary": { + "base64": "Bh/Anp+//gSHltMgOtOX+7sunrF/VwW+VDdA3fRANl0=", + "subType": "00" + } + } + }, + "txnNumber": { + "$numberLong": "5" + }, + "op": "u", + "ns": "mydb.cX", + "ui": { + "$binary": { + "base64": "f774YvKERIKXSJVH+xCtPw==", + "subType": "04" + } + }, + "o": { + "$v": 2, + "diff": { + "i": { + "city": "split" + } + } + }, + "o2": { + "_id": { + "$oid": "6728e3fcedfb509c06f01307" + } + }, + "stmtId": 0, + "ts": { + "$timestamp": { + "t": 1730733212, + "i": 1 + } + }, + "t": { + "$numberLong": "7" + }, + "v": { + "$numberLong": "2" + }, + "wall": { + "$date": "2024-11-04T15:13:32.123Z" + }, + "prevOpTime": { + "ts": { + "$timestamp": { + "t": 0, + "i": 0 + } + }, + "t": { + "$numberLong": "-1" + } + } +}, +{ + "lsid": { + "id": { + "$binary": { + "base64": "HxXre7SSRxe8eq+OjOQOhw==", + "subType": "04" + } + }, + "uid": { + "$binary": { + "base64": "Bh/Anp+//gSHltMgOtOX+7sunrF/VwW+VDdA3fRANl0=", + "subType": "00" + } + } + }, + "txnNumber": { + "$numberLong": "6" + }, + "op": "d", + "ns": "mydb.c1", + "ui": { + "$binary": { + "base64": "f774YvKERIKXSJVH+xCtPw==", + "subType": "04" + } + }, + "o": { + "_id": { + "$oid": "6728e3fcedfb509c06f01307" + } + }, + "stmtId": 0, + "ts": { + "$timestamp": { + "t": 1730733256, + "i": 1 + } + }, + "t": { + "$numberLong": "7" + }, + "v": { + "$numberLong": "2" + }, + "wall": { + "$date": "2024-11-04T15:14:16.626Z" + }, + "prevOpTime": { + "ts": { + "$timestamp": { + "t": 0, + "i": 0 + } + }, + "t": { + "$numberLong": "-1" + } + } +} +] diff --git a/pbm/oplog/testdata/ops_n.json b/pbm/oplog/testdata/ops_n.json new file mode 100644 index 000000000..d05ed5648 --- /dev/null +++ b/pbm/oplog/testdata/ops_n.json @@ -0,0 +1,24 @@ +[ +{ + "op": "n", + "ns": "", + "o": { + "msg": "periodic noop" + }, + "ts": { + "$timestamp": { + "t": 1730733052, + "i": 1 + } + }, + "t": { + "$numberLong": "1" + }, + "v": { + "$numberLong": "2" + }, + "wall": { + "$date": "2024-11-04T15:10:52.775Z" + } +} +] diff --git a/vendor/github.com/davecgh/go-spew/LICENSE b/vendor/github.com/davecgh/go-spew/LICENSE new file mode 100644 index 000000000..bc52e96f2 --- /dev/null +++ b/vendor/github.com/davecgh/go-spew/LICENSE @@ -0,0 +1,15 @@ +ISC License + +Copyright (c) 2012-2016 Dave Collins + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/vendor/github.com/davecgh/go-spew/spew/bypass.go b/vendor/github.com/davecgh/go-spew/spew/bypass.go new file mode 100644 index 000000000..792994785 --- /dev/null +++ b/vendor/github.com/davecgh/go-spew/spew/bypass.go @@ -0,0 +1,145 @@ +// Copyright (c) 2015-2016 Dave Collins +// +// Permission to use, copy, modify, and distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +// NOTE: Due to the following build constraints, this file will only be compiled +// when the code is not running on Google App Engine, compiled by GopherJS, and +// "-tags safe" is not added to the go build command line. The "disableunsafe" +// tag is deprecated and thus should not be used. +// Go versions prior to 1.4 are disabled because they use a different layout +// for interfaces which make the implementation of unsafeReflectValue more complex. +// +build !js,!appengine,!safe,!disableunsafe,go1.4 + +package spew + +import ( + "reflect" + "unsafe" +) + +const ( + // UnsafeDisabled is a build-time constant which specifies whether or + // not access to the unsafe package is available. + UnsafeDisabled = false + + // ptrSize is the size of a pointer on the current arch. + ptrSize = unsafe.Sizeof((*byte)(nil)) +) + +type flag uintptr + +var ( + // flagRO indicates whether the value field of a reflect.Value + // is read-only. + flagRO flag + + // flagAddr indicates whether the address of the reflect.Value's + // value may be taken. + flagAddr flag +) + +// flagKindMask holds the bits that make up the kind +// part of the flags field. In all the supported versions, +// it is in the lower 5 bits. +const flagKindMask = flag(0x1f) + +// Different versions of Go have used different +// bit layouts for the flags type. This table +// records the known combinations. +var okFlags = []struct { + ro, addr flag +}{{ + // From Go 1.4 to 1.5 + ro: 1 << 5, + addr: 1 << 7, +}, { + // Up to Go tip. + ro: 1<<5 | 1<<6, + addr: 1 << 8, +}} + +var flagValOffset = func() uintptr { + field, ok := reflect.TypeOf(reflect.Value{}).FieldByName("flag") + if !ok { + panic("reflect.Value has no flag field") + } + return field.Offset +}() + +// flagField returns a pointer to the flag field of a reflect.Value. +func flagField(v *reflect.Value) *flag { + return (*flag)(unsafe.Pointer(uintptr(unsafe.Pointer(v)) + flagValOffset)) +} + +// unsafeReflectValue converts the passed reflect.Value into a one that bypasses +// the typical safety restrictions preventing access to unaddressable and +// unexported data. It works by digging the raw pointer to the underlying +// value out of the protected value and generating a new unprotected (unsafe) +// reflect.Value to it. +// +// This allows us to check for implementations of the Stringer and error +// interfaces to be used for pretty printing ordinarily unaddressable and +// inaccessible values such as unexported struct fields. +func unsafeReflectValue(v reflect.Value) reflect.Value { + if !v.IsValid() || (v.CanInterface() && v.CanAddr()) { + return v + } + flagFieldPtr := flagField(&v) + *flagFieldPtr &^= flagRO + *flagFieldPtr |= flagAddr + return v +} + +// Sanity checks against future reflect package changes +// to the type or semantics of the Value.flag field. +func init() { + field, ok := reflect.TypeOf(reflect.Value{}).FieldByName("flag") + if !ok { + panic("reflect.Value has no flag field") + } + if field.Type.Kind() != reflect.TypeOf(flag(0)).Kind() { + panic("reflect.Value flag field has changed kind") + } + type t0 int + var t struct { + A t0 + // t0 will have flagEmbedRO set. + t0 + // a will have flagStickyRO set + a t0 + } + vA := reflect.ValueOf(t).FieldByName("A") + va := reflect.ValueOf(t).FieldByName("a") + vt0 := reflect.ValueOf(t).FieldByName("t0") + + // Infer flagRO from the difference between the flags + // for the (otherwise identical) fields in t. + flagPublic := *flagField(&vA) + flagWithRO := *flagField(&va) | *flagField(&vt0) + flagRO = flagPublic ^ flagWithRO + + // Infer flagAddr from the difference between a value + // taken from a pointer and not. + vPtrA := reflect.ValueOf(&t).Elem().FieldByName("A") + flagNoPtr := *flagField(&vA) + flagPtr := *flagField(&vPtrA) + flagAddr = flagNoPtr ^ flagPtr + + // Check that the inferred flags tally with one of the known versions. + for _, f := range okFlags { + if flagRO == f.ro && flagAddr == f.addr { + return + } + } + panic("reflect.Value read-only flag has changed semantics") +} diff --git a/vendor/github.com/davecgh/go-spew/spew/bypasssafe.go b/vendor/github.com/davecgh/go-spew/spew/bypasssafe.go new file mode 100644 index 000000000..205c28d68 --- /dev/null +++ b/vendor/github.com/davecgh/go-spew/spew/bypasssafe.go @@ -0,0 +1,38 @@ +// Copyright (c) 2015-2016 Dave Collins +// +// Permission to use, copy, modify, and distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +// NOTE: Due to the following build constraints, this file will only be compiled +// when the code is running on Google App Engine, compiled by GopherJS, or +// "-tags safe" is added to the go build command line. The "disableunsafe" +// tag is deprecated and thus should not be used. +// +build js appengine safe disableunsafe !go1.4 + +package spew + +import "reflect" + +const ( + // UnsafeDisabled is a build-time constant which specifies whether or + // not access to the unsafe package is available. + UnsafeDisabled = true +) + +// unsafeReflectValue typically converts the passed reflect.Value into a one +// that bypasses the typical safety restrictions preventing access to +// unaddressable and unexported data. However, doing this relies on access to +// the unsafe package. This is a stub version which simply returns the passed +// reflect.Value when the unsafe package is not available. +func unsafeReflectValue(v reflect.Value) reflect.Value { + return v +} diff --git a/vendor/github.com/davecgh/go-spew/spew/common.go b/vendor/github.com/davecgh/go-spew/spew/common.go new file mode 100644 index 000000000..1be8ce945 --- /dev/null +++ b/vendor/github.com/davecgh/go-spew/spew/common.go @@ -0,0 +1,341 @@ +/* + * Copyright (c) 2013-2016 Dave Collins + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +package spew + +import ( + "bytes" + "fmt" + "io" + "reflect" + "sort" + "strconv" +) + +// Some constants in the form of bytes to avoid string overhead. This mirrors +// the technique used in the fmt package. +var ( + panicBytes = []byte("(PANIC=") + plusBytes = []byte("+") + iBytes = []byte("i") + trueBytes = []byte("true") + falseBytes = []byte("false") + interfaceBytes = []byte("(interface {})") + commaNewlineBytes = []byte(",\n") + newlineBytes = []byte("\n") + openBraceBytes = []byte("{") + openBraceNewlineBytes = []byte("{\n") + closeBraceBytes = []byte("}") + asteriskBytes = []byte("*") + colonBytes = []byte(":") + colonSpaceBytes = []byte(": ") + openParenBytes = []byte("(") + closeParenBytes = []byte(")") + spaceBytes = []byte(" ") + pointerChainBytes = []byte("->") + nilAngleBytes = []byte("") + maxNewlineBytes = []byte("\n") + maxShortBytes = []byte("") + circularBytes = []byte("") + circularShortBytes = []byte("") + invalidAngleBytes = []byte("") + openBracketBytes = []byte("[") + closeBracketBytes = []byte("]") + percentBytes = []byte("%") + precisionBytes = []byte(".") + openAngleBytes = []byte("<") + closeAngleBytes = []byte(">") + openMapBytes = []byte("map[") + closeMapBytes = []byte("]") + lenEqualsBytes = []byte("len=") + capEqualsBytes = []byte("cap=") +) + +// hexDigits is used to map a decimal value to a hex digit. +var hexDigits = "0123456789abcdef" + +// catchPanic handles any panics that might occur during the handleMethods +// calls. +func catchPanic(w io.Writer, v reflect.Value) { + if err := recover(); err != nil { + w.Write(panicBytes) + fmt.Fprintf(w, "%v", err) + w.Write(closeParenBytes) + } +} + +// handleMethods attempts to call the Error and String methods on the underlying +// type the passed reflect.Value represents and outputes the result to Writer w. +// +// It handles panics in any called methods by catching and displaying the error +// as the formatted value. +func handleMethods(cs *ConfigState, w io.Writer, v reflect.Value) (handled bool) { + // We need an interface to check if the type implements the error or + // Stringer interface. However, the reflect package won't give us an + // interface on certain things like unexported struct fields in order + // to enforce visibility rules. We use unsafe, when it's available, + // to bypass these restrictions since this package does not mutate the + // values. + if !v.CanInterface() { + if UnsafeDisabled { + return false + } + + v = unsafeReflectValue(v) + } + + // Choose whether or not to do error and Stringer interface lookups against + // the base type or a pointer to the base type depending on settings. + // Technically calling one of these methods with a pointer receiver can + // mutate the value, however, types which choose to satisify an error or + // Stringer interface with a pointer receiver should not be mutating their + // state inside these interface methods. + if !cs.DisablePointerMethods && !UnsafeDisabled && !v.CanAddr() { + v = unsafeReflectValue(v) + } + if v.CanAddr() { + v = v.Addr() + } + + // Is it an error or Stringer? + switch iface := v.Interface().(type) { + case error: + defer catchPanic(w, v) + if cs.ContinueOnMethod { + w.Write(openParenBytes) + w.Write([]byte(iface.Error())) + w.Write(closeParenBytes) + w.Write(spaceBytes) + return false + } + + w.Write([]byte(iface.Error())) + return true + + case fmt.Stringer: + defer catchPanic(w, v) + if cs.ContinueOnMethod { + w.Write(openParenBytes) + w.Write([]byte(iface.String())) + w.Write(closeParenBytes) + w.Write(spaceBytes) + return false + } + w.Write([]byte(iface.String())) + return true + } + return false +} + +// printBool outputs a boolean value as true or false to Writer w. +func printBool(w io.Writer, val bool) { + if val { + w.Write(trueBytes) + } else { + w.Write(falseBytes) + } +} + +// printInt outputs a signed integer value to Writer w. +func printInt(w io.Writer, val int64, base int) { + w.Write([]byte(strconv.FormatInt(val, base))) +} + +// printUint outputs an unsigned integer value to Writer w. +func printUint(w io.Writer, val uint64, base int) { + w.Write([]byte(strconv.FormatUint(val, base))) +} + +// printFloat outputs a floating point value using the specified precision, +// which is expected to be 32 or 64bit, to Writer w. +func printFloat(w io.Writer, val float64, precision int) { + w.Write([]byte(strconv.FormatFloat(val, 'g', -1, precision))) +} + +// printComplex outputs a complex value using the specified float precision +// for the real and imaginary parts to Writer w. +func printComplex(w io.Writer, c complex128, floatPrecision int) { + r := real(c) + w.Write(openParenBytes) + w.Write([]byte(strconv.FormatFloat(r, 'g', -1, floatPrecision))) + i := imag(c) + if i >= 0 { + w.Write(plusBytes) + } + w.Write([]byte(strconv.FormatFloat(i, 'g', -1, floatPrecision))) + w.Write(iBytes) + w.Write(closeParenBytes) +} + +// printHexPtr outputs a uintptr formatted as hexadecimal with a leading '0x' +// prefix to Writer w. +func printHexPtr(w io.Writer, p uintptr) { + // Null pointer. + num := uint64(p) + if num == 0 { + w.Write(nilAngleBytes) + return + } + + // Max uint64 is 16 bytes in hex + 2 bytes for '0x' prefix + buf := make([]byte, 18) + + // It's simpler to construct the hex string right to left. + base := uint64(16) + i := len(buf) - 1 + for num >= base { + buf[i] = hexDigits[num%base] + num /= base + i-- + } + buf[i] = hexDigits[num] + + // Add '0x' prefix. + i-- + buf[i] = 'x' + i-- + buf[i] = '0' + + // Strip unused leading bytes. + buf = buf[i:] + w.Write(buf) +} + +// valuesSorter implements sort.Interface to allow a slice of reflect.Value +// elements to be sorted. +type valuesSorter struct { + values []reflect.Value + strings []string // either nil or same len and values + cs *ConfigState +} + +// newValuesSorter initializes a valuesSorter instance, which holds a set of +// surrogate keys on which the data should be sorted. It uses flags in +// ConfigState to decide if and how to populate those surrogate keys. +func newValuesSorter(values []reflect.Value, cs *ConfigState) sort.Interface { + vs := &valuesSorter{values: values, cs: cs} + if canSortSimply(vs.values[0].Kind()) { + return vs + } + if !cs.DisableMethods { + vs.strings = make([]string, len(values)) + for i := range vs.values { + b := bytes.Buffer{} + if !handleMethods(cs, &b, vs.values[i]) { + vs.strings = nil + break + } + vs.strings[i] = b.String() + } + } + if vs.strings == nil && cs.SpewKeys { + vs.strings = make([]string, len(values)) + for i := range vs.values { + vs.strings[i] = Sprintf("%#v", vs.values[i].Interface()) + } + } + return vs +} + +// canSortSimply tests whether a reflect.Kind is a primitive that can be sorted +// directly, or whether it should be considered for sorting by surrogate keys +// (if the ConfigState allows it). +func canSortSimply(kind reflect.Kind) bool { + // This switch parallels valueSortLess, except for the default case. + switch kind { + case reflect.Bool: + return true + case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int: + return true + case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint: + return true + case reflect.Float32, reflect.Float64: + return true + case reflect.String: + return true + case reflect.Uintptr: + return true + case reflect.Array: + return true + } + return false +} + +// Len returns the number of values in the slice. It is part of the +// sort.Interface implementation. +func (s *valuesSorter) Len() int { + return len(s.values) +} + +// Swap swaps the values at the passed indices. It is part of the +// sort.Interface implementation. +func (s *valuesSorter) Swap(i, j int) { + s.values[i], s.values[j] = s.values[j], s.values[i] + if s.strings != nil { + s.strings[i], s.strings[j] = s.strings[j], s.strings[i] + } +} + +// valueSortLess returns whether the first value should sort before the second +// value. It is used by valueSorter.Less as part of the sort.Interface +// implementation. +func valueSortLess(a, b reflect.Value) bool { + switch a.Kind() { + case reflect.Bool: + return !a.Bool() && b.Bool() + case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int: + return a.Int() < b.Int() + case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint: + return a.Uint() < b.Uint() + case reflect.Float32, reflect.Float64: + return a.Float() < b.Float() + case reflect.String: + return a.String() < b.String() + case reflect.Uintptr: + return a.Uint() < b.Uint() + case reflect.Array: + // Compare the contents of both arrays. + l := a.Len() + for i := 0; i < l; i++ { + av := a.Index(i) + bv := b.Index(i) + if av.Interface() == bv.Interface() { + continue + } + return valueSortLess(av, bv) + } + } + return a.String() < b.String() +} + +// Less returns whether the value at index i should sort before the +// value at index j. It is part of the sort.Interface implementation. +func (s *valuesSorter) Less(i, j int) bool { + if s.strings == nil { + return valueSortLess(s.values[i], s.values[j]) + } + return s.strings[i] < s.strings[j] +} + +// sortValues is a sort function that handles both native types and any type that +// can be converted to error or Stringer. Other inputs are sorted according to +// their Value.String() value to ensure display stability. +func sortValues(values []reflect.Value, cs *ConfigState) { + if len(values) == 0 { + return + } + sort.Sort(newValuesSorter(values, cs)) +} diff --git a/vendor/github.com/davecgh/go-spew/spew/config.go b/vendor/github.com/davecgh/go-spew/spew/config.go new file mode 100644 index 000000000..2e3d22f31 --- /dev/null +++ b/vendor/github.com/davecgh/go-spew/spew/config.go @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2013-2016 Dave Collins + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +package spew + +import ( + "bytes" + "fmt" + "io" + "os" +) + +// ConfigState houses the configuration options used by spew to format and +// display values. There is a global instance, Config, that is used to control +// all top-level Formatter and Dump functionality. Each ConfigState instance +// provides methods equivalent to the top-level functions. +// +// The zero value for ConfigState provides no indentation. You would typically +// want to set it to a space or a tab. +// +// Alternatively, you can use NewDefaultConfig to get a ConfigState instance +// with default settings. See the documentation of NewDefaultConfig for default +// values. +type ConfigState struct { + // Indent specifies the string to use for each indentation level. The + // global config instance that all top-level functions use set this to a + // single space by default. If you would like more indentation, you might + // set this to a tab with "\t" or perhaps two spaces with " ". + Indent string + + // MaxDepth controls the maximum number of levels to descend into nested + // data structures. The default, 0, means there is no limit. + // + // NOTE: Circular data structures are properly detected, so it is not + // necessary to set this value unless you specifically want to limit deeply + // nested data structures. + MaxDepth int + + // DisableMethods specifies whether or not error and Stringer interfaces are + // invoked for types that implement them. + DisableMethods bool + + // DisablePointerMethods specifies whether or not to check for and invoke + // error and Stringer interfaces on types which only accept a pointer + // receiver when the current type is not a pointer. + // + // NOTE: This might be an unsafe action since calling one of these methods + // with a pointer receiver could technically mutate the value, however, + // in practice, types which choose to satisify an error or Stringer + // interface with a pointer receiver should not be mutating their state + // inside these interface methods. As a result, this option relies on + // access to the unsafe package, so it will not have any effect when + // running in environments without access to the unsafe package such as + // Google App Engine or with the "safe" build tag specified. + DisablePointerMethods bool + + // DisablePointerAddresses specifies whether to disable the printing of + // pointer addresses. This is useful when diffing data structures in tests. + DisablePointerAddresses bool + + // DisableCapacities specifies whether to disable the printing of capacities + // for arrays, slices, maps and channels. This is useful when diffing + // data structures in tests. + DisableCapacities bool + + // ContinueOnMethod specifies whether or not recursion should continue once + // a custom error or Stringer interface is invoked. The default, false, + // means it will print the results of invoking the custom error or Stringer + // interface and return immediately instead of continuing to recurse into + // the internals of the data type. + // + // NOTE: This flag does not have any effect if method invocation is disabled + // via the DisableMethods or DisablePointerMethods options. + ContinueOnMethod bool + + // SortKeys specifies map keys should be sorted before being printed. Use + // this to have a more deterministic, diffable output. Note that only + // native types (bool, int, uint, floats, uintptr and string) and types + // that support the error or Stringer interfaces (if methods are + // enabled) are supported, with other types sorted according to the + // reflect.Value.String() output which guarantees display stability. + SortKeys bool + + // SpewKeys specifies that, as a last resort attempt, map keys should + // be spewed to strings and sorted by those strings. This is only + // considered if SortKeys is true. + SpewKeys bool +} + +// Config is the active configuration of the top-level functions. +// The configuration can be changed by modifying the contents of spew.Config. +var Config = ConfigState{Indent: " "} + +// Errorf is a wrapper for fmt.Errorf that treats each argument as if it were +// passed with a Formatter interface returned by c.NewFormatter. It returns +// the formatted string as a value that satisfies error. See NewFormatter +// for formatting details. +// +// This function is shorthand for the following syntax: +// +// fmt.Errorf(format, c.NewFormatter(a), c.NewFormatter(b)) +func (c *ConfigState) Errorf(format string, a ...interface{}) (err error) { + return fmt.Errorf(format, c.convertArgs(a)...) +} + +// Fprint is a wrapper for fmt.Fprint that treats each argument as if it were +// passed with a Formatter interface returned by c.NewFormatter. It returns +// the number of bytes written and any write error encountered. See +// NewFormatter for formatting details. +// +// This function is shorthand for the following syntax: +// +// fmt.Fprint(w, c.NewFormatter(a), c.NewFormatter(b)) +func (c *ConfigState) Fprint(w io.Writer, a ...interface{}) (n int, err error) { + return fmt.Fprint(w, c.convertArgs(a)...) +} + +// Fprintf is a wrapper for fmt.Fprintf that treats each argument as if it were +// passed with a Formatter interface returned by c.NewFormatter. It returns +// the number of bytes written and any write error encountered. See +// NewFormatter for formatting details. +// +// This function is shorthand for the following syntax: +// +// fmt.Fprintf(w, format, c.NewFormatter(a), c.NewFormatter(b)) +func (c *ConfigState) Fprintf(w io.Writer, format string, a ...interface{}) (n int, err error) { + return fmt.Fprintf(w, format, c.convertArgs(a)...) +} + +// Fprintln is a wrapper for fmt.Fprintln that treats each argument as if it +// passed with a Formatter interface returned by c.NewFormatter. See +// NewFormatter for formatting details. +// +// This function is shorthand for the following syntax: +// +// fmt.Fprintln(w, c.NewFormatter(a), c.NewFormatter(b)) +func (c *ConfigState) Fprintln(w io.Writer, a ...interface{}) (n int, err error) { + return fmt.Fprintln(w, c.convertArgs(a)...) +} + +// Print is a wrapper for fmt.Print that treats each argument as if it were +// passed with a Formatter interface returned by c.NewFormatter. It returns +// the number of bytes written and any write error encountered. See +// NewFormatter for formatting details. +// +// This function is shorthand for the following syntax: +// +// fmt.Print(c.NewFormatter(a), c.NewFormatter(b)) +func (c *ConfigState) Print(a ...interface{}) (n int, err error) { + return fmt.Print(c.convertArgs(a)...) +} + +// Printf is a wrapper for fmt.Printf that treats each argument as if it were +// passed with a Formatter interface returned by c.NewFormatter. It returns +// the number of bytes written and any write error encountered. See +// NewFormatter for formatting details. +// +// This function is shorthand for the following syntax: +// +// fmt.Printf(format, c.NewFormatter(a), c.NewFormatter(b)) +func (c *ConfigState) Printf(format string, a ...interface{}) (n int, err error) { + return fmt.Printf(format, c.convertArgs(a)...) +} + +// Println is a wrapper for fmt.Println that treats each argument as if it were +// passed with a Formatter interface returned by c.NewFormatter. It returns +// the number of bytes written and any write error encountered. See +// NewFormatter for formatting details. +// +// This function is shorthand for the following syntax: +// +// fmt.Println(c.NewFormatter(a), c.NewFormatter(b)) +func (c *ConfigState) Println(a ...interface{}) (n int, err error) { + return fmt.Println(c.convertArgs(a)...) +} + +// Sprint is a wrapper for fmt.Sprint that treats each argument as if it were +// passed with a Formatter interface returned by c.NewFormatter. It returns +// the resulting string. See NewFormatter for formatting details. +// +// This function is shorthand for the following syntax: +// +// fmt.Sprint(c.NewFormatter(a), c.NewFormatter(b)) +func (c *ConfigState) Sprint(a ...interface{}) string { + return fmt.Sprint(c.convertArgs(a)...) +} + +// Sprintf is a wrapper for fmt.Sprintf that treats each argument as if it were +// passed with a Formatter interface returned by c.NewFormatter. It returns +// the resulting string. See NewFormatter for formatting details. +// +// This function is shorthand for the following syntax: +// +// fmt.Sprintf(format, c.NewFormatter(a), c.NewFormatter(b)) +func (c *ConfigState) Sprintf(format string, a ...interface{}) string { + return fmt.Sprintf(format, c.convertArgs(a)...) +} + +// Sprintln is a wrapper for fmt.Sprintln that treats each argument as if it +// were passed with a Formatter interface returned by c.NewFormatter. It +// returns the resulting string. See NewFormatter for formatting details. +// +// This function is shorthand for the following syntax: +// +// fmt.Sprintln(c.NewFormatter(a), c.NewFormatter(b)) +func (c *ConfigState) Sprintln(a ...interface{}) string { + return fmt.Sprintln(c.convertArgs(a)...) +} + +/* +NewFormatter returns a custom formatter that satisfies the fmt.Formatter +interface. As a result, it integrates cleanly with standard fmt package +printing functions. The formatter is useful for inline printing of smaller data +types similar to the standard %v format specifier. + +The custom formatter only responds to the %v (most compact), %+v (adds pointer +addresses), %#v (adds types), and %#+v (adds types and pointer addresses) verb +combinations. Any other verbs such as %x and %q will be sent to the the +standard fmt package for formatting. In addition, the custom formatter ignores +the width and precision arguments (however they will still work on the format +specifiers not handled by the custom formatter). + +Typically this function shouldn't be called directly. It is much easier to make +use of the custom formatter by calling one of the convenience functions such as +c.Printf, c.Println, or c.Printf. +*/ +func (c *ConfigState) NewFormatter(v interface{}) fmt.Formatter { + return newFormatter(c, v) +} + +// Fdump formats and displays the passed arguments to io.Writer w. It formats +// exactly the same as Dump. +func (c *ConfigState) Fdump(w io.Writer, a ...interface{}) { + fdump(c, w, a...) +} + +/* +Dump displays the passed parameters to standard out with newlines, customizable +indentation, and additional debug information such as complete types and all +pointer addresses used to indirect to the final value. It provides the +following features over the built-in printing facilities provided by the fmt +package: + + * Pointers are dereferenced and followed + * Circular data structures are detected and handled properly + * Custom Stringer/error interfaces are optionally invoked, including + on unexported types + * Custom types which only implement the Stringer/error interfaces via + a pointer receiver are optionally invoked when passing non-pointer + variables + * Byte arrays and slices are dumped like the hexdump -C command which + includes offsets, byte values in hex, and ASCII output + +The configuration options are controlled by modifying the public members +of c. See ConfigState for options documentation. + +See Fdump if you would prefer dumping to an arbitrary io.Writer or Sdump to +get the formatted result as a string. +*/ +func (c *ConfigState) Dump(a ...interface{}) { + fdump(c, os.Stdout, a...) +} + +// Sdump returns a string with the passed arguments formatted exactly the same +// as Dump. +func (c *ConfigState) Sdump(a ...interface{}) string { + var buf bytes.Buffer + fdump(c, &buf, a...) + return buf.String() +} + +// convertArgs accepts a slice of arguments and returns a slice of the same +// length with each argument converted to a spew Formatter interface using +// the ConfigState associated with s. +func (c *ConfigState) convertArgs(args []interface{}) (formatters []interface{}) { + formatters = make([]interface{}, len(args)) + for index, arg := range args { + formatters[index] = newFormatter(c, arg) + } + return formatters +} + +// NewDefaultConfig returns a ConfigState with the following default settings. +// +// Indent: " " +// MaxDepth: 0 +// DisableMethods: false +// DisablePointerMethods: false +// ContinueOnMethod: false +// SortKeys: false +func NewDefaultConfig() *ConfigState { + return &ConfigState{Indent: " "} +} diff --git a/vendor/github.com/davecgh/go-spew/spew/doc.go b/vendor/github.com/davecgh/go-spew/spew/doc.go new file mode 100644 index 000000000..aacaac6f1 --- /dev/null +++ b/vendor/github.com/davecgh/go-spew/spew/doc.go @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2013-2016 Dave Collins + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* +Package spew implements a deep pretty printer for Go data structures to aid in +debugging. + +A quick overview of the additional features spew provides over the built-in +printing facilities for Go data types are as follows: + + * Pointers are dereferenced and followed + * Circular data structures are detected and handled properly + * Custom Stringer/error interfaces are optionally invoked, including + on unexported types + * Custom types which only implement the Stringer/error interfaces via + a pointer receiver are optionally invoked when passing non-pointer + variables + * Byte arrays and slices are dumped like the hexdump -C command which + includes offsets, byte values in hex, and ASCII output (only when using + Dump style) + +There are two different approaches spew allows for dumping Go data structures: + + * Dump style which prints with newlines, customizable indentation, + and additional debug information such as types and all pointer addresses + used to indirect to the final value + * A custom Formatter interface that integrates cleanly with the standard fmt + package and replaces %v, %+v, %#v, and %#+v to provide inline printing + similar to the default %v while providing the additional functionality + outlined above and passing unsupported format verbs such as %x and %q + along to fmt + +Quick Start + +This section demonstrates how to quickly get started with spew. See the +sections below for further details on formatting and configuration options. + +To dump a variable with full newlines, indentation, type, and pointer +information use Dump, Fdump, or Sdump: + spew.Dump(myVar1, myVar2, ...) + spew.Fdump(someWriter, myVar1, myVar2, ...) + str := spew.Sdump(myVar1, myVar2, ...) + +Alternatively, if you would prefer to use format strings with a compacted inline +printing style, use the convenience wrappers Printf, Fprintf, etc with +%v (most compact), %+v (adds pointer addresses), %#v (adds types), or +%#+v (adds types and pointer addresses): + spew.Printf("myVar1: %v -- myVar2: %+v", myVar1, myVar2) + spew.Printf("myVar3: %#v -- myVar4: %#+v", myVar3, myVar4) + spew.Fprintf(someWriter, "myVar1: %v -- myVar2: %+v", myVar1, myVar2) + spew.Fprintf(someWriter, "myVar3: %#v -- myVar4: %#+v", myVar3, myVar4) + +Configuration Options + +Configuration of spew is handled by fields in the ConfigState type. For +convenience, all of the top-level functions use a global state available +via the spew.Config global. + +It is also possible to create a ConfigState instance that provides methods +equivalent to the top-level functions. This allows concurrent configuration +options. See the ConfigState documentation for more details. + +The following configuration options are available: + * Indent + String to use for each indentation level for Dump functions. + It is a single space by default. A popular alternative is "\t". + + * MaxDepth + Maximum number of levels to descend into nested data structures. + There is no limit by default. + + * DisableMethods + Disables invocation of error and Stringer interface methods. + Method invocation is enabled by default. + + * DisablePointerMethods + Disables invocation of error and Stringer interface methods on types + which only accept pointer receivers from non-pointer variables. + Pointer method invocation is enabled by default. + + * DisablePointerAddresses + DisablePointerAddresses specifies whether to disable the printing of + pointer addresses. This is useful when diffing data structures in tests. + + * DisableCapacities + DisableCapacities specifies whether to disable the printing of + capacities for arrays, slices, maps and channels. This is useful when + diffing data structures in tests. + + * ContinueOnMethod + Enables recursion into types after invoking error and Stringer interface + methods. Recursion after method invocation is disabled by default. + + * SortKeys + Specifies map keys should be sorted before being printed. Use + this to have a more deterministic, diffable output. Note that + only native types (bool, int, uint, floats, uintptr and string) + and types which implement error or Stringer interfaces are + supported with other types sorted according to the + reflect.Value.String() output which guarantees display + stability. Natural map order is used by default. + + * SpewKeys + Specifies that, as a last resort attempt, map keys should be + spewed to strings and sorted by those strings. This is only + considered if SortKeys is true. + +Dump Usage + +Simply call spew.Dump with a list of variables you want to dump: + + spew.Dump(myVar1, myVar2, ...) + +You may also call spew.Fdump if you would prefer to output to an arbitrary +io.Writer. For example, to dump to standard error: + + spew.Fdump(os.Stderr, myVar1, myVar2, ...) + +A third option is to call spew.Sdump to get the formatted output as a string: + + str := spew.Sdump(myVar1, myVar2, ...) + +Sample Dump Output + +See the Dump example for details on the setup of the types and variables being +shown here. + + (main.Foo) { + unexportedField: (*main.Bar)(0xf84002e210)({ + flag: (main.Flag) flagTwo, + data: (uintptr) + }), + ExportedField: (map[interface {}]interface {}) (len=1) { + (string) (len=3) "one": (bool) true + } + } + +Byte (and uint8) arrays and slices are displayed uniquely like the hexdump -C +command as shown. + ([]uint8) (len=32 cap=32) { + 00000000 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20 |............... | + 00000010 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f 30 |!"#$%&'()*+,-./0| + 00000020 31 32 |12| + } + +Custom Formatter + +Spew provides a custom formatter that implements the fmt.Formatter interface +so that it integrates cleanly with standard fmt package printing functions. The +formatter is useful for inline printing of smaller data types similar to the +standard %v format specifier. + +The custom formatter only responds to the %v (most compact), %+v (adds pointer +addresses), %#v (adds types), or %#+v (adds types and pointer addresses) verb +combinations. Any other verbs such as %x and %q will be sent to the the +standard fmt package for formatting. In addition, the custom formatter ignores +the width and precision arguments (however they will still work on the format +specifiers not handled by the custom formatter). + +Custom Formatter Usage + +The simplest way to make use of the spew custom formatter is to call one of the +convenience functions such as spew.Printf, spew.Println, or spew.Printf. The +functions have syntax you are most likely already familiar with: + + spew.Printf("myVar1: %v -- myVar2: %+v", myVar1, myVar2) + spew.Printf("myVar3: %#v -- myVar4: %#+v", myVar3, myVar4) + spew.Println(myVar, myVar2) + spew.Fprintf(os.Stderr, "myVar1: %v -- myVar2: %+v", myVar1, myVar2) + spew.Fprintf(os.Stderr, "myVar3: %#v -- myVar4: %#+v", myVar3, myVar4) + +See the Index for the full list convenience functions. + +Sample Formatter Output + +Double pointer to a uint8: + %v: <**>5 + %+v: <**>(0xf8400420d0->0xf8400420c8)5 + %#v: (**uint8)5 + %#+v: (**uint8)(0xf8400420d0->0xf8400420c8)5 + +Pointer to circular struct with a uint8 field and a pointer to itself: + %v: <*>{1 <*>} + %+v: <*>(0xf84003e260){ui8:1 c:<*>(0xf84003e260)} + %#v: (*main.circular){ui8:(uint8)1 c:(*main.circular)} + %#+v: (*main.circular)(0xf84003e260){ui8:(uint8)1 c:(*main.circular)(0xf84003e260)} + +See the Printf example for details on the setup of variables being shown +here. + +Errors + +Since it is possible for custom Stringer/error interfaces to panic, spew +detects them and handles them internally by printing the panic information +inline with the output. Since spew is intended to provide deep pretty printing +capabilities on structures, it intentionally does not return any errors. +*/ +package spew diff --git a/vendor/github.com/davecgh/go-spew/spew/dump.go b/vendor/github.com/davecgh/go-spew/spew/dump.go new file mode 100644 index 000000000..f78d89fc1 --- /dev/null +++ b/vendor/github.com/davecgh/go-spew/spew/dump.go @@ -0,0 +1,509 @@ +/* + * Copyright (c) 2013-2016 Dave Collins + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +package spew + +import ( + "bytes" + "encoding/hex" + "fmt" + "io" + "os" + "reflect" + "regexp" + "strconv" + "strings" +) + +var ( + // uint8Type is a reflect.Type representing a uint8. It is used to + // convert cgo types to uint8 slices for hexdumping. + uint8Type = reflect.TypeOf(uint8(0)) + + // cCharRE is a regular expression that matches a cgo char. + // It is used to detect character arrays to hexdump them. + cCharRE = regexp.MustCompile(`^.*\._Ctype_char$`) + + // cUnsignedCharRE is a regular expression that matches a cgo unsigned + // char. It is used to detect unsigned character arrays to hexdump + // them. + cUnsignedCharRE = regexp.MustCompile(`^.*\._Ctype_unsignedchar$`) + + // cUint8tCharRE is a regular expression that matches a cgo uint8_t. + // It is used to detect uint8_t arrays to hexdump them. + cUint8tCharRE = regexp.MustCompile(`^.*\._Ctype_uint8_t$`) +) + +// dumpState contains information about the state of a dump operation. +type dumpState struct { + w io.Writer + depth int + pointers map[uintptr]int + ignoreNextType bool + ignoreNextIndent bool + cs *ConfigState +} + +// indent performs indentation according to the depth level and cs.Indent +// option. +func (d *dumpState) indent() { + if d.ignoreNextIndent { + d.ignoreNextIndent = false + return + } + d.w.Write(bytes.Repeat([]byte(d.cs.Indent), d.depth)) +} + +// unpackValue returns values inside of non-nil interfaces when possible. +// This is useful for data types like structs, arrays, slices, and maps which +// can contain varying types packed inside an interface. +func (d *dumpState) unpackValue(v reflect.Value) reflect.Value { + if v.Kind() == reflect.Interface && !v.IsNil() { + v = v.Elem() + } + return v +} + +// dumpPtr handles formatting of pointers by indirecting them as necessary. +func (d *dumpState) dumpPtr(v reflect.Value) { + // Remove pointers at or below the current depth from map used to detect + // circular refs. + for k, depth := range d.pointers { + if depth >= d.depth { + delete(d.pointers, k) + } + } + + // Keep list of all dereferenced pointers to show later. + pointerChain := make([]uintptr, 0) + + // Figure out how many levels of indirection there are by dereferencing + // pointers and unpacking interfaces down the chain while detecting circular + // references. + nilFound := false + cycleFound := false + indirects := 0 + ve := v + for ve.Kind() == reflect.Ptr { + if ve.IsNil() { + nilFound = true + break + } + indirects++ + addr := ve.Pointer() + pointerChain = append(pointerChain, addr) + if pd, ok := d.pointers[addr]; ok && pd < d.depth { + cycleFound = true + indirects-- + break + } + d.pointers[addr] = d.depth + + ve = ve.Elem() + if ve.Kind() == reflect.Interface { + if ve.IsNil() { + nilFound = true + break + } + ve = ve.Elem() + } + } + + // Display type information. + d.w.Write(openParenBytes) + d.w.Write(bytes.Repeat(asteriskBytes, indirects)) + d.w.Write([]byte(ve.Type().String())) + d.w.Write(closeParenBytes) + + // Display pointer information. + if !d.cs.DisablePointerAddresses && len(pointerChain) > 0 { + d.w.Write(openParenBytes) + for i, addr := range pointerChain { + if i > 0 { + d.w.Write(pointerChainBytes) + } + printHexPtr(d.w, addr) + } + d.w.Write(closeParenBytes) + } + + // Display dereferenced value. + d.w.Write(openParenBytes) + switch { + case nilFound: + d.w.Write(nilAngleBytes) + + case cycleFound: + d.w.Write(circularBytes) + + default: + d.ignoreNextType = true + d.dump(ve) + } + d.w.Write(closeParenBytes) +} + +// dumpSlice handles formatting of arrays and slices. Byte (uint8 under +// reflection) arrays and slices are dumped in hexdump -C fashion. +func (d *dumpState) dumpSlice(v reflect.Value) { + // Determine whether this type should be hex dumped or not. Also, + // for types which should be hexdumped, try to use the underlying data + // first, then fall back to trying to convert them to a uint8 slice. + var buf []uint8 + doConvert := false + doHexDump := false + numEntries := v.Len() + if numEntries > 0 { + vt := v.Index(0).Type() + vts := vt.String() + switch { + // C types that need to be converted. + case cCharRE.MatchString(vts): + fallthrough + case cUnsignedCharRE.MatchString(vts): + fallthrough + case cUint8tCharRE.MatchString(vts): + doConvert = true + + // Try to use existing uint8 slices and fall back to converting + // and copying if that fails. + case vt.Kind() == reflect.Uint8: + // We need an addressable interface to convert the type + // to a byte slice. However, the reflect package won't + // give us an interface on certain things like + // unexported struct fields in order to enforce + // visibility rules. We use unsafe, when available, to + // bypass these restrictions since this package does not + // mutate the values. + vs := v + if !vs.CanInterface() || !vs.CanAddr() { + vs = unsafeReflectValue(vs) + } + if !UnsafeDisabled { + vs = vs.Slice(0, numEntries) + + // Use the existing uint8 slice if it can be + // type asserted. + iface := vs.Interface() + if slice, ok := iface.([]uint8); ok { + buf = slice + doHexDump = true + break + } + } + + // The underlying data needs to be converted if it can't + // be type asserted to a uint8 slice. + doConvert = true + } + + // Copy and convert the underlying type if needed. + if doConvert && vt.ConvertibleTo(uint8Type) { + // Convert and copy each element into a uint8 byte + // slice. + buf = make([]uint8, numEntries) + for i := 0; i < numEntries; i++ { + vv := v.Index(i) + buf[i] = uint8(vv.Convert(uint8Type).Uint()) + } + doHexDump = true + } + } + + // Hexdump the entire slice as needed. + if doHexDump { + indent := strings.Repeat(d.cs.Indent, d.depth) + str := indent + hex.Dump(buf) + str = strings.Replace(str, "\n", "\n"+indent, -1) + str = strings.TrimRight(str, d.cs.Indent) + d.w.Write([]byte(str)) + return + } + + // Recursively call dump for each item. + for i := 0; i < numEntries; i++ { + d.dump(d.unpackValue(v.Index(i))) + if i < (numEntries - 1) { + d.w.Write(commaNewlineBytes) + } else { + d.w.Write(newlineBytes) + } + } +} + +// dump is the main workhorse for dumping a value. It uses the passed reflect +// value to figure out what kind of object we are dealing with and formats it +// appropriately. It is a recursive function, however circular data structures +// are detected and handled properly. +func (d *dumpState) dump(v reflect.Value) { + // Handle invalid reflect values immediately. + kind := v.Kind() + if kind == reflect.Invalid { + d.w.Write(invalidAngleBytes) + return + } + + // Handle pointers specially. + if kind == reflect.Ptr { + d.indent() + d.dumpPtr(v) + return + } + + // Print type information unless already handled elsewhere. + if !d.ignoreNextType { + d.indent() + d.w.Write(openParenBytes) + d.w.Write([]byte(v.Type().String())) + d.w.Write(closeParenBytes) + d.w.Write(spaceBytes) + } + d.ignoreNextType = false + + // Display length and capacity if the built-in len and cap functions + // work with the value's kind and the len/cap itself is non-zero. + valueLen, valueCap := 0, 0 + switch v.Kind() { + case reflect.Array, reflect.Slice, reflect.Chan: + valueLen, valueCap = v.Len(), v.Cap() + case reflect.Map, reflect.String: + valueLen = v.Len() + } + if valueLen != 0 || !d.cs.DisableCapacities && valueCap != 0 { + d.w.Write(openParenBytes) + if valueLen != 0 { + d.w.Write(lenEqualsBytes) + printInt(d.w, int64(valueLen), 10) + } + if !d.cs.DisableCapacities && valueCap != 0 { + if valueLen != 0 { + d.w.Write(spaceBytes) + } + d.w.Write(capEqualsBytes) + printInt(d.w, int64(valueCap), 10) + } + d.w.Write(closeParenBytes) + d.w.Write(spaceBytes) + } + + // Call Stringer/error interfaces if they exist and the handle methods flag + // is enabled + if !d.cs.DisableMethods { + if (kind != reflect.Invalid) && (kind != reflect.Interface) { + if handled := handleMethods(d.cs, d.w, v); handled { + return + } + } + } + + switch kind { + case reflect.Invalid: + // Do nothing. We should never get here since invalid has already + // been handled above. + + case reflect.Bool: + printBool(d.w, v.Bool()) + + case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int: + printInt(d.w, v.Int(), 10) + + case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint: + printUint(d.w, v.Uint(), 10) + + case reflect.Float32: + printFloat(d.w, v.Float(), 32) + + case reflect.Float64: + printFloat(d.w, v.Float(), 64) + + case reflect.Complex64: + printComplex(d.w, v.Complex(), 32) + + case reflect.Complex128: + printComplex(d.w, v.Complex(), 64) + + case reflect.Slice: + if v.IsNil() { + d.w.Write(nilAngleBytes) + break + } + fallthrough + + case reflect.Array: + d.w.Write(openBraceNewlineBytes) + d.depth++ + if (d.cs.MaxDepth != 0) && (d.depth > d.cs.MaxDepth) { + d.indent() + d.w.Write(maxNewlineBytes) + } else { + d.dumpSlice(v) + } + d.depth-- + d.indent() + d.w.Write(closeBraceBytes) + + case reflect.String: + d.w.Write([]byte(strconv.Quote(v.String()))) + + case reflect.Interface: + // The only time we should get here is for nil interfaces due to + // unpackValue calls. + if v.IsNil() { + d.w.Write(nilAngleBytes) + } + + case reflect.Ptr: + // Do nothing. We should never get here since pointers have already + // been handled above. + + case reflect.Map: + // nil maps should be indicated as different than empty maps + if v.IsNil() { + d.w.Write(nilAngleBytes) + break + } + + d.w.Write(openBraceNewlineBytes) + d.depth++ + if (d.cs.MaxDepth != 0) && (d.depth > d.cs.MaxDepth) { + d.indent() + d.w.Write(maxNewlineBytes) + } else { + numEntries := v.Len() + keys := v.MapKeys() + if d.cs.SortKeys { + sortValues(keys, d.cs) + } + for i, key := range keys { + d.dump(d.unpackValue(key)) + d.w.Write(colonSpaceBytes) + d.ignoreNextIndent = true + d.dump(d.unpackValue(v.MapIndex(key))) + if i < (numEntries - 1) { + d.w.Write(commaNewlineBytes) + } else { + d.w.Write(newlineBytes) + } + } + } + d.depth-- + d.indent() + d.w.Write(closeBraceBytes) + + case reflect.Struct: + d.w.Write(openBraceNewlineBytes) + d.depth++ + if (d.cs.MaxDepth != 0) && (d.depth > d.cs.MaxDepth) { + d.indent() + d.w.Write(maxNewlineBytes) + } else { + vt := v.Type() + numFields := v.NumField() + for i := 0; i < numFields; i++ { + d.indent() + vtf := vt.Field(i) + d.w.Write([]byte(vtf.Name)) + d.w.Write(colonSpaceBytes) + d.ignoreNextIndent = true + d.dump(d.unpackValue(v.Field(i))) + if i < (numFields - 1) { + d.w.Write(commaNewlineBytes) + } else { + d.w.Write(newlineBytes) + } + } + } + d.depth-- + d.indent() + d.w.Write(closeBraceBytes) + + case reflect.Uintptr: + printHexPtr(d.w, uintptr(v.Uint())) + + case reflect.UnsafePointer, reflect.Chan, reflect.Func: + printHexPtr(d.w, v.Pointer()) + + // There were not any other types at the time this code was written, but + // fall back to letting the default fmt package handle it in case any new + // types are added. + default: + if v.CanInterface() { + fmt.Fprintf(d.w, "%v", v.Interface()) + } else { + fmt.Fprintf(d.w, "%v", v.String()) + } + } +} + +// fdump is a helper function to consolidate the logic from the various public +// methods which take varying writers and config states. +func fdump(cs *ConfigState, w io.Writer, a ...interface{}) { + for _, arg := range a { + if arg == nil { + w.Write(interfaceBytes) + w.Write(spaceBytes) + w.Write(nilAngleBytes) + w.Write(newlineBytes) + continue + } + + d := dumpState{w: w, cs: cs} + d.pointers = make(map[uintptr]int) + d.dump(reflect.ValueOf(arg)) + d.w.Write(newlineBytes) + } +} + +// Fdump formats and displays the passed arguments to io.Writer w. It formats +// exactly the same as Dump. +func Fdump(w io.Writer, a ...interface{}) { + fdump(&Config, w, a...) +} + +// Sdump returns a string with the passed arguments formatted exactly the same +// as Dump. +func Sdump(a ...interface{}) string { + var buf bytes.Buffer + fdump(&Config, &buf, a...) + return buf.String() +} + +/* +Dump displays the passed parameters to standard out with newlines, customizable +indentation, and additional debug information such as complete types and all +pointer addresses used to indirect to the final value. It provides the +following features over the built-in printing facilities provided by the fmt +package: + + * Pointers are dereferenced and followed + * Circular data structures are detected and handled properly + * Custom Stringer/error interfaces are optionally invoked, including + on unexported types + * Custom types which only implement the Stringer/error interfaces via + a pointer receiver are optionally invoked when passing non-pointer + variables + * Byte arrays and slices are dumped like the hexdump -C command which + includes offsets, byte values in hex, and ASCII output + +The configuration options are controlled by an exported package global, +spew.Config. See ConfigState for options documentation. + +See Fdump if you would prefer dumping to an arbitrary io.Writer or Sdump to +get the formatted result as a string. +*/ +func Dump(a ...interface{}) { + fdump(&Config, os.Stdout, a...) +} diff --git a/vendor/github.com/davecgh/go-spew/spew/format.go b/vendor/github.com/davecgh/go-spew/spew/format.go new file mode 100644 index 000000000..b04edb7d7 --- /dev/null +++ b/vendor/github.com/davecgh/go-spew/spew/format.go @@ -0,0 +1,419 @@ +/* + * Copyright (c) 2013-2016 Dave Collins + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +package spew + +import ( + "bytes" + "fmt" + "reflect" + "strconv" + "strings" +) + +// supportedFlags is a list of all the character flags supported by fmt package. +const supportedFlags = "0-+# " + +// formatState implements the fmt.Formatter interface and contains information +// about the state of a formatting operation. The NewFormatter function can +// be used to get a new Formatter which can be used directly as arguments +// in standard fmt package printing calls. +type formatState struct { + value interface{} + fs fmt.State + depth int + pointers map[uintptr]int + ignoreNextType bool + cs *ConfigState +} + +// buildDefaultFormat recreates the original format string without precision +// and width information to pass in to fmt.Sprintf in the case of an +// unrecognized type. Unless new types are added to the language, this +// function won't ever be called. +func (f *formatState) buildDefaultFormat() (format string) { + buf := bytes.NewBuffer(percentBytes) + + for _, flag := range supportedFlags { + if f.fs.Flag(int(flag)) { + buf.WriteRune(flag) + } + } + + buf.WriteRune('v') + + format = buf.String() + return format +} + +// constructOrigFormat recreates the original format string including precision +// and width information to pass along to the standard fmt package. This allows +// automatic deferral of all format strings this package doesn't support. +func (f *formatState) constructOrigFormat(verb rune) (format string) { + buf := bytes.NewBuffer(percentBytes) + + for _, flag := range supportedFlags { + if f.fs.Flag(int(flag)) { + buf.WriteRune(flag) + } + } + + if width, ok := f.fs.Width(); ok { + buf.WriteString(strconv.Itoa(width)) + } + + if precision, ok := f.fs.Precision(); ok { + buf.Write(precisionBytes) + buf.WriteString(strconv.Itoa(precision)) + } + + buf.WriteRune(verb) + + format = buf.String() + return format +} + +// unpackValue returns values inside of non-nil interfaces when possible and +// ensures that types for values which have been unpacked from an interface +// are displayed when the show types flag is also set. +// This is useful for data types like structs, arrays, slices, and maps which +// can contain varying types packed inside an interface. +func (f *formatState) unpackValue(v reflect.Value) reflect.Value { + if v.Kind() == reflect.Interface { + f.ignoreNextType = false + if !v.IsNil() { + v = v.Elem() + } + } + return v +} + +// formatPtr handles formatting of pointers by indirecting them as necessary. +func (f *formatState) formatPtr(v reflect.Value) { + // Display nil if top level pointer is nil. + showTypes := f.fs.Flag('#') + if v.IsNil() && (!showTypes || f.ignoreNextType) { + f.fs.Write(nilAngleBytes) + return + } + + // Remove pointers at or below the current depth from map used to detect + // circular refs. + for k, depth := range f.pointers { + if depth >= f.depth { + delete(f.pointers, k) + } + } + + // Keep list of all dereferenced pointers to possibly show later. + pointerChain := make([]uintptr, 0) + + // Figure out how many levels of indirection there are by derferencing + // pointers and unpacking interfaces down the chain while detecting circular + // references. + nilFound := false + cycleFound := false + indirects := 0 + ve := v + for ve.Kind() == reflect.Ptr { + if ve.IsNil() { + nilFound = true + break + } + indirects++ + addr := ve.Pointer() + pointerChain = append(pointerChain, addr) + if pd, ok := f.pointers[addr]; ok && pd < f.depth { + cycleFound = true + indirects-- + break + } + f.pointers[addr] = f.depth + + ve = ve.Elem() + if ve.Kind() == reflect.Interface { + if ve.IsNil() { + nilFound = true + break + } + ve = ve.Elem() + } + } + + // Display type or indirection level depending on flags. + if showTypes && !f.ignoreNextType { + f.fs.Write(openParenBytes) + f.fs.Write(bytes.Repeat(asteriskBytes, indirects)) + f.fs.Write([]byte(ve.Type().String())) + f.fs.Write(closeParenBytes) + } else { + if nilFound || cycleFound { + indirects += strings.Count(ve.Type().String(), "*") + } + f.fs.Write(openAngleBytes) + f.fs.Write([]byte(strings.Repeat("*", indirects))) + f.fs.Write(closeAngleBytes) + } + + // Display pointer information depending on flags. + if f.fs.Flag('+') && (len(pointerChain) > 0) { + f.fs.Write(openParenBytes) + for i, addr := range pointerChain { + if i > 0 { + f.fs.Write(pointerChainBytes) + } + printHexPtr(f.fs, addr) + } + f.fs.Write(closeParenBytes) + } + + // Display dereferenced value. + switch { + case nilFound: + f.fs.Write(nilAngleBytes) + + case cycleFound: + f.fs.Write(circularShortBytes) + + default: + f.ignoreNextType = true + f.format(ve) + } +} + +// format is the main workhorse for providing the Formatter interface. It +// uses the passed reflect value to figure out what kind of object we are +// dealing with and formats it appropriately. It is a recursive function, +// however circular data structures are detected and handled properly. +func (f *formatState) format(v reflect.Value) { + // Handle invalid reflect values immediately. + kind := v.Kind() + if kind == reflect.Invalid { + f.fs.Write(invalidAngleBytes) + return + } + + // Handle pointers specially. + if kind == reflect.Ptr { + f.formatPtr(v) + return + } + + // Print type information unless already handled elsewhere. + if !f.ignoreNextType && f.fs.Flag('#') { + f.fs.Write(openParenBytes) + f.fs.Write([]byte(v.Type().String())) + f.fs.Write(closeParenBytes) + } + f.ignoreNextType = false + + // Call Stringer/error interfaces if they exist and the handle methods + // flag is enabled. + if !f.cs.DisableMethods { + if (kind != reflect.Invalid) && (kind != reflect.Interface) { + if handled := handleMethods(f.cs, f.fs, v); handled { + return + } + } + } + + switch kind { + case reflect.Invalid: + // Do nothing. We should never get here since invalid has already + // been handled above. + + case reflect.Bool: + printBool(f.fs, v.Bool()) + + case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int: + printInt(f.fs, v.Int(), 10) + + case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint: + printUint(f.fs, v.Uint(), 10) + + case reflect.Float32: + printFloat(f.fs, v.Float(), 32) + + case reflect.Float64: + printFloat(f.fs, v.Float(), 64) + + case reflect.Complex64: + printComplex(f.fs, v.Complex(), 32) + + case reflect.Complex128: + printComplex(f.fs, v.Complex(), 64) + + case reflect.Slice: + if v.IsNil() { + f.fs.Write(nilAngleBytes) + break + } + fallthrough + + case reflect.Array: + f.fs.Write(openBracketBytes) + f.depth++ + if (f.cs.MaxDepth != 0) && (f.depth > f.cs.MaxDepth) { + f.fs.Write(maxShortBytes) + } else { + numEntries := v.Len() + for i := 0; i < numEntries; i++ { + if i > 0 { + f.fs.Write(spaceBytes) + } + f.ignoreNextType = true + f.format(f.unpackValue(v.Index(i))) + } + } + f.depth-- + f.fs.Write(closeBracketBytes) + + case reflect.String: + f.fs.Write([]byte(v.String())) + + case reflect.Interface: + // The only time we should get here is for nil interfaces due to + // unpackValue calls. + if v.IsNil() { + f.fs.Write(nilAngleBytes) + } + + case reflect.Ptr: + // Do nothing. We should never get here since pointers have already + // been handled above. + + case reflect.Map: + // nil maps should be indicated as different than empty maps + if v.IsNil() { + f.fs.Write(nilAngleBytes) + break + } + + f.fs.Write(openMapBytes) + f.depth++ + if (f.cs.MaxDepth != 0) && (f.depth > f.cs.MaxDepth) { + f.fs.Write(maxShortBytes) + } else { + keys := v.MapKeys() + if f.cs.SortKeys { + sortValues(keys, f.cs) + } + for i, key := range keys { + if i > 0 { + f.fs.Write(spaceBytes) + } + f.ignoreNextType = true + f.format(f.unpackValue(key)) + f.fs.Write(colonBytes) + f.ignoreNextType = true + f.format(f.unpackValue(v.MapIndex(key))) + } + } + f.depth-- + f.fs.Write(closeMapBytes) + + case reflect.Struct: + numFields := v.NumField() + f.fs.Write(openBraceBytes) + f.depth++ + if (f.cs.MaxDepth != 0) && (f.depth > f.cs.MaxDepth) { + f.fs.Write(maxShortBytes) + } else { + vt := v.Type() + for i := 0; i < numFields; i++ { + if i > 0 { + f.fs.Write(spaceBytes) + } + vtf := vt.Field(i) + if f.fs.Flag('+') || f.fs.Flag('#') { + f.fs.Write([]byte(vtf.Name)) + f.fs.Write(colonBytes) + } + f.format(f.unpackValue(v.Field(i))) + } + } + f.depth-- + f.fs.Write(closeBraceBytes) + + case reflect.Uintptr: + printHexPtr(f.fs, uintptr(v.Uint())) + + case reflect.UnsafePointer, reflect.Chan, reflect.Func: + printHexPtr(f.fs, v.Pointer()) + + // There were not any other types at the time this code was written, but + // fall back to letting the default fmt package handle it if any get added. + default: + format := f.buildDefaultFormat() + if v.CanInterface() { + fmt.Fprintf(f.fs, format, v.Interface()) + } else { + fmt.Fprintf(f.fs, format, v.String()) + } + } +} + +// Format satisfies the fmt.Formatter interface. See NewFormatter for usage +// details. +func (f *formatState) Format(fs fmt.State, verb rune) { + f.fs = fs + + // Use standard formatting for verbs that are not v. + if verb != 'v' { + format := f.constructOrigFormat(verb) + fmt.Fprintf(fs, format, f.value) + return + } + + if f.value == nil { + if fs.Flag('#') { + fs.Write(interfaceBytes) + } + fs.Write(nilAngleBytes) + return + } + + f.format(reflect.ValueOf(f.value)) +} + +// newFormatter is a helper function to consolidate the logic from the various +// public methods which take varying config states. +func newFormatter(cs *ConfigState, v interface{}) fmt.Formatter { + fs := &formatState{value: v, cs: cs} + fs.pointers = make(map[uintptr]int) + return fs +} + +/* +NewFormatter returns a custom formatter that satisfies the fmt.Formatter +interface. As a result, it integrates cleanly with standard fmt package +printing functions. The formatter is useful for inline printing of smaller data +types similar to the standard %v format specifier. + +The custom formatter only responds to the %v (most compact), %+v (adds pointer +addresses), %#v (adds types), or %#+v (adds types and pointer addresses) verb +combinations. Any other verbs such as %x and %q will be sent to the the +standard fmt package for formatting. In addition, the custom formatter ignores +the width and precision arguments (however they will still work on the format +specifiers not handled by the custom formatter). + +Typically this function shouldn't be called directly. It is much easier to make +use of the custom formatter by calling one of the convenience functions such as +Printf, Println, or Fprintf. +*/ +func NewFormatter(v interface{}) fmt.Formatter { + return newFormatter(&Config, v) +} diff --git a/vendor/github.com/davecgh/go-spew/spew/spew.go b/vendor/github.com/davecgh/go-spew/spew/spew.go new file mode 100644 index 000000000..32c0e3388 --- /dev/null +++ b/vendor/github.com/davecgh/go-spew/spew/spew.go @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2013-2016 Dave Collins + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +package spew + +import ( + "fmt" + "io" +) + +// Errorf is a wrapper for fmt.Errorf that treats each argument as if it were +// passed with a default Formatter interface returned by NewFormatter. It +// returns the formatted string as a value that satisfies error. See +// NewFormatter for formatting details. +// +// This function is shorthand for the following syntax: +// +// fmt.Errorf(format, spew.NewFormatter(a), spew.NewFormatter(b)) +func Errorf(format string, a ...interface{}) (err error) { + return fmt.Errorf(format, convertArgs(a)...) +} + +// Fprint is a wrapper for fmt.Fprint that treats each argument as if it were +// passed with a default Formatter interface returned by NewFormatter. It +// returns the number of bytes written and any write error encountered. See +// NewFormatter for formatting details. +// +// This function is shorthand for the following syntax: +// +// fmt.Fprint(w, spew.NewFormatter(a), spew.NewFormatter(b)) +func Fprint(w io.Writer, a ...interface{}) (n int, err error) { + return fmt.Fprint(w, convertArgs(a)...) +} + +// Fprintf is a wrapper for fmt.Fprintf that treats each argument as if it were +// passed with a default Formatter interface returned by NewFormatter. It +// returns the number of bytes written and any write error encountered. See +// NewFormatter for formatting details. +// +// This function is shorthand for the following syntax: +// +// fmt.Fprintf(w, format, spew.NewFormatter(a), spew.NewFormatter(b)) +func Fprintf(w io.Writer, format string, a ...interface{}) (n int, err error) { + return fmt.Fprintf(w, format, convertArgs(a)...) +} + +// Fprintln is a wrapper for fmt.Fprintln that treats each argument as if it +// passed with a default Formatter interface returned by NewFormatter. See +// NewFormatter for formatting details. +// +// This function is shorthand for the following syntax: +// +// fmt.Fprintln(w, spew.NewFormatter(a), spew.NewFormatter(b)) +func Fprintln(w io.Writer, a ...interface{}) (n int, err error) { + return fmt.Fprintln(w, convertArgs(a)...) +} + +// Print is a wrapper for fmt.Print that treats each argument as if it were +// passed with a default Formatter interface returned by NewFormatter. It +// returns the number of bytes written and any write error encountered. See +// NewFormatter for formatting details. +// +// This function is shorthand for the following syntax: +// +// fmt.Print(spew.NewFormatter(a), spew.NewFormatter(b)) +func Print(a ...interface{}) (n int, err error) { + return fmt.Print(convertArgs(a)...) +} + +// Printf is a wrapper for fmt.Printf that treats each argument as if it were +// passed with a default Formatter interface returned by NewFormatter. It +// returns the number of bytes written and any write error encountered. See +// NewFormatter for formatting details. +// +// This function is shorthand for the following syntax: +// +// fmt.Printf(format, spew.NewFormatter(a), spew.NewFormatter(b)) +func Printf(format string, a ...interface{}) (n int, err error) { + return fmt.Printf(format, convertArgs(a)...) +} + +// Println is a wrapper for fmt.Println that treats each argument as if it were +// passed with a default Formatter interface returned by NewFormatter. It +// returns the number of bytes written and any write error encountered. See +// NewFormatter for formatting details. +// +// This function is shorthand for the following syntax: +// +// fmt.Println(spew.NewFormatter(a), spew.NewFormatter(b)) +func Println(a ...interface{}) (n int, err error) { + return fmt.Println(convertArgs(a)...) +} + +// Sprint is a wrapper for fmt.Sprint that treats each argument as if it were +// passed with a default Formatter interface returned by NewFormatter. It +// returns the resulting string. See NewFormatter for formatting details. +// +// This function is shorthand for the following syntax: +// +// fmt.Sprint(spew.NewFormatter(a), spew.NewFormatter(b)) +func Sprint(a ...interface{}) string { + return fmt.Sprint(convertArgs(a)...) +} + +// Sprintf is a wrapper for fmt.Sprintf that treats each argument as if it were +// passed with a default Formatter interface returned by NewFormatter. It +// returns the resulting string. See NewFormatter for formatting details. +// +// This function is shorthand for the following syntax: +// +// fmt.Sprintf(format, spew.NewFormatter(a), spew.NewFormatter(b)) +func Sprintf(format string, a ...interface{}) string { + return fmt.Sprintf(format, convertArgs(a)...) +} + +// Sprintln is a wrapper for fmt.Sprintln that treats each argument as if it +// were passed with a default Formatter interface returned by NewFormatter. It +// returns the resulting string. See NewFormatter for formatting details. +// +// This function is shorthand for the following syntax: +// +// fmt.Sprintln(spew.NewFormatter(a), spew.NewFormatter(b)) +func Sprintln(a ...interface{}) string { + return fmt.Sprintln(convertArgs(a)...) +} + +// convertArgs accepts a slice of arguments and returns a slice of the same +// length with each argument converted to a default spew Formatter interface. +func convertArgs(args []interface{}) (formatters []interface{}) { + formatters = make([]interface{}, len(args)) + for index, arg := range args { + formatters[index] = NewFormatter(arg) + } + return formatters +} diff --git a/vendor/github.com/klauspost/compress/.goreleaser.yml b/vendor/github.com/klauspost/compress/.goreleaser.yml index a22953805..4528059ca 100644 --- a/vendor/github.com/klauspost/compress/.goreleaser.yml +++ b/vendor/github.com/klauspost/compress/.goreleaser.yml @@ -1,5 +1,5 @@ -# This is an example goreleaser.yaml file with some sane defaults. -# Make sure to check the documentation at http://goreleaser.com +version: 2 + before: hooks: - ./gen.sh @@ -99,7 +99,7 @@ archives: checksum: name_template: 'checksums.txt' snapshot: - name_template: "{{ .Tag }}-next" + version_template: "{{ .Tag }}-next" changelog: sort: asc filters: diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md index 05c7359e4..de264c85a 100644 --- a/vendor/github.com/klauspost/compress/README.md +++ b/vendor/github.com/klauspost/compress/README.md @@ -16,6 +16,27 @@ This package provides various compression algorithms. # changelog +* Sep 23rd, 2024 - [1.17.10](https://github.com/klauspost/compress/releases/tag/v1.17.10) + * gzhttp: Add TransportAlwaysDecompress option. https://github.com/klauspost/compress/pull/978 + * gzhttp: Add supported decompress request body by @mirecl in https://github.com/klauspost/compress/pull/1002 + * s2: Add EncodeBuffer buffer recycling callback https://github.com/klauspost/compress/pull/982 + * zstd: Improve memory usage on small streaming encodes https://github.com/klauspost/compress/pull/1007 + * flate: read data written with partial flush by @vajexal in https://github.com/klauspost/compress/pull/996 + +* Jun 12th, 2024 - [1.17.9](https://github.com/klauspost/compress/releases/tag/v1.17.9) + * s2: Reduce ReadFrom temporary allocations https://github.com/klauspost/compress/pull/949 + * flate, zstd: Shave some bytes off amd64 matchLen by @greatroar in https://github.com/klauspost/compress/pull/963 + * Upgrade zip/zlib to 1.22.4 upstream https://github.com/klauspost/compress/pull/970 https://github.com/klauspost/compress/pull/971 + * zstd: BuildDict fails with RLE table https://github.com/klauspost/compress/pull/951 + +* Apr 9th, 2024 - [1.17.8](https://github.com/klauspost/compress/releases/tag/v1.17.8) + * zstd: Reject blocks where reserved values are not 0 https://github.com/klauspost/compress/pull/885 + * zstd: Add RLE detection+encoding https://github.com/klauspost/compress/pull/938 + +* Feb 21st, 2024 - [1.17.7](https://github.com/klauspost/compress/releases/tag/v1.17.7) + * s2: Add AsyncFlush method: Complete the block without flushing by @Jille in https://github.com/klauspost/compress/pull/927 + * s2: Fix literal+repeat exceeds dst crash https://github.com/klauspost/compress/pull/930 + * Feb 5th, 2024 - [1.17.6](https://github.com/klauspost/compress/releases/tag/v1.17.6) * zstd: Fix incorrect repeat coding in best mode https://github.com/klauspost/compress/pull/923 * s2: Fix DecodeConcurrent deadlock on errors https://github.com/klauspost/compress/pull/925 @@ -81,7 +102,7 @@ https://github.com/klauspost/compress/pull/919 https://github.com/klauspost/comp * zstd: Various minor improvements by @greatroar in https://github.com/klauspost/compress/pull/788 https://github.com/klauspost/compress/pull/794 https://github.com/klauspost/compress/pull/795 * s2: Fix huge block overflow https://github.com/klauspost/compress/pull/779 * s2: Allow CustomEncoder fallback https://github.com/klauspost/compress/pull/780 - * gzhttp: Suppport ResponseWriter Unwrap() in gzhttp handler by @jgimenez in https://github.com/klauspost/compress/pull/799 + * gzhttp: Support ResponseWriter Unwrap() in gzhttp handler by @jgimenez in https://github.com/klauspost/compress/pull/799 * Mar 13, 2023 - [v1.16.1](https://github.com/klauspost/compress/releases/tag/v1.16.1) * zstd: Speed up + improve best encoder by @greatroar in https://github.com/klauspost/compress/pull/776 @@ -136,7 +157,7 @@ https://github.com/klauspost/compress/pull/919 https://github.com/klauspost/comp * zstd: Add [WithDecodeAllCapLimit](https://pkg.go.dev/github.com/klauspost/compress@v1.15.10/zstd#WithDecodeAllCapLimit) https://github.com/klauspost/compress/pull/649 * Add Go 1.19 - deprecate Go 1.16 https://github.com/klauspost/compress/pull/651 * flate: Improve level 5+6 compression https://github.com/klauspost/compress/pull/656 - * zstd: Improve "better" compresssion https://github.com/klauspost/compress/pull/657 + * zstd: Improve "better" compression https://github.com/klauspost/compress/pull/657 * s2: Improve "best" compression https://github.com/klauspost/compress/pull/658 * s2: Improve "better" compression. https://github.com/klauspost/compress/pull/635 * s2: Slightly faster non-assembly decompression https://github.com/klauspost/compress/pull/646 @@ -339,7 +360,7 @@ While the release has been extensively tested, it is recommended to testing when * s2: Fix binaries. * Feb 25, 2021 (v1.11.8) - * s2: Fixed occational out-of-bounds write on amd64. Upgrade recommended. + * s2: Fixed occasional out-of-bounds write on amd64. Upgrade recommended. * s2: Add AMD64 assembly for better mode. 25-50% faster. [#315](https://github.com/klauspost/compress/pull/315) * s2: Less upfront decoder allocation. [#322](https://github.com/klauspost/compress/pull/322) * zstd: Faster "compression" of incompressible data. [#314](https://github.com/klauspost/compress/pull/314) @@ -518,7 +539,7 @@ While the release has been extensively tested, it is recommended to testing when * Feb 19, 2016: Faster bit writer, level -2 is 15% faster, level 1 is 4% faster. * Feb 19, 2016: Handle small payloads faster in level 1-3. * Feb 19, 2016: Added faster level 2 + 3 compression modes. -* Feb 19, 2016: [Rebalanced compression levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/), so there is a more even progresssion in terms of compression. New default level is 5. +* Feb 19, 2016: [Rebalanced compression levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/), so there is a more even progression in terms of compression. New default level is 5. * Feb 14, 2016: Snappy: Merge upstream changes. * Feb 14, 2016: Snappy: Fix aggressive skipping. * Feb 14, 2016: Snappy: Update benchmark. diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go index 66d1657d2..af53fb860 100644 --- a/vendor/github.com/klauspost/compress/flate/deflate.go +++ b/vendor/github.com/klauspost/compress/flate/deflate.go @@ -861,7 +861,7 @@ func (d *compressor) reset(w io.Writer) { } switch d.compressionLevel.chain { case 0: - // level was NoCompression or ConstantCompresssion. + // level was NoCompression or ConstantCompression. d.windowEnd = 0 default: s := d.state diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go index 2f410d64f..0d7b437f1 100644 --- a/vendor/github.com/klauspost/compress/flate/inflate.go +++ b/vendor/github.com/klauspost/compress/flate/inflate.go @@ -298,6 +298,14 @@ const ( huffmanGenericReader ) +// flushMode tells decompressor when to return data +type flushMode uint8 + +const ( + syncFlush flushMode = iota // return data after sync flush block + partialFlush // return data after each block +) + // Decompress state. type decompressor struct { // Input source. @@ -332,6 +340,8 @@ type decompressor struct { nb uint final bool + + flushMode flushMode } func (f *decompressor) nextBlock() { @@ -618,7 +628,10 @@ func (f *decompressor) dataBlock() { } if n == 0 { - f.toRead = f.dict.readFlush() + if f.flushMode == syncFlush { + f.toRead = f.dict.readFlush() + } + f.finishBlock() return } @@ -657,8 +670,12 @@ func (f *decompressor) finishBlock() { if f.dict.availRead() > 0 { f.toRead = f.dict.readFlush() } + f.err = io.EOF + } else if f.flushMode == partialFlush && f.dict.availRead() > 0 { + f.toRead = f.dict.readFlush() } + f.step = nextBlock } @@ -789,15 +806,25 @@ func (f *decompressor) Reset(r io.Reader, dict []byte) error { return nil } -// NewReader returns a new ReadCloser that can be used -// to read the uncompressed version of r. -// If r does not also implement io.ByteReader, -// the decompressor may read more data than necessary from r. -// It is the caller's responsibility to call Close on the ReadCloser -// when finished reading. -// -// The ReadCloser returned by NewReader also implements Resetter. -func NewReader(r io.Reader) io.ReadCloser { +type ReaderOpt func(*decompressor) + +// WithPartialBlock tells decompressor to return after each block, +// so it can read data written with partial flush +func WithPartialBlock() ReaderOpt { + return func(f *decompressor) { + f.flushMode = partialFlush + } +} + +// WithDict initializes the reader with a preset dictionary +func WithDict(dict []byte) ReaderOpt { + return func(f *decompressor) { + f.dict.init(maxMatchOffset, dict) + } +} + +// NewReaderOpts returns new reader with provided options +func NewReaderOpts(r io.Reader, opts ...ReaderOpt) io.ReadCloser { fixedHuffmanDecoderInit() var f decompressor @@ -806,9 +833,26 @@ func NewReader(r io.Reader) io.ReadCloser { f.codebits = new([numCodes]int) f.step = nextBlock f.dict.init(maxMatchOffset, nil) + + for _, opt := range opts { + opt(&f) + } + return &f } +// NewReader returns a new ReadCloser that can be used +// to read the uncompressed version of r. +// If r does not also implement io.ByteReader, +// the decompressor may read more data than necessary from r. +// It is the caller's responsibility to call Close on the ReadCloser +// when finished reading. +// +// The ReadCloser returned by NewReader also implements Resetter. +func NewReader(r io.Reader) io.ReadCloser { + return NewReaderOpts(r) +} + // NewReaderDict is like NewReader but initializes the reader // with a preset dictionary. The returned Reader behaves as if // the uncompressed data stream started with the given dictionary, @@ -817,13 +861,5 @@ func NewReader(r io.Reader) io.ReadCloser { // // The ReadCloser returned by NewReader also implements Resetter. func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { - fixedHuffmanDecoderInit() - - var f decompressor - f.r = makeReader(r) - f.bits = new([maxNumLit + maxNumDist]int) - f.codebits = new([numCodes]int) - f.step = nextBlock - f.dict.init(maxMatchOffset, dict) - return &f + return NewReaderOpts(r, WithDict(dict)) } diff --git a/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s index 9a7655c0f..0782b86e3 100644 --- a/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s +++ b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s @@ -5,7 +5,6 @@ #include "textflag.h" // func matchLen(a []byte, b []byte) int -// Requires: BMI TEXT ·matchLen(SB), NOSPLIT, $0-56 MOVQ a_base+0(FP), AX MOVQ b_base+24(FP), CX @@ -17,17 +16,16 @@ TEXT ·matchLen(SB), NOSPLIT, $0-56 JB matchlen_match4_standalone matchlen_loopback_standalone: - MOVQ (AX)(SI*1), BX - XORQ (CX)(SI*1), BX - TESTQ BX, BX - JZ matchlen_loop_standalone + MOVQ (AX)(SI*1), BX + XORQ (CX)(SI*1), BX + JZ matchlen_loop_standalone #ifdef GOAMD64_v3 TZCNTQ BX, BX #else BSFQ BX, BX #endif - SARQ $0x03, BX + SHRL $0x03, BX LEAL (SI)(BX*1), SI JMP gen_match_len_end diff --git a/vendor/github.com/klauspost/compress/fse/decompress.go b/vendor/github.com/klauspost/compress/fse/decompress.go index cc05d0f7e..0c7dd4ffe 100644 --- a/vendor/github.com/klauspost/compress/fse/decompress.go +++ b/vendor/github.com/klauspost/compress/fse/decompress.go @@ -15,7 +15,7 @@ const ( // It is possible, but by no way guaranteed that corrupt data will // return an error. // It is up to the caller to verify integrity of the returned data. -// Use a predefined Scrach to set maximum acceptable output size. +// Use a predefined Scratch to set maximum acceptable output size. func Decompress(b []byte, s *Scratch) ([]byte, error) { s, err := s.prepare(b) if err != nil { diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go index 54bd08b25..0f56b02d7 100644 --- a/vendor/github.com/klauspost/compress/huff0/decompress.go +++ b/vendor/github.com/klauspost/compress/huff0/decompress.go @@ -1136,7 +1136,7 @@ func (s *Scratch) matches(ct cTable, w io.Writer) { errs++ } if errs > 0 { - fmt.Fprintf(w, "%d errros in base, stopping\n", errs) + fmt.Fprintf(w, "%d errors in base, stopping\n", errs) continue } // Ensure that all combinations are covered. @@ -1152,7 +1152,7 @@ func (s *Scratch) matches(ct cTable, w io.Writer) { errs++ } if errs > 20 { - fmt.Fprintf(w, "%d errros, stopping\n", errs) + fmt.Fprintf(w, "%d errors, stopping\n", errs) break } } diff --git a/vendor/github.com/klauspost/compress/s2/decode_arm64.s b/vendor/github.com/klauspost/compress/s2/decode_arm64.s index 4b63d5086..78e463f34 100644 --- a/vendor/github.com/klauspost/compress/s2/decode_arm64.s +++ b/vendor/github.com/klauspost/compress/s2/decode_arm64.s @@ -60,7 +60,7 @@ // // The d variable is implicitly R_DST - R_DBASE, and len(dst)-d is R_DEND - R_DST. // The s variable is implicitly R_SRC - R_SBASE, and len(src)-s is R_SEND - R_SRC. -TEXT ·s2Decode(SB), NOSPLIT, $56-64 +TEXT ·s2Decode(SB), NOSPLIT, $56-56 // Initialize R_SRC, R_DST and R_DBASE-R_SEND. MOVD dst_base+0(FP), R_DBASE MOVD dst_len+8(FP), R_DLEN diff --git a/vendor/github.com/klauspost/compress/s2/encode.go b/vendor/github.com/klauspost/compress/s2/encode.go index 0c9088adf..20b802270 100644 --- a/vendor/github.com/klauspost/compress/s2/encode.go +++ b/vendor/github.com/klauspost/compress/s2/encode.go @@ -9,6 +9,9 @@ import ( "encoding/binary" "math" "math/bits" + "sync" + + "github.com/klauspost/compress/internal/race" ) // Encode returns the encoded form of src. The returned slice may be a sub- @@ -52,6 +55,8 @@ func Encode(dst, src []byte) []byte { return dst[:d] } +var estblockPool [2]sync.Pool + // EstimateBlockSize will perform a very fast compression // without outputting the result and return the compressed output size. // The function returns -1 if no improvement could be achieved. @@ -61,9 +66,25 @@ func EstimateBlockSize(src []byte) (d int) { return -1 } if len(src) <= 1024 { - d = calcBlockSizeSmall(src) + const sz, pool = 2048, 0 + tmp, ok := estblockPool[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer estblockPool[pool].Put(tmp) + + d = calcBlockSizeSmall(src, tmp) } else { - d = calcBlockSize(src) + const sz, pool = 32768, 1 + tmp, ok := estblockPool[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer estblockPool[pool].Put(tmp) + + d = calcBlockSize(src, tmp) } if d == 0 { diff --git a/vendor/github.com/klauspost/compress/s2/encode_amd64.go b/vendor/github.com/klauspost/compress/s2/encode_amd64.go index 4f45206a4..7aadd255f 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_amd64.go +++ b/vendor/github.com/klauspost/compress/s2/encode_amd64.go @@ -3,10 +3,16 @@ package s2 -import "github.com/klauspost/compress/internal/race" +import ( + "sync" + + "github.com/klauspost/compress/internal/race" +) const hasAmd64Asm = true +var encPools [4]sync.Pool + // encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It // assumes that the varint-encoded length of the decompressed bytes has already // been written. @@ -29,23 +35,60 @@ func encodeBlock(dst, src []byte) (d int) { ) if len(src) >= 4<<20 { - return encodeBlockAsm(dst, src) + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm(dst, src, tmp) } if len(src) >= limit12B { - return encodeBlockAsm4MB(dst, src) + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm4MB(dst, src, tmp) } if len(src) >= limit10B { - return encodeBlockAsm12B(dst, src) + const sz, pool = 16384, 1 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeBlockAsm10B(dst, src) + const sz, pool = 4096, 2 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeBlockAsm8B(dst, src) + const sz, pool = 1024, 3 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm8B(dst, src, tmp) } +var encBetterPools [5]sync.Pool + // encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It // assumes that the varint-encoded length of the decompressed bytes has already // been written. @@ -68,21 +111,59 @@ func encodeBlockBetter(dst, src []byte) (d int) { ) if len(src) > 4<<20 { - return encodeBetterBlockAsm(dst, src) + const sz, pool = 589824, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeBetterBlockAsm(dst, src, tmp) } if len(src) >= limit12B { - return encodeBetterBlockAsm4MB(dst, src) + const sz, pool = 589824, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeBetterBlockAsm4MB(dst, src, tmp) } if len(src) >= limit10B { - return encodeBetterBlockAsm12B(dst, src) + const sz, pool = 81920, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeBetterBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeBetterBlockAsm10B(dst, src) + const sz, pool = 20480, 1 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeBetterBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeBetterBlockAsm8B(dst, src) + + const sz, pool = 5120, 2 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeBetterBlockAsm8B(dst, src, tmp) } // encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It @@ -105,22 +186,57 @@ func encodeBlockSnappy(dst, src []byte) (d int) { // Use 8 bit table when less than... limit8B = 512 ) - if len(src) >= 64<<10 { - return encodeSnappyBlockAsm(dst, src) + if len(src) > 65536 { + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm(dst, src, tmp) } if len(src) >= limit12B { - return encodeSnappyBlockAsm64K(dst, src) + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm64K(dst, src, tmp) } if len(src) >= limit10B { - return encodeSnappyBlockAsm12B(dst, src) + const sz, pool = 16384, 1 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeSnappyBlockAsm10B(dst, src) + const sz, pool = 4096, 2 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeSnappyBlockAsm8B(dst, src) + const sz, pool = 1024, 3 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm8B(dst, src, tmp) } // encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It @@ -143,20 +259,59 @@ func encodeBlockBetterSnappy(dst, src []byte) (d int) { // Use 8 bit table when less than... limit8B = 512 ) - if len(src) >= 64<<10 { - return encodeSnappyBetterBlockAsm(dst, src) + if len(src) > 65536 { + const sz, pool = 589824, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeSnappyBetterBlockAsm(dst, src, tmp) } + if len(src) >= limit12B { - return encodeSnappyBetterBlockAsm64K(dst, src) + const sz, pool = 294912, 4 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeSnappyBetterBlockAsm64K(dst, src, tmp) } if len(src) >= limit10B { - return encodeSnappyBetterBlockAsm12B(dst, src) + const sz, pool = 81920, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeSnappyBetterBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeSnappyBetterBlockAsm10B(dst, src) + const sz, pool = 20480, 1 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeSnappyBetterBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeSnappyBetterBlockAsm8B(dst, src) + + const sz, pool = 5120, 2 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeSnappyBetterBlockAsm8B(dst, src, tmp) } diff --git a/vendor/github.com/klauspost/compress/s2/encode_go.go b/vendor/github.com/klauspost/compress/s2/encode_go.go index 6b393c34d..dd1c973ca 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_go.go +++ b/vendor/github.com/klauspost/compress/s2/encode_go.go @@ -317,7 +317,7 @@ func matchLen(a []byte, b []byte) int { } // input must be > inputMargin -func calcBlockSize(src []byte) (d int) { +func calcBlockSize(src []byte, _ *[32768]byte) (d int) { // Initialize the hash table. const ( tableBits = 13 @@ -503,7 +503,7 @@ emitRemainder: } // length must be > inputMargin. -func calcBlockSizeSmall(src []byte) (d int) { +func calcBlockSizeSmall(src []byte, _ *[2048]byte) (d int) { // Initialize the hash table. const ( tableBits = 9 diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go index 297e41501..f43aa8154 100644 --- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go +++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go @@ -11,154 +11,154 @@ func _dummy_() // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm(dst []byte, src []byte) int +func encodeBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // encodeBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4194304 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm4MB(dst []byte, src []byte) int +func encodeBlockAsm4MB(dst []byte, src []byte, tmp *[65536]byte) int // encodeBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm12B(dst []byte, src []byte) int +func encodeBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // encodeBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm10B(dst []byte, src []byte) int +func encodeBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // encodeBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm8B(dst []byte, src []byte) int +func encodeBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // encodeBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm(dst []byte, src []byte) int +func encodeBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // encodeBetterBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4194304 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm4MB(dst []byte, src []byte) int +func encodeBetterBlockAsm4MB(dst []byte, src []byte, tmp *[589824]byte) int // encodeBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm12B(dst []byte, src []byte) int +func encodeBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // encodeBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm10B(dst []byte, src []byte) int +func encodeBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // encodeBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm8B(dst []byte, src []byte) int +func encodeBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // encodeSnappyBlockAsm encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm(dst []byte, src []byte) int +func encodeSnappyBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // encodeSnappyBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 65535 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm64K(dst []byte, src []byte) int +func encodeSnappyBlockAsm64K(dst []byte, src []byte, tmp *[65536]byte) int // encodeSnappyBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm12B(dst []byte, src []byte) int +func encodeSnappyBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // encodeSnappyBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm10B(dst []byte, src []byte) int +func encodeSnappyBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // encodeSnappyBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm8B(dst []byte, src []byte) int +func encodeSnappyBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // encodeSnappyBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // encodeSnappyBetterBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 65535 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte, tmp *[294912]byte) int // encodeSnappyBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // encodeSnappyBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // encodeSnappyBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // calcBlockSize encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func calcBlockSize(src []byte) int +func calcBlockSize(src []byte, tmp *[32768]byte) int // calcBlockSizeSmall encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 1024 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func calcBlockSizeSmall(src []byte) int +func calcBlockSizeSmall(src []byte, tmp *[2048]byte) int // emitLiteral writes a literal chunk and returns the number of bytes written. // diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s index 2ff5b3340..df9be687b 100644 --- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s +++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s @@ -13,1270 +13,1271 @@ TEXT ·_dummy_(SB), $0 #endif RET -// func encodeBlockAsm(dst []byte, src []byte) int +// func encodeBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm repeat_extend_back_loop_encodeBlockAsm: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm repeat_extend_back_end_encodeBlockAsm: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 5(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 5(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_encodeBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_repeat_emit_encodeBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_repeat_emit_encodeBlockAsm four_bytes_repeat_emit_encodeBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_repeat_emit_encodeBlockAsm three_bytes_repeat_emit_encodeBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm two_bytes_repeat_emit_encodeBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm JMP memmove_long_repeat_emit_encodeBlockAsm one_byte_repeat_emit_encodeBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm memmove_long_repeat_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm matchlen_bsf_16repeat_extend_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm matchlen_match8_repeat_extend_encodeBlockAsm: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm matchlen_bsf_8_repeat_extend_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm matchlen_match4_repeat_extend_encodeBlockAsm: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm JB repeat_extend_forward_end_encodeBlockAsm - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm matchlen_match1_repeat_extend_encodeBlockAsm: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm // emitRepeat emit_repeat_again_match_repeat_encodeBlockAsm: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm cant_repeat_two_offset_match_repeat_encodeBlockAsm: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_match_repeat_encodeBlockAsm - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_match_repeat_encodeBlockAsm - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_repeat_encodeBlockAsm repeat_five_match_repeat_encodeBlockAsm: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_match_repeat_encodeBlockAsm: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm repeat_three_match_repeat_encodeBlockAsm: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_match_repeat_encodeBlockAsm: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_match_repeat_encodeBlockAsm: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_as_copy_encodeBlockAsm: // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_encodeBlockAsm - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_encodeBlockAsm - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + MOVB $0xff, (CX) + MOVL DI, 1(CX) + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_encodeBlockAsm // emitRepeat emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm four_bytes_remain_repeat_as_copy_encodeBlockAsm: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_encodeBlockAsm - XORL DI, DI - LEAL -1(DI)(BX*4), BX - MOVB BL, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX + XORL R8, R8 + LEAL -1(R8)(SI*4), SI + MOVB SI, (CX) + MOVL DI, 1(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm two_byte_offset_repeat_as_copy_encodeBlockAsm: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - MOVL SI, R8 - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + MOVL DI, R9 + SHRL $0x08, R9 + SHLL $0x05, R9 + ORL R9, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm long_offset_short_repeat_as_copy_encodeBlockAsm: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm two_byte_offset_short_repeat_as_copy_encodeBlockAsm: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm emit_copy_three_repeat_as_copy_encodeBlockAsm: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm no_repeat_found_encodeBlockAsm: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm candidate3_match_encodeBlockAsm: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm candidate2_match_encodeBlockAsm: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm match_extend_back_loop_encodeBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm JMP match_extend_back_loop_encodeBlockAsm match_extend_back_end_encodeBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB three_bytes_match_emit_encodeBlockAsm - CMPL DI, $0x01000000 + CMPL R8, $0x01000000 JB four_bytes_match_emit_encodeBlockAsm - MOVB $0xfc, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeBlockAsm four_bytes_match_emit_encodeBlockAsm: - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL R8, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW R8, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBlockAsm three_bytes_match_emit_encodeBlockAsm: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm two_bytes_match_emit_encodeBlockAsm: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm JMP memmove_long_match_emit_encodeBlockAsm one_byte_match_emit_encodeBlockAsm: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm memmove_long_match_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm: match_nolit_loop_encodeBlockAsm: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm matchlen_bsf_16match_nolit_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm matchlen_match8_match_nolit_encodeBlockAsm: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm matchlen_bsf_8_match_nolit_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm matchlen_match4_match_nolit_encodeBlockAsm: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm JB match_nolit_end_encodeBlockAsm - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm matchlen_match1_match_nolit_encodeBlockAsm: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_encodeBlockAsm - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_encodeBlockAsm - MOVB $0xff, (AX) - MOVL BX, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + MOVB $0xff, (CX) + MOVL SI, 1(CX) + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_encodeBlockAsm // emitRepeat emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy: - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm_emit_copy - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm_emit_copy - CMPL R9, $0x0100ffff + CMPL R10, $0x0100ffff JB repeat_five_match_nolit_encodeBlockAsm_emit_copy - LEAL -16842747(R9), R9 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy repeat_five_match_nolit_encodeBlockAsm_emit_copy: - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_four_match_nolit_encodeBlockAsm_emit_copy: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_three_match_nolit_encodeBlockAsm_emit_copy: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_match_nolit_encodeBlockAsm_emit_copy: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm four_bytes_remain_match_nolit_encodeBlockAsm: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_encodeBlockAsm - XORL SI, SI - LEAL -1(SI)(R9*4), R9 - MOVB R9, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + XORL DI, DI + LEAL -1(DI)(R10*4), R10 + MOVB R10, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm two_byte_offset_match_nolit_encodeBlockAsm: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - MOVL BX, DI - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + MOVL SI, R8 + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b: - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL R9, $0x0100ffff + CMPL R10, $0x0100ffff JB repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b - LEAL -16842747(R9), R9 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b: - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm long_offset_short_match_nolit_encodeBlockAsm: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short: - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm_emit_copy_short - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm_emit_copy_short - CMPL R9, $0x0100ffff + CMPL R10, $0x0100ffff JB repeat_five_match_nolit_encodeBlockAsm_emit_copy_short - LEAL -16842747(R9), R9 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short repeat_five_match_nolit_encodeBlockAsm_emit_copy_short: - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_four_match_nolit_encodeBlockAsm_emit_copy_short: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_three_match_nolit_encodeBlockAsm_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_match_nolit_encodeBlockAsm_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm two_byte_offset_short_match_nolit_encodeBlockAsm: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm emit_copy_three_match_nolit_encodeBlockAsm: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm - INCL CX + INCL DX JMP search_loop_encodeBlockAsm emit_remainder_encodeBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm @@ -1286,41 +1287,41 @@ emit_remainder_ok_encodeBlockAsm: JB three_bytes_emit_remainder_encodeBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeBlockAsm four_bytes_emit_remainder_encodeBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBlockAsm three_bytes_emit_remainder_encodeBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm two_bytes_emit_remainder_encodeBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm JMP memmove_long_emit_remainder_encodeBlockAsm one_byte_emit_remainder_encodeBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -1336,73 +1337,73 @@ memmove_emit_remainder_encodeBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm memmove_long_emit_remainder_encodeBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -1416,1199 +1417,1200 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm4MB(dst []byte, src []byte) int +// func encodeBlockAsm4MB(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm4MB(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm4MB(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm4MB: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm4MB MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm4MB: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm4MB - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm4MB - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm4MB repeat_extend_back_loop_encodeBlockAsm4MB: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm4MB - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm4MB - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm4MB repeat_extend_back_end_encodeBlockAsm4MB: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 4(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 4(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm4MB: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm4MB - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm4MB - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_encodeBlockAsm4MB - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_repeat_emit_encodeBlockAsm4MB three_bytes_repeat_emit_encodeBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm4MB two_bytes_repeat_emit_encodeBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm4MB JMP memmove_long_repeat_emit_encodeBlockAsm4MB one_byte_repeat_emit_encodeBlockAsm4MB: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm4MB: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm4MB memmove_long_repeat_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm4MB: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm4MB - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm4MB - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB matchlen_bsf_16repeat_extend_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm4MB matchlen_match8_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm4MB - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm4MB matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm4MB matchlen_match4_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm4MB - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm4MB - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm4MB JB repeat_extend_forward_end_encodeBlockAsm4MB - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm4MB - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm4MB matchlen_match1_repeat_extend_encodeBlockAsm4MB: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm4MB - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm4MB: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm4MB // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm4MB - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm4MB cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm4MB - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_match_repeat_encodeBlockAsm4MB - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_match_repeat_encodeBlockAsm4MB: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_match_repeat_encodeBlockAsm4MB: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_match_repeat_encodeBlockAsm4MB: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_match_repeat_encodeBlockAsm4MB: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_as_copy_encodeBlockAsm4MB: // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_encodeBlockAsm4MB - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + MOVB $0xff, (CX) + MOVL DI, 1(CX) + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_encodeBlockAsm4MB - XORL DI, DI - LEAL -1(DI)(BX*4), BX - MOVB BL, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX + XORL R8, R8 + LEAL -1(R8)(SI*4), SI + MOVB SI, (CX) + MOVL DI, 1(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB two_byte_offset_repeat_as_copy_encodeBlockAsm4MB: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm4MB - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB long_offset_short_repeat_as_copy_encodeBlockAsm4MB: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB emit_copy_three_repeat_as_copy_encodeBlockAsm4MB: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm4MB: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm4MB no_repeat_found_encodeBlockAsm4MB: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm4MB - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm4MB - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm4MB - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm4MB candidate3_match_encodeBlockAsm4MB: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm4MB candidate2_match_encodeBlockAsm4MB: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm4MB: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm4MB match_extend_back_loop_encodeBlockAsm4MB: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm4MB - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm4MB - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm4MB JMP match_extend_back_loop_encodeBlockAsm4MB match_extend_back_end_encodeBlockAsm4MB: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 4(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 4(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm4MB: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm4MB - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm4MB - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB three_bytes_match_emit_encodeBlockAsm4MB - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL R8, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW R8, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBlockAsm4MB three_bytes_match_emit_encodeBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm4MB two_bytes_match_emit_encodeBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm4MB JMP memmove_long_match_emit_encodeBlockAsm4MB one_byte_match_emit_encodeBlockAsm4MB: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm4MB: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm4MB memmove_long_match_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm4MB: match_nolit_loop_encodeBlockAsm4MB: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm4MB - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4MB - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm4MB - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm4MB matchlen_bsf_16match_nolit_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm4MB matchlen_match8_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm4MB - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4MB - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm4MB matchlen_bsf_8_match_nolit_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm4MB matchlen_match4_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm4MB - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm4MB - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm4MB JB match_nolit_end_encodeBlockAsm4MB - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm4MB - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm4MB matchlen_match1_match_nolit_encodeBlockAsm4MB: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm4MB - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm4MB: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_encodeBlockAsm4MB - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_encodeBlockAsm4MB - MOVB $0xff, (AX) - MOVL BX, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + MOVB $0xff, (CX) + MOVL SI, 1(CX) + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_encodeBlockAsm4MB // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB four_bytes_remain_match_nolit_encodeBlockAsm4MB: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_encodeBlockAsm4MB - XORL SI, SI - LEAL -1(SI)(R9*4), R9 - MOVB R9, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + XORL DI, DI + LEAL -1(DI)(R10*4), R10 + MOVB R10, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB two_byte_offset_match_nolit_encodeBlockAsm4MB: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm4MB - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm4MB - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 - + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 + // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB long_offset_short_match_nolit_encodeBlockAsm4MB: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB two_byte_offset_short_match_nolit_encodeBlockAsm4MB: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm4MB - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm4MB - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB emit_copy_three_match_nolit_encodeBlockAsm4MB: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm4MB: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm4MB - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm4MB: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm4MB - INCL CX + INCL DX JMP search_loop_encodeBlockAsm4MB emit_remainder_encodeBlockAsm4MB: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 4(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 4(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm4MB: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm4MB - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm4MB @@ -2618,33 +2620,33 @@ emit_remainder_ok_encodeBlockAsm4MB: JB three_bytes_emit_remainder_encodeBlockAsm4MB MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBlockAsm4MB three_bytes_emit_remainder_encodeBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm4MB two_bytes_emit_remainder_encodeBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm4MB JMP memmove_long_emit_remainder_encodeBlockAsm4MB one_byte_emit_remainder_encodeBlockAsm4MB: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -2660,73 +2662,73 @@ memmove_emit_remainder_encodeBlockAsm4MB: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm4MB: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm4MB memmove_long_emit_remainder_encodeBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back: MOVOU (SI), X4 @@ -2740,967 +2742,968 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm4MB: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm12B(dst []byte, src []byte) int +// func encodeBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm12B(SB), $16408-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000080, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000080, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm12B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x18, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x18, R11 + IMULQ R9, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x18, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm12B - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm12B repeat_extend_back_loop_encodeBlockAsm12B: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm12B - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm12B - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm12B repeat_extend_back_end_encodeBlockAsm12B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm12B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm12B JB three_bytes_repeat_emit_encodeBlockAsm12B three_bytes_repeat_emit_encodeBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm12B two_bytes_repeat_emit_encodeBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm12B JMP memmove_long_repeat_emit_encodeBlockAsm12B one_byte_repeat_emit_encodeBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm12B memmove_long_repeat_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm12B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm12B - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm12B - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm12B - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm12B matchlen_bsf_16repeat_extend_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm12B matchlen_match8_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm12B - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm12B - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm12B matchlen_bsf_8_repeat_extend_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm12B matchlen_match4_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm12B - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm12B - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm12B JB repeat_extend_forward_end_encodeBlockAsm12B - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm12B - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm12B matchlen_match1_repeat_extend_encodeBlockAsm12B: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm12B - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm12B: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm12B // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm12B - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm12B cant_repeat_two_offset_match_repeat_encodeBlockAsm12B: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm12B - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_match_repeat_encodeBlockAsm12B: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_match_repeat_encodeBlockAsm12B: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_match_repeat_encodeBlockAsm12B: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_as_copy_encodeBlockAsm12B: // emitCopy - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm12B - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B long_offset_short_repeat_as_copy_encodeBlockAsm12B: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm12B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B emit_copy_three_repeat_as_copy_encodeBlockAsm12B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm12B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm12B no_repeat_found_encodeBlockAsm12B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm12B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm12B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm12B candidate3_match_encodeBlockAsm12B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm12B candidate2_match_encodeBlockAsm12B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm12B match_extend_back_loop_encodeBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm12B JMP match_extend_back_loop_encodeBlockAsm12B match_extend_back_end_encodeBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm12B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm12B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm12B JB three_bytes_match_emit_encodeBlockAsm12B three_bytes_match_emit_encodeBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm12B two_bytes_match_emit_encodeBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm12B JMP memmove_long_match_emit_encodeBlockAsm12B one_byte_match_emit_encodeBlockAsm12B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm12B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm12B memmove_long_match_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm12B: match_nolit_loop_encodeBlockAsm12B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm12B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm12B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm12B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm12B matchlen_bsf_16match_nolit_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm12B matchlen_match8_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm12B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm12B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm12B matchlen_bsf_8_match_nolit_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm12B matchlen_match4_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm12B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm12B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm12B JB match_nolit_end_encodeBlockAsm12B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm12B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm12B matchlen_match1_match_nolit_encodeBlockAsm12B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm12B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm12B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm12B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm12B - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B long_offset_short_match_nolit_encodeBlockAsm12B: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm12B - + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX + JMP match_nolit_emitcopy_end_encodeBlockAsm12B + two_byte_offset_short_match_nolit_encodeBlockAsm12B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm12B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm12B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B emit_copy_three_match_nolit_encodeBlockAsm12B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm12B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm12B: - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x18, DI - IMULQ R8, DI - SHRQ $0x34, DI - SHLQ $0x18, BX - IMULQ R8, BX - SHRQ $0x34, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x18, R8 + IMULQ R9, R8 + SHRQ $0x34, R8 + SHLQ $0x18, SI + IMULQ R9, SI + SHRQ $0x34, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm12B - INCL CX + INCL DX JMP search_loop_encodeBlockAsm12B emit_remainder_encodeBlockAsm12B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm12B @@ -3709,26 +3712,26 @@ emit_remainder_ok_encodeBlockAsm12B: JB three_bytes_emit_remainder_encodeBlockAsm12B three_bytes_emit_remainder_encodeBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm12B two_bytes_emit_remainder_encodeBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm12B JMP memmove_long_emit_remainder_encodeBlockAsm12B one_byte_emit_remainder_encodeBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -3744,73 +3747,73 @@ memmove_emit_remainder_encodeBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm12B memmove_long_emit_remainder_encodeBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -3824,967 +3827,968 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm10B(dst []byte, src []byte) int +// func encodeBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm10B(SB), $4120-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000020, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000020, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm10B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm10B - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm10B repeat_extend_back_loop_encodeBlockAsm10B: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm10B - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm10B - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm10B repeat_extend_back_end_encodeBlockAsm10B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm10B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm10B JB three_bytes_repeat_emit_encodeBlockAsm10B three_bytes_repeat_emit_encodeBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm10B two_bytes_repeat_emit_encodeBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm10B JMP memmove_long_repeat_emit_encodeBlockAsm10B one_byte_repeat_emit_encodeBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm10B memmove_long_repeat_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm10B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm10B - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm10B - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm10B - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm10B matchlen_bsf_16repeat_extend_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm10B matchlen_match8_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm10B - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm10B - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm10B matchlen_bsf_8_repeat_extend_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm10B matchlen_match4_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm10B - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm10B - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm10B JB repeat_extend_forward_end_encodeBlockAsm10B - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm10B - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm10B matchlen_match1_repeat_extend_encodeBlockAsm10B: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm10B - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm10B: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm10B // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm10B - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm10B cant_repeat_two_offset_match_repeat_encodeBlockAsm10B: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm10B - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_match_repeat_encodeBlockAsm10B: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_match_repeat_encodeBlockAsm10B: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_match_repeat_encodeBlockAsm10B: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_as_copy_encodeBlockAsm10B: // emitCopy - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm10B - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B long_offset_short_repeat_as_copy_encodeBlockAsm10B: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm10B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B emit_copy_three_repeat_as_copy_encodeBlockAsm10B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm10B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm10B no_repeat_found_encodeBlockAsm10B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm10B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm10B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm10B candidate3_match_encodeBlockAsm10B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm10B candidate2_match_encodeBlockAsm10B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm10B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm10B match_extend_back_loop_encodeBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm10B JMP match_extend_back_loop_encodeBlockAsm10B match_extend_back_end_encodeBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm10B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm10B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm10B JB three_bytes_match_emit_encodeBlockAsm10B three_bytes_match_emit_encodeBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm10B two_bytes_match_emit_encodeBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm10B JMP memmove_long_match_emit_encodeBlockAsm10B one_byte_match_emit_encodeBlockAsm10B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm10B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm10B memmove_long_match_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm10B: match_nolit_loop_encodeBlockAsm10B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm10B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm10B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm10B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm10B matchlen_bsf_16match_nolit_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm10B matchlen_match8_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm10B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm10B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm10B matchlen_bsf_8_match_nolit_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm10B matchlen_match4_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm10B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm10B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm10B JB match_nolit_end_encodeBlockAsm10B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm10B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm10B matchlen_match1_match_nolit_encodeBlockAsm10B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm10B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm10B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm10B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm10B - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B long_offset_short_match_nolit_encodeBlockAsm10B: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B two_byte_offset_short_match_nolit_encodeBlockAsm10B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm10B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm10B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B emit_copy_three_match_nolit_encodeBlockAsm10B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm10B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm10B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x36, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x36, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x36, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x36, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm10B - INCL CX + INCL DX JMP search_loop_encodeBlockAsm10B emit_remainder_encodeBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm10B @@ -4793,26 +4797,26 @@ emit_remainder_ok_encodeBlockAsm10B: JB three_bytes_emit_remainder_encodeBlockAsm10B three_bytes_emit_remainder_encodeBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm10B two_bytes_emit_remainder_encodeBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm10B JMP memmove_long_emit_remainder_encodeBlockAsm10B one_byte_emit_remainder_encodeBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -4828,73 +4832,73 @@ memmove_emit_remainder_encodeBlockAsm10B: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm10B memmove_long_emit_remainder_encodeBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -4908,943 +4912,944 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm8B(dst []byte, src []byte) int +// func encodeBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm8B(SB), $1048-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000008, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000008, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 - JNE no_repeat_found_encodeBlockAsm8B - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x38, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x38, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 + JNE no_repeat_found_encodeBlockAsm8B + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm8B repeat_extend_back_loop_encodeBlockAsm8B: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm8B - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm8B - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm8B repeat_extend_back_end_encodeBlockAsm8B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm8B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm8B JB three_bytes_repeat_emit_encodeBlockAsm8B three_bytes_repeat_emit_encodeBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm8B two_bytes_repeat_emit_encodeBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm8B JMP memmove_long_repeat_emit_encodeBlockAsm8B one_byte_repeat_emit_encodeBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm8B memmove_long_repeat_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm8B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm8B - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm8B - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm8B - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm8B matchlen_bsf_16repeat_extend_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm8B matchlen_match8_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm8B - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm8B - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm8B matchlen_bsf_8_repeat_extend_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm8B matchlen_match4_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm8B - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm8B - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm8B JB repeat_extend_forward_end_encodeBlockAsm8B - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm8B - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm8B matchlen_match1_repeat_extend_encodeBlockAsm8B: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm8B - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm8B: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm8B // emitRepeat - MOVL BX, SI - LEAL -4(BX), BX - CMPL SI, $0x08 + MOVL SI, DI + LEAL -4(SI), SI + CMPL DI, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm8B - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm8B cant_repeat_two_offset_match_repeat_encodeBlockAsm8B: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm8B - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_three_match_repeat_encodeBlockAsm8B: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_two_match_repeat_encodeBlockAsm8B: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_as_copy_encodeBlockAsm8B: // emitCopy - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm8B - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b - MOVL BX, SI - LEAL -4(BX), BX - CMPL SI, $0x08 + MOVL SI, DI + LEAL -4(SI), SI + CMPL DI, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B long_offset_short_repeat_as_copy_encodeBlockAsm8B: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, SI - LEAL -4(BX), BX - CMPL SI, $0x08 + MOVL SI, DI + LEAL -4(SI), SI + CMPL DI, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm8B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B emit_copy_three_repeat_as_copy_encodeBlockAsm8B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm8B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm8B no_repeat_found_encodeBlockAsm8B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm8B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm8B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm8B candidate3_match_encodeBlockAsm8B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm8B candidate2_match_encodeBlockAsm8B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm8B match_extend_back_loop_encodeBlockAsm8B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm8B JMP match_extend_back_loop_encodeBlockAsm8B match_extend_back_end_encodeBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm8B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm8B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm8B JB three_bytes_match_emit_encodeBlockAsm8B three_bytes_match_emit_encodeBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm8B two_bytes_match_emit_encodeBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm8B JMP memmove_long_match_emit_encodeBlockAsm8B one_byte_match_emit_encodeBlockAsm8B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm8B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm8B memmove_long_match_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) - ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 + ADDQ $0x20, R12 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm8B: match_nolit_loop_encodeBlockAsm8B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm8B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm8B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm8B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm8B matchlen_bsf_16match_nolit_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm8B matchlen_match8_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm8B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm8B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm8B matchlen_bsf_8_match_nolit_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm8B matchlen_match4_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm8B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm8B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm8B JB match_nolit_end_encodeBlockAsm8B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm8B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm8B matchlen_match1_match_nolit_encodeBlockAsm8B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm8B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm8B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm8B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm8B - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b - MOVL R9, BX - LEAL -4(R9), R9 - CMPL BX, $0x08 + MOVL R10, SI + LEAL -4(R10), R10 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B long_offset_short_match_nolit_encodeBlockAsm8B: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, BX - LEAL -4(R9), R9 - CMPL BX, $0x08 + MOVL R10, SI + LEAL -4(R10), R10 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B two_byte_offset_short_match_nolit_encodeBlockAsm8B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm8B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B emit_copy_three_match_nolit_encodeBlockAsm8B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm8B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm8B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x38, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x38, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x38, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x38, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm8B - INCL CX + INCL DX JMP search_loop_encodeBlockAsm8B emit_remainder_encodeBlockAsm8B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm8B @@ -5853,26 +5858,26 @@ emit_remainder_ok_encodeBlockAsm8B: JB three_bytes_emit_remainder_encodeBlockAsm8B three_bytes_emit_remainder_encodeBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm8B two_bytes_emit_remainder_encodeBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm8B JMP memmove_long_emit_remainder_encodeBlockAsm8B one_byte_emit_remainder_encodeBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -5888,73 +5893,73 @@ memmove_emit_remainder_encodeBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm8B memmove_long_emit_remainder_encodeBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -5968,961 +5973,962 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm(dst []byte, src []byte) int +// func encodeBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm(SB), $589848-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00001200, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00001200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - CMPL BX, $0x63 + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + CMPL SI, $0x63 JBE check_maxskip_ok_encodeBetterBlockAsm - LEAL 100(CX), BX + LEAL 100(DX), SI JMP check_maxskip_cont_encodeBetterBlockAsm check_maxskip_ok_encodeBetterBlockAsm: - LEAL 1(CX)(BX*1), BX + LEAL 1(DX)(SI*1), SI check_maxskip_cont_encodeBetterBlockAsm: - CMPL BX, 8(SP) + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 524312(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 524312(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL 524288(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 524288(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm no_short_found_encodeBetterBlockAsm: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm candidateS_match_encodeBetterBlockAsm: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm match_extend_back_loop_encodeBetterBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm JMP match_extend_back_loop_encodeBetterBlockAsm match_extend_back_end_encodeBetterBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm matchlen_bsf_16match_nolit_encodeBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm matchlen_match8_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm matchlen_bsf_8_match_nolit_encodeBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm matchlen_match4_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm JB match_nolit_end_encodeBetterBlockAsm - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm matchlen_match1_match_nolit_encodeBetterBlockAsm: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm - CMPL R11, $0x01 + CMPL R12, $0x01 JA match_length_ok_encodeBetterBlockAsm - CMPL DI, $0x0000ffff + CMPL R8, $0x0000ffff JBE match_length_ok_encodeBetterBlockAsm - MOVL 20(SP), CX - INCL CX + MOVL 20(SP), DX + INCL DX JMP search_loop_encodeBetterBlockAsm match_length_ok_encodeBetterBlockAsm: - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_encodeBetterBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_match_emit_encodeBetterBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeBetterBlockAsm four_bytes_match_emit_encodeBetterBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBetterBlockAsm three_bytes_match_emit_encodeBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm two_bytes_match_emit_encodeBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm JMP memmove_long_match_emit_encodeBetterBlockAsm one_byte_match_emit_encodeBetterBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm memmove_long_match_emit_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB two_byte_offset_match_nolit_encodeBetterBlockAsm - CMPL R11, $0x40 + CMPL R12, $0x40 JBE four_bytes_remain_match_nolit_encodeBetterBlockAsm - MOVB $0xff, (AX) - MOVL DI, 1(AX) - LEAL -64(R11), R11 - ADDQ $0x05, AX - CMPL R11, $0x04 + MOVB $0xff, (CX) + MOVL R8, 1(CX) + LEAL -64(R12), R12 + ADDQ $0x05, CX + CMPL R12, $0x04 JB four_bytes_remain_match_nolit_encodeBetterBlockAsm // emitRepeat emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm four_bytes_remain_match_nolit_encodeBetterBlockAsm: - TESTL R11, R11 + TESTL R12, R12 JZ match_nolit_emitcopy_end_encodeBetterBlockAsm - XORL BX, BX - LEAL -1(BX)(R11*4), R11 - MOVB R11, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + XORL SI, SI + LEAL -1(SI)(R12*4), R12 + MOVB R12, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm two_byte_offset_match_nolit_encodeBetterBlockAsm: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - MOVL DI, R8 - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + MOVL R8, R9 + SHRL $0x08, R9 + SHLL $0x05, R9 + ORL R9, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm long_offset_short_match_nolit_encodeBetterBlockAsm: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm two_byte_offset_short_match_nolit_encodeBetterBlockAsm: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm emit_copy_three_match_nolit_encodeBetterBlockAsm: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm match_is_repeat_encodeBetterBlockAsm: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_repeat_encodeBetterBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_match_emit_repeat_encodeBetterBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm four_bytes_match_emit_repeat_encodeBetterBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm three_bytes_match_emit_repeat_encodeBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm two_bytes_match_emit_repeat_encodeBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm one_byte_match_emit_repeat_encodeBetterBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm memmove_long_match_emit_repeat_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_repeat_encodeBetterBlockAsm - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB repeat_five_match_nolit_repeat_encodeBetterBlockAsm - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm repeat_five_match_nolit_repeat_encodeBetterBlockAsm: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_repeat_encodeBetterBlockAsm: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_repeat_encodeBetterBlockAsm: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_repeat_encodeBetterBlockAsm: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm - CMPQ AX, (SP) - JB match_nolit_dst_ok_encodeBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) - RET - -match_nolit_dst_ok_encodeBetterBlockAsm: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x2f, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 524312(SP)(R10*4) - MOVL R13, 524312(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + CMPQ CX, (SP) + JB match_nolit_dst_ok_encodeBetterBlockAsm + MOVQ $0x00000000, ret+56(FP) + RET + +match_nolit_dst_ok_encodeBetterBlockAsm: + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x32, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 524288(AX)(R11*4) + MOVL R14, 524288(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x2f, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x2f, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm emit_remainder_encodeBetterBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm @@ -6932,41 +6938,41 @@ emit_remainder_ok_encodeBetterBlockAsm: JB three_bytes_emit_remainder_encodeBetterBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeBetterBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm four_bytes_emit_remainder_encodeBetterBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm three_bytes_emit_remainder_encodeBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm two_bytes_emit_remainder_encodeBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm JMP memmove_long_emit_remainder_encodeBetterBlockAsm one_byte_emit_remainder_encodeBetterBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -6982,73 +6988,73 @@ memmove_emit_remainder_encodeBetterBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm memmove_long_emit_remainder_encodeBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -7062,903 +7068,904 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm4MB(dst []byte, src []byte) int +// func encodeBetterBlockAsm4MB(dst []byte, src []byte, tmp *[589824]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm4MB(SB), $589848-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00001200, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm4MB(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00001200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm4MB: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm4MB MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm4MB: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - CMPL BX, $0x63 + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + CMPL SI, $0x63 JBE check_maxskip_ok_encodeBetterBlockAsm4MB - LEAL 100(CX), BX + LEAL 100(DX), SI JMP check_maxskip_cont_encodeBetterBlockAsm4MB check_maxskip_ok_encodeBetterBlockAsm4MB: - LEAL 1(CX)(BX*1), BX + LEAL 1(DX)(SI*1), SI check_maxskip_cont_encodeBetterBlockAsm4MB: - CMPL BX, 8(SP) + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm4MB - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 524312(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 524312(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL 524288(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 524288(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm4MB - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm4MB - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm4MB no_short_found_encodeBetterBlockAsm4MB: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm4MB - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm4MB - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm4MB candidateS_match_encodeBetterBlockAsm4MB: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm4MB - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm4MB: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm4MB match_extend_back_loop_encodeBetterBlockAsm4MB: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm4MB - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm4MB - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm4MB JMP match_extend_back_loop_encodeBetterBlockAsm4MB match_extend_back_end_encodeBetterBlockAsm4MB: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 4(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 4(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm4MB: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm4MB - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm4MB matchlen_match8_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm4MB - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm4MB matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm4MB matchlen_match4_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm4MB - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm4MB - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm4MB JB match_nolit_end_encodeBetterBlockAsm4MB - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm4MB - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm4MB matchlen_match1_match_nolit_encodeBetterBlockAsm4MB: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm4MB - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm4MB: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm4MB - CMPL R11, $0x01 + CMPL R12, $0x01 JA match_length_ok_encodeBetterBlockAsm4MB - CMPL DI, $0x0000ffff + CMPL R8, $0x0000ffff JBE match_length_ok_encodeBetterBlockAsm4MB - MOVL 20(SP), CX - INCL CX + MOVL 20(SP), DX + INCL DX JMP search_loop_encodeBetterBlockAsm4MB match_length_ok_encodeBetterBlockAsm4MB: - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm4MB - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm4MB - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_encodeBetterBlockAsm4MB - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBetterBlockAsm4MB three_bytes_match_emit_encodeBetterBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm4MB two_bytes_match_emit_encodeBetterBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm4MB JMP memmove_long_match_emit_encodeBetterBlockAsm4MB one_byte_match_emit_encodeBetterBlockAsm4MB: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm4MB: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm4MB memmove_long_match_emit_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm4MB: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB two_byte_offset_match_nolit_encodeBetterBlockAsm4MB - CMPL R11, $0x40 + CMPL R12, $0x40 JBE four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB - MOVB $0xff, (AX) - MOVL DI, 1(AX) - LEAL -64(R11), R11 - ADDQ $0x05, AX - CMPL R11, $0x04 + MOVB $0xff, (CX) + MOVL R8, 1(CX) + LEAL -64(R12), R12 + ADDQ $0x05, CX + CMPL R12, $0x04 JB four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB: - TESTL R11, R11 + TESTL R12, R12 JZ match_nolit_emitcopy_end_encodeBetterBlockAsm4MB - XORL BX, BX - LEAL -1(BX)(R11*4), R11 - MOVB R11, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + XORL SI, SI + LEAL -1(SI)(R12*4), R12 + MOVB R12, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB two_byte_offset_match_nolit_encodeBetterBlockAsm4MB: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm4MB - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB long_offset_short_match_nolit_encodeBetterBlockAsm4MB: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB emit_copy_three_match_nolit_encodeBetterBlockAsm4MB: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB match_is_repeat_encodeBetterBlockAsm4MB: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm4MB - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm4MB JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB one_byte_match_emit_repeat_encodeBetterBlockAsm4MB: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm4MB: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm4MB - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm4MB: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x2f, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 524312(SP)(R10*4) - MOVL R13, 524312(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x32, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 524288(AX)(R11*4) + MOVL R14, 524288(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm4MB: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm4MB - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x2f, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x2f, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm4MB emit_remainder_encodeBetterBlockAsm4MB: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 4(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 4(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm4MB: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm4MB @@ -7968,33 +7975,33 @@ emit_remainder_ok_encodeBetterBlockAsm4MB: JB three_bytes_emit_remainder_encodeBetterBlockAsm4MB MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB three_bytes_emit_remainder_encodeBetterBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB two_bytes_emit_remainder_encodeBetterBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm4MB JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB one_byte_emit_remainder_encodeBetterBlockAsm4MB: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -8010,73 +8017,73 @@ memmove_emit_remainder_encodeBetterBlockAsm4MB: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB memmove_long_emit_remainder_encodeBetterBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back: MOVOU (SI), X4 @@ -8090,756 +8097,757 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm12B(dst []byte, src []byte) int +// func encodeBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm12B(SB), $81944-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000280, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000280, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm12B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 65560(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 65560(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL 65536(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 65536(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm12B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm12B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm12B no_short_found_encodeBetterBlockAsm12B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm12B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm12B candidateS_match_encodeBetterBlockAsm12B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm12B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm12B match_extend_back_loop_encodeBetterBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm12B JMP match_extend_back_loop_encodeBetterBlockAsm12B match_extend_back_end_encodeBetterBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm12B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm12B matchlen_match8_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm12B matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm12B matchlen_match4_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm12B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm12B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm12B JB match_nolit_end_encodeBetterBlockAsm12B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm12B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm12B matchlen_match1_match_nolit_encodeBetterBlockAsm12B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm12B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm12B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm12B - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm12B JB three_bytes_match_emit_encodeBetterBlockAsm12B three_bytes_match_emit_encodeBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm12B two_bytes_match_emit_encodeBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm12B JMP memmove_long_match_emit_encodeBetterBlockAsm12B one_byte_match_emit_encodeBetterBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm12B memmove_long_match_emit_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm12B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm12B - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B long_offset_short_match_nolit_encodeBetterBlockAsm12B: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm12B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B emit_copy_three_match_nolit_encodeBetterBlockAsm12B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B match_is_repeat_encodeBetterBlockAsm12B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm12B JB three_bytes_match_emit_repeat_encodeBetterBlockAsm12B three_bytes_match_emit_repeat_encodeBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B two_bytes_match_emit_repeat_encodeBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm12B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B one_byte_match_emit_repeat_encodeBetterBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B memmove_long_match_emit_repeat_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm12B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm12B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x34, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x32, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x34, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 65560(SP)(R10*4) - MOVL R13, 65560(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x34, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x32, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x34, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 65536(AX)(R11*4) + MOVL R14, 65536(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm12B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm12B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x32, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm12B emit_remainder_encodeBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm12B @@ -8848,26 +8856,26 @@ emit_remainder_ok_encodeBetterBlockAsm12B: JB three_bytes_emit_remainder_encodeBetterBlockAsm12B three_bytes_emit_remainder_encodeBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B two_bytes_emit_remainder_encodeBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm12B JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B one_byte_emit_remainder_encodeBetterBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -8883,73 +8891,73 @@ memmove_emit_remainder_encodeBetterBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm12B memmove_long_emit_remainder_encodeBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -8963,756 +8971,757 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 - JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm10B(dst []byte, src []byte) int +// func encodeBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm10B(SB), $20504-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x000000a0, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x000000a0, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm10B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 16408(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 16408(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL 16384(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 16384(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm10B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm10B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm10B no_short_found_encodeBetterBlockAsm10B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm10B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm10B candidateS_match_encodeBetterBlockAsm10B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm10B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm10B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm10B match_extend_back_loop_encodeBetterBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm10B JMP match_extend_back_loop_encodeBetterBlockAsm10B match_extend_back_end_encodeBetterBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm10B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm10B matchlen_match8_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm10B matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm10B matchlen_match4_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm10B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm10B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm10B JB match_nolit_end_encodeBetterBlockAsm10B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm10B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm10B matchlen_match1_match_nolit_encodeBetterBlockAsm10B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm10B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm10B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm10B - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm10B JB three_bytes_match_emit_encodeBetterBlockAsm10B three_bytes_match_emit_encodeBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm10B two_bytes_match_emit_encodeBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm10B JMP memmove_long_match_emit_encodeBetterBlockAsm10B one_byte_match_emit_encodeBetterBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm10B memmove_long_match_emit_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm10B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm10B - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B long_offset_short_match_nolit_encodeBetterBlockAsm10B: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm10B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B emit_copy_three_match_nolit_encodeBetterBlockAsm10B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B match_is_repeat_encodeBetterBlockAsm10B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm10B JB three_bytes_match_emit_repeat_encodeBetterBlockAsm10B three_bytes_match_emit_repeat_encodeBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B two_bytes_match_emit_repeat_encodeBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm10B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B one_byte_match_emit_repeat_encodeBetterBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B memmove_long_match_emit_repeat_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm10B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm10B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x36, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x34, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x36, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 16408(SP)(R10*4) - MOVL R13, 16408(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x36, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x34, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x36, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 16384(AX)(R11*4) + MOVL R14, 16384(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm10B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm10B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x34, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm10B emit_remainder_encodeBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm10B @@ -9721,26 +9730,26 @@ emit_remainder_ok_encodeBetterBlockAsm10B: JB three_bytes_emit_remainder_encodeBetterBlockAsm10B three_bytes_emit_remainder_encodeBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B two_bytes_emit_remainder_encodeBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm10B JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B one_byte_emit_remainder_encodeBetterBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -9756,73 +9765,73 @@ memmove_emit_remainder_encodeBetterBlockAsm10B: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm10B memmove_long_emit_remainder_encodeBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -9836,742 +9845,743 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm8B(dst []byte, src []byte) int +// func encodeBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm8B(SB), $5144-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000028, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000028, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 4120(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 4120(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x38, R11 + MOVL (AX)(R10*4), SI + MOVL 4096(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 4096(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm8B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm8B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm8B no_short_found_encodeBetterBlockAsm8B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm8B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm8B candidateS_match_encodeBetterBlockAsm8B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm8B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm8B match_extend_back_loop_encodeBetterBlockAsm8B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm8B JMP match_extend_back_loop_encodeBetterBlockAsm8B match_extend_back_end_encodeBetterBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm8B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm8B matchlen_match8_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm8B matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm8B matchlen_match4_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm8B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm8B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm8B JB match_nolit_end_encodeBetterBlockAsm8B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm8B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm8B matchlen_match1_match_nolit_encodeBetterBlockAsm8B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm8B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm8B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm8B - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm8B JB three_bytes_match_emit_encodeBetterBlockAsm8B three_bytes_match_emit_encodeBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm8B two_bytes_match_emit_encodeBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm8B JMP memmove_long_match_emit_encodeBetterBlockAsm8B one_byte_match_emit_encodeBetterBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm8B memmove_long_match_emit_encodeBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm8B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm8B - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B long_offset_short_match_nolit_encodeBetterBlockAsm8B: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B -repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX +repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX + JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm8B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B emit_copy_three_match_nolit_encodeBetterBlockAsm8B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B match_is_repeat_encodeBetterBlockAsm8B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm8B JB three_bytes_match_emit_repeat_encodeBetterBlockAsm8B three_bytes_match_emit_repeat_encodeBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B two_bytes_match_emit_repeat_encodeBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm8B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B one_byte_match_emit_repeat_encodeBetterBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x04 + CMPQ R8, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4 - CMPQ DI, $0x08 + CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4: - MOVL (R8), R9 - MOVL R9, (AX) + MOVL (R9), R10 + MOVL R10, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7: - MOVL (R8), R9 - MOVL -4(R8)(DI*1), R8 - MOVL R9, (AX) - MOVL R8, -4(AX)(DI*1) + MOVL (R9), R10 + MOVL -4(R9)(R8*1), R9 + MOVL R10, (CX) + MOVL R9, -4(CX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B memmove_long_match_emit_repeat_encodeBetterBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R12 - SUBQ R9, R12 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R13 + SUBQ R10, R13 + DECQ R11 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R12*1), R9 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R9)(R13*1), R10 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R10 ADDQ $0x20, R13 - ADDQ $0x20, R9 - ADDQ $0x20, R12 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R12*1), X4 - MOVOU -16(R8)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ DI, R12 + MOVOU -32(R9)(R13*1), X4 + MOVOU -16(R9)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm8B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm8B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x38, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x36, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x38, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 4120(SP)(R10*4) - MOVL R13, 4120(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x38, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x36, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x38, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 4096(AX)(R11*4) + MOVL R14, 4096(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm8B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm8B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x36, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm8B emit_remainder_encodeBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm8B @@ -10580,26 +10590,26 @@ emit_remainder_ok_encodeBetterBlockAsm8B: JB three_bytes_emit_remainder_encodeBetterBlockAsm8B three_bytes_emit_remainder_encodeBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B two_bytes_emit_remainder_encodeBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm8B JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B one_byte_emit_remainder_encodeBetterBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -10615,73 +10625,73 @@ memmove_emit_remainder_encodeBetterBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm8B memmove_long_emit_remainder_encodeBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -10695,798 +10705,799 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm(dst []byte, src []byte) int +// func encodeSnappyBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm repeat_extend_back_loop_encodeSnappyBlockAsm: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm repeat_extend_back_end_encodeSnappyBlockAsm: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 5(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 5(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_encodeSnappyBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_repeat_emit_encodeSnappyBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm four_bytes_repeat_emit_encodeSnappyBlockAsm: - MOVL BX, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL SI, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm three_bytes_repeat_emit_encodeSnappyBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm two_bytes_repeat_emit_encodeSnappyBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm JMP memmove_long_repeat_emit_encodeSnappyBlockAsm one_byte_repeat_emit_encodeSnappyBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm memmove_long_repeat_emit_encodeSnappyBlockAsm: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_match8_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_match4_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm JB repeat_extend_forward_end_encodeSnappyBlockAsm - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_match1_repeat_extend_encodeSnappyBlockAsm: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + MOVB $0xff, (CX) + MOVL DI, 1(CX) + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_encodeSnappyBlockAsm - XORL DI, DI - LEAL -1(DI)(BX*4), BX - MOVB BL, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX + XORL R8, R8 + LEAL -1(R8)(SI*4), SI + MOVB SI, (CX) + MOVL DI, 1(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeSnappyBlockAsm two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm no_repeat_found_encodeSnappyBlockAsm: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm candidate3_match_encodeSnappyBlockAsm: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm candidate2_match_encodeSnappyBlockAsm: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm match_extend_back_loop_encodeSnappyBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm JMP match_extend_back_loop_encodeSnappyBlockAsm match_extend_back_end_encodeSnappyBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB three_bytes_match_emit_encodeSnappyBlockAsm - CMPL DI, $0x01000000 + CMPL R8, $0x01000000 JB four_bytes_match_emit_encodeSnappyBlockAsm - MOVB $0xfc, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm four_bytes_match_emit_encodeSnappyBlockAsm: - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL R8, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW R8, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm three_bytes_match_emit_encodeSnappyBlockAsm: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm two_bytes_match_emit_encodeSnappyBlockAsm: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm JMP memmove_long_match_emit_encodeSnappyBlockAsm one_byte_match_emit_encodeSnappyBlockAsm: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm memmove_long_match_emit_encodeSnappyBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm: match_nolit_loop_encodeSnappyBlockAsm: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm matchlen_bsf_16match_nolit_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm matchlen_match8_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm matchlen_match4_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm JB match_nolit_end_encodeSnappyBlockAsm - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm matchlen_match1_match_nolit_encodeSnappyBlockAsm: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_encodeSnappyBlockAsm four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_encodeSnappyBlockAsm - MOVB $0xff, (AX) - MOVL BX, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + MOVB $0xff, (CX) + MOVL SI, 1(CX) + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_encodeSnappyBlockAsm JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm four_bytes_remain_match_nolit_encodeSnappyBlockAsm: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm - XORL SI, SI - LEAL -1(SI)(R9*4), R9 - MOVB R9, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + XORL DI, DI + LEAL -1(DI)(R10*4), R10 + MOVB R10, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm two_byte_offset_match_nolit_encodeSnappyBlockAsm: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm two_byte_offset_short_match_nolit_encodeSnappyBlockAsm: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm emit_copy_three_match_nolit_encodeSnappyBlockAsm: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm emit_remainder_encodeSnappyBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm @@ -11496,41 +11507,41 @@ emit_remainder_ok_encodeSnappyBlockAsm: JB three_bytes_emit_remainder_encodeSnappyBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeSnappyBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm four_bytes_emit_remainder_encodeSnappyBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm three_bytes_emit_remainder_encodeSnappyBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm two_bytes_emit_remainder_encodeSnappyBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm JMP memmove_long_emit_remainder_encodeSnappyBlockAsm one_byte_emit_remainder_encodeSnappyBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -11546,73 +11557,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm memmove_long_emit_remainder_encodeSnappyBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -11626,718 +11637,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm64K(dst []byte, src []byte) int +// func encodeSnappyBlockAsm64K(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm64K(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm64K(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm64K: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm64K MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm64K: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm64K - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm64K - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm64K repeat_extend_back_loop_encodeSnappyBlockAsm64K: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm64K - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm64K - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm64K repeat_extend_back_end_encodeSnappyBlockAsm64K: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm64K: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm64K - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm64K JB three_bytes_repeat_emit_encodeSnappyBlockAsm64K three_bytes_repeat_emit_encodeSnappyBlockAsm64K: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K two_bytes_repeat_emit_encodeSnappyBlockAsm64K: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm64K JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K one_byte_repeat_emit_encodeSnappyBlockAsm64K: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K memmove_long_repeat_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K JB repeat_extend_forward_end_encodeSnappyBlockAsm64K - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm64K matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm64K - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm64K: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm64K emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm64K: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm64K no_repeat_found_encodeSnappyBlockAsm64K: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm64K - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm64K - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm64K - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm64K candidate3_match_encodeSnappyBlockAsm64K: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm64K candidate2_match_encodeSnappyBlockAsm64K: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm64K: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm64K match_extend_back_loop_encodeSnappyBlockAsm64K: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm64K - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm64K - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm64K JMP match_extend_back_loop_encodeSnappyBlockAsm64K match_extend_back_end_encodeSnappyBlockAsm64K: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm64K: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm64K - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm64K - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm64K JB three_bytes_match_emit_encodeSnappyBlockAsm64K three_bytes_match_emit_encodeSnappyBlockAsm64K: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm64K two_bytes_match_emit_encodeSnappyBlockAsm64K: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm64K JMP memmove_long_match_emit_encodeSnappyBlockAsm64K one_byte_match_emit_encodeSnappyBlockAsm64K: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm64K: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm64K memmove_long_match_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm64K: match_nolit_loop_encodeSnappyBlockAsm64K: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm64K - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm64K matchlen_match8_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm64K - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm64K matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm64K matchlen_match4_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm64K - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm64K - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm64K JB match_nolit_end_encodeSnappyBlockAsm64K - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm64K - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm64K matchlen_match1_match_nolit_encodeSnappyBlockAsm64K: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm64K - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm64K: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm64K: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm64K two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm64K emit_copy_three_match_nolit_encodeSnappyBlockAsm64K: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm64K: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm64K - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm64K: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm64K - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm64K emit_remainder_encodeSnappyBlockAsm64K: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm64K: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm64K @@ -12346,26 +12358,26 @@ emit_remainder_ok_encodeSnappyBlockAsm64K: JB three_bytes_emit_remainder_encodeSnappyBlockAsm64K three_bytes_emit_remainder_encodeSnappyBlockAsm64K: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K two_bytes_emit_remainder_encodeSnappyBlockAsm64K: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm64K JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K one_byte_emit_remainder_encodeSnappyBlockAsm64K: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -12381,73 +12393,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm64K: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K -emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) +emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7: + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K memmove_long_emit_remainder_encodeSnappyBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back: MOVOU (SI), X4 @@ -12461,718 +12473,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm12B(dst []byte, src []byte) int +// func encodeSnappyBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm12B(SB), $16408-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000080, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000080, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm12B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x18, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x18, R11 + IMULQ R9, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x18, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm12B - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm12B repeat_extend_back_loop_encodeSnappyBlockAsm12B: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm12B - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm12B - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm12B repeat_extend_back_end_encodeSnappyBlockAsm12B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm12B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm12B JB three_bytes_repeat_emit_encodeSnappyBlockAsm12B three_bytes_repeat_emit_encodeSnappyBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B two_bytes_repeat_emit_encodeSnappyBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm12B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B one_byte_repeat_emit_encodeSnappyBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B memmove_long_repeat_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B JB repeat_extend_forward_end_encodeSnappyBlockAsm12B - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm12B - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm12B: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm12B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm12B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm12B no_repeat_found_encodeSnappyBlockAsm12B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm12B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm12B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm12B candidate3_match_encodeSnappyBlockAsm12B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm12B candidate2_match_encodeSnappyBlockAsm12B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm12B match_extend_back_loop_encodeSnappyBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm12B JMP match_extend_back_loop_encodeSnappyBlockAsm12B match_extend_back_end_encodeSnappyBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm12B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm12B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm12B JB three_bytes_match_emit_encodeSnappyBlockAsm12B three_bytes_match_emit_encodeSnappyBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm12B two_bytes_match_emit_encodeSnappyBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm12B JMP memmove_long_match_emit_encodeSnappyBlockAsm12B one_byte_match_emit_encodeSnappyBlockAsm12B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm12B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm12B memmove_long_match_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm12B: match_nolit_loop_encodeSnappyBlockAsm12B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm12B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm12B matchlen_match8_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm12B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm12B matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm12B matchlen_match4_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm12B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm12B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm12B JB match_nolit_end_encodeSnappyBlockAsm12B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm12B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm12B matchlen_match1_match_nolit_encodeSnappyBlockAsm12B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm12B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm12B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm12B: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm12B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12B emit_copy_three_match_nolit_encodeSnappyBlockAsm12B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm12B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm12B: - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x18, DI - IMULQ R8, DI - SHRQ $0x34, DI - SHLQ $0x18, BX - IMULQ R8, BX - SHRQ $0x34, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x18, R8 + IMULQ R9, R8 + SHRQ $0x34, R8 + SHLQ $0x18, SI + IMULQ R9, SI + SHRQ $0x34, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm12B - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm12B emit_remainder_encodeSnappyBlockAsm12B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm12B @@ -13181,26 +13194,26 @@ emit_remainder_ok_encodeSnappyBlockAsm12B: JB three_bytes_emit_remainder_encodeSnappyBlockAsm12B three_bytes_emit_remainder_encodeSnappyBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B two_bytes_emit_remainder_encodeSnappyBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm12B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B one_byte_emit_remainder_encodeSnappyBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -13216,73 +13229,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B memmove_long_emit_remainder_encodeSnappyBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -13296,718 +13309,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm10B(dst []byte, src []byte) int +// func encodeSnappyBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm10B(SB), $4120-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000020, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000020, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm10B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm10B - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm10B repeat_extend_back_loop_encodeSnappyBlockAsm10B: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm10B - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm10B - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm10B repeat_extend_back_end_encodeSnappyBlockAsm10B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm10B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm10B JB three_bytes_repeat_emit_encodeSnappyBlockAsm10B three_bytes_repeat_emit_encodeSnappyBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B two_bytes_repeat_emit_encodeSnappyBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm10B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B one_byte_repeat_emit_encodeSnappyBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B memmove_long_repeat_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B JB repeat_extend_forward_end_encodeSnappyBlockAsm10B - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm10B - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm10B: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm10B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm10B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm10B no_repeat_found_encodeSnappyBlockAsm10B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm10B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm10B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm10B candidate3_match_encodeSnappyBlockAsm10B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm10B candidate2_match_encodeSnappyBlockAsm10B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm10B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm10B match_extend_back_loop_encodeSnappyBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm10B JMP match_extend_back_loop_encodeSnappyBlockAsm10B match_extend_back_end_encodeSnappyBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm10B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm10B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm10B JB three_bytes_match_emit_encodeSnappyBlockAsm10B three_bytes_match_emit_encodeSnappyBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm10B two_bytes_match_emit_encodeSnappyBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm10B JMP memmove_long_match_emit_encodeSnappyBlockAsm10B one_byte_match_emit_encodeSnappyBlockAsm10B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm10B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm10B memmove_long_match_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm10B: match_nolit_loop_encodeSnappyBlockAsm10B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm10B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm10B matchlen_match8_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm10B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm10B matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm10B matchlen_match4_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm10B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm10B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm10B JB match_nolit_end_encodeSnappyBlockAsm10B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm10B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm10B matchlen_match1_match_nolit_encodeSnappyBlockAsm10B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm10B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm10B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm10B: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm10B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10B emit_copy_three_match_nolit_encodeSnappyBlockAsm10B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm10B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm10B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x36, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x36, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x36, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x36, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm10B - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm10B emit_remainder_encodeSnappyBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm10B @@ -14016,26 +14030,26 @@ emit_remainder_ok_encodeSnappyBlockAsm10B: JB three_bytes_emit_remainder_encodeSnappyBlockAsm10B three_bytes_emit_remainder_encodeSnappyBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B two_bytes_emit_remainder_encodeSnappyBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm10B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B one_byte_emit_remainder_encodeSnappyBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -14051,73 +14065,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm10B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B memmove_long_emit_remainder_encodeSnappyBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -14131,714 +14145,715 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm8B(dst []byte, src []byte) int +// func encodeSnappyBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm8B(SB), $1048-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000008, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000008, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x38, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x38, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm8B - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm8B repeat_extend_back_loop_encodeSnappyBlockAsm8B: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm8B - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm8B - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm8B repeat_extend_back_end_encodeSnappyBlockAsm8B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm8B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm8B JB three_bytes_repeat_emit_encodeSnappyBlockAsm8B three_bytes_repeat_emit_encodeSnappyBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B two_bytes_repeat_emit_encodeSnappyBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm8B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B one_byte_repeat_emit_encodeSnappyBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B memmove_long_repeat_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B JB repeat_extend_forward_end_encodeSnappyBlockAsm8B - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm8B - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm8B: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm8B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm8B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm8B no_repeat_found_encodeSnappyBlockAsm8B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm8B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm8B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm8B candidate3_match_encodeSnappyBlockAsm8B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm8B candidate2_match_encodeSnappyBlockAsm8B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm8B match_extend_back_loop_encodeSnappyBlockAsm8B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm8B JMP match_extend_back_loop_encodeSnappyBlockAsm8B match_extend_back_end_encodeSnappyBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm8B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm8B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm8B JB three_bytes_match_emit_encodeSnappyBlockAsm8B three_bytes_match_emit_encodeSnappyBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm8B two_bytes_match_emit_encodeSnappyBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm8B JMP memmove_long_match_emit_encodeSnappyBlockAsm8B one_byte_match_emit_encodeSnappyBlockAsm8B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm8B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm8B memmove_long_match_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm8B: match_nolit_loop_encodeSnappyBlockAsm8B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm8B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm8B matchlen_match8_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm8B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm8B matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm8B matchlen_match4_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm8B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm8B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm8B JB match_nolit_end_encodeSnappyBlockAsm8B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm8B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm8B matchlen_match1_match_nolit_encodeSnappyBlockAsm8B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm8B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm8B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm8B: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm8B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm8B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8B emit_copy_three_match_nolit_encodeSnappyBlockAsm8B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm8B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm8B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x38, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x38, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x38, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x38, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm8B - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm8B emit_remainder_encodeSnappyBlockAsm8B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm8B @@ -14847,26 +14862,26 @@ emit_remainder_ok_encodeSnappyBlockAsm8B: JB three_bytes_emit_remainder_encodeSnappyBlockAsm8B three_bytes_emit_remainder_encodeSnappyBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B two_bytes_emit_remainder_encodeSnappyBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm8B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B one_byte_emit_remainder_encodeSnappyBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -14882,73 +14897,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B memmove_long_emit_remainder_encodeSnappyBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -14962,520 +14977,521 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm(SB), $589848-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00001200, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00001200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - CMPL BX, $0x63 + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + CMPL SI, $0x63 JBE check_maxskip_ok_encodeSnappyBetterBlockAsm - LEAL 100(CX), BX + LEAL 100(DX), SI JMP check_maxskip_cont_encodeSnappyBetterBlockAsm check_maxskip_ok_encodeSnappyBetterBlockAsm: - LEAL 1(CX)(BX*1), BX + LEAL 1(DX)(SI*1), SI check_maxskip_cont_encodeSnappyBetterBlockAsm: - CMPL BX, 8(SP) + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 524312(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 524312(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL 524288(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 524288(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm no_short_found_encodeSnappyBetterBlockAsm: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm candidateS_match_encodeSnappyBetterBlockAsm: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm match_extend_back_loop_encodeSnappyBetterBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm JMP match_extend_back_loop_encodeSnappyBetterBlockAsm match_extend_back_end_encodeSnappyBetterBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm JB match_nolit_end_encodeSnappyBetterBlockAsm - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL R11, $0x01 + CMPL R12, $0x01 JA match_length_ok_encodeSnappyBetterBlockAsm - CMPL DI, $0x0000ffff + CMPL R8, $0x0000ffff JBE match_length_ok_encodeSnappyBetterBlockAsm - MOVL 20(SP), CX - INCL CX + MOVL 20(SP), DX + INCL DX JMP search_loop_encodeSnappyBetterBlockAsm match_length_ok_encodeSnappyBetterBlockAsm: - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_encodeSnappyBetterBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_match_emit_encodeSnappyBetterBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm four_bytes_match_emit_encodeSnappyBetterBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm three_bytes_match_emit_encodeSnappyBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm two_bytes_match_emit_encodeSnappyBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm one_byte_match_emit_encodeSnappyBetterBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm memmove_long_match_emit_encodeSnappyBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm - MOVB $0xff, (AX) - MOVL DI, 1(AX) - LEAL -64(R11), R11 - ADDQ $0x05, AX - CMPL R11, $0x04 + MOVB $0xff, (CX) + MOVL R8, 1(CX) + LEAL -64(R12), R12 + ADDQ $0x05, CX + CMPL R12, $0x04 JB four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm JMP four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm: - TESTL R11, R11 + TESTL R12, R12 JZ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm - XORL BX, BX - LEAL -1(BX)(R11*4), R11 - MOVB R11, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + XORL SI, SI + LEAL -1(SI)(R12*4), R12 + MOVB R12, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x2f, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 524312(SP)(R10*4) - MOVL R13, 524312(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x32, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 524288(AX)(R11*4) + MOVL R14, 524288(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x2f, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x2f, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm emit_remainder_encodeSnappyBetterBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm @@ -15485,41 +15501,41 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeSnappyBetterBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm four_bytes_emit_remainder_encodeSnappyBetterBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm three_bytes_emit_remainder_encodeSnappyBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm two_bytes_emit_remainder_encodeSnappyBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm one_byte_emit_remainder_encodeSnappyBetterBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -15535,73 +15551,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm memmove_long_emit_remainder_encodeSnappyBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -15615,463 +15631,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_ba JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte, tmp *[294912]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm64K(SB), $327704-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000a00, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm64K(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000900, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm64K: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm64K MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm64K: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm64K - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x30, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 262168(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 262168(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x30, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x33, R11 + MOVL (AX)(R10*4), SI + MOVL 262144(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 262144(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm64K - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm64K - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm64K no_short_found_encodeSnappyBetterBlockAsm64K: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm64K - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm64K - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm64K candidateS_match_encodeSnappyBetterBlockAsm64K: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x30, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x30, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm64K - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm64K: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K match_extend_back_loop_encodeSnappyBetterBlockAsm64K: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm64K - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm64K - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K JMP match_extend_back_loop_encodeSnappyBetterBlockAsm64K match_extend_back_end_encodeSnappyBetterBlockAsm64K: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm64K: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm64K matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm64K matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K JB match_nolit_end_encodeSnappyBetterBlockAsm64K - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm64K matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm64K - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm64K: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm64K - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm64K JB three_bytes_match_emit_encodeSnappyBetterBlockAsm64K three_bytes_match_emit_encodeSnappyBetterBlockAsm64K: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K two_bytes_match_emit_encodeSnappyBetterBlockAsm64K: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm64K JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K one_byte_match_emit_encodeSnappyBetterBlockAsm64K: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K memmove_long_match_emit_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back - -emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + +emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32: + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm64K - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x30, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x30, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 262168(SP)(R10*4) - MOVL R13, 262168(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x30, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x33, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x30, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x33, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 262144(AX)(R11*4) + MOVL R14, 262144(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm64K: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm64K - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x30, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x30, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x30, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm64K emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm64K: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K @@ -16080,26 +16097,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm64K: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm64K JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -16115,73 +16132,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm64K: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back: MOVOU (SI), X4 @@ -16195,463 +16212,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm12B(SB), $81944-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000280, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000280, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm12B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 65560(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 65560(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL 65536(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 65536(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm12B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm12B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm12B no_short_found_encodeSnappyBetterBlockAsm12B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm12B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm12B candidateS_match_encodeSnappyBetterBlockAsm12B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm12B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B match_extend_back_loop_encodeSnappyBetterBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B JMP match_extend_back_loop_encodeSnappyBetterBlockAsm12B match_extend_back_end_encodeSnappyBetterBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm12B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B JB match_nolit_end_encodeSnappyBetterBlockAsm12B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm12B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm12B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm12B JB three_bytes_match_emit_encodeSnappyBetterBlockAsm12B three_bytes_match_emit_encodeSnappyBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B two_bytes_match_emit_encodeSnappyBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm12B JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B one_byte_match_emit_encodeSnappyBetterBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B memmove_long_match_emit_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm12B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x34, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x32, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x34, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 65560(SP)(R10*4) - MOVL R13, 65560(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x34, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x32, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x34, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 65536(AX)(R11*4) + MOVL R14, 65536(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm12B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm12B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x32, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm12B emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B @@ -16660,26 +16678,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm12B: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm12B JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -16695,73 +16713,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) - JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B - -emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) + JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B + +emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -16775,463 +16793,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm10B(SB), $20504-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x000000a0, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x000000a0, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm10B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 16408(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 16408(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL 16384(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 16384(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm10B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm10B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm10B no_short_found_encodeSnappyBetterBlockAsm10B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm10B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm10B candidateS_match_encodeSnappyBetterBlockAsm10B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm10B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm10B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B match_extend_back_loop_encodeSnappyBetterBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B JMP match_extend_back_loop_encodeSnappyBetterBlockAsm10B match_extend_back_end_encodeSnappyBetterBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm10B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm10B matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm10B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B JB match_nolit_end_encodeSnappyBetterBlockAsm10B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm10B matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm10B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm10B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm10B JB three_bytes_match_emit_encodeSnappyBetterBlockAsm10B three_bytes_match_emit_encodeSnappyBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B two_bytes_match_emit_encodeSnappyBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm10B JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B one_byte_match_emit_encodeSnappyBetterBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B memmove_long_match_emit_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm10B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x36, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x34, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x36, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 16408(SP)(R10*4) - MOVL R13, 16408(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x36, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x34, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x36, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 16384(AX)(R11*4) + MOVL R14, 16384(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm10B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm10B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x34, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm10B emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B @@ -17240,26 +17259,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm10B: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm10B JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -17275,73 +17294,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm10B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -17355,461 +17374,462 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm8B(SB), $5144-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000028, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000028, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 4120(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 4120(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x38, R11 + MOVL (AX)(R10*4), SI + MOVL 4096(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 4096(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm8B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm8B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm8B no_short_found_encodeSnappyBetterBlockAsm8B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm8B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm8B candidateS_match_encodeSnappyBetterBlockAsm8B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm8B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B match_extend_back_loop_encodeSnappyBetterBlockAsm8B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B JMP match_extend_back_loop_encodeSnappyBetterBlockAsm8B match_extend_back_end_encodeSnappyBetterBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm8B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm8B matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm8B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 - JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -4(DI), DI - LEAL 4(R11), R11 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 + JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B JB match_nolit_end_encodeSnappyBetterBlockAsm8B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm8B matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm8B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm8B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm8B JB three_bytes_match_emit_encodeSnappyBetterBlockAsm8B three_bytes_match_emit_encodeSnappyBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B two_bytes_match_emit_encodeSnappyBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm8B JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B one_byte_match_emit_encodeSnappyBetterBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B memmove_long_match_emit_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm8B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x38, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x36, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x38, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 4120(SP)(R10*4) - MOVL R13, 4120(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x38, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x36, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x38, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 4096(AX)(R11*4) + MOVL R14, 4096(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm8B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm8B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x36, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm8B emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B @@ -17818,26 +17838,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm8B: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm8B JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -17853,73 +17873,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -17933,1136 +17953,1142 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_ JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func calcBlockSize(src []byte) int +// func calcBlockSize(src []byte, tmp *[32768]byte) int // Requires: BMI, SSE2 -TEXT ·calcBlockSize(SB), $32792-32 - XORQ AX, AX - MOVQ $0x00000100, CX - LEAQ 24(SP), DX +TEXT ·calcBlockSize(SB), $24-40 + MOVQ tmp+24(FP), AX + XORQ CX, CX + MOVQ $0x00000100, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_calcBlockSize: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_calcBlockSize MOVL $0x00000000, 12(SP) - MOVQ src_len+8(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+0(FP), DX + MOVQ src_len+8(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+0(FP), BX search_loop_calcBlockSize: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_calcBlockSize - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x33, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x33, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x33, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x33, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x33, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_calcBlockSize - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_calcBlockSize repeat_extend_back_loop_calcBlockSize: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_calcBlockSize - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_calcBlockSize - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_calcBlockSize repeat_extend_back_end_calcBlockSize: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 5(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 5(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET repeat_dst_size_check_calcBlockSize: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_calcBlockSize - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_calcBlockSize - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_calcBlockSize - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_calcBlockSize - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_repeat_emit_calcBlockSize - ADDQ $0x05, AX + ADDQ $0x05, CX JMP memmove_long_repeat_emit_calcBlockSize four_bytes_repeat_emit_calcBlockSize: - ADDQ $0x04, AX + ADDQ $0x04, CX JMP memmove_long_repeat_emit_calcBlockSize three_bytes_repeat_emit_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_repeat_emit_calcBlockSize two_bytes_repeat_emit_calcBlockSize: - ADDQ $0x02, AX - CMPL BX, $0x40 + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_calcBlockSize JMP memmove_long_repeat_emit_calcBlockSize one_byte_repeat_emit_calcBlockSize: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_repeat_emit_calcBlockSize: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX JMP emit_literal_done_repeat_emit_calcBlockSize memmove_long_repeat_emit_calcBlockSize: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX emit_literal_done_repeat_emit_calcBlockSize: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+8(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+8(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_calcBlockSize: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_calcBlockSize - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSize - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_calcBlockSize - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_calcBlockSize matchlen_bsf_16repeat_extend_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_calcBlockSize matchlen_match8_repeat_extend_calcBlockSize: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_calcBlockSize - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSize - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_calcBlockSize matchlen_bsf_8_repeat_extend_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_calcBlockSize matchlen_match4_repeat_extend_calcBlockSize: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_calcBlockSize - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_calcBlockSize - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_calcBlockSize: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_calcBlockSize JB repeat_extend_forward_end_calcBlockSize - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_calcBlockSize - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_calcBlockSize matchlen_match1_repeat_extend_calcBlockSize: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_calcBlockSize - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_calcBlockSize: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_calcBlockSize four_bytes_loop_back_repeat_as_copy_calcBlockSize: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_calcBlockSize - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_calcBlockSize JMP four_bytes_loop_back_repeat_as_copy_calcBlockSize four_bytes_remain_repeat_as_copy_calcBlockSize: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_calcBlockSize - XORL BX, BX - ADDQ $0x05, AX + XORL SI, SI + ADDQ $0x05, CX JMP repeat_end_emit_calcBlockSize two_byte_offset_repeat_as_copy_calcBlockSize: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_calcBlockSize - LEAL -60(BX), BX - ADDQ $0x03, AX + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_calcBlockSize two_byte_offset_short_repeat_as_copy_calcBlockSize: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_calcBlockSize - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_calcBlockSize - ADDQ $0x02, AX + ADDQ $0x02, CX JMP repeat_end_emit_calcBlockSize emit_copy_three_repeat_as_copy_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX repeat_end_emit_calcBlockSize: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_calcBlockSize no_repeat_found_calcBlockSize: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_calcBlockSize - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_calcBlockSize - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_calcBlockSize - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_calcBlockSize candidate3_match_calcBlockSize: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_calcBlockSize candidate2_match_calcBlockSize: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_calcBlockSize: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_calcBlockSize match_extend_back_loop_calcBlockSize: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_calcBlockSize - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_calcBlockSize - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_calcBlockSize JMP match_extend_back_loop_calcBlockSize match_extend_back_end_calcBlockSize: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_dst_size_check_calcBlockSize: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_calcBlockSize - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), SI - CMPL SI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), DI + CMPL DI, $0x3c JB one_byte_match_emit_calcBlockSize - CMPL SI, $0x00000100 + CMPL DI, $0x00000100 JB two_bytes_match_emit_calcBlockSize - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB three_bytes_match_emit_calcBlockSize - CMPL SI, $0x01000000 + CMPL DI, $0x01000000 JB four_bytes_match_emit_calcBlockSize - ADDQ $0x05, AX + ADDQ $0x05, CX JMP memmove_long_match_emit_calcBlockSize four_bytes_match_emit_calcBlockSize: - ADDQ $0x04, AX + ADDQ $0x04, CX JMP memmove_long_match_emit_calcBlockSize three_bytes_match_emit_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_match_emit_calcBlockSize two_bytes_match_emit_calcBlockSize: - ADDQ $0x02, AX - CMPL SI, $0x40 + ADDQ $0x02, CX + CMPL DI, $0x40 JB memmove_match_emit_calcBlockSize JMP memmove_long_match_emit_calcBlockSize one_byte_match_emit_calcBlockSize: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_match_emit_calcBlockSize: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX JMP emit_literal_done_match_emit_calcBlockSize memmove_long_match_emit_calcBlockSize: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX emit_literal_done_match_emit_calcBlockSize: match_nolit_loop_calcBlockSize: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+8(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+8(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_calcBlockSize: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_calcBlockSize - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSize - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_calcBlockSize - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_calcBlockSize matchlen_bsf_16match_nolit_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_calcBlockSize matchlen_match8_match_nolit_calcBlockSize: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_calcBlockSize - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSize - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_calcBlockSize matchlen_bsf_8_match_nolit_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_calcBlockSize matchlen_match4_match_nolit_calcBlockSize: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_calcBlockSize - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_calcBlockSize - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_calcBlockSize: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_calcBlockSize JB match_nolit_end_calcBlockSize - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_calcBlockSize - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_calcBlockSize matchlen_match1_match_nolit_calcBlockSize: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_calcBlockSize - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_calcBlockSize: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_calcBlockSize four_bytes_loop_back_match_nolit_calcBlockSize: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_calcBlockSize - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_calcBlockSize JMP four_bytes_loop_back_match_nolit_calcBlockSize four_bytes_remain_match_nolit_calcBlockSize: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_calcBlockSize - XORL BX, BX - ADDQ $0x05, AX + XORL SI, SI + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_calcBlockSize two_byte_offset_match_nolit_calcBlockSize: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_calcBlockSize - LEAL -60(R9), R9 - ADDQ $0x03, AX + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_calcBlockSize two_byte_offset_short_match_nolit_calcBlockSize: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_calcBlockSize - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_calcBlockSize - ADDQ $0x02, AX + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_calcBlockSize emit_copy_three_match_nolit_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX match_nolit_emitcopy_end_calcBlockSize: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_calcBlockSize - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_nolit_dst_ok_calcBlockSize: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x33, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x33, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x33, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x33, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_calcBlockSize - INCL CX + INCL DX JMP search_loop_calcBlockSize emit_remainder_calcBlockSize: - MOVQ src_len+8(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+8(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET emit_remainder_ok_calcBlockSize: - MOVQ src_len+8(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+8(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_calcBlockSize - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI - LEAL -1(SI), CX - CMPL CX, $0x3c + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI + LEAL -1(SI), AX + CMPL AX, $0x3c JB one_byte_emit_remainder_calcBlockSize - CMPL CX, $0x00000100 + CMPL AX, $0x00000100 JB two_bytes_emit_remainder_calcBlockSize - CMPL CX, $0x00010000 + CMPL AX, $0x00010000 JB three_bytes_emit_remainder_calcBlockSize - CMPL CX, $0x01000000 + CMPL AX, $0x01000000 JB four_bytes_emit_remainder_calcBlockSize - ADDQ $0x05, AX + ADDQ $0x05, CX JMP memmove_long_emit_remainder_calcBlockSize four_bytes_emit_remainder_calcBlockSize: - ADDQ $0x04, AX + ADDQ $0x04, CX JMP memmove_long_emit_remainder_calcBlockSize three_bytes_emit_remainder_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_emit_remainder_calcBlockSize two_bytes_emit_remainder_calcBlockSize: - ADDQ $0x02, AX - CMPL CX, $0x40 + ADDQ $0x02, CX + CMPL AX, $0x40 JB memmove_emit_remainder_calcBlockSize JMP memmove_long_emit_remainder_calcBlockSize one_byte_emit_remainder_calcBlockSize: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_emit_remainder_calcBlockSize: - LEAQ (AX)(SI*1), AX + LEAQ (CX)(SI*1), AX + MOVQ AX, CX JMP emit_literal_done_emit_remainder_calcBlockSize memmove_long_emit_remainder_calcBlockSize: - LEAQ (AX)(SI*1), AX + LEAQ (CX)(SI*1), AX + MOVQ AX, CX emit_literal_done_emit_remainder_calcBlockSize: - MOVQ AX, ret+24(FP) + MOVQ CX, ret+32(FP) RET -// func calcBlockSizeSmall(src []byte) int +// func calcBlockSizeSmall(src []byte, tmp *[2048]byte) int // Requires: BMI, SSE2 -TEXT ·calcBlockSizeSmall(SB), $2072-32 - XORQ AX, AX - MOVQ $0x00000010, CX - LEAQ 24(SP), DX +TEXT ·calcBlockSizeSmall(SB), $24-40 + MOVQ tmp+24(FP), AX + XORQ CX, CX + MOVQ $0x00000010, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_calcBlockSizeSmall: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_calcBlockSizeSmall MOVL $0x00000000, 12(SP) - MOVQ src_len+8(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+0(FP), DX + MOVQ src_len+8(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+0(FP), BX search_loop_calcBlockSizeSmall: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_calcBlockSizeSmall - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x37, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x37, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x37, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x37, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x37, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_calcBlockSizeSmall - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_calcBlockSizeSmall repeat_extend_back_loop_calcBlockSizeSmall: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_calcBlockSizeSmall - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_calcBlockSizeSmall - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_calcBlockSizeSmall repeat_extend_back_end_calcBlockSizeSmall: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET repeat_dst_size_check_calcBlockSizeSmall: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_calcBlockSizeSmall - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_calcBlockSizeSmall - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_calcBlockSizeSmall JB three_bytes_repeat_emit_calcBlockSizeSmall three_bytes_repeat_emit_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_repeat_emit_calcBlockSizeSmall two_bytes_repeat_emit_calcBlockSizeSmall: - ADDQ $0x02, AX - CMPL BX, $0x40 + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_calcBlockSizeSmall JMP memmove_long_repeat_emit_calcBlockSizeSmall one_byte_repeat_emit_calcBlockSizeSmall: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_repeat_emit_calcBlockSizeSmall: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX JMP emit_literal_done_repeat_emit_calcBlockSizeSmall memmove_long_repeat_emit_calcBlockSizeSmall: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX emit_literal_done_repeat_emit_calcBlockSizeSmall: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+8(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+8(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_calcBlockSizeSmall - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSizeSmall - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_calcBlockSizeSmall - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_calcBlockSizeSmall matchlen_bsf_16repeat_extend_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_calcBlockSizeSmall matchlen_match8_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_calcBlockSizeSmall - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSizeSmall - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_calcBlockSizeSmall matchlen_bsf_8_repeat_extend_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_calcBlockSizeSmall matchlen_match4_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_calcBlockSizeSmall - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_calcBlockSizeSmall - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_calcBlockSizeSmall JB repeat_extend_forward_end_calcBlockSizeSmall - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_calcBlockSizeSmall - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_calcBlockSizeSmall matchlen_match1_repeat_extend_calcBlockSizeSmall: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_calcBlockSizeSmall - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_calcBlockSizeSmall: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_calcBlockSizeSmall: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall - LEAL -60(BX), BX - ADDQ $0x03, AX + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_calcBlockSizeSmall two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall: - MOVL BX, SI - SHLL $0x02, SI - CMPL BX, $0x0c + MOVL SI, DI + SHLL $0x02, DI + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_calcBlockSizeSmall - ADDQ $0x02, AX + ADDQ $0x02, CX JMP repeat_end_emit_calcBlockSizeSmall emit_copy_three_repeat_as_copy_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX repeat_end_emit_calcBlockSizeSmall: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_calcBlockSizeSmall no_repeat_found_calcBlockSizeSmall: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_calcBlockSizeSmall - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_calcBlockSizeSmall - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_calcBlockSizeSmall - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_calcBlockSizeSmall candidate3_match_calcBlockSizeSmall: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_calcBlockSizeSmall candidate2_match_calcBlockSizeSmall: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_calcBlockSizeSmall: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_calcBlockSizeSmall match_extend_back_loop_calcBlockSizeSmall: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_calcBlockSizeSmall - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_calcBlockSizeSmall - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_calcBlockSizeSmall JMP match_extend_back_loop_calcBlockSizeSmall match_extend_back_end_calcBlockSizeSmall: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_dst_size_check_calcBlockSizeSmall: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_calcBlockSizeSmall - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), SI - CMPL SI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), DI + CMPL DI, $0x3c JB one_byte_match_emit_calcBlockSizeSmall - CMPL SI, $0x00000100 + CMPL DI, $0x00000100 JB two_bytes_match_emit_calcBlockSizeSmall JB three_bytes_match_emit_calcBlockSizeSmall three_bytes_match_emit_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_match_emit_calcBlockSizeSmall two_bytes_match_emit_calcBlockSizeSmall: - ADDQ $0x02, AX - CMPL SI, $0x40 + ADDQ $0x02, CX + CMPL DI, $0x40 JB memmove_match_emit_calcBlockSizeSmall JMP memmove_long_match_emit_calcBlockSizeSmall one_byte_match_emit_calcBlockSizeSmall: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_match_emit_calcBlockSizeSmall: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX JMP emit_literal_done_match_emit_calcBlockSizeSmall memmove_long_match_emit_calcBlockSizeSmall: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX emit_literal_done_match_emit_calcBlockSizeSmall: match_nolit_loop_calcBlockSizeSmall: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+8(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+8(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_calcBlockSizeSmall - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSizeSmall - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_calcBlockSizeSmall - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_calcBlockSizeSmall matchlen_bsf_16match_nolit_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_calcBlockSizeSmall matchlen_match8_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_calcBlockSizeSmall - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSizeSmall - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_calcBlockSizeSmall matchlen_bsf_8_match_nolit_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_calcBlockSizeSmall matchlen_match4_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_calcBlockSizeSmall - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_calcBlockSizeSmall - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_calcBlockSizeSmall JB match_nolit_end_calcBlockSizeSmall - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_calcBlockSizeSmall - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_calcBlockSizeSmall matchlen_match1_match_nolit_calcBlockSizeSmall: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_calcBlockSizeSmall - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_calcBlockSizeSmall: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_calcBlockSizeSmall: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_calcBlockSizeSmall - LEAL -60(R9), R9 - ADDQ $0x03, AX + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_calcBlockSizeSmall two_byte_offset_short_match_nolit_calcBlockSizeSmall: - MOVL R9, BX - SHLL $0x02, BX - CMPL R9, $0x0c + MOVL R10, SI + SHLL $0x02, SI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_calcBlockSizeSmall - ADDQ $0x02, AX + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_calcBlockSizeSmall emit_copy_three_match_nolit_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX match_nolit_emitcopy_end_calcBlockSizeSmall: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_calcBlockSizeSmall - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_nolit_dst_ok_calcBlockSizeSmall: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x37, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x37, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x37, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x37, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_calcBlockSizeSmall - INCL CX + INCL DX JMP search_loop_calcBlockSizeSmall emit_remainder_calcBlockSizeSmall: - MOVQ src_len+8(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+8(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET emit_remainder_ok_calcBlockSizeSmall: - MOVQ src_len+8(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+8(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_calcBlockSizeSmall - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI - LEAL -1(SI), CX - CMPL CX, $0x3c + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI + LEAL -1(SI), AX + CMPL AX, $0x3c JB one_byte_emit_remainder_calcBlockSizeSmall - CMPL CX, $0x00000100 + CMPL AX, $0x00000100 JB two_bytes_emit_remainder_calcBlockSizeSmall JB three_bytes_emit_remainder_calcBlockSizeSmall three_bytes_emit_remainder_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_emit_remainder_calcBlockSizeSmall two_bytes_emit_remainder_calcBlockSizeSmall: - ADDQ $0x02, AX - CMPL CX, $0x40 + ADDQ $0x02, CX + CMPL AX, $0x40 JB memmove_emit_remainder_calcBlockSizeSmall JMP memmove_long_emit_remainder_calcBlockSizeSmall one_byte_emit_remainder_calcBlockSizeSmall: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_emit_remainder_calcBlockSizeSmall: - LEAQ (AX)(SI*1), AX + LEAQ (CX)(SI*1), AX + MOVQ AX, CX JMP emit_literal_done_emit_remainder_calcBlockSizeSmall memmove_long_emit_remainder_calcBlockSizeSmall: - LEAQ (AX)(SI*1), AX + LEAQ (CX)(SI*1), AX + MOVQ AX, CX emit_literal_done_emit_remainder_calcBlockSizeSmall: - MOVQ AX, ret+24(FP) + MOVQ CX, ret+32(FP) RET // func emitLiteral(dst []byte, lit []byte) int @@ -19783,7 +19809,7 @@ TEXT ·cvtLZ4BlockAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX XORQ DI, DI lz4_s2_loop: @@ -20266,7 +20292,7 @@ TEXT ·cvtLZ4sBlockAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX XORQ DI, DI lz4s_s2_loop: @@ -20751,7 +20777,7 @@ TEXT ·cvtLZ4BlockSnappyAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX lz4_snappy_loop: CMPQ DX, BX @@ -21017,7 +21043,7 @@ TEXT ·cvtLZ4sBlockSnappyAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX lz4s_snappy_loop: CMPQ DX, BX diff --git a/vendor/github.com/klauspost/compress/s2/index.go b/vendor/github.com/klauspost/compress/s2/index.go index 18a4f7acd..4229957b9 100644 --- a/vendor/github.com/klauspost/compress/s2/index.go +++ b/vendor/github.com/klauspost/compress/s2/index.go @@ -17,6 +17,8 @@ const ( S2IndexHeader = "s2idx\x00" S2IndexTrailer = "\x00xdi2s" maxIndexEntries = 1 << 16 + // If distance is less than this, we do not add the entry. + minIndexDist = 1 << 20 ) // Index represents an S2/Snappy index. @@ -72,6 +74,10 @@ func (i *Index) add(compressedOffset, uncompressedOffset int64) error { if latest.compressedOffset > compressedOffset { return fmt.Errorf("internal error: Earlier compressed received (%d > %d)", latest.uncompressedOffset, uncompressedOffset) } + if latest.uncompressedOffset+minIndexDist > uncompressedOffset { + // Only add entry if distance is large enough. + return nil + } } i.info = append(i.info, struct { compressedOffset int64 @@ -122,7 +128,7 @@ func (i *Index) Find(offset int64) (compressedOff, uncompressedOff int64, err er // reduce to stay below maxIndexEntries func (i *Index) reduce() { - if len(i.info) < maxIndexEntries && i.estBlockUncomp >= 1<<20 { + if len(i.info) < maxIndexEntries && i.estBlockUncomp >= minIndexDist { return } @@ -132,7 +138,7 @@ func (i *Index) reduce() { j := 0 // Each block should be at least 1MB, but don't reduce below 1000 entries. - for i.estBlockUncomp*(int64(removeN)+1) < 1<<20 && len(i.info)/(removeN+1) > 1000 { + for i.estBlockUncomp*(int64(removeN)+1) < minIndexDist && len(i.info)/(removeN+1) > 1000 { removeN++ } for idx := 0; idx < len(src); idx++ { diff --git a/vendor/github.com/klauspost/compress/s2/s2.go b/vendor/github.com/klauspost/compress/s2/s2.go index 72bcb4945..cbd1ed64d 100644 --- a/vendor/github.com/klauspost/compress/s2/s2.go +++ b/vendor/github.com/klauspost/compress/s2/s2.go @@ -109,7 +109,11 @@ const ( chunkTypeStreamIdentifier = 0xff ) -var crcTable = crc32.MakeTable(crc32.Castagnoli) +var ( + crcTable = crc32.MakeTable(crc32.Castagnoli) + magicChunkSnappyBytes = []byte(magicChunkSnappy) // Can be passed to functions where it escapes. + magicChunkBytes = []byte(magicChunk) // Can be passed to functions where it escapes. +) // crc implements the checksum specified in section 3 of // https://github.com/google/snappy/blob/master/framing_format.txt diff --git a/vendor/github.com/klauspost/compress/s2/writer.go b/vendor/github.com/klauspost/compress/s2/writer.go index 637c93147..fd15078f7 100644 --- a/vendor/github.com/klauspost/compress/s2/writer.go +++ b/vendor/github.com/klauspost/compress/s2/writer.go @@ -83,11 +83,14 @@ type Writer struct { snappy bool flushOnWrite bool appendIndex bool + bufferCB func([]byte) level uint8 } type result struct { b []byte + // return when writing + ret []byte // Uncompressed start offset startOffset int64 } @@ -146,6 +149,10 @@ func (w *Writer) Reset(writer io.Writer) { for write := range toWrite { // Wait for the data to be available. input := <-write + if input.ret != nil && w.bufferCB != nil { + w.bufferCB(input.ret) + input.ret = nil + } in := input.b if len(in) > 0 { if w.err(nil) == nil { @@ -239,6 +246,9 @@ func (w *Writer) ReadFrom(r io.Reader) (n int64, err error) { } } if n2 == 0 { + if cap(inbuf) >= w.obufLen { + w.buffers.Put(inbuf) + } break } n += int64(n2) @@ -314,9 +324,9 @@ func (w *Writer) AddSkippableBlock(id uint8, data []byte) (err error) { hWriter := make(chan result) w.output <- hWriter if w.snappy { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)} + hWriter <- result{startOffset: w.uncompWritten, b: magicChunkSnappyBytes} } else { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)} + hWriter <- result{startOffset: w.uncompWritten, b: magicChunkBytes} } } @@ -338,7 +348,8 @@ func (w *Writer) AddSkippableBlock(id uint8, data []byte) (err error) { // but the input buffer cannot be written to by the caller // until Flush or Close has been called when concurrency != 1. // -// If you cannot control that, use the regular Write function. +// Use the WriterBufferDone to receive a callback when the buffer is done +// Processing. // // Note that input is not buffered. // This means that each write will result in discrete blocks being created. @@ -361,6 +372,9 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) { } if w.concurrency == 1 { _, err := w.writeSync(buf) + if w.bufferCB != nil { + w.bufferCB(buf) + } return err } @@ -370,12 +384,12 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) { hWriter := make(chan result) w.output <- hWriter if w.snappy { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)} + hWriter <- result{startOffset: w.uncompWritten, b: magicChunkSnappyBytes} } else { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)} + hWriter <- result{startOffset: w.uncompWritten, b: magicChunkBytes} } } - + orgBuf := buf for len(buf) > 0 { // Cut input. uncompressed := buf @@ -394,6 +408,9 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) { startOffset: w.uncompWritten, } w.uncompWritten += int64(len(uncompressed)) + if len(buf) == 0 && w.bufferCB != nil { + res.ret = orgBuf + } go func() { race.ReadSlice(uncompressed) @@ -478,9 +495,9 @@ func (w *Writer) write(p []byte) (nRet int, errRet error) { hWriter := make(chan result) w.output <- hWriter if w.snappy { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)} + hWriter <- result{startOffset: w.uncompWritten, b: magicChunkSnappyBytes} } else { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)} + hWriter <- result{startOffset: w.uncompWritten, b: magicChunkBytes} } } @@ -560,6 +577,9 @@ func (w *Writer) writeFull(inbuf []byte) (errRet error) { if w.concurrency == 1 { _, err := w.writeSync(inbuf[obufHeaderLen:]) + if cap(inbuf) >= w.obufLen { + w.buffers.Put(inbuf) + } return err } @@ -569,9 +589,9 @@ func (w *Writer) writeFull(inbuf []byte) (errRet error) { hWriter := make(chan result) w.output <- hWriter if w.snappy { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)} + hWriter <- result{startOffset: w.uncompWritten, b: magicChunkSnappyBytes} } else { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)} + hWriter <- result{startOffset: w.uncompWritten, b: magicChunkBytes} } } @@ -637,9 +657,9 @@ func (w *Writer) writeSync(p []byte) (nRet int, errRet error) { var n int var err error if w.snappy { - n, err = w.writer.Write([]byte(magicChunkSnappy)) + n, err = w.writer.Write(magicChunkSnappyBytes) } else { - n, err = w.writer.Write([]byte(magicChunk)) + n, err = w.writer.Write(magicChunkBytes) } if err != nil { return 0, w.err(err) @@ -916,7 +936,7 @@ func WriterBetterCompression() WriterOption { } // WriterBestCompression will enable better compression. -// EncodeBetter compresses better than Encode but typically with a +// EncodeBest compresses better than Encode but typically with a // big speed decrease on compression. func WriterBestCompression() WriterOption { return func(w *Writer) error { @@ -935,6 +955,17 @@ func WriterUncompressed() WriterOption { } } +// WriterBufferDone will perform a callback when EncodeBuffer has finished +// writing a buffer to the output and the buffer can safely be reused. +// If the buffer was split into several blocks, it will be sent after the last block. +// Callbacks will not be done concurrently. +func WriterBufferDone(fn func(b []byte)) WriterOption { + return func(w *Writer) error { + w.bufferCB = fn + return nil + } +} + // WriterBlockSize allows to override the default block size. // Blocks will be this size or smaller. // Minimum size is 4KB and maximum size is 4MB. diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go index 03744fbc7..9c28840c3 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockdec.go +++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go @@ -598,7 +598,9 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) { printf("RLE set to 0x%x, code: %v", symb, v) } case compModeFSE: - println("Reading table for", tableIndex(i)) + if debugDecoder { + println("Reading table for", tableIndex(i)) + } if seq.fse == nil || seq.fse.preDefined { seq.fse = fseDecoderPool.Get().(*fseDecoder) } diff --git a/vendor/github.com/klauspost/compress/zstd/dict.go b/vendor/github.com/klauspost/compress/zstd/dict.go index 8d5567fe6..b7b83164b 100644 --- a/vendor/github.com/klauspost/compress/zstd/dict.go +++ b/vendor/github.com/klauspost/compress/zstd/dict.go @@ -273,6 +273,9 @@ func BuildDict(o BuildDictOptions) ([]byte, error) { enc.Encode(&block, b) addValues(&remain, block.literals) litTotal += len(block.literals) + if len(block.sequences) == 0 { + continue + } seqs += len(block.sequences) block.genCodes() addHist(&ll, block.coders.llEnc.Histogram()) @@ -286,6 +289,9 @@ func BuildDict(o BuildDictOptions) ([]byte, error) { if offset == 0 { continue } + if int(offset) >= len(o.History) { + continue + } if offset > 3 { newOffsets[offset-3]++ } else { @@ -336,6 +342,9 @@ func BuildDict(o BuildDictOptions) ([]byte, error) { if seqs/nUsed < 512 { // Use 512 as minimum. nUsed = seqs / 512 + if nUsed == 0 { + nUsed = 1 + } } copyHist := func(dst *fseEncoder, src *[256]int) ([]byte, error) { hist := dst.Histogram() @@ -358,6 +367,28 @@ func BuildDict(o BuildDictOptions) ([]byte, error) { fakeLength += v hist[i] = uint32(v) } + + // Ensure we aren't trying to represent RLE. + if maxCount == fakeLength { + for i := range hist { + if uint8(i) == maxSym { + fakeLength++ + maxSym++ + hist[i+1] = 1 + if maxSym > 1 { + break + } + } + if hist[0] == 0 { + fakeLength++ + hist[i] = 1 + if maxSym > 1 { + break + } + } + } + } + dst.HistogramFinished(maxSym, maxCount) dst.reUsed = false dst.useRLE = false diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go index a4f5bf91f..84a79fde7 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_better.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go @@ -179,9 +179,9 @@ encodeLoop: if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -210,12 +210,12 @@ encodeLoop: // Index match start+1 (long) -> s - 1 index0 := s + repOff - s += lenght + repOff + s += length + repOff nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -241,9 +241,9 @@ encodeLoop: if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) { // Consider history as well. var seq seq - lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) + length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -270,11 +270,11 @@ encodeLoop: } blk.sequences = append(blk.sequences, seq) - s += lenght + repOff2 + s += length + repOff2 nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -708,9 +708,9 @@ encodeLoop: if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -738,12 +738,12 @@ encodeLoop: blk.sequences = append(blk.sequences, seq) // Index match start+1 (long) -> s - 1 - s += lenght + repOff + s += length + repOff nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -772,9 +772,9 @@ encodeLoop: if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) { // Consider history as well. var seq seq - lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) + length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -801,11 +801,11 @@ encodeLoop: } blk.sequences = append(blk.sequences, seq) - s += lenght + repOff2 + s += length + repOff2 nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go index a154c18f7..d36be7bd8 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go @@ -138,9 +138,9 @@ encodeLoop: if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -166,11 +166,11 @@ encodeLoop: println("repeat sequence", seq, "next s:", s) } blk.sequences = append(blk.sequences, seq) - s += lenght + repOff + s += length + repOff nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -798,9 +798,9 @@ encodeLoop: if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -826,11 +826,11 @@ encodeLoop: println("repeat sequence", seq, "next s:", s) } blk.sequences = append(blk.sequences, seq) - s += lenght + repOff + s += length + repOff nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go index 72af7ef0f..8f8223cd3 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -6,6 +6,7 @@ package zstd import ( "crypto/rand" + "errors" "fmt" "io" "math" @@ -149,6 +150,9 @@ func (e *Encoder) ResetContentSize(w io.Writer, size int64) { // and write CRC if requested. func (e *Encoder) Write(p []byte) (n int, err error) { s := &e.state + if s.eofWritten { + return 0, ErrEncoderClosed + } for len(p) > 0 { if len(p)+len(s.filling) < e.o.blockSize { if e.o.crc { @@ -202,7 +206,7 @@ func (e *Encoder) nextBlock(final bool) error { return nil } if final && len(s.filling) > 0 { - s.current = e.EncodeAll(s.filling, s.current[:0]) + s.current = e.encodeAll(s.encoder, s.filling, s.current[:0]) var n2 int n2, s.err = s.w.Write(s.current) if s.err != nil { @@ -288,6 +292,9 @@ func (e *Encoder) nextBlock(final bool) error { s.filling, s.current, s.previous = s.previous[:0], s.filling, s.current s.nInput += int64(len(s.current)) s.wg.Add(1) + if final { + s.eofWritten = true + } go func(src []byte) { if debugEncoder { println("Adding block,", len(src), "bytes, final:", final) @@ -303,9 +310,6 @@ func (e *Encoder) nextBlock(final bool) error { blk := enc.Block() enc.Encode(blk, src) blk.last = final - if final { - s.eofWritten = true - } // Wait for pending writes. s.wWg.Wait() if s.writeErr != nil { @@ -401,12 +405,20 @@ func (e *Encoder) Flush() error { if len(s.filling) > 0 { err := e.nextBlock(false) if err != nil { + // Ignore Flush after Close. + if errors.Is(s.err, ErrEncoderClosed) { + return nil + } return err } } s.wg.Wait() s.wWg.Wait() if s.err != nil { + // Ignore Flush after Close. + if errors.Is(s.err, ErrEncoderClosed) { + return nil + } return s.err } return s.writeErr @@ -422,6 +434,9 @@ func (e *Encoder) Close() error { } err := e.nextBlock(true) if err != nil { + if errors.Is(s.err, ErrEncoderClosed) { + return nil + } return err } if s.frameContentSize > 0 { @@ -459,6 +474,11 @@ func (e *Encoder) Close() error { } _, s.err = s.w.Write(frame) } + if s.err == nil { + s.err = ErrEncoderClosed + return nil + } + return s.err } @@ -469,6 +489,15 @@ func (e *Encoder) Close() error { // Data compressed with EncodeAll can be decoded with the Decoder, // using either a stream or DecodeAll. func (e *Encoder) EncodeAll(src, dst []byte) []byte { + e.init.Do(e.initialize) + enc := <-e.encoders + defer func() { + e.encoders <- enc + }() + return e.encodeAll(enc, src, dst) +} + +func (e *Encoder) encodeAll(enc encoder, src, dst []byte) []byte { if len(src) == 0 { if e.o.fullZero { // Add frame header. @@ -491,13 +520,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { } return dst } - e.init.Do(e.initialize) - enc := <-e.encoders - defer func() { - // Release encoder reference to last block. - // If a non-single block is needed the encoder will reset again. - e.encoders <- enc - }() + // Use single segments when above minimum window and below window size. single := len(src) <= e.o.windowSize && len(src) > MinWindowSize if e.o.single != nil { diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go index 53e160f7e..e47af66e7 100644 --- a/vendor/github.com/klauspost/compress/zstd/framedec.go +++ b/vendor/github.com/klauspost/compress/zstd/framedec.go @@ -146,7 +146,9 @@ func (d *frameDec) reset(br byteBuffer) error { } return err } - printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3) + if debugDecoder { + printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3) + } windowLog := 10 + (wd >> 3) windowBase := uint64(1) << windowLog windowAdd := (windowBase / 8) * uint64(wd&0x7) diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s index 17901e080..ae7d4d329 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s @@ -162,12 +162,12 @@ finalize: MOVD h, ret+24(FP) RET -// func writeBlocks(d *Digest, b []byte) int +// func writeBlocks(s *Digest, b []byte) int TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40 LDP ·primes+0(SB), (prime1, prime2) // Load state. Assume v[1-4] are stored contiguously. - MOVD d+0(FP), digest + MOVD s+0(FP), digest LDP 0(digest), (v1, v2) LDP 16(digest), (v3, v4) diff --git a/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s index 9a7655c0f..0782b86e3 100644 --- a/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s +++ b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s @@ -5,7 +5,6 @@ #include "textflag.h" // func matchLen(a []byte, b []byte) int -// Requires: BMI TEXT ·matchLen(SB), NOSPLIT, $0-56 MOVQ a_base+0(FP), AX MOVQ b_base+24(FP), CX @@ -17,17 +16,16 @@ TEXT ·matchLen(SB), NOSPLIT, $0-56 JB matchlen_match4_standalone matchlen_loopback_standalone: - MOVQ (AX)(SI*1), BX - XORQ (CX)(SI*1), BX - TESTQ BX, BX - JZ matchlen_loop_standalone + MOVQ (AX)(SI*1), BX + XORQ (CX)(SI*1), BX + JZ matchlen_loop_standalone #ifdef GOAMD64_v3 TZCNTQ BX, BX #else BSFQ BX, BX #endif - SARQ $0x03, BX + SHRL $0x03, BX LEAL (SI)(BX*1), SI JMP gen_match_len_end diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go index 8adabd828..c59f17e07 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go @@ -146,7 +146,7 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) { return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize) default: - return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode) + return true, fmt.Errorf("sequenceDecs_decode returned erroneous code %d", errCode) } s.seqSize += ctx.litRemain @@ -292,7 +292,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error { return io.ErrUnexpectedEOF } - return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode) + return fmt.Errorf("sequenceDecs_decode_amd64 returned erroneous code %d", errCode) } if ctx.litRemain < 0 { diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s index 5b06174b8..f5591fa1e 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s @@ -1814,7 +1814,7 @@ TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32 MOVQ 40(SP), AX ADDQ AX, 48(SP) - // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) ADDQ R10, 32(SP) // outBase += outPosition @@ -2376,7 +2376,7 @@ TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32 MOVQ 40(SP), CX ADDQ CX, 48(SP) - // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) ADDQ R9, 32(SP) // outBase += outPosition @@ -2896,7 +2896,7 @@ TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32 MOVQ 40(SP), AX ADDQ AX, 48(SP) - // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) ADDQ R10, 32(SP) // outBase += outPosition @@ -3560,7 +3560,7 @@ TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32 MOVQ 40(SP), CX ADDQ CX, 48(SP) - // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) ADDQ R9, 32(SP) // outBase += outPosition diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go index 4be7cc736..066bef2a4 100644 --- a/vendor/github.com/klauspost/compress/zstd/zstd.go +++ b/vendor/github.com/klauspost/compress/zstd/zstd.go @@ -88,6 +88,10 @@ var ( // Close has been called. ErrDecoderClosed = errors.New("decoder used after Close") + // ErrEncoderClosed will be returned if the Encoder was used after + // Close has been called. + ErrEncoderClosed = errors.New("encoder used after Close") + // ErrDecoderNilInput is returned when a nil Reader was provided // and an operation other than Reset/DecodeAll/Close was attempted. ErrDecoderNilInput = errors.New("nil input provided as reader") diff --git a/vendor/github.com/youmark/pkcs8/.travis.yml b/vendor/github.com/youmark/pkcs8/.travis.yml deleted file mode 100644 index 3608f7dc7..000000000 --- a/vendor/github.com/youmark/pkcs8/.travis.yml +++ /dev/null @@ -1,14 +0,0 @@ -arch: - - amd64 - - ppc64le -language: go - -go: - - "1.10.x" - - "1.11.x" - - "1.12.x" - - "1.13.x" - - master - -script: - - go test -v ./... diff --git a/vendor/go.mongodb.org/mongo-driver/bson/bsonrw/extjson_parser.go b/vendor/go.mongodb.org/mongo-driver/bson/bsonrw/extjson_parser.go index bb52a0ec3..f0702d9d3 100644 --- a/vendor/go.mongodb.org/mongo-driver/bson/bsonrw/extjson_parser.go +++ b/vendor/go.mongodb.org/mongo-driver/bson/bsonrw/extjson_parser.go @@ -305,7 +305,7 @@ func (ejp *extJSONParser) readValue(t bsontype.Type) (*extJSONValue, error) { } // remove hyphens - uuidNoHyphens := strings.Replace(uuid, "-", "", -1) + uuidNoHyphens := strings.ReplaceAll(uuid, "-", "") if len(uuidNoHyphens) != 32 { return nil, fmt.Errorf("$uuid value does not follow RFC 4122 format regarding length and hyphens") } diff --git a/vendor/go.mongodb.org/mongo-driver/bson/bsonrw/extjson_writer.go b/vendor/go.mongodb.org/mongo-driver/bson/bsonrw/extjson_writer.go index bb9303167..57781ff9f 100644 --- a/vendor/go.mongodb.org/mongo-driver/bson/bsonrw/extjson_writer.go +++ b/vendor/go.mongodb.org/mongo-driver/bson/bsonrw/extjson_writer.go @@ -628,13 +628,14 @@ func (ejvw *extJSONValueWriter) WriteArrayEnd() error { func formatDouble(f float64) string { var s string - if math.IsInf(f, 1) { + switch { + case math.IsInf(f, 1): s = "Infinity" - } else if math.IsInf(f, -1) { + case math.IsInf(f, -1): s = "-Infinity" - } else if math.IsNaN(f) { + case math.IsNaN(f): s = "NaN" - } else { + default: // Print exactly one decimalType place for integers; otherwise, print as many are necessary to // perfectly represent it. s = strconv.FormatFloat(f, 'G', -1, 64) @@ -739,9 +740,7 @@ func (ss sortableString) Less(i, j int) bool { } func (ss sortableString) Swap(i, j int) { - oldI := ss[i] - ss[i] = ss[j] - ss[j] = oldI + ss[i], ss[j] = ss[j], ss[i] } func sortStringAlphebeticAscending(s string) string { diff --git a/vendor/go.mongodb.org/mongo-driver/bson/bsonrw/json_scanner.go b/vendor/go.mongodb.org/mongo-driver/bson/bsonrw/json_scanner.go index 43f3e4f38..978289191 100644 --- a/vendor/go.mongodb.org/mongo-driver/bson/bsonrw/json_scanner.go +++ b/vendor/go.mongodb.org/mongo-driver/bson/bsonrw/json_scanner.go @@ -82,12 +82,13 @@ func (js *jsonScanner) nextToken() (*jsonToken, error) { return js.scanString() default: // check if it's a number - if c == '-' || isDigit(c) { + switch { + case c == '-' || isDigit(c): return js.scanNumber(c) - } else if c == 't' || c == 'f' || c == 'n' { + case c == 't' || c == 'f' || c == 'n': // maybe a literal return js.scanLiteral(c) - } else { + default: return nil, fmt.Errorf("invalid JSON input. Position: %d. Character: %c", js.pos-1, c) } } @@ -174,7 +175,7 @@ func getu4(s []byte) rune { for _, c := range s[:4] { switch { case '0' <= c && c <= '9': - c = c - '0' + c -= '0' case 'a' <= c && c <= 'f': c = c - 'a' + 10 case 'A' <= c && c <= 'F': @@ -325,13 +326,14 @@ func (js *jsonScanner) scanLiteral(first byte) (*jsonToken, error) { c5, err := js.readNextByte() - if bytes.Equal([]byte("true"), lit) && (isValueTerminator(c5) || errors.Is(err, io.EOF)) { + switch { + case bytes.Equal([]byte("true"), lit) && (isValueTerminator(c5) || errors.Is(err, io.EOF)): js.pos = int(math.Max(0, float64(js.pos-1))) return &jsonToken{t: jttBool, v: true, p: p}, nil - } else if bytes.Equal([]byte("null"), lit) && (isValueTerminator(c5) || errors.Is(err, io.EOF)) { + case bytes.Equal([]byte("null"), lit) && (isValueTerminator(c5) || errors.Is(err, io.EOF)): js.pos = int(math.Max(0, float64(js.pos-1))) return &jsonToken{t: jttNull, v: nil, p: p}, nil - } else if bytes.Equal([]byte("fals"), lit) { + case bytes.Equal([]byte("fals"), lit): if c5 == 'e' { c5, err = js.readNextByte() @@ -430,12 +432,13 @@ func (js *jsonScanner) scanNumber(first byte) (*jsonToken, error) { case '}', ']', ',': s = nssDone default: - if isWhiteSpace(c) || errors.Is(err, io.EOF) { + switch { + case isWhiteSpace(c) || errors.Is(err, io.EOF): s = nssDone - } else if isDigit(c) { + case isDigit(c): s = nssSawIntegerDigits b.WriteByte(c) - } else { + default: s = nssInvalid } } @@ -455,12 +458,13 @@ func (js *jsonScanner) scanNumber(first byte) (*jsonToken, error) { case '}', ']', ',': s = nssDone default: - if isWhiteSpace(c) || errors.Is(err, io.EOF) { + switch { + case isWhiteSpace(c) || errors.Is(err, io.EOF): s = nssDone - } else if isDigit(c) { + case isDigit(c): s = nssSawFractionDigits b.WriteByte(c) - } else { + default: s = nssInvalid } } @@ -490,12 +494,13 @@ func (js *jsonScanner) scanNumber(first byte) (*jsonToken, error) { case '}', ']', ',': s = nssDone default: - if isWhiteSpace(c) || errors.Is(err, io.EOF) { + switch { + case isWhiteSpace(c) || errors.Is(err, io.EOF): s = nssDone - } else if isDigit(c) { + case isDigit(c): s = nssSawExponentDigits b.WriteByte(c) - } else { + default: s = nssInvalid } } diff --git a/vendor/go.mongodb.org/mongo-driver/bson/bsonrw/value_writer.go b/vendor/go.mongodb.org/mongo-driver/bson/bsonrw/value_writer.go index 311518a80..501c6d7f9 100644 --- a/vendor/go.mongodb.org/mongo-driver/bson/bsonrw/value_writer.go +++ b/vendor/go.mongodb.org/mongo-driver/bson/bsonrw/value_writer.go @@ -598,7 +598,7 @@ func (vw *valueWriter) writeLength() error { return errMaxDocumentSizeExceeded{size: int64(len(vw.buf))} } frame := &vw.stack[vw.frame] - length = length - int(frame.start) + length -= int(frame.start) start := frame.start _ = vw.buf[start+3] // BCE diff --git a/vendor/go.mongodb.org/mongo-driver/bson/doc.go b/vendor/go.mongodb.org/mongo-driver/bson/doc.go index af6098475..fb075b478 100644 --- a/vendor/go.mongodb.org/mongo-driver/bson/doc.go +++ b/vendor/go.mongodb.org/mongo-driver/bson/doc.go @@ -107,10 +107,13 @@ // The name may be empty in order to specify options without overriding the default field name. The following options can // be used to configure behavior: // -// 1. omitempty: If the omitempty struct tag is specified on a field, the field will be omitted from the marshaling if -// the field has an empty value, defined as false, 0, a nil pointer, a nil interface value, and any empty array, -// slice, map, or string. -// NOTE: It is recommended that this tag be used for all slice and map fields. +// 1. omitempty: If the "omitempty" struct tag is specified on a field, the field will not be marshaled if it is set to +// an "empty" value. Numbers, booleans, and strings are considered empty if their value is equal to the zero value for +// the type (i.e. 0 for numbers, false for booleans, and "" for strings). Slices, maps, and arrays are considered +// empty if they are of length zero. Interfaces and pointers are considered empty if their value is nil. By default, +// structs are only considered empty if the struct type implements [bsoncodec.Zeroer] and the IsZero +// method returns true. Struct types that do not implement [bsoncodec.Zeroer] are never considered empty and will be +// marshaled as embedded documents. NOTE: It is recommended that this tag be used for all slice and map fields. // // 2. minsize: If the minsize struct tag is specified on a field of type int64, uint, uint32, or uint64 and the value of // the field can fit in a signed int32, the field will be serialized as a BSON int32 rather than a BSON int64. For diff --git a/vendor/go.mongodb.org/mongo-driver/bson/primitive/decimal.go b/vendor/go.mongodb.org/mongo-driver/bson/primitive/decimal.go index 08c39514b..db8be74d8 100644 --- a/vendor/go.mongodb.org/mongo-driver/bson/primitive/decimal.go +++ b/vendor/go.mongodb.org/mongo-driver/bson/primitive/decimal.go @@ -70,7 +70,6 @@ func (d Decimal128) String() string { // Bits: 1*sign 2*ignored 14*exponent 111*significand. // Implicit 0b100 prefix in significand. exp = int(d.h >> 47 & (1<<14 - 1)) - //high = 4<<47 | d.h&(1<<47-1) // Spec says all of these values are out of range. high, low = 0, 0 } else { @@ -152,13 +151,12 @@ func (d Decimal128) BigInt() (*big.Int, int, error) { // Bits: 1*sign 2*ignored 14*exponent 111*significand. // Implicit 0b100 prefix in significand. exp = int(high >> 47 & (1<<14 - 1)) - //high = 4<<47 | d.h&(1<<47-1) // Spec says all of these values are out of range. high, low = 0, 0 } else { // Bits: 1*sign 14*exponent 113*significand exp = int(high >> 49 & (1<<14 - 1)) - high = high & (1<<49 - 1) + high &= (1<<49 - 1) } exp += MinDecimal128Exp @@ -352,7 +350,7 @@ var ( // ParseDecimal128FromBigInt attempts to parse the given significand and exponent into a valid Decimal128 value. func ParseDecimal128FromBigInt(bi *big.Int, exp int) (Decimal128, bool) { - //copy + // copy bi = new(big.Int).Set(bi) q := new(big.Int) diff --git a/vendor/go.mongodb.org/mongo-driver/internal/assert/assertion_compare.go b/vendor/go.mongodb.org/mongo-driver/internal/assert/assertion_compare.go new file mode 100644 index 000000000..b7b724987 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/internal/assert/assertion_compare.go @@ -0,0 +1,464 @@ +// Copied from https://github.com/stretchr/testify/blob/1333b5d3bda8cf5aedcf3e1aaa95cac28aaab892/assert/assertion_compare.go + +// Copyright 2020 Mat Ryer, Tyler Bunnell and all contributors. All rights reserved. +// Use of this source code is governed by an MIT-style license that can be found in +// the THIRD-PARTY-NOTICES file. + +package assert + +import ( + "bytes" + "fmt" + "reflect" + "time" +) + +type CompareType int + +const ( + compareLess CompareType = iota - 1 + compareEqual + compareGreater +) + +var ( + intType = reflect.TypeOf(int(1)) + int8Type = reflect.TypeOf(int8(1)) + int16Type = reflect.TypeOf(int16(1)) + int32Type = reflect.TypeOf(int32(1)) + int64Type = reflect.TypeOf(int64(1)) + + uintType = reflect.TypeOf(uint(1)) + uint8Type = reflect.TypeOf(uint8(1)) + uint16Type = reflect.TypeOf(uint16(1)) + uint32Type = reflect.TypeOf(uint32(1)) + uint64Type = reflect.TypeOf(uint64(1)) + + float32Type = reflect.TypeOf(float32(1)) + float64Type = reflect.TypeOf(float64(1)) + + stringType = reflect.TypeOf("") + + timeType = reflect.TypeOf(time.Time{}) + bytesType = reflect.TypeOf([]byte{}) +) + +func compare(obj1, obj2 interface{}, kind reflect.Kind) (CompareType, bool) { + obj1Value := reflect.ValueOf(obj1) + obj2Value := reflect.ValueOf(obj2) + + // throughout this switch we try and avoid calling .Convert() if possible, + // as this has a pretty big performance impact + switch kind { + case reflect.Int: + { + intobj1, ok := obj1.(int) + if !ok { + intobj1 = obj1Value.Convert(intType).Interface().(int) + } + intobj2, ok := obj2.(int) + if !ok { + intobj2 = obj2Value.Convert(intType).Interface().(int) + } + if intobj1 > intobj2 { + return compareGreater, true + } + if intobj1 == intobj2 { + return compareEqual, true + } + if intobj1 < intobj2 { + return compareLess, true + } + } + case reflect.Int8: + { + int8obj1, ok := obj1.(int8) + if !ok { + int8obj1 = obj1Value.Convert(int8Type).Interface().(int8) + } + int8obj2, ok := obj2.(int8) + if !ok { + int8obj2 = obj2Value.Convert(int8Type).Interface().(int8) + } + if int8obj1 > int8obj2 { + return compareGreater, true + } + if int8obj1 == int8obj2 { + return compareEqual, true + } + if int8obj1 < int8obj2 { + return compareLess, true + } + } + case reflect.Int16: + { + int16obj1, ok := obj1.(int16) + if !ok { + int16obj1 = obj1Value.Convert(int16Type).Interface().(int16) + } + int16obj2, ok := obj2.(int16) + if !ok { + int16obj2 = obj2Value.Convert(int16Type).Interface().(int16) + } + if int16obj1 > int16obj2 { + return compareGreater, true + } + if int16obj1 == int16obj2 { + return compareEqual, true + } + if int16obj1 < int16obj2 { + return compareLess, true + } + } + case reflect.Int32: + { + int32obj1, ok := obj1.(int32) + if !ok { + int32obj1 = obj1Value.Convert(int32Type).Interface().(int32) + } + int32obj2, ok := obj2.(int32) + if !ok { + int32obj2 = obj2Value.Convert(int32Type).Interface().(int32) + } + if int32obj1 > int32obj2 { + return compareGreater, true + } + if int32obj1 == int32obj2 { + return compareEqual, true + } + if int32obj1 < int32obj2 { + return compareLess, true + } + } + case reflect.Int64: + { + int64obj1, ok := obj1.(int64) + if !ok { + int64obj1 = obj1Value.Convert(int64Type).Interface().(int64) + } + int64obj2, ok := obj2.(int64) + if !ok { + int64obj2 = obj2Value.Convert(int64Type).Interface().(int64) + } + if int64obj1 > int64obj2 { + return compareGreater, true + } + if int64obj1 == int64obj2 { + return compareEqual, true + } + if int64obj1 < int64obj2 { + return compareLess, true + } + } + case reflect.Uint: + { + uintobj1, ok := obj1.(uint) + if !ok { + uintobj1 = obj1Value.Convert(uintType).Interface().(uint) + } + uintobj2, ok := obj2.(uint) + if !ok { + uintobj2 = obj2Value.Convert(uintType).Interface().(uint) + } + if uintobj1 > uintobj2 { + return compareGreater, true + } + if uintobj1 == uintobj2 { + return compareEqual, true + } + if uintobj1 < uintobj2 { + return compareLess, true + } + } + case reflect.Uint8: + { + uint8obj1, ok := obj1.(uint8) + if !ok { + uint8obj1 = obj1Value.Convert(uint8Type).Interface().(uint8) + } + uint8obj2, ok := obj2.(uint8) + if !ok { + uint8obj2 = obj2Value.Convert(uint8Type).Interface().(uint8) + } + if uint8obj1 > uint8obj2 { + return compareGreater, true + } + if uint8obj1 == uint8obj2 { + return compareEqual, true + } + if uint8obj1 < uint8obj2 { + return compareLess, true + } + } + case reflect.Uint16: + { + uint16obj1, ok := obj1.(uint16) + if !ok { + uint16obj1 = obj1Value.Convert(uint16Type).Interface().(uint16) + } + uint16obj2, ok := obj2.(uint16) + if !ok { + uint16obj2 = obj2Value.Convert(uint16Type).Interface().(uint16) + } + if uint16obj1 > uint16obj2 { + return compareGreater, true + } + if uint16obj1 == uint16obj2 { + return compareEqual, true + } + if uint16obj1 < uint16obj2 { + return compareLess, true + } + } + case reflect.Uint32: + { + uint32obj1, ok := obj1.(uint32) + if !ok { + uint32obj1 = obj1Value.Convert(uint32Type).Interface().(uint32) + } + uint32obj2, ok := obj2.(uint32) + if !ok { + uint32obj2 = obj2Value.Convert(uint32Type).Interface().(uint32) + } + if uint32obj1 > uint32obj2 { + return compareGreater, true + } + if uint32obj1 == uint32obj2 { + return compareEqual, true + } + if uint32obj1 < uint32obj2 { + return compareLess, true + } + } + case reflect.Uint64: + { + uint64obj1, ok := obj1.(uint64) + if !ok { + uint64obj1 = obj1Value.Convert(uint64Type).Interface().(uint64) + } + uint64obj2, ok := obj2.(uint64) + if !ok { + uint64obj2 = obj2Value.Convert(uint64Type).Interface().(uint64) + } + if uint64obj1 > uint64obj2 { + return compareGreater, true + } + if uint64obj1 == uint64obj2 { + return compareEqual, true + } + if uint64obj1 < uint64obj2 { + return compareLess, true + } + } + case reflect.Float32: + { + float32obj1, ok := obj1.(float32) + if !ok { + float32obj1 = obj1Value.Convert(float32Type).Interface().(float32) + } + float32obj2, ok := obj2.(float32) + if !ok { + float32obj2 = obj2Value.Convert(float32Type).Interface().(float32) + } + if float32obj1 > float32obj2 { + return compareGreater, true + } + if float32obj1 == float32obj2 { + return compareEqual, true + } + if float32obj1 < float32obj2 { + return compareLess, true + } + } + case reflect.Float64: + { + float64obj1, ok := obj1.(float64) + if !ok { + float64obj1 = obj1Value.Convert(float64Type).Interface().(float64) + } + float64obj2, ok := obj2.(float64) + if !ok { + float64obj2 = obj2Value.Convert(float64Type).Interface().(float64) + } + if float64obj1 > float64obj2 { + return compareGreater, true + } + if float64obj1 == float64obj2 { + return compareEqual, true + } + if float64obj1 < float64obj2 { + return compareLess, true + } + } + case reflect.String: + { + stringobj1, ok := obj1.(string) + if !ok { + stringobj1 = obj1Value.Convert(stringType).Interface().(string) + } + stringobj2, ok := obj2.(string) + if !ok { + stringobj2 = obj2Value.Convert(stringType).Interface().(string) + } + if stringobj1 > stringobj2 { + return compareGreater, true + } + if stringobj1 == stringobj2 { + return compareEqual, true + } + if stringobj1 < stringobj2 { + return compareLess, true + } + } + // Check for known struct types we can check for compare results. + case reflect.Struct: + { + // All structs enter here. We're not interested in most types. + if !canConvert(obj1Value, timeType) { + break + } + + // time.Time can compared! + timeObj1, ok := obj1.(time.Time) + if !ok { + timeObj1 = obj1Value.Convert(timeType).Interface().(time.Time) + } + + timeObj2, ok := obj2.(time.Time) + if !ok { + timeObj2 = obj2Value.Convert(timeType).Interface().(time.Time) + } + + return compare(timeObj1.UnixNano(), timeObj2.UnixNano(), reflect.Int64) + } + case reflect.Slice: + { + // We only care about the []byte type. + if !canConvert(obj1Value, bytesType) { + break + } + + // []byte can be compared! + bytesObj1, ok := obj1.([]byte) + if !ok { + bytesObj1 = obj1Value.Convert(bytesType).Interface().([]byte) + + } + bytesObj2, ok := obj2.([]byte) + if !ok { + bytesObj2 = obj2Value.Convert(bytesType).Interface().([]byte) + } + + return CompareType(bytes.Compare(bytesObj1, bytesObj2)), true + } + } + + return compareEqual, false +} + +// Greater asserts that the first element is greater than the second +// +// assert.Greater(t, 2, 1) +// assert.Greater(t, float64(2), float64(1)) +// assert.Greater(t, "b", "a") +func Greater(t TestingT, e1 interface{}, e2 interface{}, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return compareTwoValues(t, e1, e2, []CompareType{compareGreater}, "\"%v\" is not greater than \"%v\"", msgAndArgs...) +} + +// GreaterOrEqual asserts that the first element is greater than or equal to the second +// +// assert.GreaterOrEqual(t, 2, 1) +// assert.GreaterOrEqual(t, 2, 2) +// assert.GreaterOrEqual(t, "b", "a") +// assert.GreaterOrEqual(t, "b", "b") +func GreaterOrEqual(t TestingT, e1 interface{}, e2 interface{}, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return compareTwoValues(t, e1, e2, []CompareType{compareGreater, compareEqual}, "\"%v\" is not greater than or equal to \"%v\"", msgAndArgs...) +} + +// Less asserts that the first element is less than the second +// +// assert.Less(t, 1, 2) +// assert.Less(t, float64(1), float64(2)) +// assert.Less(t, "a", "b") +func Less(t TestingT, e1 interface{}, e2 interface{}, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return compareTwoValues(t, e1, e2, []CompareType{compareLess}, "\"%v\" is not less than \"%v\"", msgAndArgs...) +} + +// LessOrEqual asserts that the first element is less than or equal to the second +// +// assert.LessOrEqual(t, 1, 2) +// assert.LessOrEqual(t, 2, 2) +// assert.LessOrEqual(t, "a", "b") +// assert.LessOrEqual(t, "b", "b") +func LessOrEqual(t TestingT, e1 interface{}, e2 interface{}, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return compareTwoValues(t, e1, e2, []CompareType{compareLess, compareEqual}, "\"%v\" is not less than or equal to \"%v\"", msgAndArgs...) +} + +// Positive asserts that the specified element is positive +// +// assert.Positive(t, 1) +// assert.Positive(t, 1.23) +func Positive(t TestingT, e interface{}, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + zero := reflect.Zero(reflect.TypeOf(e)) + return compareTwoValues(t, e, zero.Interface(), []CompareType{compareGreater}, "\"%v\" is not positive", msgAndArgs...) +} + +// Negative asserts that the specified element is negative +// +// assert.Negative(t, -1) +// assert.Negative(t, -1.23) +func Negative(t TestingT, e interface{}, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + zero := reflect.Zero(reflect.TypeOf(e)) + return compareTwoValues(t, e, zero.Interface(), []CompareType{compareLess}, "\"%v\" is not negative", msgAndArgs...) +} + +func compareTwoValues(t TestingT, e1 interface{}, e2 interface{}, allowedComparesResults []CompareType, failMessage string, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + + e1Kind := reflect.ValueOf(e1).Kind() + e2Kind := reflect.ValueOf(e2).Kind() + if e1Kind != e2Kind { + return Fail(t, "Elements should be the same type", msgAndArgs...) + } + + compareResult, isComparable := compare(e1, e2, e1Kind) + if !isComparable { + return Fail(t, fmt.Sprintf("Can not compare type \"%s\"", reflect.TypeOf(e1)), msgAndArgs...) + } + + if !containsValue(allowedComparesResults, compareResult) { + return Fail(t, fmt.Sprintf(failMessage, e1, e2), msgAndArgs...) + } + + return true +} + +func containsValue(values []CompareType, value CompareType) bool { + for _, v := range values { + if v == value { + return true + } + } + + return false +} diff --git a/vendor/go.mongodb.org/mongo-driver/internal/assert/assertion_compare_can_convert.go b/vendor/go.mongodb.org/mongo-driver/internal/assert/assertion_compare_can_convert.go new file mode 100644 index 000000000..ff7f9e8c2 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/internal/assert/assertion_compare_can_convert.go @@ -0,0 +1,18 @@ +// Copied from https://github.com/stretchr/testify/blob/1333b5d3bda8cf5aedcf3e1aaa95cac28aaab892/assert/assertion_compare_can_convert.go + +// Copyright 2020 Mat Ryer, Tyler Bunnell and all contributors. All rights reserved. +// Use of this source code is governed by an MIT-style license that can be found in +// the THIRD-PARTY-NOTICES file. + +//go:build go1.17 +// +build go1.17 + +package assert + +import "reflect" + +// Wrapper around reflect.Value.CanConvert, for compatibility +// reasons. +func canConvert(value reflect.Value, to reflect.Type) bool { + return value.CanConvert(to) +} diff --git a/vendor/go.mongodb.org/mongo-driver/internal/assert/assertion_compare_legacy.go b/vendor/go.mongodb.org/mongo-driver/internal/assert/assertion_compare_legacy.go new file mode 100644 index 000000000..c23c7d111 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/internal/assert/assertion_compare_legacy.go @@ -0,0 +1,18 @@ +// Copied from https://github.com/stretchr/testify/blob/1333b5d3bda8cf5aedcf3e1aaa95cac28aaab892/assert/assertion_compare_legacy.go + +// Copyright 2020 Mat Ryer, Tyler Bunnell and all contributors. All rights reserved. +// Use of this source code is governed by an MIT-style license that can be found in +// the THIRD-PARTY-NOTICES file. + +//go:build !go1.17 +// +build !go1.17 + +package assert + +import "reflect" + +// Older versions of Go does not have the reflect.Value.CanConvert +// method. +func canConvert(value reflect.Value, to reflect.Type) bool { + return false +} diff --git a/vendor/go.mongodb.org/mongo-driver/internal/assert/assertion_format.go b/vendor/go.mongodb.org/mongo-driver/internal/assert/assertion_format.go new file mode 100644 index 000000000..474a5f42f --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/internal/assert/assertion_format.go @@ -0,0 +1,325 @@ +// Copied from https://github.com/stretchr/testify/blob/1333b5d3bda8cf5aedcf3e1aaa95cac28aaab892/assert/assertion_format.go + +// Copyright 2020 Mat Ryer, Tyler Bunnell and all contributors. All rights reserved. +// Use of this source code is governed by an MIT-style license that can be found in +// the THIRD-PARTY-NOTICES file. + +package assert + +import ( + time "time" +) + +// Containsf asserts that the specified string, list(array, slice...) or map contains the +// specified substring or element. +// +// assert.Containsf(t, "Hello World", "World", "error message %s", "formatted") +// assert.Containsf(t, ["Hello", "World"], "World", "error message %s", "formatted") +// assert.Containsf(t, {"Hello": "World"}, "Hello", "error message %s", "formatted") +func Containsf(t TestingT, s interface{}, contains interface{}, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return Contains(t, s, contains, append([]interface{}{msg}, args...)...) +} + +// ElementsMatchf asserts that the specified listA(array, slice...) is equal to specified +// listB(array, slice...) ignoring the order of the elements. If there are duplicate elements, +// the number of appearances of each of them in both lists should match. +// +// assert.ElementsMatchf(t, [1, 3, 2, 3], [1, 3, 3, 2], "error message %s", "formatted") +func ElementsMatchf(t TestingT, listA interface{}, listB interface{}, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return ElementsMatch(t, listA, listB, append([]interface{}{msg}, args...)...) +} + +// Equalf asserts that two objects are equal. +// +// assert.Equalf(t, 123, 123, "error message %s", "formatted") +// +// Pointer variable equality is determined based on the equality of the +// referenced values (as opposed to the memory addresses). Function equality +// cannot be determined and will always fail. +func Equalf(t TestingT, expected interface{}, actual interface{}, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return Equal(t, expected, actual, append([]interface{}{msg}, args...)...) +} + +// EqualErrorf asserts that a function returned an error (i.e. not `nil`) +// and that it is equal to the provided error. +// +// actualObj, err := SomeFunction() +// assert.EqualErrorf(t, err, expectedErrorString, "error message %s", "formatted") +func EqualErrorf(t TestingT, theError error, errString string, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return EqualError(t, theError, errString, append([]interface{}{msg}, args...)...) +} + +// EqualValuesf asserts that two objects are equal or convertible to the same types +// and equal. +// +// assert.EqualValuesf(t, uint32(123), int32(123), "error message %s", "formatted") +func EqualValuesf(t TestingT, expected interface{}, actual interface{}, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return EqualValues(t, expected, actual, append([]interface{}{msg}, args...)...) +} + +// Errorf asserts that a function returned an error (i.e. not `nil`). +// +// actualObj, err := SomeFunction() +// if assert.Errorf(t, err, "error message %s", "formatted") { +// assert.Equal(t, expectedErrorf, err) +// } +func Errorf(t TestingT, err error, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return Error(t, err, append([]interface{}{msg}, args...)...) +} + +// ErrorContainsf asserts that a function returned an error (i.e. not `nil`) +// and that the error contains the specified substring. +// +// actualObj, err := SomeFunction() +// assert.ErrorContainsf(t, err, expectedErrorSubString, "error message %s", "formatted") +func ErrorContainsf(t TestingT, theError error, contains string, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return ErrorContains(t, theError, contains, append([]interface{}{msg}, args...)...) +} + +// Eventuallyf asserts that given condition will be met in waitFor time, +// periodically checking target function each tick. +// +// assert.Eventuallyf(t, func() bool { return true; }, time.Second, 10*time.Millisecond, "error message %s", "formatted") +func Eventuallyf(t TestingT, condition func() bool, waitFor time.Duration, tick time.Duration, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return Eventually(t, condition, waitFor, tick, append([]interface{}{msg}, args...)...) +} + +// Failf reports a failure through +func Failf(t TestingT, failureMessage string, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return Fail(t, failureMessage, append([]interface{}{msg}, args...)...) +} + +// FailNowf fails test +func FailNowf(t TestingT, failureMessage string, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return FailNow(t, failureMessage, append([]interface{}{msg}, args...)...) +} + +// Falsef asserts that the specified value is false. +// +// assert.Falsef(t, myBool, "error message %s", "formatted") +func Falsef(t TestingT, value bool, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return False(t, value, append([]interface{}{msg}, args...)...) +} + +// Greaterf asserts that the first element is greater than the second +// +// assert.Greaterf(t, 2, 1, "error message %s", "formatted") +// assert.Greaterf(t, float64(2), float64(1), "error message %s", "formatted") +// assert.Greaterf(t, "b", "a", "error message %s", "formatted") +func Greaterf(t TestingT, e1 interface{}, e2 interface{}, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return Greater(t, e1, e2, append([]interface{}{msg}, args...)...) +} + +// GreaterOrEqualf asserts that the first element is greater than or equal to the second +// +// assert.GreaterOrEqualf(t, 2, 1, "error message %s", "formatted") +// assert.GreaterOrEqualf(t, 2, 2, "error message %s", "formatted") +// assert.GreaterOrEqualf(t, "b", "a", "error message %s", "formatted") +// assert.GreaterOrEqualf(t, "b", "b", "error message %s", "formatted") +func GreaterOrEqualf(t TestingT, e1 interface{}, e2 interface{}, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return GreaterOrEqual(t, e1, e2, append([]interface{}{msg}, args...)...) +} + +// InDeltaf asserts that the two numerals are within delta of each other. +// +// assert.InDeltaf(t, math.Pi, 22/7.0, 0.01, "error message %s", "formatted") +func InDeltaf(t TestingT, expected interface{}, actual interface{}, delta float64, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return InDelta(t, expected, actual, delta, append([]interface{}{msg}, args...)...) +} + +// IsTypef asserts that the specified objects are of the same type. +func IsTypef(t TestingT, expectedType interface{}, object interface{}, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return IsType(t, expectedType, object, append([]interface{}{msg}, args...)...) +} + +// Lenf asserts that the specified object has specific length. +// Lenf also fails if the object has a type that len() not accept. +// +// assert.Lenf(t, mySlice, 3, "error message %s", "formatted") +func Lenf(t TestingT, object interface{}, length int, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return Len(t, object, length, append([]interface{}{msg}, args...)...) +} + +// Lessf asserts that the first element is less than the second +// +// assert.Lessf(t, 1, 2, "error message %s", "formatted") +// assert.Lessf(t, float64(1), float64(2), "error message %s", "formatted") +// assert.Lessf(t, "a", "b", "error message %s", "formatted") +func Lessf(t TestingT, e1 interface{}, e2 interface{}, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return Less(t, e1, e2, append([]interface{}{msg}, args...)...) +} + +// LessOrEqualf asserts that the first element is less than or equal to the second +// +// assert.LessOrEqualf(t, 1, 2, "error message %s", "formatted") +// assert.LessOrEqualf(t, 2, 2, "error message %s", "formatted") +// assert.LessOrEqualf(t, "a", "b", "error message %s", "formatted") +// assert.LessOrEqualf(t, "b", "b", "error message %s", "formatted") +func LessOrEqualf(t TestingT, e1 interface{}, e2 interface{}, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return LessOrEqual(t, e1, e2, append([]interface{}{msg}, args...)...) +} + +// Negativef asserts that the specified element is negative +// +// assert.Negativef(t, -1, "error message %s", "formatted") +// assert.Negativef(t, -1.23, "error message %s", "formatted") +func Negativef(t TestingT, e interface{}, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return Negative(t, e, append([]interface{}{msg}, args...)...) +} + +// Nilf asserts that the specified object is nil. +// +// assert.Nilf(t, err, "error message %s", "formatted") +func Nilf(t TestingT, object interface{}, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return Nil(t, object, append([]interface{}{msg}, args...)...) +} + +// NoErrorf asserts that a function returned no error (i.e. `nil`). +// +// actualObj, err := SomeFunction() +// if assert.NoErrorf(t, err, "error message %s", "formatted") { +// assert.Equal(t, expectedObj, actualObj) +// } +func NoErrorf(t TestingT, err error, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return NoError(t, err, append([]interface{}{msg}, args...)...) +} + +// NotContainsf asserts that the specified string, list(array, slice...) or map does NOT contain the +// specified substring or element. +// +// assert.NotContainsf(t, "Hello World", "Earth", "error message %s", "formatted") +// assert.NotContainsf(t, ["Hello", "World"], "Earth", "error message %s", "formatted") +// assert.NotContainsf(t, {"Hello": "World"}, "Earth", "error message %s", "formatted") +func NotContainsf(t TestingT, s interface{}, contains interface{}, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return NotContains(t, s, contains, append([]interface{}{msg}, args...)...) +} + +// NotEqualf asserts that the specified values are NOT equal. +// +// assert.NotEqualf(t, obj1, obj2, "error message %s", "formatted") +// +// Pointer variable equality is determined based on the equality of the +// referenced values (as opposed to the memory addresses). +func NotEqualf(t TestingT, expected interface{}, actual interface{}, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return NotEqual(t, expected, actual, append([]interface{}{msg}, args...)...) +} + +// NotEqualValuesf asserts that two objects are not equal even when converted to the same type +// +// assert.NotEqualValuesf(t, obj1, obj2, "error message %s", "formatted") +func NotEqualValuesf(t TestingT, expected interface{}, actual interface{}, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return NotEqualValues(t, expected, actual, append([]interface{}{msg}, args...)...) +} + +// NotNilf asserts that the specified object is not nil. +// +// assert.NotNilf(t, err, "error message %s", "formatted") +func NotNilf(t TestingT, object interface{}, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return NotNil(t, object, append([]interface{}{msg}, args...)...) +} + +// Positivef asserts that the specified element is positive +// +// assert.Positivef(t, 1, "error message %s", "formatted") +// assert.Positivef(t, 1.23, "error message %s", "formatted") +func Positivef(t TestingT, e interface{}, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return Positive(t, e, append([]interface{}{msg}, args...)...) +} + +// Truef asserts that the specified value is true. +// +// assert.Truef(t, myBool, "error message %s", "formatted") +func Truef(t TestingT, value bool, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return True(t, value, append([]interface{}{msg}, args...)...) +} + +// WithinDurationf asserts that the two times are within duration delta of each other. +// +// assert.WithinDurationf(t, time.Now(), time.Now(), 10*time.Second, "error message %s", "formatted") +func WithinDurationf(t TestingT, expected time.Time, actual time.Time, delta time.Duration, msg string, args ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return WithinDuration(t, expected, actual, delta, append([]interface{}{msg}, args...)...) +} diff --git a/vendor/go.mongodb.org/mongo-driver/internal/assert/assertion_mongo.go b/vendor/go.mongodb.org/mongo-driver/internal/assert/assertion_mongo.go new file mode 100644 index 000000000..e47fdf93e --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/internal/assert/assertion_mongo.go @@ -0,0 +1,126 @@ +// Copyright (C) MongoDB, Inc. 2023-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +// assertion_mongo.go contains MongoDB-specific extensions to the "assert" +// package. + +package assert + +import ( + "context" + "fmt" + "reflect" + "time" + "unsafe" +) + +// DifferentAddressRanges asserts that two byte slices reference distinct memory +// address ranges, meaning they reference different underlying byte arrays. +func DifferentAddressRanges(t TestingT, a, b []byte) (ok bool) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + + if len(a) == 0 || len(b) == 0 { + return true + } + + // Find the start and end memory addresses for the underlying byte array for + // each input byte slice. + sliceAddrRange := func(b []byte) (uintptr, uintptr) { + sh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) + return sh.Data, sh.Data + uintptr(sh.Cap-1) + } + aStart, aEnd := sliceAddrRange(a) + bStart, bEnd := sliceAddrRange(b) + + // If "b" starts after "a" ends or "a" starts after "b" ends, there is no + // overlap. + if bStart > aEnd || aStart > bEnd { + return true + } + + // Otherwise, calculate the overlap start and end and print the memory + // overlap error message. + min := func(a, b uintptr) uintptr { + if a < b { + return a + } + return b + } + max := func(a, b uintptr) uintptr { + if a > b { + return a + } + return b + } + overlapLow := max(aStart, bStart) + overlapHigh := min(aEnd, bEnd) + + t.Errorf("Byte slices point to the same underlying byte array:\n"+ + "\ta addresses:\t%d ... %d\n"+ + "\tb addresses:\t%d ... %d\n"+ + "\toverlap:\t%d ... %d", + aStart, aEnd, + bStart, bEnd, + overlapLow, overlapHigh) + + return false +} + +// EqualBSON asserts that the expected and actual BSON binary values are equal. +// If the values are not equal, it prints both the binary and Extended JSON diff +// of the BSON values. The provided BSON value types must implement the +// fmt.Stringer interface. +func EqualBSON(t TestingT, expected, actual interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + + return Equal(t, + expected, + actual, + `expected and actual BSON values do not match +As Extended JSON: +Expected: %s +Actual : %s`, + expected.(fmt.Stringer).String(), + actual.(fmt.Stringer).String()) +} + +// Soon runs the provided callback and fails the passed-in test if the callback +// does not complete within timeout. The provided callback should respect the +// passed-in context and cease execution when it has expired. +// +// Deprecated: This function will be removed with GODRIVER-2667, use +// assert.Eventually instead. +func Soon(t TestingT, callback func(ctx context.Context), timeout time.Duration) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + + // Create context to manually cancel callback after Soon assertion. + callbackCtx, cancel := context.WithCancel(context.Background()) + defer cancel() + + done := make(chan struct{}) + fullCallback := func() { + callback(callbackCtx) + done <- struct{}{} + } + + timer := time.NewTimer(timeout) + defer timer.Stop() + + go fullCallback() + + select { + case <-done: + return + case <-timer.C: + t.Errorf("timed out in %s waiting for callback", timeout) + } +} diff --git a/vendor/go.mongodb.org/mongo-driver/internal/assert/assertions.go b/vendor/go.mongodb.org/mongo-driver/internal/assert/assertions.go new file mode 100644 index 000000000..c227d47c8 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/internal/assert/assertions.go @@ -0,0 +1,1075 @@ +// Copied from https://github.com/stretchr/testify/blob/1333b5d3bda8cf5aedcf3e1aaa95cac28aaab892/assert/assertions.go + +// Copyright 2020 Mat Ryer, Tyler Bunnell and all contributors. All rights reserved. +// Use of this source code is governed by an MIT-style license that can be found in +// the THIRD-PARTY-NOTICES file. + +package assert + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "math" + "path/filepath" + "reflect" + "runtime" + "strings" + "time" + "unicode" + "unicode/utf8" + + "github.com/davecgh/go-spew/spew" +) + +// TestingT is an interface wrapper around *testing.T +type TestingT interface { + Errorf(format string, args ...interface{}) +} + +// ObjectsAreEqual determines if two objects are considered equal. +// +// This function does no assertion of any kind. +func ObjectsAreEqual(expected, actual interface{}) bool { + if expected == nil || actual == nil { + return expected == actual + } + + exp, ok := expected.([]byte) + if !ok { + return reflect.DeepEqual(expected, actual) + } + + act, ok := actual.([]byte) + if !ok { + return false + } + if exp == nil || act == nil { + return exp == nil && act == nil + } + return bytes.Equal(exp, act) +} + +// ObjectsAreEqualValues gets whether two objects are equal, or if their +// values are equal. +func ObjectsAreEqualValues(expected, actual interface{}) bool { + if ObjectsAreEqual(expected, actual) { + return true + } + + actualType := reflect.TypeOf(actual) + if actualType == nil { + return false + } + expectedValue := reflect.ValueOf(expected) + if expectedValue.IsValid() && expectedValue.Type().ConvertibleTo(actualType) { + // Attempt comparison after type conversion + return reflect.DeepEqual(expectedValue.Convert(actualType).Interface(), actual) + } + + return false +} + +/* CallerInfo is necessary because the assert functions use the testing object +internally, causing it to print the file:line of the assert method, rather than where +the problem actually occurred in calling code.*/ + +// CallerInfo returns an array of strings containing the file and line number +// of each stack frame leading from the current test to the assert call that +// failed. +func CallerInfo() []string { + + var pc uintptr + var ok bool + var file string + var line int + var name string + + callers := []string{} + for i := 0; ; i++ { + pc, file, line, ok = runtime.Caller(i) + if !ok { + // The breaks below failed to terminate the loop, and we ran off the + // end of the call stack. + break + } + + // This is a huge edge case, but it will panic if this is the case, see #180 + if file == "" { + break + } + + f := runtime.FuncForPC(pc) + if f == nil { + break + } + name = f.Name() + + // testing.tRunner is the standard library function that calls + // tests. Subtests are called directly by tRunner, without going through + // the Test/Benchmark/Example function that contains the t.Run calls, so + // with subtests we should break when we hit tRunner, without adding it + // to the list of callers. + if name == "testing.tRunner" { + break + } + + parts := strings.Split(file, "/") + file = parts[len(parts)-1] + if len(parts) > 1 { + dir := parts[len(parts)-2] + if (dir != "assert" && dir != "mock" && dir != "require") || file == "mock_test.go" { + path, _ := filepath.Abs(file) + callers = append(callers, fmt.Sprintf("%s:%d", path, line)) + } + } + + // Drop the package + segments := strings.Split(name, ".") + name = segments[len(segments)-1] + if isTest(name, "Test") || + isTest(name, "Benchmark") || + isTest(name, "Example") { + break + } + } + + return callers +} + +// Stolen from the `go test` tool. +// isTest tells whether name looks like a test (or benchmark, according to prefix). +// It is a Test (say) if there is a character after Test that is not a lower-case letter. +// We don't want TesticularCancer. +func isTest(name, prefix string) bool { + if !strings.HasPrefix(name, prefix) { + return false + } + if len(name) == len(prefix) { // "Test" is ok + return true + } + r, _ := utf8.DecodeRuneInString(name[len(prefix):]) + return !unicode.IsLower(r) +} + +func messageFromMsgAndArgs(msgAndArgs ...interface{}) string { + if len(msgAndArgs) == 0 || msgAndArgs == nil { + return "" + } + if len(msgAndArgs) == 1 { + msg := msgAndArgs[0] + if msgAsStr, ok := msg.(string); ok { + return msgAsStr + } + return fmt.Sprintf("%+v", msg) + } + if len(msgAndArgs) > 1 { + return fmt.Sprintf(msgAndArgs[0].(string), msgAndArgs[1:]...) + } + return "" +} + +// Aligns the provided message so that all lines after the first line start at the same location as the first line. +// Assumes that the first line starts at the correct location (after carriage return, tab, label, spacer and tab). +// The longestLabelLen parameter specifies the length of the longest label in the output (required because this is the +// basis on which the alignment occurs). +func indentMessageLines(message string, longestLabelLen int) string { + outBuf := new(bytes.Buffer) + + for i, scanner := 0, bufio.NewScanner(strings.NewReader(message)); scanner.Scan(); i++ { + // no need to align first line because it starts at the correct location (after the label) + if i != 0 { + // append alignLen+1 spaces to align with "{{longestLabel}}:" before adding tab + outBuf.WriteString("\n\t" + strings.Repeat(" ", longestLabelLen+1) + "\t") + } + outBuf.WriteString(scanner.Text()) + } + + return outBuf.String() +} + +type failNower interface { + FailNow() +} + +// FailNow fails test +func FailNow(t TestingT, failureMessage string, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + Fail(t, failureMessage, msgAndArgs...) + + // We cannot extend TestingT with FailNow() and + // maintain backwards compatibility, so we fallback + // to panicking when FailNow is not available in + // TestingT. + // See issue #263 + + if t, ok := t.(failNower); ok { + t.FailNow() + } else { + panic("test failed and t is missing `FailNow()`") + } + return false +} + +// Fail reports a failure through +func Fail(t TestingT, failureMessage string, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + content := []labeledContent{ + {"Error Trace", strings.Join(CallerInfo(), "\n\t\t\t")}, + {"Error", failureMessage}, + } + + // Add test name if the Go version supports it + if n, ok := t.(interface { + Name() string + }); ok { + content = append(content, labeledContent{"Test", n.Name()}) + } + + message := messageFromMsgAndArgs(msgAndArgs...) + if len(message) > 0 { + content = append(content, labeledContent{"Messages", message}) + } + + t.Errorf("\n%s", ""+labeledOutput(content...)) + + return false +} + +type labeledContent struct { + label string + content string +} + +// labeledOutput returns a string consisting of the provided labeledContent. Each labeled output is appended in the following manner: +// +// \t{{label}}:{{align_spaces}}\t{{content}}\n +// +// The initial carriage return is required to undo/erase any padding added by testing.T.Errorf. The "\t{{label}}:" is for the label. +// If a label is shorter than the longest label provided, padding spaces are added to make all the labels match in length. Once this +// alignment is achieved, "\t{{content}}\n" is added for the output. +// +// If the content of the labeledOutput contains line breaks, the subsequent lines are aligned so that they start at the same location as the first line. +func labeledOutput(content ...labeledContent) string { + longestLabel := 0 + for _, v := range content { + if len(v.label) > longestLabel { + longestLabel = len(v.label) + } + } + var output string + for _, v := range content { + output += "\t" + v.label + ":" + strings.Repeat(" ", longestLabel-len(v.label)) + "\t" + indentMessageLines(v.content, longestLabel) + "\n" + } + return output +} + +// IsType asserts that the specified objects are of the same type. +func IsType(t TestingT, expectedType interface{}, object interface{}, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + + if !ObjectsAreEqual(reflect.TypeOf(object), reflect.TypeOf(expectedType)) { + return Fail(t, fmt.Sprintf("Object expected to be of type %v, but was %v", reflect.TypeOf(expectedType), reflect.TypeOf(object)), msgAndArgs...) + } + + return true +} + +// Equal asserts that two objects are equal. +// +// assert.Equal(t, 123, 123) +// +// Pointer variable equality is determined based on the equality of the +// referenced values (as opposed to the memory addresses). Function equality +// cannot be determined and will always fail. +func Equal(t TestingT, expected, actual interface{}, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if err := validateEqualArgs(expected, actual); err != nil { + return Fail(t, fmt.Sprintf("Invalid operation: %#v == %#v (%s)", + expected, actual, err), msgAndArgs...) + } + + if !ObjectsAreEqual(expected, actual) { + diff := diff(expected, actual) + expected, actual = formatUnequalValues(expected, actual) + return Fail(t, fmt.Sprintf("Not equal: \n"+ + "expected: %s\n"+ + "actual : %s%s", expected, actual, diff), msgAndArgs...) + } + + return true + +} + +// validateEqualArgs checks whether provided arguments can be safely used in the +// Equal/NotEqual functions. +func validateEqualArgs(expected, actual interface{}) error { + if expected == nil && actual == nil { + return nil + } + + if isFunction(expected) || isFunction(actual) { + return errors.New("cannot take func type as argument") + } + return nil +} + +// formatUnequalValues takes two values of arbitrary types and returns string +// representations appropriate to be presented to the user. +// +// If the values are not of like type, the returned strings will be prefixed +// with the type name, and the value will be enclosed in parenthesis similar +// to a type conversion in the Go grammar. +func formatUnequalValues(expected, actual interface{}) (e string, a string) { + if reflect.TypeOf(expected) != reflect.TypeOf(actual) { + return fmt.Sprintf("%T(%s)", expected, truncatingFormat(expected)), + fmt.Sprintf("%T(%s)", actual, truncatingFormat(actual)) + } + switch expected.(type) { + case time.Duration: + return fmt.Sprintf("%v", expected), fmt.Sprintf("%v", actual) + } + return truncatingFormat(expected), truncatingFormat(actual) +} + +// truncatingFormat formats the data and truncates it if it's too long. +// +// This helps keep formatted error messages lines from exceeding the +// bufio.MaxScanTokenSize max line length that the go testing framework imposes. +func truncatingFormat(data interface{}) string { + value := fmt.Sprintf("%#v", data) + max := bufio.MaxScanTokenSize - 100 // Give us some space the type info too if needed. + if len(value) > max { + value = value[0:max] + "<... truncated>" + } + return value +} + +// EqualValues asserts that two objects are equal or convertible to the same types +// and equal. +// +// assert.EqualValues(t, uint32(123), int32(123)) +func EqualValues(t TestingT, expected, actual interface{}, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + + if !ObjectsAreEqualValues(expected, actual) { + diff := diff(expected, actual) + expected, actual = formatUnequalValues(expected, actual) + return Fail(t, fmt.Sprintf("Not equal: \n"+ + "expected: %s\n"+ + "actual : %s%s", expected, actual, diff), msgAndArgs...) + } + + return true + +} + +// NotNil asserts that the specified object is not nil. +// +// assert.NotNil(t, err) +func NotNil(t TestingT, object interface{}, msgAndArgs ...interface{}) bool { + if !isNil(object) { + return true + } + if h, ok := t.(tHelper); ok { + h.Helper() + } + return Fail(t, "Expected value not to be nil.", msgAndArgs...) +} + +// containsKind checks if a specified kind in the slice of kinds. +func containsKind(kinds []reflect.Kind, kind reflect.Kind) bool { + for i := 0; i < len(kinds); i++ { + if kind == kinds[i] { + return true + } + } + + return false +} + +// isNil checks if a specified object is nil or not, without Failing. +func isNil(object interface{}) bool { + if object == nil { + return true + } + + value := reflect.ValueOf(object) + kind := value.Kind() + isNilableKind := containsKind( + []reflect.Kind{ + reflect.Chan, reflect.Func, + reflect.Interface, reflect.Map, + reflect.Ptr, reflect.Slice}, + kind) + + if isNilableKind && value.IsNil() { + return true + } + + return false +} + +// Nil asserts that the specified object is nil. +// +// assert.Nil(t, err) +func Nil(t TestingT, object interface{}, msgAndArgs ...interface{}) bool { + if isNil(object) { + return true + } + if h, ok := t.(tHelper); ok { + h.Helper() + } + return Fail(t, fmt.Sprintf("Expected nil, but got: %#v", object), msgAndArgs...) +} + +// getLen try to get length of object. +// return (false, 0) if impossible. +func getLen(x interface{}) (ok bool, length int) { + v := reflect.ValueOf(x) + defer func() { + if e := recover(); e != nil { + ok = false + } + }() + return true, v.Len() +} + +// Len asserts that the specified object has specific length. +// Len also fails if the object has a type that len() not accept. +// +// assert.Len(t, mySlice, 3) +func Len(t TestingT, object interface{}, length int, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + ok, l := getLen(object) + if !ok { + return Fail(t, fmt.Sprintf("\"%s\" could not be applied builtin len()", object), msgAndArgs...) + } + + if l != length { + return Fail(t, fmt.Sprintf("\"%s\" should have %d item(s), but has %d", object, length, l), msgAndArgs...) + } + return true +} + +// True asserts that the specified value is true. +// +// assert.True(t, myBool) +func True(t TestingT, value bool, msgAndArgs ...interface{}) bool { + if !value { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return Fail(t, "Should be true", msgAndArgs...) + } + + return true + +} + +// False asserts that the specified value is false. +// +// assert.False(t, myBool) +func False(t TestingT, value bool, msgAndArgs ...interface{}) bool { + if value { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return Fail(t, "Should be false", msgAndArgs...) + } + + return true + +} + +// NotEqual asserts that the specified values are NOT equal. +// +// assert.NotEqual(t, obj1, obj2) +// +// Pointer variable equality is determined based on the equality of the +// referenced values (as opposed to the memory addresses). +func NotEqual(t TestingT, expected, actual interface{}, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if err := validateEqualArgs(expected, actual); err != nil { + return Fail(t, fmt.Sprintf("Invalid operation: %#v != %#v (%s)", + expected, actual, err), msgAndArgs...) + } + + if ObjectsAreEqual(expected, actual) { + return Fail(t, fmt.Sprintf("Should not be: %#v\n", actual), msgAndArgs...) + } + + return true + +} + +// NotEqualValues asserts that two objects are not equal even when converted to the same type +// +// assert.NotEqualValues(t, obj1, obj2) +func NotEqualValues(t TestingT, expected, actual interface{}, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + + if ObjectsAreEqualValues(expected, actual) { + return Fail(t, fmt.Sprintf("Should not be: %#v\n", actual), msgAndArgs...) + } + + return true +} + +// containsElement try loop over the list check if the list includes the element. +// return (false, false) if impossible. +// return (true, false) if element was not found. +// return (true, true) if element was found. +func containsElement(list interface{}, element interface{}) (ok, found bool) { + + listValue := reflect.ValueOf(list) + listType := reflect.TypeOf(list) + if listType == nil { + return false, false + } + listKind := listType.Kind() + defer func() { + if e := recover(); e != nil { + ok = false + found = false + } + }() + + if listKind == reflect.String { + elementValue := reflect.ValueOf(element) + return true, strings.Contains(listValue.String(), elementValue.String()) + } + + if listKind == reflect.Map { + mapKeys := listValue.MapKeys() + for i := 0; i < len(mapKeys); i++ { + if ObjectsAreEqual(mapKeys[i].Interface(), element) { + return true, true + } + } + return true, false + } + + for i := 0; i < listValue.Len(); i++ { + if ObjectsAreEqual(listValue.Index(i).Interface(), element) { + return true, true + } + } + return true, false + +} + +// Contains asserts that the specified string, list(array, slice...) or map contains the +// specified substring or element. +// +// assert.Contains(t, "Hello World", "World") +// assert.Contains(t, ["Hello", "World"], "World") +// assert.Contains(t, {"Hello": "World"}, "Hello") +func Contains(t TestingT, s, contains interface{}, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + + ok, found := containsElement(s, contains) + if !ok { + return Fail(t, fmt.Sprintf("%#v could not be applied builtin len()", s), msgAndArgs...) + } + if !found { + return Fail(t, fmt.Sprintf("%#v does not contain %#v", s, contains), msgAndArgs...) + } + + return true + +} + +// NotContains asserts that the specified string, list(array, slice...) or map does NOT contain the +// specified substring or element. +// +// assert.NotContains(t, "Hello World", "Earth") +// assert.NotContains(t, ["Hello", "World"], "Earth") +// assert.NotContains(t, {"Hello": "World"}, "Earth") +func NotContains(t TestingT, s, contains interface{}, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + + ok, found := containsElement(s, contains) + if !ok { + return Fail(t, fmt.Sprintf("\"%s\" could not be applied builtin len()", s), msgAndArgs...) + } + if found { + return Fail(t, fmt.Sprintf("\"%s\" should not contain \"%s\"", s, contains), msgAndArgs...) + } + + return true + +} + +// isEmpty gets whether the specified object is considered empty or not. +func isEmpty(object interface{}) bool { + + // get nil case out of the way + if object == nil { + return true + } + + objValue := reflect.ValueOf(object) + + switch objValue.Kind() { + // collection types are empty when they have no element + case reflect.Chan, reflect.Map, reflect.Slice: + return objValue.Len() == 0 + // pointers are empty if nil or if the value they point to is empty + case reflect.Ptr: + if objValue.IsNil() { + return true + } + deref := objValue.Elem().Interface() + return isEmpty(deref) + // for all other types, compare against the zero value + // array types are empty when they match their zero-initialized state + default: + zero := reflect.Zero(objValue.Type()) + return reflect.DeepEqual(object, zero.Interface()) + } +} + +// ElementsMatch asserts that the specified listA(array, slice...) is equal to specified +// listB(array, slice...) ignoring the order of the elements. If there are duplicate elements, +// the number of appearances of each of them in both lists should match. +// +// assert.ElementsMatch(t, [1, 3, 2, 3], [1, 3, 3, 2]) +func ElementsMatch(t TestingT, listA, listB interface{}, msgAndArgs ...interface{}) (ok bool) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if isEmpty(listA) && isEmpty(listB) { + return true + } + + if !isList(t, listA, msgAndArgs...) || !isList(t, listB, msgAndArgs...) { + return false + } + + extraA, extraB := diffLists(listA, listB) + + if len(extraA) == 0 && len(extraB) == 0 { + return true + } + + return Fail(t, formatListDiff(listA, listB, extraA, extraB), msgAndArgs...) +} + +// isList checks that the provided value is array or slice. +func isList(t TestingT, list interface{}, msgAndArgs ...interface{}) (ok bool) { + kind := reflect.TypeOf(list).Kind() + if kind != reflect.Array && kind != reflect.Slice { + return Fail(t, fmt.Sprintf("%q has an unsupported type %s, expecting array or slice", list, kind), + msgAndArgs...) + } + return true +} + +// diffLists diffs two arrays/slices and returns slices of elements that are only in A and only in B. +// If some element is present multiple times, each instance is counted separately (e.g. if something is 2x in A and +// 5x in B, it will be 0x in extraA and 3x in extraB). The order of items in both lists is ignored. +func diffLists(listA, listB interface{}) ([]interface{}, []interface{}) { + var extraA, extraB []interface{} + + aValue := reflect.ValueOf(listA) + bValue := reflect.ValueOf(listB) + + aLen := aValue.Len() + bLen := bValue.Len() + + // Mark indexes in bValue that we already used + visited := make([]bool, bLen) + for i := 0; i < aLen; i++ { + element := aValue.Index(i).Interface() + found := false + for j := 0; j < bLen; j++ { + if visited[j] { + continue + } + if ObjectsAreEqual(bValue.Index(j).Interface(), element) { + visited[j] = true + found = true + break + } + } + if !found { + extraA = append(extraA, element) + } + } + + for j := 0; j < bLen; j++ { + if visited[j] { + continue + } + extraB = append(extraB, bValue.Index(j).Interface()) + } + + return extraA, extraB +} + +func formatListDiff(listA, listB interface{}, extraA, extraB []interface{}) string { + var msg bytes.Buffer + + msg.WriteString("elements differ") + if len(extraA) > 0 { + msg.WriteString("\n\nextra elements in list A:\n") + msg.WriteString(spewConfig.Sdump(extraA)) + } + if len(extraB) > 0 { + msg.WriteString("\n\nextra elements in list B:\n") + msg.WriteString(spewConfig.Sdump(extraB)) + } + msg.WriteString("\n\nlistA:\n") + msg.WriteString(spewConfig.Sdump(listA)) + msg.WriteString("\n\nlistB:\n") + msg.WriteString(spewConfig.Sdump(listB)) + + return msg.String() +} + +// WithinDuration asserts that the two times are within duration delta of each other. +// +// assert.WithinDuration(t, time.Now(), time.Now(), 10*time.Second) +func WithinDuration(t TestingT, expected, actual time.Time, delta time.Duration, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + + dt := expected.Sub(actual) + if dt < -delta || dt > delta { + return Fail(t, fmt.Sprintf("Max difference between %v and %v allowed is %v, but difference was %v", expected, actual, delta, dt), msgAndArgs...) + } + + return true +} + +func toFloat(x interface{}) (float64, bool) { + var xf float64 + xok := true + + switch xn := x.(type) { + case uint: + xf = float64(xn) + case uint8: + xf = float64(xn) + case uint16: + xf = float64(xn) + case uint32: + xf = float64(xn) + case uint64: + xf = float64(xn) + case int: + xf = float64(xn) + case int8: + xf = float64(xn) + case int16: + xf = float64(xn) + case int32: + xf = float64(xn) + case int64: + xf = float64(xn) + case float32: + xf = float64(xn) + case float64: + xf = xn + case time.Duration: + xf = float64(xn) + default: + xok = false + } + + return xf, xok +} + +// InDelta asserts that the two numerals are within delta of each other. +// +// assert.InDelta(t, math.Pi, 22/7.0, 0.01) +func InDelta(t TestingT, expected, actual interface{}, delta float64, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + + af, aok := toFloat(expected) + bf, bok := toFloat(actual) + + if !aok || !bok { + return Fail(t, "Parameters must be numerical", msgAndArgs...) + } + + if math.IsNaN(af) && math.IsNaN(bf) { + return true + } + + if math.IsNaN(af) { + return Fail(t, "Expected must not be NaN", msgAndArgs...) + } + + if math.IsNaN(bf) { + return Fail(t, fmt.Sprintf("Expected %v with delta %v, but was NaN", expected, delta), msgAndArgs...) + } + + dt := af - bf + if dt < -delta || dt > delta { + return Fail(t, fmt.Sprintf("Max difference between %v and %v allowed is %v, but difference was %v", expected, actual, delta, dt), msgAndArgs...) + } + + return true +} + +/* + Errors +*/ + +// NoError asserts that a function returned no error (i.e. `nil`). +// +// actualObj, err := SomeFunction() +// if assert.NoError(t, err) { +// assert.Equal(t, expectedObj, actualObj) +// } +func NoError(t TestingT, err error, msgAndArgs ...interface{}) bool { + if err != nil { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return Fail(t, fmt.Sprintf("Received unexpected error:\n%+v", err), msgAndArgs...) + } + + return true +} + +// Error asserts that a function returned an error (i.e. not `nil`). +// +// actualObj, err := SomeFunction() +// if assert.Error(t, err) { +// assert.Equal(t, expectedError, err) +// } +func Error(t TestingT, err error, msgAndArgs ...interface{}) bool { + if err == nil { + if h, ok := t.(tHelper); ok { + h.Helper() + } + return Fail(t, "An error is expected but got nil.", msgAndArgs...) + } + + return true +} + +// EqualError asserts that a function returned an error (i.e. not `nil`) +// and that it is equal to the provided error. +// +// actualObj, err := SomeFunction() +// assert.EqualError(t, err, expectedErrorString) +func EqualError(t TestingT, theError error, errString string, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if !Error(t, theError, msgAndArgs...) { + return false + } + expected := errString + actual := theError.Error() + // don't need to use deep equals here, we know they are both strings + if expected != actual { + return Fail(t, fmt.Sprintf("Error message not equal:\n"+ + "expected: %q\n"+ + "actual : %q", expected, actual), msgAndArgs...) + } + return true +} + +// ErrorIs asserts that at least one of the errors in err's chain matches target. +// This is a wrapper for errors.Is. +func ErrorIs(t TestingT, err, target error, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if errors.Is(err, target) { + return true + } + + var expectedText string + if target != nil { + expectedText = target.Error() + } + + chain := buildErrorChainString(err) + + return Fail(t, fmt.Sprintf("Target error should be in err chain:\n"+ + "expected: %q\n"+ + "in chain: %s", expectedText, chain, + ), msgAndArgs...) +} + +// ErrorContains asserts that a function returned an error (i.e. not `nil`) +// and that the error contains the specified substring. +// +// actualObj, err := SomeFunction() +// assert.ErrorContains(t, err, expectedErrorSubString) +func ErrorContains(t TestingT, theError error, contains string, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if !Error(t, theError, msgAndArgs...) { + return false + } + + actual := theError.Error() + if !strings.Contains(actual, contains) { + return Fail(t, fmt.Sprintf("Error %#v does not contain %#v", actual, contains), msgAndArgs...) + } + + return true +} + +func typeAndKind(v interface{}) (reflect.Type, reflect.Kind) { + t := reflect.TypeOf(v) + k := t.Kind() + + if k == reflect.Ptr { + t = t.Elem() + k = t.Kind() + } + return t, k +} + +// diff returns a diff of both values as long as both are of the same type and +// are a struct, map, slice, array or string. Otherwise it returns an empty string. +func diff(expected interface{}, actual interface{}) string { + if expected == nil || actual == nil { + return "" + } + + et, ek := typeAndKind(expected) + at, _ := typeAndKind(actual) + + if et != at { + return "" + } + + if ek != reflect.Struct && ek != reflect.Map && ek != reflect.Slice && ek != reflect.Array && ek != reflect.String { + return "" + } + + var e, a string + + switch et { + case reflect.TypeOf(""): + e = reflect.ValueOf(expected).String() + a = reflect.ValueOf(actual).String() + case reflect.TypeOf(time.Time{}): + e = spewConfigStringerEnabled.Sdump(expected) + a = spewConfigStringerEnabled.Sdump(actual) + default: + e = spewConfig.Sdump(expected) + a = spewConfig.Sdump(actual) + } + + diff, _ := GetUnifiedDiffString(UnifiedDiff{ + A: SplitLines(e), + B: SplitLines(a), + FromFile: "Expected", + FromDate: "", + ToFile: "Actual", + ToDate: "", + Context: 1, + }) + + return "\n\nDiff:\n" + diff +} + +func isFunction(arg interface{}) bool { + if arg == nil { + return false + } + return reflect.TypeOf(arg).Kind() == reflect.Func +} + +var spewConfig = spew.ConfigState{ + Indent: " ", + DisablePointerAddresses: true, + DisableCapacities: true, + SortKeys: true, + DisableMethods: true, + MaxDepth: 10, +} + +var spewConfigStringerEnabled = spew.ConfigState{ + Indent: " ", + DisablePointerAddresses: true, + DisableCapacities: true, + SortKeys: true, + MaxDepth: 10, +} + +type tHelper interface { + Helper() +} + +// Eventually asserts that given condition will be met in waitFor time, +// periodically checking target function each tick. +// +// assert.Eventually(t, func() bool { return true; }, time.Second, 10*time.Millisecond) +func Eventually(t TestingT, condition func() bool, waitFor time.Duration, tick time.Duration, msgAndArgs ...interface{}) bool { + if h, ok := t.(tHelper); ok { + h.Helper() + } + + ch := make(chan bool, 1) + + timer := time.NewTimer(waitFor) + defer timer.Stop() + + ticker := time.NewTicker(tick) + defer ticker.Stop() + + for tick := ticker.C; ; { + select { + case <-timer.C: + return Fail(t, "Condition never satisfied", msgAndArgs...) + case <-tick: + tick = nil + go func() { ch <- condition() }() + case v := <-ch: + if v { + return true + } + tick = ticker.C + } + } +} + +func buildErrorChainString(err error) string { + if err == nil { + return "" + } + + e := errors.Unwrap(err) + chain := fmt.Sprintf("%q", err.Error()) + for e != nil { + chain += fmt.Sprintf("\n\t%q", e.Error()) + e = errors.Unwrap(e) + } + return chain +} diff --git a/vendor/go.mongodb.org/mongo-driver/internal/assert/difflib.go b/vendor/go.mongodb.org/mongo-driver/internal/assert/difflib.go new file mode 100644 index 000000000..e13a66a93 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/internal/assert/difflib.go @@ -0,0 +1,766 @@ +// Copied from https://github.com/pmezard/go-difflib/blob/5d4384ee4fb2527b0a1256a821ebfc92f91efefc/difflib/difflib.go + +// Copyright 2013 Patrick Mezard. All rights reserved. Use of this source code is +// governed by a license that can be found in the THIRD-PARTY-NOTICES file. + +package assert + +import ( + "bufio" + "bytes" + "fmt" + "io" + "strings" +) + +func min(a, b int) int { + if a < b { + return a + } + return b +} + +func max(a, b int) int { + if a > b { + return a + } + return b +} + +func calculateRatio(matches, length int) float64 { + if length > 0 { + return 2.0 * float64(matches) / float64(length) + } + return 1.0 +} + +type Match struct { + A int + B int + Size int +} + +type OpCode struct { + Tag byte + I1 int + I2 int + J1 int + J2 int +} + +// SequenceMatcher compares sequence of strings. The basic +// algorithm predates, and is a little fancier than, an algorithm +// published in the late 1980's by Ratcliff and Obershelp under the +// hyperbolic name "gestalt pattern matching". The basic idea is to find +// the longest contiguous matching subsequence that contains no "junk" +// elements (R-O doesn't address junk). The same idea is then applied +// recursively to the pieces of the sequences to the left and to the right +// of the matching subsequence. This does not yield minimal edit +// sequences, but does tend to yield matches that "look right" to people. +// +// SequenceMatcher tries to compute a "human-friendly diff" between two +// sequences. Unlike e.g. UNIX(tm) diff, the fundamental notion is the +// longest *contiguous* & junk-free matching subsequence. That's what +// catches peoples' eyes. The Windows(tm) windiff has another interesting +// notion, pairing up elements that appear uniquely in each sequence. +// That, and the method here, appear to yield more intuitive difference +// reports than does diff. This method appears to be the least vulnerable +// to syncing up on blocks of "junk lines", though (like blank lines in +// ordinary text files, or maybe "

" lines in HTML files). That may be +// because this is the only method of the 3 that has a *concept* of +// "junk" . +// +// Timing: Basic R-O is cubic time worst case and quadratic time expected +// case. SequenceMatcher is quadratic time for the worst case and has +// expected-case behavior dependent in a complicated way on how many +// elements the sequences have in common; best case time is linear. +type SequenceMatcher struct { + a []string + b []string + b2j map[string][]int + IsJunk func(string) bool + autoJunk bool + bJunk map[string]struct{} + matchingBlocks []Match + fullBCount map[string]int + bPopular map[string]struct{} + opCodes []OpCode +} + +func NewMatcher(a, b []string) *SequenceMatcher { + m := SequenceMatcher{autoJunk: true} + m.SetSeqs(a, b) + return &m +} + +func NewMatcherWithJunk(a, b []string, autoJunk bool, + isJunk func(string) bool) *SequenceMatcher { + + m := SequenceMatcher{IsJunk: isJunk, autoJunk: autoJunk} + m.SetSeqs(a, b) + return &m +} + +// SetSeqs sets the two sequences to be compared. +func (m *SequenceMatcher) SetSeqs(a, b []string) { + m.SetSeq1(a) + m.SetSeq2(b) +} + +// SetSeq1 sets the first sequence to be compared. The second sequence to be compared is +// not changed. +// +// SequenceMatcher computes and caches detailed information about the second +// sequence, so if you want to compare one sequence S against many sequences, +// use .SetSeq2(s) once and call .SetSeq1(x) repeatedly for each of the other +// sequences. +// +// See also SetSeqs() and SetSeq2(). +func (m *SequenceMatcher) SetSeq1(a []string) { + if &a == &m.a { + return + } + m.a = a + m.matchingBlocks = nil + m.opCodes = nil +} + +// SetSeq2 sets the second sequence to be compared. The first sequence to be compared is +// not changed. +func (m *SequenceMatcher) SetSeq2(b []string) { + if &b == &m.b { + return + } + m.b = b + m.matchingBlocks = nil + m.opCodes = nil + m.fullBCount = nil + m.chainB() +} + +func (m *SequenceMatcher) chainB() { + // Populate line -> index mapping + b2j := map[string][]int{} + for i, s := range m.b { + indices := b2j[s] + indices = append(indices, i) + b2j[s] = indices + } + + // Purge junk elements + m.bJunk = map[string]struct{}{} + if m.IsJunk != nil { + junk := m.bJunk + for s := range b2j { + if m.IsJunk(s) { + junk[s] = struct{}{} + } + } + for s := range junk { + delete(b2j, s) + } + } + + // Purge remaining popular elements + popular := map[string]struct{}{} + n := len(m.b) + if m.autoJunk && n >= 200 { + ntest := n/100 + 1 + for s, indices := range b2j { + if len(indices) > ntest { + popular[s] = struct{}{} + } + } + for s := range popular { + delete(b2j, s) + } + } + m.bPopular = popular + m.b2j = b2j +} + +func (m *SequenceMatcher) isBJunk(s string) bool { + _, ok := m.bJunk[s] + return ok +} + +// Find longest matching block in a[alo:ahi] and b[blo:bhi]. +// +// If IsJunk is not defined: +// +// Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where +// +// alo <= i <= i+k <= ahi +// blo <= j <= j+k <= bhi +// +// and for all (i',j',k') meeting those conditions, +// +// k >= k' +// i <= i' +// and if i == i', j <= j' +// +// In other words, of all maximal matching blocks, return one that +// starts earliest in a, and of all those maximal matching blocks that +// start earliest in a, return the one that starts earliest in b. +// +// If IsJunk is defined, first the longest matching block is +// determined as above, but with the additional restriction that no +// junk element appears in the block. Then that block is extended as +// far as possible by matching (only) junk elements on both sides. So +// the resulting block never matches on junk except as identical junk +// happens to be adjacent to an "interesting" match. +// +// If no blocks match, return (alo, blo, 0). +func (m *SequenceMatcher) findLongestMatch(alo, ahi, blo, bhi int) Match { + // CAUTION: stripping common prefix or suffix would be incorrect. + // E.g., + // ab + // acab + // Longest matching block is "ab", but if common prefix is + // stripped, it's "a" (tied with "b"). UNIX(tm) diff does so + // strip, so ends up claiming that ab is changed to acab by + // inserting "ca" in the middle. That's minimal but unintuitive: + // "it's obvious" that someone inserted "ac" at the front. + // Windiff ends up at the same place as diff, but by pairing up + // the unique 'b's and then matching the first two 'a's. + besti, bestj, bestsize := alo, blo, 0 + + // find longest junk-free match + // during an iteration of the loop, j2len[j] = length of longest + // junk-free match ending with a[i-1] and b[j] + j2len := map[int]int{} + for i := alo; i != ahi; i++ { + // look at all instances of a[i] in b; note that because + // b2j has no junk keys, the loop is skipped if a[i] is junk + newj2len := map[int]int{} + for _, j := range m.b2j[m.a[i]] { + // a[i] matches b[j] + if j < blo { + continue + } + if j >= bhi { + break + } + k := j2len[j-1] + 1 + newj2len[j] = k + if k > bestsize { + besti, bestj, bestsize = i-k+1, j-k+1, k + } + } + j2len = newj2len + } + + // Extend the best by non-junk elements on each end. In particular, + // "popular" non-junk elements aren't in b2j, which greatly speeds + // the inner loop above, but also means "the best" match so far + // doesn't contain any junk *or* popular non-junk elements. + for besti > alo && bestj > blo && !m.isBJunk(m.b[bestj-1]) && + m.a[besti-1] == m.b[bestj-1] { + besti, bestj, bestsize = besti-1, bestj-1, bestsize+1 + } + for besti+bestsize < ahi && bestj+bestsize < bhi && + !m.isBJunk(m.b[bestj+bestsize]) && + m.a[besti+bestsize] == m.b[bestj+bestsize] { + bestsize++ + } + + // Now that we have a wholly interesting match (albeit possibly + // empty!), we may as well suck up the matching junk on each + // side of it too. Can't think of a good reason not to, and it + // saves post-processing the (possibly considerable) expense of + // figuring out what to do with it. In the case of an empty + // interesting match, this is clearly the right thing to do, + // because no other kind of match is possible in the regions. + for besti > alo && bestj > blo && m.isBJunk(m.b[bestj-1]) && + m.a[besti-1] == m.b[bestj-1] { + besti, bestj, bestsize = besti-1, bestj-1, bestsize+1 + } + for besti+bestsize < ahi && bestj+bestsize < bhi && + m.isBJunk(m.b[bestj+bestsize]) && + m.a[besti+bestsize] == m.b[bestj+bestsize] { + bestsize++ + } + + return Match{A: besti, B: bestj, Size: bestsize} +} + +// GetMatchingBlocks returns list of triples describing matching subsequences. +// +// Each triple is of the form (i, j, n), and means that +// a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in +// i and in j. It's also guaranteed that if (i, j, n) and (i', j', n') are +// adjacent triples in the list, and the second is not the last triple in the +// list, then i+n != i' or j+n != j'. IOW, adjacent triples never describe +// adjacent equal blocks. +// +// The last triple is a dummy, (len(a), len(b), 0), and is the only +// triple with n==0. +func (m *SequenceMatcher) GetMatchingBlocks() []Match { + if m.matchingBlocks != nil { + return m.matchingBlocks + } + + var matchBlocks func(alo, ahi, blo, bhi int, matched []Match) []Match + matchBlocks = func(alo, ahi, blo, bhi int, matched []Match) []Match { + match := m.findLongestMatch(alo, ahi, blo, bhi) + i, j, k := match.A, match.B, match.Size + if match.Size > 0 { + if alo < i && blo < j { + matched = matchBlocks(alo, i, blo, j, matched) + } + matched = append(matched, match) + if i+k < ahi && j+k < bhi { + matched = matchBlocks(i+k, ahi, j+k, bhi, matched) + } + } + return matched + } + matched := matchBlocks(0, len(m.a), 0, len(m.b), nil) + + // It's possible that we have adjacent equal blocks in the + // matching_blocks list now. + nonAdjacent := []Match{} + i1, j1, k1 := 0, 0, 0 + for _, b := range matched { + // Is this block adjacent to i1, j1, k1? + i2, j2, k2 := b.A, b.B, b.Size + if i1+k1 == i2 && j1+k1 == j2 { + // Yes, so collapse them -- this just increases the length of + // the first block by the length of the second, and the first + // block so lengthened remains the block to compare against. + k1 += k2 + } else { + // Not adjacent. Remember the first block (k1==0 means it's + // the dummy we started with), and make the second block the + // new block to compare against. + if k1 > 0 { + nonAdjacent = append(nonAdjacent, Match{i1, j1, k1}) + } + i1, j1, k1 = i2, j2, k2 + } + } + if k1 > 0 { + nonAdjacent = append(nonAdjacent, Match{i1, j1, k1}) + } + + nonAdjacent = append(nonAdjacent, Match{len(m.a), len(m.b), 0}) + m.matchingBlocks = nonAdjacent + return m.matchingBlocks +} + +// GetOpCodes returns a list of 5-tuples describing how to turn a into b. +// +// Each tuple is of the form (tag, i1, i2, j1, j2). The first tuple +// has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the +// tuple preceding it, and likewise for j1 == the previous j2. +// +// The tags are characters, with these meanings: +// +// 'r' (replace): a[i1:i2] should be replaced by b[j1:j2] +// +// 'd' (delete): a[i1:i2] should be deleted, j1==j2 in this case. +// +// 'i' (insert): b[j1:j2] should be inserted at a[i1:i1], i1==i2 in this case. +// +// 'e' (equal): a[i1:i2] == b[j1:j2] +func (m *SequenceMatcher) GetOpCodes() []OpCode { + if m.opCodes != nil { + return m.opCodes + } + i, j := 0, 0 + matching := m.GetMatchingBlocks() + opCodes := make([]OpCode, 0, len(matching)) + for _, m := range matching { + // invariant: we've pumped out correct diffs to change + // a[:i] into b[:j], and the next matching block is + // a[ai:ai+size] == b[bj:bj+size]. So we need to pump + // out a diff to change a[i:ai] into b[j:bj], pump out + // the matching block, and move (i,j) beyond the match + ai, bj, size := m.A, m.B, m.Size + tag := byte(0) + if i < ai && j < bj { + tag = 'r' + } else if i < ai { + tag = 'd' + } else if j < bj { + tag = 'i' + } + if tag > 0 { + opCodes = append(opCodes, OpCode{tag, i, ai, j, bj}) + } + i, j = ai+size, bj+size + // the list of matching blocks is terminated by a + // sentinel with size 0 + if size > 0 { + opCodes = append(opCodes, OpCode{'e', ai, i, bj, j}) + } + } + m.opCodes = opCodes + return m.opCodes +} + +// GetGroupedOpCodes isolates change clusters by eliminating ranges with no changes. +// +// Returns a generator of groups with up to n lines of context. +// Each group is in the same format as returned by GetOpCodes(). +func (m *SequenceMatcher) GetGroupedOpCodes(n int) [][]OpCode { + if n < 0 { + n = 3 + } + codes := m.GetOpCodes() + if len(codes) == 0 { + codes = []OpCode{{'e', 0, 1, 0, 1}} + } + // Fixup leading and trailing groups if they show no changes. + if codes[0].Tag == 'e' { + c := codes[0] + i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2 + codes[0] = OpCode{c.Tag, max(i1, i2-n), i2, max(j1, j2-n), j2} + } + if codes[len(codes)-1].Tag == 'e' { + c := codes[len(codes)-1] + i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2 + codes[len(codes)-1] = OpCode{c.Tag, i1, min(i2, i1+n), j1, min(j2, j1+n)} + } + nn := n + n + groups := [][]OpCode{} + group := []OpCode{} + for _, c := range codes { + i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2 + // End the current group and start a new one whenever + // there is a large range with no changes. + if c.Tag == 'e' && i2-i1 > nn { + group = append(group, OpCode{c.Tag, i1, min(i2, i1+n), + j1, min(j2, j1+n)}) + groups = append(groups, group) + group = []OpCode{} + i1, j1 = max(i1, i2-n), max(j1, j2-n) + } + group = append(group, OpCode{c.Tag, i1, i2, j1, j2}) + } + if len(group) > 0 && !(len(group) == 1 && group[0].Tag == 'e') { + groups = append(groups, group) + } + return groups +} + +// Ratio returns a measure of the sequences' similarity (float in [0,1]). +// +// Where T is the total number of elements in both sequences, and +// M is the number of matches, this is 2.0*M / T. +// Note that this is 1 if the sequences are identical, and 0 if +// they have nothing in common. +// +// .Ratio() is expensive to compute if you haven't already computed +// .GetMatchingBlocks() or .GetOpCodes(), in which case you may +// want to try .QuickRatio() or .RealQuickRation() first to get an +// upper bound. +func (m *SequenceMatcher) Ratio() float64 { + matches := 0 + for _, m := range m.GetMatchingBlocks() { + matches += m.Size + } + return calculateRatio(matches, len(m.a)+len(m.b)) +} + +// QuickRatio returns an upper bound on ratio() relatively quickly. +// +// This isn't defined beyond that it is an upper bound on .Ratio(), and +// is faster to compute. +func (m *SequenceMatcher) QuickRatio() float64 { + // viewing a and b as multisets, set matches to the cardinality + // of their intersection; this counts the number of matches + // without regard to order, so is clearly an upper bound + if m.fullBCount == nil { + m.fullBCount = map[string]int{} + for _, s := range m.b { + m.fullBCount[s] = m.fullBCount[s] + 1 + } + } + + // avail[x] is the number of times x appears in 'b' less the + // number of times we've seen it in 'a' so far ... kinda + avail := map[string]int{} + matches := 0 + for _, s := range m.a { + n, ok := avail[s] + if !ok { + n = m.fullBCount[s] + } + avail[s] = n - 1 + if n > 0 { + matches++ + } + } + return calculateRatio(matches, len(m.a)+len(m.b)) +} + +// RealQuickRatio returns an upper bound on ratio() very quickly. +// +// This isn't defined beyond that it is an upper bound on .Ratio(), and +// is faster to compute than either .Ratio() or .QuickRatio(). +func (m *SequenceMatcher) RealQuickRatio() float64 { + la, lb := len(m.a), len(m.b) + return calculateRatio(min(la, lb), la+lb) +} + +// Convert range to the "ed" format +func formatRangeUnified(start, stop int) string { + // Per the diff spec at http://www.unix.org/single_unix_specification/ + beginning := start + 1 // lines start numbering with one + length := stop - start + if length == 1 { + return fmt.Sprintf("%d", beginning) + } + if length == 0 { + beginning-- // empty ranges begin at line just before the range + } + return fmt.Sprintf("%d,%d", beginning, length) +} + +// UnifiedDiff represents the unified diff parameters. +type UnifiedDiff struct { + A []string // First sequence lines + FromFile string // First file name + FromDate string // First file time + B []string // Second sequence lines + ToFile string // Second file name + ToDate string // Second file time + Eol string // Headers end of line, defaults to LF + Context int // Number of context lines +} + +// WriteUnifiedDiff compares two sequences of lines; generates the delta as +// a unified diff. +// +// Unified diffs are a compact way of showing line changes and a few +// lines of context. The number of context lines is set by 'n' which +// defaults to three. +// +// By default, the diff control lines (those with ---, +++, or @@) are +// created with a trailing newline. This is helpful so that inputs +// created from file.readlines() result in diffs that are suitable for +// file.writelines() since both the inputs and outputs have trailing +// newlines. +// +// For inputs that do not have trailing newlines, set the lineterm +// argument to "" so that the output will be uniformly newline free. +// +// The unidiff format normally has a header for filenames and modification +// times. Any or all of these may be specified using strings for +// 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'. +// The modification times are normally expressed in the ISO 8601 format. +func WriteUnifiedDiff(writer io.Writer, diff UnifiedDiff) error { + buf := bufio.NewWriter(writer) + defer buf.Flush() + wf := func(format string, args ...interface{}) error { + _, err := buf.WriteString(fmt.Sprintf(format, args...)) + return err + } + ws := func(s string) error { + _, err := buf.WriteString(s) + return err + } + + if len(diff.Eol) == 0 { + diff.Eol = "\n" + } + + started := false + m := NewMatcher(diff.A, diff.B) + for _, g := range m.GetGroupedOpCodes(diff.Context) { + if !started { + started = true + fromDate := "" + if len(diff.FromDate) > 0 { + fromDate = "\t" + diff.FromDate + } + toDate := "" + if len(diff.ToDate) > 0 { + toDate = "\t" + diff.ToDate + } + if diff.FromFile != "" || diff.ToFile != "" { + err := wf("--- %s%s%s", diff.FromFile, fromDate, diff.Eol) + if err != nil { + return err + } + err = wf("+++ %s%s%s", diff.ToFile, toDate, diff.Eol) + if err != nil { + return err + } + } + } + first, last := g[0], g[len(g)-1] + range1 := formatRangeUnified(first.I1, last.I2) + range2 := formatRangeUnified(first.J1, last.J2) + if err := wf("@@ -%s +%s @@%s", range1, range2, diff.Eol); err != nil { + return err + } + for _, c := range g { + i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2 + if c.Tag == 'e' { + for _, line := range diff.A[i1:i2] { + if err := ws(" " + line); err != nil { + return err + } + } + continue + } + if c.Tag == 'r' || c.Tag == 'd' { + for _, line := range diff.A[i1:i2] { + if err := ws("-" + line); err != nil { + return err + } + } + } + if c.Tag == 'r' || c.Tag == 'i' { + for _, line := range diff.B[j1:j2] { + if err := ws("+" + line); err != nil { + return err + } + } + } + } + } + return nil +} + +// GetUnifiedDiffString is like WriteUnifiedDiff but returns the diff as a string. +func GetUnifiedDiffString(diff UnifiedDiff) (string, error) { + w := &bytes.Buffer{} + err := WriteUnifiedDiff(w, diff) + return w.String(), err +} + +// Convert range to the "ed" format. +func formatRangeContext(start, stop int) string { + // Per the diff spec at http://www.unix.org/single_unix_specification/ + beginning := start + 1 // lines start numbering with one + length := stop - start + if length == 0 { + beginning-- // empty ranges begin at line just before the range + } + if length <= 1 { + return fmt.Sprintf("%d", beginning) + } + return fmt.Sprintf("%d,%d", beginning, beginning+length-1) +} + +type ContextDiff UnifiedDiff + +// WriteContextDiff compares two sequences of lines; generates the delta as a context diff. +// +// Context diffs are a compact way of showing line changes and a few +// lines of context. The number of context lines is set by diff.Context +// which defaults to three. +// +// By default, the diff control lines (those with *** or ---) are +// created with a trailing newline. +// +// For inputs that do not have trailing newlines, set the diff.Eol +// argument to "" so that the output will be uniformly newline free. +// +// The context diff format normally has a header for filenames and +// modification times. Any or all of these may be specified using +// strings for diff.FromFile, diff.ToFile, diff.FromDate, diff.ToDate. +// The modification times are normally expressed in the ISO 8601 format. +// If not specified, the strings default to blanks. +func WriteContextDiff(writer io.Writer, diff ContextDiff) error { + buf := bufio.NewWriter(writer) + defer buf.Flush() + var diffErr error + wf := func(format string, args ...interface{}) { + _, err := buf.WriteString(fmt.Sprintf(format, args...)) + if diffErr == nil && err != nil { + diffErr = err + } + } + ws := func(s string) { + _, err := buf.WriteString(s) + if diffErr == nil && err != nil { + diffErr = err + } + } + + if len(diff.Eol) == 0 { + diff.Eol = "\n" + } + + prefix := map[byte]string{ + 'i': "+ ", + 'd': "- ", + 'r': "! ", + 'e': " ", + } + + started := false + m := NewMatcher(diff.A, diff.B) + for _, g := range m.GetGroupedOpCodes(diff.Context) { + if !started { + started = true + fromDate := "" + if len(diff.FromDate) > 0 { + fromDate = "\t" + diff.FromDate + } + toDate := "" + if len(diff.ToDate) > 0 { + toDate = "\t" + diff.ToDate + } + if diff.FromFile != "" || diff.ToFile != "" { + wf("*** %s%s%s", diff.FromFile, fromDate, diff.Eol) + wf("--- %s%s%s", diff.ToFile, toDate, diff.Eol) + } + } + + first, last := g[0], g[len(g)-1] + ws("***************" + diff.Eol) + + range1 := formatRangeContext(first.I1, last.I2) + wf("*** %s ****%s", range1, diff.Eol) + for _, c := range g { + if c.Tag == 'r' || c.Tag == 'd' { + for _, cc := range g { + if cc.Tag == 'i' { + continue + } + for _, line := range diff.A[cc.I1:cc.I2] { + ws(prefix[cc.Tag] + line) + } + } + break + } + } + + range2 := formatRangeContext(first.J1, last.J2) + wf("--- %s ----%s", range2, diff.Eol) + for _, c := range g { + if c.Tag == 'r' || c.Tag == 'i' { + for _, cc := range g { + if cc.Tag == 'd' { + continue + } + for _, line := range diff.B[cc.J1:cc.J2] { + ws(prefix[cc.Tag] + line) + } + } + break + } + } + } + return diffErr +} + +// GetContextDiffString is like WriteContextDiff but returns the diff as a string. +func GetContextDiffString(diff ContextDiff) (string, error) { + w := &bytes.Buffer{} + err := WriteContextDiff(w, diff) + return w.String(), err +} + +// SplitLines splits a string on "\n" while preserving them. The output can be used +// as input for UnifiedDiff and ContextDiff structures. +func SplitLines(s string) []string { + lines := strings.SplitAfter(s, "\n") + lines[len(lines)-1] += "\n" + return lines +} diff --git a/vendor/go.mongodb.org/mongo-driver/internal/integtest/integtest.go b/vendor/go.mongodb.org/mongo-driver/internal/integtest/integtest.go new file mode 100644 index 000000000..c9048362b --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/internal/integtest/integtest.go @@ -0,0 +1,285 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package integtest + +import ( + "context" + "errors" + "fmt" + "math" + "os" + "reflect" + "strconv" + "strings" + "sync" + "testing" + + "go.mongodb.org/mongo-driver/event" + "go.mongodb.org/mongo-driver/internal/require" + "go.mongodb.org/mongo-driver/mongo/description" + "go.mongodb.org/mongo-driver/mongo/options" + "go.mongodb.org/mongo-driver/x/bsonx/bsoncore" + "go.mongodb.org/mongo-driver/x/mongo/driver" + "go.mongodb.org/mongo-driver/x/mongo/driver/connstring" + "go.mongodb.org/mongo-driver/x/mongo/driver/operation" + "go.mongodb.org/mongo-driver/x/mongo/driver/topology" +) + +var connectionString *connstring.ConnString +var connectionStringOnce sync.Once +var connectionStringErr error +var liveTopology *topology.Topology +var liveTopologyOnce sync.Once +var liveTopologyErr error + +// AddOptionsToURI appends connection string options to a URI. +func AddOptionsToURI(uri string, opts ...string) string { + if !strings.ContainsRune(uri, '?') { + if uri[len(uri)-1] != '/' { + uri += "/" + } + + uri += "?" + } else { + uri += "&" + } + + for _, opt := range opts { + uri += opt + } + + return uri +} + +// AddTLSConfigToURI checks for the environmental variable indicating that the tests are being run +// on an SSL-enabled server, and if so, returns a new URI with the necessary configuration. +func AddTLSConfigToURI(uri string) string { + caFile := os.Getenv("MONGO_GO_DRIVER_CA_FILE") + if len(caFile) == 0 { + return uri + } + + return AddOptionsToURI(uri, "ssl=true&sslCertificateAuthorityFile=", caFile) +} + +// AddCompressorToURI checks for the environment variable indicating that the tests are being run with compression +// enabled. If so, it returns a new URI with the necessary configuration +func AddCompressorToURI(uri string) string { + comp := os.Getenv("MONGO_GO_DRIVER_COMPRESSOR") + if len(comp) == 0 { + return uri + } + + return AddOptionsToURI(uri, "compressors=", comp) +} + +// AddTestServerAPIVersion adds the latest server API version in a ServerAPIOptions to passed-in opts. +func AddTestServerAPIVersion(opts *options.ClientOptions) { + if os.Getenv("REQUIRE_API_VERSION") == "true" { + opts.SetServerAPIOptions(options.ServerAPI(driver.TestServerAPIVersion)) + } +} + +// MonitoredTopology returns a new topology with the command monitor attached +func MonitoredTopology(t *testing.T, dbName string, monitor *event.CommandMonitor) *topology.Topology { + uri, err := MongoDBURI() + if err != nil { + t.Fatal(err) + } + cfg, err := topology.NewConfig(options.Client().ApplyURI(uri).SetMonitor(monitor), nil) + if err != nil { + t.Fatal(err) + } + + monitoredTopology, err := topology.New(cfg) + if err != nil { + t.Fatal(err) + } else { + _ = monitoredTopology.Connect() + + err = operation.NewCommand(bsoncore.BuildDocument(nil, bsoncore.AppendInt32Element(nil, "dropDatabase", 1))). + Database(dbName).ServerSelector(description.WriteSelector()).Deployment(monitoredTopology).Execute(context.Background()) + + require.NoError(t, err) + } + + return monitoredTopology +} + +// Topology gets the globally configured topology. +func Topology(t *testing.T) *topology.Topology { + uri, err := MongoDBURI() + require.NoError(t, err, "error constructing mongodb URI: %v", err) + cfg, err := topology.NewConfig(options.Client().ApplyURI(uri), nil) + require.NoError(t, err, "error constructing topology config: %v", err) + + liveTopologyOnce.Do(func() { + var err error + liveTopology, err = topology.New(cfg) + if err != nil { + liveTopologyErr = err + } else { + _ = liveTopology.Connect() + + err = operation.NewCommand(bsoncore.BuildDocument(nil, bsoncore.AppendInt32Element(nil, "dropDatabase", 1))). + Database(DBName(t)).ServerSelector(description.WriteSelector()).Deployment(liveTopology).Execute(context.Background()) + require.NoError(t, err) + } + }) + + if liveTopologyErr != nil { + t.Fatal(liveTopologyErr) + } + + return liveTopology +} + +// TopologyWithCredential takes an "options.Credential" object and returns a connected topology. +func TopologyWithCredential(t *testing.T, credential options.Credential) *topology.Topology { + uri, err := MongoDBURI() + if err != nil { + t.Fatalf("error constructing mongodb URI: %v", err) + } + cfg, err := topology.NewConfig(options.Client().ApplyURI(uri).SetAuth(credential), nil) + if err != nil { + t.Fatalf("error constructing topology config: %v", err) + } + topology, err := topology.New(cfg) + if err != nil { + t.Fatal("Could not construct topology") + } + err = topology.Connect() + if err != nil { + t.Fatal("Could not start topology connection") + } + return topology +} + +// ColName gets a collection name that should be unique +// to the currently executing test. +func ColName(t *testing.T) string { + // Get this indirectly to avoid copying a mutex + v := reflect.Indirect(reflect.ValueOf(t)) + name := v.FieldByName("name") + return name.String() +} + +// MongoDBURI will construct the MongoDB URI from the MONGODB_URI environment variable for testing. The default host is +// "localhost" and the default port is "27017" +func MongoDBURI() (string, error) { + uri := os.Getenv("MONGODB_URI") + if uri == "" { + uri = "mongodb://localhost:27017" + } + + uri = AddTLSConfigToURI(uri) + uri = AddCompressorToURI(uri) + uri, err := AddServerlessAuthCredentials(uri) + return uri, err +} + +// AddServerlessAuthCredentials will attempt to construct the serverless auth credentials for a URI. +func AddServerlessAuthCredentials(uri string) (string, error) { + if os.Getenv("SERVERLESS") != "serverless" { + return uri, nil + } + user := os.Getenv("SERVERLESS_ATLAS_USER") + if user == "" { + return "", fmt.Errorf("serverless expects SERVERLESS_ATLAS_USER to be set") + } + password := os.Getenv("SERVERLESS_ATLAS_PASSWORD") + if password == "" { + return "", fmt.Errorf("serverless expects SERVERLESS_ATLAS_PASSWORD to be set") + } + + var scheme string + // remove the scheme + switch { + case strings.HasPrefix(uri, "mongodb+srv://"): + scheme = "mongodb+srv://" + case strings.HasPrefix(uri, "mongodb://"): + scheme = "mongodb://" + default: + return "", errors.New(`scheme must be "mongodb" or "mongodb+srv"`) + } + + uri = scheme + user + ":" + password + "@" + uri[len(scheme):] + return uri, nil +} + +// ConnString gets the globally configured connection string. +func ConnString(t *testing.T) *connstring.ConnString { + connectionStringOnce.Do(func() { + uri, err := MongoDBURI() + require.NoError(t, err, "error constructing mongodb URI: %v", err) + + connectionString, err = connstring.ParseAndValidate(uri) + if err != nil { + connectionStringErr = err + } + }) + if connectionStringErr != nil { + t.Fatal(connectionStringErr) + } + + return connectionString +} + +func GetConnString() (*connstring.ConnString, error) { + mongodbURI := os.Getenv("MONGODB_URI") + if mongodbURI == "" { + mongodbURI = "mongodb://localhost:27017" + } + + mongodbURI = AddTLSConfigToURI(mongodbURI) + + cs, err := connstring.ParseAndValidate(mongodbURI) + if err != nil { + return nil, err + } + + return cs, nil +} + +// DBName gets the globally configured database name. +func DBName(t *testing.T) string { + return GetDBName(ConnString(t)) +} + +func GetDBName(cs *connstring.ConnString) string { + if cs.Database != "" { + return cs.Database + } + + return fmt.Sprintf("mongo-go-driver-%d", os.Getpid()) +} + +// CompareVersions compares two version number strings (i.e. positive integers separated by +// periods). Comparisons are done to the lesser precision of the two versions. For example, 3.2 is +// considered equal to 3.2.11, whereas 3.2.0 is considered less than 3.2.11. +// +// Returns a positive int if version1 is greater than version2, a negative int if version1 is less +// than version2, and 0 if version1 is equal to version2. +func CompareVersions(t *testing.T, v1 string, v2 string) int { + n1 := strings.Split(v1, ".") + n2 := strings.Split(v2, ".") + + for i := 0; i < int(math.Min(float64(len(n1)), float64(len(n2)))); i++ { + i1, err := strconv.Atoi(n1[i]) + require.NoError(t, err) + + i2, err := strconv.Atoi(n2[i]) + require.NoError(t, err) + + difference := i1 - i2 + if difference != 0 { + return difference + } + } + + return 0 +} diff --git a/vendor/go.mongodb.org/mongo-driver/internal/require/require.go b/vendor/go.mongodb.org/mongo-driver/internal/require/require.go new file mode 100644 index 000000000..4854bbcd5 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/internal/require/require.go @@ -0,0 +1,819 @@ +// Copied from https://github.com/stretchr/testify/blob/1333b5d3bda8cf5aedcf3e1aaa95cac28aaab892/require/require.go + +// Copyright 2020 Mat Ryer, Tyler Bunnell and all contributors. All rights reserved. +// Use of this source code is governed by an MIT-style license that can be found in +// the THIRD-PARTY-NOTICES file. + +package require + +import ( + time "time" + + assert "go.mongodb.org/mongo-driver/internal/assert" +) + +// TestingT is an interface wrapper around *testing.T +type TestingT interface { + Errorf(format string, args ...interface{}) + FailNow() +} + +type tHelper interface { + Helper() +} + +// Contains asserts that the specified string, list(array, slice...) or map contains the +// specified substring or element. +// +// assert.Contains(t, "Hello World", "World") +// assert.Contains(t, ["Hello", "World"], "World") +// assert.Contains(t, {"Hello": "World"}, "Hello") +func Contains(t TestingT, s interface{}, contains interface{}, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Contains(t, s, contains, msgAndArgs...) { + return + } + t.FailNow() +} + +// Containsf asserts that the specified string, list(array, slice...) or map contains the +// specified substring or element. +// +// assert.Containsf(t, "Hello World", "World", "error message %s", "formatted") +// assert.Containsf(t, ["Hello", "World"], "World", "error message %s", "formatted") +// assert.Containsf(t, {"Hello": "World"}, "Hello", "error message %s", "formatted") +func Containsf(t TestingT, s interface{}, contains interface{}, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Containsf(t, s, contains, msg, args...) { + return + } + t.FailNow() +} + +// ElementsMatch asserts that the specified listA(array, slice...) is equal to specified +// listB(array, slice...) ignoring the order of the elements. If there are duplicate elements, +// the number of appearances of each of them in both lists should match. +// +// assert.ElementsMatch(t, [1, 3, 2, 3], [1, 3, 3, 2]) +func ElementsMatch(t TestingT, listA interface{}, listB interface{}, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.ElementsMatch(t, listA, listB, msgAndArgs...) { + return + } + t.FailNow() +} + +// ElementsMatchf asserts that the specified listA(array, slice...) is equal to specified +// listB(array, slice...) ignoring the order of the elements. If there are duplicate elements, +// the number of appearances of each of them in both lists should match. +// +// assert.ElementsMatchf(t, [1, 3, 2, 3], [1, 3, 3, 2], "error message %s", "formatted") +func ElementsMatchf(t TestingT, listA interface{}, listB interface{}, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.ElementsMatchf(t, listA, listB, msg, args...) { + return + } + t.FailNow() +} + +// Equal asserts that two objects are equal. +// +// assert.Equal(t, 123, 123) +// +// Pointer variable equality is determined based on the equality of the +// referenced values (as opposed to the memory addresses). Function equality +// cannot be determined and will always fail. +func Equal(t TestingT, expected interface{}, actual interface{}, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Equal(t, expected, actual, msgAndArgs...) { + return + } + t.FailNow() +} + +// EqualError asserts that a function returned an error (i.e. not `nil`) +// and that it is equal to the provided error. +// +// actualObj, err := SomeFunction() +// assert.EqualError(t, err, expectedErrorString) +func EqualError(t TestingT, theError error, errString string, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.EqualError(t, theError, errString, msgAndArgs...) { + return + } + t.FailNow() +} + +// EqualErrorf asserts that a function returned an error (i.e. not `nil`) +// and that it is equal to the provided error. +// +// actualObj, err := SomeFunction() +// assert.EqualErrorf(t, err, expectedErrorString, "error message %s", "formatted") +func EqualErrorf(t TestingT, theError error, errString string, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.EqualErrorf(t, theError, errString, msg, args...) { + return + } + t.FailNow() +} + +// EqualValues asserts that two objects are equal or convertible to the same types +// and equal. +// +// assert.EqualValues(t, uint32(123), int32(123)) +func EqualValues(t TestingT, expected interface{}, actual interface{}, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.EqualValues(t, expected, actual, msgAndArgs...) { + return + } + t.FailNow() +} + +// EqualValuesf asserts that two objects are equal or convertible to the same types +// and equal. +// +// assert.EqualValuesf(t, uint32(123), int32(123), "error message %s", "formatted") +func EqualValuesf(t TestingT, expected interface{}, actual interface{}, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.EqualValuesf(t, expected, actual, msg, args...) { + return + } + t.FailNow() +} + +// Equalf asserts that two objects are equal. +// +// assert.Equalf(t, 123, 123, "error message %s", "formatted") +// +// Pointer variable equality is determined based on the equality of the +// referenced values (as opposed to the memory addresses). Function equality +// cannot be determined and will always fail. +func Equalf(t TestingT, expected interface{}, actual interface{}, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Equalf(t, expected, actual, msg, args...) { + return + } + t.FailNow() +} + +// Error asserts that a function returned an error (i.e. not `nil`). +// +// actualObj, err := SomeFunction() +// if assert.Error(t, err) { +// assert.Equal(t, expectedError, err) +// } +func Error(t TestingT, err error, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Error(t, err, msgAndArgs...) { + return + } + t.FailNow() +} + +// ErrorContains asserts that a function returned an error (i.e. not `nil`) +// and that the error contains the specified substring. +// +// actualObj, err := SomeFunction() +// assert.ErrorContains(t, err, expectedErrorSubString) +func ErrorContains(t TestingT, theError error, contains string, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.ErrorContains(t, theError, contains, msgAndArgs...) { + return + } + t.FailNow() +} + +// ErrorContainsf asserts that a function returned an error (i.e. not `nil`) +// and that the error contains the specified substring. +// +// actualObj, err := SomeFunction() +// assert.ErrorContainsf(t, err, expectedErrorSubString, "error message %s", "formatted") +func ErrorContainsf(t TestingT, theError error, contains string, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.ErrorContainsf(t, theError, contains, msg, args...) { + return + } + t.FailNow() +} + +// Errorf asserts that a function returned an error (i.e. not `nil`). +// +// actualObj, err := SomeFunction() +// if assert.Errorf(t, err, "error message %s", "formatted") { +// assert.Equal(t, expectedErrorf, err) +// } +func Errorf(t TestingT, err error, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Errorf(t, err, msg, args...) { + return + } + t.FailNow() +} + +// Eventually asserts that given condition will be met in waitFor time, +// periodically checking target function each tick. +// +// assert.Eventually(t, func() bool { return true; }, time.Second, 10*time.Millisecond) +func Eventually(t TestingT, condition func() bool, waitFor time.Duration, tick time.Duration, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Eventually(t, condition, waitFor, tick, msgAndArgs...) { + return + } + t.FailNow() +} + +// Eventuallyf asserts that given condition will be met in waitFor time, +// periodically checking target function each tick. +// +// assert.Eventuallyf(t, func() bool { return true; }, time.Second, 10*time.Millisecond, "error message %s", "formatted") +func Eventuallyf(t TestingT, condition func() bool, waitFor time.Duration, tick time.Duration, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Eventuallyf(t, condition, waitFor, tick, msg, args...) { + return + } + t.FailNow() +} + +// Fail reports a failure through +func Fail(t TestingT, failureMessage string, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Fail(t, failureMessage, msgAndArgs...) { + return + } + t.FailNow() +} + +// FailNow fails test +func FailNow(t TestingT, failureMessage string, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.FailNow(t, failureMessage, msgAndArgs...) { + return + } + t.FailNow() +} + +// FailNowf fails test +func FailNowf(t TestingT, failureMessage string, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.FailNowf(t, failureMessage, msg, args...) { + return + } + t.FailNow() +} + +// Failf reports a failure through +func Failf(t TestingT, failureMessage string, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Failf(t, failureMessage, msg, args...) { + return + } + t.FailNow() +} + +// False asserts that the specified value is false. +// +// assert.False(t, myBool) +func False(t TestingT, value bool, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.False(t, value, msgAndArgs...) { + return + } + t.FailNow() +} + +// Falsef asserts that the specified value is false. +// +// assert.Falsef(t, myBool, "error message %s", "formatted") +func Falsef(t TestingT, value bool, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Falsef(t, value, msg, args...) { + return + } + t.FailNow() +} + +// Greater asserts that the first element is greater than the second +// +// assert.Greater(t, 2, 1) +// assert.Greater(t, float64(2), float64(1)) +// assert.Greater(t, "b", "a") +func Greater(t TestingT, e1 interface{}, e2 interface{}, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Greater(t, e1, e2, msgAndArgs...) { + return + } + t.FailNow() +} + +// GreaterOrEqual asserts that the first element is greater than or equal to the second +// +// assert.GreaterOrEqual(t, 2, 1) +// assert.GreaterOrEqual(t, 2, 2) +// assert.GreaterOrEqual(t, "b", "a") +// assert.GreaterOrEqual(t, "b", "b") +func GreaterOrEqual(t TestingT, e1 interface{}, e2 interface{}, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.GreaterOrEqual(t, e1, e2, msgAndArgs...) { + return + } + t.FailNow() +} + +// GreaterOrEqualf asserts that the first element is greater than or equal to the second +// +// assert.GreaterOrEqualf(t, 2, 1, "error message %s", "formatted") +// assert.GreaterOrEqualf(t, 2, 2, "error message %s", "formatted") +// assert.GreaterOrEqualf(t, "b", "a", "error message %s", "formatted") +// assert.GreaterOrEqualf(t, "b", "b", "error message %s", "formatted") +func GreaterOrEqualf(t TestingT, e1 interface{}, e2 interface{}, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.GreaterOrEqualf(t, e1, e2, msg, args...) { + return + } + t.FailNow() +} + +// Greaterf asserts that the first element is greater than the second +// +// assert.Greaterf(t, 2, 1, "error message %s", "formatted") +// assert.Greaterf(t, float64(2), float64(1), "error message %s", "formatted") +// assert.Greaterf(t, "b", "a", "error message %s", "formatted") +func Greaterf(t TestingT, e1 interface{}, e2 interface{}, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Greaterf(t, e1, e2, msg, args...) { + return + } + t.FailNow() +} + +// InDelta asserts that the two numerals are within delta of each other. +// +// assert.InDelta(t, math.Pi, 22/7.0, 0.01) +func InDelta(t TestingT, expected interface{}, actual interface{}, delta float64, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.InDelta(t, expected, actual, delta, msgAndArgs...) { + return + } + t.FailNow() +} + +// InDeltaf asserts that the two numerals are within delta of each other. +// +// assert.InDeltaf(t, math.Pi, 22/7.0, 0.01, "error message %s", "formatted") +func InDeltaf(t TestingT, expected interface{}, actual interface{}, delta float64, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.InDeltaf(t, expected, actual, delta, msg, args...) { + return + } + t.FailNow() +} + +// IsType asserts that the specified objects are of the same type. +func IsType(t TestingT, expectedType interface{}, object interface{}, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.IsType(t, expectedType, object, msgAndArgs...) { + return + } + t.FailNow() +} + +// IsTypef asserts that the specified objects are of the same type. +func IsTypef(t TestingT, expectedType interface{}, object interface{}, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.IsTypef(t, expectedType, object, msg, args...) { + return + } + t.FailNow() +} + +// Len asserts that the specified object has specific length. +// Len also fails if the object has a type that len() not accept. +// +// assert.Len(t, mySlice, 3) +func Len(t TestingT, object interface{}, length int, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Len(t, object, length, msgAndArgs...) { + return + } + t.FailNow() +} + +// Lenf asserts that the specified object has specific length. +// Lenf also fails if the object has a type that len() not accept. +// +// assert.Lenf(t, mySlice, 3, "error message %s", "formatted") +func Lenf(t TestingT, object interface{}, length int, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Lenf(t, object, length, msg, args...) { + return + } + t.FailNow() +} + +// Less asserts that the first element is less than the second +// +// assert.Less(t, 1, 2) +// assert.Less(t, float64(1), float64(2)) +// assert.Less(t, "a", "b") +func Less(t TestingT, e1 interface{}, e2 interface{}, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Less(t, e1, e2, msgAndArgs...) { + return + } + t.FailNow() +} + +// LessOrEqual asserts that the first element is less than or equal to the second +// +// assert.LessOrEqual(t, 1, 2) +// assert.LessOrEqual(t, 2, 2) +// assert.LessOrEqual(t, "a", "b") +// assert.LessOrEqual(t, "b", "b") +func LessOrEqual(t TestingT, e1 interface{}, e2 interface{}, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.LessOrEqual(t, e1, e2, msgAndArgs...) { + return + } + t.FailNow() +} + +// LessOrEqualf asserts that the first element is less than or equal to the second +// +// assert.LessOrEqualf(t, 1, 2, "error message %s", "formatted") +// assert.LessOrEqualf(t, 2, 2, "error message %s", "formatted") +// assert.LessOrEqualf(t, "a", "b", "error message %s", "formatted") +// assert.LessOrEqualf(t, "b", "b", "error message %s", "formatted") +func LessOrEqualf(t TestingT, e1 interface{}, e2 interface{}, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.LessOrEqualf(t, e1, e2, msg, args...) { + return + } + t.FailNow() +} + +// Lessf asserts that the first element is less than the second +// +// assert.Lessf(t, 1, 2, "error message %s", "formatted") +// assert.Lessf(t, float64(1), float64(2), "error message %s", "formatted") +// assert.Lessf(t, "a", "b", "error message %s", "formatted") +func Lessf(t TestingT, e1 interface{}, e2 interface{}, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Lessf(t, e1, e2, msg, args...) { + return + } + t.FailNow() +} + +// Negative asserts that the specified element is negative +// +// assert.Negative(t, -1) +// assert.Negative(t, -1.23) +func Negative(t TestingT, e interface{}, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Negative(t, e, msgAndArgs...) { + return + } + t.FailNow() +} + +// Negativef asserts that the specified element is negative +// +// assert.Negativef(t, -1, "error message %s", "formatted") +// assert.Negativef(t, -1.23, "error message %s", "formatted") +func Negativef(t TestingT, e interface{}, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Negativef(t, e, msg, args...) { + return + } + t.FailNow() +} + +// Nil asserts that the specified object is nil. +// +// assert.Nil(t, err) +func Nil(t TestingT, object interface{}, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Nil(t, object, msgAndArgs...) { + return + } + t.FailNow() +} + +// Nilf asserts that the specified object is nil. +// +// assert.Nilf(t, err, "error message %s", "formatted") +func Nilf(t TestingT, object interface{}, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Nilf(t, object, msg, args...) { + return + } + t.FailNow() +} + +// NoError asserts that a function returned no error (i.e. `nil`). +// +// actualObj, err := SomeFunction() +// if assert.NoError(t, err) { +// assert.Equal(t, expectedObj, actualObj) +// } +func NoError(t TestingT, err error, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.NoError(t, err, msgAndArgs...) { + return + } + t.FailNow() +} + +// NoErrorf asserts that a function returned no error (i.e. `nil`). +// +// actualObj, err := SomeFunction() +// if assert.NoErrorf(t, err, "error message %s", "formatted") { +// assert.Equal(t, expectedObj, actualObj) +// } +func NoErrorf(t TestingT, err error, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.NoErrorf(t, err, msg, args...) { + return + } + t.FailNow() +} + +// NotContains asserts that the specified string, list(array, slice...) or map does NOT contain the +// specified substring or element. +// +// assert.NotContains(t, "Hello World", "Earth") +// assert.NotContains(t, ["Hello", "World"], "Earth") +// assert.NotContains(t, {"Hello": "World"}, "Earth") +func NotContains(t TestingT, s interface{}, contains interface{}, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.NotContains(t, s, contains, msgAndArgs...) { + return + } + t.FailNow() +} + +// NotContainsf asserts that the specified string, list(array, slice...) or map does NOT contain the +// specified substring or element. +// +// assert.NotContainsf(t, "Hello World", "Earth", "error message %s", "formatted") +// assert.NotContainsf(t, ["Hello", "World"], "Earth", "error message %s", "formatted") +// assert.NotContainsf(t, {"Hello": "World"}, "Earth", "error message %s", "formatted") +func NotContainsf(t TestingT, s interface{}, contains interface{}, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.NotContainsf(t, s, contains, msg, args...) { + return + } + t.FailNow() +} + +// NotEqual asserts that the specified values are NOT equal. +// +// assert.NotEqual(t, obj1, obj2) +// +// Pointer variable equality is determined based on the equality of the +// referenced values (as opposed to the memory addresses). +func NotEqual(t TestingT, expected interface{}, actual interface{}, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.NotEqual(t, expected, actual, msgAndArgs...) { + return + } + t.FailNow() +} + +// NotEqualValues asserts that two objects are not equal even when converted to the same type +// +// assert.NotEqualValues(t, obj1, obj2) +func NotEqualValues(t TestingT, expected interface{}, actual interface{}, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.NotEqualValues(t, expected, actual, msgAndArgs...) { + return + } + t.FailNow() +} + +// NotEqualValuesf asserts that two objects are not equal even when converted to the same type +// +// assert.NotEqualValuesf(t, obj1, obj2, "error message %s", "formatted") +func NotEqualValuesf(t TestingT, expected interface{}, actual interface{}, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.NotEqualValuesf(t, expected, actual, msg, args...) { + return + } + t.FailNow() +} + +// NotEqualf asserts that the specified values are NOT equal. +// +// assert.NotEqualf(t, obj1, obj2, "error message %s", "formatted") +// +// Pointer variable equality is determined based on the equality of the +// referenced values (as opposed to the memory addresses). +func NotEqualf(t TestingT, expected interface{}, actual interface{}, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.NotEqualf(t, expected, actual, msg, args...) { + return + } + t.FailNow() +} + +// NotNil asserts that the specified object is not nil. +// +// assert.NotNil(t, err) +func NotNil(t TestingT, object interface{}, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.NotNil(t, object, msgAndArgs...) { + return + } + t.FailNow() +} + +// NotNilf asserts that the specified object is not nil. +// +// assert.NotNilf(t, err, "error message %s", "formatted") +func NotNilf(t TestingT, object interface{}, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.NotNilf(t, object, msg, args...) { + return + } + t.FailNow() +} + +// Positive asserts that the specified element is positive +// +// assert.Positive(t, 1) +// assert.Positive(t, 1.23) +func Positive(t TestingT, e interface{}, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Positive(t, e, msgAndArgs...) { + return + } + t.FailNow() +} + +// Positivef asserts that the specified element is positive +// +// assert.Positivef(t, 1, "error message %s", "formatted") +// assert.Positivef(t, 1.23, "error message %s", "formatted") +func Positivef(t TestingT, e interface{}, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Positivef(t, e, msg, args...) { + return + } + t.FailNow() +} + +// True asserts that the specified value is true. +// +// assert.True(t, myBool) +func True(t TestingT, value bool, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.True(t, value, msgAndArgs...) { + return + } + t.FailNow() +} + +// Truef asserts that the specified value is true. +// +// assert.Truef(t, myBool, "error message %s", "formatted") +func Truef(t TestingT, value bool, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.Truef(t, value, msg, args...) { + return + } + t.FailNow() +} + +// WithinDuration asserts that the two times are within duration delta of each other. +// +// assert.WithinDuration(t, time.Now(), time.Now(), 10*time.Second) +func WithinDuration(t TestingT, expected time.Time, actual time.Time, delta time.Duration, msgAndArgs ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.WithinDuration(t, expected, actual, delta, msgAndArgs...) { + return + } + t.FailNow() +} + +// WithinDurationf asserts that the two times are within duration delta of each other. +// +// assert.WithinDurationf(t, time.Now(), time.Now(), 10*time.Second, "error message %s", "formatted") +func WithinDurationf(t TestingT, expected time.Time, actual time.Time, delta time.Duration, msg string, args ...interface{}) { + if h, ok := t.(tHelper); ok { + h.Helper() + } + if assert.WithinDurationf(t, expected, actual, delta, msg, args...) { + return + } + t.FailNow() +} diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/bulk_write.go b/vendor/go.mongodb.org/mongo-driver/mongo/bulk_write.go index 3fdb67b9a..40f1181e0 100644 --- a/vendor/go.mongodb.org/mongo-driver/mongo/bulk_write.go +++ b/vendor/go.mongodb.org/mongo-driver/mongo/bulk_write.go @@ -186,7 +186,7 @@ func (bw *bulkWrite) runInsert(ctx context.Context, batch bulkWriteBatch) (opera Database(bw.collection.db.name).Collection(bw.collection.name). Deployment(bw.collection.client.deployment).Crypt(bw.collection.client.cryptFLE). ServerAPI(bw.collection.client.serverAPI).Timeout(bw.collection.client.timeout). - Logger(bw.collection.client.logger) + Logger(bw.collection.client.logger).Authenticator(bw.collection.client.authenticator) if bw.comment != nil { comment, err := marshalValue(bw.comment, bw.collection.bsonOpts, bw.collection.registry) if err != nil { @@ -256,7 +256,7 @@ func (bw *bulkWrite) runDelete(ctx context.Context, batch bulkWriteBatch) (opera Database(bw.collection.db.name).Collection(bw.collection.name). Deployment(bw.collection.client.deployment).Crypt(bw.collection.client.cryptFLE).Hint(hasHint). ServerAPI(bw.collection.client.serverAPI).Timeout(bw.collection.client.timeout). - Logger(bw.collection.client.logger) + Logger(bw.collection.client.logger).Authenticator(bw.collection.client.authenticator) if bw.comment != nil { comment, err := marshalValue(bw.comment, bw.collection.bsonOpts, bw.collection.registry) if err != nil { @@ -387,7 +387,8 @@ func (bw *bulkWrite) runUpdate(ctx context.Context, batch bulkWriteBatch) (opera Database(bw.collection.db.name).Collection(bw.collection.name). Deployment(bw.collection.client.deployment).Crypt(bw.collection.client.cryptFLE).Hint(hasHint). ArrayFilters(hasArrayFilters).ServerAPI(bw.collection.client.serverAPI). - Timeout(bw.collection.client.timeout).Logger(bw.collection.client.logger) + Timeout(bw.collection.client.timeout).Logger(bw.collection.client.logger). + Authenticator(bw.collection.client.authenticator) if bw.comment != nil { comment, err := marshalValue(bw.comment, bw.collection.bsonOpts, bw.collection.registry) if err != nil { diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/change_stream.go b/vendor/go.mongodb.org/mongo-driver/mongo/change_stream.go index 8d0a2031d..3ea8baf1f 100644 --- a/vendor/go.mongodb.org/mongo-driver/mongo/change_stream.go +++ b/vendor/go.mongodb.org/mongo-driver/mongo/change_stream.go @@ -137,7 +137,8 @@ func newChangeStream(ctx context.Context, config changeStreamConfig, pipeline in ReadPreference(config.readPreference).ReadConcern(config.readConcern). Deployment(cs.client.deployment).ClusterClock(cs.client.clock). CommandMonitor(cs.client.monitor).Session(cs.sess).ServerSelector(cs.selector).Retry(driver.RetryNone). - ServerAPI(cs.client.serverAPI).Crypt(config.crypt).Timeout(cs.client.timeout) + ServerAPI(cs.client.serverAPI).Crypt(config.crypt).Timeout(cs.client.timeout). + Authenticator(cs.client.authenticator) if cs.options.Collation != nil { cs.aggregate.Collation(bsoncore.Document(cs.options.Collation.ToDocument())) diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/client.go b/vendor/go.mongodb.org/mongo-driver/mongo/client.go index 4266412aa..0ce6d2e24 100644 --- a/vendor/go.mongodb.org/mongo-driver/mongo/client.go +++ b/vendor/go.mongodb.org/mongo-driver/mongo/client.go @@ -26,6 +26,7 @@ import ( "go.mongodb.org/mongo-driver/mongo/writeconcern" "go.mongodb.org/mongo-driver/x/bsonx/bsoncore" "go.mongodb.org/mongo-driver/x/mongo/driver" + "go.mongodb.org/mongo-driver/x/mongo/driver/auth" "go.mongodb.org/mongo-driver/x/mongo/driver/mongocrypt" mcopts "go.mongodb.org/mongo-driver/x/mongo/driver/mongocrypt/options" "go.mongodb.org/mongo-driver/x/mongo/driver/operation" @@ -79,6 +80,7 @@ type Client struct { metadataClientFLE *Client internalClientFLE *Client encryptedFieldsMap map[string]interface{} + authenticator driver.Authenticator } // Connect creates a new Client and then initializes it using the Connect method. This is equivalent to calling @@ -209,10 +211,22 @@ func NewClient(opts ...*options.ClientOptions) (*Client, error) { clientOpt.SetMaxPoolSize(defaultMaxPoolSize) } - cfg, err := topology.NewConfig(clientOpt, client.clock) + if clientOpt.Auth != nil { + client.authenticator, err = auth.CreateAuthenticator( + clientOpt.Auth.AuthMechanism, + topology.ConvertCreds(clientOpt.Auth), + clientOpt.HTTPClient, + ) + if err != nil { + return nil, fmt.Errorf("error creating authenticator: %w", err) + } + } + + cfg, err := topology.NewConfigWithAuthenticator(clientOpt, client.clock, client.authenticator) if err != nil { return nil, err } + client.serverAPI = topology.ServerAPIFromServerOptions(cfg.ServerOpts) if client.deployment == nil { @@ -690,7 +704,7 @@ func (c *Client) ListDatabases(ctx context.Context, filter interface{}, opts ... op := operation.NewListDatabases(filterDoc). Session(sess).ReadPreference(c.readPreference).CommandMonitor(c.monitor). ServerSelector(selector).ClusterClock(c.clock).Database("admin").Deployment(c.deployment).Crypt(c.cryptFLE). - ServerAPI(c.serverAPI).Timeout(c.timeout) + ServerAPI(c.serverAPI).Timeout(c.timeout).Authenticator(c.authenticator) if ldo.NameOnly != nil { op = op.NameOnly(*ldo.NameOnly) diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/client_encryption.go b/vendor/go.mongodb.org/mongo-driver/mongo/client_encryption.go index b51f57b47..352dac1f0 100644 --- a/vendor/go.mongodb.org/mongo-driver/mongo/client_encryption.go +++ b/vendor/go.mongodb.org/mongo-driver/mongo/client_encryption.go @@ -204,7 +204,12 @@ func transformExplicitEncryptionOptions(opts ...*options.EncryptOptions) *mcopts if eo.RangeOptions.Precision != nil { transformedRange.Precision = eo.RangeOptions.Precision } - transformedRange.Sparsity = eo.RangeOptions.Sparsity + if eo.RangeOptions.Sparsity != nil { + transformedRange.Sparsity = eo.RangeOptions.Sparsity + } + if eo.RangeOptions.TrimFactor != nil { + transformedRange.TrimFactor = eo.RangeOptions.TrimFactor + } transformed.SetRangeOptions(transformedRange) } return transformed @@ -230,8 +235,7 @@ func (ce *ClientEncryption) Encrypt(ctx context.Context, val bson.RawValue, // 2. An Aggregate Expression of this form: // {$and: [{$gt: [, ]}, {$lt: [, ]}] // $gt may also be $gte. $lt may also be $lte. -// Only supported for queryType "rangePreview" -// Beta: The Range algorithm is experimental only. It is not intended for public use. It is subject to breaking changes. +// Only supported for queryType "range" func (ce *ClientEncryption) EncryptExpression(ctx context.Context, expr interface{}, result interface{}, opts ...*options.EncryptOptions) error { transformed := transformExplicitEncryptionOptions(opts...) diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/collection.go b/vendor/go.mongodb.org/mongo-driver/mongo/collection.go index 4cf6fd1a1..dbe238a9e 100644 --- a/vendor/go.mongodb.org/mongo-driver/mongo/collection.go +++ b/vendor/go.mongodb.org/mongo-driver/mongo/collection.go @@ -291,7 +291,8 @@ func (coll *Collection) insert(ctx context.Context, documents []interface{}, ServerSelector(selector).ClusterClock(coll.client.clock). Database(coll.db.name).Collection(coll.name). Deployment(coll.client.deployment).Crypt(coll.client.cryptFLE).Ordered(true). - ServerAPI(coll.client.serverAPI).Timeout(coll.client.timeout).Logger(coll.client.logger) + ServerAPI(coll.client.serverAPI).Timeout(coll.client.timeout).Logger(coll.client.logger). + Authenticator(coll.client.authenticator) imo := options.MergeInsertManyOptions(opts...) if imo.BypassDocumentValidation != nil && *imo.BypassDocumentValidation { op = op.BypassDocumentValidation(*imo.BypassDocumentValidation) @@ -471,7 +472,8 @@ func (coll *Collection) delete(ctx context.Context, filter interface{}, deleteOn ServerSelector(selector).ClusterClock(coll.client.clock). Database(coll.db.name).Collection(coll.name). Deployment(coll.client.deployment).Crypt(coll.client.cryptFLE).Ordered(true). - ServerAPI(coll.client.serverAPI).Timeout(coll.client.timeout).Logger(coll.client.logger) + ServerAPI(coll.client.serverAPI).Timeout(coll.client.timeout).Logger(coll.client.logger). + Authenticator(coll.client.authenticator) if do.Comment != nil { comment, err := marshalValue(do.Comment, coll.bsonOpts, coll.registry) if err != nil { @@ -588,7 +590,7 @@ func (coll *Collection) updateOrReplace(ctx context.Context, filter bsoncore.Doc Database(coll.db.name).Collection(coll.name). Deployment(coll.client.deployment).Crypt(coll.client.cryptFLE).Hint(uo.Hint != nil). ArrayFilters(uo.ArrayFilters != nil).Ordered(true).ServerAPI(coll.client.serverAPI). - Timeout(coll.client.timeout).Logger(coll.client.logger) + Timeout(coll.client.timeout).Logger(coll.client.logger).Authenticator(coll.client.authenticator) if uo.Let != nil { let, err := marshal(uo.Let, coll.bsonOpts, coll.registry) if err != nil { @@ -861,7 +863,8 @@ func aggregate(a aggregateParams) (cur *Cursor, err error) { ServerAPI(a.client.serverAPI). HasOutputStage(hasOutputStage). Timeout(a.client.timeout). - MaxTime(ao.MaxTime) + MaxTime(ao.MaxTime). + Authenticator(a.client.authenticator) // Omit "maxTimeMS" from operations that return a user-managed cursor to // prevent confusing "cursor not found" errors. To maintain existing @@ -992,7 +995,7 @@ func (coll *Collection) CountDocuments(ctx context.Context, filter interface{}, op := operation.NewAggregate(pipelineArr).Session(sess).ReadConcern(rc).ReadPreference(coll.readPreference). CommandMonitor(coll.client.monitor).ServerSelector(selector).ClusterClock(coll.client.clock).Database(coll.db.name). Collection(coll.name).Deployment(coll.client.deployment).Crypt(coll.client.cryptFLE).ServerAPI(coll.client.serverAPI). - Timeout(coll.client.timeout).MaxTime(countOpts.MaxTime) + Timeout(coll.client.timeout).MaxTime(countOpts.MaxTime).Authenticator(coll.client.authenticator) if countOpts.Collation != nil { op.Collation(bsoncore.Document(countOpts.Collation.ToDocument())) } @@ -1077,7 +1080,7 @@ func (coll *Collection) EstimatedDocumentCount(ctx context.Context, Database(coll.db.name).Collection(coll.name).CommandMonitor(coll.client.monitor). Deployment(coll.client.deployment).ReadConcern(rc).ReadPreference(coll.readPreference). ServerSelector(selector).Crypt(coll.client.cryptFLE).ServerAPI(coll.client.serverAPI). - Timeout(coll.client.timeout).MaxTime(co.MaxTime) + Timeout(coll.client.timeout).MaxTime(co.MaxTime).Authenticator(coll.client.authenticator) if co.Comment != nil { comment, err := marshalValue(co.Comment, coll.bsonOpts, coll.registry) @@ -1144,7 +1147,7 @@ func (coll *Collection) Distinct(ctx context.Context, fieldName string, filter i Database(coll.db.name).Collection(coll.name).CommandMonitor(coll.client.monitor). Deployment(coll.client.deployment).ReadConcern(rc).ReadPreference(coll.readPreference). ServerSelector(selector).Crypt(coll.client.cryptFLE).ServerAPI(coll.client.serverAPI). - Timeout(coll.client.timeout).MaxTime(option.MaxTime) + Timeout(coll.client.timeout).MaxTime(option.MaxTime).Authenticator(coll.client.authenticator) if option.Collation != nil { op.Collation(bsoncore.Document(option.Collation.ToDocument())) @@ -1257,7 +1260,7 @@ func (coll *Collection) find( ClusterClock(coll.client.clock).Database(coll.db.name).Collection(coll.name). Deployment(coll.client.deployment).Crypt(coll.client.cryptFLE).ServerAPI(coll.client.serverAPI). Timeout(coll.client.timeout).MaxTime(fo.MaxTime).Logger(coll.client.logger). - OmitCSOTMaxTimeMS(omitCSOTMaxTimeMS) + OmitCSOTMaxTimeMS(omitCSOTMaxTimeMS).Authenticator(coll.client.authenticator) cursorOpts := coll.client.createBaseCursorOptions() @@ -1521,7 +1524,7 @@ func (coll *Collection) FindOneAndDelete(ctx context.Context, filter interface{} } fod := options.MergeFindOneAndDeleteOptions(opts...) op := operation.NewFindAndModify(f).Remove(true).ServerAPI(coll.client.serverAPI).Timeout(coll.client.timeout). - MaxTime(fod.MaxTime) + MaxTime(fod.MaxTime).Authenticator(coll.client.authenticator) if fod.Collation != nil { op = op.Collation(bsoncore.Document(fod.Collation.ToDocument())) } @@ -1601,7 +1604,8 @@ func (coll *Collection) FindOneAndReplace(ctx context.Context, filter interface{ fo := options.MergeFindOneAndReplaceOptions(opts...) op := operation.NewFindAndModify(f).Update(bsoncore.Value{Type: bsontype.EmbeddedDocument, Data: r}). - ServerAPI(coll.client.serverAPI).Timeout(coll.client.timeout).MaxTime(fo.MaxTime) + ServerAPI(coll.client.serverAPI).Timeout(coll.client.timeout).MaxTime(fo.MaxTime).Authenticator(coll.client.authenticator) + if fo.BypassDocumentValidation != nil && *fo.BypassDocumentValidation { op = op.BypassDocumentValidation(*fo.BypassDocumentValidation) } @@ -1688,7 +1692,7 @@ func (coll *Collection) FindOneAndUpdate(ctx context.Context, filter interface{} fo := options.MergeFindOneAndUpdateOptions(opts...) op := operation.NewFindAndModify(f).ServerAPI(coll.client.serverAPI).Timeout(coll.client.timeout). - MaxTime(fo.MaxTime) + MaxTime(fo.MaxTime).Authenticator(coll.client.authenticator) u, err := marshalUpdateValue(update, coll.bsonOpts, coll.registry, true) if err != nil { @@ -1894,7 +1898,8 @@ func (coll *Collection) drop(ctx context.Context) error { ServerSelector(selector).ClusterClock(coll.client.clock). Database(coll.db.name).Collection(coll.name). Deployment(coll.client.deployment).Crypt(coll.client.cryptFLE). - ServerAPI(coll.client.serverAPI).Timeout(coll.client.timeout) + ServerAPI(coll.client.serverAPI).Timeout(coll.client.timeout). + Authenticator(coll.client.authenticator) err = op.Execute(ctx) // ignore namespace not found errors diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/cursor.go b/vendor/go.mongodb.org/mongo-driver/mongo/cursor.go index c77d1109f..1e01e398d 100644 --- a/vendor/go.mongodb.org/mongo-driver/mongo/cursor.go +++ b/vendor/go.mongodb.org/mongo-driver/mongo/cursor.go @@ -286,8 +286,9 @@ func (c *Cursor) Close(ctx context.Context) error { } // All iterates the cursor and decodes each document into results. The results parameter must be a pointer to a slice. -// The slice pointed to by results will be completely overwritten. This method will close the cursor after retrieving -// all documents. If the cursor has been iterated, any previously iterated documents will not be included in results. +// The slice pointed to by results will be completely overwritten. A nil slice pointer will not be modified if the cursor +// has been closed, exhausted, or is empty. This method will close the cursor after retrieving all documents. If the +// cursor has been iterated, any previously iterated documents will not be included in results. // // This method requires driver version >= 1.1.0. func (c *Cursor) All(ctx context.Context, results interface{}) error { diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/database.go b/vendor/go.mongodb.org/mongo-driver/mongo/database.go index 57c0186ec..5344c9641 100644 --- a/vendor/go.mongodb.org/mongo-driver/mongo/database.go +++ b/vendor/go.mongodb.org/mongo-driver/mongo/database.go @@ -189,7 +189,7 @@ func (db *Database) processRunCommand(ctx context.Context, cmd interface{}, ServerSelector(readSelect).ClusterClock(db.client.clock). Database(db.name).Deployment(db.client.deployment). Crypt(db.client.cryptFLE).ReadPreference(ro.ReadPreference).ServerAPI(db.client.serverAPI). - Timeout(db.client.timeout).Logger(db.client.logger), sess, nil + Timeout(db.client.timeout).Logger(db.client.logger).Authenticator(db.client.authenticator), sess, nil } // RunCommand executes the given command against the database. @@ -308,7 +308,7 @@ func (db *Database) Drop(ctx context.Context) error { Session(sess).WriteConcern(wc).CommandMonitor(db.client.monitor). ServerSelector(selector).ClusterClock(db.client.clock). Database(db.name).Deployment(db.client.deployment).Crypt(db.client.cryptFLE). - ServerAPI(db.client.serverAPI) + ServerAPI(db.client.serverAPI).Authenticator(db.client.authenticator) err = op.Execute(ctx) @@ -402,7 +402,7 @@ func (db *Database) ListCollections(ctx context.Context, filter interface{}, opt Session(sess).ReadPreference(db.readPreference).CommandMonitor(db.client.monitor). ServerSelector(selector).ClusterClock(db.client.clock). Database(db.name).Deployment(db.client.deployment).Crypt(db.client.cryptFLE). - ServerAPI(db.client.serverAPI).Timeout(db.client.timeout) + ServerAPI(db.client.serverAPI).Timeout(db.client.timeout).Authenticator(db.client.authenticator) cursorOpts := db.client.createBaseCursorOptions() @@ -679,7 +679,7 @@ func (db *Database) createCollection(ctx context.Context, name string, opts ...* func (db *Database) createCollectionOperation(name string, opts ...*options.CreateCollectionOptions) (*operation.Create, error) { cco := options.MergeCreateCollectionOptions(opts...) - op := operation.NewCreate(name).ServerAPI(db.client.serverAPI) + op := operation.NewCreate(name).ServerAPI(db.client.serverAPI).Authenticator(db.client.authenticator) if cco.Capped != nil { op.Capped(*cco.Capped) @@ -805,7 +805,8 @@ func (db *Database) CreateView(ctx context.Context, viewName, viewOn string, pip op := operation.NewCreate(viewName). ViewOn(viewOn). Pipeline(pipelineArray). - ServerAPI(db.client.serverAPI) + ServerAPI(db.client.serverAPI). + Authenticator(db.client.authenticator) cvo := options.MergeCreateViewOptions(opts...) if cvo.Collation != nil { op.Collation(bsoncore.Document(cvo.Collation.ToDocument())) diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/description/server.go b/vendor/go.mongodb.org/mongo-driver/mongo/description/server.go index cf3942383..19f2760e2 100644 --- a/vendor/go.mongodb.org/mongo-driver/mongo/description/server.go +++ b/vendor/go.mongodb.org/mongo-driver/mongo/description/server.go @@ -316,21 +316,23 @@ func NewServer(addr address.Address, response bson.Raw) Server { desc.Kind = Standalone - if isReplicaSet { + switch { + case isReplicaSet: desc.Kind = RSGhost - } else if desc.SetName != "" { - if isWritablePrimary { + case desc.SetName != "": + switch { + case isWritablePrimary: desc.Kind = RSPrimary - } else if hidden { + case hidden: desc.Kind = RSMember - } else if secondary { + case secondary: desc.Kind = RSSecondary - } else if arbiterOnly { + case arbiterOnly: desc.Kind = RSArbiter - } else { + default: desc.Kind = RSMember } - } else if msg == "isdbgrid" { + case msg == "isdbgrid": desc.Kind = Mongos } diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/index_view.go b/vendor/go.mongodb.org/mongo-driver/mongo/index_view.go index 8d3555d0b..db65f7507 100644 --- a/vendor/go.mongodb.org/mongo-driver/mongo/index_view.go +++ b/vendor/go.mongodb.org/mongo-driver/mongo/index_view.go @@ -94,7 +94,7 @@ func (iv IndexView) List(ctx context.Context, opts ...*options.ListIndexesOption ServerSelector(selector).ClusterClock(iv.coll.client.clock). Database(iv.coll.db.name).Collection(iv.coll.name). Deployment(iv.coll.client.deployment).ServerAPI(iv.coll.client.serverAPI). - Timeout(iv.coll.client.timeout) + Timeout(iv.coll.client.timeout).Authenticator(iv.coll.client.authenticator) cursorOpts := iv.coll.client.createBaseCursorOptions() @@ -262,7 +262,7 @@ func (iv IndexView) CreateMany(ctx context.Context, models []IndexModel, opts .. Session(sess).WriteConcern(wc).ClusterClock(iv.coll.client.clock). Database(iv.coll.db.name).Collection(iv.coll.name).CommandMonitor(iv.coll.client.monitor). Deployment(iv.coll.client.deployment).ServerSelector(selector).ServerAPI(iv.coll.client.serverAPI). - Timeout(iv.coll.client.timeout).MaxTime(option.MaxTime) + Timeout(iv.coll.client.timeout).MaxTime(option.MaxTime).Authenticator(iv.coll.client.authenticator) if option.CommitQuorum != nil { commitQuorum, err := marshalValue(option.CommitQuorum, iv.coll.bsonOpts, iv.coll.registry) if err != nil { @@ -367,7 +367,7 @@ func (iv IndexView) createOptionsDoc(opts *options.IndexOptions) (bsoncore.Docum return optsDoc, nil } -func (iv IndexView) drop(ctx context.Context, name string, opts ...*options.DropIndexesOptions) (bson.Raw, error) { +func (iv IndexView) drop(ctx context.Context, index any, opts ...*options.DropIndexesOptions) (bson.Raw, error) { if ctx == nil { ctx = context.Background() } @@ -397,12 +397,12 @@ func (iv IndexView) drop(ctx context.Context, name string, opts ...*options.Drop // TODO(GODRIVER-3038): This operation should pass CSE to the DropIndexes // Crypt setter to be applied to the operation. - op := operation.NewDropIndexes(name). - Session(sess).WriteConcern(wc).CommandMonitor(iv.coll.client.monitor). + op := operation.NewDropIndexes(index).Session(sess).WriteConcern(wc).CommandMonitor(iv.coll.client.monitor). ServerSelector(selector).ClusterClock(iv.coll.client.clock). Database(iv.coll.db.name).Collection(iv.coll.name). Deployment(iv.coll.client.deployment).ServerAPI(iv.coll.client.serverAPI). - Timeout(iv.coll.client.timeout).MaxTime(dio.MaxTime) + Timeout(iv.coll.client.timeout).MaxTime(dio.MaxTime). + Authenticator(iv.coll.client.authenticator) err = op.Execute(ctx) if err != nil { @@ -435,6 +435,20 @@ func (iv IndexView) DropOne(ctx context.Context, name string, opts ...*options.D return iv.drop(ctx, name, opts...) } +// DropOneWithKey drops a collection index by key using the dropIndexes operation. If the operation succeeds, this returns +// a BSON document in the form {nIndexesWas: }. The "nIndexesWas" field in the response contains the number of +// indexes that existed prior to the drop. +// +// This function is useful to drop an index using its key specification instead of its name. +func (iv IndexView) DropOneWithKey(ctx context.Context, keySpecDocument interface{}, opts ...*options.DropIndexesOptions) (bson.Raw, error) { + doc, err := marshal(keySpecDocument, iv.coll.bsonOpts, iv.coll.registry) + if err != nil { + return nil, err + } + + return iv.drop(ctx, doc, opts...) +} + // DropAll executes a dropIndexes operation to drop all indexes on the collection. If the operation succeeds, this // returns a BSON document in the form {nIndexesWas: }. The "nIndexesWas" field in the response contains the // number of indexes that existed prior to the drop. diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/csfle_enabled.go b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/csfle_enabled.go new file mode 100644 index 000000000..588e9ad6d --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/csfle_enabled.go @@ -0,0 +1,16 @@ +// Copyright (C) MongoDB, Inc. 2022-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +//go:build cse +// +build cse + +package mtest + +// IsCSFLEEnabled returns true if driver is built with Client Side Field Level Encryption support. +// Client Side Field Level Encryption support is enabled with the cse build tag. +func IsCSFLEEnabled() bool { + return true +} diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/csfle_not_enabled.go b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/csfle_not_enabled.go new file mode 100644 index 000000000..289cf5ce6 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/csfle_not_enabled.go @@ -0,0 +1,16 @@ +// Copyright (C) MongoDB, Inc. 2022-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +//go:build !cse +// +build !cse + +package mtest + +// IsCSFLEEnabled returns true if driver is built with Client Side Field Level Encryption support. +// Client Side Field Level Encryption support is enabled with the cse build tag. +func IsCSFLEEnabled() bool { + return false +} diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/deployment_helpers.go b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/deployment_helpers.go new file mode 100644 index 000000000..299fae567 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/deployment_helpers.go @@ -0,0 +1,120 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package mtest + +import ( + "go.mongodb.org/mongo-driver/bson" +) + +// BatchIdentifier specifies the keyword to identify the batch in a cursor response. +type BatchIdentifier string + +// These constants specify valid values for BatchIdentifier. +const ( + FirstBatch BatchIdentifier = "firstBatch" + NextBatch BatchIdentifier = "nextBatch" +) + +// CommandError is a representation of a command error from the server. +type CommandError struct { + Code int32 + Message string + Name string + Labels []string +} + +// WriteError is a representation of a write error from the server. +type WriteError struct { + Index int + Code int + Message string +} + +// WriteConcernError is a representation of a write concern error from the server. +type WriteConcernError struct { + Name string `bson:"codeName"` + Code int `bson:"code"` + Message string `bson:"errmsg"` + Details bson.Raw `bson:"errInfo"` +} + +// CreateCursorResponse creates a response for a cursor command. +func CreateCursorResponse(cursorID int64, ns string, identifier BatchIdentifier, batch ...bson.D) bson.D { + batchArr := bson.A{} + for _, doc := range batch { + batchArr = append(batchArr, doc) + } + + return bson.D{ + {"ok", 1}, + {"cursor", bson.D{ + {"id", cursorID}, + {"ns", ns}, + {string(identifier), batchArr}, + }}, + } +} + +// CreateCommandErrorResponse creates a response with a command error. +func CreateCommandErrorResponse(ce CommandError) bson.D { + res := bson.D{ + {"ok", 0}, + {"code", ce.Code}, + {"errmsg", ce.Message}, + {"codeName", ce.Name}, + } + if len(ce.Labels) > 0 { + var labelsArr bson.A + for _, label := range ce.Labels { + labelsArr = append(labelsArr, label) + } + res = append(res, bson.E{Key: "errorLabels", Value: labelsArr}) + } + return res +} + +// CreateWriteErrorsResponse creates a response with one or more write errors. +func CreateWriteErrorsResponse(writeErrorrs ...WriteError) bson.D { + arr := make(bson.A, len(writeErrorrs)) + for idx, we := range writeErrorrs { + arr[idx] = bson.D{ + {"index", we.Index}, + {"code", we.Code}, + {"errmsg", we.Message}, + } + } + + return bson.D{ + {"ok", 1}, + {"writeErrors", arr}, + } +} + +// CreateWriteConcernErrorResponse creates a response with a write concern error. +func CreateWriteConcernErrorResponse(wce WriteConcernError) bson.D { + wceDoc := bson.D{ + {"code", wce.Code}, + {"codeName", wce.Name}, + {"errmsg", wce.Message}, + } + if len(wce.Details) > 0 { + wceDoc = append(wceDoc, bson.E{Key: "errInfo", Value: wce.Details}) + } + + return bson.D{ + {"ok", 1}, + {"writeConcernError", wceDoc}, + } +} + +// CreateSuccessResponse creates a response for a successful operation with the given elements. +func CreateSuccessResponse(elems ...bson.E) bson.D { + res := bson.D{ + {"ok", 1}, + } + return append(res, elems...) +} diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/doc.go b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/doc.go new file mode 100644 index 000000000..9d4ae6f32 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/doc.go @@ -0,0 +1,9 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +// Package mtest is unstable and there is no backward compatibility guarantee. +// It is experimental and subject to change. +package mtest diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/global_state.go b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/global_state.go new file mode 100644 index 000000000..adb262203 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/global_state.go @@ -0,0 +1,95 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package mtest + +import ( + "context" + "fmt" + + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/mongo" + "go.mongodb.org/mongo-driver/x/mongo/driver/connstring" + "go.mongodb.org/mongo-driver/x/mongo/driver/topology" +) + +// AuthEnabled returns whether or not the cluster requires auth. +func AuthEnabled() bool { + return testContext.authEnabled +} + +// SSLEnabled returns whether or not the cluster requires SSL. +func SSLEnabled() bool { + return testContext.sslEnabled +} + +// ClusterTopologyKind returns the topology kind of the cluster under test. +func ClusterTopologyKind() TopologyKind { + return testContext.topoKind +} + +// ClusterURI returns the connection string for the cluster. +func ClusterURI() string { + return testContext.connString.Original +} + +// Serverless returns whether the test is running against a serverless instance. +func Serverless() bool { + return testContext.serverless +} + +// SingleMongosLoadBalancerURI returns the URI for a load balancer fronting a single mongos. This will only be set +// if the cluster is load balanced. +func SingleMongosLoadBalancerURI() string { + return testContext.singleMongosLoadBalancerURI +} + +// MultiMongosLoadBalancerURI returns the URI for a load balancer fronting multiple mongoses. This will only be set +// if the cluster is load balanced. +func MultiMongosLoadBalancerURI() string { + return testContext.multiMongosLoadBalancerURI +} + +// ClusterConnString returns the parsed ConnString for the cluster. +func ClusterConnString() *connstring.ConnString { + return testContext.connString +} + +// GlobalClient returns a Client connected to the cluster configured with read concern majority, write concern majority, +// and read preference primary. +func GlobalClient() *mongo.Client { + return testContext.client +} + +// GlobalTopology returns a Topology that's connected to the cluster. +func GlobalTopology() *topology.Topology { + return testContext.topo +} + +// ServerVersion returns the server version of the cluster. This assumes that all nodes in the cluster have the same +// version. +func ServerVersion() string { + return testContext.serverVersion +} + +// SetFailPoint configures the provided fail point on the cluster under test using the provided Client. +func SetFailPoint(fp FailPoint, client *mongo.Client) error { + admin := client.Database("admin") + if err := admin.RunCommand(context.Background(), fp).Err(); err != nil { + return fmt.Errorf("error creating fail point: %w", err) + } + return nil +} + +// SetRawFailPoint configures the fail point represented by the fp parameter on the cluster under test using the +// provided Client +func SetRawFailPoint(fp bson.Raw, client *mongo.Client) error { + admin := client.Database("admin") + if err := admin.RunCommand(context.Background(), fp).Err(); err != nil { + return fmt.Errorf("error creating fail point: %w", err) + } + return nil +} diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/mongotest.go b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/mongotest.go new file mode 100644 index 000000000..41292e67a --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/mongotest.go @@ -0,0 +1,914 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package mtest + +import ( + "context" + "errors" + "fmt" + "strings" + "sync" + "sync/atomic" + "testing" + "time" + + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/event" + "go.mongodb.org/mongo-driver/internal/assert" + "go.mongodb.org/mongo-driver/internal/csfle" + "go.mongodb.org/mongo-driver/mongo" + "go.mongodb.org/mongo-driver/mongo/options" + "go.mongodb.org/mongo-driver/mongo/readconcern" + "go.mongodb.org/mongo-driver/mongo/readpref" + "go.mongodb.org/mongo-driver/mongo/writeconcern" + "go.mongodb.org/mongo-driver/x/bsonx/bsoncore" + "go.mongodb.org/mongo-driver/x/mongo/driver" +) + +var ( + // MajorityWc is the majority write concern. + MajorityWc = writeconcern.New(writeconcern.WMajority()) + // PrimaryRp is the primary read preference. + PrimaryRp = readpref.Primary() + // SecondaryRp is the secondary read preference. + SecondaryRp = readpref.Secondary() + // LocalRc is the local read concern + LocalRc = readconcern.Local() + // MajorityRc is the majority read concern + MajorityRc = readconcern.Majority() +) + +const ( + namespaceExistsErrCode int32 = 48 +) + +// FailPoint is a representation of a server fail point. +// See https://github.com/mongodb/specifications/tree/HEAD/source/transactions/tests#server-fail-point +// for more information regarding fail points. +type FailPoint struct { + ConfigureFailPoint string `bson:"configureFailPoint"` + // Mode should be a string, FailPointMode, or map[string]interface{} + Mode interface{} `bson:"mode"` + Data FailPointData `bson:"data"` +} + +// FailPointMode is a representation of the Failpoint.Mode field. +type FailPointMode struct { + Times int32 `bson:"times"` + Skip int32 `bson:"skip"` +} + +// FailPointData is a representation of the FailPoint.Data field. +type FailPointData struct { + FailCommands []string `bson:"failCommands,omitempty"` + CloseConnection bool `bson:"closeConnection,omitempty"` + ErrorCode int32 `bson:"errorCode,omitempty"` + FailBeforeCommitExceptionCode int32 `bson:"failBeforeCommitExceptionCode,omitempty"` + ErrorLabels *[]string `bson:"errorLabels,omitempty"` + WriteConcernError *WriteConcernErrorData `bson:"writeConcernError,omitempty"` + BlockConnection bool `bson:"blockConnection,omitempty"` + BlockTimeMS int32 `bson:"blockTimeMS,omitempty"` + AppName string `bson:"appName,omitempty"` +} + +// WriteConcernErrorData is a representation of the FailPoint.Data.WriteConcern field. +type WriteConcernErrorData struct { + Code int32 `bson:"code"` + Name string `bson:"codeName"` + Errmsg string `bson:"errmsg"` + ErrorLabels *[]string `bson:"errorLabels,omitempty"` + ErrInfo bson.Raw `bson:"errInfo,omitempty"` +} + +// T is a wrapper around testing.T. +type T struct { + // connsCheckedOut is the net number of connections checked out during test execution. + // It must be accessed using the atomic package and should be at the beginning of the struct. + // - atomic bug: https://pkg.go.dev/sync/atomic#pkg-note-BUG + // - suggested layout: https://go101.org/article/memory-layout.html + connsCheckedOut int64 + + *testing.T + + // members for only this T instance + createClient *bool + createCollection *bool + runOn []RunOnBlock + mockDeployment *mockDeployment // nil if the test is not being run against a mock + mockResponses []bson.D + createdColls []*Collection // collections created in this test + proxyDialer *proxyDialer + dbName, collName string + failPointNames []string + minServerVersion string + maxServerVersion string + validTopologies []TopologyKind + auth *bool + enterprise *bool + dataLake *bool + ssl *bool + collCreateOpts *options.CreateCollectionOptions + requireAPIVersion *bool + + // options copied to sub-tests + clientType ClientType + clientOpts *options.ClientOptions + collOpts *options.CollectionOptions + shareClient *bool + + baseOpts *Options // used to create subtests + + // command monitoring channels + monitorLock sync.Mutex + started []*event.CommandStartedEvent + succeeded []*event.CommandSucceededEvent + failed []*event.CommandFailedEvent + + Client *mongo.Client + DB *mongo.Database + Coll *mongo.Collection +} + +func newT(wrapped *testing.T, opts ...*Options) *T { + t := &T{ + T: wrapped, + } + for _, opt := range opts { + for _, optFn := range opt.optFuncs { + optFn(t) + } + } + + if err := t.verifyConstraints(); err != nil { + t.Skipf("skipping due to environmental constraints: %v", err) + } + + if t.collName == "" { + t.collName = t.Name() + } + if t.dbName == "" { + t.dbName = TestDb + } + t.collName = sanitizeCollectionName(t.dbName, t.collName) + + // create a set of base options for sub-tests + t.baseOpts = NewOptions().ClientOptions(t.clientOpts).CollectionOptions(t.collOpts).ClientType(t.clientType) + if t.shareClient != nil { + t.baseOpts.ShareClient(*t.shareClient) + } + + return t +} + +// New creates a new T instance with the given options. If the current environment does not satisfy constraints +// specified in the options, the test will be skipped automatically. +func New(wrapped *testing.T, opts ...*Options) *T { + // All tests that use mtest.New() are expected to be integration tests, so skip them when the + // -short flag is included in the "go test" command. + if testing.Short() { + wrapped.Skip("skipping mtest integration test in short mode") + } + + t := newT(wrapped, opts...) + + // only create a client if it needs to be shared in sub-tests + // otherwise, a new client will be created for each subtest + if t.shareClient != nil && *t.shareClient { + t.createTestClient() + } + + wrapped.Cleanup(t.cleanup) + + return t +} + +// cleanup cleans up any resources associated with a T. It is intended to be +// called by [testing.T.Cleanup]. +func (t *T) cleanup() { + if t.Client == nil { + return + } + + // only clear collections and fail points if the test is not running against a mock + if t.clientType != Mock { + t.ClearCollections() + t.ClearFailPoints() + } + + // always disconnect the client regardless of clientType because Client.Disconnect will work against + // all deployments + _ = t.Client.Disconnect(context.Background()) +} + +// Run creates a new T instance for a sub-test and runs the given callback. It also creates a new collection using the +// given name which is available to the callback through the T.Coll variable and is dropped after the callback +// returns. +func (t *T) Run(name string, callback func(mt *T)) { + t.RunOpts(name, NewOptions(), callback) +} + +// RunOpts creates a new T instance for a sub-test with the given options. If the current environment does not satisfy +// constraints specified in the options, the new sub-test will be skipped automatically. If the test is not skipped, +// the callback will be run with the new T instance. RunOpts creates a new collection with the given name which is +// available to the callback through the T.Coll variable and is dropped after the callback returns. +func (t *T) RunOpts(name string, opts *Options, callback func(mt *T)) { + t.T.Run(name, func(wrapped *testing.T) { + sub := newT(wrapped, t.baseOpts, opts) + + // add any mock responses for this test + if sub.clientType == Mock && len(sub.mockResponses) > 0 { + sub.AddMockResponses(sub.mockResponses...) + } + + // for shareClient, inherit the client from the parent + if sub.shareClient != nil && *sub.shareClient && sub.clientType == t.clientType { + sub.Client = t.Client + } + // only create a client if not already set + if sub.Client == nil { + if sub.createClient == nil || *sub.createClient { + sub.createTestClient() + } + } + // create a collection for this test + if sub.Client != nil { + sub.createTestCollection() + } + + // defer dropping all collections if the test is using a client + defer func() { + if sub.Client == nil { + return + } + + // store number of sessions and connections checked out here but assert that they're equal to 0 after + // cleaning up test resources to make sure resources are always cleared + sessions := sub.Client.NumberSessionsInProgress() + conns := sub.NumberConnectionsCheckedOut() + + if sub.clientType != Mock { + sub.ClearFailPoints() + sub.ClearCollections() + } + // only disconnect client if it's not being shared + if sub.shareClient == nil || !*sub.shareClient { + _ = sub.Client.Disconnect(context.Background()) + } + assert.Equal(sub, 0, sessions, "%v sessions checked out", sessions) + assert.Equal(sub, 0, conns, "%v connections checked out", conns) + }() + + // clear any events that may have happened during setup and run the test + sub.ClearEvents() + callback(sub) + }) +} + +// AddMockResponses adds responses to be returned by the mock deployment. This should only be used if T is being run +// against a mock deployment. +func (t *T) AddMockResponses(responses ...bson.D) { + t.mockDeployment.addResponses(responses...) +} + +// ClearMockResponses clears all responses in the mock deployment. +func (t *T) ClearMockResponses() { + t.mockDeployment.clearResponses() +} + +// GetStartedEvent returns the most recent CommandStartedEvent, or nil if one is not present. +// This can only be called once per event. +func (t *T) GetStartedEvent() *event.CommandStartedEvent { + // TODO(GODRIVER-2075): GetStartedEvent documents that it returns the most recent event, but actually returns the first + // TODO event. Update either the documentation or implementation. + if len(t.started) == 0 { + return nil + } + e := t.started[0] + t.started = t.started[1:] + return e +} + +// GetSucceededEvent returns the most recent CommandSucceededEvent, or nil if one is not present. +// This can only be called once per event. +func (t *T) GetSucceededEvent() *event.CommandSucceededEvent { + // TODO(GODRIVER-2075): GetSucceededEvent documents that it returns the most recent event, but actually returns the + // TODO first event. Update either the documentation or implementation. + if len(t.succeeded) == 0 { + return nil + } + e := t.succeeded[0] + t.succeeded = t.succeeded[1:] + return e +} + +// GetFailedEvent returns the most recent CommandFailedEvent, or nil if one is not present. +// This can only be called once per event. +func (t *T) GetFailedEvent() *event.CommandFailedEvent { + // TODO(GODRIVER-2075): GetFailedEvent documents that it returns the most recent event, but actually returns the first + // TODO event. Update either the documentation or implementation. + if len(t.failed) == 0 { + return nil + } + e := t.failed[0] + t.failed = t.failed[1:] + return e +} + +// GetAllStartedEvents returns a slice of all CommandStartedEvent instances for this test. This can be called multiple +// times. +func (t *T) GetAllStartedEvents() []*event.CommandStartedEvent { + return t.started +} + +// GetAllSucceededEvents returns a slice of all CommandSucceededEvent instances for this test. This can be called multiple +// times. +func (t *T) GetAllSucceededEvents() []*event.CommandSucceededEvent { + return t.succeeded +} + +// GetAllFailedEvents returns a slice of all CommandFailedEvent instances for this test. This can be called multiple +// times. +func (t *T) GetAllFailedEvents() []*event.CommandFailedEvent { + return t.failed +} + +// FilterStartedEvents filters the existing CommandStartedEvent instances for this test using the provided filter +// callback. An event will be retained if the filter returns true. The list of filtered events will be used to overwrite +// the list of events for this test and will therefore change the output of t.GetAllStartedEvents(). +func (t *T) FilterStartedEvents(filter func(*event.CommandStartedEvent) bool) { + var newEvents []*event.CommandStartedEvent + for _, evt := range t.started { + if filter(evt) { + newEvents = append(newEvents, evt) + } + } + t.started = newEvents +} + +// FilterSucceededEvents filters the existing CommandSucceededEvent instances for this test using the provided filter +// callback. An event will be retained if the filter returns true. The list of filtered events will be used to overwrite +// the list of events for this test and will therefore change the output of t.GetAllSucceededEvents(). +func (t *T) FilterSucceededEvents(filter func(*event.CommandSucceededEvent) bool) { + var newEvents []*event.CommandSucceededEvent + for _, evt := range t.succeeded { + if filter(evt) { + newEvents = append(newEvents, evt) + } + } + t.succeeded = newEvents +} + +// FilterFailedEvents filters the existing CommandFailedEVent instances for this test using the provided filter +// callback. An event will be retained if the filter returns true. The list of filtered events will be used to overwrite +// the list of events for this test and will therefore change the output of t.GetAllFailedEvents(). +func (t *T) FilterFailedEvents(filter func(*event.CommandFailedEvent) bool) { + var newEvents []*event.CommandFailedEvent + for _, evt := range t.failed { + if filter(evt) { + newEvents = append(newEvents, evt) + } + } + t.failed = newEvents +} + +// GetProxiedMessages returns the messages proxied to the server by the test. If the client type is not Proxy, this +// returns nil. +func (t *T) GetProxiedMessages() []*ProxyMessage { + if t.proxyDialer == nil { + return nil + } + return t.proxyDialer.Messages() +} + +// NumberConnectionsCheckedOut returns the number of connections checked out from the test Client. +func (t *T) NumberConnectionsCheckedOut() int { + return int(atomic.LoadInt64(&t.connsCheckedOut)) +} + +// ClearEvents clears the existing command monitoring events. +func (t *T) ClearEvents() { + t.started = t.started[:0] + t.succeeded = t.succeeded[:0] + t.failed = t.failed[:0] +} + +// ResetClient resets the existing client with the given options. If opts is nil, the existing options will be used. +// If t.Coll is not-nil, it will be reset to use the new client. Should only be called if the existing client is +// not nil. This will Disconnect the existing client but will not drop existing collections. To do so, ClearCollections +// must be called before calling ResetClient. +func (t *T) ResetClient(opts *options.ClientOptions) { + if opts != nil { + t.clientOpts = opts + } + + _ = t.Client.Disconnect(context.Background()) + t.createTestClient() + t.DB = t.Client.Database(t.dbName) + t.Coll = t.DB.Collection(t.collName, t.collOpts) + + for _, coll := range t.createdColls { + // If the collection was created using a different Client, it doesn't need to be reset. + if coll.hasDifferentClient { + continue + } + + // If the namespace is the same as t.Coll, we can use t.Coll. + if coll.created.Name() == t.collName && coll.created.Database().Name() == t.dbName { + coll.created = t.Coll + continue + } + + // Otherwise, reset the collection to use the new Client. + coll.created = t.Client.Database(coll.DB).Collection(coll.Name, coll.Opts) + } +} + +// Collection is used to configure a new collection created during a test. +type Collection struct { + Name string + DB string // defaults to mt.DB.Name() if not specified + Client *mongo.Client // defaults to mt.Client if not specified + Opts *options.CollectionOptions + CreateOpts *options.CreateCollectionOptions + ViewOn string + ViewPipeline interface{} + hasDifferentClient bool + created *mongo.Collection // the actual collection that was created +} + +// CreateCollection creates a new collection with the given configuration. The collection will be dropped after the test +// finishes running. If createOnServer is true, the function ensures that the collection has been created server-side +// by running the create command. The create command will appear in command monitoring channels. +func (t *T) CreateCollection(coll Collection, createOnServer bool) *mongo.Collection { + if coll.DB == "" { + coll.DB = t.DB.Name() + } + if coll.Client == nil { + coll.Client = t.Client + } + coll.hasDifferentClient = coll.Client != t.Client + + db := coll.Client.Database(coll.DB) + + if coll.CreateOpts != nil && coll.CreateOpts.EncryptedFields != nil { + // An encrypted collection consists of a data collection and three state collections. + // Aborted test runs may leave these collections. + // Drop all four collections to avoid a quiet failure to create all collections. + DropEncryptedCollection(t, db.Collection(coll.Name), coll.CreateOpts.EncryptedFields) + } + + if createOnServer && t.clientType != Mock { + var err error + if coll.ViewOn != "" { + err = db.CreateView(context.Background(), coll.Name, coll.ViewOn, coll.ViewPipeline) + } else { + err = db.CreateCollection(context.Background(), coll.Name, coll.CreateOpts) + } + + // ignore ErrUnacknowledgedWrite. Client may be configured with unacknowledged write concern. + if err != nil && !errors.Is(err, driver.ErrUnacknowledgedWrite) { + // ignore NamespaceExists errors for idempotency + + var cmdErr mongo.CommandError + if !errors.As(err, &cmdErr) || cmdErr.Code != namespaceExistsErrCode { + t.Fatalf("error creating collection or view: %v on server: %v", coll.Name, err) + } + } + } + + coll.created = db.Collection(coll.Name, coll.Opts) + t.createdColls = append(t.createdColls, &coll) + return coll.created +} + +// DropEncryptedCollection drops a collection with EncryptedFields. +// The EncryptedFields option is not supported in Collection.Drop(). See GODRIVER-2413. +func DropEncryptedCollection(t *T, coll *mongo.Collection, encryptedFields interface{}) { + t.Helper() + + var efBSON bsoncore.Document + efBSON, err := bson.Marshal(encryptedFields) + assert.Nil(t, err, "error in Marshal: %v", err) + + // Drop the two encryption-related, associated collections: `escCollection` and `ecocCollection`. + // Drop ESCCollection. + escCollection, err := csfle.GetEncryptedStateCollectionName(efBSON, coll.Name(), csfle.EncryptedStateCollection) + assert.Nil(t, err, "error in getEncryptedStateCollectionName: %v", err) + err = coll.Database().Collection(escCollection).Drop(context.Background()) + assert.Nil(t, err, "error in Drop: %v", err) + + // Drop ECOCCollection. + ecocCollection, err := csfle.GetEncryptedStateCollectionName(efBSON, coll.Name(), csfle.EncryptedCompactionCollection) + assert.Nil(t, err, "error in getEncryptedStateCollectionName: %v", err) + err = coll.Database().Collection(ecocCollection).Drop(context.Background()) + assert.Nil(t, err, "error in Drop: %v", err) + + // Drop the data collection. + err = coll.Drop(context.Background()) + assert.Nil(t, err, "error in Drop: %v", err) +} + +// ClearCollections drops all collections previously created by this test. +func (t *T) ClearCollections() { + // Collections should not be dropped when testing against Atlas Data Lake because the data is pre-inserted. + if !testContext.dataLake { + for _, coll := range t.createdColls { + if coll.CreateOpts != nil && coll.CreateOpts.EncryptedFields != nil { + DropEncryptedCollection(t, coll.created, coll.CreateOpts.EncryptedFields) + } + + err := coll.created.Drop(context.Background()) + if errors.Is(err, mongo.ErrUnacknowledgedWrite) || errors.Is(err, driver.ErrUnacknowledgedWrite) { + // It's possible that a collection could have an unacknowledged write concern, which + // could prevent it from being dropped for sharded clusters. We can resolve this by + // re-instantiating the collection with a majority write concern before dropping. + collname := coll.created.Name() + wcm := writeconcern.New(writeconcern.WMajority(), writeconcern.WTimeout(1*time.Second)) + wccoll := t.DB.Collection(collname, options.Collection().SetWriteConcern(wcm)) + _ = wccoll.Drop(context.Background()) + + } + } + } + t.createdColls = t.createdColls[:0] +} + +// SetFailPoint sets a fail point for the client associated with T. Commands to create the failpoint will appear +// in command monitoring channels. The fail point will automatically be disabled after this test has run. +func (t *T) SetFailPoint(fp FailPoint) { + // ensure mode fields are int32 + if modeMap, ok := fp.Mode.(map[string]interface{}); ok { + var key string + var err error + + if times, ok := modeMap["times"]; ok { + key = "times" + modeMap["times"], err = t.interfaceToInt32(times) + } + if skip, ok := modeMap["skip"]; ok { + key = "skip" + modeMap["skip"], err = t.interfaceToInt32(skip) + } + + if err != nil { + t.Fatalf("error converting %s to int32: %v", key, err) + } + } + + if err := SetFailPoint(fp, t.Client); err != nil { + t.Fatal(err) + } + t.failPointNames = append(t.failPointNames, fp.ConfigureFailPoint) +} + +// SetFailPointFromDocument sets the fail point represented by the given document for the client associated with T. This +// method assumes that the given document is in the form {configureFailPoint: , ...}. Commands to create +// the failpoint will appear in command monitoring channels. The fail point will be automatically disabled after this +// test has run. +func (t *T) SetFailPointFromDocument(fp bson.Raw) { + if err := SetRawFailPoint(fp, t.Client); err != nil { + t.Fatal(err) + } + + name := fp.Index(0).Value().StringValue() + t.failPointNames = append(t.failPointNames, name) +} + +// TrackFailPoint adds the given fail point to the list of fail points to be disabled when the current test finishes. +// This function does not create a fail point on the server. +func (t *T) TrackFailPoint(fpName string) { + t.failPointNames = append(t.failPointNames, fpName) +} + +// ClearFailPoints disables all previously set failpoints for this test. +func (t *T) ClearFailPoints() { + db := t.Client.Database("admin") + for _, fp := range t.failPointNames { + cmd := bson.D{ + {"configureFailPoint", fp}, + {"mode", "off"}, + } + err := db.RunCommand(context.Background(), cmd).Err() + if err != nil { + t.Fatalf("error clearing fail point %s: %v", fp, err) + } + } + t.failPointNames = t.failPointNames[:0] +} + +// CloneDatabase modifies the default database for this test to match the given options. +func (t *T) CloneDatabase(opts *options.DatabaseOptions) { + t.DB = t.Client.Database(t.dbName, opts) +} + +// CloneCollection modifies the default collection for this test to match the given options. +func (t *T) CloneCollection(opts *options.CollectionOptions) { + var err error + t.Coll, err = t.Coll.Clone(opts) + assert.Nil(t, err, "error cloning collection: %v", err) +} + +func sanitizeCollectionName(db string, coll string) string { + // Collections can't have "$" in their names, so we substitute it with "%". + coll = strings.ReplaceAll(coll, "$", "%") + + // Namespaces can only have 120 bytes max. + if len(db+"."+coll) >= 120 { + // coll len must be <= remaining + remaining := 120 - (len(db) + 1) // +1 for "." + coll = coll[len(coll)-remaining:] + } + return coll +} + +func (t *T) createTestClient() { + clientOpts := t.clientOpts + if clientOpts == nil { + // default opts + clientOpts = options.Client().SetWriteConcern(MajorityWc).SetReadPreference(PrimaryRp) + } + // set ServerAPIOptions to latest version if required + if clientOpts.Deployment == nil && t.clientType != Mock && clientOpts.ServerAPIOptions == nil && testContext.requireAPIVersion { + clientOpts.SetServerAPIOptions(options.ServerAPI(driver.TestServerAPIVersion)) + } + + // Setup command monitor + var customMonitor = clientOpts.Monitor + clientOpts.SetMonitor(&event.CommandMonitor{ + Started: func(ctx context.Context, cse *event.CommandStartedEvent) { + if customMonitor != nil && customMonitor.Started != nil { + customMonitor.Started(ctx, cse) + } + t.monitorLock.Lock() + defer t.monitorLock.Unlock() + t.started = append(t.started, cse) + }, + Succeeded: func(ctx context.Context, cse *event.CommandSucceededEvent) { + if customMonitor != nil && customMonitor.Succeeded != nil { + customMonitor.Succeeded(ctx, cse) + } + t.monitorLock.Lock() + defer t.monitorLock.Unlock() + t.succeeded = append(t.succeeded, cse) + }, + Failed: func(ctx context.Context, cfe *event.CommandFailedEvent) { + if customMonitor != nil && customMonitor.Failed != nil { + customMonitor.Failed(ctx, cfe) + } + t.monitorLock.Lock() + defer t.monitorLock.Unlock() + t.failed = append(t.failed, cfe) + }, + }) + // only specify connection pool monitor if no deployment is given + if clientOpts.Deployment == nil { + previousPoolMonitor := clientOpts.PoolMonitor + + clientOpts.SetPoolMonitor(&event.PoolMonitor{ + Event: func(evt *event.PoolEvent) { + if previousPoolMonitor != nil { + previousPoolMonitor.Event(evt) + } + + switch evt.Type { + case event.GetSucceeded: + atomic.AddInt64(&t.connsCheckedOut, 1) + case event.ConnectionReturned: + atomic.AddInt64(&t.connsCheckedOut, -1) + } + }, + }) + } + + var err error + switch t.clientType { + case Pinned: + // pin to first mongos + pinnedHostList := []string{testContext.connString.Hosts[0]} + uriOpts := options.Client().ApplyURI(testContext.connString.Original).SetHosts(pinnedHostList) + t.Client, err = mongo.NewClient(uriOpts, clientOpts) + case Mock: + // clear pool monitor to avoid configuration error + clientOpts.PoolMonitor = nil + t.mockDeployment = newMockDeployment() + clientOpts.Deployment = t.mockDeployment + t.Client, err = mongo.NewClient(clientOpts) + case Proxy: + t.proxyDialer = newProxyDialer() + clientOpts.SetDialer(t.proxyDialer) + + // After setting the Dialer, fall-through to the Default case to apply the correct URI + fallthrough + case Default: + // Use a different set of options to specify the URI because clientOpts may already have a URI or host seedlist + // specified. + var uriOpts *options.ClientOptions + if clientOpts.Deployment == nil { + // Only specify URI if the deployment is not set to avoid setting topology/server options along with the + // deployment. + uriOpts = options.Client().ApplyURI(testContext.connString.Original) + } + + // Pass in uriOpts first so clientOpts wins if there are any conflicting settings. + t.Client, err = mongo.NewClient(uriOpts, clientOpts) + } + if err != nil { + t.Fatalf("error creating client: %v", err) + } + if err := t.Client.Connect(context.Background()); err != nil { + t.Fatalf("error connecting client: %v", err) + } +} + +func (t *T) createTestCollection() { + t.DB = t.Client.Database(t.dbName) + t.createdColls = t.createdColls[:0] + + // Collections should not be explicitly created when testing against Atlas Data Lake because they already exist in + // the server with pre-seeded data. + createOnServer := (t.createCollection == nil || *t.createCollection) && !testContext.dataLake + t.Coll = t.CreateCollection(Collection{ + Name: t.collName, + CreateOpts: t.collCreateOpts, + Opts: t.collOpts, + }, createOnServer) +} + +// verifyVersionConstraints returns an error if the cluster's server version is not in the range [min, max]. Server +// versions will only be checked if they are non-empty. +func verifyVersionConstraints(min, max string) error { + if min != "" && CompareServerVersions(testContext.serverVersion, min) < 0 { + return fmt.Errorf("server version %q is lower than min required version %q", testContext.serverVersion, min) + } + if max != "" && CompareServerVersions(testContext.serverVersion, max) > 0 { + return fmt.Errorf("server version %q is higher than max version %q", testContext.serverVersion, max) + } + return nil +} + +// verifyTopologyConstraints returns an error if the cluster's topology kind does not match one of the provided +// kinds. If the topologies slice is empty, nil is returned without any additional checks. +func verifyTopologyConstraints(topologies []TopologyKind) error { + if len(topologies) == 0 { + return nil + } + + for _, topo := range topologies { + // For ShardedReplicaSet, we won't get an exact match because testContext.topoKind will be Sharded so we do an + // additional comparison with the testContext.shardedReplicaSet field. + if topo == testContext.topoKind || (topo == ShardedReplicaSet && testContext.shardedReplicaSet) { + return nil + } + } + return fmt.Errorf("topology kind %q does not match any of the required kinds %q", testContext.topoKind, topologies) +} + +func verifyServerParametersConstraints(serverParameters map[string]bson.RawValue) error { + for param, expected := range serverParameters { + actual, err := testContext.serverParameters.LookupErr(param) + if err != nil { + return fmt.Errorf("server does not support parameter %q", param) + } + if !expected.Equal(actual) { + return fmt.Errorf("mismatched values for server parameter %q; expected %s, got %s", param, expected, actual) + } + } + return nil +} + +func verifyAuthConstraint(expected *bool) error { + if expected != nil && *expected != testContext.authEnabled { + return fmt.Errorf("test requires auth value: %v, cluster auth value: %v", *expected, testContext.authEnabled) + } + return nil +} + +func verifyServerlessConstraint(expected string) error { + switch expected { + case "require": + if !testContext.serverless { + return fmt.Errorf("test requires serverless") + } + case "forbid": + if testContext.serverless { + return fmt.Errorf("test forbids serverless") + } + case "allow", "": + default: + return fmt.Errorf("invalid value for serverless: %s", expected) + } + return nil +} + +// verifyRunOnBlockConstraint returns an error if the current environment does not match the provided RunOnBlock. +func verifyRunOnBlockConstraint(rob RunOnBlock) error { + if err := verifyVersionConstraints(rob.MinServerVersion, rob.MaxServerVersion); err != nil { + return err + } + if err := verifyTopologyConstraints(rob.Topology); err != nil { + return err + } + + // Tests in the unified test format have runOn.auth to indicate whether the + // test should be run against an auth-enabled configuration. SDAM integration + // spec tests have runOn.authEnabled to indicate the same thing. Use whichever + // is set for verifyAuthConstraint(). + auth := rob.Auth + if rob.AuthEnabled != nil { + if auth != nil { + return fmt.Errorf("runOnBlock cannot specify both auth and authEnabled") + } + auth = rob.AuthEnabled + } + if err := verifyAuthConstraint(auth); err != nil { + return err + } + + if err := verifyServerlessConstraint(rob.Serverless); err != nil { + return err + } + if err := verifyServerParametersConstraints(rob.ServerParameters); err != nil { + return err + } + + if rob.CSFLE != nil { + if *rob.CSFLE && !IsCSFLEEnabled() { + return fmt.Errorf("runOnBlock requires CSFLE to be enabled. Build with the cse tag to enable") + } else if !*rob.CSFLE && IsCSFLEEnabled() { + return fmt.Errorf("runOnBlock requires CSFLE to be disabled. Build without the cse tag to disable") + } + if *rob.CSFLE { + if err := verifyVersionConstraints("4.2", ""); err != nil { + return err + } + } + } + return nil +} + +// verifyConstraints returns an error if the current environment does not match the constraints specified for the test. +func (t *T) verifyConstraints() error { + // Check constraints not specified as runOn blocks + if err := verifyVersionConstraints(t.minServerVersion, t.maxServerVersion); err != nil { + return err + } + if err := verifyTopologyConstraints(t.validTopologies); err != nil { + return err + } + if err := verifyAuthConstraint(t.auth); err != nil { + return err + } + if t.ssl != nil && *t.ssl != testContext.sslEnabled { + return fmt.Errorf("test requires ssl value: %v, cluster ssl value: %v", *t.ssl, testContext.sslEnabled) + } + if t.enterprise != nil && *t.enterprise != testContext.enterpriseServer { + return fmt.Errorf("test requires enterprise value: %v, cluster enterprise value: %v", *t.enterprise, + testContext.enterpriseServer) + } + if t.dataLake != nil && *t.dataLake != testContext.dataLake { + return fmt.Errorf("test requires cluster to be data lake: %v, cluster is data lake: %v", *t.dataLake, + testContext.dataLake) + } + if t.requireAPIVersion != nil && *t.requireAPIVersion != testContext.requireAPIVersion { + return fmt.Errorf("test requires RequireAPIVersion value: %v, local RequireAPIVersion value: %v", *t.requireAPIVersion, + testContext.requireAPIVersion) + } + + // Check runOn blocks. The test can be executed if there are no blocks or at least block matches the current test + // setup. + if len(t.runOn) == 0 { + return nil + } + + // Stop once we find a RunOnBlock that matches the current environment. Record all errors as we go because if we + // don't find any matching blocks, we want to report the comparison errors for each block. + runOnErrors := make([]error, 0, len(t.runOn)) + for _, runOn := range t.runOn { + err := verifyRunOnBlockConstraint(runOn) + if err == nil { + return nil + } + + runOnErrors = append(runOnErrors, err) + } + return fmt.Errorf("no matching RunOnBlock; comparison errors: %v", runOnErrors) +} + +func (t *T) interfaceToInt32(i interface{}) (int32, error) { + switch conv := i.(type) { + case int: + return int32(conv), nil + case int32: + return conv, nil + case int64: + return int32(conv), nil + case float64: + return int32(conv), nil + } + + return 0, fmt.Errorf("type %T cannot be converted to int32", i) +} diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/opmsg_deployment.go b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/opmsg_deployment.go new file mode 100644 index 000000000..2ddc23c41 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/opmsg_deployment.go @@ -0,0 +1,214 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package mtest + +import ( + "context" + "errors" + + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/internal/csot" + "go.mongodb.org/mongo-driver/mongo/address" + "go.mongodb.org/mongo-driver/mongo/description" + "go.mongodb.org/mongo-driver/x/bsonx/bsoncore" + "go.mongodb.org/mongo-driver/x/mongo/driver" + "go.mongodb.org/mongo-driver/x/mongo/driver/topology" + "go.mongodb.org/mongo-driver/x/mongo/driver/wiremessage" +) + +const ( + serverAddress = address.Address("127.0.0.1:27017") + maxDocumentSize uint32 = 16777216 + maxMessageSize uint32 = 48000000 + maxBatchCount uint32 = 100000 +) + +var ( + sessionTimeoutMinutes uint32 = 30 + sessionTimeoutMinutesInt64 = int64(sessionTimeoutMinutes) + + // MockDescription is the server description used for the mock deployment. Each mocked connection returns this + // value from its Description method. + MockDescription = description.Server{ + CanonicalAddr: serverAddress, + MaxDocumentSize: maxDocumentSize, + MaxMessageSize: maxMessageSize, + MaxBatchCount: maxBatchCount, + // TODO(GODRIVER-2885): This can be removed once legacy + // SessionTimeoutMinutes is removed. + SessionTimeoutMinutes: sessionTimeoutMinutes, + SessionTimeoutMinutesPtr: &sessionTimeoutMinutesInt64, + Kind: description.RSPrimary, + WireVersion: &description.VersionRange{ + Max: topology.SupportedWireVersions.Max, + }, + } +) + +// connection implements the driver.Connection interface and responds to wire messages with pre-configured responses. +type connection struct { + responses []bson.D // responses to send when ReadWireMessage is called +} + +var _ driver.Connection = &connection{} + +// WriteWireMessage is a no-op. +func (c *connection) WriteWireMessage(context.Context, []byte) error { + return nil +} + +func (c *connection) OIDCTokenGenID() uint64 { + return 0 +} + +func (c *connection) SetOIDCTokenGenID(uint64) { +} + +// ReadWireMessage returns the next response in the connection's list of responses. +func (c *connection) ReadWireMessage(_ context.Context) ([]byte, error) { + var dst []byte + if len(c.responses) == 0 { + return dst, errors.New("no responses remaining") + } + nextRes := c.responses[0] + c.responses = c.responses[1:] + + var wmindex int32 + wmindex, dst = wiremessage.AppendHeaderStart(dst, wiremessage.NextRequestID(), 0, wiremessage.OpMsg) + dst = wiremessage.AppendMsgFlags(dst, 0) + dst = wiremessage.AppendMsgSectionType(dst, wiremessage.SingleDocument) + resBytes, _ := bson.Marshal(nextRes) + dst = append(dst, resBytes...) + dst = bsoncore.UpdateLength(dst, wmindex, int32(len(dst[wmindex:]))) + return dst, nil +} + +// Description returns a fixed server description for the connection. +func (c *connection) Description() description.Server { + return MockDescription +} + +// Close is a no-op operation. +func (*connection) Close() error { + return nil +} + +// ID returns a fixed identifier for the connection. +func (*connection) ID() string { + return "" +} + +// DriverConnectionID returns a fixed identifier for the driver pool connection. +// TODO(GODRIVER-2824): replace return type with int64. +func (*connection) DriverConnectionID() uint64 { + return 0 +} + +// ServerConnectionID returns a fixed identifier for the server connection. +func (*connection) ServerConnectionID() *int64 { + serverConnectionID := int64(42) + return &serverConnectionID +} + +// Address returns a fixed address for the connection. +func (*connection) Address() address.Address { + return serverAddress +} + +// Stale returns if the connection is stale. +func (*connection) Stale() bool { + return false +} + +// mockDeployment wraps a connection and implements the driver.Deployment interface. +type mockDeployment struct { + conn *connection + updates chan description.Topology +} + +var _ driver.Deployment = &mockDeployment{} +var _ driver.Server = &mockDeployment{} +var _ driver.Connector = &mockDeployment{} +var _ driver.Disconnector = &mockDeployment{} +var _ driver.Subscriber = &mockDeployment{} + +// SelectServer implements the Deployment interface. This method does not use the +// description.SelectedServer provided and instead returns itself. The Connections returned from the +// Connection method have a no-op Close method. +func (md *mockDeployment) SelectServer(context.Context, description.ServerSelector) (driver.Server, error) { + return md, nil +} + +// Kind implements the Deployment interface. It always returns description.Single. +func (md *mockDeployment) Kind() description.TopologyKind { + return description.Single +} + +// Connection implements the driver.Server interface. +func (md *mockDeployment) Connection(context.Context) (driver.Connection, error) { + return md.conn, nil +} + +// RTTMonitor implements the driver.Server interface. +func (md *mockDeployment) RTTMonitor() driver.RTTMonitor { + return &csot.ZeroRTTMonitor{} +} + +// Connect is a no-op method which implements the driver.Connector interface. +func (md *mockDeployment) Connect() error { + return nil +} + +// Disconnect is a no-op method which implements the driver.Disconnector interface { +func (md *mockDeployment) Disconnect(context.Context) error { + close(md.updates) + return nil +} + +// Subscribe returns a subscription from which new topology descriptions can be retrieved. +// Subscribe implements the driver.Subscriber interface. +func (md *mockDeployment) Subscribe() (*driver.Subscription, error) { + if md.updates == nil { + md.updates = make(chan description.Topology, 1) + + md.updates <- description.Topology{ + SessionTimeoutMinutesPtr: &sessionTimeoutMinutesInt64, + + // TODO(GODRIVER-2885): This can be removed once legacy + // SessionTimeoutMinutes is removed. + SessionTimeoutMinutes: sessionTimeoutMinutes, + } + } + + return &driver.Subscription{ + Updates: md.updates, + }, nil +} + +// Unsubscribe is a no-op method which implements the driver.Subscriber interface. +func (md *mockDeployment) Unsubscribe(*driver.Subscription) error { + return nil +} + +// addResponses adds responses to this mock deployment. +func (md *mockDeployment) addResponses(responses ...bson.D) { + md.conn.responses = append(md.conn.responses, responses...) +} + +// clearResponses clears all remaining responses in this mock deployment. +func (md *mockDeployment) clearResponses() { + md.conn.responses = md.conn.responses[:0] +} + +// newMockDeployment returns a mock driver.Deployment that responds with OP_MSG wire messages. +func newMockDeployment(responses ...bson.D) *mockDeployment { + return &mockDeployment{ + conn: &connection{ + responses: responses, + }, + } +} diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/options.go b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/options.go new file mode 100644 index 000000000..0f103f7cd --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/options.go @@ -0,0 +1,283 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package mtest + +import ( + "errors" + "fmt" + + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/mongo/options" +) + +// TopologyKind describes the topology that a test is run on. +type TopologyKind string + +// These constants specify valid values for TopologyKind +const ( + ReplicaSet TopologyKind = "replicaset" + Sharded TopologyKind = "sharded" + Single TopologyKind = "single" + LoadBalanced TopologyKind = "load-balanced" + // ShardedReplicaSet is a special case of sharded that requires each shard to be a replica set rather than a + // standalone server. + ShardedReplicaSet TopologyKind = "sharded-replicaset" +) + +// ClientType specifies the type of Client that should be created for a test. +type ClientType int + +// These constants specify valid values for ClientType +const ( + // Default specifies a client to the connection string in the MONGODB_URI env variable with command monitoring + // enabled. + Default ClientType = iota + // Pinned specifies a client that is pinned to a single mongos in a sharded cluster. + Pinned + // Mock specifies a client that communicates with a mock deployment. + Mock + // Proxy specifies a client that proxies messages to the server and also stores parsed copies. The proxied + // messages can be retrieved via T.GetProxiedMessages or T.GetRawProxiedMessages. + Proxy +) + +var ( + falseBool = false +) + +// RunOnBlock describes a constraint for a test. +type RunOnBlock struct { + MinServerVersion string `bson:"minServerVersion"` + MaxServerVersion string `bson:"maxServerVersion"` + Topology []TopologyKind `bson:"topology"` + Serverless string `bson:"serverless"` + ServerParameters map[string]bson.RawValue `bson:"serverParameters"` + Auth *bool `bson:"auth"` + AuthEnabled *bool `bson:"authEnabled"` + CSFLE *bool `bson:"csfle"` +} + +// UnmarshalBSON implements custom BSON unmarshalling behavior for RunOnBlock because some test formats use the +// "topology" key while the unified test format uses "topologies". +func (r *RunOnBlock) UnmarshalBSON(data []byte) error { + var temp struct { + MinServerVersion string `bson:"minServerVersion"` + MaxServerVersion string `bson:"maxServerVersion"` + Topology []TopologyKind `bson:"topology"` + Topologies []TopologyKind `bson:"topologies"` + Serverless string `bson:"serverless"` + ServerParameters map[string]bson.RawValue `bson:"serverParameters"` + Auth *bool `bson:"auth"` + AuthEnabled *bool `bson:"authEnabled"` + CSFLE *bool `bson:"csfle"` + Extra map[string]interface{} `bson:",inline"` + } + if err := bson.Unmarshal(data, &temp); err != nil { + return fmt.Errorf("error unmarshalling to temporary RunOnBlock object: %w", err) + } + if len(temp.Extra) > 0 { + return fmt.Errorf("unrecognized fields for RunOnBlock: %v", temp.Extra) + } + + r.MinServerVersion = temp.MinServerVersion + r.MaxServerVersion = temp.MaxServerVersion + r.Serverless = temp.Serverless + r.ServerParameters = temp.ServerParameters + r.Auth = temp.Auth + r.AuthEnabled = temp.AuthEnabled + r.CSFLE = temp.CSFLE + + if temp.Topology != nil { + r.Topology = temp.Topology + } + if temp.Topologies != nil { + if r.Topology != nil { + return errors.New("both 'topology' and 'topologies' keys cannot be specified for a RunOnBlock") + } + + r.Topology = temp.Topologies + } + return nil +} + +// optionFunc is a function type that configures a T instance. +type optionFunc func(*T) + +// Options is the type used to configure a new T instance. +type Options struct { + optFuncs []optionFunc +} + +// NewOptions creates an empty Options instance. +func NewOptions() *Options { + return &Options{} +} + +// CollectionCreateOptions sets the options to pass to Database.CreateCollection() when creating a collection for a test. +func (op *Options) CollectionCreateOptions(opts *options.CreateCollectionOptions) *Options { + op.optFuncs = append(op.optFuncs, func(t *T) { + t.collCreateOpts = opts + }) + return op +} + +// CollectionOptions sets the options to use when creating a collection for a test. +func (op *Options) CollectionOptions(opts *options.CollectionOptions) *Options { + op.optFuncs = append(op.optFuncs, func(t *T) { + t.collOpts = opts + }) + return op +} + +// ClientOptions sets the options to use when creating a client for a test. +func (op *Options) ClientOptions(opts *options.ClientOptions) *Options { + op.optFuncs = append(op.optFuncs, func(t *T) { + t.clientOpts = opts + }) + return op +} + +// CreateClient specifies whether or not a client should be created for a test. This should be set to false when running +// a test that only runs other tests. +func (op *Options) CreateClient(create bool) *Options { + op.optFuncs = append(op.optFuncs, func(t *T) { + t.createClient = &create + }) + return op +} + +// CreateCollection specifies whether or not a collection should be created for a test. The default value is true. +func (op *Options) CreateCollection(create bool) *Options { + op.optFuncs = append(op.optFuncs, func(t *T) { + t.createCollection = &create + }) + return op +} + +// ShareClient specifies whether or not a test should pass its client down to sub-tests. This should be set when calling +// New() if the inheriting behavior is desired. This option must not be used if the test accesses command monitoring +// events. +func (op *Options) ShareClient(share bool) *Options { + op.optFuncs = append(op.optFuncs, func(t *T) { + t.shareClient = &share + }) + return op +} + +// CollectionName specifies the name for the collection for the test. +func (op *Options) CollectionName(collName string) *Options { + op.optFuncs = append(op.optFuncs, func(t *T) { + t.collName = collName + }) + return op +} + +// DatabaseName specifies the name of the database for the test. +func (op *Options) DatabaseName(dbName string) *Options { + op.optFuncs = append(op.optFuncs, func(t *T) { + t.dbName = dbName + }) + return op +} + +// ClientType specifies the type of client that should be created for a test. This option will be propagated to all +// sub-tests. If the provided ClientType is Proxy, the SSL(false) option will be also be added because the internal +// proxy dialer and connection types do not support SSL. +func (op *Options) ClientType(ct ClientType) *Options { + op.optFuncs = append(op.optFuncs, func(t *T) { + t.clientType = ct + + if ct == Proxy { + t.ssl = &falseBool + } + }) + return op +} + +// MockResponses specifies the responses returned by a mock deployment. This should only be used if the current test +// is being run with MockDeployment(true). Responses can also be added after a sub-test has already been created. +func (op *Options) MockResponses(responses ...bson.D) *Options { + op.optFuncs = append(op.optFuncs, func(t *T) { + t.mockResponses = responses + }) + return op +} + +// RunOn specifies run-on blocks used to determine if a test should run. If a test's environment meets at least one of the +// given constraints, it will be run. Otherwise, it will be skipped. +func (op *Options) RunOn(blocks ...RunOnBlock) *Options { + op.optFuncs = append(op.optFuncs, func(t *T) { + t.runOn = append(t.runOn, blocks...) + }) + return op +} + +// MinServerVersion specifies the minimum server version for the test. +func (op *Options) MinServerVersion(version string) *Options { + op.optFuncs = append(op.optFuncs, func(t *T) { + t.minServerVersion = version + }) + return op +} + +// MaxServerVersion specifies the maximum server version for the test. +func (op *Options) MaxServerVersion(version string) *Options { + op.optFuncs = append(op.optFuncs, func(t *T) { + t.maxServerVersion = version + }) + return op +} + +// Topologies specifies a list of topologies that the test can run on. +func (op *Options) Topologies(topos ...TopologyKind) *Options { + op.optFuncs = append(op.optFuncs, func(t *T) { + t.validTopologies = topos + }) + return op +} + +// Auth specifies whether or not auth should be enabled for this test to run. By default, a test will run regardless +// of whether or not auth is enabled. +func (op *Options) Auth(auth bool) *Options { + op.optFuncs = append(op.optFuncs, func(t *T) { + t.auth = &auth + }) + return op +} + +// SSL specifies whether or not SSL should be enabled for this test to run. By default, a test will run regardless +// of whether or not SSL is enabled. +func (op *Options) SSL(ssl bool) *Options { + op.optFuncs = append(op.optFuncs, func(t *T) { + t.ssl = &ssl + }) + return op +} + +// Enterprise specifies whether or not this test should only be run on enterprise server variants. Defaults to false. +func (op *Options) Enterprise(ent bool) *Options { + op.optFuncs = append(op.optFuncs, func(t *T) { + t.enterprise = &ent + }) + return op +} + +// AtlasDataLake specifies whether this test should only be run against Atlas Data Lake servers. Defaults to false. +func (op *Options) AtlasDataLake(adl bool) *Options { + op.optFuncs = append(op.optFuncs, func(t *T) { + t.dataLake = &adl + }) + return op +} + +// RequireAPIVersion specifies whether this test should only be run when REQUIRE_API_VERSION is true. Defaults to false. +func (op *Options) RequireAPIVersion(rav bool) *Options { + op.optFuncs = append(op.optFuncs, func(t *T) { + t.requireAPIVersion = &rav + }) + return op +} diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/proxy_dialer.go b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/proxy_dialer.go new file mode 100644 index 000000000..c8e9e6d45 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/proxy_dialer.go @@ -0,0 +1,186 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package mtest + +import ( + "context" + "errors" + "fmt" + "net" + "sync" + "time" + + "go.mongodb.org/mongo-driver/mongo/options" + "go.mongodb.org/mongo-driver/x/bsonx/bsoncore" +) + +// ProxyMessage represents a sent/received pair of parsed wire messages. +type ProxyMessage struct { + ServerAddress string + CommandName string + Sent *SentMessage + Received *ReceivedMessage +} + +// proxyDialer is a ContextDialer implementation that wraps a net.Dialer and records the messages sent and received +// using connections created through it. +type proxyDialer struct { + *net.Dialer + sync.Mutex + + messages []*ProxyMessage + // sentMap temporarily stores the message sent to the server using the requestID so it can map requests to their + // responses. + sentMap sync.Map + // addressTranslations maps dialed addresses to the remote addresses reported by the created connections if they + // differ. This can happen if a connection is dialed to a host name, in which case the reported remote address will + // be the resolved IP address. + addressTranslations sync.Map +} + +var _ options.ContextDialer = (*proxyDialer)(nil) + +func newProxyDialer() *proxyDialer { + return &proxyDialer{ + Dialer: &net.Dialer{Timeout: 30 * time.Second}, + } +} + +func newProxyErrorWithWireMsg(wm []byte, err error) error { + return fmt.Errorf("proxy error for wiremessage %v: %w", wm, err) +} + +// DialContext creates a new proxyConnection. +func (p *proxyDialer) DialContext(ctx context.Context, network, address string) (net.Conn, error) { + netConn, err := p.Dialer.DialContext(ctx, network, address) + if err != nil { + return netConn, err + } + + // If the connection's remote address does not match the dialed address, store it in the translations map for + // future look-up. Use the remote address as they key because that's what we'll have access to in the connection's + // Read/Write functions. + if remoteAddress := netConn.RemoteAddr().String(); remoteAddress != address { + p.addressTranslations.Store(remoteAddress, address) + } + + proxy := &proxyConn{ + Conn: netConn, + dialer: p, + } + return proxy, nil +} + +func (p *proxyDialer) storeSentMessage(wm []byte) error { + p.Lock() + defer p.Unlock() + + // Create a copy of the wire message so it can be parsed/stored and will not be affected if the wm slice is + // changed by the driver. + wmCopy := copyBytes(wm) + parsed, err := parseSentMessage(wmCopy) + if err != nil { + return err + } + p.sentMap.Store(parsed.RequestID, parsed) + return nil +} + +func (p *proxyDialer) storeReceivedMessage(wm []byte, addr string) error { + p.Lock() + defer p.Unlock() + + serverAddress := addr + if translated, ok := p.addressTranslations.Load(addr); ok { + serverAddress = translated.(string) + } + + // Create a copy of the wire message so it can be parsed/stored and will not be affected if the wm slice is + // changed by the driver. Parse the incoming message and get the corresponding outgoing message. + wmCopy := copyBytes(wm) + parsed, err := parseReceivedMessage(wmCopy) + if err != nil { + return err + } + mapValue, ok := p.sentMap.Load(parsed.ResponseTo) + if !ok { + return errors.New("no sent message found") + } + sent := mapValue.(*SentMessage) + p.sentMap.Delete(parsed.ResponseTo) + + // Store the parsed message pair. + msgPair := &ProxyMessage{ + // The command name is always the first key in the command document. + CommandName: sent.Command.Index(0).Key(), + ServerAddress: serverAddress, + Sent: sent, + Received: parsed, + } + p.messages = append(p.messages, msgPair) + return nil +} + +// Messages returns a slice of proxied messages. This slice is a copy of the messages proxied so far and will not be +// updated for messages proxied after this call. +func (p *proxyDialer) Messages() []*ProxyMessage { + p.Lock() + defer p.Unlock() + + copiedMessages := make([]*ProxyMessage, len(p.messages)) + copy(copiedMessages, p.messages) + return copiedMessages +} + +// proxyConn is a net.Conn that wraps a network connection. All messages sent/received through a proxyConn are stored +// in the associated proxyDialer and are forwarded over the wrapped connection. Errors encountered when parsing and +// storing wire messages are wrapped to add context, while errors returned from the underlying network connection are +// forwarded without wrapping. +type proxyConn struct { + net.Conn + dialer *proxyDialer +} + +// Write stores the given message in the proxyDialer associated with this connection and forwards the message to the +// server. +func (pc *proxyConn) Write(wm []byte) (n int, err error) { + if err := pc.dialer.storeSentMessage(wm); err != nil { + wrapped := fmt.Errorf("error storing sent message: %w", err) + return 0, newProxyErrorWithWireMsg(wm, wrapped) + } + + return pc.Conn.Write(wm) +} + +// Read reads the message from the server into the given buffer and stores the read message in the proxyDialer +// associated with this connection. +func (pc *proxyConn) Read(buffer []byte) (int, error) { + n, err := pc.Conn.Read(buffer) + if err != nil { + return n, err + } + + // The driver reads wire messages in two phases: a four-byte read to get the length of the incoming wire message + // and a (length-4) byte read to get the message itself. There's nothing to be stored during the initial four-byte + // read because we can calculate the length from the rest of the message. + if len(buffer) == 4 { + return 4, nil + } + + // The buffer contains the entire wire message except for the length bytes. Re-create the full message by appending + // buffer to the end of a four-byte slice and using UpdateLength to set the length bytes. + idx, wm := bsoncore.ReserveLength(nil) + wm = append(wm, buffer...) + wm = bsoncore.UpdateLength(wm, idx, int32(len(wm[idx:]))) + + if err := pc.dialer.storeReceivedMessage(wm, pc.RemoteAddr().String()); err != nil { + wrapped := fmt.Errorf("error storing received message: %w", err) + return 0, newProxyErrorWithWireMsg(wm, wrapped) + } + + return n, nil +} diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/received_message.go b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/received_message.go new file mode 100644 index 000000000..2e2f95224 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/received_message.go @@ -0,0 +1,124 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package mtest + +import ( + "errors" + "fmt" + + "go.mongodb.org/mongo-driver/x/bsonx/bsoncore" + "go.mongodb.org/mongo-driver/x/mongo/driver/wiremessage" +) + +// ReceivedMessage represents a message received from the server. +type ReceivedMessage struct { + ResponseTo int32 + RawMessage wiremessage.WireMessage + Response bsoncore.Document +} + +type receivedMsgParseFn func([]byte) (*ReceivedMessage, error) + +func getReceivedMessageParser(opcode wiremessage.OpCode) (receivedMsgParseFn, bool) { + switch opcode { + case wiremessage.OpReply: + return parseOpReply, true + case wiremessage.OpMsg: + return parseReceivedOpMsg, true + case wiremessage.OpCompressed: + return parseReceivedOpCompressed, true + default: + return nil, false + } +} + +func parseReceivedMessage(wm []byte) (*ReceivedMessage, error) { + // Re-assign the wire message to "remaining" so "wm" continues to point to the entire message after parsing. + _, _, responseTo, opcode, remaining, ok := wiremessage.ReadHeader(wm) + if !ok { + return nil, errors.New("failed to read wiremessage header") + } + + parseFn, ok := getReceivedMessageParser(opcode) + if !ok { + return nil, fmt.Errorf("unknown opcode: %s", opcode) + } + received, err := parseFn(remaining) + if err != nil { + return nil, fmt.Errorf("error parsing wiremessage with opcode %s: %w", opcode, err) + } + + received.ResponseTo = responseTo + received.RawMessage = wm + return received, nil +} + +func parseOpReply(wm []byte) (*ReceivedMessage, error) { + var ok bool + + if _, wm, ok = wiremessage.ReadReplyFlags(wm); !ok { + return nil, errors.New("failed to read reply flags") + } + if _, wm, ok = wiremessage.ReadReplyCursorID(wm); !ok { + return nil, errors.New("failed to read cursor ID") + } + if _, wm, ok = wiremessage.ReadReplyStartingFrom(wm); !ok { + return nil, errors.New("failed to read starting from") + } + if _, wm, ok = wiremessage.ReadReplyNumberReturned(wm); !ok { + return nil, errors.New("failed to read number returned") + } + + var replyDocuments []bsoncore.Document + replyDocuments, wm, ok = wiremessage.ReadReplyDocuments(wm) + if !ok { + return nil, errors.New("failed to read reply documents") + } + if len(replyDocuments) == 0 { + return nil, errors.New("no documents in response") + } + + rm := &ReceivedMessage{ + Response: replyDocuments[0], + } + return rm, nil +} + +func parseReceivedOpMsg(wm []byte) (*ReceivedMessage, error) { + var ok bool + var err error + + if _, wm, ok = wiremessage.ReadMsgFlags(wm); !ok { + return nil, errors.New("failed to read flags") + } + + if wm, err = assertMsgSectionType(wm, wiremessage.SingleDocument); err != nil { + return nil, fmt.Errorf("error verifying section type for response document: %w", err) + } + + response, wm, ok := wiremessage.ReadMsgSectionSingleDocument(wm) + if !ok { + return nil, errors.New("failed to read response document") + } + rm := &ReceivedMessage{ + Response: response, + } + return rm, nil +} + +func parseReceivedOpCompressed(wm []byte) (*ReceivedMessage, error) { + originalOpcode, wm, err := parseOpCompressed(wm) + if err != nil { + return nil, err + } + + parser, ok := getReceivedMessageParser(originalOpcode) + if !ok { + return nil, fmt.Errorf("unknown original opcode %v", originalOpcode) + } + return parser(wm) +} diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/sent_message.go b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/sent_message.go new file mode 100644 index 000000000..94eed1225 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/sent_message.go @@ -0,0 +1,197 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package mtest + +import ( + "errors" + "fmt" + + "go.mongodb.org/mongo-driver/x/bsonx/bsoncore" + "go.mongodb.org/mongo-driver/x/mongo/driver/wiremessage" +) + +// SentMessage represents a message sent by the driver to the server. +type SentMessage struct { + RequestID int32 + RawMessage wiremessage.WireMessage + Command bsoncore.Document + OpCode wiremessage.OpCode + + // The $readPreference document. This is separated into its own field even though it's included in the larger + // command document in both OP_QUERY and OP_MSG because OP_QUERY separates the command into a $query sub-document + // if there is a read preference. To unify OP_QUERY and OP_MSG, we pull this out into a separate field and set + // the Command field to the $query sub-document. + ReadPreference bsoncore.Document + + // The documents sent for an insert, update, or delete command. This is separated into its own field because it's + // sent as part of the command document in OP_QUERY and as a document sequence outside the command document in + // OP_MSG. + DocumentSequence *bsoncore.DocumentSequence +} + +type sentMsgParseFn func([]byte) (*SentMessage, error) + +func getSentMessageParser(opcode wiremessage.OpCode) (sentMsgParseFn, bool) { + switch opcode { + case wiremessage.OpQuery: + return parseOpQuery, true + case wiremessage.OpMsg: + return parseSentOpMsg, true + case wiremessage.OpCompressed: + return parseSentOpCompressed, true + default: + return nil, false + } +} + +func parseOpQuery(wm []byte) (*SentMessage, error) { + var ok bool + + if _, wm, ok = wiremessage.ReadQueryFlags(wm); !ok { + return nil, errors.New("failed to read query flags") + } + if _, wm, ok = wiremessage.ReadQueryFullCollectionName(wm); !ok { + return nil, errors.New("failed to read full collection name") + } + if _, wm, ok = wiremessage.ReadQueryNumberToSkip(wm); !ok { + return nil, errors.New("failed to read number to skip") + } + if _, wm, ok = wiremessage.ReadQueryNumberToReturn(wm); !ok { + return nil, errors.New("failed to read number to return") + } + + query, wm, ok := wiremessage.ReadQueryQuery(wm) + if !ok { + return nil, errors.New("failed to read query") + } + + // If there is no read preference document, the command document is query. + // Otherwise, query is in the format {$query: , $readPreference: }. + commandDoc := query + var rpDoc bsoncore.Document + + dollarQueryVal, err := query.LookupErr("$query") + if err == nil { + commandDoc = dollarQueryVal.Document() + + rpVal, err := query.LookupErr("$readPreference") + if err != nil { + return nil, fmt.Errorf("query %s contains $query but not $readPreference fields", query) + } + rpDoc = rpVal.Document() + } + + // For OP_QUERY, inserts, updates, and deletes are sent as a BSON array of documents inside the main command + // document. Pull these sequences out into an ArrayStyle DocumentSequence. + var docSequence *bsoncore.DocumentSequence + cmdElems, _ := commandDoc.Elements() + for _, elem := range cmdElems { + switch elem.Key() { + case "documents", "updates", "deletes": + docSequence = &bsoncore.DocumentSequence{ + Style: bsoncore.ArrayStyle, + Data: elem.Value().Array(), + } + } + if docSequence != nil { + // There can only be one of these arrays in a well-formed command, so we exit the loop once one is found. + break + } + } + + sm := &SentMessage{ + Command: commandDoc, + ReadPreference: rpDoc, + DocumentSequence: docSequence, + } + return sm, nil +} + +func parseSentMessage(wm []byte) (*SentMessage, error) { + // Re-assign the wire message to "remaining" so "wm" continues to point to the entire message after parsing. + _, requestID, _, opcode, remaining, ok := wiremessage.ReadHeader(wm) + if !ok { + return nil, errors.New("failed to read wiremessage header") + } + + parseFn, ok := getSentMessageParser(opcode) + if !ok { + return nil, fmt.Errorf("unknown opcode: %v", opcode) + } + sent, err := parseFn(remaining) + if err != nil { + return nil, fmt.Errorf("error parsing wiremessage with opcode %s: %w", opcode, err) + } + + sent.RequestID = requestID + sent.RawMessage = wm + sent.OpCode = opcode + return sent, nil +} + +func parseSentOpMsg(wm []byte) (*SentMessage, error) { + var ok bool + var err error + + if _, wm, ok = wiremessage.ReadMsgFlags(wm); !ok { + return nil, errors.New("failed to read flags") + } + + if wm, err = assertMsgSectionType(wm, wiremessage.SingleDocument); err != nil { + return nil, fmt.Errorf("error verifying section type for command document: %w", err) + } + + var commandDoc bsoncore.Document + commandDoc, wm, ok = wiremessage.ReadMsgSectionSingleDocument(wm) + if !ok { + return nil, errors.New("failed to read command document") + } + + var rpDoc bsoncore.Document + if rpVal, err := commandDoc.LookupErr("$readPreference"); err == nil { + rpDoc = rpVal.Document() + } + + var docSequence *bsoncore.DocumentSequence + if len(wm) != 0 { + // If there are bytes remaining in the wire message, they must correspond to a DocumentSequence section. + if wm, err = assertMsgSectionType(wm, wiremessage.DocumentSequence); err != nil { + return nil, fmt.Errorf("error verifying section type for document sequence: %w", err) + } + + var data []byte + _, data, wm, ok = wiremessage.ReadMsgSectionRawDocumentSequence(wm) + if !ok { + return nil, errors.New("failed to read document sequence") + } + + docSequence = &bsoncore.DocumentSequence{ + Style: bsoncore.SequenceStyle, + Data: data, + } + } + + sm := &SentMessage{ + Command: commandDoc, + ReadPreference: rpDoc, + DocumentSequence: docSequence, + } + return sm, nil +} + +func parseSentOpCompressed(wm []byte) (*SentMessage, error) { + originalOpcode, wm, err := parseOpCompressed(wm) + if err != nil { + return nil, err + } + + parser, ok := getSentMessageParser(originalOpcode) + if !ok { + return nil, fmt.Errorf("unknown original opcode %v", originalOpcode) + } + return parser(wm) +} diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/setup.go b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/setup.go new file mode 100644 index 000000000..cdd33219c --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/setup.go @@ -0,0 +1,368 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package mtest + +import ( + "context" + "errors" + "fmt" + "math" + "os" + "strconv" + "strings" + "time" + + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/internal/integtest" + "go.mongodb.org/mongo-driver/mongo" + "go.mongodb.org/mongo-driver/mongo/description" + "go.mongodb.org/mongo-driver/mongo/options" + "go.mongodb.org/mongo-driver/mongo/readpref" + "go.mongodb.org/mongo-driver/mongo/writeconcern" + "go.mongodb.org/mongo-driver/x/mongo/driver" + "go.mongodb.org/mongo-driver/x/mongo/driver/connstring" + "go.mongodb.org/mongo-driver/x/mongo/driver/topology" +) + +const ( + // TestDb specifies the name of default test database. + TestDb = "test" +) + +// testContext holds the global context for the integration tests. The testContext members should only be initialized +// once during the global setup in TestMain. These variables should only be accessed indirectly through MongoTest +// instances. +var testContext struct { + connString *connstring.ConnString + topo *topology.Topology + topoKind TopologyKind + // shardedReplicaSet will be true if we're connected to a sharded cluster and each shard is backed by a replica set. + // We track this as a separate boolean rather than setting topoKind to ShardedReplicaSet because a general + // "Sharded" constraint in a test should match both Sharded and ShardedReplicaSet. + shardedReplicaSet bool + client *mongo.Client // client used for setup and teardown + serverVersion string + authEnabled bool + sslEnabled bool + enterpriseServer bool + dataLake bool + requireAPIVersion bool + serverParameters bson.Raw + singleMongosLoadBalancerURI string + multiMongosLoadBalancerURI string + serverless bool +} + +func setupClient(opts *options.ClientOptions) (*mongo.Client, error) { + wcMajority := writeconcern.New(writeconcern.WMajority()) + // set ServerAPIOptions to latest version if required + if opts.ServerAPIOptions == nil && testContext.requireAPIVersion { + opts.SetServerAPIOptions(options.ServerAPI(driver.TestServerAPIVersion)) + } + // for sharded clusters, pin to one host. Due to how the cache is implemented on 4.0 and 4.2, behavior + // can be inconsistent when multiple mongoses are used + return mongo.Connect(context.Background(), opts.SetWriteConcern(wcMajority).SetHosts(opts.Hosts[:1])) +} + +// Setup initializes the current testing context. +// This function must only be called one time and must be called before any tests run. +func Setup(setupOpts ...*SetupOptions) error { + opts := MergeSetupOptions(setupOpts...) + + var uri string + var err error + + switch { + case opts.URI != nil: + uri = *opts.URI + default: + var err error + uri, err = integtest.MongoDBURI() + if err != nil { + return fmt.Errorf("error getting uri: %w", err) + } + } + + testContext.connString, err = connstring.ParseAndValidate(uri) + if err != nil { + return fmt.Errorf("error parsing and validating connstring: %w", err) + } + + testContext.dataLake = os.Getenv("ATLAS_DATA_LAKE_INTEGRATION_TEST") == "true" + testContext.requireAPIVersion = os.Getenv("REQUIRE_API_VERSION") == "true" + + clientOpts := options.Client().ApplyURI(uri) + integtest.AddTestServerAPIVersion(clientOpts) + + cfg, err := topology.NewConfig(clientOpts, nil) + if err != nil { + return fmt.Errorf("error constructing topology config: %w", err) + } + + testContext.topo, err = topology.New(cfg) + if err != nil { + return fmt.Errorf("error creating topology: %w", err) + } + if err = testContext.topo.Connect(); err != nil { + return fmt.Errorf("error connecting topology: %w", err) + } + + testContext.client, err = setupClient(options.Client().ApplyURI(uri)) + if err != nil { + return fmt.Errorf("error connecting test client: %w", err) + } + + pingCtx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + if err := testContext.client.Ping(pingCtx, readpref.Primary()); err != nil { + return fmt.Errorf("ping error: %w; make sure the deployment is running on URI %v", err, + testContext.connString.Original) + } + + if testContext.serverVersion, err = getServerVersion(); err != nil { + return fmt.Errorf("error getting server version: %w", err) + } + + switch testContext.topo.Kind() { + case description.Single: + testContext.topoKind = Single + case description.ReplicaSet, description.ReplicaSetWithPrimary, description.ReplicaSetNoPrimary: + testContext.topoKind = ReplicaSet + case description.Sharded: + testContext.topoKind = Sharded + case description.LoadBalanced: + testContext.topoKind = LoadBalanced + default: + return fmt.Errorf("could not detect topology kind; current topology: %s", testContext.topo.String()) + } + + // If we're connected to a sharded cluster, determine if the cluster is backed by replica sets. + if testContext.topoKind == Sharded { + // Run a find against config.shards and get each document in the collection. + cursor, err := testContext.client.Database("config").Collection("shards").Find(context.Background(), bson.D{}) + if err != nil { + return fmt.Errorf("error running find against config.shards: %w", err) + } + defer cursor.Close(context.Background()) + + var shards []struct { + Host string `bson:"host"` + } + if err := cursor.All(context.Background(), &shards); err != nil { + return fmt.Errorf("error getting results find against config.shards: %w", err) + } + + // Each document's host field will contain a single hostname if the shard is a standalone. If it's a replica + // set, the host field will be in the format "replicaSetName/host1,host2,...". Therefore, we can determine that + // the shard is a standalone if the "/" character isn't present. + var foundStandalone bool + for _, shard := range shards { + if !strings.Contains(shard.Host, "/") { + foundStandalone = true + break + } + } + if !foundStandalone { + testContext.shardedReplicaSet = true + } + } + + // For non-serverless, load balanced clusters, retrieve the required LB URIs and add additional information (e.g. TLS options) to + // them if necessary. + testContext.serverless = os.Getenv("SERVERLESS") == "serverless" + if !testContext.serverless && testContext.topoKind == LoadBalanced { + singleMongosURI := os.Getenv("SINGLE_MONGOS_LB_URI") + if singleMongosURI == "" { + return errors.New("SINGLE_MONGOS_LB_URI must be set when running against load balanced clusters") + } + testContext.singleMongosLoadBalancerURI, err = addNecessaryParamsToURI(singleMongosURI) + if err != nil { + return fmt.Errorf("error getting single mongos load balancer uri: %w", err) + } + + multiMongosURI := os.Getenv("MULTI_MONGOS_LB_URI") + if multiMongosURI == "" { + return errors.New("MULTI_MONGOS_LB_URI must be set when running against load balanced clusters") + } + testContext.multiMongosLoadBalancerURI, err = addNecessaryParamsToURI(multiMongosURI) + if err != nil { + return fmt.Errorf("error getting multi mongos load balancer uri: %w", err) + } + } + + testContext.authEnabled = os.Getenv("AUTH") == "auth" + testContext.sslEnabled = os.Getenv("SSL") == "ssl" + biRes, err := testContext.client.Database("admin").RunCommand(context.Background(), bson.D{{"buildInfo", 1}}).Raw() + if err != nil { + return fmt.Errorf("buildInfo error: %w", err) + } + modulesRaw, err := biRes.LookupErr("modules") + if err == nil { + // older server versions don't report "modules" field in buildInfo result + modules, _ := modulesRaw.Array().Values() + for _, module := range modules { + if module.StringValue() == "enterprise" { + testContext.enterpriseServer = true + break + } + } + } + + // Get server parameters if test is not running against ADL; ADL does not have "getParameter" command. + if !testContext.dataLake { + db := testContext.client.Database("admin") + testContext.serverParameters, err = db.RunCommand(context.Background(), bson.D{{"getParameter", "*"}}).Raw() + if err != nil { + return fmt.Errorf("error getting serverParameters: %w", err) + } + } + return nil +} + +// Teardown cleans up resources initialized by Setup. +// This function must be called once after all tests have finished running. +func Teardown() error { + // Dropping the test database causes an error against Atlas Data Lake. + if !testContext.dataLake { + if err := testContext.client.Database(TestDb).Drop(context.Background()); err != nil { + return fmt.Errorf("error dropping test database: %w", err) + } + } + if err := testContext.client.Disconnect(context.Background()); err != nil { + return fmt.Errorf("error disconnecting test client: %w", err) + } + if err := testContext.topo.Disconnect(context.Background()); err != nil { + return fmt.Errorf("error disconnecting test topology: %w", err) + } + return nil +} + +func getServerVersion() (string, error) { + var serverStatus bson.Raw + err := testContext.client.Database(TestDb).RunCommand( + context.Background(), + bson.D{{"buildInfo", 1}}, + ).Decode(&serverStatus) + if err != nil { + return "", err + } + + version, err := serverStatus.LookupErr("version") + if err != nil { + return "", errors.New("no version string in serverStatus response") + } + + return version.StringValue(), nil +} + +// addOptions appends connection string options to a URI. +func addOptions(uri string, opts ...string) string { + if !strings.ContainsRune(uri, '?') { + if uri[len(uri)-1] != '/' { + uri += "/" + } + + uri += "?" + } else { + uri += "&" + } + + for _, opt := range opts { + uri += opt + } + + return uri +} + +// addTLSConfig checks for the environmental variable indicating that the tests are being run +// on an SSL-enabled server, and if so, returns a new URI with the necessary configuration. +func addTLSConfig(uri string) string { + if os.Getenv("SSL") == "ssl" { + uri = addOptions(uri, "ssl=", "true") + } + caFile := os.Getenv("MONGO_GO_DRIVER_CA_FILE") + if len(caFile) == 0 { + return uri + } + + return addOptions(uri, "sslCertificateAuthorityFile=", caFile) +} + +// addCompressors checks for the environment variable indicating that the tests are being run with compression +// enabled. If so, it returns a new URI with the necessary configuration +func addCompressors(uri string) string { + comp := os.Getenv("MONGO_GO_DRIVER_COMPRESSOR") + if len(comp) == 0 { + return uri + } + + return addOptions(uri, "compressors=", comp) +} + +func addServerlessAuthCredentials(uri string) (string, error) { + if os.Getenv("SERVERLESS") != "serverless" { + return uri, nil + } + user := os.Getenv("SERVERLESS_ATLAS_USER") + if user == "" { + return "", fmt.Errorf("serverless expects SERVERLESS_ATLAS_USER to be set") + } + password := os.Getenv("SERVERLESS_ATLAS_PASSWORD") + if password == "" { + return "", fmt.Errorf("serverless expects SERVERLESS_ATLAS_PASSWORD to be set") + } + + var scheme string + // remove the scheme + switch { + case strings.HasPrefix(uri, "mongodb+srv://"): + scheme = "mongodb+srv://" + case strings.HasPrefix(uri, "mongodb://"): + scheme = "mongodb://" + default: + return "", errors.New(`scheme must be "mongodb" or "mongodb+srv"`) + } + + uri = scheme + user + ":" + password + "@" + uri[len(scheme):] + return uri, nil +} + +func addNecessaryParamsToURI(uri string) (string, error) { + uri = addTLSConfig(uri) + uri = addCompressors(uri) + return addServerlessAuthCredentials(uri) +} + +// CompareServerVersions compares two version number strings (i.e. positive integers separated by +// periods). Comparisons are done to the lesser precision of the two versions. For example, 3.2 is +// considered equal to 3.2.11, whereas 3.2.0 is considered less than 3.2.11. +// +// Returns a positive int if version1 is greater than version2, a negative int if version1 is less +// than version2, and 0 if version1 is equal to version2. +func CompareServerVersions(v1 string, v2 string) int { + n1 := strings.Split(v1, ".") + n2 := strings.Split(v2, ".") + + for i := 0; i < int(math.Min(float64(len(n1)), float64(len(n2)))); i++ { + i1, err := strconv.Atoi(n1[i]) + if err != nil { + return 1 + } + + i2, err := strconv.Atoi(n2[i]) + if err != nil { + return -1 + } + + difference := i1 - i2 + if difference != 0 { + return difference + } + } + + return 0 +} diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/setup_options.go b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/setup_options.go new file mode 100644 index 000000000..bd403a731 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/setup_options.go @@ -0,0 +1,42 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package mtest + +// SetupOptions is the type used to configure mtest setup +type SetupOptions struct { + // Specifies the URI to connect to. Defaults to URI based on the environment variables MONGODB_URI, + // MONGO_GO_DRIVER_CA_FILE, and MONGO_GO_DRIVER_COMPRESSOR + URI *string +} + +// NewSetupOptions creates an empty SetupOptions struct +func NewSetupOptions() *SetupOptions { + return &SetupOptions{} +} + +// SetURI sets the uri to connect to +func (so *SetupOptions) SetURI(uri string) *SetupOptions { + so.URI = &uri + return so +} + +// MergeSetupOptions combines the given *SetupOptions into a single *Options in a last one wins fashion. +// +// Deprecated: Merging options structs will not be supported in Go Driver 2.0. Users should create a +// single options struct instead. +func MergeSetupOptions(opts ...*SetupOptions) *SetupOptions { + op := NewSetupOptions() + for _, opt := range opts { + if opt == nil { + continue + } + if opt.URI != nil { + op.URI = opt.URI + } + } + return op +} diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/wiremessage_helpers.go b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/wiremessage_helpers.go new file mode 100644 index 000000000..5fd2bc9fa --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/mongo/integration/mtest/wiremessage_helpers.go @@ -0,0 +1,67 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package mtest + +import ( + "errors" + "fmt" + + "go.mongodb.org/mongo-driver/x/mongo/driver" + "go.mongodb.org/mongo-driver/x/mongo/driver/wiremessage" +) + +func copyBytes(original []byte) []byte { + newSlice := make([]byte, len(original)) + copy(newSlice, original) + return newSlice +} + +// assertMsgSectionType asserts that the next section type in the OP_MSG wire message is equal to the provided type. +// It returns the remainder of the wire message and an error if the section type could not be read or was not equal +// to the expected type. +func assertMsgSectionType(wm []byte, expected wiremessage.SectionType) ([]byte, error) { + var actual wiremessage.SectionType + var ok bool + + actual, wm, ok = wiremessage.ReadMsgSectionType(wm) + if !ok { + return wm, errors.New("failed to read section type") + } + if expected != actual { + return wm, fmt.Errorf("unexpected section type %v; expected %v", actual, expected) + } + return wm, nil +} + +func parseOpCompressed(wm []byte) (wiremessage.OpCode, []byte, error) { + // Store the original opcode to forward to another parser later. + originalOpcode, wm, ok := wiremessage.ReadCompressedOriginalOpCode(wm) + if !ok { + return originalOpcode, nil, errors.New("failed to read original opcode") + } + + uncompressedSize, wm, ok := wiremessage.ReadCompressedUncompressedSize(wm) + if !ok { + return originalOpcode, nil, errors.New("failed to read uncompressed size") + } + + compressorID, compressedMsg, ok := wiremessage.ReadCompressedCompressorID(wm) + if !ok { + return originalOpcode, nil, errors.New("failed to read compressor ID") + } + + opts := driver.CompressionOpts{ + Compressor: compressorID, + UncompressedSize: uncompressedSize, + } + decompressed, err := driver.DecompressPayload(compressedMsg, opts) + if err != nil { + return originalOpcode, nil, fmt.Errorf("error decompressing payload: %w", err) + } + + return originalOpcode, decompressed, nil +} diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/options/clientoptions.go b/vendor/go.mongodb.org/mongo-driver/mongo/options/clientoptions.go index 17b373130..c3a9d439e 100644 --- a/vendor/go.mongodb.org/mongo-driver/mongo/options/clientoptions.go +++ b/vendor/go.mongodb.org/mongo-driver/mongo/options/clientoptions.go @@ -30,6 +30,7 @@ import ( "go.mongodb.org/mongo-driver/mongo/writeconcern" "go.mongodb.org/mongo-driver/tag" "go.mongodb.org/mongo-driver/x/mongo/driver" + "go.mongodb.org/mongo-driver/x/mongo/driver/auth" "go.mongodb.org/mongo-driver/x/mongo/driver/connstring" "go.mongodb.org/mongo-driver/x/mongo/driver/wiremessage" ) @@ -89,9 +90,9 @@ type ContextDialer interface { // The SERVICE_HOST and CANONICALIZE_HOST_NAME properties must not be used at the same time on Linux and Darwin // systems. // -// AuthSource: the name of the database to use for authentication. This defaults to "$external" for MONGODB-X509, -// GSSAPI, and PLAIN and "admin" for all other mechanisms. This can also be set through the "authSource" URI option -// (e.g. "authSource=otherDb"). +// AuthSource: the name of the database to use for authentication. This defaults to "$external" for MONGODB-AWS, +// MONGODB-OIDC, MONGODB-X509, GSSAPI, and PLAIN. It defaults to "admin" for all other auth mechanisms. This can +// also be set through the "authSource" URI option (e.g. "authSource=otherDb"). // // Username: the username for authentication. This can also be set through the URI as a username:password pair before // the first @ character. For example, a URI for user "user", password "pwd", and host "localhost:27017" would be @@ -111,6 +112,34 @@ type Credential struct { Username string Password string PasswordSet bool + OIDCMachineCallback OIDCCallback + OIDCHumanCallback OIDCCallback +} + +// OIDCCallback is the type for both Human and Machine Callback flows. +// RefreshToken will always be nil in the OIDCArgs for the Machine flow. +type OIDCCallback func(context.Context, *OIDCArgs) (*OIDCCredential, error) + +// OIDCArgs contains the arguments for the OIDC callback. +type OIDCArgs struct { + Version int + IDPInfo *IDPInfo + RefreshToken *string +} + +// OIDCCredential contains the access token and refresh token. +type OIDCCredential struct { + AccessToken string + ExpiresAt *time.Time + RefreshToken *string +} + +// IDPInfo contains the information needed to perform OIDC authentication with +// an Identity Provider. +type IDPInfo struct { + Issuer string + ClientID string + RequestScopes []string } // BSONOptions are optional BSON marshaling and unmarshaling behaviors. @@ -332,6 +361,37 @@ func (c *ClientOptions) validate() error { return fmt.Errorf("invalid server monitoring mode: %q", *mode) } + // OIDC Validation + if c.Auth != nil && c.Auth.AuthMechanism == auth.MongoDBOIDC { + if c.Auth.Password != "" { + return fmt.Errorf("password must not be set for the %s auth mechanism", auth.MongoDBOIDC) + } + if c.Auth.OIDCMachineCallback != nil && c.Auth.OIDCHumanCallback != nil { + return fmt.Errorf("cannot set both OIDCMachineCallback and OIDCHumanCallback, only one may be specified") + } + if c.Auth.OIDCHumanCallback == nil && c.Auth.AuthMechanismProperties[auth.AllowedHostsProp] != "" { + return fmt.Errorf("Cannot specify ALLOWED_HOSTS without an OIDCHumanCallback") + } + if env, ok := c.Auth.AuthMechanismProperties[auth.EnvironmentProp]; ok { + switch env { + case auth.GCPEnvironmentValue, auth.AzureEnvironmentValue: + if c.Auth.OIDCMachineCallback != nil { + return fmt.Errorf("OIDCMachineCallback cannot be specified with the %s %q", env, auth.EnvironmentProp) + } + if c.Auth.OIDCHumanCallback != nil { + return fmt.Errorf("OIDCHumanCallback cannot be specified with the %s %q", env, auth.EnvironmentProp) + } + if c.Auth.AuthMechanismProperties[auth.ResourceProp] == "" { + return fmt.Errorf("%q must be set for the %s %q", auth.ResourceProp, env, auth.EnvironmentProp) + } + default: + if c.Auth.AuthMechanismProperties[auth.ResourceProp] != "" { + return fmt.Errorf("%q must not be set for the %s %q", auth.ResourceProp, env, auth.EnvironmentProp) + } + } + } + } + return nil } @@ -1258,7 +1318,10 @@ func addClientCertFromBytes(cfg *tls.Config, data []byte, keyPasswd string) (str } } var encoded bytes.Buffer - pem.Encode(&encoded, &pem.Block{Type: currentBlock.Type, Bytes: keyBytes}) + err = pem.Encode(&encoded, &pem.Block{Type: currentBlock.Type, Bytes: keyBytes}) + if err != nil { + return "", fmt.Errorf("error encoding private key as PEM: %w", err) + } keyBlock := encoded.Bytes() keyBlocks = append(keyBlocks, keyBlock) start = len(data) - len(remaining) diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/options/encryptoptions.go b/vendor/go.mongodb.org/mongo-driver/mongo/options/encryptoptions.go index 88517d0c8..68278ba45 100644 --- a/vendor/go.mongodb.org/mongo-driver/mongo/options/encryptoptions.go +++ b/vendor/go.mongodb.org/mongo-driver/mongo/options/encryptoptions.go @@ -17,13 +17,13 @@ const ( QueryTypeEquality string = "equality" ) -// RangeOptions specifies index options for a Queryable Encryption field supporting "rangePreview" queries. -// Beta: The Range algorithm is experimental only. It is not intended for public use. It is subject to breaking changes. +// RangeOptions specifies index options for a Queryable Encryption field supporting "range" queries. type RangeOptions struct { - Min *bson.RawValue - Max *bson.RawValue - Sparsity int64 - Precision *int32 + Min *bson.RawValue + Max *bson.RawValue + Sparsity *int64 + TrimFactor *int32 + Precision *int32 } // EncryptOptions represents options to explicitly encrypt a value. @@ -58,6 +58,7 @@ func (e *EncryptOptions) SetKeyAltName(keyAltName string) *EncryptOptions { // - AEAD_AES_256_CBC_HMAC_SHA_512-Random // - Indexed // - Unindexed +// - Range // This is required. // Indexed and Unindexed are used for Queryable Encryption. func (e *EncryptOptions) SetAlgorithm(algorithm string) *EncryptOptions { @@ -81,36 +82,37 @@ func (e *EncryptOptions) SetContentionFactor(contentionFactor int64) *EncryptOpt return e } -// SetRangeOptions specifies the options to use for explicit encryption with range. It is only valid to set if algorithm is "rangePreview". -// Beta: The Range algorithm is experimental only. It is not intended for public use. It is subject to breaking changes. +// SetRangeOptions specifies the options to use for explicit encryption with range. It is only valid to set if algorithm is "Range". func (e *EncryptOptions) SetRangeOptions(ro RangeOptions) *EncryptOptions { e.RangeOptions = &ro return e } // SetMin sets the range index minimum value. -// Beta: The Range algorithm is experimental only. It is not intended for public use. It is subject to breaking changes. func (ro *RangeOptions) SetMin(min bson.RawValue) *RangeOptions { ro.Min = &min return ro } // SetMax sets the range index maximum value. -// Beta: The Range algorithm is experimental only. It is not intended for public use. It is subject to breaking changes. func (ro *RangeOptions) SetMax(max bson.RawValue) *RangeOptions { ro.Max = &max return ro } // SetSparsity sets the range index sparsity. -// Beta: The Range algorithm is experimental only. It is not intended for public use. It is subject to breaking changes. func (ro *RangeOptions) SetSparsity(sparsity int64) *RangeOptions { - ro.Sparsity = sparsity + ro.Sparsity = &sparsity + return ro +} + +// SetTrimFactor sets the range index trim factor. +func (ro *RangeOptions) SetTrimFactor(trimFactor int32) *RangeOptions { + ro.TrimFactor = &trimFactor return ro } // SetPrecision sets the range index precision. -// Beta: The Range algorithm is experimental only. It is not intended for public use. It is subject to breaking changes. func (ro *RangeOptions) SetPrecision(precision int32) *RangeOptions { ro.Precision = &precision return ro diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/options/serverapioptions.go b/vendor/go.mongodb.org/mongo-driver/mongo/options/serverapioptions.go index 5beb795e4..4eef2e199 100644 --- a/vendor/go.mongodb.org/mongo-driver/mongo/options/serverapioptions.go +++ b/vendor/go.mongodb.org/mongo-driver/mongo/options/serverapioptions.go @@ -52,8 +52,7 @@ const ( // Validate determines if the provided ServerAPIVersion is currently supported by the driver. func (sav ServerAPIVersion) Validate() error { - switch sav { - case ServerAPIVersion1: + if sav == ServerAPIVersion1 { return nil } return fmt.Errorf("api version %q not supported; this driver version only supports API version \"1\"", sav) diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/search_index_view.go b/vendor/go.mongodb.org/mongo-driver/mongo/search_index_view.go index 73fe8534e..3253a73a2 100644 --- a/vendor/go.mongodb.org/mongo-driver/mongo/search_index_view.go +++ b/vendor/go.mongodb.org/mongo-driver/mongo/search_index_view.go @@ -143,7 +143,7 @@ func (siv SearchIndexView) CreateMany( ServerSelector(selector).ClusterClock(siv.coll.client.clock). Collection(siv.coll.name).Database(siv.coll.db.name). Deployment(siv.coll.client.deployment).ServerAPI(siv.coll.client.serverAPI). - Timeout(siv.coll.client.timeout) + Timeout(siv.coll.client.timeout).Authenticator(siv.coll.client.authenticator) err = op.Execute(ctx) if err != nil { @@ -198,7 +198,7 @@ func (siv SearchIndexView) DropOne( ServerSelector(selector).ClusterClock(siv.coll.client.clock). Collection(siv.coll.name).Database(siv.coll.db.name). Deployment(siv.coll.client.deployment).ServerAPI(siv.coll.client.serverAPI). - Timeout(siv.coll.client.timeout) + Timeout(siv.coll.client.timeout).Authenticator(siv.coll.client.authenticator) err = op.Execute(ctx) if de, ok := err.(driver.Error); ok && de.NamespaceNotFound() { @@ -252,7 +252,7 @@ func (siv SearchIndexView) UpdateOne( ServerSelector(selector).ClusterClock(siv.coll.client.clock). Collection(siv.coll.name).Database(siv.coll.db.name). Deployment(siv.coll.client.deployment).ServerAPI(siv.coll.client.serverAPI). - Timeout(siv.coll.client.timeout) + Timeout(siv.coll.client.timeout).Authenticator(siv.coll.client.authenticator) return op.Execute(ctx) } diff --git a/vendor/go.mongodb.org/mongo-driver/mongo/session.go b/vendor/go.mongodb.org/mongo-driver/mongo/session.go index 8f1e029b9..77be4ab6d 100644 --- a/vendor/go.mongodb.org/mongo-driver/mongo/session.go +++ b/vendor/go.mongodb.org/mongo-driver/mongo/session.go @@ -296,7 +296,8 @@ func (s *sessionImpl) AbortTransaction(ctx context.Context) error { _ = operation.NewAbortTransaction().Session(s.clientSession).ClusterClock(s.client.clock).Database("admin"). Deployment(s.deployment).WriteConcern(s.clientSession.CurrentWc).ServerSelector(selector). Retry(driver.RetryOncePerCommand).CommandMonitor(s.client.monitor). - RecoveryToken(bsoncore.Document(s.clientSession.RecoveryToken)).ServerAPI(s.client.serverAPI).Execute(ctx) + RecoveryToken(bsoncore.Document(s.clientSession.RecoveryToken)).ServerAPI(s.client.serverAPI). + Authenticator(s.client.authenticator).Execute(ctx) s.clientSession.Aborting = false _ = s.clientSession.AbortTransaction() @@ -328,7 +329,7 @@ func (s *sessionImpl) CommitTransaction(ctx context.Context) error { Session(s.clientSession).ClusterClock(s.client.clock).Database("admin").Deployment(s.deployment). WriteConcern(s.clientSession.CurrentWc).ServerSelector(selector).Retry(driver.RetryOncePerCommand). CommandMonitor(s.client.monitor).RecoveryToken(bsoncore.Document(s.clientSession.RecoveryToken)). - ServerAPI(s.client.serverAPI).MaxTime(s.clientSession.CurrentMct) + ServerAPI(s.client.serverAPI).MaxTime(s.clientSession.CurrentMct).Authenticator(s.client.authenticator) err = op.Execute(ctx) // Return error without updating transaction state if it is a timeout, as the transaction has not diff --git a/vendor/go.mongodb.org/mongo-driver/version/version.go b/vendor/go.mongodb.org/mongo-driver/version/version.go index 659d48d7a..3dc006afd 100644 --- a/vendor/go.mongodb.org/mongo-driver/version/version.go +++ b/vendor/go.mongodb.org/mongo-driver/version/version.go @@ -8,4 +8,4 @@ package version // import "go.mongodb.org/mongo-driver/version" // Driver is the current version of the driver. -var Driver = "1.16.0" +var Driver = "1.17.1" diff --git a/vendor/go.mongodb.org/mongo-driver/x/bsonx/bsoncore/element.go b/vendor/go.mongodb.org/mongo-driver/x/bsonx/bsoncore/element.go index 1fe0897c9..9c1ab2ae9 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/bsonx/bsoncore/element.go +++ b/vendor/go.mongodb.org/mongo-driver/x/bsonx/bsoncore/element.go @@ -51,7 +51,7 @@ func (e Element) KeyErr() (string, error) { // KeyBytesErr returns the key for this element as a []byte, returning an error if the element is // not valid. func (e Element) KeyBytesErr() ([]byte, error) { - if len(e) <= 0 { + if len(e) == 0 { return nil, ErrElementMissingType } idx := bytes.IndexByte(e[1:], 0x00) @@ -99,7 +99,7 @@ func (e Element) Value() Value { // ValueErr returns the value for this element, returning an error if the element is not valid. func (e Element) ValueErr() (Value, error) { - if len(e) <= 0 { + if len(e) == 0 { return Value{}, ErrElementMissingType } idx := bytes.IndexByte(e[1:], 0x00) @@ -116,7 +116,7 @@ func (e Element) ValueErr() (Value, error) { // String implements the fmt.String interface. The output will be in extended JSON format. func (e Element) String() string { - if len(e) <= 0 { + if len(e) == 0 { return "" } t := bsontype.Type(e[0]) @@ -135,7 +135,7 @@ func (e Element) String() string { // DebugString outputs a human readable version of RawElement. It will attempt to stringify the // valid components of the element even if the entire element is not valid. func (e Element) DebugString() string { - if len(e) <= 0 { + if len(e) == 0 { return "" } t := bsontype.Type(e[0]) diff --git a/vendor/go.mongodb.org/mongo-driver/x/bsonx/bsoncore/value.go b/vendor/go.mongodb.org/mongo-driver/x/bsonx/bsoncore/value.go index 69c1f9edb..fcb0428bb 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/bsonx/bsoncore/value.go +++ b/vendor/go.mongodb.org/mongo-driver/x/bsonx/bsoncore/value.go @@ -924,13 +924,14 @@ func escapeString(s string) string { func formatDouble(f float64) string { var s string - if math.IsInf(f, 1) { + switch { + case math.IsInf(f, 1): s = "Infinity" - } else if math.IsInf(f, -1) { + case math.IsInf(f, -1): s = "-Infinity" - } else if math.IsNaN(f) { + case math.IsNaN(f): s = "NaN" - } else { + default: // Print exactly one decimalType place for integers; otherwise, print as many are necessary to // perfectly represent it. s = strconv.FormatFloat(f, 'G', -1, 64) @@ -953,9 +954,7 @@ func (ss sortableString) Less(i, j int) bool { } func (ss sortableString) Swap(i, j int) { - oldI := ss[i] - ss[i] = ss[j] - ss[j] = oldI + ss[i], ss[j] = ss[j], ss[i] } func sortStringAlphebeticAscending(s string) string { diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/auth.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/auth.go index 6eeaf0ee0..34a4a68f5 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/auth.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/auth.go @@ -19,8 +19,13 @@ import ( "go.mongodb.org/mongo-driver/x/mongo/driver/session" ) +const sourceExternal = "$external" + +// Config contains the configuration for an Authenticator. +type Config = driver.AuthConfig + // AuthenticatorFactory constructs an authenticator. -type AuthenticatorFactory func(cred *Cred) (Authenticator, error) +type AuthenticatorFactory func(*Cred, *http.Client) (Authenticator, error) var authFactories = make(map[string]AuthenticatorFactory) @@ -33,12 +38,13 @@ func init() { RegisterAuthenticatorFactory(GSSAPI, newGSSAPIAuthenticator) RegisterAuthenticatorFactory(MongoDBX509, newMongoDBX509Authenticator) RegisterAuthenticatorFactory(MongoDBAWS, newMongoDBAWSAuthenticator) + RegisterAuthenticatorFactory(MongoDBOIDC, newOIDCAuthenticator) } // CreateAuthenticator creates an authenticator. -func CreateAuthenticator(name string, cred *Cred) (Authenticator, error) { +func CreateAuthenticator(name string, cred *Cred, httpClient *http.Client) (Authenticator, error) { if f, ok := authFactories[name]; ok { - return f(cred) + return f(cred, httpClient) } return nil, newAuthError(fmt.Sprintf("unknown authenticator: %s", name), nil) @@ -61,7 +67,6 @@ type HandshakeOptions struct { ClusterClock *session.ClusterClock ServerAPI *driver.ServerAPIOptions LoadBalanced bool - HTTPClient *http.Client } type authHandshaker struct { @@ -97,12 +102,17 @@ func (ah *authHandshaker) GetHandshakeInformation(ctx context.Context, addr addr return driver.HandshakeInformation{}, newAuthError("failed to create conversation", err) } - firstMsg, err := ah.conversation.FirstMessage() - if err != nil { - return driver.HandshakeInformation{}, newAuthError("failed to create speculative authentication message", err) - } + // It is possible for the speculative conversation to be nil even without error if the authenticator + // cannot perform speculative authentication. An example of this is MONGODB-OIDC when there is + // no AccessToken in the cache. + if ah.conversation != nil { + firstMsg, err := ah.conversation.FirstMessage() + if err != nil { + return driver.HandshakeInformation{}, newAuthError("failed to create speculative authentication message", err) + } - op = op.SpeculativeAuthenticate(firstMsg) + op = op.SpeculativeAuthenticate(firstMsg) + } } } @@ -132,7 +142,6 @@ func (ah *authHandshaker) FinishHandshake(ctx context.Context, conn driver.Conne ClusterClock: ah.options.ClusterClock, HandshakeInfo: ah.handshakeInfo, ServerAPI: ah.options.ServerAPI, - HTTPClient: ah.options.HTTPClient, } if err := ah.authenticate(ctx, cfg); err != nil { @@ -170,21 +179,8 @@ func Handshaker(h driver.Handshaker, options *HandshakeOptions) driver.Handshake } } -// Config holds the information necessary to perform an authentication attempt. -type Config struct { - Description description.Server - Connection driver.Connection - ClusterClock *session.ClusterClock - HandshakeInfo driver.HandshakeInformation - ServerAPI *driver.ServerAPIOptions - HTTPClient *http.Client -} - // Authenticator handles authenticating a connection. -type Authenticator interface { - // Auth authenticates the connection. - Auth(context.Context, *Config) error -} +type Authenticator = driver.Authenticator func newAuthError(msg string, inner error) error { return &Error{ diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/cred.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/cred.go index 7b2b8f17d..a9685f6ed 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/cred.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/cred.go @@ -6,11 +6,9 @@ package auth -// Cred is a user's credential. -type Cred struct { - Source string - Username string - Password string - PasswordSet bool - Props map[string]string -} +import ( + "go.mongodb.org/mongo-driver/x/mongo/driver" +) + +// Cred is the type of user credential +type Cred = driver.Cred diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/default.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/default.go index 6f2ca5224..785a41951 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/default.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/default.go @@ -9,10 +9,13 @@ package auth import ( "context" "fmt" + "net/http" + + "go.mongodb.org/mongo-driver/x/mongo/driver" ) -func newDefaultAuthenticator(cred *Cred) (Authenticator, error) { - scram, err := newScramSHA256Authenticator(cred) +func newDefaultAuthenticator(cred *Cred, httpClient *http.Client) (Authenticator, error) { + scram, err := newScramSHA256Authenticator(cred, httpClient) if err != nil { return nil, newAuthError("failed to create internal authenticator", err) } @@ -25,6 +28,7 @@ func newDefaultAuthenticator(cred *Cred) (Authenticator, error) { return &DefaultAuthenticator{ Cred: cred, speculativeAuthenticator: speculative, + httpClient: httpClient, }, nil } @@ -36,6 +40,8 @@ type DefaultAuthenticator struct { // The authenticator to use for speculative authentication. Because the correct auth mechanism is unknown when doing // the initial hello, SCRAM-SHA-256 is used for the speculative attempt. speculativeAuthenticator SpeculativeAuthenticator + + httpClient *http.Client } var _ SpeculativeAuthenticator = (*DefaultAuthenticator)(nil) @@ -52,11 +58,11 @@ func (a *DefaultAuthenticator) Auth(ctx context.Context, cfg *Config) error { switch chooseAuthMechanism(cfg) { case SCRAMSHA256: - actual, err = newScramSHA256Authenticator(a.Cred) + actual, err = newScramSHA256Authenticator(a.Cred, a.httpClient) case SCRAMSHA1: - actual, err = newScramSHA1Authenticator(a.Cred) + actual, err = newScramSHA1Authenticator(a.Cred, a.httpClient) default: - actual, err = newMongoDBCRAuthenticator(a.Cred) + actual, err = newMongoDBCRAuthenticator(a.Cred, a.httpClient) } if err != nil { @@ -66,6 +72,11 @@ func (a *DefaultAuthenticator) Auth(ctx context.Context, cfg *Config) error { return actual.Auth(ctx, cfg) } +// Reauth reauthenticates the connection. +func (a *DefaultAuthenticator) Reauth(_ context.Context, _ *driver.AuthConfig) error { + return newAuthError("DefaultAuthenticator does not support reauthentication", nil) +} + // If a server provides a list of supported mechanisms, we choose // SCRAM-SHA-256 if it exists or else MUST use SCRAM-SHA-1. // Otherwise, we decide based on what is supported. diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/gssapi.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/gssapi.go index 4b860ba63..b342e9a7d 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/gssapi.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/gssapi.go @@ -14,15 +14,17 @@ import ( "context" "fmt" "net" + "net/http" + "go.mongodb.org/mongo-driver/x/mongo/driver" "go.mongodb.org/mongo-driver/x/mongo/driver/auth/internal/gssapi" ) // GSSAPI is the mechanism name for GSSAPI. const GSSAPI = "GSSAPI" -func newGSSAPIAuthenticator(cred *Cred) (Authenticator, error) { - if cred.Source != "" && cred.Source != "$external" { +func newGSSAPIAuthenticator(cred *Cred, _ *http.Client) (Authenticator, error) { + if cred.Source != "" && cred.Source != sourceExternal { return nil, newAuthError("GSSAPI source must be empty or $external", nil) } @@ -55,5 +57,10 @@ func (a *GSSAPIAuthenticator) Auth(ctx context.Context, cfg *Config) error { if err != nil { return newAuthError("error creating gssapi", err) } - return ConductSaslConversation(ctx, cfg, "$external", client) + return ConductSaslConversation(ctx, cfg, sourceExternal, client) +} + +// Reauth reauthenticates the connection. +func (a *GSSAPIAuthenticator) Reauth(_ context.Context, _ *driver.AuthConfig) error { + return newAuthError("GSSAPI does not support reauthentication", nil) } diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/gssapi_not_enabled.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/gssapi_not_enabled.go index 7ba5fe860..e50553c7a 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/gssapi_not_enabled.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/gssapi_not_enabled.go @@ -9,9 +9,11 @@ package auth +import "net/http" + // GSSAPI is the mechanism name for GSSAPI. const GSSAPI = "GSSAPI" -func newGSSAPIAuthenticator(*Cred) (Authenticator, error) { +func newGSSAPIAuthenticator(*Cred, *http.Client) (Authenticator, error) { return nil, newAuthError("GSSAPI support not enabled during build (-tags gssapi)", nil) } diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/gssapi_not_supported.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/gssapi_not_supported.go index 10312c228..12046ff67 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/gssapi_not_supported.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/gssapi_not_supported.go @@ -11,12 +11,13 @@ package auth import ( "fmt" + "net/http" "runtime" ) // GSSAPI is the mechanism name for GSSAPI. const GSSAPI = "GSSAPI" -func newGSSAPIAuthenticator(cred *Cred) (Authenticator, error) { +func newGSSAPIAuthenticator(*Cred, *http.Client) (Authenticator, error) { return nil, newAuthError(fmt.Sprintf("GSSAPI is not supported on %s", runtime.GOOS), nil) } diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/internal/gssapi/gss.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/internal/gssapi/gss.go index abfa4db47..496057882 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/internal/gssapi/gss.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/internal/gssapi/gss.go @@ -19,6 +19,7 @@ package gssapi */ import "C" import ( + "context" "fmt" "runtime" "strings" @@ -91,12 +92,12 @@ func (sc *SaslClient) Start() (string, []byte, error) { return mechName, nil, sc.getError("unable to initialize client") } - payload, err := sc.Next(nil) + payload, err := sc.Next(nil, nil) return mechName, payload, err } -func (sc *SaslClient) Next(challenge []byte) ([]byte, error) { +func (sc *SaslClient) Next(_ context.Context, challenge []byte) ([]byte, error) { var buf unsafe.Pointer var bufLen C.size_t diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/internal/gssapi/sspi.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/internal/gssapi/sspi.go index 6e7d3ed8a..f1da5a852 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/internal/gssapi/sspi.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/internal/gssapi/sspi.go @@ -12,6 +12,7 @@ package gssapi // #include "sspi_wrapper.h" import "C" import ( + "context" "fmt" "net" "strconv" @@ -115,12 +116,12 @@ func (sc *SaslClient) Start() (string, []byte, error) { return mechName, nil, sc.getError("unable to initialize client") } - payload, err := sc.Next(nil) + payload, err := sc.Next(nil, nil) return mechName, payload, err } -func (sc *SaslClient) Next(challenge []byte) ([]byte, error) { +func (sc *SaslClient) Next(_ context.Context, challenge []byte) ([]byte, error) { var outBuf C.PVOID var outBufLen C.ULONG diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/mongodbaws.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/mongodbaws.go index 7ae4b0899..679c54e9f 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/mongodbaws.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/mongodbaws.go @@ -9,57 +9,62 @@ package auth import ( "context" "errors" + "net/http" "go.mongodb.org/mongo-driver/internal/aws/credentials" "go.mongodb.org/mongo-driver/internal/credproviders" + "go.mongodb.org/mongo-driver/x/mongo/driver" "go.mongodb.org/mongo-driver/x/mongo/driver/auth/creds" ) // MongoDBAWS is the mechanism name for MongoDBAWS. const MongoDBAWS = "MONGODB-AWS" -func newMongoDBAWSAuthenticator(cred *Cred) (Authenticator, error) { - if cred.Source != "" && cred.Source != "$external" { +func newMongoDBAWSAuthenticator(cred *Cred, httpClient *http.Client) (Authenticator, error) { + if cred.Source != "" && cred.Source != sourceExternal { return nil, newAuthError("MONGODB-AWS source must be empty or $external", nil) } + if httpClient == nil { + return nil, errors.New("httpClient must not be nil") + } return &MongoDBAWSAuthenticator{ - source: cred.Source, credentials: &credproviders.StaticProvider{ Value: credentials.Value{ - ProviderName: cred.Source, AccessKeyID: cred.Username, SecretAccessKey: cred.Password, SessionToken: cred.Props["AWS_SESSION_TOKEN"], }, }, + httpClient: httpClient, }, nil } // MongoDBAWSAuthenticator uses AWS-IAM credentials over SASL to authenticate a connection. type MongoDBAWSAuthenticator struct { - source string credentials *credproviders.StaticProvider + httpClient *http.Client } // Auth authenticates the connection. func (a *MongoDBAWSAuthenticator) Auth(ctx context.Context, cfg *Config) error { - httpClient := cfg.HTTPClient - if httpClient == nil { - return errors.New("cfg.HTTPClient must not be nil") - } - providers := creds.NewAWSCredentialProvider(httpClient, a.credentials) + providers := creds.NewAWSCredentialProvider(a.httpClient, a.credentials) adapter := &awsSaslAdapter{ conversation: &awsConversation{ credentials: providers.Cred, }, } - err := ConductSaslConversation(ctx, cfg, a.source, adapter) + err := ConductSaslConversation(ctx, cfg, sourceExternal, adapter) if err != nil { return newAuthError("sasl conversation error", err) } return nil } +// Reauth reauthenticates the connection. +func (a *MongoDBAWSAuthenticator) Reauth(_ context.Context, _ *driver.AuthConfig) error { + return newAuthError("AWS authentication does not support reauthentication", nil) +} + type awsSaslAdapter struct { conversation *awsConversation } @@ -74,7 +79,7 @@ func (a *awsSaslAdapter) Start() (string, []byte, error) { return MongoDBAWS, step, nil } -func (a *awsSaslAdapter) Next(challenge []byte) ([]byte, error) { +func (a *awsSaslAdapter) Next(_ context.Context, challenge []byte) ([]byte, error) { step, err := a.conversation.Step(challenge) if err != nil { return nil, err diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/mongodbcr.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/mongodbcr.go index 6e2c2f4dc..1861956b7 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/mongodbcr.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/mongodbcr.go @@ -10,6 +10,7 @@ import ( "context" "fmt" "io" + "net/http" // Ignore gosec warning "Blocklisted import crypto/md5: weak cryptographic primitive". We need // to use MD5 here to implement the MONGODB-CR specification. @@ -28,9 +29,13 @@ import ( // MongoDB 4.0. const MONGODBCR = "MONGODB-CR" -func newMongoDBCRAuthenticator(cred *Cred) (Authenticator, error) { +func newMongoDBCRAuthenticator(cred *Cred, _ *http.Client) (Authenticator, error) { + source := cred.Source + if source == "" { + source = "admin" + } return &MongoDBCRAuthenticator{ - DB: cred.Source, + DB: source, Username: cred.Username, Password: cred.Password, }, nil @@ -97,6 +102,11 @@ func (a *MongoDBCRAuthenticator) Auth(ctx context.Context, cfg *Config) error { return nil } +// Reauth reauthenticates the connection. +func (a *MongoDBCRAuthenticator) Reauth(_ context.Context, _ *driver.AuthConfig) error { + return newAuthError("MONGODB-CR does not support reauthentication", nil) +} + func (a *MongoDBCRAuthenticator) createKey(nonce string) string { // Ignore gosec warning "Use of weak cryptographic primitive". We need to use MD5 here to // implement the MONGODB-CR specification. diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/oidc.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/oidc.go new file mode 100644 index 000000000..13fd10ec3 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/oidc.go @@ -0,0 +1,556 @@ +// Copyright (C) MongoDB, Inc. 2024-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package auth + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "regexp" + "strings" + "sync" + "time" + + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/x/bsonx/bsoncore" + "go.mongodb.org/mongo-driver/x/mongo/driver" +) + +// MongoDBOIDC is the string constant for the MONGODB-OIDC authentication mechanism. +const MongoDBOIDC = "MONGODB-OIDC" + +// EnvironmentProp is the property key name that specifies the environment for the OIDC authenticator. +const EnvironmentProp = "ENVIRONMENT" + +// ResourceProp is the property key name that specifies the token resource for GCP and AZURE OIDC auth. +const ResourceProp = "TOKEN_RESOURCE" + +// AllowedHostsProp is the property key name that specifies the allowed hosts for the OIDC authenticator. +const AllowedHostsProp = "ALLOWED_HOSTS" + +// AzureEnvironmentValue is the value for the Azure environment. +const AzureEnvironmentValue = "azure" + +// GCPEnvironmentValue is the value for the GCP environment. +const GCPEnvironmentValue = "gcp" + +// TestEnvironmentValue is the value for the test environment. +const TestEnvironmentValue = "test" + +const apiVersion = 1 +const invalidateSleepTimeout = 100 * time.Millisecond + +// The CSOT specification says to apply a 1-minute timeout if "CSOT is not applied". That's +// ambiguous for the v1.x Go Driver because it could mean either "no timeout provided" or "CSOT not +// enabled". Always use a maximum timeout duration of 1 minute, allowing us to ignore the ambiguity. +// Contexts with a shorter timeout are unaffected. +const machineCallbackTimeout = time.Minute +const humanCallbackTimeout = 5 * time.Minute + +var defaultAllowedHosts = []*regexp.Regexp{ + regexp.MustCompile(`^.*[.]mongodb[.]net(:\d+)?$`), + regexp.MustCompile(`^.*[.]mongodb-qa[.]net(:\d+)?$`), + regexp.MustCompile(`^.*[.]mongodb-dev[.]net(:\d+)?$`), + regexp.MustCompile(`^.*[.]mongodbgov[.]net(:\d+)?$`), + regexp.MustCompile(`^localhost(:\d+)?$`), + regexp.MustCompile(`^127[.]0[.]0[.]1(:\d+)?$`), + regexp.MustCompile(`^::1(:\d+)?$`), +} + +// OIDCCallback is a function that takes a context and OIDCArgs and returns an OIDCCredential. +type OIDCCallback = driver.OIDCCallback + +// OIDCArgs contains the arguments for the OIDC callback. +type OIDCArgs = driver.OIDCArgs + +// OIDCCredential contains the access token and refresh token. +type OIDCCredential = driver.OIDCCredential + +// IDPInfo contains the information needed to perform OIDC authentication with an Identity Provider. +type IDPInfo = driver.IDPInfo + +var _ driver.Authenticator = (*OIDCAuthenticator)(nil) +var _ SpeculativeAuthenticator = (*OIDCAuthenticator)(nil) +var _ SaslClient = (*oidcOneStep)(nil) +var _ SaslClient = (*oidcTwoStep)(nil) + +// OIDCAuthenticator is synchronized and handles caching of the access token, refreshToken, +// and IDPInfo. It also provides a mechanism to refresh the access token, but this functionality +// is only for the OIDC Human flow. +type OIDCAuthenticator struct { + mu sync.Mutex // Guards all of the info in the OIDCAuthenticator struct. + + AuthMechanismProperties map[string]string + OIDCMachineCallback OIDCCallback + OIDCHumanCallback OIDCCallback + + allowedHosts *[]*regexp.Regexp + userName string + httpClient *http.Client + accessToken string + refreshToken *string + idpInfo *IDPInfo + tokenGenID uint64 +} + +// SetAccessToken allows for manually setting the access token for the OIDCAuthenticator, this is +// only for testing purposes. +func (oa *OIDCAuthenticator) SetAccessToken(accessToken string) { + oa.mu.Lock() + defer oa.mu.Unlock() + oa.accessToken = accessToken +} + +func newOIDCAuthenticator(cred *Cred, httpClient *http.Client) (Authenticator, error) { + if cred.Source != "" && cred.Source != sourceExternal { + return nil, newAuthError("MONGODB-OIDC source must be empty or $external", nil) + } + if cred.Password != "" { + return nil, fmt.Errorf("password cannot be specified for %q", MongoDBOIDC) + } + if cred.Props != nil { + if env, ok := cred.Props[EnvironmentProp]; ok { + switch strings.ToLower(env) { + case AzureEnvironmentValue: + fallthrough + case GCPEnvironmentValue: + if _, ok := cred.Props[ResourceProp]; !ok { + return nil, fmt.Errorf("%q must be specified for %q %q", ResourceProp, env, EnvironmentProp) + } + fallthrough + case TestEnvironmentValue: + if cred.OIDCMachineCallback != nil || cred.OIDCHumanCallback != nil { + return nil, fmt.Errorf("OIDC callbacks are not allowed for %q %q", env, EnvironmentProp) + } + } + } + } + oa := &OIDCAuthenticator{ + userName: cred.Username, + httpClient: httpClient, + AuthMechanismProperties: cred.Props, + OIDCMachineCallback: cred.OIDCMachineCallback, + OIDCHumanCallback: cred.OIDCHumanCallback, + } + err := oa.setAllowedHosts() + return oa, err +} + +func createPatternsForGlobs(hosts []string) ([]*regexp.Regexp, error) { + var err error + ret := make([]*regexp.Regexp, len(hosts)) + for i := range hosts { + hosts[i] = strings.ReplaceAll(hosts[i], ".", "[.]") + hosts[i] = strings.ReplaceAll(hosts[i], "*", ".*") + hosts[i] = "^" + hosts[i] + "(:\\d+)?$" + ret[i], err = regexp.Compile(hosts[i]) + if err != nil { + return nil, err + } + } + return ret, nil +} + +func (oa *OIDCAuthenticator) setAllowedHosts() error { + if oa.AuthMechanismProperties == nil { + oa.allowedHosts = &defaultAllowedHosts + return nil + } + + allowedHosts, ok := oa.AuthMechanismProperties[AllowedHostsProp] + if !ok { + oa.allowedHosts = &defaultAllowedHosts + return nil + } + globs := strings.Split(allowedHosts, ",") + ret, err := createPatternsForGlobs(globs) + if err != nil { + return err + } + oa.allowedHosts = &ret + return nil +} + +func (oa *OIDCAuthenticator) validateConnectionAddressWithAllowedHosts(conn driver.Connection) error { + if oa.allowedHosts == nil { + // should be unreachable, but this is a safety check. + return newAuthError(fmt.Sprintf("%q missing", AllowedHostsProp), nil) + } + allowedHosts := *oa.allowedHosts + if len(allowedHosts) == 0 { + return newAuthError(fmt.Sprintf("empty %q specified", AllowedHostsProp), nil) + } + for _, pattern := range allowedHosts { + if pattern.MatchString(string(conn.Address())) { + return nil + } + } + return newAuthError(fmt.Sprintf("address %q not allowed by %q: %v", conn.Address(), AllowedHostsProp, allowedHosts), nil) +} + +type oidcOneStep struct { + userName string + accessToken string +} + +type oidcTwoStep struct { + conn driver.Connection + oa *OIDCAuthenticator +} + +func jwtStepRequest(accessToken string) []byte { + return bsoncore.NewDocumentBuilder(). + AppendString("jwt", accessToken). + Build() +} + +func principalStepRequest(principal string) []byte { + doc := bsoncore.NewDocumentBuilder() + if principal != "" { + doc.AppendString("n", principal) + } + return doc.Build() +} + +func (oos *oidcOneStep) Start() (string, []byte, error) { + return MongoDBOIDC, jwtStepRequest(oos.accessToken), nil +} + +func (oos *oidcOneStep) Next(context.Context, []byte) ([]byte, error) { + return nil, newAuthError("unexpected step in OIDC authentication", nil) +} + +func (*oidcOneStep) Completed() bool { + return true +} + +func (ots *oidcTwoStep) Start() (string, []byte, error) { + return MongoDBOIDC, principalStepRequest(ots.oa.userName), nil +} + +func (ots *oidcTwoStep) Next(ctx context.Context, msg []byte) ([]byte, error) { + var idpInfo IDPInfo + err := bson.Unmarshal(msg, &idpInfo) + if err != nil { + return nil, fmt.Errorf("error unmarshaling BSON document: %w", err) + } + + accessToken, err := ots.oa.getAccessToken(ctx, + ots.conn, + &OIDCArgs{ + Version: apiVersion, + // idpInfo is nil for machine callbacks in the current spec. + IDPInfo: &idpInfo, + // there is no way there could be a refresh token when there is no IDPInfo. + RefreshToken: nil, + }, + // two-step callbacks are always human callbacks. + ots.oa.OIDCHumanCallback) + + return jwtStepRequest(accessToken), err +} + +func (*oidcTwoStep) Completed() bool { + return true +} + +func (oa *OIDCAuthenticator) providerCallback() (OIDCCallback, error) { + env, ok := oa.AuthMechanismProperties[EnvironmentProp] + if !ok { + return nil, nil + } + + switch env { + case AzureEnvironmentValue: + resource, ok := oa.AuthMechanismProperties[ResourceProp] + if !ok { + return nil, newAuthError(fmt.Sprintf("%q must be specified for Azure OIDC", ResourceProp), nil) + } + return getAzureOIDCCallback(oa.userName, resource, oa.httpClient), nil + case GCPEnvironmentValue: + resource, ok := oa.AuthMechanismProperties[ResourceProp] + if !ok { + return nil, newAuthError(fmt.Sprintf("%q must be specified for GCP OIDC", ResourceProp), nil) + } + return getGCPOIDCCallback(resource, oa.httpClient), nil + } + + return nil, fmt.Errorf("%q %q not supported for MONGODB-OIDC", EnvironmentProp, env) +} + +// getAzureOIDCCallback returns the callback for the Azure Identity Provider. +func getAzureOIDCCallback(clientID string, resource string, httpClient *http.Client) OIDCCallback { + // return the callback parameterized by the clientID and resource, also passing in the user + // configured httpClient. + return func(ctx context.Context, _ *OIDCArgs) (*OIDCCredential, error) { + resource = url.QueryEscape(resource) + var uri string + if clientID != "" { + uri = fmt.Sprintf("http://169.254.169.254/metadata/identity/oauth2/token?api-version=2018-02-01&resource=%s&client_id=%s", resource, clientID) + } else { + uri = fmt.Sprintf("http://169.254.169.254/metadata/identity/oauth2/token?api-version=2018-02-01&resource=%s", resource) + } + req, err := http.NewRequestWithContext(ctx, http.MethodGet, uri, nil) + if err != nil { + return nil, newAuthError("error creating http request to Azure Identity Provider", err) + } + req.Header.Add("Metadata", "true") + req.Header.Add("Accept", "application/json") + resp, err := httpClient.Do(req) + if err != nil { + return nil, newAuthError("error getting access token from Azure Identity Provider", err) + } + defer resp.Body.Close() + var azureResp struct { + AccessToken string `json:"access_token"` + ExpiresOn int64 `json:"expires_on,string"` + } + + if resp.StatusCode != http.StatusOK { + return nil, newAuthError(fmt.Sprintf("failed to get a valid response from Azure Identity Provider, http code: %d", resp.StatusCode), nil) + } + err = json.NewDecoder(resp.Body).Decode(&azureResp) + if err != nil { + return nil, newAuthError("failed parsing result from Azure Identity Provider", err) + } + expireTime := time.Unix(azureResp.ExpiresOn, 0) + return &OIDCCredential{ + AccessToken: azureResp.AccessToken, + ExpiresAt: &expireTime, + }, nil + } +} + +// getGCPOIDCCallback returns the callback for the GCP Identity Provider. +func getGCPOIDCCallback(resource string, httpClient *http.Client) OIDCCallback { + // return the callback parameterized by the clientID and resource, also passing in the user + // configured httpClient. + return func(ctx context.Context, _ *OIDCArgs) (*OIDCCredential, error) { + resource = url.QueryEscape(resource) + uri := fmt.Sprintf("http://metadata/computeMetadata/v1/instance/service-accounts/default/identity?audience=%s", resource) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, uri, nil) + if err != nil { + return nil, newAuthError("error creating http request to GCP Identity Provider", err) + } + req.Header.Add("Metadata-Flavor", "Google") + resp, err := httpClient.Do(req) + if err != nil { + return nil, newAuthError("error getting access token from GCP Identity Provider", err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, newAuthError(fmt.Sprintf("failed to get a valid response from GCP Identity Provider, http code: %d", resp.StatusCode), nil) + } + accessToken, err := io.ReadAll(resp.Body) + if err != nil { + return nil, newAuthError("failed parsing reading response from GCP Identity Provider", err) + } + return &OIDCCredential{ + AccessToken: string(accessToken), + ExpiresAt: nil, + }, nil + } +} + +func (oa *OIDCAuthenticator) getAccessToken( + ctx context.Context, + conn driver.Connection, + args *OIDCArgs, + callback OIDCCallback, +) (string, error) { + oa.mu.Lock() + defer oa.mu.Unlock() + + if oa.accessToken != "" { + return oa.accessToken, nil + } + + // Attempt to refresh the access token if a refresh token is available. + if args.RefreshToken != nil { + cred, err := callback(ctx, args) + if err == nil && cred != nil { + oa.accessToken = cred.AccessToken + oa.tokenGenID++ + conn.SetOIDCTokenGenID(oa.tokenGenID) + oa.refreshToken = cred.RefreshToken + return cred.AccessToken, nil + } + oa.refreshToken = nil + args.RefreshToken = nil + } + // If we get here this means there either was no refresh token or the refresh token failed. + cred, err := callback(ctx, args) + if err != nil { + return "", err + } + // This line should never occur, if go conventions are followed, but it is a safety check such + // that we do not throw nil pointer errors to our users if they abuse the API. + if cred == nil { + return "", newAuthError("OIDC callback returned nil credential with no specified error", nil) + } + + oa.accessToken = cred.AccessToken + oa.tokenGenID++ + conn.SetOIDCTokenGenID(oa.tokenGenID) + oa.refreshToken = cred.RefreshToken + // always set the IdPInfo, in most cases, this should just be recopying the same pointer, or nil + // in the machine flow. + oa.idpInfo = args.IDPInfo + + return cred.AccessToken, nil +} + +// invalidateAccessToken invalidates the access token, if the force flag is set to true (which is +// only on a Reauth call) or if the tokenGenID of the connection is greater than or equal to the +// tokenGenID of the OIDCAuthenticator. It should never actually be greater than, but only equal, +// but this is a safety check, since extra invalidation is only a performance impact, not a +// correctness impact. +func (oa *OIDCAuthenticator) invalidateAccessToken(conn driver.Connection) { + oa.mu.Lock() + defer oa.mu.Unlock() + tokenGenID := conn.OIDCTokenGenID() + // If the connection used in a Reauth is a new connection it will not have a correct tokenGenID, + // it will instead be set to 0. In the absence of information, the only safe thing to do is to + // invalidate the cached accessToken. + if tokenGenID == 0 || tokenGenID >= oa.tokenGenID { + oa.accessToken = "" + conn.SetOIDCTokenGenID(0) + } +} + +// Reauth reauthenticates the connection when the server returns a 391 code. Reauth is part of the +// driver.Authenticator interface. +func (oa *OIDCAuthenticator) Reauth(ctx context.Context, cfg *Config) error { + oa.invalidateAccessToken(cfg.Connection) + return oa.Auth(ctx, cfg) +} + +// Auth authenticates the connection. +func (oa *OIDCAuthenticator) Auth(ctx context.Context, cfg *Config) error { + var err error + + if cfg == nil { + return newAuthError(fmt.Sprintf("config must be set for %q authentication", MongoDBOIDC), nil) + } + conn := cfg.Connection + + oa.mu.Lock() + cachedAccessToken := oa.accessToken + cachedRefreshToken := oa.refreshToken + cachedIDPInfo := oa.idpInfo + oa.mu.Unlock() + + if cachedAccessToken != "" { + err = ConductSaslConversation(ctx, cfg, sourceExternal, &oidcOneStep{ + userName: oa.userName, + accessToken: cachedAccessToken, + }) + if err == nil { + return nil + } + // this seems like it could be incorrect since we could be inavlidating an access token that + // has already been replaced by a different auth attempt, but the TokenGenID will prevernt + // that from happening. + oa.invalidateAccessToken(conn) + time.Sleep(invalidateSleepTimeout) + } + + if oa.OIDCHumanCallback != nil { + return oa.doAuthHuman(ctx, cfg, oa.OIDCHumanCallback, cachedIDPInfo, cachedRefreshToken) + } + + // Handle user provided or automatic provider machine callback. + var machineCallback OIDCCallback + if oa.OIDCMachineCallback != nil { + machineCallback = oa.OIDCMachineCallback + } else { + machineCallback, err = oa.providerCallback() + if err != nil { + return fmt.Errorf("error getting built-in OIDC provider: %w", err) + } + } + + if machineCallback != nil { + return oa.doAuthMachine(ctx, cfg, machineCallback) + } + return newAuthError("no OIDC callback provided", nil) +} + +func (oa *OIDCAuthenticator) doAuthHuman(ctx context.Context, cfg *Config, humanCallback OIDCCallback, idpInfo *IDPInfo, refreshToken *string) error { + // Ensure that the connection address is allowed by the allowed hosts. + err := oa.validateConnectionAddressWithAllowedHosts(cfg.Connection) + if err != nil { + return err + } + subCtx, cancel := context.WithTimeout(ctx, humanCallbackTimeout) + defer cancel() + // If the idpInfo exists, we can just do one step + if idpInfo != nil { + accessToken, err := oa.getAccessToken(subCtx, + cfg.Connection, + &OIDCArgs{ + Version: apiVersion, + // idpInfo is nil for machine callbacks in the current spec. + IDPInfo: idpInfo, + RefreshToken: refreshToken, + }, + humanCallback) + if err != nil { + return err + } + return ConductSaslConversation( + subCtx, + cfg, + sourceExternal, + &oidcOneStep{accessToken: accessToken}, + ) + } + // otherwise, we need the two step where we ask the server for the IdPInfo first. + ots := &oidcTwoStep{ + conn: cfg.Connection, + oa: oa, + } + return ConductSaslConversation(subCtx, cfg, sourceExternal, ots) +} + +func (oa *OIDCAuthenticator) doAuthMachine(ctx context.Context, cfg *Config, machineCallback OIDCCallback) error { + subCtx, cancel := context.WithTimeout(ctx, machineCallbackTimeout) + accessToken, err := oa.getAccessToken(subCtx, + cfg.Connection, + &OIDCArgs{ + Version: apiVersion, + // idpInfo is nil for machine callbacks in the current spec. + IDPInfo: nil, + RefreshToken: nil, + }, + machineCallback) + cancel() + if err != nil { + return err + } + return ConductSaslConversation( + ctx, + cfg, + sourceExternal, + &oidcOneStep{accessToken: accessToken}, + ) +} + +// CreateSpeculativeConversation creates a speculative conversation for OIDC authentication. +func (oa *OIDCAuthenticator) CreateSpeculativeConversation() (SpeculativeConversation, error) { + oa.mu.Lock() + defer oa.mu.Unlock() + accessToken := oa.accessToken + if accessToken == "" { + return nil, nil // Skip speculative auth. + } + + return newSaslConversation(&oidcOneStep{accessToken: accessToken}, sourceExternal, true), nil +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/plain.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/plain.go index 532d43e39..fb0645aaa 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/plain.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/plain.go @@ -8,12 +8,30 @@ package auth import ( "context" + "net/http" + + "go.mongodb.org/mongo-driver/x/mongo/driver" ) // PLAIN is the mechanism name for PLAIN. const PLAIN = "PLAIN" -func newPlainAuthenticator(cred *Cred) (Authenticator, error) { +func newPlainAuthenticator(cred *Cred, _ *http.Client) (Authenticator, error) { + // TODO(GODRIVER-3317): The PLAIN specification says about auth source: + // + // "MUST be specified. Defaults to the database name if supplied on the + // connection string or $external." + // + // We should actually pass through the auth source, not always pass + // $external. If it's empty, we should default to $external. + // + // For example: + // + // source := cred.Source + // if source == "" { + // source = "$external" + // } + // return &PlainAuthenticator{ Username: cred.Username, Password: cred.Password, @@ -28,12 +46,17 @@ type PlainAuthenticator struct { // Auth authenticates the connection. func (a *PlainAuthenticator) Auth(ctx context.Context, cfg *Config) error { - return ConductSaslConversation(ctx, cfg, "$external", &plainSaslClient{ + return ConductSaslConversation(ctx, cfg, sourceExternal, &plainSaslClient{ username: a.Username, password: a.Password, }) } +// Reauth reauthenticates the connection. +func (a *PlainAuthenticator) Reauth(_ context.Context, _ *driver.AuthConfig) error { + return newAuthError("Plain authentication does not support reauthentication", nil) +} + type plainSaslClient struct { username string password string @@ -46,7 +69,7 @@ func (c *plainSaslClient) Start() (string, []byte, error) { return PLAIN, b, nil } -func (c *plainSaslClient) Next([]byte) ([]byte, error) { +func (c *plainSaslClient) Next(context.Context, []byte) ([]byte, error) { return nil, newAuthError("unexpected server challenge", nil) } diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/sasl.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/sasl.go index 2a84b53a6..1ef67f02b 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/sasl.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/sasl.go @@ -19,7 +19,7 @@ import ( // SaslClient is the client piece of a sasl conversation. type SaslClient interface { Start() (string, []byte, error) - Next(challenge []byte) ([]byte, error) + Next(ctx context.Context, challenge []byte) ([]byte, error) Completed() bool } @@ -118,7 +118,7 @@ func (sc *saslConversation) Finish(ctx context.Context, cfg *Config, firstRespon return nil } - payload, err = sc.client.Next(saslResp.Payload) + payload, err = sc.client.Next(ctx, saslResp.Payload) if err != nil { return newError(err, sc.mechanism) } @@ -156,7 +156,6 @@ func (sc *saslConversation) Finish(ctx context.Context, cfg *Config, firstRespon func ConductSaslConversation(ctx context.Context, cfg *Config, authSource string, client SaslClient) error { // Create a non-speculative SASL conversation. conversation := newSaslConversation(client, authSource, false) - saslStartDoc, err := conversation.FirstMessage() if err != nil { return newError(err, conversation.mechanism) diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/scram.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/scram.go index c1238cd6a..0d7deaee0 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/scram.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/scram.go @@ -14,10 +14,12 @@ package auth import ( "context" + "net/http" "github.com/xdg-go/scram" "github.com/xdg-go/stringprep" "go.mongodb.org/mongo-driver/x/bsonx/bsoncore" + "go.mongodb.org/mongo-driver/x/mongo/driver" ) const ( @@ -35,7 +37,11 @@ var ( ) ) -func newScramSHA1Authenticator(cred *Cred) (Authenticator, error) { +func newScramSHA1Authenticator(cred *Cred, _ *http.Client) (Authenticator, error) { + source := cred.Source + if source == "" { + source = "admin" + } passdigest := mongoPasswordDigest(cred.Username, cred.Password) client, err := scram.SHA1.NewClientUnprepped(cred.Username, passdigest, "") if err != nil { @@ -44,12 +50,16 @@ func newScramSHA1Authenticator(cred *Cred) (Authenticator, error) { client.WithMinIterations(4096) return &ScramAuthenticator{ mechanism: SCRAMSHA1, - source: cred.Source, + source: source, client: client, }, nil } -func newScramSHA256Authenticator(cred *Cred) (Authenticator, error) { +func newScramSHA256Authenticator(cred *Cred, _ *http.Client) (Authenticator, error) { + source := cred.Source + if source == "" { + source = "admin" + } passprep, err := stringprep.SASLprep.Prepare(cred.Password) if err != nil { return nil, newAuthError("error SASLprepping password", err) @@ -61,7 +71,7 @@ func newScramSHA256Authenticator(cred *Cred) (Authenticator, error) { client.WithMinIterations(4096) return &ScramAuthenticator{ mechanism: SCRAMSHA256, - source: cred.Source, + source: source, client: client, }, nil } @@ -84,6 +94,11 @@ func (a *ScramAuthenticator) Auth(ctx context.Context, cfg *Config) error { return nil } +// Reauth reauthenticates the connection. +func (a *ScramAuthenticator) Reauth(_ context.Context, _ *driver.AuthConfig) error { + return newAuthError("SCRAM does not support reauthentication", nil) +} + // CreateSpeculativeConversation creates a speculative conversation for SCRAM authentication. func (a *ScramAuthenticator) CreateSpeculativeConversation() (SpeculativeConversation, error) { return newSaslConversation(a.createSaslClient(), a.source, true), nil @@ -112,7 +127,7 @@ func (a *scramSaslAdapter) Start() (string, []byte, error) { return a.mechanism, []byte(step), nil } -func (a *scramSaslAdapter) Next(challenge []byte) ([]byte, error) { +func (a *scramSaslAdapter) Next(_ context.Context, challenge []byte) ([]byte, error) { step, err := a.conversation.Step(string(challenge)) if err != nil { return nil, err diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/x509.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/x509.go index 03a9d750e..1b84e00b4 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/x509.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/auth/x509.go @@ -8,6 +8,7 @@ package auth import ( "context" + "net/http" "go.mongodb.org/mongo-driver/x/bsonx/bsoncore" "go.mongodb.org/mongo-driver/x/mongo/driver" @@ -17,7 +18,10 @@ import ( // MongoDBX509 is the mechanism name for MongoDBX509. const MongoDBX509 = "MONGODB-X509" -func newMongoDBX509Authenticator(cred *Cred) (Authenticator, error) { +func newMongoDBX509Authenticator(cred *Cred, _ *http.Client) (Authenticator, error) { + // TODO(GODRIVER-3309): Validate that cred.Source is either empty or + // "$external" to make validation uniform with other auth mechanisms that + // require Source to be "$external" (e.g. MONGODB-AWS, MONGODB-OIDC, etc). return &MongoDBX509Authenticator{User: cred.Username}, nil } @@ -65,7 +69,7 @@ func (a *MongoDBX509Authenticator) Auth(ctx context.Context, cfg *Config) error requestDoc := createFirstX509Message() authCmd := operation. NewCommand(requestDoc). - Database("$external"). + Database(sourceExternal). Deployment(driver.SingleConnectionDeployment{cfg.Connection}). ClusterClock(cfg.ClusterClock). ServerAPI(cfg.ServerAPI) @@ -76,3 +80,8 @@ func (a *MongoDBX509Authenticator) Auth(ctx context.Context, cfg *Config) error return nil } + +// Reauth reauthenticates the connection. +func (a *MongoDBX509Authenticator) Reauth(_ context.Context, _ *driver.AuthConfig) error { + return newAuthError("X509 does not support reauthentication", nil) +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/batch_cursor.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/batch_cursor.go index 23b4a6539..2aa0aca69 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/batch_cursor.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/batch_cursor.go @@ -108,12 +108,12 @@ func NewCursorResponse(info ResponseInfo) (CursorResponse, error) { if !ok { return CursorResponse{}, fmt.Errorf("ns should be a string but is a BSON %s", elem.Value().Type) } - index := strings.Index(ns, ".") - if index == -1 { + database, collection, ok := strings.Cut(ns, ".") + if !ok { return CursorResponse{}, errors.New("ns field must contain a valid namespace, but is missing '.'") } - curresp.Database = ns[:index] - curresp.Collection = ns[index+1:] + curresp.Database = database + curresp.Collection = collection case "id": curresp.ID, ok = elem.Value().Int64OK() if !ok { @@ -314,7 +314,7 @@ func (bc *BatchCursor) KillCursor(ctx context.Context) error { } return Operation{ - CommandFn: func(dst []byte, desc description.SelectedServer) ([]byte, error) { + CommandFn: func(dst []byte, _ description.SelectedServer) ([]byte, error) { dst = bsoncore.AppendStringElement(dst, "killCursors", bc.collection) dst = bsoncore.BuildArrayElement(dst, "cursors", bsoncore.Value{Type: bsontype.Int64, Data: bsoncore.AppendInt64(nil, bc.id)}) return dst, nil @@ -369,7 +369,7 @@ func (bc *BatchCursor) getMore(ctx context.Context) { } bc.err = Operation{ - CommandFn: func(dst []byte, desc description.SelectedServer) ([]byte, error) { + CommandFn: func(dst []byte, _ description.SelectedServer) ([]byte, error) { dst = bsoncore.AppendInt64Element(dst, "getMore", bc.id) dst = bsoncore.AppendStringElement(dst, "collection", bc.collection) if numToReturn > 0 { diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/connstring/connstring.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/connstring/connstring.go index 686458e29..fd69eb490 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/connstring/connstring.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/connstring/connstring.go @@ -24,6 +24,7 @@ import ( "go.mongodb.org/mongo-driver/internal/randutil" "go.mongodb.org/mongo-driver/mongo/writeconcern" + "go.mongodb.org/mongo-driver/x/mongo/driver/auth" "go.mongodb.org/mongo-driver/x/mongo/driver/dns" "go.mongodb.org/mongo-driver/x/mongo/driver/wiremessage" ) @@ -258,6 +259,16 @@ func (u *ConnString) Validate() error { } } + // Check for OIDC auth mechanism properties that cannot be set in the ConnString. + if u.AuthMechanism == auth.MongoDBOIDC { + if _, ok := u.AuthMechanismProperties[auth.AllowedHostsProp]; ok { + return fmt.Errorf( + "ALLOWED_HOSTS cannot be specified in the URI connection string for the %q auth mechanism, it must be specified through the ClientOptions directly", + auth.MongoDBOIDC, + ) + } + } + return nil } @@ -285,7 +296,7 @@ func (u *ConnString) setDefaultAuthParams(dbName string) error { u.AuthMechanismProperties["SERVICE_NAME"] = "mongodb" } fallthrough - case "mongodb-aws", "mongodb-x509": + case "mongodb-aws", "mongodb-x509", "mongodb-oidc": if u.AuthSource == "" { u.AuthSource = "$external" } else if u.AuthSource != "$external" { @@ -781,6 +792,10 @@ func (u *ConnString) validateAuth() error { if u.AuthMechanismProperties != nil { return fmt.Errorf("SCRAM-SHA-256 cannot have mechanism properties") } + case "mongodb-oidc": + if u.Password != "" { + return fmt.Errorf("password cannot be specified for MONGODB-OIDC") + } case "": if u.UsernameSet && u.Username == "" { return fmt.Errorf("username required if URI contains user info") @@ -887,15 +902,16 @@ func (p *parser) parse(original string) (*ConnString, error) { uri := original var err error - if strings.HasPrefix(uri, SchemeMongoDBSRV+"://") { + switch { + case strings.HasPrefix(uri, SchemeMongoDBSRV+"://"): connStr.Scheme = SchemeMongoDBSRV // remove the scheme uri = uri[len(SchemeMongoDBSRV)+3:] - } else if strings.HasPrefix(uri, SchemeMongoDB+"://") { + case strings.HasPrefix(uri, SchemeMongoDB+"://"): connStr.Scheme = SchemeMongoDB // remove the scheme uri = uri[len(SchemeMongoDB)+3:] - } else { + default: return nil, errors.New(`scheme must be "mongodb" or "mongodb+srv"`) } @@ -906,9 +922,9 @@ func (p *parser) parse(original string) (*ConnString, error) { username := userInfo var password string - if idx := strings.Index(userInfo, ":"); idx != -1 { - username = userInfo[:idx] - password = userInfo[idx+1:] + if u, p, ok := strings.Cut(userInfo, ":"); ok { + username = u + password = p connStr.PasswordSet = true } diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/driver.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/driver.go index 900729bf8..363f4d6be 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/driver.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/driver.go @@ -24,6 +24,63 @@ import ( "go.mongodb.org/mongo-driver/x/mongo/driver/session" ) +// AuthConfig holds the information necessary to perform an authentication attempt. +// this was moved from the auth package to avoid a circular dependency. The auth package +// reexports this under the old name to avoid breaking the public api. +type AuthConfig struct { + Description description.Server + Connection Connection + ClusterClock *session.ClusterClock + HandshakeInfo HandshakeInformation + ServerAPI *ServerAPIOptions +} + +// OIDCCallback is the type for both Human and Machine Callback flows. RefreshToken will always be +// nil in the OIDCArgs for the Machine flow. +type OIDCCallback func(context.Context, *OIDCArgs) (*OIDCCredential, error) + +// OIDCArgs contains the arguments for the OIDC callback. +type OIDCArgs struct { + Version int + IDPInfo *IDPInfo + RefreshToken *string +} + +// OIDCCredential contains the access token and refresh token. +type OIDCCredential struct { + AccessToken string + ExpiresAt *time.Time + RefreshToken *string +} + +// IDPInfo contains the information needed to perform OIDC authentication with an Identity Provider. +type IDPInfo struct { + Issuer string `bson:"issuer"` + ClientID string `bson:"clientId"` + RequestScopes []string `bson:"requestScopes"` +} + +// Authenticator handles authenticating a connection. The implementers of this interface +// are all in the auth package. Most authentication mechanisms do not allow for Reauth, +// but this is included in the interface so that whenever a new mechanism is added, it +// must be explicitly considered. +type Authenticator interface { + // Auth authenticates the connection. + Auth(context.Context, *AuthConfig) error + Reauth(context.Context, *AuthConfig) error +} + +// Cred is a user's credential. +type Cred struct { + Source string + Username string + Password string + PasswordSet bool + Props map[string]string + OIDCMachineCallback OIDCCallback + OIDCHumanCallback OIDCCallback +} + // Deployment is implemented by types that can select a server from a deployment. type Deployment interface { SelectServer(context.Context, description.ServerSelector) (Server, error) @@ -79,6 +136,8 @@ type Connection interface { DriverConnectionID() uint64 // TODO(GODRIVER-2824): change type to int64. Address() address.Address Stale() bool + OIDCTokenGenID() uint64 + SetOIDCTokenGenID(uint64) } // RTTMonitor represents a round-trip-time monitor. diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/errors.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/errors.go index 177c2d450..59c3992e9 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/errors.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/errors.go @@ -140,7 +140,7 @@ func (wce WriteCommandError) Retryable(wireVersion *description.VersionRange) bo if wce.WriteConcernError == nil { return false } - return (*wce.WriteConcernError).Retryable() + return wce.WriteConcernError.Retryable() } // HasErrorLabel returns true if the error contains the specified label. diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/mongocrypt/mongocrypt.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/mongocrypt/mongocrypt.go index 20f6ff0aa..c5044f8f5 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/mongocrypt/mongocrypt.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/mongocrypt/mongocrypt.go @@ -278,7 +278,12 @@ func (m *MongoCrypt) createExplicitEncryptionContext(opts *options.ExplicitEncry if opts.RangeOptions.Precision != nil { mongocryptDoc = bsoncore.AppendInt32Element(mongocryptDoc, "precision", *opts.RangeOptions.Precision) } - mongocryptDoc = bsoncore.AppendInt64Element(mongocryptDoc, "sparsity", opts.RangeOptions.Sparsity) + if opts.RangeOptions.Sparsity != nil { + mongocryptDoc = bsoncore.AppendInt64Element(mongocryptDoc, "sparsity", *opts.RangeOptions.Sparsity) + } + if opts.RangeOptions.TrimFactor != nil { + mongocryptDoc = bsoncore.AppendInt32Element(mongocryptDoc, "trimFactor", *opts.RangeOptions.TrimFactor) + } mongocryptDoc, err := bsoncore.AppendDocumentEnd(mongocryptDoc, idx) if err != nil { diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/mongocrypt/options/mongocrypt_context_options.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/mongocrypt/options/mongocrypt_context_options.go index 325777eb2..81805e714 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/mongocrypt/options/mongocrypt_context_options.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/mongocrypt/options/mongocrypt_context_options.go @@ -61,10 +61,11 @@ type ExplicitEncryptionOptions struct { // ExplicitRangeOptions specifies options for the range index. type ExplicitRangeOptions struct { - Min *bsoncore.Value - Max *bsoncore.Value - Sparsity int64 - Precision *int32 + Min *bsoncore.Value + Max *bsoncore.Value + Sparsity *int64 + TrimFactor *int32 + Precision *int32 } // ExplicitEncryption creates a new ExplicitEncryptionOptions instance. diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation.go index db5367bed..ec6f69eca 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation.go @@ -315,6 +315,10 @@ type Operation struct { // [Operation.MaxTime]. OmitCSOTMaxTimeMS bool + // Authenticator is the authenticator to use for this operation when a reauthentication is + // required. + Authenticator Authenticator + // omitReadPreference is a boolean that indicates whether to omit the // read preference from the command. This omition includes the case // where a default read preference is used when the operation @@ -573,8 +577,7 @@ func (op Operation) Execute(ctx context.Context) error { // Set the previous indefinite error to be returned in any case where a retryable write error does not have a // NoWritesPerfomed label (the definite case). - switch err := err.(type) { - case labeledError: + if err, ok := err.(labeledError); ok { // If the "prevIndefiniteErr" is nil, then the current error is the first error encountered // during the retry attempt cycle. We must persist the first error in the case where all // following errors are labeled "NoWritesPerformed", which would otherwise raise nil as the @@ -912,6 +915,28 @@ func (op Operation) Execute(ctx context.Context) error { operationErr.Labels = tt.Labels operationErr.Raw = tt.Raw case Error: + // 391 is the reauthentication required error code, so we will attempt a reauth and + // retry the operation, if it is successful. + if tt.Code == 391 { + if op.Authenticator != nil { + cfg := AuthConfig{ + Description: conn.Description(), + Connection: conn, + ClusterClock: op.Clock, + ServerAPI: op.ServerAPI, + } + if err := op.Authenticator.Reauth(ctx, &cfg); err != nil { + return fmt.Errorf("error reauthenticating: %w", err) + } + if op.Client != nil && op.Client.Committing { + // Apply majority write concern for retries + op.Client.UpdateCommitTransactionWriteConcern() + op.WriteConcern = op.Client.CurrentWc + } + resetForRetry(tt) + continue + } + } if tt.HasErrorLabel(TransientTransactionError) || tt.HasErrorLabel(UnknownTransactionCommitResult) { if err := op.Client.ClearPinnedResources(); err != nil { return err @@ -1173,18 +1198,12 @@ func (Operation) decompressWireMessage(wm []byte) (wiremessage.OpCode, []byte, e if !ok { return 0, nil, errors.New("malformed OP_COMPRESSED: missing compressor ID") } - compressedSize := len(wm) - 9 // original opcode (4) + uncompressed size (4) + compressor ID (1) - // return the original wiremessage - msg, _, ok := wiremessage.ReadCompressedCompressedMessage(rem, int32(compressedSize)) - if !ok { - return 0, nil, errors.New("malformed OP_COMPRESSED: insufficient bytes for compressed wiremessage") - } opts := CompressionOpts{ Compressor: compressorID, UncompressedSize: uncompressedSize, } - uncompressed, err := DecompressPayload(msg, opts) + uncompressed, err := DecompressPayload(rem, opts) if err != nil { return 0, nil, err } diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/abort_transaction.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/abort_transaction.go index 941372713..aeee53353 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/abort_transaction.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/abort_transaction.go @@ -21,6 +21,7 @@ import ( // AbortTransaction performs an abortTransaction operation. type AbortTransaction struct { + authenticator driver.Authenticator recoveryToken bsoncore.Document session *session.Client clock *session.ClusterClock @@ -66,6 +67,7 @@ func (at *AbortTransaction) Execute(ctx context.Context) error { WriteConcern: at.writeConcern, ServerAPI: at.serverAPI, Name: driverutil.AbortTransactionOp, + Authenticator: at.authenticator, }.Execute(ctx) } @@ -199,3 +201,13 @@ func (at *AbortTransaction) ServerAPI(serverAPI *driver.ServerAPIOptions) *Abort at.serverAPI = serverAPI return at } + +// Authenticator sets the authenticator to use for this operation. +func (at *AbortTransaction) Authenticator(authenticator driver.Authenticator) *AbortTransaction { + if at == nil { + at = new(AbortTransaction) + } + + at.authenticator = authenticator + return at +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/aggregate.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/aggregate.go index 44467df8f..df6b8fa9d 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/aggregate.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/aggregate.go @@ -25,6 +25,7 @@ import ( // Aggregate represents an aggregate operation. type Aggregate struct { + authenticator driver.Authenticator allowDiskUse *bool batchSize *int32 bypassDocumentValidation *bool @@ -115,6 +116,7 @@ func (a *Aggregate) Execute(ctx context.Context) error { Timeout: a.timeout, Name: driverutil.AggregateOp, OmitCSOTMaxTimeMS: a.omitCSOTMaxTimeMS, + Authenticator: a.authenticator, }.Execute(ctx) } @@ -433,3 +435,13 @@ func (a *Aggregate) OmitCSOTMaxTimeMS(omit bool) *Aggregate { a.omitCSOTMaxTimeMS = omit return a } + +// Authenticator sets the authenticator to use for this operation. +func (a *Aggregate) Authenticator(authenticator driver.Authenticator) *Aggregate { + if a == nil { + a = new(Aggregate) + } + + a.authenticator = authenticator + return a +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/command.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/command.go index 35283794a..64c98ba19 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/command.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/command.go @@ -22,6 +22,7 @@ import ( // Command is used to run a generic operation. type Command struct { + authenticator driver.Authenticator command bsoncore.Document database string deployment driver.Deployment @@ -78,7 +79,7 @@ func (c *Command) Execute(ctx context.Context) error { } return driver.Operation{ - CommandFn: func(dst []byte, desc description.SelectedServer) ([]byte, error) { + CommandFn: func(dst []byte, _ description.SelectedServer) ([]byte, error) { return append(dst, c.command[4:len(c.command)-1]...), nil }, ProcessResponseFn: func(info driver.ResponseInfo) error { @@ -107,6 +108,7 @@ func (c *Command) Execute(ctx context.Context) error { ServerAPI: c.serverAPI, Timeout: c.timeout, Logger: c.logger, + Authenticator: c.authenticator, }.Execute(ctx) } @@ -219,3 +221,13 @@ func (c *Command) Logger(logger *logger.Logger) *Command { c.logger = logger return c } + +// Authenticator sets the authenticator to use for this operation. +func (c *Command) Authenticator(authenticator driver.Authenticator) *Command { + if c == nil { + c = new(Command) + } + + c.authenticator = authenticator + return c +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/commit_transaction.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/commit_transaction.go index 11c6f69dd..6b402bdf6 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/commit_transaction.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/commit_transaction.go @@ -22,6 +22,7 @@ import ( // CommitTransaction attempts to commit a transaction. type CommitTransaction struct { + authenticator driver.Authenticator maxTime *time.Duration recoveryToken bsoncore.Document session *session.Client @@ -68,6 +69,7 @@ func (ct *CommitTransaction) Execute(ctx context.Context) error { WriteConcern: ct.writeConcern, ServerAPI: ct.serverAPI, Name: driverutil.CommitTransactionOp, + Authenticator: ct.authenticator, }.Execute(ctx) } @@ -201,3 +203,13 @@ func (ct *CommitTransaction) ServerAPI(serverAPI *driver.ServerAPIOptions) *Comm ct.serverAPI = serverAPI return ct } + +// Authenticator sets the authenticator to use for this operation. +func (ct *CommitTransaction) Authenticator(authenticator driver.Authenticator) *CommitTransaction { + if ct == nil { + ct = new(CommitTransaction) + } + + ct.authenticator = authenticator + return ct +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/count.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/count.go index 8de1e9f8d..eaafc9a24 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/count.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/count.go @@ -25,6 +25,7 @@ import ( // Count represents a count operation. type Count struct { + authenticator driver.Authenticator maxTime *time.Duration query bsoncore.Document session *session.Client @@ -128,6 +129,7 @@ func (c *Count) Execute(ctx context.Context) error { ServerAPI: c.serverAPI, Timeout: c.timeout, Name: driverutil.CountOp, + Authenticator: c.authenticator, }.Execute(ctx) // Swallow error if NamespaceNotFound(26) is returned from aggregate on non-existent namespace @@ -311,3 +313,13 @@ func (c *Count) Timeout(timeout *time.Duration) *Count { c.timeout = timeout return c } + +// Authenticator sets the authenticator to use for this operation. +func (c *Count) Authenticator(authenticator driver.Authenticator) *Count { + if c == nil { + c = new(Count) + } + + c.authenticator = authenticator + return c +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/create.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/create.go index 45b26cb70..4878e2c77 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/create.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/create.go @@ -20,6 +20,7 @@ import ( // Create represents a create operation. type Create struct { + authenticator driver.Authenticator capped *bool collation bsoncore.Document changeStreamPreAndPostImages bsoncore.Document @@ -77,6 +78,7 @@ func (c *Create) Execute(ctx context.Context) error { Selector: c.selector, WriteConcern: c.writeConcern, ServerAPI: c.serverAPI, + Authenticator: c.authenticator, }.Execute(ctx) } @@ -399,3 +401,13 @@ func (c *Create) ClusteredIndex(ci bsoncore.Document) *Create { c.clusteredIndex = ci return c } + +// Authenticator sets the authenticator to use for this operation. +func (c *Create) Authenticator(authenticator driver.Authenticator) *Create { + if c == nil { + c = new(Create) + } + + c.authenticator = authenticator + return c +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/create_indexes.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/create_indexes.go index 77daf676a..464c1762d 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/create_indexes.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/create_indexes.go @@ -24,21 +24,22 @@ import ( // CreateIndexes performs a createIndexes operation. type CreateIndexes struct { - commitQuorum bsoncore.Value - indexes bsoncore.Document - maxTime *time.Duration - session *session.Client - clock *session.ClusterClock - collection string - monitor *event.CommandMonitor - crypt driver.Crypt - database string - deployment driver.Deployment - selector description.ServerSelector - writeConcern *writeconcern.WriteConcern - result CreateIndexesResult - serverAPI *driver.ServerAPIOptions - timeout *time.Duration + authenticator driver.Authenticator + commitQuorum bsoncore.Value + indexes bsoncore.Document + maxTime *time.Duration + session *session.Client + clock *session.ClusterClock + collection string + monitor *event.CommandMonitor + crypt driver.Crypt + database string + deployment driver.Deployment + selector description.ServerSelector + writeConcern *writeconcern.WriteConcern + result CreateIndexesResult + serverAPI *driver.ServerAPIOptions + timeout *time.Duration } // CreateIndexesResult represents a createIndexes result returned by the server. @@ -119,6 +120,7 @@ func (ci *CreateIndexes) Execute(ctx context.Context) error { ServerAPI: ci.serverAPI, Timeout: ci.timeout, Name: driverutil.CreateIndexesOp, + Authenticator: ci.authenticator, }.Execute(ctx) } @@ -278,3 +280,13 @@ func (ci *CreateIndexes) Timeout(timeout *time.Duration) *CreateIndexes { ci.timeout = timeout return ci } + +// Authenticator sets the authenticator to use for this operation. +func (ci *CreateIndexes) Authenticator(authenticator driver.Authenticator) *CreateIndexes { + if ci == nil { + ci = new(CreateIndexes) + } + + ci.authenticator = authenticator + return ci +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/create_search_indexes.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/create_search_indexes.go index cb0d80795..8185d27fe 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/create_search_indexes.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/create_search_indexes.go @@ -22,18 +22,19 @@ import ( // CreateSearchIndexes performs a createSearchIndexes operation. type CreateSearchIndexes struct { - indexes bsoncore.Document - session *session.Client - clock *session.ClusterClock - collection string - monitor *event.CommandMonitor - crypt driver.Crypt - database string - deployment driver.Deployment - selector description.ServerSelector - result CreateSearchIndexesResult - serverAPI *driver.ServerAPIOptions - timeout *time.Duration + authenticator driver.Authenticator + indexes bsoncore.Document + session *session.Client + clock *session.ClusterClock + collection string + monitor *event.CommandMonitor + crypt driver.Crypt + database string + deployment driver.Deployment + selector description.ServerSelector + result CreateSearchIndexesResult + serverAPI *driver.ServerAPIOptions + timeout *time.Duration } // CreateSearchIndexResult represents a single search index result in CreateSearchIndexesResult. @@ -116,6 +117,7 @@ func (csi *CreateSearchIndexes) Execute(ctx context.Context) error { Selector: csi.selector, ServerAPI: csi.serverAPI, Timeout: csi.timeout, + Authenticator: csi.authenticator, }.Execute(ctx) } @@ -237,3 +239,13 @@ func (csi *CreateSearchIndexes) Timeout(timeout *time.Duration) *CreateSearchInd csi.timeout = timeout return csi } + +// Authenticator sets the authenticator to use for this operation. +func (csi *CreateSearchIndexes) Authenticator(authenticator driver.Authenticator) *CreateSearchIndexes { + if csi == nil { + csi = new(CreateSearchIndexes) + } + + csi.authenticator = authenticator + return csi +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/delete.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/delete.go index bf95cf496..4b520a548 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/delete.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/delete.go @@ -25,25 +25,26 @@ import ( // Delete performs a delete operation type Delete struct { - comment bsoncore.Value - deletes []bsoncore.Document - ordered *bool - session *session.Client - clock *session.ClusterClock - collection string - monitor *event.CommandMonitor - crypt driver.Crypt - database string - deployment driver.Deployment - selector description.ServerSelector - writeConcern *writeconcern.WriteConcern - retry *driver.RetryMode - hint *bool - result DeleteResult - serverAPI *driver.ServerAPIOptions - let bsoncore.Document - timeout *time.Duration - logger *logger.Logger + authenticator driver.Authenticator + comment bsoncore.Value + deletes []bsoncore.Document + ordered *bool + session *session.Client + clock *session.ClusterClock + collection string + monitor *event.CommandMonitor + crypt driver.Crypt + database string + deployment driver.Deployment + selector description.ServerSelector + writeConcern *writeconcern.WriteConcern + retry *driver.RetryMode + hint *bool + result DeleteResult + serverAPI *driver.ServerAPIOptions + let bsoncore.Document + timeout *time.Duration + logger *logger.Logger } // DeleteResult represents a delete result returned by the server. @@ -59,8 +60,7 @@ func buildDeleteResult(response bsoncore.Document) (DeleteResult, error) { } dr := DeleteResult{} for _, element := range elements { - switch element.Key() { - case "n": + if element.Key() == "n" { var ok bool dr.N, ok = element.Value().AsInt64OK() if !ok { @@ -116,6 +116,7 @@ func (d *Delete) Execute(ctx context.Context) error { Timeout: d.timeout, Logger: d.logger, Name: driverutil.DeleteOp, + Authenticator: d.authenticator, }.Execute(ctx) } @@ -328,3 +329,13 @@ func (d *Delete) Logger(logger *logger.Logger) *Delete { return d } + +// Authenticator sets the authenticator to use for this operation. +func (d *Delete) Authenticator(authenticator driver.Authenticator) *Delete { + if d == nil { + d = new(Delete) + } + + d.authenticator = authenticator + return d +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/distinct.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/distinct.go index b7e675ce4..0c39027e7 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/distinct.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/distinct.go @@ -24,6 +24,7 @@ import ( // Distinct performs a distinct operation. type Distinct struct { + authenticator driver.Authenticator collation bsoncore.Document key *string maxTime *time.Duration @@ -58,8 +59,7 @@ func buildDistinctResult(response bsoncore.Document) (DistinctResult, error) { } dr := DistinctResult{} for _, element := range elements { - switch element.Key() { - case "values": + if element.Key() == "values" { dr.Values = element.Value() } } @@ -107,6 +107,7 @@ func (d *Distinct) Execute(ctx context.Context) error { ServerAPI: d.serverAPI, Timeout: d.timeout, Name: driverutil.DistinctOp, + Authenticator: d.authenticator, }.Execute(ctx) } @@ -311,3 +312,13 @@ func (d *Distinct) Timeout(timeout *time.Duration) *Distinct { d.timeout = timeout return d } + +// Authenticator sets the authenticator to use for this operation. +func (d *Distinct) Authenticator(authenticator driver.Authenticator) *Distinct { + if d == nil { + d = new(Distinct) + } + + d.authenticator = authenticator + return d +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/drop_collection.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/drop_collection.go index 8c6596756..5a32c2f8d 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/drop_collection.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/drop_collection.go @@ -23,18 +23,19 @@ import ( // DropCollection performs a drop operation. type DropCollection struct { - session *session.Client - clock *session.ClusterClock - collection string - monitor *event.CommandMonitor - crypt driver.Crypt - database string - deployment driver.Deployment - selector description.ServerSelector - writeConcern *writeconcern.WriteConcern - result DropCollectionResult - serverAPI *driver.ServerAPIOptions - timeout *time.Duration + authenticator driver.Authenticator + session *session.Client + clock *session.ClusterClock + collection string + monitor *event.CommandMonitor + crypt driver.Crypt + database string + deployment driver.Deployment + selector description.ServerSelector + writeConcern *writeconcern.WriteConcern + result DropCollectionResult + serverAPI *driver.ServerAPIOptions + timeout *time.Duration } // DropCollectionResult represents a dropCollection result returned by the server. @@ -104,6 +105,7 @@ func (dc *DropCollection) Execute(ctx context.Context) error { ServerAPI: dc.serverAPI, Timeout: dc.timeout, Name: driverutil.DropOp, + Authenticator: dc.authenticator, }.Execute(ctx) } @@ -222,3 +224,13 @@ func (dc *DropCollection) Timeout(timeout *time.Duration) *DropCollection { dc.timeout = timeout return dc } + +// Authenticator sets the authenticator to use for this operation. +func (dc *DropCollection) Authenticator(authenticator driver.Authenticator) *DropCollection { + if dc == nil { + dc = new(DropCollection) + } + + dc.authenticator = authenticator + return dc +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/drop_database.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/drop_database.go index a8f9b45ba..19956210d 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/drop_database.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/drop_database.go @@ -21,15 +21,16 @@ import ( // DropDatabase performs a dropDatabase operation type DropDatabase struct { - session *session.Client - clock *session.ClusterClock - monitor *event.CommandMonitor - crypt driver.Crypt - database string - deployment driver.Deployment - selector description.ServerSelector - writeConcern *writeconcern.WriteConcern - serverAPI *driver.ServerAPIOptions + authenticator driver.Authenticator + session *session.Client + clock *session.ClusterClock + monitor *event.CommandMonitor + crypt driver.Crypt + database string + deployment driver.Deployment + selector description.ServerSelector + writeConcern *writeconcern.WriteConcern + serverAPI *driver.ServerAPIOptions } // NewDropDatabase constructs and returns a new DropDatabase. @@ -55,6 +56,7 @@ func (dd *DropDatabase) Execute(ctx context.Context) error { WriteConcern: dd.writeConcern, ServerAPI: dd.serverAPI, Name: driverutil.DropDatabaseOp, + Authenticator: dd.authenticator, }.Execute(ctx) } @@ -154,3 +156,13 @@ func (dd *DropDatabase) ServerAPI(serverAPI *driver.ServerAPIOptions) *DropDatab dd.serverAPI = serverAPI return dd } + +// Authenticator sets the authenticator to use for this operation. +func (dd *DropDatabase) Authenticator(authenticator driver.Authenticator) *DropDatabase { + if dd == nil { + dd = new(DropDatabase) + } + + dd.authenticator = authenticator + return dd +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/drop_indexes.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/drop_indexes.go index 0c3d45970..a22496b1e 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/drop_indexes.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/drop_indexes.go @@ -23,20 +23,21 @@ import ( // DropIndexes performs an dropIndexes operation. type DropIndexes struct { - index *string - maxTime *time.Duration - session *session.Client - clock *session.ClusterClock - collection string - monitor *event.CommandMonitor - crypt driver.Crypt - database string - deployment driver.Deployment - selector description.ServerSelector - writeConcern *writeconcern.WriteConcern - result DropIndexesResult - serverAPI *driver.ServerAPIOptions - timeout *time.Duration + authenticator driver.Authenticator + index any + maxTime *time.Duration + session *session.Client + clock *session.ClusterClock + collection string + monitor *event.CommandMonitor + crypt driver.Crypt + database string + deployment driver.Deployment + selector description.ServerSelector + writeConcern *writeconcern.WriteConcern + result DropIndexesResult + serverAPI *driver.ServerAPIOptions + timeout *time.Duration } // DropIndexesResult represents a dropIndexes result returned by the server. @@ -52,8 +53,7 @@ func buildDropIndexesResult(response bsoncore.Document) (DropIndexesResult, erro } dir := DropIndexesResult{} for _, element := range elements { - switch element.Key() { - case "nIndexesWas": + if element.Key() == "nIndexesWas" { var ok bool dir.NIndexesWas, ok = element.Value().AsInt32OK() if !ok { @@ -65,9 +65,9 @@ func buildDropIndexesResult(response bsoncore.Document) (DropIndexesResult, erro } // NewDropIndexes constructs and returns a new DropIndexes. -func NewDropIndexes(index string) *DropIndexes { +func NewDropIndexes(index any) *DropIndexes { return &DropIndexes{ - index: &index, + index: index, } } @@ -101,25 +101,33 @@ func (di *DropIndexes) Execute(ctx context.Context) error { ServerAPI: di.serverAPI, Timeout: di.timeout, Name: driverutil.DropIndexesOp, + Authenticator: di.authenticator, }.Execute(ctx) } func (di *DropIndexes) command(dst []byte, _ description.SelectedServer) ([]byte, error) { dst = bsoncore.AppendStringElement(dst, "dropIndexes", di.collection) - if di.index != nil { - dst = bsoncore.AppendStringElement(dst, "index", *di.index) + + switch t := di.index.(type) { + case string: + dst = bsoncore.AppendStringElement(dst, "index", t) + case bsoncore.Document: + if di.index != nil { + dst = bsoncore.AppendDocumentElement(dst, "index", t) + } } + return dst, nil } // Index specifies the name of the index to drop. If '*' is specified, all indexes will be dropped. -func (di *DropIndexes) Index(index string) *DropIndexes { +func (di *DropIndexes) Index(index any) *DropIndexes { if di == nil { di = new(DropIndexes) } - di.index = &index + di.index = index return di } @@ -242,3 +250,13 @@ func (di *DropIndexes) Timeout(timeout *time.Duration) *DropIndexes { di.timeout = timeout return di } + +// Authenticator sets the authenticator to use for this operation. +func (di *DropIndexes) Authenticator(authenticator driver.Authenticator) *DropIndexes { + if di == nil { + di = new(DropIndexes) + } + + di.authenticator = authenticator + return di +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/drop_search_index.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/drop_search_index.go index 3992c8316..94e4ddfb0 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/drop_search_index.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/drop_search_index.go @@ -21,18 +21,19 @@ import ( // DropSearchIndex performs an dropSearchIndex operation. type DropSearchIndex struct { - index string - session *session.Client - clock *session.ClusterClock - collection string - monitor *event.CommandMonitor - crypt driver.Crypt - database string - deployment driver.Deployment - selector description.ServerSelector - result DropSearchIndexResult - serverAPI *driver.ServerAPIOptions - timeout *time.Duration + authenticator driver.Authenticator + index string + session *session.Client + clock *session.ClusterClock + collection string + monitor *event.CommandMonitor + crypt driver.Crypt + database string + deployment driver.Deployment + selector description.ServerSelector + result DropSearchIndexResult + serverAPI *driver.ServerAPIOptions + timeout *time.Duration } // DropSearchIndexResult represents a dropSearchIndex result returned by the server. @@ -47,8 +48,7 @@ func buildDropSearchIndexResult(response bsoncore.Document) (DropSearchIndexResu } dsir := DropSearchIndexResult{} for _, element := range elements { - switch element.Key() { - case "ok": + if element.Key() == "ok" { var ok bool dsir.Ok, ok = element.Value().AsInt32OK() if !ok { @@ -93,6 +93,7 @@ func (dsi *DropSearchIndex) Execute(ctx context.Context) error { Selector: dsi.selector, ServerAPI: dsi.serverAPI, Timeout: dsi.timeout, + Authenticator: dsi.authenticator, }.Execute(ctx) } @@ -212,3 +213,13 @@ func (dsi *DropSearchIndex) Timeout(timeout *time.Duration) *DropSearchIndex { dsi.timeout = timeout return dsi } + +// Authenticator sets the authenticator to use for this operation. +func (dsi *DropSearchIndex) Authenticator(authenticator driver.Authenticator) *DropSearchIndex { + if dsi == nil { + dsi = new(DropSearchIndex) + } + + dsi.authenticator = authenticator + return dsi +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/end_sessions.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/end_sessions.go index 52f300bb7..8b24b3d8c 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/end_sessions.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/end_sessions.go @@ -20,15 +20,16 @@ import ( // EndSessions performs an endSessions operation. type EndSessions struct { - sessionIDs bsoncore.Document - session *session.Client - clock *session.ClusterClock - monitor *event.CommandMonitor - crypt driver.Crypt - database string - deployment driver.Deployment - selector description.ServerSelector - serverAPI *driver.ServerAPIOptions + authenticator driver.Authenticator + sessionIDs bsoncore.Document + session *session.Client + clock *session.ClusterClock + monitor *event.CommandMonitor + crypt driver.Crypt + database string + deployment driver.Deployment + selector description.ServerSelector + serverAPI *driver.ServerAPIOptions } // NewEndSessions constructs and returns a new EndSessions. @@ -61,6 +62,7 @@ func (es *EndSessions) Execute(ctx context.Context) error { Selector: es.selector, ServerAPI: es.serverAPI, Name: driverutil.EndSessionsOp, + Authenticator: es.authenticator, }.Execute(ctx) } @@ -161,3 +163,13 @@ func (es *EndSessions) ServerAPI(serverAPI *driver.ServerAPIOptions) *EndSession es.serverAPI = serverAPI return es } + +// Authenticator sets the authenticator to use for this operation. +func (es *EndSessions) Authenticator(authenticator driver.Authenticator) *EndSessions { + if es == nil { + es = new(EndSessions) + } + + es.authenticator = authenticator + return es +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/find.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/find.go index 8950fde86..c71b7d755 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/find.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/find.go @@ -25,6 +25,7 @@ import ( // Find performs a find operation. type Find struct { + authenticator driver.Authenticator allowDiskUse *bool allowPartialResults *bool awaitData *bool @@ -112,6 +113,7 @@ func (f *Find) Execute(ctx context.Context) error { Logger: f.logger, Name: driverutil.FindOp, OmitCSOTMaxTimeMS: f.omitCSOTMaxTimeMS, + Authenticator: f.authenticator, }.Execute(ctx) } @@ -575,3 +577,13 @@ func (f *Find) Logger(logger *logger.Logger) *Find { f.logger = logger return f } + +// Authenticator sets the authenticator to use for this operation. +func (f *Find) Authenticator(authenticator driver.Authenticator) *Find { + if f == nil { + f = new(Find) + } + + f.authenticator = authenticator + return f +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/find_and_modify.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/find_and_modify.go index 7faf56113..ea365ccb2 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/find_and_modify.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/find_and_modify.go @@ -25,6 +25,7 @@ import ( // FindAndModify performs a findAndModify operation. type FindAndModify struct { + authenticator driver.Authenticator arrayFilters bsoncore.Array bypassDocumentValidation *bool collation bsoncore.Document @@ -145,6 +146,7 @@ func (fam *FindAndModify) Execute(ctx context.Context) error { ServerAPI: fam.serverAPI, Timeout: fam.timeout, Name: driverutil.FindAndModifyOp, + Authenticator: fam.authenticator, }.Execute(ctx) } @@ -477,3 +479,13 @@ func (fam *FindAndModify) Timeout(timeout *time.Duration) *FindAndModify { fam.timeout = timeout return fam } + +// Authenticator sets the authenticator to use for this operation. +func (fam *FindAndModify) Authenticator(authenticator driver.Authenticator) *FindAndModify { + if fam == nil { + fam = new(FindAndModify) + } + + fam.authenticator = authenticator + return fam +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/hello.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/hello.go index 16f2ebf6c..60c99f063 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/hello.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/hello.go @@ -36,6 +36,7 @@ const driverName = "mongo-go-driver" // Hello is used to run the handshake operation. type Hello struct { + authenticator driver.Authenticator appname string compressors []string saslSupportedMechs string @@ -649,3 +650,13 @@ func (h *Hello) GetHandshakeInformation(ctx context.Context, _ address.Address, func (h *Hello) FinishHandshake(context.Context, driver.Connection) error { return nil } + +// Authenticator sets the authenticator to use for this operation. +func (h *Hello) Authenticator(authenticator driver.Authenticator) *Hello { + if h == nil { + h = new(Hello) + } + + h.authenticator = authenticator + return h +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/insert.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/insert.go index 7da4b8b0f..a65a4895f 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/insert.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/insert.go @@ -25,6 +25,7 @@ import ( // Insert performs an insert operation. type Insert struct { + authenticator driver.Authenticator bypassDocumentValidation *bool comment bsoncore.Value documents []bsoncore.Document @@ -58,8 +59,7 @@ func buildInsertResult(response bsoncore.Document) (InsertResult, error) { } ir := InsertResult{} for _, element := range elements { - switch element.Key() { - case "n": + if element.Key() == "n" { var ok bool ir.N, ok = element.Value().AsInt64OK() if !ok { @@ -115,6 +115,7 @@ func (i *Insert) Execute(ctx context.Context) error { Timeout: i.timeout, Logger: i.logger, Name: driverutil.InsertOp, + Authenticator: i.authenticator, }.Execute(ctx) } @@ -306,3 +307,13 @@ func (i *Insert) Logger(logger *logger.Logger) *Insert { i.logger = logger return i } + +// Authenticator sets the authenticator to use for this operation. +func (i *Insert) Authenticator(authenticator driver.Authenticator) *Insert { + if i == nil { + i = new(Insert) + } + + i.authenticator = authenticator + return i +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/listDatabases.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/listDatabases.go index c70248e2a..3df171e37 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/listDatabases.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/listDatabases.go @@ -24,6 +24,7 @@ import ( // ListDatabases performs a listDatabases operation. type ListDatabases struct { + authenticator driver.Authenticator filter bsoncore.Document authorizedDatabases *bool nameOnly *bool @@ -165,6 +166,7 @@ func (ld *ListDatabases) Execute(ctx context.Context) error { ServerAPI: ld.serverAPI, Timeout: ld.timeout, Name: driverutil.ListDatabasesOp, + Authenticator: ld.authenticator, }.Execute(ctx) } @@ -327,3 +329,13 @@ func (ld *ListDatabases) Timeout(timeout *time.Duration) *ListDatabases { ld.timeout = timeout return ld } + +// Authenticator sets the authenticator to use for this operation. +func (ld *ListDatabases) Authenticator(authenticator driver.Authenticator) *ListDatabases { + if ld == nil { + ld = new(ListDatabases) + } + + ld.authenticator = authenticator + return ld +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/list_collections.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/list_collections.go index 6fe68fa03..1e39f5bfb 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/list_collections.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/list_collections.go @@ -22,6 +22,7 @@ import ( // ListCollections performs a listCollections operation. type ListCollections struct { + authenticator driver.Authenticator filter bsoncore.Document nameOnly *bool authorizedCollections *bool @@ -83,6 +84,7 @@ func (lc *ListCollections) Execute(ctx context.Context) error { ServerAPI: lc.serverAPI, Timeout: lc.timeout, Name: driverutil.ListCollectionsOp, + Authenticator: lc.authenticator, }.Execute(ctx) } @@ -259,3 +261,13 @@ func (lc *ListCollections) Timeout(timeout *time.Duration) *ListCollections { lc.timeout = timeout return lc } + +// Authenticator sets the authenticator to use for this operation. +func (lc *ListCollections) Authenticator(authenticator driver.Authenticator) *ListCollections { + if lc == nil { + lc = new(ListCollections) + } + + lc.authenticator = authenticator + return lc +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/list_indexes.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/list_indexes.go index 79d50eca9..433344f30 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/list_indexes.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/list_indexes.go @@ -21,19 +21,20 @@ import ( // ListIndexes performs a listIndexes operation. type ListIndexes struct { - batchSize *int32 - maxTime *time.Duration - session *session.Client - clock *session.ClusterClock - collection string - monitor *event.CommandMonitor - database string - deployment driver.Deployment - selector description.ServerSelector - retry *driver.RetryMode - crypt driver.Crypt - serverAPI *driver.ServerAPIOptions - timeout *time.Duration + authenticator driver.Authenticator + batchSize *int32 + maxTime *time.Duration + session *session.Client + clock *session.ClusterClock + collection string + monitor *event.CommandMonitor + database string + deployment driver.Deployment + selector description.ServerSelector + retry *driver.RetryMode + crypt driver.Crypt + serverAPI *driver.ServerAPIOptions + timeout *time.Duration result driver.CursorResponse } @@ -85,6 +86,7 @@ func (li *ListIndexes) Execute(ctx context.Context) error { ServerAPI: li.serverAPI, Timeout: li.timeout, Name: driverutil.ListIndexesOp, + Authenticator: li.authenticator, }.Execute(ctx) } @@ -233,3 +235,13 @@ func (li *ListIndexes) Timeout(timeout *time.Duration) *ListIndexes { li.timeout = timeout return li } + +// Authenticator sets the authenticator to use for this operation. +func (li *ListIndexes) Authenticator(authenticator driver.Authenticator) *ListIndexes { + if li == nil { + li = new(ListIndexes) + } + + li.authenticator = authenticator + return li +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/update.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/update.go index 881b1bcf7..1070e7ca7 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/update.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/update.go @@ -26,6 +26,7 @@ import ( // Update performs an update operation. type Update struct { + authenticator driver.Authenticator bypassDocumentValidation *bool comment bsoncore.Value ordered *bool @@ -167,6 +168,7 @@ func (u *Update) Execute(ctx context.Context) error { Timeout: u.timeout, Logger: u.logger, Name: driverutil.UpdateOp, + Authenticator: u.authenticator, }.Execute(ctx) } @@ -414,3 +416,13 @@ func (u *Update) Logger(logger *logger.Logger) *Update { u.logger = logger return u } + +// Authenticator sets the authenticator to use for this operation. +func (u *Update) Authenticator(authenticator driver.Authenticator) *Update { + if u == nil { + u = new(Update) + } + + u.authenticator = authenticator + return u +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/update_search_index.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/update_search_index.go index 64f2da7f6..c63e048f2 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/update_search_index.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/operation/update_search_index.go @@ -21,19 +21,20 @@ import ( // UpdateSearchIndex performs a updateSearchIndex operation. type UpdateSearchIndex struct { - index string - definition bsoncore.Document - session *session.Client - clock *session.ClusterClock - collection string - monitor *event.CommandMonitor - crypt driver.Crypt - database string - deployment driver.Deployment - selector description.ServerSelector - result UpdateSearchIndexResult - serverAPI *driver.ServerAPIOptions - timeout *time.Duration + authenticator driver.Authenticator + index string + definition bsoncore.Document + session *session.Client + clock *session.ClusterClock + collection string + monitor *event.CommandMonitor + crypt driver.Crypt + database string + deployment driver.Deployment + selector description.ServerSelector + result UpdateSearchIndexResult + serverAPI *driver.ServerAPIOptions + timeout *time.Duration } // UpdateSearchIndexResult represents a single index in the updateSearchIndexResult result. @@ -48,8 +49,7 @@ func buildUpdateSearchIndexResult(response bsoncore.Document) (UpdateSearchIndex } usir := UpdateSearchIndexResult{} for _, element := range elements { - switch element.Key() { - case "ok": + if element.Key() == "ok" { var ok bool usir.Ok, ok = element.Value().AsInt32OK() if !ok { @@ -95,6 +95,7 @@ func (usi *UpdateSearchIndex) Execute(ctx context.Context) error { Selector: usi.selector, ServerAPI: usi.serverAPI, Timeout: usi.timeout, + Authenticator: usi.authenticator, }.Execute(ctx) } @@ -225,3 +226,13 @@ func (usi *UpdateSearchIndex) Timeout(timeout *time.Duration) *UpdateSearchIndex usi.timeout = timeout return usi } + +// Authenticator sets the authenticator to use for this operation. +func (usi *UpdateSearchIndex) Authenticator(authenticator driver.Authenticator) *UpdateSearchIndex { + if usi == nil { + usi = new(UpdateSearchIndex) + } + + usi.authenticator = authenticator + return usi +} diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/session/client_session.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/session/client_session.go index 8dac0932d..eff27bfe3 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/session/client_session.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/session/client_session.go @@ -90,6 +90,8 @@ type LoadBalancedTransactionConnection interface { DriverConnectionID() uint64 // TODO(GODRIVER-2824): change type to int64. Address() address.Address Stale() bool + OIDCTokenGenID() uint64 + SetOIDCTokenGenID(uint64) // Functions copied over from driver.PinnedConnection that are not part of Connection or Expirable. PinToCursor() error @@ -158,13 +160,14 @@ func MaxClusterTime(ct1, ct2 bson.Raw) bson.Raw { epoch1, ord1 := getClusterTime(ct1) epoch2, ord2 := getClusterTime(ct2) - if epoch1 > epoch2 { + switch { + case epoch1 > epoch2: return ct1 - } else if epoch1 < epoch2 { + case epoch1 < epoch2: return ct2 - } else if ord1 > ord2 { + case ord1 > ord2: return ct1 - } else if ord1 < ord2 { + case ord1 < ord2: return ct2 } @@ -476,11 +479,12 @@ func (c *Client) UpdateCommitTransactionWriteConcern() { // CheckAbortTransaction checks to see if allowed to abort transaction and returns // an error if not allowed. func (c *Client) CheckAbortTransaction() error { - if c.TransactionState == None { + switch { + case c.TransactionState == None: return ErrNoTransactStarted - } else if c.TransactionState == Committed { + case c.TransactionState == Committed: return ErrAbortAfterCommit - } else if c.TransactionState == Aborted { + case c.TransactionState == Aborted: return ErrAbortTwice } return nil diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/topology/connection.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/topology/connection.go index 649e87b3d..e00363a54 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/topology/connection.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/topology/connection.go @@ -9,10 +9,12 @@ package topology import ( "context" "crypto/tls" + "encoding/binary" "errors" "fmt" "io" "net" + "os" "strings" "sync" "sync/atomic" @@ -54,7 +56,7 @@ type connection struct { nc net.Conn // When nil, the connection is closed. addr address.Address idleTimeout time.Duration - idleDeadline atomic.Value // Stores a time.Time + idleStart atomic.Value // Stores a time.Time readTimeout time.Duration writeTimeout time.Duration desc description.Server @@ -79,9 +81,13 @@ type connection struct { driverConnectionID uint64 generation uint64 - // awaitingResponse indicates that the server response was not completely + // awaitRemainingBytes indicates the size of server response that was not completely // read before returning the connection to the pool. - awaitingResponse bool + awaitRemainingBytes *int32 + + // oidcTokenGenID is the monotonic generation ID for OIDC tokens, used to invalidate + // accessTokens in the OIDC authenticator cache. + oidcTokenGenID uint64 } // newConnection handles the creation of a connection. It does not connect the connection. @@ -111,12 +117,6 @@ func newConnection(addr address.Address, opts ...ConnectionOption) *connection { return c } -// DriverConnectionID returns the driver connection ID. -// TODO(GODRIVER-2824): change return type to int64. -func (c *connection) DriverConnectionID() uint64 { - return c.driverConnectionID -} - // setGenerationNumber sets the connection's generation number if a callback has been provided to do so in connection // configuration. func (c *connection) setGenerationNumber() { @@ -138,6 +138,39 @@ func (c *connection) hasGenerationNumber() bool { return c.desc.LoadBalanced() } +func configureTLS(ctx context.Context, + tlsConnSource tlsConnectionSource, + nc net.Conn, + addr address.Address, + config *tls.Config, + ocspOpts *ocsp.VerifyOptions, +) (net.Conn, error) { + // Ensure config.ServerName is always set for SNI. + if config.ServerName == "" { + hostname := addr.String() + colonPos := strings.LastIndex(hostname, ":") + if colonPos == -1 { + colonPos = len(hostname) + } + + hostname = hostname[:colonPos] + config.ServerName = hostname + } + + client := tlsConnSource.Client(nc, config) + if err := clientHandshake(ctx, client); err != nil { + return nil, err + } + + // Only do OCSP verification if TLS verification is requested. + if !config.InsecureSkipVerify { + if ocspErr := ocsp.Verify(ctx, client.ConnectionState(), ocspOpts); ocspErr != nil { + return nil, ocspErr + } + } + return client, nil +} + // connect handles the I/O for a connection. It will dial, configure TLS, and perform initialization // handshakes. All errors returned by connect are considered "before the handshake completes" and // must be handled by calling the appropriate SDAM handshake error handler. @@ -313,6 +346,10 @@ func (c *connection) closeConnectContext() { } } +func (c *connection) cancellationListenerCallback() { + _ = c.close() +} + func transformNetworkError(ctx context.Context, originalError error, contextDeadlineUsed bool) error { if originalError == nil { return nil @@ -335,10 +372,6 @@ func transformNetworkError(ctx context.Context, originalError error, contextDead return originalError } -func (c *connection) cancellationListenerCallback() { - _ = c.close() -} - func (c *connection) writeWireMessage(ctx context.Context, wm []byte) error { var err error if atomic.LoadInt64(&c.state) != connConnected { @@ -419,15 +452,10 @@ func (c *connection) readWireMessage(ctx context.Context) ([]byte, error) { dst, errMsg, err := c.read(ctx) if err != nil { - if nerr := net.Error(nil); errors.As(err, &nerr) && nerr.Timeout() && csot.IsTimeoutContext(ctx) { - // If the error was a timeout error and CSOT is enabled, instead of - // closing the connection mark it as awaiting response so the pool - // can read the response before making it available to other - // operations. - c.awaitingResponse = true - } else { - // Otherwise, use the pre-CSOT behavior and close the connection - // because we don't know if there are other bytes left to read. + if c.awaitRemainingBytes == nil { + // If the connection was not marked as awaiting response, use the + // pre-CSOT behavior and close the connection because we don't know + // if there are other bytes left to read. c.close() } message := errMsg @@ -444,6 +472,26 @@ func (c *connection) readWireMessage(ctx context.Context) ([]byte, error) { return dst, nil } +func (c *connection) parseWmSizeBytes(wmSizeBytes [4]byte) (int32, error) { + // read the length as an int32 + size := int32(binary.LittleEndian.Uint32(wmSizeBytes[:])) + + if size < 4 { + return 0, fmt.Errorf("malformed message length: %d", size) + } + // In the case of a hello response where MaxMessageSize has not yet been set, use the hard-coded + // defaultMaxMessageSize instead. + maxMessageSize := c.desc.MaxMessageSize + if maxMessageSize == 0 { + maxMessageSize = defaultMaxMessageSize + } + if uint32(size) > maxMessageSize { + return 0, errResponseTooLarge + } + + return size, nil +} + func (c *connection) read(ctx context.Context) (bytesRead []byte, errMsg string, err error) { go c.cancellationListener.Listen(ctx, c.cancellationListenerCallback) defer func() { @@ -457,6 +505,15 @@ func (c *connection) read(ctx context.Context) (bytesRead []byte, errMsg string, } }() + isCSOTTimeout := func(err error) bool { + // If the error was a timeout error and CSOT is enabled, instead of + // closing the connection mark it as awaiting response so the pool + // can read the response before making it available to other + // operations. + nerr := net.Error(nil) + return errors.As(err, &nerr) && nerr.Timeout() && csot.IsTimeoutContext(ctx) + } + // We use an array here because it only costs 4 bytes on the stack and means we'll only need to // reslice dst once instead of twice. var sizeBuf [4]byte @@ -464,29 +521,27 @@ func (c *connection) read(ctx context.Context) (bytesRead []byte, errMsg string, // We do a ReadFull into an array here instead of doing an opportunistic ReadAtLeast into dst // because there might be more than one wire message waiting to be read, for example when // reading messages from an exhaust cursor. - _, err = io.ReadFull(c.nc, sizeBuf[:]) + n, err := io.ReadFull(c.nc, sizeBuf[:]) if err != nil { + if l := int32(n); l == 0 && isCSOTTimeout(err) { + c.awaitRemainingBytes = &l + } return nil, "incomplete read of message header", err } - - // read the length as an int32 - size := (int32(sizeBuf[0])) | (int32(sizeBuf[1]) << 8) | (int32(sizeBuf[2]) << 16) | (int32(sizeBuf[3]) << 24) - - // In the case of a hello response where MaxMessageSize has not yet been set, use the hard-coded - // defaultMaxMessageSize instead. - maxMessageSize := c.desc.MaxMessageSize - if maxMessageSize == 0 { - maxMessageSize = defaultMaxMessageSize - } - if uint32(size) > maxMessageSize { - return nil, errResponseTooLarge.Error(), errResponseTooLarge + size, err := c.parseWmSizeBytes(sizeBuf) + if err != nil { + return nil, err.Error(), err } dst := make([]byte, size) copy(dst, sizeBuf[:]) - _, err = io.ReadFull(c.nc, dst[4:]) + n, err = io.ReadFull(c.nc, dst[4:]) if err != nil { + remainingBytes := size - 4 - int32(n) + if remainingBytes > 0 && isCSOTTimeout(err) { + c.awaitRemainingBytes = &remainingBytes + } return dst, "incomplete read of full message", err } @@ -507,25 +562,65 @@ func (c *connection) close() error { return err } +// closed returns true if the connection has been closed by the driver. func (c *connection) closed() bool { return atomic.LoadInt64(&c.state) == connDisconnected } +// isAlive returns true if the connection is alive and ready to be used for an +// operation. +// +// Note that the liveness check can be slow (at least 1ms), so isAlive only +// checks the liveness of the connection if it's been idle for at least 10 +// seconds. For frequently in-use connections, a network error during an +// operation will be the first indication of a dead connection. +func (c *connection) isAlive() bool { + if c.nc == nil { + return false + } + + // If the connection has been idle for less than 10 seconds, skip the + // liveness check. + // + // The 10-seconds idle bypass is based on the liveness check implementation + // in the Python Driver. That implementation uses 1 second as the idle + // threshold, but we chose to be more conservative in the Go Driver because + // this is new behavior with unknown side-effects. See + // https://github.com/mongodb/mongo-python-driver/blob/e6b95f65953e01e435004af069a6976473eaf841/pymongo/synchronous/pool.py#L983-L985 + idleStart, ok := c.idleStart.Load().(time.Time) + if !ok || idleStart.Add(10*time.Second).After(time.Now()) { + return true + } + + // Set a 1ms read deadline and attempt to read 1 byte from the connection. + // Expect it to block for 1ms then return a deadline exceeded error. If it + // returns any other error, the connection is not usable, so return false. + // If it doesn't return an error and actually reads data, the connection is + // also not usable, so return false. + // + // Note that we don't need to un-set the read deadline because the "read" + // and "write" methods always reset the deadlines. + err := c.nc.SetReadDeadline(time.Now().Add(1 * time.Millisecond)) + if err != nil { + return false + } + var b [1]byte + _, err = c.nc.Read(b[:]) + return errors.Is(err, os.ErrDeadlineExceeded) +} + func (c *connection) idleTimeoutExpired() bool { - now := time.Now() - if c.idleTimeout > 0 { - idleDeadline, ok := c.idleDeadline.Load().(time.Time) - if ok && now.After(idleDeadline) { - return true - } + if c.idleTimeout == 0 { + return false } - return false + idleStart, ok := c.idleStart.Load().(time.Time) + return ok && idleStart.Add(c.idleTimeout).Before(time.Now()) } -func (c *connection) bumpIdleDeadline() { +func (c *connection) bumpIdleStart() { if c.idleTimeout > 0 { - c.idleDeadline.Store(time.Now().Add(c.idleTimeout)) + c.idleStart.Store(time.Now()) } } @@ -533,10 +628,6 @@ func (c *connection) setCanStream(canStream bool) { c.canStream = canStream } -func (c initConnection) supportsStreaming() bool { - return c.canStream -} - func (c *connection) setStreaming(streaming bool) { c.currentlyStreaming = streaming } @@ -550,6 +641,12 @@ func (c *connection) setSocketTimeout(timeout time.Duration) { c.writeTimeout = timeout } +// DriverConnectionID returns the driver connection ID. +// TODO(GODRIVER-2824): change return type to int64. +func (c *connection) DriverConnectionID() uint64 { + return c.driverConnectionID +} + func (c *connection) ID() string { return c.id } @@ -558,6 +655,14 @@ func (c *connection) ServerConnectionID() *int64 { return c.serverConnectionID } +func (c *connection) OIDCTokenGenID() uint64 { + return c.oidcTokenGenID +} + +func (c *connection) SetOIDCTokenGenID(genID uint64) { + c.oidcTokenGenID = genID +} + // initConnection is an adapter used during connection initialization. It has the minimum // functionality necessary to implement the driver.Connection interface, which is required to pass a // *connection to a Handshaker. @@ -595,7 +700,7 @@ func (c initConnection) CurrentlyStreaming() bool { return c.getCurrentlyStreaming() } func (c initConnection) SupportsStreaming() bool { - return c.supportsStreaming() + return c.canStream } // Connection implements the driver.Connection interface to allow reading and writing wire @@ -606,6 +711,8 @@ type Connection struct { refCount int cleanupPoolFn func() + oidcTokenGenID uint64 + // cleanupServerFn resets the server state when a connection is returned to the connection pool // via Close() or expired via Expire(). cleanupServerFn func() @@ -827,37 +934,14 @@ func (c *Connection) DriverConnectionID() uint64 { return c.connection.DriverConnectionID() } -func configureTLS(ctx context.Context, - tlsConnSource tlsConnectionSource, - nc net.Conn, - addr address.Address, - config *tls.Config, - ocspOpts *ocsp.VerifyOptions, -) (net.Conn, error) { - // Ensure config.ServerName is always set for SNI. - if config.ServerName == "" { - hostname := addr.String() - colonPos := strings.LastIndex(hostname, ":") - if colonPos == -1 { - colonPos = len(hostname) - } - - hostname = hostname[:colonPos] - config.ServerName = hostname - } - - client := tlsConnSource.Client(nc, config) - if err := clientHandshake(ctx, client); err != nil { - return nil, err - } +// OIDCTokenGenID returns the OIDC token generation ID. +func (c *Connection) OIDCTokenGenID() uint64 { + return c.oidcTokenGenID +} - // Only do OCSP verification if TLS verification is requested. - if !config.InsecureSkipVerify { - if ocspErr := ocsp.Verify(ctx, client.ConnectionState(), ocspOpts); ocspErr != nil { - return nil, ocspErr - } - } - return client, nil +// SetOIDCTokenGenID sets the OIDC token generation ID. +func (c *Connection) SetOIDCTokenGenID(genID uint64) { + c.oidcTokenGenID = genID } // TODO: Naming? diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/topology/fsm.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/topology/fsm.go index 2acf527b9..1d097b65c 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/topology/fsm.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/topology/fsm.go @@ -22,7 +22,7 @@ var ( MinSupportedMongoDBVersion = "3.6" // SupportedWireVersions is the range of wire versions supported by the driver. - SupportedWireVersions = description.NewVersionRange(6, 21) + SupportedWireVersions = description.NewVersionRange(6, 25) ) type fsm struct { diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/topology/pool.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/topology/pool.go index 52461eb68..e9565425d 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/topology/pool.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/topology/pool.go @@ -9,6 +9,7 @@ package topology import ( "context" "fmt" + "io" "net" "sync" "sync/atomic" @@ -166,8 +167,11 @@ type reason struct { // connectionPerished checks if a given connection is perished and should be removed from the pool. func connectionPerished(conn *connection) (reason, bool) { switch { - case conn.closed(): - // A connection would only be closed if it encountered a network error during an operation and closed itself. + case conn.closed() || !conn.isAlive(): + // A connection would only be closed if it encountered a network error + // during an operation and closed itself. If a connection is not alive + // (e.g. the connection was closed by the server-side), it's also + // considered a network error. return reason{ loggerConn: logger.ReasonConnClosedError, event: event.ReasonError, @@ -788,17 +792,27 @@ var ( // // It calls the package-global BGReadCallback function, if set, with the // address, timings, and any errors that occurred. -func bgRead(pool *pool, conn *connection) { - var start, read time.Time - start = time.Now() - errs := make([]error, 0) - connClosed := false +func bgRead(pool *pool, conn *connection, size int32) { + var err error + start := time.Now() defer func() { + read := time.Now() + errs := make([]error, 0) + connClosed := false + if err != nil { + errs = append(errs, err) + connClosed = true + err = conn.close() + if err != nil { + errs = append(errs, fmt.Errorf("error closing conn after reading: %w", err)) + } + } + // No matter what happens, always check the connection back into the // pool, which will either make it available for other operations or // remove it from the pool if it was closed. - err := pool.checkInNoEvent(conn) + err = pool.checkInNoEvent(conn) if err != nil { errs = append(errs, fmt.Errorf("error checking in: %w", err)) } @@ -808,34 +822,28 @@ func bgRead(pool *pool, conn *connection) { } }() - err := conn.nc.SetReadDeadline(time.Now().Add(BGReadTimeout)) + err = conn.nc.SetReadDeadline(time.Now().Add(BGReadTimeout)) if err != nil { - errs = append(errs, fmt.Errorf("error setting a read deadline: %w", err)) - - connClosed = true - err := conn.close() - if err != nil { - errs = append(errs, fmt.Errorf("error closing conn after setting read deadline: %w", err)) - } - + err = fmt.Errorf("error setting a read deadline: %w", err) return } - // The context here is only used for cancellation, not deadline timeout, so - // use context.Background(). The read timeout is set by calling - // SetReadDeadline above. - _, _, err = conn.read(context.Background()) - read = time.Now() - if err != nil { - errs = append(errs, fmt.Errorf("error reading: %w", err)) - - connClosed = true - err := conn.close() + if size == 0 { + var sizeBuf [4]byte + _, err = io.ReadFull(conn.nc, sizeBuf[:]) if err != nil { - errs = append(errs, fmt.Errorf("error closing conn after reading: %w", err)) + err = fmt.Errorf("error reading the message size: %w", err) + return } - - return + size, err = conn.parseWmSizeBytes(sizeBuf) + if err != nil { + return + } + size -= 4 + } + _, err = io.CopyN(io.Discard, conn.nc, int64(size)) + if err != nil { + err = fmt.Errorf("error discarding %d byte message: %w", size, err) } } @@ -886,19 +894,22 @@ func (p *pool) checkInNoEvent(conn *connection) error { // means that connections in "awaiting response" state are checked in but // not usable, which is not covered by the current pool events. We may need // to add pool event information in the future to communicate that. - if conn.awaitingResponse { - conn.awaitingResponse = false - go bgRead(p, conn) + if conn.awaitRemainingBytes != nil { + size := *conn.awaitRemainingBytes + conn.awaitRemainingBytes = nil + go bgRead(p, conn, size) return nil } - // Bump the connection idle deadline here because we're about to make the connection "available". - // The idle deadline is used to determine when a connection has reached its max idle time and - // should be closed. A connection reaches its max idle time when it has been "available" in the - // idle connections stack for more than the configured duration (maxIdleTimeMS). Set it before - // we call connectionPerished(), which checks the idle deadline, because a newly "available" - // connection should never be perished due to max idle time. - conn.bumpIdleDeadline() + // Bump the connection idle start time here because we're about to make the + // connection "available". The idle start time is used to determine how long + // a connection has been idle and when it has reached its max idle time and + // should be closed. A connection reaches its max idle time when it has been + // "available" in the idle connections stack for more than the configured + // duration (maxIdleTimeMS). Set it before we call connectionPerished(), + // which checks the idle deadline, because a newly "available" connection + // should never be perished due to max idle time. + conn.bumpIdleStart() r, perished := connectionPerished(conn) if !perished && conn.pool.getState() == poolClosed { diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/topology/server.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/topology/server.go index 99f8dd618..062363267 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/topology/server.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/topology/server.go @@ -666,7 +666,7 @@ func (s *Server) update() { s.monitorOnce.Do(s.rttMonitor.connect) } - if isStreamable(s) || connectionIsStreaming || transitionedFromNetworkError { + if isStreamingEnabled(s) && (isStreamable(s) || connectionIsStreaming) || transitionedFromNetworkError { continue } @@ -730,7 +730,7 @@ func (s *Server) createConnection() *connection { WithWriteTimeout(func(time.Duration) time.Duration { return s.cfg.heartbeatTimeout }), // We override whatever handshaker is currently attached to the options with a basic // one because need to make sure we don't do auth. - WithHandshaker(func(h Handshaker) Handshaker { + WithHandshaker(func(Handshaker) Handshaker { return operation.NewHello().AppName(s.cfg.appname).Compressors(s.cfg.compressionOpts). ServerAPI(s.cfg.serverAPI) }), diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/topology/topology_options.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/topology/topology_options.go index b5eb4a972..2c0518a54 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/topology/topology_options.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/topology/topology_options.go @@ -7,10 +7,10 @@ package topology import ( + "context" "crypto/tls" "fmt" "net/http" - "strings" "time" "go.mongodb.org/mongo-driver/event" @@ -71,9 +71,80 @@ func newLogger(opts *options.LoggerOptions) (*logger.Logger, error) { return log, nil } -// NewConfig will translate data from client options into a topology config for building non-default deployments. -// Server and topology options are not honored if a custom deployment is used. +// convertOIDCArgs converts the internal *driver.OIDCArgs into the equivalent +// public type *options.OIDCArgs. +func convertOIDCArgs(args *driver.OIDCArgs) *options.OIDCArgs { + if args == nil { + return nil + } + return &options.OIDCArgs{ + Version: args.Version, + IDPInfo: (*options.IDPInfo)(args.IDPInfo), + RefreshToken: args.RefreshToken, + } +} + +// ConvertCreds takes an [options.Credential] and returns the equivalent +// [driver.Cred]. +func ConvertCreds(cred *options.Credential) *driver.Cred { + if cred == nil { + return nil + } + + var oidcMachineCallback auth.OIDCCallback + if cred.OIDCMachineCallback != nil { + oidcMachineCallback = func(ctx context.Context, args *driver.OIDCArgs) (*driver.OIDCCredential, error) { + cred, err := cred.OIDCMachineCallback(ctx, convertOIDCArgs(args)) + return (*driver.OIDCCredential)(cred), err + } + } + + var oidcHumanCallback auth.OIDCCallback + if cred.OIDCHumanCallback != nil { + oidcHumanCallback = func(ctx context.Context, args *driver.OIDCArgs) (*driver.OIDCCredential, error) { + cred, err := cred.OIDCHumanCallback(ctx, convertOIDCArgs(args)) + return (*driver.OIDCCredential)(cred), err + } + } + + return &auth.Cred{ + Source: cred.AuthSource, + Username: cred.Username, + Password: cred.Password, + PasswordSet: cred.PasswordSet, + Props: cred.AuthMechanismProperties, + OIDCMachineCallback: oidcMachineCallback, + OIDCHumanCallback: oidcHumanCallback, + } +} + +// NewConfig will translate data from client options into a topology config for +// building non-default deployments. func NewConfig(co *options.ClientOptions, clock *session.ClusterClock) (*Config, error) { + var authenticator driver.Authenticator + var err error + if co.Auth != nil { + authenticator, err = auth.CreateAuthenticator( + co.Auth.AuthMechanism, + ConvertCreds(co.Auth), + co.HTTPClient, + ) + if err != nil { + return nil, fmt.Errorf("error creating authenticator: %w", err) + } + } + return NewConfigWithAuthenticator(co, clock, authenticator) +} + +// NewConfigWithAuthenticator will translate data from client options into a +// topology config for building non-default deployments. Server and topology +// options are not honored if a custom deployment is used. It uses a passed in +// authenticator to authenticate the connection. +func NewConfigWithAuthenticator( + co *options.ClientOptions, + clock *session.ClusterClock, + authenticator driver.Authenticator, +) (*Config, error) { var serverAPI *driver.ServerAPIOptions if err := co.Validate(); err != nil { @@ -135,11 +206,11 @@ func NewConfig(co *options.ClientOptions, clock *session.ClusterClock) (*Config, for _, comp := range comps { switch comp { case "zlib": - connOpts = append(connOpts, WithZlibLevel(func(level *int) *int { + connOpts = append(connOpts, WithZlibLevel(func(*int) *int { return co.ZlibLevel })) case "zstd": - connOpts = append(connOpts, WithZstdLevel(func(level *int) *int { + connOpts = append(connOpts, WithZstdLevel(func(*int) *int { return co.ZstdLevel })) } @@ -156,35 +227,8 @@ func NewConfig(co *options.ClientOptions, clock *session.ClusterClock) (*Config, } // Handshaker - var handshaker = func(driver.Handshaker) driver.Handshaker { - return operation.NewHello().AppName(appName).Compressors(comps).ClusterClock(clock). - ServerAPI(serverAPI).LoadBalanced(loadBalanced) - } - // Auth & Database & Password & Username - if co.Auth != nil { - cred := &auth.Cred{ - Username: co.Auth.Username, - Password: co.Auth.Password, - PasswordSet: co.Auth.PasswordSet, - Props: co.Auth.AuthMechanismProperties, - Source: co.Auth.AuthSource, - } - mechanism := co.Auth.AuthMechanism - - if len(cred.Source) == 0 { - switch strings.ToUpper(mechanism) { - case auth.MongoDBX509, auth.GSSAPI, auth.PLAIN: - cred.Source = "$external" - default: - cred.Source = "admin" - } - } - - authenticator, err := auth.CreateAuthenticator(mechanism, cred) - if err != nil { - return nil, err - } - + var handshaker func(driver.Handshaker) driver.Handshaker + if authenticator != nil { handshakeOpts := &auth.HandshakeOptions{ AppName: appName, Authenticator: authenticator, @@ -192,16 +236,15 @@ func NewConfig(co *options.ClientOptions, clock *session.ClusterClock) (*Config, ServerAPI: serverAPI, LoadBalanced: loadBalanced, ClusterClock: clock, - HTTPClient: co.HTTPClient, } - if mechanism == "" { + if co.Auth.AuthMechanism == "" { // Required for SASL mechanism negotiation during handshake - handshakeOpts.DBUser = cred.Source + "." + cred.Username + handshakeOpts.DBUser = co.Auth.AuthSource + "." + co.Auth.Username } if co.AuthenticateToAnything != nil && *co.AuthenticateToAnything { // Authenticate arbiters - handshakeOpts.PerformAuthentication = func(serv description.Server) bool { + handshakeOpts.PerformAuthentication = func(description.Server) bool { return true } } @@ -209,7 +252,17 @@ func NewConfig(co *options.ClientOptions, clock *session.ClusterClock) (*Config, handshaker = func(driver.Handshaker) driver.Handshaker { return auth.Handshaker(nil, handshakeOpts) } + } else { + handshaker = func(driver.Handshaker) driver.Handshaker { + return operation.NewHello(). + AppName(appName). + Compressors(comps). + ClusterClock(clock). + ServerAPI(serverAPI). + LoadBalanced(loadBalanced) + } } + connOpts = append(connOpts, WithHandshaker(handshaker)) // ConnectTimeout if co.ConnectTimeout != nil { diff --git a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/wiremessage/wiremessage.go b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/wiremessage/wiremessage.go index 2199f855b..987ae16c0 100644 --- a/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/wiremessage/wiremessage.go +++ b/vendor/go.mongodb.org/mongo-driver/x/mongo/driver/wiremessage/wiremessage.go @@ -381,16 +381,9 @@ func ReadMsgSectionSingleDocument(src []byte) (doc bsoncore.Document, rem []byte // ReadMsgSectionDocumentSequence reads an identifier and document sequence from src and returns the document sequence // data parsed into a slice of BSON documents. func ReadMsgSectionDocumentSequence(src []byte) (identifier string, docs []bsoncore.Document, rem []byte, ok bool) { - length, rem, ok := readi32(src) - if !ok || int(length) > len(src) { - return "", nil, rem, false - } - - rem, ret := rem[:length-4], rem[length-4:] // reslice so we can just iterate a loop later - - identifier, rem, ok = readcstring(rem) + identifier, rem, ret, ok := ReadMsgSectionRawDocumentSequence(src) if !ok { - return "", nil, rem, false + return "", nil, src, false } docs = make([]bsoncore.Document, 0) @@ -403,7 +396,7 @@ func ReadMsgSectionDocumentSequence(src []byte) (identifier string, docs []bsonc docs = append(docs, doc) } if len(rem) > 0 { - return "", nil, append(rem, ret...), false + return "", nil, src, false } return identifier, docs, ret, true @@ -413,8 +406,8 @@ func ReadMsgSectionDocumentSequence(src []byte) (identifier string, docs []bsonc // sequence data. func ReadMsgSectionRawDocumentSequence(src []byte) (identifier string, data []byte, rem []byte, ok bool) { length, rem, ok := readi32(src) - if !ok || int(length) > len(src) { - return "", nil, rem, false + if !ok || int(length) > len(src) || length-4 < 0 { + return "", nil, src, false } // After these assignments, rem will be the data containing the identifier string + the document sequence bytes and @@ -423,7 +416,7 @@ func ReadMsgSectionRawDocumentSequence(src []byte) (identifier string, data []by identifier, rem, ok = readcstring(rem) if !ok { - return "", nil, rem, false + return "", nil, src, false } return identifier, rem, rest, true @@ -546,14 +539,6 @@ func ReadCompressedCompressorID(src []byte) (id CompressorID, rem []byte, ok boo return CompressorID(src[0]), src[1:], true } -// ReadCompressedCompressedMessage reads the compressed wiremessage to dst. -func ReadCompressedCompressedMessage(src []byte, length int32) (msg []byte, rem []byte, ok bool) { - if len(src) < int(length) { - return nil, src, false - } - return src[:length], src[length:], true -} - // ReadKillCursorsZero reads the zero field from src. func ReadKillCursorsZero(src []byte) (zero int32, rem []byte, ok bool) { return readi32(src) diff --git a/vendor/modules.txt b/vendor/modules.txt index 6819ddad6..9d8913d53 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -112,6 +112,9 @@ github.com/aws/aws-sdk-go/service/sts github.com/aws/aws-sdk-go/service/sts/stsiface # github.com/containerd/log v0.1.0 ## explicit; go 1.20 +# github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc +## explicit +github.com/davecgh/go-spew/spew # github.com/distribution/reference v0.5.0 ## explicit; go 1.20 github.com/distribution/reference @@ -149,8 +152,6 @@ github.com/docker/go-units # github.com/felixge/httpsnoop v1.0.4 ## explicit; go 1.13 github.com/felixge/httpsnoop -# github.com/frankban/quicktest v1.14.6 -## explicit; go 1.13 # github.com/fsnotify/fsnotify v1.7.0 ## explicit; go 1.17 github.com/fsnotify/fsnotify @@ -191,8 +192,8 @@ github.com/jessevdk/go-flags # github.com/jmespath/go-jmespath v0.4.0 ## explicit; go 1.14 github.com/jmespath/go-jmespath -# github.com/klauspost/compress v1.17.8 -## explicit; go 1.20 +# github.com/klauspost/compress v1.17.11 +## explicit; go 1.21 github.com/klauspost/compress github.com/klauspost/compress/flate github.com/klauspost/compress/fse @@ -313,10 +314,10 @@ github.com/xdg-go/scram # github.com/xdg-go/stringprep v1.0.4 ## explicit; go 1.11 github.com/xdg-go/stringprep -# github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a -## explicit; go 1.12 +# github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 +## explicit; go 1.17 github.com/youmark/pkcs8 -# go.mongodb.org/mongo-driver v1.16.0 +# go.mongodb.org/mongo-driver v1.17.1 ## explicit; go 1.18 go.mongodb.org/mongo-driver/bson go.mongodb.org/mongo-driver/bson/bsoncodec @@ -325,6 +326,7 @@ go.mongodb.org/mongo-driver/bson/bsonrw go.mongodb.org/mongo-driver/bson/bsontype go.mongodb.org/mongo-driver/bson/primitive go.mongodb.org/mongo-driver/event +go.mongodb.org/mongo-driver/internal/assert go.mongodb.org/mongo-driver/internal/aws go.mongodb.org/mongo-driver/internal/aws/awserr go.mongodb.org/mongo-driver/internal/aws/credentials @@ -337,14 +339,17 @@ go.mongodb.org/mongo-driver/internal/csot go.mongodb.org/mongo-driver/internal/driverutil go.mongodb.org/mongo-driver/internal/handshake go.mongodb.org/mongo-driver/internal/httputil +go.mongodb.org/mongo-driver/internal/integtest go.mongodb.org/mongo-driver/internal/logger go.mongodb.org/mongo-driver/internal/ptrutil go.mongodb.org/mongo-driver/internal/rand go.mongodb.org/mongo-driver/internal/randutil +go.mongodb.org/mongo-driver/internal/require go.mongodb.org/mongo-driver/internal/uuid go.mongodb.org/mongo-driver/mongo go.mongodb.org/mongo-driver/mongo/address go.mongodb.org/mongo-driver/mongo/description +go.mongodb.org/mongo-driver/mongo/integration/mtest go.mongodb.org/mongo-driver/mongo/options go.mongodb.org/mongo-driver/mongo/readconcern go.mongodb.org/mongo-driver/mongo/readpref @@ -445,8 +450,6 @@ golang.org/x/text/secure/bidirule golang.org/x/text/transform golang.org/x/text/unicode/bidi golang.org/x/text/unicode/norm -# golang.org/x/time v0.5.0 -## explicit; go 1.18 # gopkg.in/ini.v1 v1.67.0 ## explicit gopkg.in/ini.v1