From adb2b080445c9d561f76242048ee665f02120ba5 Mon Sep 17 00:00:00 2001
From: Dmytro Zghoba
Date: Fri, 29 Sep 2023 12:54:39 +0300
Subject: [PATCH] [PBM-1189] reorganize internal interfaces (#881)

---
 .github/workflows/ci.yml | 1 +
 .github/workflows/trivy.yml | 4 +-
 README.md | 2 +-
 agent/agent.go | 615 ---------
 .../backup-mongo.jpeg => backup-mongo.jpeg | Bin
 cli/cli.go | 780 ------------
 cmd/pbm-agent/agent.go | 715 +++++++++++
 {agent => cmd/pbm-agent}/backup.go | 117 +-
 cmd/pbm-agent/main.go | 23 +-
 {agent => cmd/pbm-agent}/oplog.go | 32 +-
 {agent => cmd/pbm-agent}/restore.go | 141 ++-
 {speedt => cmd/pbm-speed-test}/b_test.go | 2 +-
 {speedt => cmd/pbm-speed-test}/dataset.go | 2 +-
 cmd/pbm-speed-test/main.go | 36 +-
 {speedt => cmd/pbm-speed-test}/speedt.go | 21 +-
 {cli => cmd/pbm}/backup.go | 190 +--
 {cli => cmd/pbm}/backup_test.go | 226 ++--
 cmd/pbm/common.go | 15 +
 {cli => cmd/pbm}/config.go | 42 +-
 {cli => cmd/pbm}/delete.go | 90 +-
 {cli => cmd/pbm}/list.go | 118 +-
 {cli => cmd/pbm}/list_test.go | 12 +-
 cmd/pbm/main.go | 783 +++++++++++-
 {cli => cmd/pbm}/ns.go | 12 +-
 {cli => cmd/pbm}/ns_test.go | 5 +-
 {cli => cmd/pbm}/oplog.go | 30 +-
 {cli => cmd/pbm}/restore.go | 197 +--
 {cli => cmd/pbm}/status.go | 273 ++--
 doc/source/conf.pyc | Bin 2765 -> 0 bytes
 e2e-tests/Dockerfile | 10 -
 e2e-tests/README.md | 6 +-
 e2e-tests/cmd/ensure-oplog/main.go | 87 +-
 e2e-tests/cmd/pbm-test/run.go | 45 +-
 e2e-tests/cmd/pbm-test/run_physical.go | 19 +-
 e2e-tests/cmd/pbm-test/run_remapping.go | 10 +-
 .../docker/docker-compose-remapping.yaml | 18 +-
 e2e-tests/docker/docker-compose-rs.yaml | 32 +-
 e2e-tests/docker/docker-compose-single.yaml | 12 +-
 e2e-tests/docker/docker-compose.yaml | 84 +-
 .../Dockerfile => mongodb.dockerfile} | 2 +-
 e2e-tests/docker/pbm-agent/Dockerfile | 36 -
 e2e-tests/docker/pbm.dockerfile | 18 +
 e2e-tests/docker/tests.dockerfile | 12 +
 e2e-tests/functions | 4 +-
 e2e-tests/pkg/pbm/clock_skew.go | 11 +-
 e2e-tests/pkg/pbm/docker.go | 10 +-
 e2e-tests/pkg/pbm/mongo_pbm.go | 64 +-
 e2e-tests/pkg/pbm/mongod.go | 31 +-
 e2e-tests/pkg/pbm/pbm_ctl.go | 46 +-
 e2e-tests/pkg/tests/data.go | 12 +-
 e2e-tests/pkg/tests/sharded/backuper.go | 12 +-
 e2e-tests/pkg/tests/sharded/cluster.go | 94 +-
 .../tests/sharded/test_backup_cancellation.go | 15 +-
 e2e-tests/pkg/tests/sharded/test_basic.go | 14 +-
 .../pkg/tests/sharded/test_bounds_check.go | 18 +-
 .../pkg/tests/sharded/test_clock_skew.go | 4 +-
 .../pkg/tests/sharded/test_delete_backup.go | 26 +-
 .../pkg/tests/sharded/test_dist_commit.go | 3 +-
 .../tests/sharded/test_dr_restart_agents.go | 14 +-
 .../tests/sharded/test_incremental_backup.go | 16 +-
 .../pkg/tests/sharded/test_leader_lag.go | 22 +-
 .../pkg/tests/sharded/test_network_cut.go | 12 +-
 .../pkg/tests/sharded/test_oplog_replay.go | 10 +-
 .../pkg/tests/sharded/test_pitr_basic.go | 22 +-
 e2e-tests/pkg/tests/sharded/test_remapping.go | 18 +-
 e2e-tests/pkg/tests/sharded/test_selective.go | 20 +-
 .../pkg/tests/sharded/test_timeseries.go | 4 +-
 e2e-tests/pkg/tests/sharded/trx.go | 7 +-
 e2e-tests/pkg/tests/sharded/trx_phys.go | 8 +-
 e2e-tests/pkg/tests/state.go | 66 +-
 e2e-tests/run-new-cluster | 2 +-
 e2e-tests/run-remapping | 2 +-
 e2e-tests/run-rs | 2 +-
 e2e-tests/run-sharded | 2 +-
 e2e-tests/run-single | 2 +-
 e2e-tests/start-cluster | 2 +-
 e2e-tests/start-replset | 2 +-
 {pbm => internal}/archive/archive.go | 71 +-
 {pbm => internal}/compress/compress.go | 93 +-
 {pbm => internal/config}/config.go | 292 ++--
 internal/connect/connect.go | 187 +++
 internal/connect/todo.go | 81 ++
 internal/context/context.go | 39 +
 internal/defs/cmd.go | 42 +
 internal/defs/defs.go | 165 +++
 internal/errors/errors.go | 46 +
 internal/lock/errors.go | 58 +
 {pbm => internal/lock}/lock.go | 221 ++--
 internal/log/context.go | 44 +
 {pbm => internal}/log/log.go | 40 +-
 .../priority/priority.go | 80 +-
 internal/query/backup.go | 379 ++++++
 internal/query/restore.go | 259 ++++
 internal/query/setup.go | 129 ++
 {pbm => internal/resync}/rsync.go | 126 +-
 pbm/pitr/pitr.go => internal/slicer/slicer.go | 199 +--
 internal/storage/azure/azure.go | 272 ++++
 .../storage/blackhole/blackhole.go | 2 +-
 {pbm => internal}/storage/fs/fs.go | 12 +-
 {pbm => internal}/storage/s3/download.go | 5 +-
 {pbm => internal}/storage/s3/s3.go | 9 +-
 internal/storage/storage.go | 157 +++
 pbm/agent_status.go => internal/topo/agent.go | 84 +-
 internal/topo/cluster.go | 174 +++
 pbm/bsontypes.go => internal/topo/node.go | 242 ++--
 internal/topo/status.go | 53 +
 internal/topo/topo.go | 210 +++
 internal/types/backup.go | 145 +++
 internal/types/cmd.go | 142 +++
 internal/types/restore.go | 153 +++
 internal/types/types.go | 20 +
 {pbm => internal/util}/rs_map.go | 2 +-
 {pbm/sel => internal/util}/sel.go | 4 +-
 {pbm/sel => internal/util}/sel_test.go | 6 +-
 internal/util/storage.go | 46 +
 internal/version/version.go | 276 ++++
 {version => internal/version}/version_test.go | 0
 pbm/backup/backup.go | 404 +++---
 pbm/backup/logical.go | 117 +-
 pbm/backup/physical.go | 142 ++-
 pbm/cleanup.go | 80 +-
 pbm/cmd.go | 99 --
 pbm/delete.go | 119 +-
 pbm/node.go | 278 +---
 pbm/oplog/backup.go | 16 +-
 pbm/{pitr.go => oplog/chunk.go} | 165 +--
 pbm/{pitr_test.go => oplog/chunk_test.go} | 2 +-
 pbm/oplog/restore.go | 40 +-
 pbm/pbm.go | 1123 +----------------
 pbm/restore.go | 382 ------
 pbm/restore/logical.go | 381 +++---
 pbm/restore/physical.go | 364 +++---
 pbm/restore/restore.go | 139 +-
 pbm/restore/selective.go | 109 +-
 pbm/snapshot/backup.go | 6 +-
 pbm/snapshot/dump.go | 25 +-
 pbm/snapshot/restore.go | 33 +-
 pbm/storage/azure/azure.go | 271 +---
 pbm/storage/storage.go | 60 -
 pbm/topo.go | 124 --
 pbm/version.go | 126 --
 version/version.go | 136 --
 142 files changed, 7756 insertions(+), 7022 deletions(-)
 delete mode 100644 agent/agent.go
 rename doc/source/images/backup-mongo.jpeg => backup-mongo.jpeg (100%)
 delete mode 100644 cli/cli.go
 create mode 100644 cmd/pbm-agent/agent.go
 rename {agent => cmd/pbm-agent}/backup.go (57%)
 rename {agent => cmd/pbm-agent}/oplog.go (53%)
 rename {agent => cmd/pbm-agent}/restore.go (62%)
 rename {speedt => cmd/pbm-speed-test}/b_test.go (95%)
 rename {speedt => cmd/pbm-speed-test}/dataset.go (99%)
 rename {speedt => cmd/pbm-speed-test}/speedt.go (86%)
 rename {cli => cmd/pbm}/backup.go (65%)
 rename {cli => cmd/pbm}/backup_test.go (66%)
 create mode 100644 cmd/pbm/common.go
 rename {cli => cmd/pbm}/config.go (64%)
 rename {cli => cmd/pbm}/delete.go (73%)
 rename {cli => cmd/pbm}/list.go (68%)
 rename {cli => cmd/pbm}/list_test.go (90%)
 rename {cli => cmd/pbm}/ns.go (88%)
 rename {cli => cmd/pbm}/ns_test.go (96%)
 rename {cli => cmd/pbm}/oplog.go (63%)
 rename {cli => cmd/pbm}/restore.go (70%)
 rename {cli => cmd/pbm}/status.go (66%)
 delete mode 100644 doc/source/conf.pyc
 delete mode 100644 e2e-tests/Dockerfile
 rename e2e-tests/docker/{mongodb-rs/Dockerfile => mongodb.dockerfile} (91%)
 delete mode 100644 e2e-tests/docker/pbm-agent/Dockerfile
 create mode 100644 e2e-tests/docker/pbm.dockerfile
 create mode 100644 e2e-tests/docker/tests.dockerfile
 rename {pbm => internal}/archive/archive.go (81%)
 rename {pbm => internal}/compress/compress.go (58%)
 rename {pbm => internal/config}/config.go (66%)
 create mode 100644 internal/connect/connect.go
 create mode 100644 internal/connect/todo.go
 create mode 100644 internal/context/context.go
 create mode 100644 internal/defs/cmd.go
 create mode 100644 internal/defs/defs.go
 create mode 100644 internal/errors/errors.go
 create mode 100644 internal/lock/errors.go
 rename {pbm => internal/lock}/lock.go (51%)
 create mode 100644 internal/log/context.go
 rename {pbm => internal}/log/log.go (90%)
 rename pbm/bcp_nodes_priority.go => internal/priority/priority.go (56%)
 create mode 100644 internal/query/backup.go
 create mode 100644 internal/query/restore.go
 create mode 100644 internal/query/setup.go
 rename {pbm => internal/resync}/rsync.go (72%)
 rename pbm/pitr/pitr.go => internal/slicer/slicer.go (70%)
 create mode 100644 internal/storage/azure/azure.go
 rename {pbm => internal}/storage/blackhole/blackhole.go (93%)
 rename {pbm => internal}/storage/fs/fs.go (90%)
 rename {pbm => internal}/storage/s3/download.go (99%)
 rename {pbm => internal}/storage/s3/s3.go (98%)
 create mode 100644 internal/storage/storage.go
 rename pbm/agent_status.go => internal/topo/agent.go (59%)
 create mode 100644 internal/topo/cluster.go
 rename pbm/bsontypes.go => internal/topo/node.go (66%)
 create mode 100644 internal/topo/status.go
 create mode 100644 internal/topo/topo.go
 create mode 100644 internal/types/backup.go
 create mode 100644 internal/types/cmd.go
 create mode 100644 internal/types/restore.go
 create mode 100644 internal/types/types.go
 rename {pbm => internal/util}/rs_map.go (97%)
 rename {pbm/sel => internal/util}/sel.go (96%)
 rename {pbm/sel => internal/util}/sel_test.go (88%)
 create mode 100644 internal/util/storage.go
 create mode 100644 internal/version/version.go
 rename {version => internal/version}/version_test.go (100%)
 delete mode 100644 pbm/cmd.go
 rename pbm/{pitr.go => oplog/chunk.go} (68%)
 rename pbm/{pitr_test.go => oplog/chunk_test.go} (99%)
 delete mode 100644 pbm/restore.go
 delete mode 100644 pbm/storage/storage.go
 delete mode 100644 pbm/topo.go
 delete mode 100644 pbm/version.go
 delete mode 100644 version/version.go

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 808d2e09e..59db5a2b5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -19,6 +19,7 @@ on:
   pull_request:
     branches:
       - main
+      - dev
     paths-ignore:
       - "e2e-tests/**"
       - "packaging/**"
diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml
index 1c2b93ef3..a43b43fc4 100644
--- a/.github/workflows/trivy.yml
+++ b/.github/workflows/trivy.yml
@@ -1,9 +1,9 @@
 name: Scan
 on:
   push:
-    branches: ["main"]
+    branches: ["main", "dev"]
   pull_request:
-    branches: ["main"]
+    branches: ["main", "dev"]
 jobs:
   scan:
     name: Trivy
diff --git a/README.md b/README.md
index b256b1701..34c3a24b9 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # Percona Backup for MongoDB
 [![Go Report Card](https://goreportcard.com/badge/github.com/percona/percona-backup-mongodb)](https://goreportcard.com/report/github.com/percona/percona-backup-mongodb) [![codecov](https://codecov.io/gh/percona/percona-backup-mongodb/branch/master/graph/badge.svg?token=TiuOmTfp2p)](https://codecov.io/gh/percona/percona-backup-mongodb) [![CLA assistant](https://cla-assistant.percona.com/readme/badge/percona/percona-backup-mongodb)](https://cla-assistant.percona.com/percona/percona-backup-mongodb)
-![PBM logo](doc/source/images/backup-mongo.jpeg)
+![PBM logo](backup-mongo.jpeg)
 Percona Backup for MongoDB (PBM) is a distributed, low-impact solution for achieving consistent
backups of MongoDB sharded clusters and replica sets. Percona Backup for MongoDB supports Percona Server for MongoDB and MongoDB Community Edition v4.4 and higher. diff --git a/agent/agent.go b/agent/agent.go deleted file mode 100644 index b333147db..000000000 --- a/agent/agent.go +++ /dev/null @@ -1,615 +0,0 @@ -package agent - -import ( - "bytes" - "context" - "fmt" - "runtime" - "sync" - "sync/atomic" - "time" - - "github.com/pkg/errors" - "go.mongodb.org/mongo-driver/bson/primitive" - "golang.org/x/sync/errgroup" - - "github.com/percona/percona-backup-mongodb/pbm" - "github.com/percona/percona-backup-mongodb/pbm/log" - "github.com/percona/percona-backup-mongodb/pbm/storage" - "github.com/percona/percona-backup-mongodb/version" -) - -type Agent struct { - pbm *pbm.PBM - node *pbm.Node - bcp *currentBackup - pitrjob *currentPitr - mx sync.Mutex - log *log.Logger - - closeCMD chan struct{} - pauseHB int32 - - // prevOO is previous pitr.oplogOnly value - prevOO *bool -} - -func New(pbm *pbm.PBM) *Agent { - return &Agent{ - pbm: pbm, - closeCMD: make(chan struct{}), - } -} - -func (a *Agent) AddNode(ctx context.Context, curi string, dumpConns int) error { - var err error - a.node, err = pbm.NewNode(ctx, curi, dumpConns) - return err -} - -func (a *Agent) InitLogger(cn *pbm.PBM) { - a.pbm.InitLogger(a.node.RS(), a.node.Name()) - a.log = a.pbm.Logger() -} - -func (a *Agent) Close() { - if a.log != nil { - a.log.Close() - } -} - -func (a *Agent) CanStart() error { - info, err := a.node.GetInfo() - if err != nil { - return errors.WithMessage(err, "get node info") - } - - if info.Msg == "isdbgrid" { - return errors.New("mongos is not supported") - } - - ver, err := pbm.GetMongoVersion(context.Background(), a.pbm.Conn) - if err != nil { - return errors.WithMessage(err, "get mongo version") - } - if err := pbm.FeatureSupport(ver).PBMSupport(); err != nil { - a.log.Warning("", "", "", primitive.Timestamp{}, "WARNING: %v", err) - } - - return nil -} - -// Start starts listening the commands stream. 
-func (a *Agent) Start() error { - a.log.Printf("pbm-agent:\n%s", version.Current().All("")) - a.log.Printf("node: %s", a.node.ID()) - - c, cerr := a.pbm.ListenCmd(a.closeCMD) - - a.log.Printf("listening for the commands") - - for { - select { - case cmd, ok := <-c: - if !ok { - a.log.Printf("change stream was closed") - return nil - } - - a.log.Printf("got command %s", cmd) - - ep, err := a.pbm.GetEpoch() - if err != nil { - a.log.Error(string(cmd.Cmd), "", cmd.OPID.String(), ep.TS(), "get epoch: %v", err) - continue - } - - a.log.Printf("got epoch %v", ep) - - switch cmd.Cmd { - case pbm.CmdBackup: - // backup runs in the go-routine so it can be canceled - go a.Backup(cmd.Backup, cmd.OPID, ep) - case pbm.CmdCancelBackup: - a.CancelBackup() - case pbm.CmdRestore: - a.Restore(cmd.Restore, cmd.OPID, ep) - case pbm.CmdReplay: - a.OplogReplay(cmd.Replay, cmd.OPID, ep) - case pbm.CmdResync: - a.Resync(cmd.OPID, ep) - case pbm.CmdDeleteBackup: - a.Delete(cmd.Delete, cmd.OPID, ep) - case pbm.CmdDeletePITR: - a.DeletePITR(cmd.DeletePITR, cmd.OPID, ep) - case pbm.CmdCleanup: - a.Cleanup(cmd.Cleanup, cmd.OPID, ep) - } - case err, ok := <-cerr: - if !ok { - a.log.Printf("change stream was closed") - return nil - } - - if errors.Is(err, pbm.CursorClosedError{}) { - return errors.WithMessage(err, "stop listening") - } - - ep, _ := a.pbm.GetEpoch() - a.log.Error("", "", "", ep.TS(), "listening commands: %v", err) - } - } -} - -// Delete deletes backup(s) from the store and cleans up its metadata -func (a *Agent) Delete(d *pbm.DeleteBackupCmd, opid pbm.OPID, ep pbm.Epoch) { - if d == nil { - l := a.log.NewEvent(string(pbm.CmdDeleteBackup), "", opid.String(), ep.TS()) - l.Error("missed command") - return - } - - l := a.pbm.Logger().NewEvent(string(pbm.CmdDeleteBackup), "", opid.String(), ep.TS()) - - nodeInfo, err := a.node.GetInfo() - if err != nil { - l.Error("get node info data: %v", err) - return - } - - if !nodeInfo.IsLeader() { - l.Info("not a member of the leader rs, skipping") - return - } - - epts := ep.TS() - lock := a.pbm.NewLockCol(pbm.LockHeader{ - Replset: a.node.RS(), - Node: a.node.Name(), - Type: pbm.CmdDeleteBackup, - OPID: opid.String(), - Epoch: &epts, - }, pbm.LockOpCollection) - - got, err := a.acquireLock(lock, l, nil) - if err != nil { - l.Error("acquire lock: %v", err) - return - } - if !got { - l.Debug("skip: lock not acquired") - return - } - defer func() { - if err := lock.Release(); err != nil { - l.Error("release lock: %v", err) - } - }() - - switch { - case d.OlderThan > 0: - t := time.Unix(d.OlderThan, 0).UTC() - obj := t.Format("2006-01-02T15:04:05Z") - l = a.pbm.Logger().NewEvent(string(pbm.CmdDeleteBackup), obj, opid.String(), ep.TS()) - l.Info("deleting backups older than %v", t) - err := a.pbm.DeleteOlderThan(t, l) - if err != nil { - l.Error("deleting: %v", err) - return - } - case d.Backup != "": - l = a.pbm.Logger().NewEvent(string(pbm.CmdDeleteBackup), d.Backup, opid.String(), ep.TS()) - l.Info("deleting backup") - err := a.pbm.DeleteBackup(d.Backup, l) - if err != nil { - l.Error("deleting: %v", err) - return - } - default: - l.Error("malformed command received in Delete() of backup: %v", d) - return - } - - l.Info("done") -} - -// DeletePITR deletes PITR chunks from the store and cleans up its metadata -func (a *Agent) DeletePITR(d *pbm.DeletePITRCmd, opid pbm.OPID, ep pbm.Epoch) { - if d == nil { - l := a.log.NewEvent(string(pbm.CmdDeletePITR), "", opid.String(), ep.TS()) - l.Error("missed command") - return - } - - l := 
a.pbm.Logger().NewEvent(string(pbm.CmdDeletePITR), "", opid.String(), ep.TS()) - - nodeInfo, err := a.node.GetInfo() - if err != nil { - l.Error("get node info data: %v", err) - return - } - - if !nodeInfo.IsLeader() { - l.Info("not a member of the leader rs, skipping") - return - } - - epts := ep.TS() - lock := a.pbm.NewLockCol(pbm.LockHeader{ - Replset: a.node.RS(), - Node: a.node.Name(), - Type: pbm.CmdDeletePITR, - OPID: opid.String(), - Epoch: &epts, - }, pbm.LockOpCollection) - - got, err := a.acquireLock(lock, l, nil) - if err != nil { - l.Error("acquire lock: %v", err) - return - } - if !got { - l.Debug("skip: lock not acquired") - return - } - defer func() { - if err := lock.Release(); err != nil { - l.Error("release lock: %v", err) - } - }() - - if d.OlderThan > 0 { - t := time.Unix(d.OlderThan, 0).UTC() - obj := t.Format("2006-01-02T15:04:05Z") - l = a.pbm.Logger().NewEvent(string(pbm.CmdDeletePITR), obj, opid.String(), ep.TS()) - l.Info("deleting pitr chunks older than %v", t) - err = a.pbm.DeletePITR(&t, l) - } else { - l = a.pbm.Logger().NewEvent(string(pbm.CmdDeletePITR), "_all_", opid.String(), ep.TS()) - l.Info("deleting all pitr chunks") - err = a.pbm.DeletePITR(nil, l) - } - if err != nil { - l.Error("deleting: %v", err) - return - } - - l.Info("done") -} - -// Cleanup deletes backups and PITR chunks from the store and cleans up its metadata -func (a *Agent) Cleanup(d *pbm.CleanupCmd, opid pbm.OPID, ep pbm.Epoch) { - l := a.log.NewEvent(string(pbm.CmdCleanup), "", opid.String(), ep.TS()) - - if d == nil { - l.Error("missed command") - return - } - - nodeInfo, err := a.node.GetInfo() - if err != nil { - l.Error("get node info data: %v", err) - return - } - if !nodeInfo.IsLeader() { - l.Info("not a member of the leader rs, skipping") - return - } - - epts := ep.TS() - lock := a.pbm.NewLockCol(pbm.LockHeader{ - Replset: a.node.RS(), - Node: a.node.Name(), - Type: pbm.CmdCleanup, - OPID: opid.String(), - Epoch: &epts, - }, pbm.LockOpCollection) - - got, err := a.acquireLock(lock, l, nil) - if err != nil { - l.Error("acquire lock: %v", err) - return - } - if !got { - l.Debug("skip: lock not acquired") - return - } - defer func() { - if err := lock.Release(); err != nil { - l.Error("release lock: %v", err) - } - }() - - stg, err := a.pbm.GetStorage(l) - if err != nil { - l.Error("get storage: " + err.Error()) - } - - eg := errgroup.Group{} - eg.SetLimit(runtime.NumCPU()) - - cr, err := pbm.MakeCleanupInfo(a.pbm.Context(), a.pbm.Conn, d.OlderThan) - if err != nil { - l.Error("make cleanup report: " + err.Error()) - return - } - - for i := range cr.Chunks { - name := cr.Chunks[i].FName - - eg.Go(func() error { - err := stg.Delete(name) - return errors.WithMessagef(err, "delete chunk file %q", name) - }) - } - if err := eg.Wait(); err != nil { - l.Error(err.Error()) - } - - for i := range cr.Backups { - bcp := &cr.Backups[i] - - eg.Go(func() error { - err := a.pbm.DeleteBackupFiles(bcp, stg) - return errors.WithMessagef(err, "delete backup files %q", bcp.Name) - }) - } - if err := eg.Wait(); err != nil { - l.Error(err.Error()) - } - - err = a.pbm.ResyncStorage(l) - if err != nil { - l.Error("storage resync: " + err.Error()) - } -} - -// Resync uploads a backup list from the remote store -func (a *Agent) Resync(opid pbm.OPID, ep pbm.Epoch) { - l := a.pbm.Logger().NewEvent(string(pbm.CmdResync), "", opid.String(), ep.TS()) - - a.HbResume() - a.pbm.Logger().ResumeMgo() - - nodeInfo, err := a.node.GetInfo() - if err != nil { - l.Error("get node info data: %v", err) - return - } - - 
if !nodeInfo.IsLeader() { - l.Info("not a member of the leader rs") - return - } - - epts := ep.TS() - lock := a.pbm.NewLock(pbm.LockHeader{ - Type: pbm.CmdResync, - Replset: nodeInfo.SetName, - Node: nodeInfo.Me, - OPID: opid.String(), - Epoch: &epts, - }) - - got, err := a.acquireLock(lock, l, nil) - if err != nil { - l.Error("acquiring lock: %v", err) - return - } - if !got { - l.Debug("lock not acquired") - return - } - - defer func() { - if err := lock.Release(); err != nil { - l.Error("reslase lock %v: %v", lock, err) - } - }() - - l.Info("started") - err = a.pbm.ResyncStorage(l) - if err != nil { - l.Error("%v", err) - return - } - l.Info("succeed") - - epch, err := a.pbm.ResetEpoch() - if err != nil { - l.Error("reset epoch: %v", err) - return - } - - l.Debug("epoch set to %v", epch) -} - -type lockAquireFn func() (bool, error) - -// acquireLock tries to acquire the lock. If there is a stale lock -// it tries to mark op that held the lock (backup, [pitr]restore) as failed. -func (a *Agent) acquireLock(l *pbm.Lock, lg *log.Event, acquireFn lockAquireFn) (bool, error) { - if acquireFn == nil { - acquireFn = l.Acquire - } - - got, err := acquireFn() - if err == nil { - return got, nil - } - - if errors.Is(err, pbm.DuplicatedOpError{}) || errors.Is(err, pbm.ConcurrentOpError{}) { - lg.Debug("get lock: %v", err) - return false, nil - } - - var er pbm.StaleLockError - if !errors.As(err, &er) { - return false, err - } - - lock := er.Lock - lg.Debug("stale lock: %v", lock) - var fn func(opid string) error - switch lock.Type { - case pbm.CmdBackup: - fn = a.pbm.MarkBcpStale - case pbm.CmdRestore: - fn = a.pbm.MarkRestoreStale - default: - return acquireFn() - } - - if err := fn(lock.OPID); err != nil { - lg.Warning("failed to mark stale op '%s' as failed: %v", lock.OPID, err) - } - - return acquireFn() -} - -func (a *Agent) HbPause() { - atomic.StoreInt32(&a.pauseHB, 1) -} - -func (a *Agent) HbResume() { - atomic.StoreInt32(&a.pauseHB, 0) -} - -func (a *Agent) HbIsRun() bool { - return atomic.LoadInt32(&a.pauseHB) == 0 -} - -func (a *Agent) HbStatus() { - l := a.log.NewEvent("agentCheckup", "", "", primitive.Timestamp{}) - - nodeVersion, err := a.node.GetMongoVersion() - if err != nil { - l.Error("get mongo version: %v", err) - } - - hb := pbm.AgentStat{ - Node: a.node.Name(), - RS: a.node.RS(), - AgentVer: version.Current().Version, - MongoVer: nodeVersion.VersionString, - PerconaVer: nodeVersion.PSMDBVersion, - } - defer func() { - if err := a.pbm.RemoveAgentStatus(hb); err != nil { - logger := a.log.NewEvent("agentCheckup", "", "", primitive.Timestamp{}) - logger.Error("remove agent heartbeat: %v", err) - } - }() - - tk := time.NewTicker(pbm.AgentsStatCheckRange) - defer tk.Stop() - - // check storage once in a while if all is ok (see https://jira.percona.com/browse/PBM-647) - const checkStoreIn = int(60 / (pbm.AgentsStatCheckRange / time.Second)) - cc := 0 - for range tk.C { - // don't check if on pause (e.g. 
physical restore) - if !a.HbIsRun() { - continue - } - - hb.PBMStatus = a.pbmStatus() - logHbStatus("PBM connection", hb.PBMStatus, l) - - hb.NodeStatus = a.nodeStatus() - logHbStatus("node connection", hb.NodeStatus, l) - - cc++ - hb.StorageStatus = a.storStatus(l, cc == checkStoreIn) - logHbStatus("storage connection", hb.StorageStatus, l) - if cc == checkStoreIn { - cc = 0 - } - - hb.Err = "" - - hb.State = pbm.NodeStateUnknown - hb.StateStr = "unknown" - n, err := a.node.Status() - if err != nil { - l.Error("get replSetGetStatus: %v", err) - hb.Err += fmt.Sprintf("get replSetGetStatus: %v", err) - } else { - hb.State = n.State - hb.StateStr = n.StateStr - } - - hb.Hidden = false - hb.Passive = false - - inf, err := a.node.GetInfo() - if err != nil { - l.Error("get NodeInfo: %v", err) - hb.Err += fmt.Sprintf("get NodeInfo: %v", err) - } else { - hb.Hidden = inf.Hidden - hb.Passive = inf.Passive - } - hb.Arbiter = inf.ArbiterOnly - - err = a.pbm.SetAgentStatus(hb) - if err != nil { - l.Error("set status: %v", err) - } - } -} - -func (a *Agent) pbmStatus() pbm.SubsysStatus { - err := a.pbm.Conn.Ping(a.pbm.Context(), nil) - if err != nil { - return pbm.SubsysStatus{Err: err.Error()} - } - - return pbm.SubsysStatus{OK: true} -} - -func (a *Agent) nodeStatus() pbm.SubsysStatus { - err := a.node.Session().Ping(a.pbm.Context(), nil) - if err != nil { - return pbm.SubsysStatus{Err: err.Error()} - } - - return pbm.SubsysStatus{OK: true} -} - -func (a *Agent) storStatus(log *log.Event, forceCheckStorage bool) pbm.SubsysStatus { - // check storage once in a while if all is ok (see https://jira.percona.com/browse/PBM-647) - // but if storage was(is) failed, check it always - stat, err := a.pbm.GetAgentStatus(a.node.RS(), a.node.Name()) - if err != nil { - log.Warning("get current storage status: %v", err) - } - if !forceCheckStorage && stat.StorageStatus.OK { - return pbm.SubsysStatus{OK: true} - } - - stg, err := a.pbm.GetStorage(log) - if err != nil { - return pbm.SubsysStatus{Err: fmt.Sprintf("unable to get storage: %v", err)} - } - - _, err = stg.FileStat(pbm.StorInitFile) - if errors.Is(err, storage.ErrNotExist) { - err := stg.Save(pbm.StorInitFile, bytes.NewBufferString(version.Current().Version), 0) - if err != nil { - return pbm.SubsysStatus{ - Err: fmt.Sprintf("storage: no init file, attempt to create failed: %v", err), - } - } - } else if err != nil { - return pbm.SubsysStatus{Err: fmt.Sprintf("storage check failed with: %v", err)} - } - - return pbm.SubsysStatus{OK: true} -} - -func logHbStatus(name string, st pbm.SubsysStatus, l *log.Event) { - if !st.OK { - l.Error("check %s: %s", name, st.Err) - } -} diff --git a/doc/source/images/backup-mongo.jpeg b/backup-mongo.jpeg similarity index 100% rename from doc/source/images/backup-mongo.jpeg rename to backup-mongo.jpeg diff --git a/cli/cli.go b/cli/cli.go deleted file mode 100644 index f6ace729e..000000000 --- a/cli/cli.go +++ /dev/null @@ -1,780 +0,0 @@ -package cli - -import ( - "bytes" - "context" - "encoding/json" - "fmt" - stdlog "log" - "os" - "strings" - "time" - - "github.com/alecthomas/kingpin" - "github.com/pkg/errors" - "go.mongodb.org/mongo-driver/mongo" - - "github.com/percona/percona-backup-mongodb/pbm" - "github.com/percona/percona-backup-mongodb/pbm/compress" - "github.com/percona/percona-backup-mongodb/pbm/log" - "github.com/percona/percona-backup-mongodb/version" -) - -const ( - datetimeFormat = "2006-01-02T15:04:05" - dateFormat = "2006-01-02" -) - -const ( - RSMappingEnvVar = "PBM_REPLSET_REMAPPING" - RSMappingFlag = 
"replset-remapping" - RSMappingDoc = "re-map replset names for backups/oplog (e.g. to_name_1=from_name_1,to_name_2=from_name_2)" -) - -type outFormat string - -const ( - outJSON outFormat = "json" - outJSONpretty outFormat = "json-pretty" - outText outFormat = "text" -) - -type logsOpts struct { - tail int64 - node string - severity string - event string - opid string - location string - extr bool - follow bool -} - -type cliResult interface { - HasError() bool -} - -func Main() { - var ( - pbmCmd = kingpin.New("pbm", "Percona Backup for MongoDB") - mURL = pbmCmd.Flag("mongodb-uri", - "MongoDB connection string (Default = PBM_MONGODB_URI environment variable)"). - Envar("PBM_MONGODB_URI"). - String() - pbmOutFormat = pbmCmd.Flag("out", "Output format /"). - Short('o'). - Default(string(outText)). - Enum(string(outJSON), string(outJSONpretty), string(outText)) - ) - pbmCmd.HelpFlag.Short('h') - - versionCmd := pbmCmd.Command("version", "PBM version info") - versionShort := versionCmd.Flag("short", "Show only version info"). - Short('s'). - Default("false"). - Bool() - versionCommit := versionCmd.Flag("commit", "Show only git commit info"). - Short('c'). - Default("false"). - Bool() - - configCmd := pbmCmd.Command("config", "Set, change or list the config") - cfg := configOpts{set: make(map[string]string)} - configCmd.Flag("force-resync", "Resync backup list with the current store"). - BoolVar(&cfg.rsync) - configCmd.Flag("list", "List current settings"). - BoolVar(&cfg.list) - configCmd.Flag("file", "Upload config from YAML file"). - StringVar(&cfg.file) - configCmd.Flag("set", "Set the option value "). - StringMapVar(&cfg.set) - configCmd.Arg("key", "Show the value of a specified key"). - StringVar(&cfg.key) - - backupCmd := pbmCmd.Command("backup", "Make backup") - backup := backupOpts{} - backupCmd.Flag("compression", "Compression type //////"). - EnumVar(&backup.compression, - string(compress.CompressionTypeNone), - string(compress.CompressionTypeGZIP), - string(compress.CompressionTypeSNAPPY), - string(compress.CompressionTypeLZ4), - string(compress.CompressionTypeS2), - string(compress.CompressionTypePGZIP), - string(compress.CompressionTypeZstandard)) - backupCmd.Flag("type", - fmt.Sprintf("backup type: <%s>/<%s>/<%s>/<%s>", - pbm.PhysicalBackup, - pbm.LogicalBackup, - pbm.IncrementalBackup, - pbm.ExternalBackup)). - Default(string(pbm.LogicalBackup)). - Short('t'). - EnumVar(&backup.typ, - string(pbm.PhysicalBackup), - string(pbm.LogicalBackup), - string(pbm.IncrementalBackup), - string(pbm.ExternalBackup)) - backupCmd.Flag("base", "Is this a base for incremental backups"). - BoolVar(&backup.base) - backupCmd.Flag("compression-level", "Compression level (specific to the compression type)"). - IntsVar(&backup.compressionLevel) - backupCmd.Flag("ns", `Namespaces to backup (e.g. "db.*", "db.collection"). If not set, backup all ("*.*")`). - StringVar(&backup.ns) - backupCmd.Flag("wait", "Wait for the backup to finish"). - Short('w'). - BoolVar(&backup.wait) - backupCmd.Flag("list-files", "Wait for the backup to finish"). - Short('l'). - BoolVar(&backup.externList) - - cancelBcpCmd := pbmCmd.Command("cancel-backup", "Cancel backup") - - descBcpCmd := pbmCmd.Command("describe-backup", "Describe backup") - descBcp := descBcp{} - descBcpCmd.Flag("with-collections", "Show collections in backup"). - BoolVar(&descBcp.coll) - descBcpCmd.Arg("backup_name", "Backup name"). 
- StringVar(&descBcp.name) - - finishBackupName := "" - backupFinishCmd := pbmCmd.Command("backup-finish", "Finish external backup") - backupFinishCmd.Arg("backup_name", "Backup name"). - StringVar(&finishBackupName) - - finishRestore := descrRestoreOpts{} - restoreFinishCmd := pbmCmd.Command("restore-finish", "Finish external backup") - restoreFinishCmd.Arg("restore_name", "Restore name"). - StringVar(&finishRestore.restore) - restoreFinishCmd.Flag("config", "Path to PBM config"). - Short('c'). - Required(). - StringVar(&finishRestore.cfg) - - restoreCmd := pbmCmd.Command("restore", "Restore backup") - restore := restoreOpts{} - restoreCmd.Arg("backup_name", "Backup name to restore"). - StringVar(&restore.bcp) - restoreCmd.Flag("time", fmt.Sprintf("Restore to the point-in-time. Set in format %s", datetimeFormat)). - StringVar(&restore.pitr) - restoreCmd.Flag("base-snapshot", - "Override setting: Name of older snapshot that PITR will be based on during restore."). - StringVar(&restore.pitrBase) - restoreCmd.Flag("ns", `Namespaces to restore (e.g. "db1.*,db2.collection2"). If not set, restore all ("*.*")`). - StringVar(&restore.ns) - restoreCmd.Flag("wait", "Wait for the restore to finish."). - Short('w'). - BoolVar(&restore.wait) - restoreCmd.Flag("external", "External restore."). - Short('x'). - BoolVar(&restore.extern) - restoreCmd.Flag("config", "Mongod config for the source data. External backups only!"). - Short('c'). - StringVar(&restore.conf) - restoreCmd.Flag("ts", - "MongoDB cluster time to restore to. In format (e.g. 1682093090,9). External backups only!"). - StringVar(&restore.ts) - restoreCmd.Flag(RSMappingFlag, RSMappingDoc). - Envar(RSMappingEnvVar). - StringVar(&restore.rsMap) - - replayCmd := pbmCmd.Command("oplog-replay", "Replay oplog") - replayOpts := replayOptions{} - replayCmd.Flag("start", fmt.Sprintf("Replay oplog from the time. Set in format %s", datetimeFormat)). - Required(). - StringVar(&replayOpts.start) - replayCmd.Flag("end", "Replay oplog to the time. Set in format %s"). - Required(). - StringVar(&replayOpts.end) - replayCmd.Flag("wait", "Wait for the restore to finish."). - Short('w'). - BoolVar(&replayOpts.wait) - replayCmd.Flag(RSMappingFlag, RSMappingDoc). - Envar(RSMappingEnvVar). - StringVar(&replayOpts.rsMap) - // todo(add oplog cancel) - - listCmd := pbmCmd.Command("list", "Backup list") - list := listOpts{} - listCmd.Flag("restore", "Show last N restores"). - Default("false"). - BoolVar(&list.restore) - listCmd.Flag("unbacked", "Show unbacked oplog ranges"). - Default("false"). - BoolVar(&list.unbacked) - listCmd.Flag("full", "Show extended restore info"). - Default("false"). - Short('f'). - Hidden(). - BoolVar(&list.full) - listCmd.Flag("size", "Show last N backups"). - Default("0"). - IntVar(&list.size) - listCmd.Flag(RSMappingFlag, RSMappingDoc). - Envar(RSMappingEnvVar). - StringVar(&list.rsMap) - - deleteBcpCmd := pbmCmd.Command("delete-backup", "Delete a backup") - deleteBcp := deleteBcpOpts{} - deleteBcpCmd.Arg("name", "Backup name"). - StringVar(&deleteBcp.name) - deleteBcpCmd.Flag("older-than", - fmt.Sprintf("Delete backups older than date/time in format %s or %s", - datetimeFormat, - dateFormat)). - StringVar(&deleteBcp.olderThan) - deleteBcpCmd.Flag("yes", "Don't ask confirmation"). - Short('y'). - BoolVar(&deleteBcp.force) - deleteBcpCmd.Flag("force", "Force. Don't ask confirmation"). - Short('f'). 
- BoolVar(&deleteBcp.force) - - deletePitrCmd := pbmCmd.Command("delete-pitr", "Delete PITR chunks") - deletePitr := deletePitrOpts{} - deletePitrCmd.Flag("older-than", - fmt.Sprintf("Delete backups older than date/time in format %s or %s", - datetimeFormat, - dateFormat)). - StringVar(&deletePitr.olderThan) - deletePitrCmd.Flag("all", "Delete all chunks"). - Short('a'). - BoolVar(&deletePitr.all) - deletePitrCmd.Flag("yes", "Don't ask confirmation"). - Short('y'). - BoolVar(&deletePitr.force) - deletePitrCmd.Flag("force", "Force. Don't ask confirmation"). - Short('f'). - BoolVar(&deletePitr.force) - - cleanupCmd := pbmCmd.Command("cleanup", "Delete Backups and PITR chunks") - cleanupOpts := cleanupOptions{} - cleanupCmd.Flag("older-than", - fmt.Sprintf("Delete older than date/time in format %s or %s", - datetimeFormat, - dateFormat)). - StringVar(&cleanupOpts.olderThan) - cleanupCmd.Flag("yes", "Don't ask confirmation"). - Short('y'). - BoolVar(&cleanupOpts.yes) - cleanupCmd.Flag("wait", "Wait for deletion done"). - Short('w'). - BoolVar(&cleanupOpts.wait) - cleanupCmd.Flag("dry-run", "Report but do not delete"). - BoolVar(&cleanupOpts.dryRun) - - logsCmd := pbmCmd.Command("logs", "PBM logs") - logs := logsOpts{} - logsCmd.Flag("follow", "Follow output"). - Short('f'). - Default("false"). - BoolVar(&logs.follow) - logsCmd.Flag("tail", "Show last N entries, 20 entries are shown by default, 0 for all logs"). - Short('t'). - Default("20"). - Int64Var(&logs.tail) - logsCmd.Flag("node", "Target node in format replset[/host:posrt]"). - Short('n'). - StringVar(&logs.node) - logsCmd.Flag("severity", "Severity level D, I, W, E or F, low to high. Choosing one includes higher levels too."). - Short('s'). - Default("I"). - EnumVar(&logs.severity, "D", "I", "W", "E", "F") - logsCmd.Flag("event", - "Event in format backup[/2020-10-06T11:45:14Z]. Events: backup, restore, cancelBackup, resync, pitr, delete"). - Short('e'). - StringVar(&logs.event) - logsCmd.Flag("opid", "Operation ID"). - Short('i'). - StringVar(&logs.opid) - logsCmd.Flag("timezone", - "Timezone of log output. `Local`, `UTC` or a location name corresponding to "+ - "a file in the IANA Time Zone database, such as `America/New_York`"). - StringVar(&logs.location) - logsCmd.Flag("extra", "Show extra data in text format"). - Hidden(). - Short('x'). - BoolVar(&logs.extr) - - statusOpts := statusOptions{} - statusCmd := pbmCmd.Command("status", "Show PBM status") - statusCmd.Flag(RSMappingFlag, RSMappingDoc). - Envar(RSMappingEnvVar). - StringVar(&statusOpts.rsMap) - statusCmd.Flag("sections", "Sections of status to display ///."). - Short('s'). - EnumsVar(&statusOpts.sections, "cluster", "pitr", "running", "backups") - - describeRestoreCmd := pbmCmd.Command("describe-restore", "Describe restore") - describeRestoreOpts := descrRestoreOpts{} - describeRestoreCmd.Arg("name", "Restore name"). - StringVar(&describeRestoreOpts.restore) - describeRestoreCmd.Flag("config", "Path to PBM config"). - Short('c'). 
- StringVar(&describeRestoreOpts.cfg) - - cmd, err := pbmCmd.DefaultEnvars().Parse(os.Args[1:]) - if err != nil { - fmt.Fprintln(os.Stderr, "Error: parse command line parameters:", err) - os.Exit(1) - } - pbmOutF := outFormat(*pbmOutFormat) - var out fmt.Stringer - - if cmd == versionCmd.FullCommand() { - switch { - case *versionCommit: - out = outCaption{"GitCommit", version.Current().GitCommit} - case *versionShort: - out = outCaption{"Version", version.Current().Version} - default: - out = version.Current() - } - printo(out, pbmOutF) - return - } - - if *mURL == "" { - fmt.Fprintln(os.Stderr, "Error: no mongodb connection URI supplied") - fmt.Fprintln(os.Stderr, - " Usual practice is the set it by the PBM_MONGODB_URI environment variable. "+ - "It can also be set with commandline argument --mongodb-uri.") - pbmCmd.Usage(os.Args[1:]) - os.Exit(1) - } - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - var pbmClient *pbm.PBM - // we don't need pbm connection if it is `pbm describe-restore -c ...` - // or `pbm restore-finish ` - if describeRestoreOpts.cfg == "" && finishRestore.cfg == "" { - pbmClient, err = pbm.New(ctx, *mURL, "pbm-ctl") - if err != nil { - exitErr(errors.Wrap(err, "connect to mongodb"), pbmOutF) - } - pbmClient.InitLogger("", "") - - ver, err := pbm.GetMongoVersion(ctx, pbmClient.Conn) - if err != nil { - stdlog.Fatalf("get mongo version: %v", err) - } - if err := pbm.FeatureSupport(ver).PBMSupport(); err != nil { - fmt.Fprintf(os.Stderr, "WARNING: %v\n", err) - } - } - - switch cmd { - case configCmd.FullCommand(): - out, err = runConfig(pbmClient, &cfg) - case backupCmd.FullCommand(): - backup.name = time.Now().UTC().Format(time.RFC3339) - out, err = runBackup(pbmClient, &backup, pbmOutF) - case cancelBcpCmd.FullCommand(): - out, err = cancelBcp(pbmClient) - case backupFinishCmd.FullCommand(): - out, err = runFinishBcp(pbmClient, finishBackupName) - case restoreFinishCmd.FullCommand(): - out, err = runFinishRestore(finishRestore) - case descBcpCmd.FullCommand(): - out, err = describeBackup(pbmClient, &descBcp) - case restoreCmd.FullCommand(): - out, err = runRestore(pbmClient, &restore, pbmOutF) - case replayCmd.FullCommand(): - out, err = replayOplog(pbmClient, replayOpts, pbmOutF) - case listCmd.FullCommand(): - out, err = runList(pbmClient, &list) - case deleteBcpCmd.FullCommand(): - out, err = deleteBackup(pbmClient, &deleteBcp, pbmOutF) - case deletePitrCmd.FullCommand(): - out, err = deletePITR(pbmClient, &deletePitr, pbmOutF) - case cleanupCmd.FullCommand(): - out, err = retentionCleanup(pbmClient, &cleanupOpts) - case logsCmd.FullCommand(): - out, err = runLogs(pbmClient, &logs) - case statusCmd.FullCommand(): - out, err = status(pbmClient, *mURL, statusOpts, pbmOutF == outJSONpretty) - case describeRestoreCmd.FullCommand(): - out, err = describeRestore(pbmClient, describeRestoreOpts) - } - - if err != nil { - exitErr(err, pbmOutF) - } - - printo(out, pbmOutF) - - if r, ok := out.(cliResult); ok && r.HasError() { - os.Exit(1) - } -} - -func printo(out fmt.Stringer, f outFormat) { - if out == nil { - return - } - - switch f { - case outJSON: - err := json.NewEncoder(os.Stdout).Encode(out) - if err != nil { - exitErr(errors.Wrap(err, "encode output"), f) - } - case outJSONpretty: - enc := json.NewEncoder(os.Stdout) - enc.SetIndent("", " ") - err := enc.Encode(out) - if err != nil { - exitErr(errors.Wrap(err, "encode output"), f) - } - default: - fmt.Println(strings.TrimSpace(out.String())) - } -} - -func exitErr(e error, f outFormat) 
{ - switch f { - case outJSON, outJSONpretty: - var m interface{} - m = e - if _, ok := e.(json.Marshaler); !ok { //nolint:errorlint - m = map[string]string{"Error": e.Error()} - } - - j := json.NewEncoder(os.Stdout) - if f == outJSONpretty { - j.SetIndent("", " ") - } - - if err := j.Encode(m); err != nil { - fmt.Fprintf(os.Stderr, "Error: encoding error \"%v\": %v", m, err) - } - default: - fmt.Fprintln(os.Stderr, "Error:", e) - } - - os.Exit(1) -} - -func runLogs(cn *pbm.PBM, l *logsOpts) (fmt.Stringer, error) { - r := &log.LogRequest{} - - if l.node != "" { - n := strings.Split(l.node, "/") - r.RS = n[0] - if len(n) > 1 { - r.Node = n[1] - } - } - - if l.event != "" { - e := strings.Split(l.event, "/") - r.Event = e[0] - if len(e) > 1 { - r.ObjName = e[1] - } - } - - if l.opid != "" { - r.OPID = l.opid - } - - switch l.severity { - case "F": - r.Severity = log.Fatal - case "E": - r.Severity = log.Error - case "W": - r.Severity = log.Warning - case "I": - r.Severity = log.Info - case "D": - r.Severity = log.Debug - default: - r.Severity = log.Info - } - - if l.follow { - err := followLogs(cn, r, r.Node == "", l.extr) - return nil, err - } - - o, err := cn.LogGet(r, l.tail) - if err != nil { - return nil, errors.Wrap(err, "get logs") - } - - o.ShowNode = r.Node == "" - o.Extr = l.extr - - // reverse list - for i := len(o.Data)/2 - 1; i >= 0; i-- { - opp := len(o.Data) - 1 - i - o.Data[i], o.Data[opp] = o.Data[opp], o.Data[i] - } - - err = o.SetLocation(l.location) - if err != nil { - fmt.Fprintf(os.Stderr, "Error: failed to parse timezone: %v\n\n", err) - } - - return o, nil -} - -func followLogs(cn *pbm.PBM, r *log.LogRequest, showNode, expr bool) error { - outC, errC := log.Follow(cn.Context(), cn.Conn.Database(pbm.DB).Collection(pbm.LogCollection), r, false) - - for { - select { - case entry, ok := <-outC: - if !ok { - return nil - } - - fmt.Println(entry.Stringify(tsUTC, showNode, expr)) - case err, ok := <-errC: - if !ok { - return nil - } - - return err - } - } -} - -func tsUTC(ts int64) string { - return time.Unix(ts, 0).UTC().Format(time.RFC3339) -} - -type snapshotStat struct { - Name string `json:"name"` - Namespaces []string `json:"nss,omitempty"` - Size int64 `json:"size,omitempty"` - Status pbm.Status `json:"status"` - Err error `json:"-"` - ErrString string `json:"error,omitempty"` - RestoreTS int64 `json:"restoreTo"` - PBMVersion string `json:"pbmVersion"` - Type pbm.BackupType `json:"type"` - SrcBackup string `json:"src"` -} - -type pitrRange struct { - Err error `json:"error,omitempty"` - Range pbm.Timeline `json:"range"` - NoBaseSnapshot bool `json:"noBaseSnapshot,omitempty"` -} - -func (pr pitrRange) String() string { - return fmt.Sprintf("{ %s }", pr.Range) -} - -func fmtTS(ts int64) string { - return time.Unix(ts, 0).UTC().Format(time.RFC3339) -} - -type outMsg struct { - Msg string `json:"msg"` -} - -func (m outMsg) String() string { - return m.Msg -} - -type outCaption struct { - k string - v interface{} -} - -func (c outCaption) String() string { - return fmt.Sprint(c.v) -} - -func (c outCaption) MarshalJSON() ([]byte, error) { - var b bytes.Buffer - b.WriteString("{") - b.WriteString(fmt.Sprintf("\"%s\":", c.k)) - err := json.NewEncoder(&b).Encode(c.v) - if err != nil { - return nil, err - } - b.WriteString("}") - return b.Bytes(), nil -} - -func cancelBcp(cn *pbm.PBM) (fmt.Stringer, error) { - err := cn.SendCmd(pbm.Cmd{ - Cmd: pbm.CmdCancelBackup, - }) - if err != nil { - return nil, errors.Wrap(err, "send backup canceling") - } - return outMsg{"Backup 
cancellation has started"}, nil -} - -var errInvalidFormat = errors.New("invalid format") - -func parseDateT(v string) (time.Time, error) { - switch len(v) { - case len(datetimeFormat): - return time.Parse(datetimeFormat, v) - case len(dateFormat): - return time.Parse(dateFormat, v) - } - - return time.Time{}, errInvalidFormat -} - -func findLock(cn *pbm.PBM, fn func(*pbm.LockHeader) ([]pbm.LockData, error)) (*pbm.LockData, error) { - locks, err := fn(&pbm.LockHeader{}) - if err != nil { - return nil, errors.Wrap(err, "get locks") - } - - ct, err := cn.ClusterTime() - if err != nil { - return nil, errors.Wrap(err, "get cluster time") - } - - var lk *pbm.LockData - for _, l := range locks { - // We don't care about the PITR slicing here. It is a subject of other status sections - if l.Type == pbm.CmdPITR || l.Heartbeat.T+pbm.StaleFrameSec < ct.T { - continue - } - - // Just check if all locks are for the same op - // - // It could happen that the healthy `lk` became stale by the time of this check - // or the op was finished and the new one was started. So the `l.Type != lk.Type` - // would be true but for the legit reason (no error). - // But chances for that are quite low and on the next run of `pbm status` everything - // would be ok. So no reason to complicate code to avoid that. - if lk != nil && l.OPID != lk.OPID { - if err != nil { - return nil, errors.Errorf("conflicting ops running: [%s/%s::%s-%s] [%s/%s::%s-%s]. "+ - "This conflict may naturally resolve after 10 seconds", - l.Replset, l.Node, l.Type, l.OPID, - lk.Replset, lk.Node, lk.Type, lk.OPID, - ) - } - } - - l := l - lk = &l - } - - return lk, nil -} - -var errTout = errors.Errorf("timeout reached") - -// waitOp waits up to waitFor duration until operations which acquires a given lock are finished -func waitOp(pbmClient *pbm.PBM, lock *pbm.LockHeader, waitFor time.Duration) error { - // just to be sure the check hasn't started before the lock were created - time.Sleep(1 * time.Second) - fmt.Print(".") - - tmr := time.NewTimer(waitFor) - defer tmr.Stop() - tkr := time.NewTicker(1 * time.Second) - defer tkr.Stop() - for { - select { - case <-tmr.C: - return errTout - case <-tkr.C: - fmt.Print(".") - lock, err := pbmClient.GetLockData(lock) - if err != nil { - // No lock, so operation has finished - if errors.Is(err, mongo.ErrNoDocuments) { - return nil - } - return errors.Wrap(err, "get lock data") - } - clusterTime, err := pbmClient.ClusterTime() - if err != nil { - return errors.Wrap(err, "read cluster time") - } - if lock.Heartbeat.T+pbm.StaleFrameSec < clusterTime.T { - return errors.Errorf("operation stale, last beat ts: %d", lock.Heartbeat.T) - } - } - } -} - -func lastLogErr(cn *pbm.PBM, op pbm.Command, after int64) (string, error) { - l, err := cn.LogGet( - &log.LogRequest{ - LogKeys: log.LogKeys{ - Severity: log.Error, - Event: string(op), - }, - }, 1) - if err != nil { - return "", errors.Wrap(err, "get log records") - } - if len(l.Data) == 0 { - return "", nil - } - - if l.Data[0].TS < after { - return "", nil - } - - return l.Data[0].Msg, nil -} - -type concurentOpError struct { - op *pbm.LockHeader -} - -func (e concurentOpError) Error() string { - return fmt.Sprintf("another operation in progress, %s/%s [%s/%s]", e.op.Type, e.op.OPID, e.op.Replset, e.op.Node) -} - -func (e concurentOpError) As(err any) bool { - if err == nil { - return false - } - - er, ok := err.(concurentOpError) - if !ok { - return false - } - - er.op = e.op - return true -} - -func (e concurentOpError) MarshalJSON() ([]byte, error) { - s := 
make(map[string]interface{}) - s["error"] = "another operation in progress" - s["operation"] = e.op - return json.Marshal(s) -} - -func checkConcurrentOp(cn *pbm.PBM) error { - locks, err := cn.GetLocks(&pbm.LockHeader{}) - if err != nil { - return errors.Wrap(err, "get locks") - } - - ts, err := cn.ClusterTime() - if err != nil { - return errors.Wrap(err, "read cluster time") - } - - // Stop if there is some live operation. - // But in case of stale lock just move on - // and leave it for agents to deal with. - for _, l := range locks { - if l.Heartbeat.T+pbm.StaleFrameSec >= ts.T { - return concurentOpError{&l.LockHeader} - } - } - - return nil -} diff --git a/cmd/pbm-agent/agent.go b/cmd/pbm-agent/agent.go new file mode 100644 index 000000000..6c6d44fb1 --- /dev/null +++ b/cmd/pbm-agent/agent.go @@ -0,0 +1,715 @@ +package main + +import ( + "bytes" + "fmt" + "runtime" + "sync" + "sync/atomic" + "time" + + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/bson/primitive" + "golang.org/x/sync/errgroup" + + "github.com/percona/percona-backup-mongodb/internal/config" + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/lock" + "github.com/percona/percona-backup-mongodb/internal/log" + "github.com/percona/percona-backup-mongodb/internal/resync" + "github.com/percona/percona-backup-mongodb/internal/storage" + "github.com/percona/percona-backup-mongodb/internal/topo" + "github.com/percona/percona-backup-mongodb/internal/types" + "github.com/percona/percona-backup-mongodb/internal/util" + "github.com/percona/percona-backup-mongodb/internal/version" + "github.com/percona/percona-backup-mongodb/pbm" +) + +type Agent struct { + pbm *pbm.PBM + node *pbm.Node + bcp *currentBackup + pitrjob *currentPitr + mx sync.Mutex + log *log.Logger + + closeCMD chan struct{} + pauseHB int32 + + // prevOO is previous pitr.oplogOnly value + prevOO *bool +} + +func newAgent(pbm *pbm.PBM) *Agent { + return &Agent{ + pbm: pbm, + closeCMD: make(chan struct{}), + } +} + +func (a *Agent) AddNode(ctx context.Context, curi string, dumpConns int) error { + var err error + a.node, err = pbm.NewNode(ctx, curi, dumpConns) + return err +} + +func (a *Agent) InitLogger() { + a.pbm.InitLogger(a.node.RS(), a.node.Name()) + a.log = a.pbm.Logger() +} + +func (a *Agent) Close() { + if a.log != nil { + a.log.Close() + } +} + +func (a *Agent) CanStart(ctx context.Context) error { + info, err := topo.GetNodeInfoExt(ctx, a.node.Session()) + if err != nil { + return errors.Wrap(err, "get node info") + } + + if info.Msg == "isdbgrid" { + return errors.New("mongos is not supported") + } + + ver, err := version.GetMongoVersion(ctx, a.pbm.Conn.MongoClient()) + if err != nil { + return errors.Wrap(err, "get mongo version") + } + if err := version.FeatureSupport(ver).PBMSupport(); err != nil { + a.log.Warning("", "", "", primitive.Timestamp{}, "WARNING: %v", err) + } + + return nil +} + +// Start starts listening the commands stream. 
+func (a *Agent) Start(ctx context.Context) error { + a.log.Printf("pbm-agent:\n%s", version.Current().All("")) + a.log.Printf("node: %s", a.node.ID()) + + c, cerr := ListenCmd(ctx, a.pbm.Conn, a.closeCMD) + + a.log.Printf("listening for the commands") + + for { + select { + case cmd, ok := <-c: + if !ok { + a.log.Printf("change stream was closed") + return nil + } + + a.log.Printf("got command %s", cmd) + + ep, err := config.GetEpoch(ctx, a.pbm.Conn) + if err != nil { + a.log.Error(string(cmd.Cmd), "", cmd.OPID.String(), ep.TS(), "get epoch: %v", err) + continue + } + + a.log.Printf("got epoch %v", ep) + + switch cmd.Cmd { + case defs.CmdBackup: + // backup runs in the go-routine so it can be canceled + go a.Backup(ctx, cmd.Backup, cmd.OPID, ep) + case defs.CmdCancelBackup: + a.CancelBackup() + case defs.CmdRestore: + a.Restore(ctx, cmd.Restore, cmd.OPID, ep) + case defs.CmdReplay: + a.OplogReplay(ctx, cmd.Replay, cmd.OPID, ep) + case defs.CmdResync: + a.Resync(ctx, cmd.OPID, ep) + case defs.CmdDeleteBackup: + a.Delete(ctx, cmd.Delete, cmd.OPID, ep) + case defs.CmdDeletePITR: + a.DeletePITR(ctx, cmd.DeletePITR, cmd.OPID, ep) + case defs.CmdCleanup: + a.Cleanup(ctx, cmd.Cleanup, cmd.OPID, ep) + } + case err, ok := <-cerr: + if !ok { + a.log.Printf("change stream was closed") + return nil + } + + if errors.Is(err, cursorClosedError{}) { + return errors.Wrap(err, "stop listening") + } + + ep, _ := config.GetEpoch(ctx, a.pbm.Conn) + a.log.Error("", "", "", ep.TS(), "listening commands: %v", err) + } + } +} + +func ListenCmd(ctx context.Context, m connect.Client, cl <-chan struct{}) (<-chan types.Cmd, <-chan error) { + cmd := make(chan types.Cmd) + errc := make(chan error) + + go func() { + defer close(cmd) + defer close(errc) + + ts := time.Now().UTC().Unix() + var lastTS int64 + var lastCmd defs.Command + for { + select { + case <-cl: + return + default: + } + cur, err := m.CmdStreamCollection().Find( + ctx, + bson.M{"ts": bson.M{"$gte": ts}}, + ) + if err != nil { + errc <- errors.Wrap(err, "watch the cmd stream") + continue + } + + for cur.Next(ctx) { + c := types.Cmd{} + err := cur.Decode(&c) + if err != nil { + errc <- errors.Wrap(err, "message decode") + continue + } + + if c.Cmd == lastCmd && c.TS == lastTS { + continue + } + + opid, ok := cur.Current.Lookup("_id").ObjectIDOK() + if !ok { + errc <- errors.New("unable to get operation ID") + continue + } + + c.OPID = types.OPID(opid) + + lastCmd = c.Cmd + lastTS = c.TS + cmd <- c + ts = time.Now().UTC().Unix() + } + if err := cur.Err(); err != nil { + errc <- cursorClosedError{err} + cur.Close(ctx) + return + } + cur.Close(ctx) + time.Sleep(time.Second * 1) + } + }() + + return cmd, errc +} + +type cursorClosedError struct { + Err error +} + +func (c cursorClosedError) Error() string { + return "cursor was closed with:" + c.Err.Error() +} + +func (c cursorClosedError) Is(err error) bool { + if err == nil { + return false + } + + _, ok := err.(cursorClosedError) //nolint:errorlint + return ok +} + +func (c cursorClosedError) Unwrap() error { + return c.Err +} + +// Delete deletes backup(s) from the store and cleans up its metadata +func (a *Agent) Delete(ctx context.Context, d *types.DeleteBackupCmd, opid types.OPID, ep config.Epoch) { + if d == nil { + l := a.log.NewEvent(string(defs.CmdDeleteBackup), "", opid.String(), ep.TS()) + l.Error("missed command") + return + } + + l := a.pbm.Logger().NewEvent(string(defs.CmdDeleteBackup), "", opid.String(), ep.TS()) + ctx = log.SetLoggerToContext(ctx, a.log) + + nodeInfo, err := 
topo.GetNodeInfoExt(ctx, a.node.Session()) + if err != nil { + l.Error("get node info data: %v", err) + return + } + + if !nodeInfo.IsLeader() { + l.Info("not a member of the leader rs, skipping") + return + } + + epts := ep.TS() + lock := lock.NewOpLock(a.pbm.Conn, lock.LockHeader{ + Replset: a.node.RS(), + Node: a.node.Name(), + Type: defs.CmdDeleteBackup, + OPID: opid.String(), + Epoch: &epts, + }) + + got, err := a.acquireLock(ctx, lock, l, nil) + if err != nil { + l.Error("acquire lock: %v", err) + return + } + if !got { + l.Debug("skip: lock not acquired") + return + } + defer func() { + if err := lock.Release(); err != nil { + l.Error("release lock: %v", err) + } + }() + + switch { + case d.OlderThan > 0: + t := time.Unix(d.OlderThan, 0).UTC() + obj := t.Format("2006-01-02T15:04:05Z") + l = a.pbm.Logger().NewEvent(string(defs.CmdDeleteBackup), obj, opid.String(), ep.TS()) + l.Info("deleting backups older than %v", t) + err := a.pbm.DeleteOlderThan(ctx, t, l) + if err != nil { + l.Error("deleting: %v", err) + return + } + case d.Backup != "": + l = a.pbm.Logger().NewEvent(string(defs.CmdDeleteBackup), d.Backup, opid.String(), ep.TS()) + l.Info("deleting backup") + err := a.pbm.DeleteBackup(ctx, d.Backup, l) + if err != nil { + l.Error("deleting: %v", err) + return + } + default: + l.Error("malformed command received in Delete() of backup: %v", d) + return + } + + l.Info("done") +} + +// DeletePITR deletes PITR chunks from the store and cleans up its metadata +func (a *Agent) DeletePITR(ctx context.Context, d *types.DeletePITRCmd, opid types.OPID, ep config.Epoch) { + if d == nil { + l := a.log.NewEvent(string(defs.CmdDeletePITR), "", opid.String(), ep.TS()) + l.Error("missed command") + return + } + + l := a.pbm.Logger().NewEvent(string(defs.CmdDeletePITR), "", opid.String(), ep.TS()) + ctx = log.SetLoggerToContext(ctx, a.log) + + nodeInfo, err := topo.GetNodeInfoExt(ctx, a.node.Session()) + if err != nil { + l.Error("get node info data: %v", err) + return + } + + if !nodeInfo.IsLeader() { + l.Info("not a member of the leader rs, skipping") + return + } + + epts := ep.TS() + lock := lock.NewOpLock(a.pbm.Conn, lock.LockHeader{ + Replset: a.node.RS(), + Node: a.node.Name(), + Type: defs.CmdDeletePITR, + OPID: opid.String(), + Epoch: &epts, + }) + + got, err := a.acquireLock(ctx, lock, l, nil) + if err != nil { + l.Error("acquire lock: %v", err) + return + } + if !got { + l.Debug("skip: lock not acquired") + return + } + defer func() { + if err := lock.Release(); err != nil { + l.Error("release lock: %v", err) + } + }() + + if d.OlderThan > 0 { + t := time.Unix(d.OlderThan, 0).UTC() + obj := t.Format("2006-01-02T15:04:05Z") + l = a.pbm.Logger().NewEvent(string(defs.CmdDeletePITR), obj, opid.String(), ep.TS()) + l.Info("deleting pitr chunks older than %v", t) + err = a.pbm.DeletePITR(ctx, &t, l) + } else { + l = a.pbm.Logger().NewEvent(string(defs.CmdDeletePITR), "_all_", opid.String(), ep.TS()) + l.Info("deleting all pitr chunks") + err = a.pbm.DeletePITR(ctx, nil, l) + } + if err != nil { + l.Error("deleting: %v", err) + return + } + + l.Info("done") +} + +// Cleanup deletes backups and PITR chunks from the store and cleans up its metadata +func (a *Agent) Cleanup(ctx context.Context, d *types.CleanupCmd, opid types.OPID, ep config.Epoch) { + l := a.log.NewEvent(string(defs.CmdCleanup), "", opid.String(), ep.TS()) + ctx = log.SetLoggerToContext(ctx, a.log) + + if d == nil { + l.Error("missed command") + return + } + + nodeInfo, err := topo.GetNodeInfoExt(ctx, a.node.Session()) + if err 
!= nil { + l.Error("get node info data: %v", err) + return + } + if !nodeInfo.IsLeader() { + l.Info("not a member of the leader rs, skipping") + return + } + + epts := ep.TS() + lock := lock.NewOpLock(a.pbm.Conn, lock.LockHeader{ + Replset: a.node.RS(), + Node: a.node.Name(), + Type: defs.CmdCleanup, + OPID: opid.String(), + Epoch: &epts, + }) + + got, err := a.acquireLock(ctx, lock, l, nil) + if err != nil { + l.Error("acquire lock: %v", err) + return + } + if !got { + l.Debug("skip: lock not acquired") + return + } + defer func() { + if err := lock.Release(); err != nil { + l.Error("release lock: %v", err) + } + }() + + stg, err := util.GetStorage(ctx, a.pbm.Conn, l) + if err != nil { + l.Error("get storage: " + err.Error()) + } + + eg := errgroup.Group{} + eg.SetLimit(runtime.NumCPU()) + + cr, err := pbm.MakeCleanupInfo(ctx, a.pbm.Conn, d.OlderThan) + if err != nil { + l.Error("make cleanup report: " + err.Error()) + return + } + + for i := range cr.Chunks { + name := cr.Chunks[i].FName + + eg.Go(func() error { + err := stg.Delete(name) + return errors.Wrapf(err, "delete chunk file %q", name) + }) + } + if err := eg.Wait(); err != nil { + l.Error(err.Error()) + } + + for i := range cr.Backups { + bcp := &cr.Backups[i] + + eg.Go(func() error { + err := a.pbm.DeleteBackupFiles(bcp, stg) + return errors.Wrapf(err, "delete backup files %q", bcp.Name) + }) + } + if err := eg.Wait(); err != nil { + l.Error(err.Error()) + } + + err = resync.ResyncStorage(ctx, a.pbm.Conn, l) + if err != nil { + l.Error("storage resync: " + err.Error()) + } +} + +// Resync uploads a backup list from the remote store +func (a *Agent) Resync(ctx context.Context, opid types.OPID, ep config.Epoch) { + l := a.pbm.Logger().NewEvent(string(defs.CmdResync), "", opid.String(), ep.TS()) + ctx = log.SetLoggerToContext(ctx, a.log) + + a.HbResume() + a.pbm.Logger().ResumeMgo() + + nodeInfo, err := topo.GetNodeInfoExt(ctx, a.node.Session()) + if err != nil { + l.Error("get node info data: %v", err) + return + } + + if !nodeInfo.IsLeader() { + l.Info("not a member of the leader rs") + return + } + + epts := ep.TS() + lock := lock.NewLock(a.pbm.Conn, lock.LockHeader{ + Type: defs.CmdResync, + Replset: nodeInfo.SetName, + Node: nodeInfo.Me, + OPID: opid.String(), + Epoch: &epts, + }) + + got, err := a.acquireLock(ctx, lock, l, nil) + if err != nil { + l.Error("acquiring lock: %v", err) + return + } + if !got { + l.Debug("lock not acquired") + return + } + + defer func() { + if err := lock.Release(); err != nil { + l.Error("reslase lock %v: %v", lock, err) + } + }() + + l.Info("started") + err = resync.ResyncStorage(ctx, a.pbm.Conn, l) + if err != nil { + l.Error("%v", err) + return + } + l.Info("succeed") + + epch, err := config.ResetEpoch(a.pbm.Conn) + if err != nil { + l.Error("reset epoch: %v", err) + return + } + + l.Debug("epoch set to %v", epch) +} + +type lockAquireFn func(context.Context) (bool, error) + +// acquireLock tries to acquire the lock. If there is a stale lock +// it tries to mark op that held the lock (backup, [pitr]restore) as failed. 
+func (a *Agent) acquireLock(ctx context.Context, l *lock.Lock, lg *log.Event, acquireFn lockAquireFn) (bool, error) { + ctx = log.SetLoggerToContext(ctx, a.log) + if acquireFn == nil { + acquireFn = l.Acquire + } + + got, err := acquireFn(ctx) + if err == nil { + return got, nil + } + + if errors.Is(err, lock.DuplicatedOpError{}) || errors.Is(err, lock.ConcurrentOpError{}) { + lg.Debug("get lock: %v", err) + return false, nil + } + + var er lock.StaleLockError + if !errors.As(err, &er) { + return false, err + } + + lck := er.Lock + lg.Debug("stale lock: %v", lck) + var fn func(context.Context, *lock.Lock, string) error + switch lck.Type { + case defs.CmdBackup: + fn = lock.MarkBcpStale + case defs.CmdRestore: + fn = lock.MarkRestoreStale + default: + return acquireFn(ctx) + } + + if err := fn(ctx, l, lck.OPID); err != nil { + lg.Warning("failed to mark stale op '%s' as failed: %v", lck.OPID, err) + } + + return acquireFn(ctx) +} + +func (a *Agent) HbPause() { + atomic.StoreInt32(&a.pauseHB, 1) +} + +func (a *Agent) HbResume() { + atomic.StoreInt32(&a.pauseHB, 0) +} + +func (a *Agent) HbIsRun() bool { + return atomic.LoadInt32(&a.pauseHB) == 0 +} + +func (a *Agent) HbStatus(ctx context.Context) { + l := a.log.NewEvent("agentCheckup", "", "", primitive.Timestamp{}) + ctx = log.SetLoggerToContext(ctx, a.log) + + nodeVersion, err := version.GetMongoVersion(ctx, a.node.Session()) + if err != nil { + l.Error("get mongo version: %v", err) + } + + hb := topo.AgentStat{ + Node: a.node.Name(), + RS: a.node.RS(), + AgentVer: version.Current().Version, + MongoVer: nodeVersion.VersionString, + PerconaVer: nodeVersion.PSMDBVersion, + } + defer func() { + if err := topo.RemoveAgentStatus(ctx, a.pbm.Conn, hb); err != nil { + logger := a.log.NewEvent("agentCheckup", "", "", primitive.Timestamp{}) + logger.Error("remove agent heartbeat: %v", err) + } + }() + + tk := time.NewTicker(defs.AgentsStatCheckRange) + defer tk.Stop() + + // check storage once in a while if all is ok (see https://jira.percona.com/browse/PBM-647) + const checkStoreIn = int(60 / (defs.AgentsStatCheckRange / time.Second)) + cc := 0 + for range tk.C { + // don't check if on pause (e.g. 
physical restore) + if !a.HbIsRun() { + continue + } + + hb.PBMStatus = a.pbmStatus(ctx) + logHbStatus("PBM connection", hb.PBMStatus, l) + + hb.NodeStatus = a.nodeStatus(ctx) + logHbStatus("node connection", hb.NodeStatus, l) + + cc++ + hb.StorageStatus = a.storStatus(ctx, l, cc == checkStoreIn) + logHbStatus("storage connection", hb.StorageStatus, l) + if cc == checkStoreIn { + cc = 0 + } + + hb.Err = "" + + hb.State = defs.NodeStateUnknown + hb.StateStr = "unknown" + n, err := a.node.Status(ctx) + if err != nil { + l.Error("get replSetGetStatus: %v", err) + hb.Err += fmt.Sprintf("get replSetGetStatus: %v", err) + } else { + hb.State = n.State + hb.StateStr = n.StateStr + } + + hb.Hidden = false + hb.Passive = false + + inf, err := topo.GetNodeInfoExt(ctx, a.node.Session()) + if err != nil { + l.Error("get NodeInfo: %v", err) + hb.Err += fmt.Sprintf("get NodeInfo: %v", err) + } else { + hb.Hidden = inf.Hidden + hb.Passive = inf.Passive + } + hb.Arbiter = inf.ArbiterOnly + + err = topo.SetAgentStatus(ctx, a.pbm.Conn, hb) + if err != nil { + l.Error("set status: %v", err) + } + } +} + +func (a *Agent) pbmStatus(ctx context.Context) topo.SubsysStatus { + err := a.pbm.Conn.MongoClient().Ping(ctx, nil) + if err != nil { + return topo.SubsysStatus{Err: err.Error()} + } + + return topo.SubsysStatus{OK: true} +} + +func (a *Agent) nodeStatus(ctx context.Context) topo.SubsysStatus { + err := a.node.Session().Ping(ctx, nil) + if err != nil { + return topo.SubsysStatus{Err: err.Error()} + } + + return topo.SubsysStatus{OK: true} +} + +func (a *Agent) storStatus(ctx context.Context, log *log.Event, forceCheckStorage bool) topo.SubsysStatus { + // check storage once in a while if all is ok (see https://jira.percona.com/browse/PBM-647) + // but if storage was(is) failed, check it always + stat, err := topo.GetAgentStatus(ctx, a.pbm.Conn, a.node.RS(), a.node.Name()) + if err != nil { + log.Warning("get current storage status: %v", err) + } + if !forceCheckStorage && stat.StorageStatus.OK { + return topo.SubsysStatus{OK: true} + } + + stg, err := util.GetStorage(ctx, a.pbm.Conn, log) + if err != nil { + return topo.SubsysStatus{Err: fmt.Sprintf("unable to get storage: %v", err)} + } + + _, err = stg.FileStat(defs.StorInitFile) + if errors.Is(err, storage.ErrNotExist) { + err := stg.Save(defs.StorInitFile, bytes.NewBufferString(version.Current().Version), 0) + if err != nil { + return topo.SubsysStatus{ + Err: fmt.Sprintf("storage: no init file, attempt to create failed: %v", err), + } + } + } else if err != nil { + return topo.SubsysStatus{Err: fmt.Sprintf("storage check failed with: %v", err)} + } + + return topo.SubsysStatus{OK: true} +} + +func logHbStatus(name string, st topo.SubsysStatus, l *log.Event) { + if !st.OK { + l.Error("check %s: %s", name, st.Err) + } +} diff --git a/agent/backup.go b/cmd/pbm-agent/backup.go similarity index 57% rename from agent/backup.go rename to cmd/pbm-agent/backup.go index b4306ad5c..abe8e21cc 100644 --- a/agent/backup.go +++ b/cmd/pbm-agent/backup.go @@ -1,19 +1,25 @@ -package agent +package main import ( - "context" "time" - "github.com/pkg/errors" - - "github.com/percona/percona-backup-mongodb/pbm" + "github.com/percona/percona-backup-mongodb/internal/config" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/lock" + "github.com/percona/percona-backup-mongodb/internal/log" + 
"github.com/percona/percona-backup-mongodb/internal/priority" + "github.com/percona/percona-backup-mongodb/internal/query" + "github.com/percona/percona-backup-mongodb/internal/storage" + "github.com/percona/percona-backup-mongodb/internal/topo" + "github.com/percona/percona-backup-mongodb/internal/types" + "github.com/percona/percona-backup-mongodb/internal/version" "github.com/percona/percona-backup-mongodb/pbm/backup" - "github.com/percona/percona-backup-mongodb/pbm/log" - "github.com/percona/percona-backup-mongodb/pbm/storage" ) type currentBackup struct { - header *pbm.BackupCmd + header *types.BackupCmd cancel context.CancelFunc } @@ -46,16 +52,17 @@ func (a *Agent) CancelBackup() { } // Backup starts backup -func (a *Agent) Backup(cmd *pbm.BackupCmd, opid pbm.OPID, ep pbm.Epoch) { +func (a *Agent) Backup(ctx context.Context, cmd *types.BackupCmd, opid types.OPID, ep config.Epoch) { if cmd == nil { - l := a.log.NewEvent(string(pbm.CmdBackup), "", opid.String(), ep.TS()) + l := a.log.NewEvent(string(defs.CmdBackup), "", opid.String(), ep.TS()) l.Error("missed command") return } - l := a.log.NewEvent(string(pbm.CmdBackup), cmd.Name, opid.String(), ep.TS()) + l := a.log.NewEvent(string(defs.CmdBackup), cmd.Name, opid.String(), ep.TS()) + ctx = log.SetLoggerToContext(ctx, a.log) - nodeInfo, err := a.node.GetInfo() + nodeInfo, err := topo.GetNodeInfoExt(ctx, a.node.Session()) if err != nil { l.Error("get node info: %v", err) return @@ -68,7 +75,7 @@ func (a *Agent) Backup(cmd *pbm.BackupCmd, opid pbm.OPID, ep pbm.Epoch) { } isClusterLeader := nodeInfo.IsClusterLeader() - canRunBackup, err := backup.NodeSuitsExt(a.node, nodeInfo, cmd.Type) + canRunBackup, err := topo.NodeSuitsExt(ctx, a.node.Session(), nodeInfo, cmd.Type) if err != nil { l.Error("node check: %v", err) if !isClusterLeader { @@ -89,19 +96,19 @@ func (a *Agent) Backup(cmd *pbm.BackupCmd, opid pbm.OPID, ep pbm.Epoch) { var bcp *backup.Backup switch cmd.Type { - case pbm.PhysicalBackup: + case defs.PhysicalBackup: bcp = backup.NewPhysical(a.pbm, a.node) - case pbm.ExternalBackup: + case defs.ExternalBackup: bcp = backup.NewExternal(a.pbm, a.node) - case pbm.IncrementalBackup: + case defs.IncrementalBackup: bcp = backup.NewIncremental(a.pbm, a.node, cmd.IncrBase) - case pbm.LogicalBackup: + case defs.LogicalBackup: fallthrough default: bcp = backup.New(a.pbm, a.node) } - cfg, err := a.pbm.GetConfig() + cfg, err := config.GetConfig(ctx, a.pbm.Conn) if err != nil { l.Error("unable to get PBM config settings: " + err.Error()) return @@ -113,27 +120,27 @@ func (a *Agent) Backup(cmd *pbm.BackupCmd, opid pbm.OPID, ep pbm.Epoch) { bcp.SetTimeouts(cfg.Backup.Timeouts) if isClusterLeader { - balancer := pbm.BalancerModeOff + balancer := topo.BalancerModeOff if nodeInfo.IsSharded() { - bs, err := a.pbm.GetBalancerStatus() + bs, err := topo.GetBalancerStatus(ctx, a.pbm.Conn) if err != nil { l.Error("get balancer status: %v", err) return } if bs.IsOn() { - balancer = pbm.BalancerModeOn + balancer = topo.BalancerModeOn } } - err = bcp.Init(cmd, opid, nodeInfo, cfg.Storage, balancer) + err = bcp.Init(ctx, cmd, opid, nodeInfo, cfg.Storage, balancer, l) if err != nil { l.Error("init meta: %v", err) return } l.Debug("init backup meta") - if err = pbm.CheckTopoForBackup(a.pbm, cmd.Type); err != nil { - ferr := a.pbm.ChangeBackupState(cmd.Name, pbm.StatusError, err.Error()) - l.Info("mark backup as %s `%v`: %v", pbm.StatusError, err, ferr) + if err = topo.CheckTopoForBackup(ctx, a.pbm.Conn, cmd.Type); err != nil { + ferr := 
query.ChangeBackupState(a.pbm.Conn, cmd.Name, defs.StatusError, err.Error()) + l.Info("mark backup as %s `%v`: %v", defs.StatusError, err, ferr) return } @@ -142,8 +149,8 @@ func (a *Agent) Backup(cmd *pbm.BackupCmd, opid pbm.OPID, ep pbm.Epoch) { // the agent that made a previous (src) backup. const srcHostMultiplier = 3.0 var c map[string]float64 - if cmd.Type == pbm.IncrementalBackup && !cmd.IncrBase { - src, err := a.pbm.LastIncrementalBackup() + if cmd.Type == defs.IncrementalBackup && !cmd.IncrBase { + src, err := query.LastIncrementalBackup(ctx, a.pbm.Conn) if err != nil { // try backup anyway l.Warning("define source backup: %v", err) @@ -155,34 +162,34 @@ func (a *Agent) Backup(cmd *pbm.BackupCmd, opid pbm.OPID, ep pbm.Epoch) { } } - agents, err := a.pbm.ListAgentStatuses() + agents, err := topo.ListAgentStatuses(ctx, a.pbm.Conn) if err != nil { l.Error("get agents list: %v", err) return } - validCandidates := make([]pbm.AgentStat, 0, len(agents)) + validCandidates := make([]topo.AgentStat, 0, len(agents)) for _, s := range agents { - if pbm.FeatureSupport(s.MongoVersion()).BackupType(cmd.Type) != nil { + if version.FeatureSupport(s.MongoVersion()).BackupType(cmd.Type) != nil { continue } validCandidates = append(validCandidates, s) } - nodes, err := a.pbm.BcpNodesPriority(c, validCandidates) + nodes, err := priority.BcpNodesPriority(ctx, a.pbm.Conn, c, validCandidates) if err != nil { l.Error("get nodes priority: %v", err) return } - shards, err := a.pbm.ClusterMembers() + shards, err := topo.ClusterMembers(ctx, a.pbm.Conn.MongoClient()) if err != nil { l.Error("get cluster members: %v", err) return } for _, sh := range shards { go func(rs string) { - err := a.nominateRS(cmd.Name, rs, nodes.RS(rs), l) + err := a.nominateRS(ctx, cmd.Name, rs, nodes.RS(rs), l) if err != nil { l.Error("nodes nomination for %s: %v", rs, err) } @@ -190,7 +197,7 @@ func (a *Agent) Backup(cmd *pbm.BackupCmd, opid pbm.OPID, ep pbm.Epoch) { } } - nominated, err := a.waitNomination(cmd.Name, nodeInfo.SetName, nodeInfo.Me, l) + nominated, err := a.waitNomination(ctx, cmd.Name, nodeInfo.SetName, nodeInfo.Me, l) if err != nil { l.Error("wait for nomination: %v", err) } @@ -201,8 +208,8 @@ func (a *Agent) Backup(cmd *pbm.BackupCmd, opid pbm.OPID, ep pbm.Epoch) { } epoch := ep.TS() - lock := a.pbm.NewLock(pbm.LockHeader{ - Type: pbm.CmdBackup, + lck := lock.NewLock(a.pbm.Conn, lock.LockHeader{ + Type: defs.CmdBackup, Replset: nodeInfo.SetName, Node: nodeInfo.Me, OPID: opid.String(), @@ -210,10 +217,10 @@ func (a *Agent) Backup(cmd *pbm.BackupCmd, opid pbm.OPID, ep pbm.Epoch) { }) // install a backup lock despite having PITR one - got, err := a.acquireLock(lock, l, func() (bool, error) { - return lock.Rewrite(&pbm.LockHeader{ + got, err := a.acquireLock(ctx, lck, l, func(ctx context.Context) (bool, error) { + return lck.Rewrite(ctx, &lock.LockHeader{ Replset: a.node.RS(), - Type: pbm.CmdPITR, + Type: defs.CmdPITR, }) }) if err != nil { @@ -225,21 +232,21 @@ func (a *Agent) Backup(cmd *pbm.BackupCmd, opid pbm.OPID, ep pbm.Epoch) { return } - err = a.pbm.SetRSNomineeACK(cmd.Name, nodeInfo.SetName, nodeInfo.Me) + err = query.SetRSNomineeACK(ctx, a.pbm.Conn, cmd.Name, nodeInfo.SetName, nodeInfo.Me) if err != nil { l.Warning("set nominee ack: %v", err) } - ctx, cancel := context.WithCancel(context.Background()) + bcpCtx, cancel := context.WithCancel(ctx) a.setBcp(¤tBackup{ header: cmd, cancel: cancel, }) l.Info("backup started") - err = bcp.Run(ctx, cmd, opid, l) + err = bcp.Run(bcpCtx, cmd, opid, l) a.unsetBcp() if 
err != nil { - if errors.Is(err, backup.ErrCancelled) { + if errors.Is(err, storage.ErrCancelled) || errors.Is(err, context.Canceled) { l.Info("backup was canceled") } else { l.Error("backup: %v", err) @@ -249,24 +256,24 @@ func (a *Agent) Backup(cmd *pbm.BackupCmd, opid pbm.OPID, ep pbm.Epoch) { } l.Debug("releasing lock") - err = lock.Release() + err = lck.Release() if err != nil { - l.Error("unable to release backup lock %v: %v", lock, err) + l.Error("unable to release backup lock %v: %v", lck, err) } } const renominationFrame = 5 * time.Second -func (a *Agent) nominateRS(bcp, rs string, nodes [][]string, l *log.Event) error { +func (a *Agent) nominateRS(ctx context.Context, bcp, rs string, nodes [][]string, l *log.Event) error { l.Debug("nomination list for %s: %v", rs, nodes) - err := a.pbm.SetRSNomination(bcp, rs) + err := query.SetRSNomination(ctx, a.pbm.Conn, bcp, rs) if err != nil { return errors.Wrap(err, "set nomination meta") } for _, n := range nodes { - nms, err := a.pbm.GetRSNominees(bcp, rs) - if err != nil && !errors.Is(err, pbm.ErrNotFound) { + nms, err := query.GetRSNominees(ctx, a.pbm.Conn, bcp, rs) + if err != nil && !errors.Is(err, errors.ErrNotFound) { return errors.Wrap(err, "get nomination meta") } if nms != nil && len(nms.Ack) > 0 { @@ -274,13 +281,13 @@ func (a *Agent) nominateRS(bcp, rs string, nodes [][]string, l *log.Event) error return nil } - err = a.pbm.SetRSNominees(bcp, rs, n) + err = query.SetRSNominees(ctx, a.pbm.Conn, bcp, rs, n) if err != nil { return errors.Wrap(err, "set nominees") } l.Debug("nomination %s, set candidates %v", rs, n) - err = a.pbm.BackupHB(bcp) + err = query.BackupHB(ctx, a.pbm.Conn, bcp) if err != nil { l.Warning("send heartbeat: %v", err) } @@ -291,19 +298,19 @@ func (a *Agent) nominateRS(bcp, rs string, nodes [][]string, l *log.Event) error return nil } -func (a *Agent) waitNomination(bcp, rs, node string, l *log.Event) (bool, error) { +func (a *Agent) waitNomination(ctx context.Context, bcp, rs, node string, l *log.Event) (bool, error) { tk := time.NewTicker(time.Millisecond * 500) defer tk.Stop() - stop := time.NewTimer(pbm.WaitActionStart) + stop := time.NewTimer(defs.WaitActionStart) defer stop.Stop() for { select { case <-tk.C: - nm, err := a.pbm.GetRSNominees(bcp, rs) + nm, err := query.GetRSNominees(ctx, a.pbm.Conn, bcp, rs) if err != nil { - if errors.Is(err, pbm.ErrNotFound) { + if errors.Is(err, errors.ErrNotFound) { continue } return false, errors.Wrap(err, "check nomination") diff --git a/cmd/pbm-agent/main.go b/cmd/pbm-agent/main.go index 672e27c43..d89fa258e 100644 --- a/cmd/pbm-agent/main.go +++ b/cmd/pbm-agent/main.go @@ -1,7 +1,6 @@ package main import ( - "context" "fmt" "log" "os" @@ -12,12 +11,12 @@ import ( "github.com/alecthomas/kingpin" mlog "github.com/mongodb/mongo-tools/common/log" "github.com/mongodb/mongo-tools/common/options" - "github.com/pkg/errors" - "github.com/percona/percona-backup-mongodb/agent" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/errors" + plog "github.com/percona/percona-backup-mongodb/internal/log" + "github.com/percona/percona-backup-mongodb/internal/version" "github.com/percona/percona-backup-mongodb/pbm" - plog "github.com/percona/percona-backup-mongodb/pbm/log" - "github.com/percona/percona-backup-mongodb/version" ) const mongoConnFlag = "mongodb-uri" @@ -92,20 +91,20 @@ func runAgent(mongoURI string, dumpConns int) error { return errors.Wrap(err, "connect to PBM") } - agnt := agent.New(pbmClient) + agnt := 
newAgent(pbmClient) defer agnt.Close() err = agnt.AddNode(ctx, mongoURI, dumpConns) if err != nil { return errors.Wrap(err, "connect to the node") } - agnt.InitLogger(pbmClient) + agnt.InitLogger() - if err := agnt.CanStart(); err != nil { - return errors.WithMessage(err, "pre-start check") + if err := agnt.CanStart(ctx); err != nil { + return errors.Wrap(err, "pre-start check") } - go agnt.PITR() - go agnt.HbStatus() + go agnt.PITR(ctx) + go agnt.HbStatus(ctx) - return errors.Wrap(agnt.Start(), "listen the commands stream") + return errors.Wrap(agnt.Start(ctx), "listen the commands stream") } diff --git a/agent/oplog.go b/cmd/pbm-agent/oplog.go similarity index 53% rename from agent/oplog.go rename to cmd/pbm-agent/oplog.go index 5a2b07e8d..a43238f22 100644 --- a/agent/oplog.go +++ b/cmd/pbm-agent/oplog.go @@ -1,30 +1,34 @@ -package agent +package main import ( "time" - "github.com/pkg/errors" - - "github.com/percona/percona-backup-mongodb/pbm" + "github.com/percona/percona-backup-mongodb/internal/config" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/lock" + "github.com/percona/percona-backup-mongodb/internal/topo" + "github.com/percona/percona-backup-mongodb/internal/types" "github.com/percona/percona-backup-mongodb/pbm/restore" ) // OplogReplay replays oplog between r.Start and r.End timestamps (wall time in UTC tz) -func (a *Agent) OplogReplay(r *pbm.ReplayCmd, opID pbm.OPID, ep pbm.Epoch) { +func (a *Agent) OplogReplay(ctx context.Context, r *types.ReplayCmd, opID types.OPID, ep config.Epoch) { if r == nil { - l := a.log.NewEvent(string(pbm.CmdReplay), "", opID.String(), ep.TS()) + l := a.log.NewEvent(string(defs.CmdReplay), "", opID.String(), ep.TS()) l.Error("missed command") return } - l := a.log.NewEvent(string(pbm.CmdReplay), r.Name, opID.String(), ep.TS()) + l := a.log.NewEvent(string(defs.CmdReplay), r.Name, opID.String(), ep.TS()) l.Info("time: %s-%s", time.Unix(int64(r.Start.T), 0).UTC().Format(time.RFC3339), time.Unix(int64(r.End.T), 0).UTC().Format(time.RFC3339), ) - nodeInfo, err := a.node.GetInfo() + nodeInfo, err := topo.GetNodeInfoExt(ctx, a.node.Session()) if err != nil { l.Error("get node info: %s", err.Error()) return @@ -35,15 +39,15 @@ func (a *Agent) OplogReplay(r *pbm.ReplayCmd, opID pbm.OPID, ep pbm.Epoch) { } epoch := ep.TS() - lock := a.pbm.NewLock(pbm.LockHeader{ - Type: pbm.CmdReplay, + lck := lock.NewLock(a.pbm.Conn, lock.LockHeader{ + Type: defs.CmdReplay, Replset: nodeInfo.SetName, Node: nodeInfo.Me, OPID: opID.String(), Epoch: &epoch, }) - nominated, err := a.acquireLock(lock, l, nil) + nominated, err := a.acquireLock(ctx, lck, l, nil) if err != nil { l.Error("acquiring lock: %s", err.Error()) return @@ -54,13 +58,13 @@ func (a *Agent) OplogReplay(r *pbm.ReplayCmd, opID pbm.OPID, ep pbm.Epoch) { } defer func() { - if err := lock.Release(); err != nil { + if err := lck.Release(); err != nil { l.Error("release lock: %s", err.Error()) } }() l.Info("oplog replay started") - if err := restore.New(a.pbm, a.node, r.RSMap).ReplayOplog(r, opID, l); err != nil { + if err := restore.New(a.pbm, a.node, r.RSMap).ReplayOplog(ctx, r, opID, l); err != nil { if errors.Is(err, restore.ErrNoDataForShard) { l.Info("no oplog for the shard, skipping") } else { @@ -70,7 +74,7 @@ func (a *Agent) OplogReplay(r *pbm.ReplayCmd, opID pbm.OPID, ep pbm.Epoch) { } l.Info("oplog replay 
successfully finished") - resetEpoch, err := a.pbm.ResetEpoch() + resetEpoch, err := config.ResetEpoch(a.pbm.Conn) if err != nil { l.Error("reset epoch: %s", err.Error()) return diff --git a/agent/restore.go b/cmd/pbm-agent/restore.go similarity index 62% rename from agent/restore.go rename to cmd/pbm-agent/restore.go index 0da3dc555..cff569ebd 100644 --- a/agent/restore.go +++ b/cmd/pbm-agent/restore.go @@ -1,21 +1,25 @@ -package agent +package main import ( - "context" "time" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/mongo" - "github.com/percona/percona-backup-mongodb/pbm" - "github.com/percona/percona-backup-mongodb/pbm/backup" - "github.com/percona/percona-backup-mongodb/pbm/pitr" + "github.com/percona/percona-backup-mongodb/internal/config" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/lock" + "github.com/percona/percona-backup-mongodb/internal/slicer" + "github.com/percona/percona-backup-mongodb/internal/topo" + "github.com/percona/percona-backup-mongodb/internal/types" + "github.com/percona/percona-backup-mongodb/internal/util" "github.com/percona/percona-backup-mongodb/pbm/restore" ) type currentPitr struct { - slicer *pitr.Slicer - w chan *pbm.OPID // to wake up a slicer on demand (not to wait for the tick) + slicer *slicer.Slicer + w chan *types.OPID // to wake up a slicer on demand (not to wait for the tick) cancel context.CancelFunc } @@ -45,18 +49,18 @@ func (a *Agent) getPitr() *currentPitr { const pitrCheckPeriod = time.Second * 15 // PITR starts PITR processing routine -func (a *Agent) PITR() { +func (a *Agent) PITR(ctx context.Context) { a.log.Printf("starting PITR routine") for { wait := pitrCheckPeriod - err := a.pitr() + err := a.pitr(ctx) if err != nil { // we need epoch just to log pitr err with an extra context // so not much care if we get it or not - ep, _ := a.pbm.GetEpoch() - a.log.Error(string(pbm.CmdPITR), "", "", ep.TS(), "init: %v", err) + ep, _ := config.GetEpoch(ctx, a.pbm.Conn) + a.log.Error(string(defs.CmdPITR), "", "", ep.TS(), "init: %v", err) // penalty to the failed node so healthy nodes would have priority on next try wait *= 2 @@ -84,13 +88,13 @@ func (a *Agent) stopPitrOnOplogOnlyChange(currOO bool) { } } -func (a *Agent) pitr() error { +func (a *Agent) pitr(ctx context.Context) error { // pausing for physical restore if !a.HbIsRun() { return nil } - cfg, err := a.pbm.GetConfig() + cfg, err := config.GetConfig(ctx, a.pbm.Conn) if err != nil && !errors.Is(err, mongo.ErrNoDocuments) { return errors.Wrap(err, "get conf") } @@ -105,16 +109,16 @@ func (a *Agent) pitr() error { return nil } - ep, err := a.pbm.GetEpoch() + ep, err := config.GetEpoch(ctx, a.pbm.Conn) if err != nil { return errors.Wrap(err, "get epoch") } - l := a.log.NewEvent(string(pbm.CmdPITR), "", "", ep.TS()) + l := a.log.NewEvent(string(defs.CmdPITR), "", "", ep.TS()) spant := time.Duration(cfg.PITR.OplogSpanMin * float64(time.Minute)) if spant == 0 { - spant = pbm.PITRdefaultSpan + spant = defs.PITRdefaultSpan } // already do the job @@ -136,7 +140,7 @@ func (a *Agent) pitr() error { // just a check before a real locking // just trying to avoid redundant heavy operations - moveOn, err := a.pitrLockCheck() + moveOn, err := a.pitrLockCheck(ctx) if err != nil { return errors.Wrap(err, "check if already run") } @@ -150,11 +154,11 @@ func (a *Agent) pitr() error { // if node failing, 
then some other agent with healthy node will hopefully catch up // so this code won't be reached and will not pollute log with "pitr" errors while // the other node does successfully slice - ninf, err := a.node.GetInfo() + ninf, err := topo.GetNodeInfoExt(ctx, a.node.Session()) if err != nil { return errors.Wrap(err, "get node info") } - q, err := backup.NodeSuits(a.node, ninf) + q, err := topo.NodeSuits(ctx, a.node.Session(), ninf) if err != nil { return errors.Wrap(err, "node check") } @@ -164,20 +168,20 @@ func (a *Agent) pitr() error { return nil } - stg, err := a.pbm.GetStorage(l) + stg, err := util.GetStorage(ctx, a.pbm.Conn, l) if err != nil { return errors.Wrap(err, "unable to get storage configuration") } epts := ep.TS() - lock := a.pbm.NewLock(pbm.LockHeader{ + lck := lock.NewLock(a.pbm.Conn, lock.LockHeader{ Replset: a.node.RS(), Node: a.node.Name(), - Type: pbm.CmdPITR, + Type: defs.CmdPITR, Epoch: &epts, }) - got, err := a.acquireLock(lock, l, nil) + got, err := a.acquireLock(ctx, lck, l, nil) if err != nil { return errors.Wrap(err, "acquiring lock") } @@ -186,41 +190,48 @@ func (a *Agent) pitr() error { return nil } - ibcp := pitr.NewSlicer(a.node.RS(), a.pbm, a.node, stg, ep) + ibcp := slicer.NewSlicer(a.node.RS(), a.pbm.Conn, a.node.Session(), stg, ep, a.pbm.Logger()) ibcp.SetSpan(spant) if cfg.PITR.OplogOnly { - err = ibcp.OplogOnlyCatchup() + err = ibcp.OplogOnlyCatchup(ctx) } else { - err = ibcp.Catchup() + err = ibcp.Catchup(ctx) } if err != nil { - if err := lock.Release(); err != nil { + if err := lck.Release(); err != nil { l.Error("release lock: %v", err) } return errors.Wrap(err, "catchup") } go func() { - ctx, cancel := context.WithCancel(context.Background()) + stopSlicingCtx, stopSlicing := context.WithCancel(ctx) + defer stopSlicing() + stopC := make(chan struct{}) - w := make(chan *pbm.OPID, 1) + w := make(chan *types.OPID, 1) a.setPitr(¤tPitr{ slicer: ibcp, - cancel: cancel, + cancel: stopSlicing, w: w, }) - streamErr := ibcp.Stream(ctx, w, cfg.PITR.Compression, cfg.PITR.CompressionLevel, cfg.Backup.Timeouts) + go func() { + <-stopSlicingCtx.Done() + close(stopC) + }() + + streamErr := ibcp.Stream(ctx, stopC, w, cfg.PITR.Compression, cfg.PITR.CompressionLevel, cfg.Backup.Timeouts) if streamErr != nil { out := l.Error - if errors.Is(streamErr, pitr.OpMovedError{}) { + if errors.Is(streamErr, slicer.OpMovedError{}) { out = l.Info } out("streaming oplog: %v", streamErr) } - if err := lock.Release(); err != nil { + if err := lck.Release(); err != nil { l.Error("release lock: %v", err) } @@ -237,13 +248,13 @@ func (a *Agent) pitr() error { return nil } -func (a *Agent) pitrLockCheck() (bool, error) { - ts, err := a.pbm.ClusterTime() +func (a *Agent) pitrLockCheck(ctx context.Context) (bool, error) { + ts, err := topo.GetClusterTime(ctx, a.pbm.Conn) if err != nil { return false, errors.Wrap(err, "read cluster time") } - tl, err := a.pbm.GetLockData(&pbm.LockHeader{Replset: a.node.RS()}) + tl, err := lock.GetLockData(ctx, a.pbm.Conn, &lock.LockHeader{Replset: a.node.RS()}) if err != nil { if errors.Is(err, mongo.ErrNoDocuments) { // no lock. 
good to move on @@ -254,40 +265,40 @@ func (a *Agent) pitrLockCheck() (bool, error) { } // stale lock means we should move on and clean it up during the lock.Acquire - return tl.Heartbeat.T+pbm.StaleFrameSec < ts.T, nil + return tl.Heartbeat.T+defs.StaleFrameSec < ts.T, nil } -func (a *Agent) Restore(r *pbm.RestoreCmd, opid pbm.OPID, ep pbm.Epoch) { +func (a *Agent) Restore(ctx context.Context, r *types.RestoreCmd, opid types.OPID, ep config.Epoch) { if r == nil { - l := a.log.NewEvent(string(pbm.CmdRestore), "", opid.String(), ep.TS()) + l := a.log.NewEvent(string(defs.CmdRestore), "", opid.String(), ep.TS()) l.Error("missed command") return } - l := a.log.NewEvent(string(pbm.CmdRestore), r.Name, opid.String(), ep.TS()) + l := a.log.NewEvent(string(defs.CmdRestore), r.Name, opid.String(), ep.TS()) if !r.OplogTS.IsZero() { l.Info("to time: %s", time.Unix(int64(r.OplogTS.T), 0).UTC().Format(time.RFC3339)) } - nodeInfo, err := a.node.GetInfo() + nodeInfo, err := topo.GetNodeInfoExt(ctx, a.node.Session()) if err != nil { l.Error("get node info: %v", err) return } - var lock *pbm.Lock + var lck *lock.Lock if nodeInfo.IsPrimary { epts := ep.TS() - lock = a.pbm.NewLock(pbm.LockHeader{ - Type: pbm.CmdRestore, + lck = lock.NewLock(a.pbm.Conn, lock.LockHeader{ + Type: defs.CmdRestore, Replset: nodeInfo.SetName, Node: nodeInfo.Me, OPID: opid.String(), Epoch: &epts, }) - got, err := a.acquireLock(lock, l, nil) + got, err := a.acquireLock(ctx, lck, l, nil) if err != nil { l.Error("acquiring lock: %v", err) return @@ -299,30 +310,30 @@ func (a *Agent) Restore(r *pbm.RestoreCmd, opid pbm.OPID, ep pbm.Epoch) { } defer func() { - if lock == nil { + if lck == nil { return } - if err := lock.Release(); err != nil { + if err := lck.Release(); err != nil { l.Error("release lock: %v", err) } }() } - stg, err := a.pbm.GetStorage(l) + stg, err := util.GetStorage(ctx, a.pbm.Conn, l) if err != nil { l.Error("get storage: %v", err) return } - var bcpType pbm.BackupType - bcp := &pbm.BackupMeta{} + var bcpType defs.BackupType + bcp := &types.BackupMeta{} if r.External && r.BackupName == "" { - bcpType = pbm.ExternalBackup + bcpType = defs.ExternalBackup } else { l.Info("backup: %s", r.BackupName) - bcp, err = restore.SnapshotMeta(a.pbm, r.BackupName, stg) + bcp, err = restore.SnapshotMeta(ctx, a.pbm, r.BackupName, stg) if err != nil { l.Error("define base backup: %v", err) return @@ -339,35 +350,35 @@ func (a *Agent) Restore(r *pbm.RestoreCmd, opid pbm.OPID, ep pbm.Epoch) { l.Info("recovery started") switch bcpType { - case pbm.LogicalBackup: + case defs.LogicalBackup: if !nodeInfo.IsPrimary { l.Info("Node in not suitable for restore") return } if r.OplogTS.IsZero() { - err = restore.New(a.pbm, a.node, r.RSMap).Snapshot(r, opid, l) + err = restore.New(a.pbm, a.node, r.RSMap).Snapshot(ctx, r, opid, l) } else { - err = restore.New(a.pbm, a.node, r.RSMap).PITR(r, opid, l) + err = restore.New(a.pbm, a.node, r.RSMap).PITR(ctx, r, opid, l) } - case pbm.PhysicalBackup, pbm.IncrementalBackup, pbm.ExternalBackup: - if lock != nil { + case defs.PhysicalBackup, defs.IncrementalBackup, defs.ExternalBackup: + if lck != nil { // Don't care about errors. Anyway, the lock gonna disappear after the // restore. And the commands stream is down as well. // The lock also updates its heartbeats but Restore waits only for one state - // with the timeout twice as short pbm.StaleFrameSec. - _ = lock.Release() - lock = nil + // with the timeout twice as short defs.StaleFrameSec. 
+ _ = lck.Release() + lck = nil } var rstr *restore.PhysRestore - rstr, err = restore.NewPhysical(a.pbm, a.node, nodeInfo, r.RSMap) + rstr, err = restore.NewPhysical(ctx, a.pbm, a.node, nodeInfo, r.RSMap) if err != nil { l.Error("init physical backup: %v", err) return } r.BackupName = bcp.Name - err = rstr.Snapshot(r, r.OplogTS, opid, l, a.closeCMD, a.HbPause) + err = rstr.Snapshot(ctx, r, r.OplogTS, opid, l, a.closeCMD, a.HbPause) } if err != nil { if errors.Is(err, restore.ErrNoDataForShard) { @@ -378,8 +389,8 @@ func (a *Agent) Restore(r *pbm.RestoreCmd, opid pbm.OPID, ep pbm.Epoch) { return } - if bcpType == pbm.LogicalBackup && nodeInfo.IsLeader() { - epch, err := a.pbm.ResetEpoch() + if bcpType == defs.LogicalBackup && nodeInfo.IsLeader() { + epch, err := config.ResetEpoch(a.pbm.Conn) if err != nil { l.Error("reset epoch: %v", err) } diff --git a/speedt/b_test.go b/cmd/pbm-speed-test/b_test.go similarity index 95% rename from speedt/b_test.go rename to cmd/pbm-speed-test/b_test.go index 9b1da7c2d..00df76a15 100644 --- a/speedt/b_test.go +++ b/cmd/pbm-speed-test/b_test.go @@ -1,4 +1,4 @@ -package speedt +package main import ( "io" diff --git a/speedt/dataset.go b/cmd/pbm-speed-test/dataset.go similarity index 99% rename from speedt/dataset.go rename to cmd/pbm-speed-test/dataset.go index c01b8cf75..4d96edefc 100644 --- a/speedt/dataset.go +++ b/cmd/pbm-speed-test/dataset.go @@ -1,4 +1,4 @@ -package speedt +package main //nolint:lll,misspell var dataset = [...]string{ diff --git a/cmd/pbm-speed-test/main.go b/cmd/pbm-speed-test/main.go index fdf8dce01..0901e5b89 100644 --- a/cmd/pbm-speed-test/main.go +++ b/cmd/pbm-speed-test/main.go @@ -1,7 +1,6 @@ package main import ( - "context" "fmt" "log" "math/rand" @@ -9,13 +8,15 @@ import ( "time" "github.com/alecthomas/kingpin" + "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/storage/blackhole" + "github.com/percona/percona-backup-mongodb/internal/util" + "github.com/percona/percona-backup-mongodb/internal/version" "github.com/percona/percona-backup-mongodb/pbm" - "github.com/percona/percona-backup-mongodb/pbm/compress" - "github.com/percona/percona-backup-mongodb/pbm/storage/blackhole" - "github.com/percona/percona-backup-mongodb/speedt" - "github.com/percona/percona-backup-mongodb/version" ) func main() { @@ -29,11 +30,11 @@ func main() { compressLevel *int compressType = tCmd.Flag("compression", "Compression type //////"). - Default(string(compress.CompressionTypeS2)). - Enum(string(compress.CompressionTypeNone), string(compress.CompressionTypeGZIP), - string(compress.CompressionTypeSNAPPY), string(compress.CompressionTypeLZ4), - string(compress.CompressionTypeS2), string(compress.CompressionTypePGZIP), - string(compress.CompressionTypeZstandard), + Default(string(defs.CompressionTypeS2)). 
+ Enum(string(defs.CompressionTypeNone), string(defs.CompressionTypeGZIP), + string(defs.CompressionTypeSNAPPY), string(defs.CompressionTypeLZ4), + string(defs.CompressionTypeS2), string(defs.CompressionTypePGZIP), + string(defs.CompressionTypeZstandard), ) compressionCmd = tCmd.Command("compression", "Run compression test") @@ -66,10 +67,10 @@ func main() { switch cmd { case compressionCmd.FullCommand(): fmt.Print("Test started ") - compression(*mURL, compress.CompressionType(*compressType), compressLevel, *sampleSizeF, *sampleColF) + testCompression(*mURL, defs.CompressionType(*compressType), compressLevel, *sampleSizeF, *sampleColF) case storageCmd.FullCommand(): fmt.Print("Test started ") - storage(*mURL, compress.CompressionType(*compressType), compressLevel, *sampleSizeF, *sampleColF) + testStorage(*mURL, defs.CompressionType(*compressType), compressLevel, *sampleSizeF, *sampleColF) case versionCmd.FullCommand(): switch { case *versionCommit: @@ -82,7 +83,7 @@ func main() { } } -func compression(mURL string, compression compress.CompressionType, level *int, sizeGb float64, collection string) { +func testCompression(mURL string, compression defs.CompressionType, level *int, sizeGb float64, collection string) { ctx := context.Background() var cn *mongo.Client @@ -101,7 +102,7 @@ func compression(mURL string, compression compress.CompressionType, level *int, done := make(chan struct{}) go printw(done) - r, err := speedt.Run(cn, stg, compression, level, sizeGb, collection) + r, err := doTest(cn, stg, compression, level, sizeGb, collection) if err != nil { log.Fatalln("Error:", err) } @@ -111,7 +112,7 @@ func compression(mURL string, compression compress.CompressionType, level *int, fmt.Println(r) } -func storage(mURL string, compression compress.CompressionType, level *int, sizeGb float64, collection string) { +func testStorage(mURL string, compression defs.CompressionType, level *int, sizeGb float64, collection string) { ctx := context.Background() node, err := pbm.NewNode(ctx, mURL, 1) @@ -128,13 +129,14 @@ func storage(mURL string, compression compress.CompressionType, level *int, size } defer pbmClient.Conn.Disconnect(ctx) //nolint:errcheck - stg, err := pbmClient.GetStorage(nil) + l := pbmClient.Logger().NewEvent("", "", "", primitive.Timestamp{}) + stg, err := util.GetStorage(ctx, pbmClient.Conn, l) if err != nil { log.Fatalln("Error: get storage:", err) } done := make(chan struct{}) go printw(done) - r, err := speedt.Run(sess, stg, compression, level, sizeGb, collection) + r, err := doTest(sess, stg, compression, level, sizeGb, collection) if err != nil { log.Fatalln("Error:", err) } diff --git a/speedt/speedt.go b/cmd/pbm-speed-test/speedt.go similarity index 86% rename from speedt/speedt.go rename to cmd/pbm-speed-test/speedt.go index 91d067567..e191834db 100644 --- a/speedt/speedt.go +++ b/cmd/pbm-speed-test/speedt.go @@ -1,7 +1,6 @@ -package speedt +package main import ( - "context" "fmt" "io" "reflect" @@ -9,13 +8,15 @@ import ( "time" "unsafe" - "github.com/pkg/errors" + "github.com/percona/percona-backup-mongodb/internal/context" + "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/mongo" - "github.com/percona/percona-backup-mongodb/pbm/backup" - "github.com/percona/percona-backup-mongodb/pbm/compress" - "github.com/percona/percona-backup-mongodb/pbm/storage" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + + "github.com/percona/percona-backup-mongodb/internal/storage" ) type Results 
struct { @@ -118,15 +119,15 @@ func (c *Collection) WriteTo(w io.Writer) (int64, error) { const fileName = "pbmSpeedTest" -func Run( +func doTest( nodeCN *mongo.Client, stg storage.Storage, - compression compress.CompressionType, + compression defs.CompressionType, level *int, sizeGb float64, collection string, ) (*Results, error) { - var src backup.Source + var src storage.Source var err error if collection != "" { src, err = NewCollection(Byte(sizeGb)*GB, nodeCN, collection) @@ -139,7 +140,7 @@ func Run( r := &Results{} ts := time.Now() - size, err := backup.Upload(context.Background(), src, stg, compression, level, fileName, -1) + size, err := storage.Upload(context.Background(), src, stg, compression, level, fileName, -1) r.Size = Byte(size) if err != nil { return nil, errors.Wrap(err, "upload") diff --git a/cli/backup.go b/cmd/pbm/backup.go similarity index 65% rename from cli/backup.go rename to cmd/pbm/backup.go index 92f60a57e..716816fee 100644 --- a/cli/backup.go +++ b/cmd/pbm/backup.go @@ -1,24 +1,30 @@ -package cli +package main import ( - "context" "fmt" "log" "sort" "strings" "time" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" "golang.org/x/mod/semver" "gopkg.in/yaml.v2" + "github.com/percona/percona-backup-mongodb/internal/archive" + "github.com/percona/percona-backup-mongodb/internal/config" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/query" + "github.com/percona/percona-backup-mongodb/internal/resync" + "github.com/percona/percona-backup-mongodb/internal/storage" + "github.com/percona/percona-backup-mongodb/internal/topo" + "github.com/percona/percona-backup-mongodb/internal/types" + "github.com/percona/percona-backup-mongodb/internal/util" + "github.com/percona/percona-backup-mongodb/internal/version" "github.com/percona/percona-backup-mongodb/pbm" - "github.com/percona/percona-backup-mongodb/pbm/archive" - "github.com/percona/percona-backup-mongodb/pbm/compress" - "github.com/percona/percona-backup-mongodb/pbm/storage" - "github.com/percona/percona-backup-mongodb/version" ) type backupOpts struct { @@ -72,34 +78,34 @@ type descBcp struct { coll bool } -func runBackup(cn *pbm.PBM, b *backupOpts, outf outFormat) (fmt.Stringer, error) { +func runBackup(ctx context.Context, cn *pbm.PBM, b *backupOpts, outf outFormat) (fmt.Stringer, error) { nss, err := parseCLINSOption(b.ns) if err != nil { - return nil, errors.WithMessage(err, "parse --ns option") + return nil, errors.Wrap(err, "parse --ns option") } if len(nss) > 1 { return nil, errors.New("parse --ns option: multiple namespaces are not supported") } - if len(nss) != 0 && b.typ != string(pbm.LogicalBackup) { + if len(nss) != 0 && b.typ != string(defs.LogicalBackup) { return nil, errors.New("--ns flag is only allowed for logical backup") } - if err := pbm.CheckTopoForBackup(cn, pbm.BackupType(b.typ)); err != nil { - return nil, errors.WithMessage(err, "backup pre-check") + if err := topo.CheckTopoForBackup(ctx, cn.Conn, defs.BackupType(b.typ)); err != nil { + return nil, errors.Wrap(err, "backup pre-check") } - if err := checkConcurrentOp(cn); err != nil { + if err := checkConcurrentOp(ctx, cn); err != nil { // PITR slicing can be run along with the backup start - agents will resolve it. 
var e concurentOpError if !errors.As(err, &e) { return nil, err } - if e.op.Type != pbm.CmdPITR { + if e.op.Type != defs.CmdPITR { return nil, err } } - cfg, err := cn.GetConfig() + cfg, err := config.GetConfig(ctx, cn.Conn) if err != nil { if errors.Is(err, mongo.ErrNoDocuments) { return nil, errors.New("no store set. Set remote store with ") @@ -109,7 +115,7 @@ func runBackup(cn *pbm.PBM, b *backupOpts, outf outFormat) (fmt.Stringer, error) compression := cfg.Backup.Compression if b.compression != "" { - compression = compress.CompressionType(b.compression) + compression = defs.CompressionType(b.compression) } level := cfg.Backup.CompressionLevel @@ -117,10 +123,10 @@ func runBackup(cn *pbm.PBM, b *backupOpts, outf outFormat) (fmt.Stringer, error) level = &b.compressionLevel[0] } - err = cn.SendCmd(pbm.Cmd{ - Cmd: pbm.CmdBackup, - Backup: &pbm.BackupCmd{ - Type: pbm.BackupType(b.typ), + err = sendCmd(ctx, cn.Conn, types.Cmd{ + Cmd: defs.CmdBackup, + Backup: &types.BackupCmd{ + Type: defs.BackupType(b.typ), IncrBase: b.base, Name: b.name, Namespaces: nss, @@ -137,19 +143,19 @@ func runBackup(cn *pbm.PBM, b *backupOpts, outf outFormat) (fmt.Stringer, error) } fmt.Printf("Starting backup '%s'", b.name) - ctx, cancel := context.WithTimeout(context.Background(), cfg.Backup.Timeouts.StartingStatus()) + startCtx, cancel := context.WithTimeout(ctx, cfg.Backup.Timeouts.StartingStatus()) defer cancel() - err = waitForBcpStatus(ctx, cn, b.name) + err = waitForBcpStatus(startCtx, cn, b.name) if err != nil { return nil, err } - if b.typ == string(pbm.ExternalBackup) { - s, err := waitBackup(context.Background(), cn, b.name, pbm.StatusCopyReady) + if b.typ == string(defs.ExternalBackup) { + s, err := waitBackup(ctx, cn, b.name, defs.StatusCopyReady) if err != nil { return nil, errors.Wrap(err, "waiting for the `copyReady` status") } - if s == nil || *s != pbm.StatusCopyReady { + if s == nil || *s != defs.StatusCopyReady { str := "nil" if s != nil { str = string(*s) @@ -157,7 +163,7 @@ func runBackup(cn *pbm.PBM, b *backupOpts, outf outFormat) (fmt.Stringer, error) return nil, errors.Errorf("unexpected backup status %v", str) } - bcp, err := cn.GetBackupMeta(b.name) + bcp, err := query.GetBackupMeta(ctx, cn.Conn, b.name) if err != nil { return nil, errors.Wrap(err, "get backup meta") } @@ -174,7 +180,7 @@ func runBackup(cn *pbm.PBM, b *backupOpts, outf outFormat) (fmt.Stringer, error) if b.wait { fmt.Printf("\nWaiting for '%s' backup...", b.name) - s, err := waitBackup(context.Background(), cn, b.name, pbm.StatusDone) + s, err := waitBackup(ctx, cn, b.name, defs.StatusDone) if s != nil { fmt.Printf(" %s\n", *s) } @@ -184,23 +190,23 @@ func runBackup(cn *pbm.PBM, b *backupOpts, outf outFormat) (fmt.Stringer, error) return backupOut{b.name, cfg.Storage.Path()}, nil } -func runFinishBcp(cn *pbm.PBM, bcp string) (fmt.Stringer, error) { - meta, err := cn.GetBackupMeta(bcp) +func runFinishBcp(ctx context.Context, cn *pbm.PBM, bcp string) (fmt.Stringer, error) { + meta, err := query.GetBackupMeta(ctx, cn.Conn, bcp) if err != nil { - if errors.Is(err, pbm.ErrNotFound) { + if errors.Is(err, errors.ErrNotFound) { return nil, errors.Errorf("backup %q not found", bcp) } return nil, err } - if meta.Status != pbm.StatusCopyReady { - return nil, errors.Errorf("expected %q status. got %q", pbm.StatusCopyReady, meta.Status) + if meta.Status != defs.StatusCopyReady { + return nil, errors.Errorf("expected %q status. got %q", defs.StatusCopyReady, meta.Status) } return outMsg{fmt.Sprintf("Command sent. 
Check `pbm describe-backup %s` for the result.", bcp)}, - cn.ChangeBackupState(bcp, pbm.StatusCopyDone, "") + query.ChangeBackupState(cn.Conn, bcp, defs.StatusCopyDone, "") } -func waitBackup(ctx context.Context, cn *pbm.PBM, name string, status pbm.Status) (*pbm.Status, error) { +func waitBackup(ctx context.Context, cn *pbm.PBM, name string, status defs.Status) (*defs.Status, error) { t := time.NewTicker(time.Second) defer t.Stop() @@ -209,15 +215,15 @@ func waitBackup(ctx context.Context, cn *pbm.PBM, name string, status pbm.Status case <-ctx.Done(): return nil, ctx.Err() case <-t.C: - bcp, err := cn.GetBackupMeta(name) + bcp, err := query.GetBackupMeta(ctx, cn.Conn, name) if err != nil { return nil, err } switch bcp.Status { - case status, pbm.StatusDone, pbm.StatusCancelled: + case status, defs.StatusDone, defs.StatusCancelled: return &bcp.Status, nil - case pbm.StatusError: + case defs.StatusError: return &bcp.Status, bcp.Error() } } @@ -230,23 +236,23 @@ func waitForBcpStatus(ctx context.Context, cn *pbm.PBM, bcpName string) error { tk := time.NewTicker(time.Second) defer tk.Stop() - var bmeta *pbm.BackupMeta + var bmeta *types.BackupMeta for { select { case <-tk.C: fmt.Print(".") var err error - bmeta, err = cn.GetBackupMeta(bcpName) - if errors.Is(err, pbm.ErrNotFound) { + bmeta, err = query.GetBackupMeta(ctx, cn.Conn, bcpName) + if errors.Is(err, errors.ErrNotFound) { continue } if err != nil { return errors.Wrap(err, "get backup metadata") } switch bmeta.Status { - case pbm.StatusRunning, pbm.StatusDumpDone, pbm.StatusDone, pbm.StatusCancelled: + case defs.StatusRunning, defs.StatusDumpDone, defs.StatusDone, defs.StatusCancelled: return nil - case pbm.StatusError: + case defs.StatusError: rs := "" for _, s := range bmeta.Replsets { rs += fmt.Sprintf("\n- Backup on replicaset \"%s\" in state: %v", s.Name, s.Status) @@ -277,37 +283,37 @@ func waitForBcpStatus(ctx context.Context, cn *pbm.PBM, bcpName string) error { } type bcpDesc struct { - Name string `json:"name" yaml:"name"` - OPID string `json:"opid" yaml:"opid"` - Type pbm.BackupType `json:"type" yaml:"type"` - LastWriteTS int64 `json:"last_write_ts" yaml:"-"` - LastTransitionTS int64 `json:"last_transition_ts" yaml:"-"` - LastWriteTime string `json:"last_write_time" yaml:"last_write_time"` - LastTransitionTime string `json:"last_transition_time" yaml:"last_transition_time"` - Namespaces []string `json:"namespaces,omitempty" yaml:"namespaces,omitempty"` - MongoVersion string `json:"mongodb_version" yaml:"mongodb_version"` - FCV string `json:"fcv" yaml:"fcv"` - PBMVersion string `json:"pbm_version" yaml:"pbm_version"` - Status pbm.Status `json:"status" yaml:"status"` - Size int64 `json:"size" yaml:"-"` - HSize string `json:"size_h" yaml:"size_h"` - Err *string `json:"error,omitempty" yaml:"error,omitempty"` - Replsets []bcpReplDesc `json:"replsets" yaml:"replsets"` + Name string `json:"name" yaml:"name"` + OPID string `json:"opid" yaml:"opid"` + Type defs.BackupType `json:"type" yaml:"type"` + LastWriteTS int64 `json:"last_write_ts" yaml:"-"` + LastTransitionTS int64 `json:"last_transition_ts" yaml:"-"` + LastWriteTime string `json:"last_write_time" yaml:"last_write_time"` + LastTransitionTime string `json:"last_transition_time" yaml:"last_transition_time"` + Namespaces []string `json:"namespaces,omitempty" yaml:"namespaces,omitempty"` + MongoVersion string `json:"mongodb_version" yaml:"mongodb_version"` + FCV string `json:"fcv" yaml:"fcv"` + PBMVersion string `json:"pbm_version" yaml:"pbm_version"` + Status defs.Status 
`json:"status" yaml:"status"` + Size int64 `json:"size" yaml:"-"` + HSize string `json:"size_h" yaml:"size_h"` + Err *string `json:"error,omitempty" yaml:"error,omitempty"` + Replsets []bcpReplDesc `json:"replsets" yaml:"replsets"` } type bcpReplDesc struct { - Name string `json:"name" yaml:"name"` - Status pbm.Status `json:"status" yaml:"status"` - Node string `json:"node" yaml:"node"` - Files []pbm.File `json:"files,omitempty" yaml:"-"` - LastWriteTS int64 `json:"last_write_ts" yaml:"-"` - LastTransitionTS int64 `json:"last_transition_ts" yaml:"-"` - LastWriteTime string `json:"last_write_time" yaml:"last_write_time"` - LastTransitionTime string `json:"last_transition_time" yaml:"last_transition_time"` - IsConfigSvr *bool `json:"configsvr,omitempty" yaml:"configsvr,omitempty"` - SecurityOpts *pbm.MongodOptsSec `json:"security,omitempty" yaml:"security,omitempty"` - Error *string `json:"error,omitempty" yaml:"error,omitempty"` - Collections []string `json:"collections,omitempty" yaml:"collections,omitempty"` + Name string `json:"name" yaml:"name"` + Status defs.Status `json:"status" yaml:"status"` + Node string `json:"node" yaml:"node"` + Files []types.File `json:"files,omitempty" yaml:"-"` + LastWriteTS int64 `json:"last_write_ts" yaml:"-"` + LastTransitionTS int64 `json:"last_transition_ts" yaml:"-"` + LastWriteTime string `json:"last_write_time" yaml:"last_write_time"` + LastTransitionTime string `json:"last_transition_time" yaml:"last_transition_time"` + IsConfigSvr *bool `json:"configsvr,omitempty" yaml:"configsvr,omitempty"` + SecurityOpts *topo.MongodOptsSec `json:"security,omitempty" yaml:"security,omitempty"` + Error *string `json:"error,omitempty" yaml:"error,omitempty"` + Collections []string `json:"collections,omitempty" yaml:"collections,omitempty"` } func (b *bcpDesc) String() string { @@ -335,22 +341,23 @@ func byteCountIEC(b int64) string { return fmt.Sprintf("%.1f %ciB", float64(b)/float64(div), "KMGTPE"[exp]) } -func describeBackup(cn *pbm.PBM, b *descBcp) (fmt.Stringer, error) { - bcp, err := cn.GetBackupMeta(b.name) +func describeBackup(ctx context.Context, cn *pbm.PBM, b *descBcp) (fmt.Stringer, error) { + bcp, err := query.GetBackupMeta(ctx, cn.Conn, b.name) if err != nil { return nil, err } var stg storage.Storage if b.coll { - stg, err = cn.GetStorage(nil) + l := cn.Logger().NewEvent("", "", "", primitive.Timestamp{}) + stg, err = util.GetStorage(ctx, cn.Conn, l) if err != nil { - return nil, errors.WithMessage(err, "get storage") + return nil, errors.Wrap(err, "get storage") } - _, err := stg.FileStat(pbm.StorInitFile) + _, err := stg.FileStat(defs.StorInitFile) if err != nil { - return nil, errors.WithMessage(err, "check storage access") + return nil, errors.Wrap(err, "check storage access") } } @@ -376,16 +383,17 @@ func describeBackup(cn *pbm.PBM, b *descBcp) (fmt.Stringer, error) { if bcp.Size == 0 { switch bcp.Status { - case pbm.StatusDone, pbm.StatusCancelled, pbm.StatusError: - stg, err := cn.GetStorage(cn.Logger().NewEvent("", "", "", primitive.Timestamp{})) + case defs.StatusDone, defs.StatusCancelled, defs.StatusError: + l := cn.Logger().NewEvent("", "", "", primitive.Timestamp{}) + stg, err := util.GetStorage(ctx, cn.Conn, l) if err != nil { - return nil, errors.WithMessage(err, "get storage") + return nil, errors.Wrap(err, "get storage") } rv.Size, err = getLegacySnapshotSize(bcp, stg) - if errors.Is(err, errMissedFile) && bcp.Status != pbm.StatusDone { + if errors.Is(err, errMissedFile) && bcp.Status != defs.StatusDone { // canceled/failed backup can 
be incomplete. ignore - return nil, errors.WithMessage(err, "get snapshot size") + return nil, errors.Wrap(err, "get snapshot size") } } } @@ -409,17 +417,17 @@ func describeBackup(cn *pbm.PBM, b *descBcp) (fmt.Stringer, error) { if r.MongodOpts != nil && r.MongodOpts.Security != nil { rv.Replsets[i].SecurityOpts = r.MongodOpts.Security } - if bcp.Type == pbm.ExternalBackup { + if bcp.Type == defs.ExternalBackup { rv.Replsets[i].Files = r.Files } - if !b.coll || bcp.Type != pbm.LogicalBackup { + if !b.coll || bcp.Type != defs.LogicalBackup { continue } - nss, err := pbm.ReadArchiveNamespaces(stg, r.DumpName) + nss, err := resync.ReadArchiveNamespaces(stg, r.DumpName) if err != nil { - return nil, errors.WithMessage(err, "read archive metadata") + return nil, errors.Wrap(err, "read archive metadata") } rv.Replsets[i].Collections = make([]string, len(nss)) @@ -443,10 +451,10 @@ func describeBackup(cn *pbm.PBM, b *descBcp) (fmt.Stringer, error) { // storage nor in DB (backup is ok, it just doesn't cluster), it is just "in-flight" changes // in given `bcps`. func bcpsMatchCluster( - bcps []pbm.BackupMeta, + bcps []types.BackupMeta, ver, fcv string, - shards []pbm.Shard, + shards []topo.Shard, confsrv string, rsMap map[string]string, ) { @@ -455,17 +463,17 @@ func bcpsMatchCluster( sh[s.RS] = s.RS == confsrv } - mapRS, mapRevRS := pbm.MakeRSMapFunc(rsMap), pbm.MakeReverseRSMapFunc(rsMap) + mapRS, mapRevRS := util.MakeRSMapFunc(rsMap), util.MakeReverseRSMapFunc(rsMap) for i := 0; i < len(bcps); i++ { bcpMatchCluster(&bcps[i], ver, fcv, sh, mapRS, mapRevRS) } } -func bcpMatchCluster(bcp *pbm.BackupMeta, ver, fcv string, shards map[string]bool, mapRS, mapRevRS pbm.RSMapFunc) { - if bcp.Status != pbm.StatusDone { +func bcpMatchCluster(bcp *types.BackupMeta, ver, fcv string, shards map[string]bool, mapRS, mapRevRS util.RSMapFunc) { + if bcp.Status != defs.StatusDone { return } - if !version.CompatibleWith(bcp.PBMVersion, pbm.BreakingChangesMap[bcp.Type]) { + if !version.CompatibleWith(bcp.PBMVersion, version.BreakingChangesMap[bcp.Type]) { bcp.SetRuntimeError(incompatiblePBMVersionError{bcp.PBMVersion}) return } diff --git a/cli/backup_test.go b/cmd/pbm/backup_test.go similarity index 66% rename from cli/backup_test.go rename to cmd/pbm/backup_test.go index 27f6b8ce7..50b8c13ac 100644 --- a/cli/backup_test.go +++ b/cmd/pbm/backup_test.go @@ -1,160 +1,164 @@ -package cli +package main import ( - "errors" "fmt" "sort" "strings" "testing" - "github.com/percona/percona-backup-mongodb/pbm" - "github.com/percona/percona-backup-mongodb/version" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/types" + "github.com/percona/percona-backup-mongodb/internal/util" + + "github.com/percona/percona-backup-mongodb/internal/topo" + "github.com/percona/percona-backup-mongodb/internal/version" ) func TestBcpMatchCluster(t *testing.T) { type bcase struct { - meta pbm.BackupMeta - expect pbm.Status + meta types.BackupMeta + expect defs.Status } cases := []struct { confsrv string - shards []pbm.Shard + shards []topo.Shard bcps []bcase }{ { confsrv: "config", - shards: []pbm.Shard{ + shards: []topo.Shard{ {RS: "config"}, {RS: "rs1"}, {RS: "rs2"}, }, bcps: []bcase{ { - pbm.BackupMeta{ + types.BackupMeta{ Name: "bcp1", - Replsets: []pbm.BackupReplset{ + Replsets: []types.BackupReplset{ {Name: "rs3"}, }, }, - pbm.StatusError, + defs.StatusError, }, { - pbm.BackupMeta{ + types.BackupMeta{ Name: "bcp2", 
- Replsets: []pbm.BackupReplset{ + Replsets: []types.BackupReplset{ {Name: "config"}, }, }, - pbm.StatusDone, + defs.StatusDone, }, { - pbm.BackupMeta{ + types.BackupMeta{ Name: "bcp2", - Replsets: []pbm.BackupReplset{ + Replsets: []types.BackupReplset{ {Name: "rs1"}, {Name: "rs2"}, }, }, - pbm.StatusError, + defs.StatusError, }, { - pbm.BackupMeta{ + types.BackupMeta{ Name: "bcp2", - Replsets: []pbm.BackupReplset{ + Replsets: []types.BackupReplset{ {Name: "rs1"}, }, }, - pbm.StatusError, + defs.StatusError, }, { - pbm.BackupMeta{ + types.BackupMeta{ Name: "bcp3", - Replsets: []pbm.BackupReplset{ + Replsets: []types.BackupReplset{ {Name: "config"}, {Name: "rs1"}, {Name: "rs3"}, }, }, - pbm.StatusError, + defs.StatusError, }, { - pbm.BackupMeta{ + types.BackupMeta{ Name: "bcp4", - Replsets: []pbm.BackupReplset{ + Replsets: []types.BackupReplset{ {Name: "config"}, {Name: "rs1"}, }, }, - pbm.StatusDone, + defs.StatusDone, }, { - pbm.BackupMeta{ + types.BackupMeta{ Name: "bcp5", - Replsets: []pbm.BackupReplset{ + Replsets: []types.BackupReplset{ {Name: "config"}, {Name: "rs1"}, {Name: "rs2"}, }, }, - pbm.StatusDone, + defs.StatusDone, }, }, }, { confsrv: "rs1", - shards: []pbm.Shard{ + shards: []topo.Shard{ {RS: "rs1"}, }, bcps: []bcase{ { - pbm.BackupMeta{ + types.BackupMeta{ Name: "bcp1", - Replsets: []pbm.BackupReplset{ + Replsets: []types.BackupReplset{ {Name: "rs3"}, }, }, - pbm.StatusError, + defs.StatusError, }, { - pbm.BackupMeta{ + types.BackupMeta{ Name: "bcp2", - Replsets: []pbm.BackupReplset{ + Replsets: []types.BackupReplset{ {Name: "rs1"}, }, }, - pbm.StatusDone, + defs.StatusDone, }, { - pbm.BackupMeta{ + types.BackupMeta{ Name: "bcp3", - Replsets: []pbm.BackupReplset{ + Replsets: []types.BackupReplset{ {Name: "config"}, {Name: "rs1"}, {Name: "rs3"}, }, }, - pbm.StatusError, + defs.StatusError, }, { - pbm.BackupMeta{ + types.BackupMeta{ Name: "bcp4", - Replsets: []pbm.BackupReplset{ + Replsets: []types.BackupReplset{ {Name: "config"}, {Name: "rs1"}, }, }, - pbm.StatusError, + defs.StatusError, }, { - pbm.BackupMeta{ + types.BackupMeta{ Name: "bcp5", - Replsets: []pbm.BackupReplset{ + Replsets: []types.BackupReplset{ {Name: "config"}, {Name: "rs1"}, {Name: "rs2"}, }, }, - pbm.StatusError, + defs.StatusError, }, }, }, @@ -162,10 +166,10 @@ func TestBcpMatchCluster(t *testing.T) { for i, c := range cases { t.Run(fmt.Sprint(i), func(t *testing.T) { - m := []pbm.BackupMeta{} + m := []types.BackupMeta{} for _, b := range c.bcps { b.meta.PBMVersion = string(version.Current().Version) - b.meta.Status = pbm.StatusDone + b.meta.Status = defs.StatusDone m = append(m, b.meta) } bcpsMatchCluster(m, "", "", c.shards, c.confsrv, nil) @@ -186,13 +190,13 @@ func TestBcpMatchRemappedCluster(t *testing.T) { cases := []struct { topology map[string]bool - bcp pbm.BackupMeta + bcp types.BackupMeta rsMap map[string]string expected error }{ { - bcp: pbm.BackupMeta{ - Replsets: []pbm.BackupReplset{ + bcp: types.BackupMeta{ + Replsets: []types.BackupReplset{ {Name: "rs0"}, {Name: "rs1"}, }, @@ -201,16 +205,16 @@ func TestBcpMatchRemappedCluster(t *testing.T) { expected: nil, }, { - bcp: pbm.BackupMeta{ - Replsets: []pbm.BackupReplset{ + bcp: types.BackupMeta{ + Replsets: []types.BackupReplset{ {Name: "rs0"}, }, }, expected: nil, }, { - bcp: pbm.BackupMeta{ - Replsets: []pbm.BackupReplset{ + bcp: types.BackupMeta{ + Replsets: []types.BackupReplset{ {Name: "rs0"}, {Name: "rs1"}, }, @@ -222,8 +226,8 @@ func TestBcpMatchRemappedCluster(t *testing.T) { expected: nil, }, { - bcp: pbm.BackupMeta{ - Replsets: 
[]pbm.BackupReplset{ + bcp: types.BackupMeta{ + Replsets: []types.BackupReplset{ {Name: "rs0"}, {Name: "rs1"}, {Name: "rs2"}, @@ -235,8 +239,8 @@ func TestBcpMatchRemappedCluster(t *testing.T) { }, }, { - bcp: pbm.BackupMeta{ - Replsets: []pbm.BackupReplset{ + bcp: types.BackupMeta{ + Replsets: []types.BackupReplset{ {Name: "rs0"}, {Name: "rs1"}, }, @@ -258,8 +262,8 @@ func TestBcpMatchRemappedCluster(t *testing.T) { "rs4": false, "rs6": false, }, - bcp: pbm.BackupMeta{ - Replsets: []pbm.BackupReplset{ + bcp: types.BackupMeta{ + Replsets: []types.BackupReplset{ {Name: "cfg"}, {Name: "rs0"}, {Name: "rs1"}, @@ -280,15 +284,15 @@ func TestBcpMatchRemappedCluster(t *testing.T) { }, }, { - bcp: pbm.BackupMeta{}, + bcp: types.BackupMeta{}, expected: missedReplsetsError{configsrv: true}, }, } - types := []pbm.BackupType{ - pbm.LogicalBackup, - pbm.PhysicalBackup, - pbm.IncrementalBackup, + types := []defs.BackupType{ + defs.LogicalBackup, + defs.PhysicalBackup, + defs.IncrementalBackup, } for _, tt := range types { t.Logf("backup type: %s", tt) @@ -300,8 +304,8 @@ func TestBcpMatchRemappedCluster(t *testing.T) { } c.bcp.Type = tt - c.bcp.Status = pbm.StatusDone - mapRS, mapRevRS := pbm.MakeRSMapFunc(c.rsMap), pbm.MakeReverseRSMapFunc(c.rsMap) + c.bcp.Status = defs.StatusDone + mapRS, mapRevRS := util.MakeRSMapFunc(c.rsMap), util.MakeReverseRSMapFunc(c.rsMap) bcpMatchCluster(&c.bcp, "", "", topology, mapRS, mapRevRS) if msg := checkBcpMatchClusterError(c.bcp.Error(), c.expected); msg != "" { @@ -350,23 +354,23 @@ func checkBcpMatchClusterError(err, target error) string { } func BenchmarkBcpMatchCluster3x10(b *testing.B) { - shards := []pbm.Shard{ + shards := []topo.Shard{ {RS: "config"}, {RS: "rs1"}, {RS: "rs2"}, } - bcps := []pbm.BackupMeta{} + bcps := []types.BackupMeta{} for i := 0; i < 10; i++ { - bcps = append(bcps, pbm.BackupMeta{ + bcps = append(bcps, types.BackupMeta{ Name: "bcp", - Replsets: []pbm.BackupReplset{ + Replsets: []types.BackupReplset{ {Name: "config"}, {Name: "rs1"}, {Name: "rs2"}, }, - Status: pbm.StatusDone, + Status: defs.StatusDone, }) } b.ResetTimer() @@ -376,23 +380,23 @@ func BenchmarkBcpMatchCluster3x10(b *testing.B) { } func BenchmarkBcpMatchCluster3x100(b *testing.B) { - shards := []pbm.Shard{ + shards := []topo.Shard{ {RS: "config"}, {RS: "rs1"}, {RS: "rs2"}, } - bcps := []pbm.BackupMeta{} + bcps := []types.BackupMeta{} for i := 0; i < 100; i++ { - bcps = append(bcps, pbm.BackupMeta{ + bcps = append(bcps, types.BackupMeta{ Name: "bcp", - Replsets: []pbm.BackupReplset{ + Replsets: []types.BackupReplset{ {Name: "config"}, {Name: "rs1"}, {Name: "rs2"}, }, - Status: pbm.StatusDone, + Status: defs.StatusDone, }) } b.ResetTimer() @@ -402,7 +406,7 @@ func BenchmarkBcpMatchCluster3x100(b *testing.B) { } func BenchmarkBcpMatchCluster17x100(b *testing.B) { - shards := []pbm.Shard{ + shards := []topo.Shard{ {RS: "config"}, {RS: "rs1"}, {RS: "rs12"}, @@ -422,12 +426,12 @@ func BenchmarkBcpMatchCluster17x100(b *testing.B) { {RS: "rs333333333332"}, } - bcps := []pbm.BackupMeta{} + bcps := []types.BackupMeta{} for i := 0; i < 100; i++ { - bcps = append(bcps, pbm.BackupMeta{ + bcps = append(bcps, types.BackupMeta{ Name: "bcp3", - Replsets: []pbm.BackupReplset{ + Replsets: []types.BackupReplset{ {Name: "config"}, {Name: "rs1"}, {Name: "rs12"}, @@ -446,7 +450,7 @@ func BenchmarkBcpMatchCluster17x100(b *testing.B) { {Name: "rs333333332"}, {Name: "rs333333333332"}, }, - Status: pbm.StatusDone, + Status: defs.StatusDone, }) } b.ResetTimer() @@ -456,23 +460,23 @@ func 
BenchmarkBcpMatchCluster17x100(b *testing.B) { } func BenchmarkBcpMatchCluster3x1000(b *testing.B) { - shards := []pbm.Shard{ + shards := []topo.Shard{ {RS: "config"}, {RS: "rs1"}, {RS: "rs2"}, } - bcps := []pbm.BackupMeta{} + bcps := []types.BackupMeta{} for i := 0; i < 1000; i++ { - bcps = append(bcps, pbm.BackupMeta{ + bcps = append(bcps, types.BackupMeta{ Name: "bcp3", - Replsets: []pbm.BackupReplset{ + Replsets: []types.BackupReplset{ {Name: "config"}, {Name: "rs1"}, {Name: "rs2"}, }, - Status: pbm.StatusDone, + Status: defs.StatusDone, }) } b.ResetTimer() @@ -482,20 +486,20 @@ func BenchmarkBcpMatchCluster3x1000(b *testing.B) { } func BenchmarkBcpMatchCluster1000x1000(b *testing.B) { - shards := []pbm.Shard{{RS: "config"}} - rss := []pbm.BackupReplset{{Name: "config"}} + shards := []topo.Shard{{RS: "config"}} + rss := []types.BackupReplset{{Name: "config"}} for i := 0; i < 1000; i++ { - shards = append(shards, pbm.Shard{RS: fmt.Sprint(i)}) - rss = append(rss, pbm.BackupReplset{Name: fmt.Sprint(i)}) + shards = append(shards, topo.Shard{RS: fmt.Sprint(i)}) + rss = append(rss, types.BackupReplset{Name: fmt.Sprint(i)}) } - bcps := []pbm.BackupMeta{} + bcps := []types.BackupMeta{} for i := 0; i < 1000; i++ { - bcps = append(bcps, pbm.BackupMeta{ + bcps = append(bcps, types.BackupMeta{ Replsets: rss, - Status: pbm.StatusDone, + Status: defs.StatusDone, }) } b.ResetTimer() @@ -505,38 +509,38 @@ func BenchmarkBcpMatchCluster1000x1000(b *testing.B) { } func BenchmarkBcpMatchCluster3x10Err(b *testing.B) { - shards := []pbm.Shard{ + shards := []topo.Shard{ {RS: "config"}, {RS: "rs2"}, } - bcps := []pbm.BackupMeta{ + bcps := []types.BackupMeta{ { Name: "bcp1", - Replsets: []pbm.BackupReplset{ + Replsets: []types.BackupReplset{ {Name: "config"}, {Name: "rs1"}, {Name: "rs3"}, }, - Status: pbm.StatusDone, + Status: defs.StatusDone, }, { Name: "bcp2", - Replsets: []pbm.BackupReplset{ + Replsets: []types.BackupReplset{ {Name: "config"}, {Name: "rs1"}, }, - Status: pbm.StatusDone, + Status: defs.StatusDone, }, } for i := 0; i < 8; i++ { - bcps = append(bcps, pbm.BackupMeta{ - Replsets: []pbm.BackupReplset{ + bcps = append(bcps, types.BackupMeta{ + Replsets: []types.BackupReplset{ {Name: "config"}, {Name: "rs1"}, {Name: "rs3"}, }, - Status: pbm.StatusDone, + Status: defs.StatusDone, }) } for i := 0; i < b.N; i++ { @@ -545,21 +549,21 @@ func BenchmarkBcpMatchCluster3x10Err(b *testing.B) { } func BenchmarkBcpMatchCluster1000x1000Err(b *testing.B) { - shards := []pbm.Shard{{RS: "config"}} - rss := []pbm.BackupReplset{{Name: "config"}} + shards := []topo.Shard{{RS: "config"}} + rss := []types.BackupReplset{{Name: "config"}} for i := 0; i < 1000; i++ { - shards = append(shards, pbm.Shard{RS: fmt.Sprint(i)}) - rss = append(rss, pbm.BackupReplset{Name: fmt.Sprint(i)}) + shards = append(shards, topo.Shard{RS: fmt.Sprint(i)}) + rss = append(rss, types.BackupReplset{Name: fmt.Sprint(i)}) } - rss = append(rss, pbm.BackupReplset{Name: "newrs"}) - bcps := []pbm.BackupMeta{} + rss = append(rss, types.BackupReplset{Name: "newrs"}) + bcps := []types.BackupMeta{} for i := 0; i < 1000; i++ { - bcps = append(bcps, pbm.BackupMeta{ + bcps = append(bcps, types.BackupMeta{ Replsets: rss, - Status: pbm.StatusDone, + Status: defs.StatusDone, }) } b.ResetTimer() diff --git a/cmd/pbm/common.go b/cmd/pbm/common.go new file mode 100644 index 000000000..436f3f3f6 --- /dev/null +++ b/cmd/pbm/common.go @@ -0,0 +1,15 @@ +package main + +import ( + "time" + + "github.com/percona/percona-backup-mongodb/internal/connect" + 
"github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/types" +) + +func sendCmd(ctx context.Context, m connect.Client, cmd types.Cmd) error { + cmd.TS = time.Now().UTC().Unix() + _, err := m.CmdStreamCollection().InsertOne(ctx, cmd) + return err +} diff --git a/cli/config.go b/cmd/pbm/config.go similarity index 64% rename from cli/config.go rename to cmd/pbm/config.go index c66787e3d..372e8c94e 100644 --- a/cli/config.go +++ b/cmd/pbm/config.go @@ -1,4 +1,4 @@ -package cli +package main import ( "fmt" @@ -7,10 +7,14 @@ import ( "reflect" "strings" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/mongo" "gopkg.in/yaml.v2" + "github.com/percona/percona-backup-mongodb/internal/config" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/types" "github.com/percona/percona-backup-mongodb/pbm" ) @@ -42,13 +46,13 @@ func (c confVals) String() string { return s } -func runConfig(cn *pbm.PBM, c *configOpts) (fmt.Stringer, error) { +func runConfig(ctx context.Context, cn *pbm.PBM, c *configOpts) (fmt.Stringer, error) { switch { case len(c.set) > 0: var o confVals rsnc := false for k, v := range c.set { - err := cn.SetConfigVar(k, v) + err := config.SetConfigVar(ctx, cn.Conn, k, v) if err != nil { return nil, errors.Wrapf(err, "set %s", k) } @@ -60,20 +64,20 @@ func runConfig(cn *pbm.PBM, c *configOpts) (fmt.Stringer, error) { } } if rsnc { - if err := rsync(cn); err != nil { - return nil, errors.WithMessage(err, "resync") + if err := rsync(ctx, cn); err != nil { + return nil, errors.Wrap(err, "resync") } } return o, nil case len(c.key) > 0: - k, err := cn.GetConfigVar(c.key) + k, err := config.GetConfigVar(ctx, cn.Conn, c.key) if err != nil { return nil, errors.Wrap(err, "unable to get config key") } return confKV{c.key, fmt.Sprint(k)}, nil case c.rsync: - if err := rsync(cn); err != nil { - return nil, errors.WithMessage(err, "resync") + if err := rsync(ctx, cn); err != nil { + return nil, errors.Wrap(err, "resync") } return outMsg{"Storage resync started"}, nil case len(c.file) > 0: @@ -89,18 +93,18 @@ func runConfig(cn *pbm.PBM, c *configOpts) (fmt.Stringer, error) { return nil, errors.Wrap(err, "unable to read config file") } - var cfg pbm.Config + var cfg config.Config err = yaml.UnmarshalStrict(buf, &cfg) if err != nil { return nil, errors.Wrap(err, "unable to unmarshal config file") } - cCfg, err := cn.GetConfig() + cCfg, err := config.GetConfig(ctx, cn.Conn) if err != nil && !errors.Is(err, mongo.ErrNoDocuments) { return nil, errors.Wrap(err, "unable to get current config") } - err = cn.SetConfigByte(buf) + err = config.SetConfigByte(ctx, cn.Conn, buf) if err != nil { return nil, errors.Wrap(err, "unable to set config") } @@ -109,19 +113,19 @@ func runConfig(cn *pbm.PBM, c *configOpts) (fmt.Stringer, error) { cCfg.Storage.S3.Provider = cfg.Storage.S3.Provider // resync storage only if Storage options have changed if !reflect.DeepEqual(cfg.Storage, cCfg.Storage) { - if err := rsync(cn); err != nil { - return nil, errors.WithMessage(err, "resync") + if err := rsync(ctx, cn); err != nil { + return nil, errors.Wrap(err, "resync") } } - return cn.GetConfig() + return config.GetConfig(ctx, cn.Conn) default: - return cn.GetConfig() + return config.GetConfig(ctx, cn.Conn) } } -func rsync(cn *pbm.PBM) error { - return cn.SendCmd(pbm.Cmd{ - Cmd: 
pbm.CmdResync, +func rsync(ctx context.Context, cn *pbm.PBM) error { + return sendCmd(ctx, cn.Conn, types.Cmd{ + Cmd: defs.CmdResync, }) } diff --git a/cli/delete.go b/cmd/pbm/delete.go similarity index 73% rename from cli/delete.go rename to cmd/pbm/delete.go index 8d42e1a50..9fd4576d5 100644 --- a/cli/delete.go +++ b/cmd/pbm/delete.go @@ -1,4 +1,4 @@ -package cli +package main import ( "bufio" @@ -8,11 +8,16 @@ import ( "strings" "time" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson/primitive" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/lock" + "github.com/percona/percona-backup-mongodb/internal/types" + "github.com/percona/percona-backup-mongodb/internal/util" "github.com/percona/percona-backup-mongodb/pbm" - "github.com/percona/percona-backup-mongodb/pbm/sel" + "github.com/percona/percona-backup-mongodb/pbm/oplog" ) type deleteBcpOpts struct { @@ -21,7 +26,7 @@ type deleteBcpOpts struct { force bool } -func deleteBackup(pbmClient *pbm.PBM, d *deleteBcpOpts, outf outFormat) (fmt.Stringer, error) { +func deleteBackup(ctx context.Context, pbmClient *pbm.PBM, d *deleteBcpOpts, outf outFormat) (fmt.Stringer, error) { if !d.force { if err := askConfirmation("Are you sure you want to delete backup(s)?"); err != nil { if errors.Is(err, errUserCanceled) { @@ -31,9 +36,9 @@ func deleteBackup(pbmClient *pbm.PBM, d *deleteBcpOpts, outf outFormat) (fmt.Str } } - cmd := pbm.Cmd{ - Cmd: pbm.CmdDeleteBackup, - Delete: &pbm.DeleteBackupCmd{}, + cmd := types.Cmd{ + Cmd: defs.CmdDeleteBackup, + Delete: &types.DeleteBackupCmd{}, } if len(d.olderThan) > 0 { t, err := parseDateT(d.olderThan) @@ -48,7 +53,7 @@ func deleteBackup(pbmClient *pbm.PBM, d *deleteBcpOpts, outf outFormat) (fmt.Str cmd.Delete.Backup = d.name } tsop := time.Now().UTC().Unix() - err := pbmClient.SendCmd(cmd) + err := sendCmd(ctx, pbmClient.Conn, cmd) if err != nil { return nil, errors.Wrap(err, "schedule delete") } @@ -57,16 +62,15 @@ func deleteBackup(pbmClient *pbm.PBM, d *deleteBcpOpts, outf outFormat) (fmt.Str } fmt.Print("Waiting for delete to be done ") - err = waitOp(pbmClient, - &pbm.LockHeader{ - Type: pbm.CmdDeleteBackup, - }, + err = waitOp(ctx, + pbmClient, + &lock.LockHeader{Type: defs.CmdDeleteBackup}, time.Second*60) if err != nil && !errors.Is(err, errTout) { return nil, err } - errl, err := lastLogErr(pbmClient, pbm.CmdDeleteBackup, tsop) + errl, err := lastLogErr(ctx, pbmClient, defs.CmdDeleteBackup, tsop) if err != nil { return nil, errors.Wrap(err, "read agents log") } @@ -84,7 +88,7 @@ func deleteBackup(pbmClient *pbm.PBM, d *deleteBcpOpts, outf outFormat) (fmt.Str fmt.Println("[done]") } - return runList(pbmClient, &listOpts{}) + return runList(ctx, pbmClient, &listOpts{}) } type deletePitrOpts struct { @@ -93,7 +97,7 @@ type deletePitrOpts struct { all bool } -func deletePITR(pbmClient *pbm.PBM, d *deletePitrOpts, outf outFormat) (fmt.Stringer, error) { +func deletePITR(ctx context.Context, pbmClient *pbm.PBM, d *deletePitrOpts, outf outFormat) (fmt.Stringer, error) { if !d.all && len(d.olderThan) == 0 { return nil, errors.New("either --older-than or --all should be set") } @@ -111,9 +115,9 @@ func deletePITR(pbmClient *pbm.PBM, d *deletePitrOpts, outf outFormat) (fmt.Stri } } - cmd := pbm.Cmd{ - Cmd: pbm.CmdDeletePITR, - DeletePITR: &pbm.DeletePITRCmd{}, + cmd := types.Cmd{ + Cmd: defs.CmdDeletePITR, + 
DeletePITR: &types.DeletePITRCmd{}, } if !d.all && len(d.olderThan) > 0 { t, err := parseDateT(d.olderThan) @@ -123,7 +127,7 @@ func deletePITR(pbmClient *pbm.PBM, d *deletePitrOpts, outf outFormat) (fmt.Stri cmd.DeletePITR.OlderThan = t.UTC().Unix() } tsop := time.Now().UTC().Unix() - err := pbmClient.SendCmd(cmd) + err := sendCmd(ctx, pbmClient.Conn, cmd) if err != nil { return nil, errors.Wrap(err, "schedule pitr delete") } @@ -132,16 +136,15 @@ func deletePITR(pbmClient *pbm.PBM, d *deletePitrOpts, outf outFormat) (fmt.Stri } fmt.Print("Waiting for delete to be done ") - err = waitOp(pbmClient, - &pbm.LockHeader{ - Type: pbm.CmdDeletePITR, - }, + err = waitOp(ctx, + pbmClient, + &lock.LockHeader{Type: defs.CmdDeletePITR}, time.Second*60) if err != nil && !errors.Is(err, errTout) { return nil, err } - errl, err := lastLogErr(pbmClient, pbm.CmdDeletePITR, tsop) + errl, err := lastLogErr(ctx, pbmClient, defs.CmdDeletePITR, tsop) if err != nil { return nil, errors.Wrap(err, "read agents log") } @@ -159,7 +162,7 @@ func deletePITR(pbmClient *pbm.PBM, d *deletePitrOpts, outf outFormat) (fmt.Stri fmt.Println("[done]") } - return runList(pbmClient, &listOpts{}) + return runList(ctx, pbmClient, &listOpts{}) } type cleanupOptions struct { @@ -169,14 +172,14 @@ type cleanupOptions struct { dryRun bool } -func retentionCleanup(pbmClient *pbm.PBM, d *cleanupOptions) (fmt.Stringer, error) { +func retentionCleanup(ctx context.Context, pbmClient *pbm.PBM, d *cleanupOptions) (fmt.Stringer, error) { ts, err := parseOlderThan(d.olderThan) if err != nil { return nil, errors.Wrap(err, "parse --older-than") } - info, err := pbm.MakeCleanupInfo(pbmClient.Context(), pbmClient.Conn, ts) + info, err := pbm.MakeCleanupInfo(ctx, pbmClient.Conn, ts) if err != nil { - return nil, errors.WithMessage(err, "make cleanup report") + return nil, errors.Wrap(err, "make cleanup report") } if len(info.Backups) == 0 && len(info.Chunks) == 0 { return outMsg{"nothing to delete"}, nil @@ -198,19 +201,22 @@ func retentionCleanup(pbmClient *pbm.PBM, d *cleanupOptions) (fmt.Stringer, erro } tsop := time.Now().Unix() - err = pbmClient.SendCmd(pbm.Cmd{ - Cmd: pbm.CmdCleanup, - Cleanup: &pbm.CleanupCmd{OlderThan: ts}, + err = sendCmd(ctx, pbmClient.Conn, types.Cmd{ + Cmd: defs.CmdCleanup, + Cleanup: &types.CleanupCmd{OlderThan: ts}, }) if err != nil { - return nil, errors.WithMessage(err, "send command") + return nil, errors.Wrap(err, "send command") } if !d.wait { return outMsg{"Processing by agents. 
Please check status later"}, nil } fmt.Print("Waiting") - err = waitOp(pbmClient, &pbm.LockHeader{Type: pbm.CmdCleanup}, 10*time.Minute) + err = waitOp(ctx, + pbmClient, + &lock.LockHeader{Type: defs.CmdCleanup}, + 10*time.Minute) fmt.Println() if err != nil { if errors.Is(err, errTout) { @@ -219,9 +225,9 @@ func retentionCleanup(pbmClient *pbm.PBM, d *cleanupOptions) (fmt.Stringer, erro return nil, err } - errl, err := lastLogErr(pbmClient, pbm.CmdCleanup, tsop) + errl, err := lastLogErr(ctx, pbmClient, defs.CmdCleanup, tsop) if err != nil { - return nil, errors.WithMessage(err, "read agents log") + return nil, errors.Wrap(err, "read agents log") } if errl != "" { return nil, errors.New(errl) @@ -271,15 +277,15 @@ func parseDuration(s string) (time.Duration, error) { return time.Duration(d * 24 * int64(time.Hour)), nil } -func printCleanupInfoTo(w io.Writer, backups []pbm.BackupMeta, chunks []pbm.OplogChunk) { +func printCleanupInfoTo(w io.Writer, backups []types.BackupMeta, chunks []oplog.OplogChunk) { if len(backups) != 0 { fmt.Fprintln(w, "Snapshots:") for i := range backups { bcp := &backups[i] t := string(bcp.Type) - if sel.IsSelective(bcp.Namespaces) { + if util.IsSelective(bcp.Namespaces) { t += ", selective" - } else if bcp.Type == pbm.IncrementalBackup && bcp.SrcBackup == "" { + } else if bcp.Type == defs.IncrementalBackup && bcp.SrcBackup == "" { t += ", base" } fmt.Fprintf(w, " - %s <%s> [restore_time: %s]\n", @@ -304,11 +310,11 @@ func printCleanupInfoTo(w io.Writer, backups []pbm.BackupMeta, chunks []pbm.Oplo } lastWrite := &rs[len(rs)-1].End - if primitive.CompareTimestamp(*lastWrite, c.StartTS) == -1 { + if lastWrite.Compare(c.StartTS) == -1 { oplogRanges[c.RS] = append(rs, oplogRange{c.StartTS, c.EndTS}) continue } - if primitive.CompareTimestamp(*lastWrite, c.EndTS) == -1 { + if lastWrite.Compare(c.EndTS) == -1 { *lastWrite = c.EndTS } } @@ -333,7 +339,7 @@ var errUserCanceled = errors.New("canceled") func askConfirmation(question string) error { fi, err := os.Stdin.Stat() if err != nil { - return errors.WithMessage(err, "stat stdin") + return errors.Wrap(err, "stat stdin") } if (fi.Mode() & os.ModeCharDevice) == 0 { return errors.New("no tty") @@ -344,7 +350,7 @@ func askConfirmation(question string) error { scanner := bufio.NewScanner(os.Stdin) scanner.Scan() if err := scanner.Err(); err != nil { - return errors.WithMessage(err, "read stdin") + return errors.Wrap(err, "read stdin") } switch strings.TrimSpace(scanner.Text()) { diff --git a/cli/list.go b/cmd/pbm/list.go similarity index 68% rename from cli/list.go rename to cmd/pbm/list.go index 37b2940bc..80caf6e69 100644 --- a/cli/list.go +++ b/cmd/pbm/list.go @@ -1,4 +1,4 @@ -package cli +package main import ( "encoding/json" @@ -6,11 +6,19 @@ import ( "sort" "time" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson/primitive" + "github.com/percona/percona-backup-mongodb/internal/config" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/lock" + "github.com/percona/percona-backup-mongodb/internal/query" + "github.com/percona/percona-backup-mongodb/internal/topo" + "github.com/percona/percona-backup-mongodb/internal/util" + "github.com/percona/percona-backup-mongodb/internal/version" "github.com/percona/percona-backup-mongodb/pbm" - "github.com/percona/percona-backup-mongodb/pbm/sel" + 
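deleteBackup, deletePITR, and retentionCleanup above share the same scheduling skeleton; condensed into one sketch (scheduleAndWait is a hypothetical name; the helpers and constants are the ones those functions use, with confirmation prompts and output formatting omitted):

    func scheduleAndWait(ctx context.Context, cn *pbm.PBM, cmd types.Cmd, waitFor time.Duration) error {
        tsop := time.Now().UTC().Unix()
        if err := sendCmd(ctx, cn.Conn, cmd); err != nil {
            return errors.Wrap(err, "schedule")
        }
        // Poll the op lock until the agents release it or the timeout hits.
        err := waitOp(ctx, cn, &lock.LockHeader{Type: cmd.Cmd}, waitFor)
        if err != nil && !errors.Is(err, errTout) {
            return err
        }
        // Surface any error the agents logged for this command after scheduling.
        errl, err := lastLogErr(ctx, cn, cmd.Cmd, tsop)
        if err != nil {
            return errors.Wrap(err, "read agents log")
        }
        if errl != "" {
            return errors.New(errl)
        }
        return nil
    }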
"github.com/percona/percona-backup-mongodb/pbm/oplog" ) type listOpts struct { @@ -23,7 +31,7 @@ type listOpts struct { type restoreStatus struct { StartTS int64 `json:"start"` - Status pbm.Status `json:"status"` + Status defs.Status `json:"status"` Type restoreListType `json:"type"` Snapshot string `json:"snapshot,omitempty"` StartPointInTime int64 `json:"start-point-in-time,omitempty"` @@ -53,7 +61,7 @@ func (r restoreListOut) String() string { switch v.Type { case restoreSnapshot: t := string(v.Type) - if sel.IsSelective(v.Namespaces) { + if util.IsSelective(v.Namespaces) { t += ", selective" } name = fmt.Sprintf("%s [backup: %s]", v.Name, t) @@ -63,16 +71,16 @@ func (r restoreListOut) String() string { time.Unix(v.PointInTime, 0).UTC().Format(time.RFC3339)) default: n := time.Unix(v.PointInTime, 0).UTC().Format(time.RFC3339) - if sel.IsSelective(v.Namespaces) { + if util.IsSelective(v.Namespaces) { n = ", selective" } name = fmt.Sprintf("PITR: %s [restore time: %s]", v.Name, n) } switch v.Status { - case pbm.StatusDone, pbm.StatusPartlyDone: + case defs.StatusDone, defs.StatusPartlyDone: rprint = fmt.Sprintf("%s\t%s", name, v.Status) - case pbm.StatusError: + case defs.StatusError: rprint = fmt.Sprintf("%s\tFailed with \"%s\"", name, v.Error) default: rprint = fmt.Sprintf("%s\tIn progress [%s] (Launched at %s)", @@ -87,26 +95,26 @@ func (r restoreListOut) MarshalJSON() ([]byte, error) { return json.Marshal(r.list) } -func runList(cn *pbm.PBM, l *listOpts) (fmt.Stringer, error) { +func runList(ctx context.Context, cn *pbm.PBM, l *listOpts) (fmt.Stringer, error) { rsMap, err := parseRSNamesMapping(l.rsMap) if err != nil { - return nil, errors.WithMessage(err, "cannot parse replset mapping") + return nil, errors.Wrap(err, "cannot parse replset mapping") } if l.restore { - return restoreList(cn, int64(l.size)) + return restoreList(ctx, cn, int64(l.size)) } // show message and skip when resync is running - lk, err := findLock(cn, cn.GetLocks) - if err == nil && lk != nil && lk.Type == pbm.CmdResync { + lk, err := findLock(ctx, cn, lock.GetLocks) + if err == nil && lk != nil && lk.Type == defs.CmdResync { return outMsg{"Storage resync is running. 
Backups list will be available after sync finishes."}, nil } - return backupList(cn, l.size, l.full, l.unbacked, rsMap) + return backupList(ctx, cn, l.size, l.full, l.unbacked, rsMap) } -func restoreList(cn *pbm.PBM, size int64) (*restoreListOut, error) { - rlist, err := cn.RestoresList(size) +func restoreList(ctx context.Context, cn *pbm.PBM, size int64) (*restoreListOut, error) { + rlist, err := query.RestoresList(ctx, cn.Conn, size) if err != nil { return nil, errors.Wrap(err, "unable to get restore list") } @@ -159,9 +167,9 @@ func (bl backupListOut) String() string { for i := range bl.Snapshots { b := &bl.Snapshots[i] t := string(b.Type) - if sel.IsSelective(b.Namespaces) { + if util.IsSelective(b.Namespaces) { t += ", selective" - } else if b.Type == pbm.IncrementalBackup && b.SrcBackup == "" { + } else if b.Type == defs.IncrementalBackup && b.SrcBackup == "" { t += ", base" } s += fmt.Sprintf(" %s <%s> [restore_to_time: %s]\n", b.Name, t, fmtTS(int64(b.RestoreTS))) @@ -192,20 +200,26 @@ func (bl backupListOut) String() string { return s } -func backupList(cn *pbm.PBM, size int, full, unbacked bool, rsMap map[string]string) (backupListOut, error) { +func backupList( + ctx context.Context, + cn *pbm.PBM, + size int, + full, unbacked bool, + rsMap map[string]string, +) (backupListOut, error) { var list backupListOut var err error - list.Snapshots, err = getSnapshotList(cn, size, rsMap) + list.Snapshots, err = getSnapshotList(ctx, cn, size, rsMap) if err != nil { return list, errors.Wrap(err, "get snapshots") } - list.PITR.Ranges, list.PITR.RsRanges, err = getPitrList(cn, size, full, unbacked, rsMap) + list.PITR.Ranges, list.PITR.RsRanges, err = getPitrList(ctx, cn, size, full, unbacked, rsMap) if err != nil { return list, errors.Wrap(err, "get PITR ranges") } - list.PITR.On, err = cn.IsPITR() + list.PITR.On, _, err = config.IsPITREnabled(ctx, cn.Conn) if err != nil { return list, errors.Wrap(err, "check if PITR is on") } @@ -213,29 +227,29 @@ func backupList(cn *pbm.PBM, size int, full, unbacked bool, rsMap map[string]str return list, nil } -func getSnapshotList(cn *pbm.PBM, size int, rsMap map[string]string) ([]snapshotStat, error) { - bcps, err := cn.BackupsList(int64(size)) +func getSnapshotList(ctx context.Context, cn *pbm.PBM, size int, rsMap map[string]string) ([]snapshotStat, error) { + bcps, err := query.BackupsList(ctx, cn.Conn, int64(size)) if err != nil { return nil, errors.Wrap(err, "unable to get backups list") } - shards, err := cn.ClusterMembers() + shards, err := topo.ClusterMembers(ctx, cn.Conn.MongoClient()) if err != nil { return nil, errors.Wrap(err, "get cluster members") } - inf, err := cn.GetNodeInfo() + inf, err := topo.GetNodeInfoExt(ctx, cn.Conn.MongoClient()) if err != nil { return nil, errors.Wrap(err, "define cluster state") } - ver, err := pbm.GetMongoVersion(cn.Context(), cn.Conn) + ver, err := version.GetMongoVersion(ctx, cn.Conn.MongoClient()) if err != nil { - return nil, errors.WithMessage(err, "get mongo version") + return nil, errors.Wrap(err, "get mongo version") } - fcv, err := cn.GetFeatureCompatibilityVersion() + fcv, err := version.GetFCV(ctx, cn.Conn.MongoClient()) if err != nil { - return nil, errors.WithMessage(err, "get featureCompatibilityVersion") + return nil, errors.Wrap(err, "get featureCompatibilityVersion") } // pbm.PBM is always connected either to config server or to the sole (hence main) RS @@ -246,7 +260,7 @@ func getSnapshotList(cn *pbm.PBM, size int, rsMap map[string]string) ([]snapshot for i := len(bcps) - 1; i >= 0; i-- { b 
:= bcps[i] - if b.Status != pbm.StatusDone { + if b.Status != defs.StatusDone { continue } @@ -266,32 +280,33 @@ func getSnapshotList(cn *pbm.PBM, size int, rsMap map[string]string) ([]snapshot // getPitrList shows only chunks derived from `Done` and compatible version's backups func getPitrList( + ctx context.Context, cn *pbm.PBM, size int, full, unbacked bool, rsMap map[string]string, ) ([]pitrRange, map[string][]pitrRange, error) { - inf, err := cn.GetNodeInfo() + inf, err := topo.GetNodeInfoExt(ctx, cn.Conn.MongoClient()) if err != nil { return nil, nil, errors.Wrap(err, "define cluster state") } - shards, err := cn.ClusterMembers() + shards, err := topo.ClusterMembers(ctx, cn.Conn.MongoClient()) if err != nil { return nil, nil, errors.Wrap(err, "get cluster members") } - now, err := cn.ClusterTime() + now, err := topo.GetClusterTime(ctx, cn.Conn) if err != nil { return nil, nil, errors.Wrap(err, "get cluster time") } - mapRevRS := pbm.MakeReverseRSMapFunc(rsMap) + mapRevRS := util.MakeReverseRSMapFunc(rsMap) rsRanges := make(map[string][]pitrRange) - var rstlines [][]pbm.Timeline + var rstlines [][]oplog.Timeline for _, s := range shards { - tlns, err := cn.PITRGetValidTimelines(mapRevRS(s.RS), now) + tlns, err := oplog.PITRGetValidTimelines(ctx, cn.Conn, mapRevRS(s.RS), now) if err != nil { return nil, nil, errors.Wrapf(err, "get PITR timelines for %s replset", s.RS) } @@ -320,8 +335,8 @@ func getPitrList( } ranges := []pitrRange{} - for _, tl := range pbm.MergeTimelines(rstlines...) { - lastWrite, err := getBaseSnapshotLastWrite(cn, sh, rsMap, tl) + for _, tl := range oplog.MergeTimelines(rstlines...) { + lastWrite, err := getBaseSnapshotLastWrite(ctx, cn, sh, rsMap, tl) if err != nil { return nil, nil, err } @@ -340,14 +355,15 @@ func getPitrList( } func getBaseSnapshotLastWrite( + ctx context.Context, cn *pbm.PBM, sh map[string]bool, rsMap map[string]string, - tl pbm.Timeline, + tl oplog.Timeline, ) (primitive.Timestamp, error) { - bcp, err := cn.GetFirstBackup(&primitive.Timestamp{T: tl.Start, I: 0}) + bcp, err := query.GetFirstBackup(ctx, cn.Conn, &primitive.Timestamp{T: tl.Start, I: 0}) if err != nil { - if !errors.Is(err, pbm.ErrNotFound) { + if !errors.Is(err, errors.ErrNotFound) { return primitive.Timestamp{}, errors.Wrapf(err, "get backup for timeline: %s", tl) } @@ -357,25 +373,25 @@ func getBaseSnapshotLastWrite( return primitive.Timestamp{}, nil } - ver, err := pbm.GetMongoVersion(cn.Context(), cn.Conn) + ver, err := version.GetMongoVersion(ctx, cn.Conn.MongoClient()) if err != nil { - return primitive.Timestamp{}, errors.WithMessage(err, "get mongo version") + return primitive.Timestamp{}, errors.Wrap(err, "get mongo version") } - fcv, err := cn.GetFeatureCompatibilityVersion() + fcv, err := version.GetFCV(ctx, cn.Conn.MongoClient()) if err != nil { - return primitive.Timestamp{}, errors.WithMessage(err, "get featureCompatibilityVersion") + return primitive.Timestamp{}, errors.Wrap(err, "get featureCompatibilityVersion") } - bcpMatchCluster(bcp, ver.VersionString, fcv, sh, pbm.MakeRSMapFunc(rsMap), pbm.MakeReverseRSMapFunc(rsMap)) + bcpMatchCluster(bcp, ver.VersionString, fcv, sh, util.MakeRSMapFunc(rsMap), util.MakeReverseRSMapFunc(rsMap)) - if bcp.Status != pbm.StatusDone { + if bcp.Status != defs.StatusDone { return primitive.Timestamp{}, nil } return bcp.LastWriteTS, nil } -func splitByBaseSnapshot(lastWrite primitive.Timestamp, tl pbm.Timeline) []pitrRange { +func splitByBaseSnapshot(lastWrite primitive.Timestamp, tl oplog.Timeline) []pitrRange { if 
lastWrite.IsZero() || (lastWrite.T < tl.Start || lastWrite.T > tl.End) { return []pitrRange{{Range: tl, NoBaseSnapshot: true}} } @@ -384,7 +400,7 @@ func splitByBaseSnapshot(lastWrite primitive.Timestamp, tl pbm.Timeline) []pitrR if lastWrite.T > tl.Start { ranges = append(ranges, pitrRange{ - Range: pbm.Timeline{ + Range: oplog.Timeline{ Start: tl.Start, End: lastWrite.T, }, @@ -394,7 +410,7 @@ func splitByBaseSnapshot(lastWrite primitive.Timestamp, tl pbm.Timeline) []pitrR if lastWrite.T < tl.End { ranges = append(ranges, pitrRange{ - Range: pbm.Timeline{ + Range: oplog.Timeline{ Start: lastWrite.T + 1, End: tl.End, }, diff --git a/cli/list_test.go b/cmd/pbm/list_test.go similarity index 90% rename from cli/list_test.go rename to cmd/pbm/list_test.go index 339393596..54a127d17 100644 --- a/cli/list_test.go +++ b/cmd/pbm/list_test.go @@ -1,4 +1,4 @@ -package cli +package main import ( "reflect" @@ -6,11 +6,11 @@ import ( "go.mongodb.org/mongo-driver/bson/primitive" - "github.com/percona/percona-backup-mongodb/pbm" + "github.com/percona/percona-backup-mongodb/pbm/oplog" ) func Test_splitByBaseSnapshot(t *testing.T) { - tl := pbm.Timeline{Start: 3, End: 7} + tl := oplog.Timeline{Start: 3, End: 7} t.Run("lastWrite is nil", func(t *testing.T) { lastWrite := primitive.Timestamp{} @@ -62,7 +62,7 @@ func Test_splitByBaseSnapshot(t *testing.T) { want := []pitrRange{ { - Range: pbm.Timeline{ + Range: oplog.Timeline{ Start: lastWrite.T + 1, End: tl.End, }, @@ -79,14 +79,14 @@ func Test_splitByBaseSnapshot(t *testing.T) { want := []pitrRange{ { - Range: pbm.Timeline{ + Range: oplog.Timeline{ Start: tl.Start, End: lastWrite.T, }, NoBaseSnapshot: true, }, { - Range: pbm.Timeline{ + Range: oplog.Timeline{ Start: lastWrite.T + 1, End: tl.End, }, diff --git a/cmd/pbm/main.go b/cmd/pbm/main.go index 668802251..78e4f1297 100644 --- a/cmd/pbm/main.go +++ b/cmd/pbm/main.go @@ -1,9 +1,788 @@ package main import ( - "github.com/percona/percona-backup-mongodb/cli" + "bytes" + "encoding/json" + "fmt" + stdlog "log" + "os" + "strings" + "time" + + "github.com/alecthomas/kingpin" + "go.mongodb.org/mongo-driver/mongo" + + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/lock" + "github.com/percona/percona-backup-mongodb/internal/log" + "github.com/percona/percona-backup-mongodb/internal/topo" + "github.com/percona/percona-backup-mongodb/internal/types" + "github.com/percona/percona-backup-mongodb/internal/version" + "github.com/percona/percona-backup-mongodb/pbm" + "github.com/percona/percona-backup-mongodb/pbm/oplog" +) + +const ( + datetimeFormat = "2006-01-02T15:04:05" + dateFormat = "2006-01-02" +) + +const ( + RSMappingEnvVar = "PBM_REPLSET_REMAPPING" + RSMappingFlag = "replset-remapping" + RSMappingDoc = "re-map replset names for backups/oplog (e.g. 
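The list_test.go cases above pin down splitByBaseSnapshot; as a concrete reading of that function for the fixture timeline the tests use (comments show the returned pitrRange values):

    tl := oplog.Timeline{Start: 3, End: 7}
    splitByBaseSnapshot(primitive.Timestamp{}, tl)     // one range {3-7} with NoBaseSnapshot: true
    splitByBaseSnapshot(primitive.Timestamp{T: 5}, tl) // {3-5} with NoBaseSnapshot: true, then {6-7} covered by the base snapshot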
to_name_1=from_name_1,to_name_2=from_name_2)" +) + +type outFormat string + +const ( + outJSON outFormat = "json" + outJSONpretty outFormat = "json-pretty" + outText outFormat = "text" ) +type logsOpts struct { + tail int64 + node string + severity string + event string + opid string + location string + extr bool + follow bool +} + +type cliResult interface { + HasError() bool +} + func main() { - cli.Main() + var ( + pbmCmd = kingpin.New("pbm", "Percona Backup for MongoDB") + mURL = pbmCmd.Flag("mongodb-uri", + "MongoDB connection string (Default = PBM_MONGODB_URI environment variable)"). + Envar("PBM_MONGODB_URI"). + String() + pbmOutFormat = pbmCmd.Flag("out", "Output format /"). + Short('o'). + Default(string(outText)). + Enum(string(outJSON), string(outJSONpretty), string(outText)) + ) + pbmCmd.HelpFlag.Short('h') + + versionCmd := pbmCmd.Command("version", "PBM version info") + versionShort := versionCmd.Flag("short", "Show only version info"). + Short('s'). + Default("false"). + Bool() + versionCommit := versionCmd.Flag("commit", "Show only git commit info"). + Short('c'). + Default("false"). + Bool() + + configCmd := pbmCmd.Command("config", "Set, change or list the config") + cfg := configOpts{set: make(map[string]string)} + configCmd.Flag("force-resync", "Resync backup list with the current store"). + BoolVar(&cfg.rsync) + configCmd.Flag("list", "List current settings"). + BoolVar(&cfg.list) + configCmd.Flag("file", "Upload config from YAML file"). + StringVar(&cfg.file) + configCmd.Flag("set", "Set the option value "). + StringMapVar(&cfg.set) + configCmd.Arg("key", "Show the value of a specified key"). + StringVar(&cfg.key) + + backupCmd := pbmCmd.Command("backup", "Make backup") + backup := backupOpts{} + backupCmd.Flag("compression", "Compression type //////"). + EnumVar(&backup.compression, + string(defs.CompressionTypeNone), + string(defs.CompressionTypeGZIP), + string(defs.CompressionTypeSNAPPY), + string(defs.CompressionTypeLZ4), + string(defs.CompressionTypeS2), + string(defs.CompressionTypePGZIP), + string(defs.CompressionTypeZstandard)) + backupCmd.Flag("type", + fmt.Sprintf("backup type: <%s>/<%s>/<%s>/<%s>", + defs.PhysicalBackup, + defs.LogicalBackup, + defs.IncrementalBackup, + defs.ExternalBackup)). + Default(string(defs.LogicalBackup)). + Short('t'). + EnumVar(&backup.typ, + string(defs.PhysicalBackup), + string(defs.LogicalBackup), + string(defs.IncrementalBackup), + string(defs.ExternalBackup)) + backupCmd.Flag("base", "Is this a base for incremental backups"). + BoolVar(&backup.base) + backupCmd.Flag("compression-level", "Compression level (specific to the compression type)"). + IntsVar(&backup.compressionLevel) + backupCmd.Flag("ns", `Namespaces to backup (e.g. "db.*", "db.collection"). If not set, backup all ("*.*")`). + StringVar(&backup.ns) + backupCmd.Flag("wait", "Wait for the backup to finish"). + Short('w'). + BoolVar(&backup.wait) + backupCmd.Flag("list-files", "Wait for the backup to finish"). + Short('l'). + BoolVar(&backup.externList) + + cancelBcpCmd := pbmCmd.Command("cancel-backup", "Cancel backup") + + descBcpCmd := pbmCmd.Command("describe-backup", "Describe backup") + descBcp := descBcp{} + descBcpCmd.Flag("with-collections", "Show collections in backup"). + BoolVar(&descBcp.coll) + descBcpCmd.Arg("backup_name", "Backup name"). + StringVar(&descBcp.name) + + finishBackupName := "" + backupFinishCmd := pbmCmd.Command("backup-finish", "Finish external backup") + backupFinishCmd.Arg("backup_name", "Backup name"). 
+ StringVar(&finishBackupName) + + finishRestore := descrRestoreOpts{} + restoreFinishCmd := pbmCmd.Command("restore-finish", "Finish external backup") + restoreFinishCmd.Arg("restore_name", "Restore name"). + StringVar(&finishRestore.restore) + restoreFinishCmd.Flag("config", "Path to PBM config"). + Short('c'). + Required(). + StringVar(&finishRestore.cfg) + + restoreCmd := pbmCmd.Command("restore", "Restore backup") + restore := restoreOpts{} + restoreCmd.Arg("backup_name", "Backup name to restore"). + StringVar(&restore.bcp) + restoreCmd.Flag("time", fmt.Sprintf("Restore to the point-in-time. Set in format %s", datetimeFormat)). + StringVar(&restore.pitr) + restoreCmd.Flag("base-snapshot", + "Override setting: Name of older snapshot that PITR will be based on during restore."). + StringVar(&restore.pitrBase) + restoreCmd.Flag("ns", `Namespaces to restore (e.g. "db1.*,db2.collection2"). If not set, restore all ("*.*")`). + StringVar(&restore.ns) + restoreCmd.Flag("wait", "Wait for the restore to finish."). + Short('w'). + BoolVar(&restore.wait) + restoreCmd.Flag("external", "External restore."). + Short('x'). + BoolVar(&restore.extern) + restoreCmd.Flag("config", "Mongod config for the source data. External backups only!"). + Short('c'). + StringVar(&restore.conf) + restoreCmd.Flag("ts", + "MongoDB cluster time to restore to. In format (e.g. 1682093090,9). External backups only!"). + StringVar(&restore.ts) + restoreCmd.Flag(RSMappingFlag, RSMappingDoc). + Envar(RSMappingEnvVar). + StringVar(&restore.rsMap) + + replayCmd := pbmCmd.Command("oplog-replay", "Replay oplog") + replayOpts := replayOptions{} + replayCmd.Flag("start", fmt.Sprintf("Replay oplog from the time. Set in format %s", datetimeFormat)). + Required(). + StringVar(&replayOpts.start) + replayCmd.Flag("end", "Replay oplog to the time. Set in format %s"). + Required(). + StringVar(&replayOpts.end) + replayCmd.Flag("wait", "Wait for the restore to finish."). + Short('w'). + BoolVar(&replayOpts.wait) + replayCmd.Flag(RSMappingFlag, RSMappingDoc). + Envar(RSMappingEnvVar). + StringVar(&replayOpts.rsMap) + // todo(add oplog cancel) + + listCmd := pbmCmd.Command("list", "Backup list") + list := listOpts{} + listCmd.Flag("restore", "Show last N restores"). + Default("false"). + BoolVar(&list.restore) + listCmd.Flag("unbacked", "Show unbacked oplog ranges"). + Default("false"). + BoolVar(&list.unbacked) + listCmd.Flag("full", "Show extended restore info"). + Default("false"). + Short('f'). + Hidden(). + BoolVar(&list.full) + listCmd.Flag("size", "Show last N backups"). + Default("0"). + IntVar(&list.size) + listCmd.Flag(RSMappingFlag, RSMappingDoc). + Envar(RSMappingEnvVar). + StringVar(&list.rsMap) + + deleteBcpCmd := pbmCmd.Command("delete-backup", "Delete a backup") + deleteBcp := deleteBcpOpts{} + deleteBcpCmd.Arg("name", "Backup name"). + StringVar(&deleteBcp.name) + deleteBcpCmd.Flag("older-than", + fmt.Sprintf("Delete backups older than date/time in format %s or %s", + datetimeFormat, + dateFormat)). + StringVar(&deleteBcp.olderThan) + deleteBcpCmd.Flag("yes", "Don't ask confirmation"). + Short('y'). + BoolVar(&deleteBcp.force) + deleteBcpCmd.Flag("force", "Force. Don't ask confirmation"). + Short('f'). + BoolVar(&deleteBcp.force) + + deletePitrCmd := pbmCmd.Command("delete-pitr", "Delete PITR chunks") + deletePitr := deletePitrOpts{} + deletePitrCmd.Flag("older-than", + fmt.Sprintf("Delete backups older than date/time in format %s or %s", + datetimeFormat, + dateFormat)). 
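The --older-than flags above advertise the datetimeFormat and dateFormat layouts; parseDateT, defined further down in cmd/pbm/main.go, is what consumes them. For illustration (the argument values are made up):

    parseDateT("2023-09-29T12:54:39") // parsed with datetimeFormat ("2006-01-02T15:04:05")
    parseDateT("2023-09-29")          // parsed with dateFormat ("2006-01-02")
    parseDateT("12:54")               // length matches neither layout -> errInvalidFormat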
+ StringVar(&deletePitr.olderThan) + deletePitrCmd.Flag("all", "Delete all chunks"). + Short('a'). + BoolVar(&deletePitr.all) + deletePitrCmd.Flag("yes", "Don't ask confirmation"). + Short('y'). + BoolVar(&deletePitr.force) + deletePitrCmd.Flag("force", "Force. Don't ask confirmation"). + Short('f'). + BoolVar(&deletePitr.force) + + cleanupCmd := pbmCmd.Command("cleanup", "Delete Backups and PITR chunks") + cleanupOpts := cleanupOptions{} + cleanupCmd.Flag("older-than", + fmt.Sprintf("Delete older than date/time in format %s or %s", + datetimeFormat, + dateFormat)). + StringVar(&cleanupOpts.olderThan) + cleanupCmd.Flag("yes", "Don't ask confirmation"). + Short('y'). + BoolVar(&cleanupOpts.yes) + cleanupCmd.Flag("wait", "Wait for deletion done"). + Short('w'). + BoolVar(&cleanupOpts.wait) + cleanupCmd.Flag("dry-run", "Report but do not delete"). + BoolVar(&cleanupOpts.dryRun) + + logsCmd := pbmCmd.Command("logs", "PBM logs") + logs := logsOpts{} + logsCmd.Flag("follow", "Follow output"). + Short('f'). + Default("false"). + BoolVar(&logs.follow) + logsCmd.Flag("tail", "Show last N entries, 20 entries are shown by default, 0 for all logs"). + Short('t'). + Default("20"). + Int64Var(&logs.tail) + logsCmd.Flag("node", "Target node in format replset[/host:posrt]"). + Short('n'). + StringVar(&logs.node) + logsCmd.Flag("severity", "Severity level D, I, W, E or F, low to high. Choosing one includes higher levels too."). + Short('s'). + Default("I"). + EnumVar(&logs.severity, "D", "I", "W", "E", "F") + logsCmd.Flag("event", + "Event in format backup[/2020-10-06T11:45:14Z]. Events: backup, restore, cancelBackup, resync, pitr, delete"). + Short('e'). + StringVar(&logs.event) + logsCmd.Flag("opid", "Operation ID"). + Short('i'). + StringVar(&logs.opid) + logsCmd.Flag("timezone", + "Timezone of log output. `Local`, `UTC` or a location name corresponding to "+ + "a file in the IANA Time Zone database, such as `America/New_York`"). + StringVar(&logs.location) + logsCmd.Flag("extra", "Show extra data in text format"). + Hidden(). + Short('x'). + BoolVar(&logs.extr) + + statusOpts := statusOptions{} + statusCmd := pbmCmd.Command("status", "Show PBM status") + statusCmd.Flag(RSMappingFlag, RSMappingDoc). + Envar(RSMappingEnvVar). + StringVar(&statusOpts.rsMap) + statusCmd.Flag("sections", "Sections of status to display ///."). + Short('s'). + EnumsVar(&statusOpts.sections, "cluster", "pitr", "running", "backups") + + describeRestoreCmd := pbmCmd.Command("describe-restore", "Describe restore") + describeRestoreOpts := descrRestoreOpts{} + describeRestoreCmd.Arg("name", "Restore name"). + StringVar(&describeRestoreOpts.restore) + describeRestoreCmd.Flag("config", "Path to PBM config"). + Short('c'). + StringVar(&describeRestoreOpts.cfg) + + cmd, err := pbmCmd.DefaultEnvars().Parse(os.Args[1:]) + if err != nil { + fmt.Fprintln(os.Stderr, "Error: parse command line parameters:", err) + os.Exit(1) + } + pbmOutF := outFormat(*pbmOutFormat) + var out fmt.Stringer + + if cmd == versionCmd.FullCommand() { + switch { + case *versionCommit: + out = outCaption{"GitCommit", version.Current().GitCommit} + case *versionShort: + out = outCaption{"Version", version.Current().Version} + default: + out = version.Current() + } + printo(out, pbmOutF) + return + } + + if *mURL == "" { + fmt.Fprintln(os.Stderr, "Error: no mongodb connection URI supplied") + fmt.Fprintln(os.Stderr, + " Usual practice is the set it by the PBM_MONGODB_URI environment variable. 
"+ + "It can also be set with commandline argument --mongodb-uri.") + pbmCmd.Usage(os.Args[1:]) + os.Exit(1) + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + var pbmClient *pbm.PBM + // we don't need pbm connection if it is `pbm describe-restore -c ...` + // or `pbm restore-finish ` + if describeRestoreOpts.cfg == "" && finishRestore.cfg == "" { + pbmClient, err = pbm.New(ctx, *mURL, "pbm-ctl") + if err != nil { + exitErr(errors.Wrap(err, "connect to mongodb"), pbmOutF) + } + pbmClient.InitLogger("", "") + + ver, err := version.GetMongoVersion(ctx, pbmClient.Conn.MongoClient()) + if err != nil { + stdlog.Fatalf("get mongo version: %v", err) + } + if err := version.FeatureSupport(ver).PBMSupport(); err != nil { + fmt.Fprintf(os.Stderr, "WARNING: %v\n", err) + } + } + + switch cmd { + case configCmd.FullCommand(): + out, err = runConfig(ctx, pbmClient, &cfg) + case backupCmd.FullCommand(): + backup.name = time.Now().UTC().Format(time.RFC3339) + out, err = runBackup(ctx, pbmClient, &backup, pbmOutF) + case cancelBcpCmd.FullCommand(): + out, err = cancelBcp(ctx, pbmClient) + case backupFinishCmd.FullCommand(): + out, err = runFinishBcp(ctx, pbmClient, finishBackupName) + case restoreFinishCmd.FullCommand(): + out, err = runFinishRestore(finishRestore) + case descBcpCmd.FullCommand(): + out, err = describeBackup(ctx, pbmClient, &descBcp) + case restoreCmd.FullCommand(): + out, err = runRestore(ctx, pbmClient, &restore, pbmOutF) + case replayCmd.FullCommand(): + out, err = replayOplog(ctx, pbmClient, replayOpts, pbmOutF) + case listCmd.FullCommand(): + out, err = runList(ctx, pbmClient, &list) + case deleteBcpCmd.FullCommand(): + out, err = deleteBackup(ctx, pbmClient, &deleteBcp, pbmOutF) + case deletePitrCmd.FullCommand(): + out, err = deletePITR(ctx, pbmClient, &deletePitr, pbmOutF) + case cleanupCmd.FullCommand(): + out, err = retentionCleanup(ctx, pbmClient, &cleanupOpts) + case logsCmd.FullCommand(): + out, err = runLogs(ctx, pbmClient, &logs) + case statusCmd.FullCommand(): + out, err = status(ctx, pbmClient, *mURL, statusOpts, pbmOutF == outJSONpretty) + case describeRestoreCmd.FullCommand(): + out, err = describeRestore(ctx, pbmClient, describeRestoreOpts) + } + + if err != nil { + exitErr(err, pbmOutF) + } + + printo(out, pbmOutF) + + if r, ok := out.(cliResult); ok && r.HasError() { + os.Exit(1) + } +} + +func printo(out fmt.Stringer, f outFormat) { + if out == nil { + return + } + + switch f { + case outJSON: + err := json.NewEncoder(os.Stdout).Encode(out) + if err != nil { + exitErr(errors.Wrap(err, "encode output"), f) + } + case outJSONpretty: + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + err := enc.Encode(out) + if err != nil { + exitErr(errors.Wrap(err, "encode output"), f) + } + default: + fmt.Println(strings.TrimSpace(out.String())) + } +} + +func exitErr(e error, f outFormat) { + switch f { + case outJSON, outJSONpretty: + var m interface{} + m = e + if _, ok := e.(json.Marshaler); !ok { //nolint:errorlint + m = map[string]string{"Error": e.Error()} + } + + j := json.NewEncoder(os.Stdout) + if f == outJSONpretty { + j.SetIndent("", " ") + } + + if err := j.Encode(m); err != nil { + fmt.Fprintf(os.Stderr, "Error: encoding error \"%v\": %v", m, err) + } + default: + fmt.Fprintln(os.Stderr, "Error:", e) + } + + os.Exit(1) +} + +func runLogs(ctx context.Context, cn *pbm.PBM, l *logsOpts) (fmt.Stringer, error) { + r := &log.LogRequest{} + + if l.node != "" { + n := strings.Split(l.node, "/") + r.RS = n[0] + if len(n) > 1 { + 
r.Node = n[1] + } + } + + if l.event != "" { + e := strings.Split(l.event, "/") + r.Event = e[0] + if len(e) > 1 { + r.ObjName = e[1] + } + } + + if l.opid != "" { + r.OPID = l.opid + } + + switch l.severity { + case "F": + r.Severity = log.Fatal + case "E": + r.Severity = log.Error + case "W": + r.Severity = log.Warning + case "I": + r.Severity = log.Info + case "D": + r.Severity = log.Debug + default: + r.Severity = log.Info + } + + if l.follow { + err := followLogs(ctx, cn, r, r.Node == "", l.extr) + return nil, err + } + + o, err := log.LogGet(ctx, cn.Conn, r, l.tail) + if err != nil { + return nil, errors.Wrap(err, "get logs") + } + + o.ShowNode = r.Node == "" + o.Extr = l.extr + + // reverse list + for i := len(o.Data)/2 - 1; i >= 0; i-- { + opp := len(o.Data) - 1 - i + o.Data[i], o.Data[opp] = o.Data[opp], o.Data[i] + } + + err = o.SetLocation(l.location) + if err != nil { + fmt.Fprintf(os.Stderr, "Error: failed to parse timezone: %v\n\n", err) + } + + return o, nil +} + +func followLogs(ctx context.Context, cn *pbm.PBM, r *log.LogRequest, showNode, expr bool) error { + outC, errC := log.Follow(ctx, cn.Conn.LogCollection(), r, false) + + for { + select { + case entry, ok := <-outC: + if !ok { + return nil + } + + fmt.Println(entry.Stringify(tsUTC, showNode, expr)) + case err, ok := <-errC: + if !ok { + return nil + } + + return err + } + } +} + +func tsUTC(ts int64) string { + return time.Unix(ts, 0).UTC().Format(time.RFC3339) +} + +type snapshotStat struct { + Name string `json:"name"` + Namespaces []string `json:"nss,omitempty"` + Size int64 `json:"size,omitempty"` + Status defs.Status `json:"status"` + Err error `json:"-"` + ErrString string `json:"error,omitempty"` + RestoreTS int64 `json:"restoreTo"` + PBMVersion string `json:"pbmVersion"` + Type defs.BackupType `json:"type"` + SrcBackup string `json:"src"` +} + +type pitrRange struct { + Err error `json:"error,omitempty"` + Range oplog.Timeline `json:"range"` + NoBaseSnapshot bool `json:"noBaseSnapshot,omitempty"` +} + +func (pr pitrRange) String() string { + return fmt.Sprintf("{ %s }", pr.Range) +} + +func fmtTS(ts int64) string { + return time.Unix(ts, 0).UTC().Format(time.RFC3339) +} + +type outMsg struct { + Msg string `json:"msg"` +} + +func (m outMsg) String() string { + return m.Msg +} + +type outCaption struct { + k string + v interface{} +} + +func (c outCaption) String() string { + return fmt.Sprint(c.v) +} + +func (c outCaption) MarshalJSON() ([]byte, error) { + var b bytes.Buffer + b.WriteString("{") + b.WriteString(fmt.Sprintf("\"%s\":", c.k)) + err := json.NewEncoder(&b).Encode(c.v) + if err != nil { + return nil, err + } + b.WriteString("}") + return b.Bytes(), nil +} + +func cancelBcp(ctx context.Context, cn *pbm.PBM) (fmt.Stringer, error) { + err := sendCmd(ctx, cn.Conn, types.Cmd{ + Cmd: defs.CmdCancelBackup, + }) + if err != nil { + return nil, errors.Wrap(err, "send backup canceling") + } + return outMsg{"Backup cancellation has started"}, nil +} + +var errInvalidFormat = errors.New("invalid format") + +func parseDateT(v string) (time.Time, error) { + switch len(v) { + case len(datetimeFormat): + return time.Parse(datetimeFormat, v) + case len(dateFormat): + return time.Parse(dateFormat, v) + } + + return time.Time{}, errInvalidFormat +} + +type findLockFn = func(ctx context.Context, m connect.Client, lh *lock.LockHeader) ([]lock.LockData, error) + +func findLock(ctx context.Context, cn *pbm.PBM, fn findLockFn) (*lock.LockData, error) { + locks, err := fn(ctx, cn.Conn, &lock.LockHeader{}) + if err != nil { 
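outMsg and outCaption above are the two small fmt.Stringer wrappers that printo (earlier in main.go) renders either as plain text or as JSON, depending on --out; outCaption{"Version", ...} from the version command prints the bare value in text mode and a {"Version": ...} object in JSON mode. A quick illustration with a message string taken from this patch:

    printo(outMsg{"Backup cancellation has started"}, outText) // Backup cancellation has started
    printo(outMsg{"Backup cancellation has started"}, outJSON) // {"msg":"Backup cancellation has started"}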
+ return nil, errors.Wrap(err, "get locks") + } + + ct, err := topo.GetClusterTime(ctx, cn.Conn) + if err != nil { + return nil, errors.Wrap(err, "get cluster time") + } + + var lck *lock.LockData + for _, l := range locks { + // We don't care about the PITR slicing here. It is a subject of other status sections + if l.Type == defs.CmdPITR || l.Heartbeat.T+defs.StaleFrameSec < ct.T { + continue + } + + // Just check if all locks are for the same op + // + // It could happen that the healthy `lk` became stale by the time of this check + // or the op was finished and the new one was started. So the `l.Type != lk.Type` + // would be true but for the legit reason (no error). + // But chances for that are quite low and on the next run of `pbm status` everything + // would be ok. So no reason to complicate code to avoid that. + if lck != nil && l.OPID != lck.OPID { + if err != nil { + return nil, errors.Errorf("conflicting ops running: [%s/%s::%s-%s] [%s/%s::%s-%s]. "+ + "This conflict may naturally resolve after 10 seconds", + l.Replset, l.Node, l.Type, l.OPID, + lck.Replset, lck.Node, lck.Type, lck.OPID, + ) + } + } + + l := l + lck = &l + } + + return lck, nil +} + +var errTout = errors.Errorf("timeout reached") + +// waitOp waits up to waitFor duration until operations which acquires a given lock are finished +func waitOp(ctx context.Context, pbmClient *pbm.PBM, lck *lock.LockHeader, waitFor time.Duration) error { + // just to be sure the check hasn't started before the lock were created + time.Sleep(1 * time.Second) + fmt.Print(".") + + tmr := time.NewTimer(waitFor) + defer tmr.Stop() + tkr := time.NewTicker(1 * time.Second) + defer tkr.Stop() + for { + select { + case <-tmr.C: + return errTout + case <-tkr.C: + fmt.Print(".") + lock, err := lock.GetLockData(ctx, pbmClient.Conn, lck) + if err != nil { + // No lock, so operation has finished + if errors.Is(err, mongo.ErrNoDocuments) { + return nil + } + return errors.Wrap(err, "get lock data") + } + clusterTime, err := topo.GetClusterTime(ctx, pbmClient.Conn) + if err != nil { + return errors.Wrap(err, "read cluster time") + } + if lock.Heartbeat.T+defs.StaleFrameSec < clusterTime.T { + return errors.Errorf("operation stale, last beat ts: %d", lock.Heartbeat.T) + } + } + } +} + +func lastLogErr(ctx context.Context, cn *pbm.PBM, op defs.Command, after int64) (string, error) { + l, err := log.LogGet(ctx, + cn.Conn, + &log.LogRequest{ + LogKeys: log.LogKeys{ + Severity: log.Error, + Event: string(op), + }, + }, 1) + if err != nil { + return "", errors.Wrap(err, "get log records") + } + if len(l.Data) == 0 { + return "", nil + } + + if l.Data[0].TS < after { + return "", nil + } + + return l.Data[0].Msg, nil +} + +type concurentOpError struct { + op *lock.LockHeader +} + +func (e concurentOpError) Error() string { + return fmt.Sprintf("another operation in progress, %s/%s [%s/%s]", e.op.Type, e.op.OPID, e.op.Replset, e.op.Node) +} + +func (e concurentOpError) As(err any) bool { + if err == nil { + return false + } + + er, ok := err.(concurentOpError) + if !ok { + return false + } + + er.op = e.op + return true +} + +func (e concurentOpError) MarshalJSON() ([]byte, error) { + s := make(map[string]interface{}) + s["error"] = "another operation in progress" + s["operation"] = e.op + return json.Marshal(s) +} + +func checkConcurrentOp(ctx context.Context, cn *pbm.PBM) error { + locks, err := lock.GetLocks(ctx, cn.Conn, &lock.LockHeader{}) + if err != nil { + return errors.Wrap(err, "get locks") + } + + ts, err := topo.GetClusterTime(ctx, cn.Conn) + 
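findLock, waitOp, and checkConcurrentOp around this point all apply the same staleness rule to a lock heartbeat before trusting it; extracted as a sketch (isStale is a hypothetical name, the comparison is copied from those functions):

    // A lock is treated as stale when its heartbeat lags the current
    // cluster time by more than defs.StaleFrameSec seconds.
    func isStale(hb, clusterTime primitive.Timestamp) bool {
        return hb.T+defs.StaleFrameSec < clusterTime.T
    }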
if err != nil { + return errors.Wrap(err, "read cluster time") + } + + // Stop if there is some live operation. + // But in case of stale lock just move on + // and leave it for agents to deal with. + for _, l := range locks { + if l.Heartbeat.T+defs.StaleFrameSec >= ts.T { + return concurentOpError{&l.LockHeader} + } + } + + return nil } diff --git a/cli/ns.go b/cmd/pbm/ns.go similarity index 88% rename from cli/ns.go rename to cmd/pbm/ns.go index 0ddfe714a..5058d9e3d 100644 --- a/cli/ns.go +++ b/cmd/pbm/ns.go @@ -1,9 +1,9 @@ -package cli +package main import ( "strings" - "github.com/pkg/errors" + "github.com/percona/percona-backup-mongodb/internal/errors" ) func parseRSNamesMapping(s string) (map[string]string, error) { @@ -66,10 +66,10 @@ func parseCLINSOption(s string) ([]string, error) { for _, ns := range strings.Split(s, ",") { db, coll, ok := strings.Cut(strings.TrimSpace(ns), ".") if !ok { - return nil, errors.WithMessage(ErrInvalidNamespace, ns) + return nil, errors.Wrap(ErrInvalidNamespace, ns) } if db == "" || coll == "" || (db == "*" && coll != "*") { - return nil, errors.WithMessage(ErrInvalidNamespace, ns) + return nil, errors.Wrap(ErrInvalidNamespace, ns) } if db == "admin" || db == "config" || db == "local" { return nil, ErrForbiddenDatabase @@ -85,14 +85,14 @@ func parseCLINSOption(s string) ([]string, error) { } if _, ok := m["*"]; ok && len(m) != 1 { - return nil, errors.WithMessage(ErrAmbiguousNamespace, + return nil, errors.Wrap(ErrAmbiguousNamespace, "cannot use * with other databases") } rv := []string{} for db, colls := range m { if _, ok := colls["*"]; ok && len(colls) != 1 { - return nil, errors.WithMessagef(ErrAmbiguousNamespace, + return nil, errors.Wrapf(ErrAmbiguousNamespace, "cannot use * with other collections in %q database", db) } diff --git a/cli/ns_test.go b/cmd/pbm/ns_test.go similarity index 96% rename from cli/ns_test.go rename to cmd/pbm/ns_test.go index 120a564e9..ed169532b 100644 --- a/cli/ns_test.go +++ b/cmd/pbm/ns_test.go @@ -1,8 +1,9 @@ -package cli +package main import ( - "errors" "testing" + + "github.com/percona/percona-backup-mongodb/internal/errors" ) func TestParseCLINSOption(t *testing.T) { diff --git a/cli/oplog.go b/cmd/pbm/oplog.go similarity index 63% rename from cli/oplog.go rename to cmd/pbm/oplog.go index 6d2cba4da..62a416dc0 100644 --- a/cli/oplog.go +++ b/cmd/pbm/oplog.go @@ -1,12 +1,14 @@ -package cli +package main import ( - "context" "fmt" "time" - "github.com/pkg/errors" - + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/query" + "github.com/percona/percona-backup-mongodb/internal/types" "github.com/percona/percona-backup-mongodb/pbm" ) @@ -37,10 +39,10 @@ func (r oplogReplayResult) String() string { return fmt.Sprintf("Oplog replay %q has started", r.Name) } -func replayOplog(cn *pbm.PBM, o replayOptions, outf outFormat) (fmt.Stringer, error) { +func replayOplog(ctx context.Context, cn *pbm.PBM, o replayOptions, outf outFormat) (fmt.Stringer, error) { rsMap, err := parseRSNamesMapping(o.rsMap) if err != nil { - return nil, errors.WithMessage(err, "cannot parse replset mapping") + return nil, errors.Wrap(err, "cannot parse replset mapping") } startTS, err := parseTS(o.start) @@ -52,22 +54,22 @@ func replayOplog(cn *pbm.PBM, o replayOptions, outf outFormat) (fmt.Stringer, er return nil, errors.Wrap(err, "parse end time") } - err = 
checkConcurrentOp(cn) + err = checkConcurrentOp(ctx, cn) if err != nil { return nil, err } name := time.Now().UTC().Format(time.RFC3339Nano) - cmd := pbm.Cmd{ - Cmd: pbm.CmdReplay, - Replay: &pbm.ReplayCmd{ + cmd := types.Cmd{ + Cmd: defs.CmdReplay, + Replay: &types.ReplayCmd{ Name: name, Start: startTS, End: endTS, RSMap: rsMap, }, } - if err := cn.SendCmd(cmd); err != nil { + if err := sendCmd(ctx, cn.Conn, cmd); err != nil { return nil, errors.Wrap(err, "send command") } @@ -77,10 +79,10 @@ func replayOplog(cn *pbm.PBM, o replayOptions, outf outFormat) (fmt.Stringer, er fmt.Printf("Starting oplog replay '%s - %s'", o.start, o.end) - ctx, cancel := context.WithTimeout(context.Background(), pbm.WaitActionStart) + startCtx, cancel := context.WithTimeout(ctx, defs.WaitActionStart) defer cancel() - m, err := waitForRestoreStatus(ctx, name, cn.GetRestoreMeta) + m, err := waitForRestoreStatus(startCtx, cn.Conn, name, query.GetRestoreMeta) if err != nil { return nil, err } @@ -90,7 +92,7 @@ func replayOplog(cn *pbm.PBM, o replayOptions, outf outFormat) (fmt.Stringer, er } fmt.Print("Started.\nWaiting to finish") - err = waitRestore(cn, m, pbm.StatusDone, 0) + err = waitRestore(ctx, cn, m, defs.StatusDone, 0) if err != nil { return oplogReplayResult{err: err.Error()}, nil //nolint:nilerr } diff --git a/cli/restore.go b/cmd/pbm/restore.go similarity index 70% rename from cli/restore.go rename to cmd/pbm/restore.go index 9d98f9e16..225461acc 100644 --- a/cli/restore.go +++ b/cmd/pbm/restore.go @@ -1,8 +1,7 @@ -package cli +package main import ( "bytes" - "context" "encoding/json" "fmt" "io" @@ -12,13 +11,22 @@ import ( "time" "github.com/mongodb/mongo-tools/common/db" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson/primitive" "gopkg.in/yaml.v2" + "github.com/percona/percona-backup-mongodb/internal/config" + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/log" + "github.com/percona/percona-backup-mongodb/internal/query" + "github.com/percona/percona-backup-mongodb/internal/resync" + "github.com/percona/percona-backup-mongodb/internal/storage" + "github.com/percona/percona-backup-mongodb/internal/topo" + "github.com/percona/percona-backup-mongodb/internal/types" + "github.com/percona/percona-backup-mongodb/internal/util" "github.com/percona/percona-backup-mongodb/pbm" - "github.com/percona/percona-backup-mongodb/pbm/log" - "github.com/percona/percona-backup-mongodb/pbm/storage" ) type restoreOpts struct { @@ -89,33 +97,33 @@ func (r externRestoreRet) String() string { r.Name, r.Name) } -func runRestore(cn *pbm.PBM, o *restoreOpts, outf outFormat) (fmt.Stringer, error) { +func runRestore(ctx context.Context, cn *pbm.PBM, o *restoreOpts, outf outFormat) (fmt.Stringer, error) { nss, err := parseCLINSOption(o.ns) if err != nil { - return nil, errors.WithMessage(err, "parse --ns option") + return nil, errors.Wrap(err, "parse --ns option") } rsMap, err := parseRSNamesMapping(o.rsMap) if err != nil { - return nil, errors.WithMessage(err, "cannot parse replset mapping") + return nil, errors.Wrap(err, "cannot parse replset mapping") } if o.pitr != "" && o.bcp != "" { return nil, errors.New("either a backup name or point in time should be set, non both together!") } - clusterTime, err := cn.ClusterTime() + clusterTime, err := topo.GetClusterTime(ctx, 
cn.Conn) if err != nil { return nil, errors.Wrap(err, "read cluster time") } tdiff := time.Now().Unix() - int64(clusterTime.T) - m, err := restore(cn, o, nss, rsMap, outf) + m, err := restore(ctx, cn, o, nss, rsMap, outf) if err != nil { return nil, err } if o.extern && outf == outText { - err = waitRestore(cn, m, pbm.StatusCopyReady, tdiff) + err = waitRestore(ctx, cn, m, defs.StatusCopyReady, tdiff) if err != nil { return nil, errors.Wrap(err, "waiting for the `copyReady` status") } @@ -126,21 +134,21 @@ func runRestore(cn *pbm.PBM, o *restoreOpts, outf outFormat) (fmt.Stringer, erro return restoreRet{ Name: m.Name, Snapshot: o.bcp, - physical: m.Type == pbm.PhysicalBackup || m.Type == pbm.IncrementalBackup, + physical: m.Type == defs.PhysicalBackup || m.Type == defs.IncrementalBackup, }, nil } typ := " logical restore.\nWaiting to finish" - if m.Type == pbm.PhysicalBackup { + if m.Type == defs.PhysicalBackup { typ = " physical restore.\nWaiting to finish" } fmt.Printf("Started%s", typ) - err = waitRestore(cn, m, pbm.StatusDone, tdiff) + err = waitRestore(ctx, cn, m, defs.StatusDone, tdiff) if err == nil { return restoreRet{ Name: m.Name, done: true, - physical: m.Type == pbm.PhysicalBackup || m.Type == pbm.IncrementalBackup, + physical: m.Type == defs.PhysicalBackup || m.Type == defs.IncrementalBackup, }, nil } @@ -155,10 +163,10 @@ func runRestore(cn *pbm.PBM, o *restoreOpts, outf outFormat) (fmt.Stringer, erro // But for physical ones, the cluster by this time is down. So we compare with // the wall time taking into account a time skew (wallTime - clusterTime) taken // when the cluster time was still available. -func waitRestore(cn *pbm.PBM, m *pbm.RestoreMeta, status pbm.Status, tskew int64) error { - ep, _ := cn.GetEpoch() - l := cn.Logger().NewEvent(string(pbm.CmdRestore), m.Backup, m.OPID, ep.TS()) - stg, err := cn.GetStorage(l) +func waitRestore(ctx context.Context, cn *pbm.PBM, m *types.RestoreMeta, status defs.Status, tskew int64) error { + ep, _ := config.GetEpoch(ctx, cn.Conn) + l := cn.Logger().NewEvent(string(defs.CmdRestore), m.Backup, m.OPID, ep.TS()) + stg, err := util.GetStorage(ctx, cn.Conn, l) if err != nil { return errors.Wrap(err, "get storage") } @@ -166,24 +174,24 @@ func waitRestore(cn *pbm.PBM, m *pbm.RestoreMeta, status pbm.Status, tskew int64 tk := time.NewTicker(time.Second * 1) defer tk.Stop() - var rmeta *pbm.RestoreMeta + var rmeta *types.RestoreMeta - getMeta := cn.GetRestoreMeta - if m.Type == pbm.PhysicalBackup || m.Type == pbm.IncrementalBackup { - getMeta = func(name string) (*pbm.RestoreMeta, error) { - return pbm.GetPhysRestoreMeta(name, stg, l) + getMeta := query.GetRestoreMeta + if m.Type == defs.PhysicalBackup || m.Type == defs.IncrementalBackup { + getMeta = func(_ context.Context, _ connect.Client, name string) (*types.RestoreMeta, error) { + return resync.GetPhysRestoreMeta(name, stg, l) } } var ctime uint32 - frameSec := pbm.StaleFrameSec - if m.Type != pbm.LogicalBackup { + frameSec := defs.StaleFrameSec + if m.Type != defs.LogicalBackup { frameSec = 60 * 3 } for range tk.C { fmt.Print(".") - rmeta, err = getMeta(m.Name) - if errors.Is(err, pbm.ErrNotFound) { + rmeta, err = getMeta(ctx, cn.Conn, m.Name) + if errors.Is(err, errors.ErrNotFound) { continue } if err != nil { @@ -191,14 +199,14 @@ func waitRestore(cn *pbm.PBM, m *pbm.RestoreMeta, status pbm.Status, tskew int64 } switch rmeta.Status { - case status, pbm.StatusDone, pbm.StatusPartlyDone: + case status, defs.StatusDone, defs.StatusPartlyDone: return nil - case pbm.StatusError: + 
case defs.StatusError: return restoreFailedError{fmt.Sprintf("operation failed with: %s", rmeta.Error)} } - if m.Type == pbm.LogicalBackup { - clusterTime, err := cn.ClusterTime() + if m.Type == defs.LogicalBackup { + clusterTime, err := topo.GetClusterTime(ctx, cn.Conn) if err != nil { return errors.Wrap(err, "read cluster time") } @@ -232,9 +240,9 @@ func (e restoreFailedError) Is(err error) bool { return ok } -func checkBackup(cn *pbm.PBM, o *restoreOpts, nss []string) (string, pbm.BackupType, error) { +func checkBackup(ctx context.Context, cn *pbm.PBM, o *restoreOpts, nss []string) (string, defs.BackupType, error) { if o.extern && o.bcp == "" { - return "", pbm.ExternalBackup, nil + return "", defs.ExternalBackup, nil } b := o.bcp @@ -243,31 +251,31 @@ func checkBackup(cn *pbm.PBM, o *restoreOpts, nss []string) (string, pbm.BackupT } var err error - var bcp *pbm.BackupMeta + var bcp *types.BackupMeta if b != "" { - bcp, err = cn.GetBackupMeta(b) - if errors.Is(err, pbm.ErrNotFound) { + bcp, err = query.GetBackupMeta(ctx, cn.Conn, b) + if errors.Is(err, errors.ErrNotFound) { return "", "", errors.Errorf("backup '%s' not found", b) } } else { var ts primitive.Timestamp ts, err = parseTS(o.pitr) if err != nil { - return "", "", errors.WithMessage(err, "parse pitr") + return "", "", errors.Wrap(err, "parse pitr") } - bcp, err = cn.GetLastBackup(&primitive.Timestamp{T: ts.T + 1, I: 0}) - if errors.Is(err, pbm.ErrNotFound) { + bcp, err = query.GetLastBackup(ctx, cn.Conn, &primitive.Timestamp{T: ts.T + 1, I: 0}) + if errors.Is(err, errors.ErrNotFound) { return "", "", errors.New("no base snapshot found") } } if err != nil { - return "", "", errors.WithMessage(err, "get backup data") + return "", "", errors.Wrap(err, "get backup data") } - if len(nss) != 0 && bcp.Type != pbm.LogicalBackup { + if len(nss) != 0 && bcp.Type != defs.LogicalBackup { return "", "", errors.New("--ns flag is only allowed for logical restore") } - if bcp.Status != pbm.StatusDone { + if bcp.Status != defs.StatusDone { return "", "", errors.Errorf("backup '%s' didn't finish successfully", b) } @@ -275,26 +283,27 @@ func checkBackup(cn *pbm.PBM, o *restoreOpts, nss []string) (string, pbm.BackupT } func restore( + ctx context.Context, cn *pbm.PBM, o *restoreOpts, nss []string, rsMapping map[string]string, outf outFormat, -) (*pbm.RestoreMeta, error) { - bcp, bcpType, err := checkBackup(cn, o, nss) +) (*types.RestoreMeta, error) { + bcp, bcpType, err := checkBackup(ctx, cn, o, nss) if err != nil { return nil, err } - err = checkConcurrentOp(cn) + err = checkConcurrentOp(ctx, cn) if err != nil { return nil, err } name := time.Now().UTC().Format(time.RFC3339Nano) - cmd := pbm.Cmd{ - Cmd: pbm.CmdRestore, - Restore: &pbm.RestoreCmd{ + cmd := types.Cmd{ + Cmd: defs.CmdRestore, + Restore: &types.RestoreCmd{ Name: name, BackupName: bcp, Namespaces: nss, @@ -331,13 +340,13 @@ func restore( } } - err = cn.SendCmd(cmd) + err = sendCmd(ctx, cn.Conn, cmd) if err != nil { return nil, errors.Wrap(err, "send command") } if outf != outText { - return &pbm.RestoreMeta{ + return &types.RestoreMeta{ Name: name, Backup: bcp, Type: bcpType, @@ -359,30 +368,31 @@ func restore( var ( fn getRestoreMetaFn - ctx context.Context cancel context.CancelFunc ) // physical restore may take more time to start const waitPhysRestoreStart = time.Second * 120 - if bcpType == pbm.LogicalBackup { - fn = cn.GetRestoreMeta - ctx, cancel = context.WithTimeout(context.Background(), pbm.WaitActionStart) + var startCtx context.Context + if bcpType == 
defs.LogicalBackup { + fn = query.GetRestoreMeta + startCtx, cancel = context.WithTimeout(ctx, defs.WaitActionStart) } else { - ep, _ := cn.GetEpoch() - stg, err := cn.GetStorage(cn.Logger().NewEvent(string(pbm.CmdRestore), bcp, "", ep.TS())) + ep, _ := config.GetEpoch(ctx, cn.Conn) + l := cn.Logger().NewEvent(string(defs.CmdRestore), bcp, "", ep.TS()) + stg, err := util.GetStorage(ctx, cn.Conn, l) if err != nil { return nil, errors.Wrap(err, "get storage") } - fn = func(name string) (*pbm.RestoreMeta, error) { - return pbm.GetPhysRestoreMeta(name, stg, cn.Logger().NewEvent(string(pbm.CmdRestore), bcp, "", ep.TS())) + fn = func(_ context.Context, _ connect.Client, name string) (*types.RestoreMeta, error) { + return resync.GetPhysRestoreMeta(name, stg, cn.Logger().NewEvent(string(defs.CmdRestore), bcp, "", ep.TS())) } - ctx, cancel = context.WithTimeout(context.Background(), waitPhysRestoreStart) + startCtx, cancel = context.WithTimeout(ctx, waitPhysRestoreStart) } defer cancel() - return waitForRestoreStatus(ctx, name, fn) + return waitForRestoreStatus(startCtx, cn.Conn, name, fn) } func runFinishRestore(o descrRestoreOpts) (fmt.Stringer, error) { @@ -391,9 +401,9 @@ func runFinishRestore(o descrRestoreOpts) (fmt.Stringer, error) { return nil, errors.Wrap(err, "get storage") } - path := fmt.Sprintf("%s/%s/cluster", pbm.PhysRestoresDir, o.restore) + path := fmt.Sprintf("%s/%s/cluster", defs.PhysRestoresDir, o.restore) return outMsg{"Command sent. Check `pbm describe-restore ...` for the result."}, - stg.Save(path+"."+string(pbm.StatusCopyDone), + stg.Save(path+"."+string(defs.StatusCopyDone), bytes.NewReader([]byte( fmt.Sprintf("%d", time.Now().Unix()), )), -1) @@ -422,21 +432,26 @@ func parseTS(t string) (primitive.Timestamp, error) { return primitive.Timestamp{T: uint32(tsto.Unix()), I: 0}, nil } -type getRestoreMetaFn func(name string) (*pbm.RestoreMeta, error) +type getRestoreMetaFn func(ctx context.Context, m connect.Client, name string) (*types.RestoreMeta, error) -func waitForRestoreStatus(ctx context.Context, name string, getfn getRestoreMetaFn) (*pbm.RestoreMeta, error) { +func waitForRestoreStatus( + ctx context.Context, + m connect.Client, + name string, + getfn getRestoreMetaFn, +) (*types.RestoreMeta, error) { tk := time.NewTicker(time.Second * 1) defer tk.Stop() - meta := new(pbm.RestoreMeta) // TODO + meta := new(types.RestoreMeta) // TODO for { select { case <-tk.C: fmt.Print(".") var err error - meta, err = getfn(name) - if errors.Is(err, pbm.ErrNotFound) { + meta, err = getfn(ctx, m, name) + if errors.Is(err, errors.ErrNotFound) { continue } if err != nil { @@ -446,9 +461,9 @@ func waitForRestoreStatus(ctx context.Context, name string, getfn getRestoreMeta continue } switch meta.Status { - case pbm.StatusRunning, pbm.StatusDumpDone, pbm.StatusDone: + case defs.StatusRunning, defs.StatusDumpDone, defs.StatusDone: return meta, nil - case pbm.StatusError: + case defs.StatusError: rs := "" for _, s := range meta.Replsets { rs += fmt.Sprintf("\n- Restore on replicaset \"%s\" in state: %v", s.Name, s.Status) @@ -486,8 +501,8 @@ type describeRestoreResult struct { Name string `json:"name" yaml:"name"` OPID string `json:"opid" yaml:"opid"` Backup string `json:"backup" yaml:"backup"` - Type pbm.BackupType `json:"type" yaml:"type"` - Status pbm.Status `json:"status" yaml:"status"` + Type defs.BackupType `json:"type" yaml:"type"` + Status defs.Status `json:"status" yaml:"status"` Error *string `json:"error,omitempty" yaml:"error,omitempty"` Namespaces []string 
`json:"namespaces,omitempty" yaml:"namespaces,omitempty"` StartTS *int64 `json:"start_ts,omitempty" yaml:"-"` @@ -501,7 +516,7 @@ type describeRestoreResult struct { type RestoreReplset struct { Name string `json:"name" yaml:"name"` - Status pbm.Status `json:"status" yaml:"status"` + Status defs.Status `json:"status" yaml:"status"` PartialTxn []db.Oplog `json:"partial_txn,omitempty" yaml:"-"` PartialTxnStr *string `json:"-" yaml:"partial_txn,omitempty"` LastTransitionTS int64 `json:"last_transition_ts" yaml:"-"` @@ -511,11 +526,11 @@ type RestoreReplset struct { } type RestoreNode struct { - Name string `json:"name" yaml:"name"` - Status pbm.Status `json:"status" yaml:"status"` - Error *string `json:"error,omitempty" yaml:"error,omitempty"` - LastTransitionTS int64 `json:"last_transition_ts" yaml:"-"` - LastTransitionTime string `json:"last_transition_time" yaml:"last_transition_time"` + Name string `json:"name" yaml:"name"` + Status defs.Status `json:"status" yaml:"status"` + Error *string `json:"error,omitempty" yaml:"error,omitempty"` + LastTransitionTS int64 `json:"last_transition_ts" yaml:"-"` + LastTransitionTime string `json:"last_transition_time" yaml:"last_transition_time"` } func (r describeRestoreResult) String() string { @@ -533,19 +548,19 @@ func getRestoreMetaStg(cfgPath string) (storage.Storage, error) { return nil, errors.Wrap(err, "unable to read config file") } - var cfg pbm.Config + var cfg config.Config err = yaml.UnmarshalStrict(buf, &cfg) if err != nil { return nil, errors.Wrap(err, "unable to unmarshal config file") } l := log.New(nil, "cli", "").NewEvent("", "", "", primitive.Timestamp{}) - return pbm.Storage(cfg, l) + return util.StorageFromConfig(cfg, l) } -func describeRestore(cn *pbm.PBM, o descrRestoreOpts) (fmt.Stringer, error) { +func describeRestore(ctx context.Context, cn *pbm.PBM, o descrRestoreOpts) (fmt.Stringer, error) { var ( - meta *pbm.RestoreMeta + meta *types.RestoreMeta err error res describeRestoreResult ) @@ -554,13 +569,13 @@ func describeRestore(cn *pbm.PBM, o descrRestoreOpts) (fmt.Stringer, error) { if err != nil { return nil, errors.Wrap(err, "get storage") } - meta, err = pbm.GetPhysRestoreMeta(o.restore, stg, log.New(nil, "cli", ""). + meta, err = resync.GetPhysRestoreMeta(o.restore, stg, log.New(nil, "cli", ""). 
NewEvent("", "", "", primitive.Timestamp{})) if err != nil && meta == nil { return nil, errors.Wrap(err, "get restore meta") } } else { - meta, err = cn.GetRestoreMeta(o.restore) + meta, err = query.GetRestoreMeta(ctx, cn.Conn, o.restore) if err != nil { return nil, errors.Wrap(err, "get restore meta") } @@ -578,7 +593,7 @@ func describeRestore(cn *pbm.PBM, o descrRestoreOpts) (fmt.Stringer, error) { res.OPID = meta.OPID res.LastTransitionTS = meta.LastTransitionTS res.LastTransitionTime = time.Unix(res.LastTransitionTS, 0).UTC().Format(time.RFC3339) - if meta.Status == pbm.StatusError { + if meta.Status == defs.StatusError { res.Error = &meta.Error } if meta.StartPITR != 0 { @@ -600,7 +615,7 @@ func describeRestore(cn *pbm.PBM, o descrRestoreOpts) (fmt.Stringer, error) { PartialTxn: rs.PartialTxn, LastTransitionTime: time.Unix(rs.LastTransitionTS, 0).UTC().Format(time.RFC3339), } - if rs.Status == pbm.StatusError { + if rs.Status == defs.StatusError { mrs.Error = &rs.Error } else if len(mrs.PartialTxn) > 0 { b, err := json.Marshal(mrs.PartialTxn) @@ -620,15 +635,15 @@ func describeRestore(cn *pbm.PBM, o descrRestoreOpts) (fmt.Stringer, error) { LastTransitionTS: node.LastTransitionTS, LastTransitionTime: time.Unix(node.LastTransitionTS, 0).UTC().Format(time.RFC3339), } - if node.Status == pbm.StatusError { + if node.Status == defs.StatusError { serr := node.Error mnode.Error = &serr } - if rs.Status == pbm.StatusPartlyDone && - node.Status != pbm.StatusDone && - node.Status != pbm.StatusError { - mnode.Status = pbm.StatusError + if rs.Status == defs.StatusPartlyDone && + node.Status != defs.StatusDone && + node.Status != defs.StatusError { + mnode.Status = defs.StatusError serr := fmt.Sprintf("Node lost. Last heartbeat: %d", node.Hb.T) mnode.Error = &serr } diff --git a/cli/status.go b/cmd/pbm/status.go similarity index 66% rename from cli/status.go rename to cmd/pbm/status.go index 5a365fb90..e90f17e06 100644 --- a/cli/status.go +++ b/cmd/pbm/status.go @@ -1,27 +1,36 @@ -package cli +package main import ( - "context" "encoding/json" "fmt" - "log" + stdlog "log" "net/url" "sort" "strings" "sync" "time" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" "go.mongodb.org/mongo-driver/mongo/options" "golang.org/x/sync/errgroup" + "github.com/percona/percona-backup-mongodb/internal/config" + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/lock" + "github.com/percona/percona-backup-mongodb/internal/log" + "github.com/percona/percona-backup-mongodb/internal/query" + "github.com/percona/percona-backup-mongodb/internal/slicer" + "github.com/percona/percona-backup-mongodb/internal/storage" + "github.com/percona/percona-backup-mongodb/internal/topo" + "github.com/percona/percona-backup-mongodb/internal/types" + "github.com/percona/percona-backup-mongodb/internal/util" + "github.com/percona/percona-backup-mongodb/internal/version" "github.com/percona/percona-backup-mongodb/pbm" - plog "github.com/percona/percona-backup-mongodb/pbm/log" - "github.com/percona/percona-backup-mongodb/pbm/pitr" - "github.com/percona/percona-backup-mongodb/pbm/sel" - "github.com/percona/percona-backup-mongodb/pbm/storage" + "github.com/percona/percona-backup-mongodb/pbm/oplog" ) type statusOptions struct { @@ -63,14 
+72,14 @@ type statusSect struct { Name string longName string Obj fmt.Stringer - f func(cn *pbm.PBM) (fmt.Stringer, error) + f func(ctx context.Context, cn *pbm.PBM) (fmt.Stringer, error) } func (f statusSect) String() string { return fmt.Sprintf("%s\n%s\n", sprinth(f.longName), f.Obj) } -func (o statusOut) set(cn *pbm.PBM, sfilter map[string]bool) error { +func (o statusOut) set(ctx context.Context, cn *pbm.PBM, sfilter map[string]bool) error { for _, se := range o.data { if sfilter != nil && !sfilter[se.Name] { se.Obj = nil @@ -78,7 +87,7 @@ func (o statusOut) set(cn *pbm.PBM, sfilter map[string]bool) error { } var err error - se.Obj, err = se.f(cn) + se.Obj, err = se.f(ctx, cn) if err != nil { return errors.Wrapf(err, "get status of %s", se.Name) } @@ -87,22 +96,22 @@ func (o statusOut) set(cn *pbm.PBM, sfilter map[string]bool) error { return nil } -func status(cn *pbm.PBM, curi string, opts statusOptions, pretty bool) (fmt.Stringer, error) { +func status(ctx context.Context, cn *pbm.PBM, curi string, opts statusOptions, pretty bool) (fmt.Stringer, error) { rsMap, err := parseRSNamesMapping(opts.rsMap) if err != nil { - return nil, errors.WithMessage(err, "cannot parse replset mapping") + return nil, errors.Wrap(err, "cannot parse replset mapping") } - storageStatFn := func(cn *pbm.PBM) (fmt.Stringer, error) { - return getStorageStat(cn, rsMap) + storageStatFn := func(ctx context.Context, cn *pbm.PBM) (fmt.Stringer, error) { + return getStorageStat(ctx, cn, rsMap) } out := statusOut{ data: []*statusSect{ { "cluster", "Cluster", nil, - func(cn *pbm.PBM) (fmt.Stringer, error) { - return clusterStatus(cn, curi) + func(ctx context.Context, cn *pbm.PBM) (fmt.Stringer, error) { + return clusterStatus(ctx, cn, curi) }, }, {"pitr", "PITR incremental backup", nil, getPitrStatus}, @@ -120,7 +129,7 @@ func status(cn *pbm.PBM, curi string, opts statusOptions, pretty bool) (fmt.Stri } } - err = out.set(cn, sfilter) + err = out.set(ctx, cn, sfilter) return out, err } @@ -212,18 +221,18 @@ func (c cluster) String() string { return s } -func clusterStatus(cn *pbm.PBM, uri string) (fmt.Stringer, error) { - clstr, err := cn.ClusterMembers() +func clusterStatus(ctx context.Context, cn *pbm.PBM, uri string) (fmt.Stringer, error) { + clstr, err := topo.ClusterMembers(ctx, cn.Conn.MongoClient()) if err != nil { return nil, errors.Wrap(err, "get cluster members") } - clusterTime, err := cn.ClusterTime() + clusterTime, err := topo.GetClusterTime(ctx, cn.Conn) if err != nil { return nil, errors.Wrap(err, "read cluster time") } - eg, ctx := errgroup.WithContext(cn.Context()) + eg, ctx := errgroup.WithContext(ctx) m := sync.Mutex{} var ret cluster @@ -231,21 +240,21 @@ func clusterStatus(cn *pbm.PBM, uri string) (fmt.Stringer, error) { c := c eg.Go(func() error { - client, err := connect(ctx, uri, c.Host) + client, err := directConnect(ctx, uri, c.Host) if err != nil { return errors.Wrapf(err, "connect to `%s` [%s]", c.RS, c.Host) } - rsConfig, err := pbm.GetReplSetConfig(ctx, client) + rsConfig, err := topo.GetReplSetConfig(ctx, client) if err != nil { _ = client.Disconnect(ctx) return errors.Wrapf(err, "get replset status for `%s`", c.RS) } - info, err := pbm.GetNodeInfo(ctx, client) + info, err := topo.GetNodeInfo(ctx, client) // don't need the connection anymore despite the result _ = client.Disconnect(ctx) if err != nil { - return errors.WithMessage(err, "get node info") + return errors.Wrap(err, "get node info") } lrs := rs{Name: c.RS} @@ -264,7 +273,7 @@ func clusterStatus(cn *pbm.PBM, uri string) 
(fmt.Stringer, error) { nd.Role = RoleHidden } - stat, err := cn.GetAgentStatus(c.RS, n.Host) + stat, err := topo.GetAgentStatus(ctx, cn.Conn, c.RS, n.Host) if errors.Is(err, mongo.ErrNoDocuments) { nd.Ver = "NOT FOUND" continue @@ -272,7 +281,7 @@ func clusterStatus(cn *pbm.PBM, uri string) (fmt.Stringer, error) { nd.Errs = append(nd.Errs, fmt.Sprintf("ERROR: get agent status: %v", err)) continue } - if stat.Heartbeat.T+pbm.StaleFrameSec < clusterTime.T { + if stat.Heartbeat.T+defs.StaleFrameSec < clusterTime.T { nd.Errs = append(nd.Errs, fmt.Sprintf("ERROR: lost agent, last heartbeat: %v", stat.Heartbeat.T)) continue } @@ -291,7 +300,7 @@ func clusterStatus(cn *pbm.PBM, uri string) (fmt.Stringer, error) { return ret, err } -func connect(ctx context.Context, uri, hosts string) (*mongo.Client, error) { +func directConnect(ctx context.Context, uri, hosts string) (*mongo.Client, error) { var host string chost := strings.Split(hosts, "/") if len(chost) > 1 { @@ -312,11 +321,7 @@ func connect(ctx context.Context, uri, hosts string) (*mongo.Client, error) { curi.RawQuery = query.Encode() curi.Host = host - conn, err := mongo.NewClient(options.Client().ApplyURI(curi.String()).SetAppName("pbm-status")) - if err != nil { - return nil, errors.Wrap(err, "create mongo client") - } - err = conn.Connect(ctx) + conn, err := mongo.Connect(ctx, options.Client().ApplyURI(curi.String()).SetAppName("pbm-status")) if err != nil { return nil, errors.Wrap(err, "connect") } @@ -347,43 +352,70 @@ func (p pitrStat) String() string { return s } -func getPitrStatus(cn *pbm.PBM) (fmt.Stringer, error) { +// isOplogSlicing checks if PITR slicing is running. It looks for PITR locks +// and returns true if there is at least one not stale. +func isOplogSlicing(ctx context.Context, m connect.Client) (bool, error) { + l, err := lock.GetLocks(ctx, m, &lock.LockHeader{Type: defs.CmdPITR}) + if errors.Is(err, mongo.ErrNoDocuments) || len(l) == 0 { + return false, nil + } + + if err != nil { + return false, errors.Wrap(err, "get locks") + } + + ct, err := topo.GetClusterTime(ctx, m) + if err != nil { + return false, errors.Wrap(err, "get cluster time") + } + + for _, lk := range l { + if lk.Heartbeat.T+defs.StaleFrameSec >= ct.T { + return true, nil + } + } + + return false, nil +} + +func getPitrStatus(ctx context.Context, cn *pbm.PBM) (fmt.Stringer, error) { var p pitrStat var err error - p.InConf, err = cn.IsPITR() + p.InConf, _, err = config.IsPITREnabled(ctx, cn.Conn) if err != nil { return p, errors.Wrap(err, "unable check PITR config status") } - p.Running, err = cn.PITRrun() + p.Running, err = isOplogSlicing(ctx, cn.Conn) if err != nil { return p, errors.Wrap(err, "unable check PITR running status") } - p.Err, err = getPitrErr(cn) + p.Err, err = getPitrErr(ctx, cn) return p, errors.Wrap(err, "check for errors") } -func getPitrErr(cn *pbm.PBM) (string, error) { - epch, err := cn.GetEpoch() +func getPitrErr(ctx context.Context, cn *pbm.PBM) (string, error) { + epch, err := config.GetEpoch(ctx, cn.Conn) if err != nil { return "", errors.Wrap(err, "get current epoch") } - shards, err := cn.ClusterMembers() + shards, err := topo.ClusterMembers(ctx, cn.Conn.MongoClient()) if err != nil { - log.Fatalf("Error: get cluster members: %v", err) + stdlog.Fatalf("Error: get cluster members: %v", err) } var errs []string LOOP: for _, s := range shards { - l, err := cn.LogGetExactSeverity( - &plog.LogRequest{ - LogKeys: plog.LogKeys{ - Severity: plog.Error, - Event: string(pbm.CmdPITR), + l, err := log.LogGetExactSeverity(ctx, + 
cn.Conn, + &log.LogRequest{ + LogKeys: log.LogKeys{ + Severity: log.Error, + Event: string(defs.CmdPITR), Epoch: epch.TS(), RS: s.RS, }, @@ -398,11 +430,12 @@ LOOP: } // check if some node in the RS had successfully restarted slicing - nl, err := cn.LogGetExactSeverity( - &plog.LogRequest{ - LogKeys: plog.LogKeys{ - Severity: plog.Debug, - Event: string(pbm.CmdPITR), + nl, err := log.LogGetExactSeverity(ctx, + cn.Conn, + &log.LogRequest{ + LogKeys: log.LogKeys{ + Severity: log.Debug, + Event: string(defs.CmdPITR), Epoch: epch.TS(), RS: s.RS, }, @@ -412,7 +445,7 @@ LOOP: return "", errors.Wrap(err, "get debug log records") } for _, r := range nl.Data { - if r.Msg == pitr.LogStartMsg && r.ObjID.Timestamp().After(l.Data[0].ObjID.Timestamp()) { + if r.Msg == slicer.LogStartMsg && r.ObjID.Timestamp().After(l.Data[0].ObjID.Timestamp()) { continue LOOP } } @@ -424,22 +457,22 @@ LOOP: } type currOp struct { - Type pbm.Command `json:"type,omitempty"` - Name string `json:"name,omitempty"` - StartTS int64 `json:"startTS,omitempty"` - Status string `json:"status,omitempty"` - OPID string `json:"opID,omitempty"` + Type defs.Command `json:"type,omitempty"` + Name string `json:"name,omitempty"` + StartTS int64 `json:"startTS,omitempty"` + Status string `json:"status,omitempty"` + OPID string `json:"opID,omitempty"` } func (c currOp) String() string { - if c.Type == pbm.CmdUndefined { + if c.Type == defs.CmdUndefined { return "(none)" } switch c.Type { default: return fmt.Sprintf("%s [op id: %s]", c.Type, c.OPID) - case pbm.CmdBackup, pbm.CmdRestore: + case defs.CmdBackup, defs.CmdRestore: return fmt.Sprintf("%s \"%s\", started at %s. Status: %s. [op id: %s]", c.Type, c.Name, time.Unix((c.StartTS), 0).UTC().Format("2006-01-02T15:04:05Z"), c.Status, c.OPID, @@ -447,18 +480,18 @@ func (c currOp) String() string { } } -func getCurrOps(cn *pbm.PBM) (fmt.Stringer, error) { +func getCurrOps(ctx context.Context, cn *pbm.PBM) (fmt.Stringer, error) { var r currOp // check for ops - lk, err := findLock(cn, cn.GetLocks) + lk, err := findLock(ctx, cn, lock.GetLocks) if err != nil { return r, errors.Wrap(err, "get ops") } if lk == nil { // check for delete ops - lk, err = findLock(cn, cn.GetOpLocks) + lk, err = findLock(ctx, cn, lock.GetOpLocks) if err != nil { return r, errors.Wrap(err, "get delete ops") } @@ -476,8 +509,8 @@ func getCurrOps(cn *pbm.PBM) (fmt.Stringer, error) { // reaching here means no conflict operation, hence all locks are the same, // hence any lock in `lk` contains info on the current op switch r.Type { - case pbm.CmdBackup: - bcp, err := cn.GetBackupByOPID(r.OPID) + case defs.CmdBackup: + bcp, err := query.GetBackupByOPID(ctx, cn.Conn, r.OPID) if err != nil { return r, errors.Wrap(err, "get backup info") } @@ -485,13 +518,13 @@ func getCurrOps(cn *pbm.PBM) (fmt.Stringer, error) { r.StartTS = bcp.StartTS r.Status = string(bcp.Status) switch bcp.Status { - case pbm.StatusRunning: + case defs.StatusRunning: r.Status = "snapshot backup" - case pbm.StatusDumpDone: + case defs.StatusDumpDone: r.Status = "oplog backup" } - case pbm.CmdRestore: - rst, err := cn.GetRestoreMetaByOPID(r.OPID) + case defs.CmdRestore: + rst, err := query.GetRestoreMetaByOPID(ctx, cn.Conn, r.OPID) if err != nil { return r, errors.Wrap(err, "get restore info") } @@ -499,9 +532,9 @@ func getCurrOps(cn *pbm.PBM) (fmt.Stringer, error) { r.StartTS = rst.StartTS r.Status = string(rst.Status) switch rst.Status { - case pbm.StatusRunning: + case defs.StatusRunning: r.Status = "snapshot restore" - case pbm.StatusDumpDone: + case 
defs.StatusDumpDone: r.Status = "oplog restore" } } @@ -539,11 +572,11 @@ func (s storageStat) String() string { ss := &s.Snapshot[i] var status string switch ss.Status { - case pbm.StatusDone: + case defs.StatusDone: status = fmt.Sprintf("[restore_to_time: %s]", fmtTS(ss.RestoreTS)) - case pbm.StatusCancelled: + case defs.StatusCancelled: status = fmt.Sprintf("[!canceled: %s]", fmtTS(ss.RestoreTS)) - case pbm.StatusError: + case defs.StatusError: if errors.Is(ss.Err, errIncompatible) { status = fmt.Sprintf("[incompatible: %s] [%s]", ss.Err.Error(), fmtTS(ss.RestoreTS)) } else { @@ -554,9 +587,9 @@ func (s storageStat) String() string { } t := string(ss.Type) - if sel.IsSelective(ss.Namespaces) { + if util.IsSelective(ss.Namespaces) { t += ", selective" - } else if ss.Type == pbm.IncrementalBackup && ss.SrcBackup == "" { + } else if ss.Type == defs.IncrementalBackup && ss.SrcBackup == "" { t += ", base" } ret += fmt.Sprintf(" %s %s <%s> %s\n", ss.Name, fmtSize(ss.Size), t, status) @@ -575,7 +608,7 @@ func (s storageStat) String() string { for _, sn := range s.PITR.Ranges { var v string - if sn.Err != nil && !errors.Is(sn.Err, pbm.ErrNotFound) { + if sn.Err != nil && !errors.Is(sn.Err, errors.ErrNotFound) { v = fmt.Sprintf(" !!! %s", sn.Err.Error()) } f := "" @@ -588,10 +621,10 @@ func (s storageStat) String() string { return ret } -func getStorageStat(cn *pbm.PBM, rsMap map[string]string) (fmt.Stringer, error) { +func getStorageStat(ctx context.Context, cn *pbm.PBM, rsMap map[string]string) (fmt.Stringer, error) { var s storageStat - cfg, err := cn.GetConfig() + cfg, err := config.GetConfig(ctx, cn.Conn) if err != nil { return s, errors.Wrap(err, "get config") } @@ -603,25 +636,25 @@ func getStorageStat(cn *pbm.PBM, rsMap map[string]string) (fmt.Stringer, error) } s.Path = cfg.Storage.Path() - bcps, err := cn.BackupsList(0) + bcps, err := query.BackupsList(ctx, cn.Conn, 0) if err != nil { return s, errors.Wrap(err, "get backups list") } - inf, err := cn.GetNodeInfo() + inf, err := topo.GetNodeInfoExt(ctx, cn.Conn.MongoClient()) if err != nil { return s, errors.Wrap(err, "define cluster state") } - ver, err := pbm.GetMongoVersion(cn.Context(), cn.Conn) + ver, err := version.GetMongoVersion(ctx, cn.Conn.MongoClient()) if err != nil { - return nil, errors.WithMessage(err, "get mongo version") + return nil, errors.Wrap(err, "get mongo version") } - fcv, err := cn.GetFeatureCompatibilityVersion() + fcv, err := version.GetFCV(ctx, cn.Conn.MongoClient()) if err != nil { - return nil, errors.WithMessage(err, "get featureCompatibilityVersion") + return nil, errors.Wrap(err, "get featureCompatibilityVersion") } - shards, err := cn.ClusterMembers() + shards, err := topo.ClusterMembers(ctx, cn.Conn.MongoClient()) if err != nil { return s, errors.Wrap(err, "get cluster members") } @@ -630,12 +663,13 @@ func getStorageStat(cn *pbm.PBM, rsMap map[string]string) (fmt.Stringer, error) // which the `confsrv` param in `bcpMatchCluster` is all about bcpsMatchCluster(bcps, ver.VersionString, fcv, shards, inf.SetName, rsMap) - stg, err := cn.GetStorage(cn.Logger().NewEvent("", "", "", primitive.Timestamp{})) + stg, err := util.GetStorage(ctx, cn.Conn, + cn.Logger().NewEvent("", "", "", primitive.Timestamp{})) if err != nil { return s, errors.Wrap(err, "get storage") } - now, err := cn.ClusterTime() + now, err := topo.GetClusterTime(ctx, cn.Conn) if err != nil { return nil, errors.Wrap(err, "get cluster time") } @@ -656,21 +690,21 @@ func getStorageStat(cn *pbm.PBM, rsMap map[string]string) (fmt.Stringer, error) 
} switch bcp.Status { - case pbm.StatusError: + case defs.StatusError: if !errors.Is(snpsht.Err, errIncompatible) { break } fallthrough - case pbm.StatusDone: + case defs.StatusDone: snpsht.RestoreTS = int64(bcp.LastWriteTS.T) - case pbm.StatusCancelled: + case defs.StatusCancelled: // leave as it is, not to rewrite status with the `stuck` error default: - if bcp.Hb.T+pbm.StaleFrameSec < now.T { + if bcp.Hb.T+defs.StaleFrameSec < now.T { errStr := fmt.Sprintf("Backup stuck at `%v` stage, last beat ts: %d", bcp.Status, bcp.Hb.T) snpsht.Err = errors.New(errStr) snpsht.ErrString = errStr - snpsht.Status = pbm.StatusError + snpsht.Status = defs.StatusError } } @@ -679,13 +713,13 @@ func getStorageStat(cn *pbm.PBM, rsMap map[string]string) (fmt.Stringer, error) if err != nil { snpsht.Err = err snpsht.ErrString = err.Error() - snpsht.Status = pbm.StatusError + snpsht.Status = defs.StatusError } s.Snapshot = append(s.Snapshot, snpsht) } - s.PITR, err = getPITRranges(cn, bcps, rsMap) + s.PITR, err = getPITRranges(ctx, cn, bcps, rsMap) if err != nil { return s, errors.Wrap(err, "get PITR chunks") } @@ -693,22 +727,27 @@ func getStorageStat(cn *pbm.PBM, rsMap map[string]string) (fmt.Stringer, error) return s, nil } -func getPITRranges(cn *pbm.PBM, bcps []pbm.BackupMeta, rsMap map[string]string) (*pitrRanges, error) { - shards, err := cn.ClusterMembers() +func getPITRranges( + ctx context.Context, + cn *pbm.PBM, + bcps []types.BackupMeta, + rsMap map[string]string, +) (*pitrRanges, error) { + shards, err := topo.ClusterMembers(ctx, cn.Conn.MongoClient()) if err != nil { return nil, errors.Wrap(err, "get cluster members") } - now, err := cn.ClusterTime() + now, err := topo.GetClusterTime(ctx, cn.Conn) if err != nil { return nil, errors.Wrap(err, "get cluster time") } - mapRevRS := pbm.MakeReverseRSMapFunc(rsMap) + mapRevRS := util.MakeReverseRSMapFunc(rsMap) var size int64 - var rstlines [][]pbm.Timeline + var rstlines [][]oplog.Timeline for _, s := range shards { - tlns, err := cn.PITRGetValidTimelines(mapRevRS(s.RS), now) + tlns, err := oplog.PITRGetValidTimelines(ctx, cn.Conn, mapRevRS(s.RS), now) if err != nil { return nil, errors.Wrapf(err, "get PITR timelines for %s replset: %s", s.RS, err) } @@ -724,11 +763,11 @@ func getPITRranges(cn *pbm.PBM, bcps []pbm.BackupMeta, rsMap map[string]string) } sort.Slice(bcps, func(i, j int) bool { - return primitive.CompareTimestamp(bcps[i].LastWriteTS, bcps[j].LastWriteTS) == -1 + return bcps[i].LastWriteTS.Compare(bcps[j].LastWriteTS) == -1 }) var pr []pitrRange - for _, tl := range pbm.MergeTimelines(rstlines...) { + for _, tl := range oplog.MergeTimelines(rstlines...) 
{ var bcplastWrite primitive.Timestamp for i := range bcps { @@ -751,8 +790,8 @@ func getPITRranges(cn *pbm.PBM, bcps []pbm.BackupMeta, rsMap map[string]string) return &pitrRanges{Ranges: pr, Size: size}, nil } -func isValidBaseSnapshot(bcp *pbm.BackupMeta) bool { - if bcp.Status != pbm.StatusDone || sel.IsSelective(bcp.Namespaces) { +func isValidBaseSnapshot(bcp *types.BackupMeta) bool { + if bcp.Status != defs.StatusDone || util.IsSelective(bcp.Namespaces) { return false } @@ -765,7 +804,7 @@ func isValidBaseSnapshot(bcp *pbm.BackupMeta) bool { case errors.Is(err, missedReplsetsError{}), errors.Is(err, incompatibleFCVVersionError{}): return true case errors.Is(err, incompatibleMongodVersionError{}): - if bcp.Type == pbm.LogicalBackup { + if bcp.Type == defs.LogicalBackup { return true } } @@ -773,7 +812,7 @@ func isValidBaseSnapshot(bcp *pbm.BackupMeta) bool { return false } -func getBackupSize(bcp *pbm.BackupMeta, stg storage.Storage) (int64, error) { +func getBackupSize(bcp *types.BackupMeta, stg storage.Storage) (int64, error) { if bcp.Size > 0 { return bcp.Size, nil } @@ -781,9 +820,9 @@ func getBackupSize(bcp *pbm.BackupMeta, stg storage.Storage) (int64, error) { var s int64 var err error switch bcp.Status { - case pbm.StatusDone, pbm.StatusCancelled, pbm.StatusError: + case defs.StatusDone, defs.StatusCancelled, defs.StatusError: s, err = getLegacySnapshotSize(bcp, stg) - if errors.Is(err, errMissedFile) && bcp.Status != pbm.StatusDone { + if errors.Is(err, errMissedFile) && bcp.Status != defs.StatusDone { // canceled/failed backup can be incomplete. ignore err = nil } @@ -792,20 +831,20 @@ func getBackupSize(bcp *pbm.BackupMeta, stg storage.Storage) (int64, error) { return s, err } -func getLegacySnapshotSize(bcp *pbm.BackupMeta, stg storage.Storage) (int64, error) { +func getLegacySnapshotSize(bcp *types.BackupMeta, stg storage.Storage) (int64, error) { switch bcp.Type { - case pbm.LogicalBackup: + case defs.LogicalBackup: return getLegacyLogicalSize(bcp, stg) - case pbm.PhysicalBackup, pbm.IncrementalBackup: + case defs.PhysicalBackup, defs.IncrementalBackup: return getLegacyPhysSize(bcp.Replsets) - case pbm.ExternalBackup: + case defs.ExternalBackup: return 0, nil default: return 0, errors.Errorf("unknown backup type %s", bcp.Type) } } -func getLegacyPhysSize(rsets []pbm.BackupReplset) (int64, error) { +func getLegacyPhysSize(rsets []types.BackupReplset) (int64, error) { var s int64 for _, rs := range rsets { for _, f := range rs.Files { @@ -818,13 +857,13 @@ func getLegacyPhysSize(rsets []pbm.BackupReplset) (int64, error) { var errMissedFile = errors.New("missed file") -func getLegacyLogicalSize(bcp *pbm.BackupMeta, stg storage.Storage) (int64, error) { +func getLegacyLogicalSize(bcp *types.BackupMeta, stg storage.Storage) (int64, error) { var s int64 var err error for _, rs := range bcp.Replsets { ds, er := stg.FileStat(rs.DumpName) if er != nil { - if bcp.Status == pbm.StatusDone || !errors.Is(er, storage.ErrNotExist) { + if bcp.Status == defs.StatusDone || !errors.Is(er, storage.ErrNotExist) { return s, errors.Wrapf(er, "get file %s", rs.DumpName) } @@ -833,7 +872,7 @@ func getLegacyLogicalSize(bcp *pbm.BackupMeta, stg storage.Storage) (int64, erro op, er := stg.FileStat(rs.OplogName) if er != nil { - if bcp.Status == pbm.StatusDone || !errors.Is(er, storage.ErrNotExist) { + if bcp.Status == defs.StatusDone || !errors.Is(er, storage.ErrNotExist) { return s, errors.Wrapf(er, "get file %s", rs.OplogName) } diff --git a/doc/source/conf.pyc b/doc/source/conf.pyc deleted file 
mode 100644 index 883b2a6c0d70218173f720d6ec2ce1f74589fcb7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2765 zcma)7>vq#d5FS~HjE$WDArMFaaVzFxIY|pGDfDnCr@x@(wExuhsIqn~uaUK?c14oY z(_cJ8AED3Dm*|`H0os|7O)fwWb|UT0&dz-E&Bg9N-S%I9{{2e=vriM>Pw?OT(Et$m z=RgJ!zFj*IIv^dGG+^R_1YRtFXuunX{v6>#xCjD8`1C+DL7V{5g4BZ$w{1w#1PV_= zi1H4EEf8G@+aQ)8JPBeM!VZX25OzVFhRG7h4#+N~?0|%4AY6tNF`WW=8p1OmRv=sf zaTda}pd1j7K%9pM;VOtV2+xrfae?lK=Rvt3E`nHx2Vw)nC45~5aRrsvK=|yeP5#7H zNL`91yl_N51A!KssZ8#h?^ij0dWgb48Di9Z_}~3gXS-RI7jB|r)w`qj2Ou`# zm|Sc@cooDpgx5HxPeFW!aoh*-0E4*BSdPVUgW~xAXlCf{A)kX#Xy(7vc``` zBbBMxO5<;DZr$&1Z*Fgyb5!BQSmk>GUJq1mWm)gd^3fx!g*GckWr=<*%Q%(C5oQ1v+S(Yv^!nZDDo> zgNZ8Rpr{iYBzig+mU`F7!A)aP3m7Qr46D?1Q7}{O+p(NVPGvASD!YPObJ6_WA2OZ& zDN!kgwuL&SEOeB|Xk`m`b3CcC<7&uAW@bNR8!u1t^U{>YmDp8B$P3m=wl zX~SfjePiqR?U-4Qi$dmtdm|p2BA$yYJJux+8cdJO4Hbo@5_vmzB37o@>-vmLh0}A7 z?@VKZLo%X76O36z+$)JL_Dhvwlh6s)Cz<9o7N3lbWPB5iV|IhC&SqXjqi!$Kl`Se8 zVNFb9Ybm{QFUcw)2@SeLkTe9p!9uE2YQ}muiaDAZf3crZw-{`M{Y)}i9GwwD!~&^% zY&L=Uj9Me&31O)l4z_jpR5dK48I4RGx}z+bCxm>CI7%B?7E%kbkV^893U}rKt@XXS zo$9bmktu6W&#sSURzx@kB~2biS964{^FS~Rrk1g%3wW{KT3uxFwag-eg@*iNhjMc3 zQ_KTX#8hT_SC*VPY)}M;zhsO6qgDAxQ;utI)R=Ja;F)k}d=2R5E-3by;zv_0s!V=G zv(A+G=r|o`qw6{J?YJ$^YkDrOn_k0nRu{Zg{JLJ-TkM|jS{>JQn)q!u*A`Y6y_2m~ u*I9A8kJp@4r*qM5Y&vHg*Xb;_oMq2j@EWAuzUn%copsbe`%5jS<@^U0>&-#{ diff --git a/e2e-tests/Dockerfile b/e2e-tests/Dockerfile deleted file mode 100644 index 2461893fd..000000000 --- a/e2e-tests/Dockerfile +++ /dev/null @@ -1,10 +0,0 @@ -FROM golang:1.19-bullseye -WORKDIR /opt/pbm-test -ARG TESTS_BCP_TYPE -ENV TESTS_BCP_TYPE=${TESTS_BCP_TYPE} - -COPY . . - -RUN go install -mod=vendor ./e2e-tests/cmd/pbm-test - -CMD ["pbm-test"] diff --git a/e2e-tests/README.md b/e2e-tests/README.md index 430682848..16dcf9af9 100644 --- a/e2e-tests/README.md +++ b/e2e-tests/README.md @@ -3,9 +3,9 @@ ## Run tests Run all tests ``` -$ MONGODB_VERSION=4.2 ./run-all +$ MONGODB_VERSION=4.4 ./run-all ``` -`MONGODB_VERSION` is a PSMDB version (e.g. 4.0/4.2/4.4). Default is `4.2` +`MONGODB_VERSION` is a PSMDB version (e.g. 4.4/5.0/6.0). Default is `4.4` `./run-all` would run all tests both on a sharded cluster and a non-sharded replica set. @@ -22,7 +22,7 @@ To start tests with a running pbm-agent and minio storage: ``` $ MONGODB_VERSION=4.4 ./start-cluster ``` -`MONGODB_VERSION` is a PSMDB version (e.g. 4.0/4.2/4.4). Default is `4.2` +`MONGODB_VERSION` is a PSMDB version (e.g. 4.4/5.0/6.0). Default is `4.4` `./start-replset` - to start a non-sharded replica set. 
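The ensure-oplog diff below follows the same reorganization pattern applied to the CLI files above: package-level functions in the new `internal/...` packages take a `context.Context` and a `connect.Client` where methods on `*pbm.PBM` used to be called, and `errors.Wrap`/`errors.Wrapf` from `internal/errors` replace `errors.WithMessage`/`WithMessagef`. A minimal sketch of the new calling convention (not part of the patch): `inspectPITRSetup` is a hypothetical helper, while the package paths and call shapes are taken from the diff.

```
// Package example sketches the post-refactor calling convention.
package example

import (
	"fmt"

	"github.com/percona/percona-backup-mongodb/internal/config"
	"github.com/percona/percona-backup-mongodb/internal/connect"
	"github.com/percona/percona-backup-mongodb/internal/context"
	"github.com/percona/percona-backup-mongodb/internal/defs"
	"github.com/percona/percona-backup-mongodb/internal/errors"
	"github.com/percona/percona-backup-mongodb/internal/topo"
)

// inspectPITRSetup is a hypothetical helper that only illustrates the pattern:
// refactored APIs are package functions taking a context and a connect.Client
// (formerly methods on *pbm.PBM), with errors wrapped via internal/errors.
func inspectPITRSetup(ctx context.Context, conn connect.Client) error {
	cfg, err := config.GetConfig(ctx, conn) // was: cn.GetConfig()
	if err != nil {
		return errors.Wrap(err, "get config") // was: errors.WithMessage(err, "get config")
	}

	ct, err := topo.GetClusterTime(ctx, conn) // was: cn.ClusterTime()
	if err != nil {
		return errors.Wrap(err, "get cluster time")
	}

	fmt.Printf("cluster time: %d, PITR compression: %v\n",
		ct.T, defs.CompressionType(cfg.PITR.Compression))
	return nil
}
```

Passing the context and `connect.Client` explicitly appears to be the point of the reorganization: the CLI, the agent, and the e2e helpers can share the `internal` packages without going through the old `*pbm.PBM` wrapper.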
diff --git a/e2e-tests/cmd/ensure-oplog/main.go b/e2e-tests/cmd/ensure-oplog/main.go index 31a5648ba..1f2789040 100644 --- a/e2e-tests/cmd/ensure-oplog/main.go +++ b/e2e-tests/cmd/ensure-oplog/main.go @@ -1,7 +1,6 @@ package main import ( - "context" "fmt" "log" "os" @@ -10,18 +9,21 @@ import ( "time" "github.com/alecthomas/kingpin" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" "go.mongodb.org/mongo-driver/mongo/options" "golang.org/x/sync/errgroup" + "github.com/percona/percona-backup-mongodb/internal/config" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/slicer" + "github.com/percona/percona-backup-mongodb/internal/storage" + "github.com/percona/percona-backup-mongodb/internal/util" "github.com/percona/percona-backup-mongodb/pbm" - "github.com/percona/percona-backup-mongodb/pbm/backup" - "github.com/percona/percona-backup-mongodb/pbm/compress" "github.com/percona/percona-backup-mongodb/pbm/oplog" - "github.com/percona/percona-backup-mongodb/pbm/pitr" ) var logger = log.New(os.Stdout, "", log.Ltime) @@ -78,13 +80,13 @@ const ( func connTopo(ctx context.Context, uri string) (topo, error) { m, err := connect(ctx, uri) if err != nil { - return topoUnknown, errors.WithMessage(err, "connect") + return topoUnknown, errors.Wrap(err, "connect") } defer m.Disconnect(context.Background()) // nolint:errcheck r, err := sayHello(ctx, m) if err != nil { - return topoUnknown, errors.WithMessage(err, "getShortHello") + return topoUnknown, errors.Wrap(err, "getShortHello") } switch { @@ -106,11 +108,11 @@ func parseTS(t string) (primitive.Timestamp, error) { if tt, ii, ok := strings.Cut(t, ","); ok { t, err := strconv.ParseUint(tt, 10, 32) if err != nil { - return ts, errors.WithMessage(err, "parse clusterTime T") + return ts, errors.Wrap(err, "parse clusterTime T") } i, err := strconv.ParseUint(ii, 10, 32) if err != nil { - return ts, errors.WithMessage(err, "parse clusterTime I") + return ts, errors.Wrap(err, "parse clusterTime I") } ts.T = uint32(t) @@ -133,7 +135,7 @@ func parseTS(t string) (primitive.Timestamp, error) { } if err != nil { - return ts, errors.WithMessage(err, "parse date") + return ts, errors.Wrap(err, "parse date") } ts.T = uint32(tsto.Unix()) @@ -147,12 +149,12 @@ func connect(ctx context.Context, uri string) (*mongo.Client, error) { m, err := mongo.Connect(ctx, options.Client().ApplyURI(uri)) if err != nil { - return nil, errors.WithMessage(err, "connect") + return nil, errors.Wrap(err, "connect") } if err = m.Ping(ctx, nil); err != nil { m.Disconnect(context.Background()) // nolint:errcheck - return nil, errors.WithMessage(err, "ping") + return nil, errors.Wrap(err, "ping") } return m, nil @@ -166,12 +168,12 @@ type hello struct { func sayHello(ctx context.Context, m *mongo.Client) (*hello, error) { res := m.Database("admin").RunCommand(ctx, bson.D{{"hello", 1}}) if err := res.Err(); err != nil { - return nil, errors.WithMessage(err, "query") + return nil, errors.Wrap(err, "query") } var r *hello err := res.Decode(&r) - return r, errors.WithMessage(err, "decode") + return r, errors.Wrap(err, "decode") } func ensureClusterOplog(ctx context.Context, uri string, from, till primitive.Timestamp) error { @@ -180,18 +182,18 @@ func ensureClusterOplog(ctx context.Context, uri string, from, till primitive.Ti 
m, err := connect(ctx, uri) if err != nil { - return errors.WithMessage(err, "connect") + return errors.Wrap(err, "connect") } defer m.Disconnect(context.Background()) // nolint:errcheck res := m.Database("admin").RunCommand(ctx, bson.D{{"getShardMap", 1}}) if err := res.Err(); err != nil { - return errors.WithMessage(err, "getShardMap: query") + return errors.Wrap(err, "getShardMap: query") } var r struct{ ConnStrings map[string]string } if err := res.Decode(&r); err != nil { - return errors.WithMessage(err, "getShardMap: decode") + return errors.Wrap(err, "getShardMap: decode") } eg, gc := errgroup.WithContext(ctx) @@ -200,7 +202,7 @@ func ensureClusterOplog(ctx context.Context, uri string, from, till primitive.Ti eg.Go(func() error { err := ensureReplsetOplog(gc, rsURI, from, till) - return errors.WithMessagef(err, "[%s] ensure oplog", id) + return errors.Wrapf(err, "[%s] ensure oplog", id) }) } @@ -220,12 +222,12 @@ func ensureReplsetOplog(ctx context.Context, uri string, from, till primitive.Ti m, err := connect(ctx, uri) if err != nil { - return errors.WithMessage(err, "connect") + return errors.Wrap(err, "connect") } info, err := sayHello(ctx, m) if err != nil { - return errors.WithMessage(err, "get node info") + return errors.Wrap(err, "get node info") } if info.SetName == "" { return errors.New("cannot ensure oplog in standalone mode") @@ -233,12 +235,12 @@ func ensureReplsetOplog(ctx context.Context, uri string, from, till primitive.Ti firstOpT, err := findPreviousOplogTS(ctx, m, from) if err != nil { - return errors.WithMessage(err, "lookup first oplog record") + return errors.Wrap(err, "lookup first oplog record") } lastOpT, err := findFollowingOplogTS(ctx, m, till) if err != nil { - return errors.WithMessage(err, "lookup first oplog record") + return errors.Wrap(err, "lookup first oplog record") } logger.Printf("[%s] ensuring replset oplog (actual): %s - %s", @@ -246,12 +248,12 @@ func ensureReplsetOplog(ctx context.Context, uri string, from, till primitive.Ti pbmC, err := pbm.New(ctx, uri, "ensure-oplog") if err != nil { - return errors.WithMessage(err, "connect to PBM") + return errors.Wrap(err, "connect to PBM") } - chunks, err := pbmC.PITRGetChunksSlice(info.SetName, firstOpT, lastOpT) + chunks, err := oplog.PITRGetChunksSlice(ctx, pbmC.Conn, info.SetName, firstOpT, lastOpT) if err != nil { - return errors.WithMessage(err, "get chunks") + return errors.Wrap(err, "get chunks") } missedChunks := findChunkRanges(chunks, firstOpT, lastOpT) @@ -261,36 +263,37 @@ func ensureReplsetOplog(ctx context.Context, uri string, from, till primitive.Ti return nil } - cfg, err := pbmC.GetConfig() + cfg, err := config.GetConfig(ctx, pbmC.Conn) if err != nil { - return errors.WithMessage(err, "get config") + return errors.Wrap(err, "get config") } - stg, err := pbmC.GetStorage(nil) + stg, err := util.StorageFromConfig(cfg, + pbmC.Logger().NewEvent("", "", "", primitive.Timestamp{})) if err != nil { - return errors.WithMessage(err, "get storage") + return errors.Wrap(err, "get storage") } - compression := compress.CompressionType(cfg.PITR.Compression) + compression := defs.CompressionType(cfg.PITR.Compression) for _, t := range missedChunks { logger.Printf("[%s] ensure missed chunk: %s - %s", uri, formatTimestamp(t.from), formatTimestamp(t.till)) - filename := pitr.ChunkName(info.SetName, t.from, t.till, compression) + filename := slicer.ChunkName(info.SetName, t.from, t.till, compression) o := oplog.NewOplogBackup(m) o.SetTailingSpan(t.from, t.till) - n, err := backup.Upload(ctx, o, stg, 
compression, cfg.PITR.CompressionLevel, filename, -1) + n, err := storage.Upload(ctx, o, stg, compression, cfg.PITR.CompressionLevel, filename, -1) if err != nil { - return errors.WithMessagef(err, "failed to upload %s - %s chunk", + return errors.Wrapf(err, "failed to upload %s - %s chunk", formatTimestamp(t.from), formatTimestamp(t.till)) } logger.Printf("[%s] uploaded chunk: %s - %s (%d bytes)", uri, formatTimestamp(t.from), formatTimestamp(t.till), n) - meta := pbm.OplogChunk{ + meta := oplog.OplogChunk{ RS: info.SetName, FName: filename, Compression: compression, @@ -298,8 +301,8 @@ func ensureReplsetOplog(ctx context.Context, uri string, from, till primitive.Ti EndTS: t.till, } - if err := pbmC.PITRAddChunk(meta); err != nil { - return errors.WithMessagef(err, "failed to save %s - %s chunk meta", + if err := oplog.PITRAddChunk(ctx, pbmC.Conn, meta); err != nil { + return errors.Wrapf(err, "failed to save %s - %s chunk meta", formatTimestamp(t.from), formatTimestamp(t.till)) } @@ -340,7 +343,7 @@ func findOplogTSHelper(res *mongo.SingleResult) (primitive.Timestamp, error) { var v struct{ TS primitive.Timestamp } if err := res.Decode(&v); err != nil { - return primitive.Timestamp{}, errors.WithMessage(err, "decode") + return primitive.Timestamp{}, errors.Wrap(err, "decode") } return v.TS, nil @@ -350,7 +353,7 @@ type timerange struct { from, till primitive.Timestamp } -func findChunkRanges(rs []pbm.OplogChunk, from, till primitive.Timestamp) []timerange { +func findChunkRanges(rs []oplog.OplogChunk, from, till primitive.Timestamp) []timerange { if len(rs) == 0 { return []timerange{{from, till}} } @@ -358,23 +361,23 @@ func findChunkRanges(rs []pbm.OplogChunk, from, till primitive.Timestamp) []time rv := []timerange{} c := rs[0] - if primitive.CompareTimestamp(from, c.StartTS) == -1 { + if from.Compare(c.StartTS) == -1 { rv = append(rv, timerange{from, c.StartTS}) } endTS := c.EndTS for _, c = range rs[1:] { - if primitive.CompareTimestamp(endTS, c.StartTS) == -1 { + if endTS.Compare(c.StartTS) == -1 { rv = append(rv, timerange{endTS, c.StartTS}) } - if primitive.CompareTimestamp(till, c.EndTS) != 1 { + if till.Compare(c.EndTS) != 1 { return rv } endTS = c.EndTS } - if primitive.CompareTimestamp(endTS, till) == -1 { + if endTS.Compare(till) == -1 { rv = append(rv, timerange{endTS, till}) } diff --git a/e2e-tests/cmd/pbm-test/run.go b/e2e-tests/cmd/pbm-test/run.go index 16f7d06ca..236bcf56c 100644 --- a/e2e-tests/cmd/pbm-test/run.go +++ b/e2e-tests/cmd/pbm-test/run.go @@ -11,7 +11,8 @@ import ( "golang.org/x/mod/semver" "github.com/percona/percona-backup-mongodb/e2e-tests/pkg/tests/sharded" - "github.com/percona/percona-backup-mongodb/pbm" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" ) func run(t *sharded.Cluster, typ testTyp) { @@ -49,13 +50,13 @@ func run(t *sharded.Cluster, typ testTyp) { storage = stg.conf - t.ApplyConfig(storage) + t.ApplyConfig(context.TODO(), storage) flush(t) t.SetBallastData(1e5) runTest("Logical Backup & Restore "+stg.name, - func() { t.BackupAndRestore(pbm.LogicalBackup) }) + func() { t.BackupAndRestore(defs.LogicalBackup) }) runTest("Logical PITR & Restore "+stg.name, t.PITRbasic) @@ -85,7 +86,7 @@ func run(t *sharded.Cluster, typ testTyp) { t.LeaderLag) runTest("Logical Backup Data Bounds Check", - func() { t.BackupBoundsCheck(pbm.LogicalBackup, cVersion) }) + func() { t.BackupBoundsCheck(defs.LogicalBackup, cVersion) }) if typ == testsSharded { t.SetBallastData(1e6) @@ -105,29 
+106,25 @@ func run(t *sharded.Cluster, typ testTyp) { runTest("Restart agents during the backup", t.RestartAgents) - if semver.Compare(cVersion, "v4.2") >= 0 { - runTest("Distributed Transactions backup", - t.DistributedTrxSnapshot) + runTest("Distributed Transactions backup", + t.DistributedTrxSnapshot) - runTest("Distributed Transactions PITR", - t.DistributedTrxPITR) - } + runTest("Distributed Transactions PITR", + t.DistributedTrxPITR) - if semver.Compare(cVersion, "v4.4") >= 0 { - disttxnconf := "/etc/pbm/fs-disttxn-4x.yaml" - tsTo := primitive.Timestamp{1644410656, 8} + disttxnconf := "/etc/pbm/fs-disttxn-4x.yaml" + tsTo := primitive.Timestamp{1644410656, 8} - if semver.Compare(cVersion, "v5.0") >= 0 { - disttxnconf = "/etc/pbm/fs-disttxn-50.yaml" - tsTo = primitive.Timestamp{1644243375, 7} - } + if semver.Compare(cVersion, "v5.0") >= 0 { + disttxnconf = "/etc/pbm/fs-disttxn-50.yaml" + tsTo = primitive.Timestamp{1644243375, 7} + } - t.ApplyConfig(disttxnconf) - runTest("Distributed Transactions PITR", - func() { t.DistributedCommit(tsTo) }) + t.ApplyConfig(context.TODO(), disttxnconf) + runTest("Distributed Transactions PITR", + func() { t.DistributedCommit(tsTo) }) - t.ApplyConfig(storage) - } + t.ApplyConfig(context.TODO(), storage) } if semver.Compare(cVersion, "v5.0") >= 0 { @@ -143,7 +140,7 @@ func run(t *sharded.Cluster, typ testTyp) { t.SetBallastData(1e5) runTest("Clock Skew Tests", - func() { t.ClockSkew(pbm.LogicalBackup, cVersion) }) + func() { t.ClockSkew(defs.LogicalBackup, cVersion) }) flushStore(t) } @@ -175,7 +172,7 @@ func flushPbm(t *sharded.Cluster) { } func flushStore(t *sharded.Cluster) { - err := t.FlushStorage() + err := t.FlushStorage(context.TODO()) if err != nil { log.Fatalln("Error: unable flush storage:", err) } diff --git a/e2e-tests/cmd/pbm-test/run_physical.go b/e2e-tests/cmd/pbm-test/run_physical.go index 716200d80..783e1e15b 100644 --- a/e2e-tests/cmd/pbm-test/run_physical.go +++ b/e2e-tests/cmd/pbm-test/run_physical.go @@ -4,10 +4,9 @@ import ( "math/rand" "time" - "golang.org/x/mod/semver" - "github.com/percona/percona-backup-mongodb/e2e-tests/pkg/tests/sharded" - "github.com/percona/percona-backup-mongodb/pbm" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" ) func runPhysical(t *sharded.Cluster, typ testTyp) { @@ -39,32 +38,30 @@ func runPhysical(t *sharded.Cluster, typ testTyp) { continue } - t.ApplyConfig(stg.conf) + t.ApplyConfig(context.TODO(), stg.conf) flush(t) t.SetBallastData(1e5) runTest("Physical Backup & Restore "+stg.name, - func() { t.BackupAndRestore(pbm.PhysicalBackup) }) + func() { t.BackupAndRestore(defs.PhysicalBackup) }) flushStore(t) } runTest("Physical Backup Data Bounds Check", - func() { t.BackupBoundsCheck(pbm.PhysicalBackup, cVersion) }) + func() { t.BackupBoundsCheck(defs.PhysicalBackup, cVersion) }) runTest("Incremental Backup & Restore ", func() { t.IncrementalBackup(cVersion) }) if typ == testsSharded { - if semver.Compare(cVersion, "v4.2") >= 0 { - runTest("Physical Distributed Transactions backup", - t.DistributedTrxPhysical) - } + runTest("Physical Distributed Transactions backup", + t.DistributedTrxPhysical) } runTest("Clock Skew Tests", - func() { t.ClockSkew(pbm.PhysicalBackup, cVersion) }) + func() { t.ClockSkew(defs.PhysicalBackup, cVersion) }) flushStore(t) } diff --git a/e2e-tests/cmd/pbm-test/run_remapping.go b/e2e-tests/cmd/pbm-test/run_remapping.go index 3245f8c62..cf1b69065 100644 --- a/e2e-tests/cmd/pbm-test/run_remapping.go +++ 
b/e2e-tests/cmd/pbm-test/run_remapping.go @@ -1,8 +1,10 @@ package main import ( + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/e2e-tests/pkg/tests/sharded" - "github.com/percona/percona-backup-mongodb/pbm" + "github.com/percona/percona-backup-mongodb/internal/defs" ) func runRemappingTests(t *sharded.RemappingEnvironment) { @@ -11,15 +13,15 @@ func runRemappingTests(t *sharded.RemappingEnvironment) { return } - t.Donor.ApplyConfig(storage) + t.Donor.ApplyConfig(context.TODO(), storage) flush(t.Donor) - t.Recipient.ApplyConfig(storage) + t.Recipient.ApplyConfig(context.TODO(), storage) flush(t.Recipient) t.Donor.SetBallastData(1e4) runTest("Logical Backup & Restore with remapping Minio", - func() { t.BackupAndRestore(pbm.LogicalBackup) }) + func() { t.BackupAndRestore(defs.LogicalBackup) }) flushStore(t.Recipient) } diff --git a/e2e-tests/docker/docker-compose-remapping.yaml b/e2e-tests/docker/docker-compose-remapping.yaml index d9fd13c68..9b8e72a26 100644 --- a/e2e-tests/docker/docker-compose-remapping.yaml +++ b/e2e-tests/docker/docker-compose-remapping.yaml @@ -2,7 +2,7 @@ version: "3.4" services: tests: build: - dockerfile: ./e2e-tests/Dockerfile + dockerfile: ./e2e-tests/docker/tests.dockerfile context: ../.. args: - TESTS_BCP_TYPE=logical @@ -22,10 +22,10 @@ services: rs101: build: - dockerfile: ./e2e-tests/docker/mongodb-rs/Dockerfile + dockerfile: ./e2e-tests/docker/mongodb.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} hostname: rs101 labels: @@ -52,10 +52,10 @@ services: build: labels: - "com.percona.pbm.app=agent" - dockerfile: ./e2e-tests/docker/pbm-agent/Dockerfile + dockerfile: ./e2e-tests/docker/pbm.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} volumes: - ./conf:/etc/pbm @@ -67,10 +67,10 @@ services: rs201: build: - dockerfile: ./e2e-tests/docker/mongodb-rs/Dockerfile + dockerfile: ./e2e-tests/docker/mongodb.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} hostname: rs201 labels: @@ -97,10 +97,10 @@ services: build: labels: - "com.percona.pbm.app=agent" - dockerfile: ./e2e-tests/docker/pbm-agent/Dockerfile + dockerfile: ./e2e-tests/docker/pbm.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} volumes: - ./conf:/etc/pbm diff --git a/e2e-tests/docker/docker-compose-rs.yaml b/e2e-tests/docker/docker-compose-rs.yaml index 3fa3d90c3..8790e34ed 100644 --- a/e2e-tests/docker/docker-compose-rs.yaml +++ b/e2e-tests/docker/docker-compose-rs.yaml @@ -2,7 +2,7 @@ version: "3.4" services: tests: build: - dockerfile: ./e2e-tests/Dockerfile + dockerfile: ./e2e-tests/docker/tests.dockerfile context: ../.. args: - TESTS_BCP_TYPE=${TESTS_BCP_TYPE} @@ -26,10 +26,10 @@ services: environment: - "PBM_MONGODB_URI=mongodb://${BACKUP_USER:-bcp}:${MONGO_PASS:-test1234}@rs101:27017" build: - dockerfile: ./e2e-tests/docker/pbm-agent/Dockerfile + dockerfile: ./e2e-tests/docker/pbm.dockerfile context: ../.. 
args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} volumes: - ./conf:/etc/pbm @@ -39,10 +39,10 @@ services: rs101: build: - dockerfile: ./e2e-tests/docker/mongodb-rs/Dockerfile + dockerfile: ./e2e-tests/docker/mongodb.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} hostname: rs101 labels: @@ -52,17 +52,17 @@ services: - MONGO_USER=dba - BACKUP_USER=bcp - MONGO_PASS=test1234 - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} command: mongod --replSet rs1 --directoryperdb --port 27017 --dbpath=/data/db/ --storageEngine wiredTiger --keyFile /opt/keyFile --wiredTigerCacheSizeGB 1 volumes: - data-rs101:/data/db - ./scripts/start.sh:/opt/start.sh rs102: build: - dockerfile: ./e2e-tests/docker/mongodb-rs/Dockerfile + dockerfile: ./e2e-tests/docker/mongodb.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} hostname: rs102 labels: @@ -72,10 +72,10 @@ services: - data-rs102:/data/db rs103: build: - dockerfile: ./e2e-tests/docker/mongodb-rs/Dockerfile + dockerfile: ./e2e-tests/docker/mongodb.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} hostname: rs103 labels: @@ -96,10 +96,10 @@ services: build: labels: - "com.percona.pbm.app=agent" - dockerfile: ./e2e-tests/docker/pbm-agent/Dockerfile + dockerfile: ./e2e-tests/docker/pbm.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} volumes: - ./conf:/etc/pbm @@ -121,10 +121,10 @@ services: build: labels: - "com.percona.pbm.app=agent" - dockerfile: ./e2e-tests/docker/pbm-agent/Dockerfile + dockerfile: ./e2e-tests/docker/pbm.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} command: pbm-agent cap_add: @@ -145,10 +145,10 @@ services: build: labels: - "com.percona.pbm.app=agent" - dockerfile: ./e2e-tests/docker/pbm-agent/Dockerfile + dockerfile: ./e2e-tests/docker/pbm.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} command: pbm-agent cap_add: diff --git a/e2e-tests/docker/docker-compose-single.yaml b/e2e-tests/docker/docker-compose-single.yaml index 4580bce34..86bf9e628 100644 --- a/e2e-tests/docker/docker-compose-single.yaml +++ b/e2e-tests/docker/docker-compose-single.yaml @@ -2,7 +2,7 @@ version: "3.4" services: tests: build: - dockerfile: ./e2e-tests/Dockerfile + dockerfile: ./e2e-tests/docker/tests.dockerfile context: ../.. args: - TESTS_BCP_TYPE=${TESTS_BCP_TYPE} @@ -22,10 +22,10 @@ services: rs101: build: - dockerfile: ./e2e-tests/docker/mongodb-rs/Dockerfile + dockerfile: ./e2e-tests/docker/mongodb.dockerfile context: ../.. 
args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} hostname: rs101 labels: @@ -36,7 +36,7 @@ services: - BACKUP_USER=bcp - MONGO_PASS=test1234 - SINGLE_NODE=true - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} command: mongod --replSet rs1 --port 27017 --storageEngine wiredTiger --keyFile /opt/keyFile --wiredTigerCacheSizeGB 1 volumes: - data-rs101:/data/db @@ -53,10 +53,10 @@ services: build: labels: - "com.percona.pbm.app=agent" - dockerfile: ./e2e-tests/docker/pbm-agent/Dockerfile + dockerfile: ./e2e-tests/docker/pbm.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} volumes: - ./conf:/etc/pbm diff --git a/e2e-tests/docker/docker-compose.yaml b/e2e-tests/docker/docker-compose.yaml index 80d56dd44..26b929e3e 100644 --- a/e2e-tests/docker/docker-compose.yaml +++ b/e2e-tests/docker/docker-compose.yaml @@ -2,7 +2,7 @@ version: "3.4" services: tests: build: - dockerfile: ./e2e-tests/Dockerfile + dockerfile: ./e2e-tests/docker/tests.dockerfile context: ../.. args: - TESTS_BCP_TYPE=${TESTS_BCP_TYPE} @@ -28,10 +28,10 @@ services: environment: - "PBM_MONGODB_URI=mongodb://${BACKUP_USER:-bcp}:${MONGO_PASS:-test1234}@rs101:27017/?authSource=admin" build: - dockerfile: ./e2e-tests/docker/pbm-agent/Dockerfile + dockerfile: ./e2e-tests/docker/pbm.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} volumes: - ./conf:/etc/pbm @@ -41,10 +41,10 @@ services: cfg01: build: - dockerfile: ./e2e-tests/docker/mongodb-rs/Dockerfile + dockerfile: ./e2e-tests/docker/mongodb.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} hostname: cfg01 labels: @@ -55,17 +55,17 @@ services: - MONGO_USER=dba - BACKUP_USER=bcp - MONGO_PASS=test1234 - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} command: mongod --configsvr --dbpath /data/db --replSet cfg --bind_ip_all --port 27017 --keyFile /opt/keyFile --storageEngine wiredTiger --wiredTigerCacheSizeGB 1 volumes: - ./scripts/start.sh:/opt/start.sh - data-cfg01:/data/db cfg02: build: - dockerfile: ./e2e-tests/docker/mongodb-rs/Dockerfile + dockerfile: ./e2e-tests/docker/mongodb.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} hostname: cfg02 labels: @@ -75,10 +75,10 @@ services: - data-cfg02:/data/db cfg03: build: - dockerfile: ./e2e-tests/docker/mongodb-rs/Dockerfile + dockerfile: ./e2e-tests/docker/mongodb.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} hostname: cfg03 labels: @@ -97,10 +97,10 @@ services: build: labels: - "com.percona.pbm.app=agent" - dockerfile: ./e2e-tests/docker/pbm-agent/Dockerfile + dockerfile: ./e2e-tests/docker/pbm.dockerfile context: ../.. 
args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} command: pbm-agent cap_add: @@ -119,10 +119,10 @@ services: build: labels: - "com.percona.pbm.app=agent" - dockerfile: ./e2e-tests/docker/pbm-agent/Dockerfile + dockerfile: ./e2e-tests/docker/pbm.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} command: pbm-agent cap_add: @@ -141,10 +141,10 @@ services: build: labels: - "com.percona.pbm.app=agent" - dockerfile: ./e2e-tests/docker/pbm-agent/Dockerfile + dockerfile: ./e2e-tests/docker/pbm.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} command: pbm-agent cap_add: @@ -155,10 +155,10 @@ services: rs101: build: - dockerfile: ./e2e-tests/docker/mongodb-rs/Dockerfile + dockerfile: ./e2e-tests/docker/mongodb.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} hostname: rs101 labels: @@ -175,10 +175,10 @@ services: - ./scripts/start.sh:/opt/start.sh rs102: build: - dockerfile: ./e2e-tests/docker/mongodb-rs/Dockerfile + dockerfile: ./e2e-tests/docker/mongodb.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} hostname: rs102 labels: @@ -188,10 +188,10 @@ services: - data-rs102:/data/db rs103: build: - dockerfile: ./e2e-tests/docker/mongodb-rs/Dockerfile + dockerfile: ./e2e-tests/docker/mongodb.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} hostname: rs103 labels: @@ -210,10 +210,10 @@ services: build: labels: - "com.percona.pbm.app=agent" - dockerfile: ./e2e-tests/docker/pbm-agent/Dockerfile + dockerfile: ./e2e-tests/docker/pbm.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} volumes: - ./conf:/etc/pbm @@ -233,10 +233,10 @@ services: build: labels: - "com.percona.pbm.app=agent" - dockerfile: ./e2e-tests/docker/pbm-agent/Dockerfile + dockerfile: ./e2e-tests/docker/pbm.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} command: pbm-agent cap_add: @@ -255,10 +255,10 @@ services: build: labels: - "com.percona.pbm.app=agent" - dockerfile: ./e2e-tests/docker/pbm-agent/Dockerfile + dockerfile: ./e2e-tests/docker/pbm.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} command: pbm-agent cap_add: @@ -269,10 +269,10 @@ services: rs201: build: - dockerfile: ./e2e-tests/docker/mongodb-rs/Dockerfile + dockerfile: ./e2e-tests/docker/mongodb.dockerfile context: ../.. 
args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} hostname: rs201 labels: @@ -289,10 +289,10 @@ services: - ./scripts/start.sh:/opt/start.sh rs202: build: - dockerfile: ./e2e-tests/docker/mongodb-rs/Dockerfile + dockerfile: ./e2e-tests/docker/mongodb.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} hostname: rs202 labels: @@ -302,10 +302,10 @@ services: - data-rs202:/data/db rs203: build: - dockerfile: ./e2e-tests/docker/mongodb-rs/Dockerfile + dockerfile: ./e2e-tests/docker/mongodb.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} hostname: rs203 labels: @@ -324,10 +324,10 @@ services: build: labels: - "com.percona.pbm.app=agent" - dockerfile: ./e2e-tests/docker/pbm-agent/Dockerfile + dockerfile: ./e2e-tests/docker/pbm.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} command: pbm-agent cap_add: @@ -346,10 +346,10 @@ services: build: labels: - "com.percona.pbm.app=agent" - dockerfile: ./e2e-tests/docker/pbm-agent/Dockerfile + dockerfile: ./e2e-tests/docker/pbm.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} command: pbm-agent cap_add: @@ -368,10 +368,10 @@ services: build: labels: - "com.percona.pbm.app=agent" - dockerfile: ./e2e-tests/docker/pbm-agent/Dockerfile + dockerfile: ./e2e-tests/docker/pbm.dockerfile context: ../.. args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} command: pbm-agent cap_add: @@ -382,10 +382,10 @@ services: mongos: build: - dockerfile: ./e2e-tests/docker/mongodb-rs/Dockerfile + dockerfile: ./e2e-tests/docker/mongodb.dockerfile context: ../.. 
args: - - MONGODB_VERSION=${MONGODB_VERSION:-4.2} + - MONGODB_VERSION=${MONGODB_VERSION:-4.4} - MONGODB_IMAGE=${MONGODB_IMAGE:-percona/percona-server-mongodb} hostname: mongos labels: diff --git a/e2e-tests/docker/mongodb-rs/Dockerfile b/e2e-tests/docker/mongodb.dockerfile similarity index 91% rename from e2e-tests/docker/mongodb-rs/Dockerfile rename to e2e-tests/docker/mongodb.dockerfile index 68037c853..3d14eaa24 100644 --- a/e2e-tests/docker/mongodb-rs/Dockerfile +++ b/e2e-tests/docker/mongodb.dockerfile @@ -1,4 +1,4 @@ -ARG MONGODB_VERSION=4.2 +ARG MONGODB_VERSION=4.4 ARG MONGODB_IMAGE=percona/percona-server-mongodb FROM ${MONGODB_IMAGE}:${MONGODB_VERSION} USER root diff --git a/e2e-tests/docker/pbm-agent/Dockerfile b/e2e-tests/docker/pbm-agent/Dockerfile deleted file mode 100644 index 8aa04c8cf..000000000 --- a/e2e-tests/docker/pbm-agent/Dockerfile +++ /dev/null @@ -1,36 +0,0 @@ -ARG MONGODB_VERSION=4.2 -ARG MONGODB_IMAGE=percona/percona-server-mongodb - -FROM golang:1.19-bullseye as goimg - -FROM ${MONGODB_IMAGE}:${MONGODB_VERSION} - -USER 0 - -RUN dnf config-manager --enable ol8_u4_security_validation; \ - dnf config-manager --enable ol8_codeready_builder; \ - dnf -y install epel-release; \ - dnf -y update; \ - dnf -y install \ - libfaketime \ - iproute \ - krb5-devel \ - vim \ - make \ - gcc \ - nload \ - htop; \ - dnf -y install tc - -COPY --from=goimg /usr/local/go /usr/local/go -ENV PATH=$PATH:/usr/local/go/bin -ENV GOPATH /go -ENV PATH $GOPATH/bin:$PATH -RUN mkdir -p "$GOPATH/src" "$GOPATH/bin" && chmod -R 777 "$GOPATH" -RUN mkdir /opt/backups - -WORKDIR /opt/pbm -COPY . . -RUN make install-tests - -USER 1001 diff --git a/e2e-tests/docker/pbm.dockerfile b/e2e-tests/docker/pbm.dockerfile new file mode 100644 index 000000000..109ddd46b --- /dev/null +++ b/e2e-tests/docker/pbm.dockerfile @@ -0,0 +1,18 @@ +ARG MONGODB_VERSION=4.4 +ARG MONGODB_IMAGE=percona/percona-server-mongodb + +FROM ${MONGODB_IMAGE}:${MONGODB_VERSION} as mongo_image + +FROM oraclelinux:8 as base-build +WORKDIR /build + +RUN mkdir -p /data/db + +COPY --from=mongo_image /bin/mongod /bin/ + +RUN dnf update && dnf install make golang tc + +FROM base-build +COPY . . + +RUN make build-tests && cp /build/bin/* /bin/ diff --git a/e2e-tests/docker/tests.dockerfile b/e2e-tests/docker/tests.dockerfile new file mode 100644 index 000000000..756cd6020 --- /dev/null +++ b/e2e-tests/docker/tests.dockerfile @@ -0,0 +1,12 @@ +FROM oraclelinux:8 AS base-build +WORKDIR /build +RUN dnf update && dnf install golang + +FROM pbm:build-base AS pbm-build +ARG TESTS_BCP_TYPE +ENV TESTS_BCP_TYPE=${TESTS_BCP_TYPE} + +COPY . . 
+RUN go build -o /bin/pbm-test ./e2e-tests/cmd/pbm-test + +CMD ["pbm-test"] diff --git a/e2e-tests/functions b/e2e-tests/functions index 47efd0a36..5685edfdd 100644 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -168,7 +168,7 @@ start_cluster() { mkdir "${test_dir}/docker/backups" chmod -R 777 "${test_dir}/docker/backups" fi - export MONGODB_VERSION=${mongo_version:-"4.2"} + export MONGODB_VERSION=${mongo_version:-"4.4"} export MONGODB_IMAGE=${MONGODB_IMAGE:-"percona/percona-server-mongodb"} docker-compose -f $COMPOSE_PATH up --quiet-pull --no-color -d \ cfg01 cfg02 cfg03 rs101 rs102 rs103 rs201 rs202 rs203 mongos minio createbucket @@ -209,7 +209,7 @@ start_replset() { chmod -R 777 "${test_dir}/docker/backups" fi - export MONGODB_VERSION=${mongo_version:-"4.2"} + export MONGODB_VERSION=${mongo_version:-"4.4"} export MONGODB_IMAGE=${MONGODB_IMAGE:-"percona/percona-server-mongodb"} docker-compose -f $compose up --quiet-pull --no-color -d \ $nodes diff --git a/e2e-tests/pkg/pbm/clock_skew.go b/e2e-tests/pkg/pbm/clock_skew.go index 3f81baf36..c6d474ff8 100644 --- a/e2e-tests/pkg/pbm/clock_skew.go +++ b/e2e-tests/pkg/pbm/clock_skew.go @@ -1,21 +1,26 @@ package pbm import ( - "context" "log" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/docker/docker/api/types" "github.com/docker/docker/api/types/container" "github.com/docker/docker/api/types/filters" "github.com/docker/docker/api/types/network" docker "github.com/docker/docker/client" - "github.com/pkg/errors" + + "github.com/percona/percona-backup-mongodb/internal/errors" ) func ClockSkew(rsName, ts, dockerHost string) error { log.Printf("== Skew the clock for %s on the replicaset %s ", ts, rsName) - cn, err := docker.NewClient(dockerHost, "1.39", nil, nil) + cn, err := docker.NewClientWithOpts( + docker.WithHost(dockerHost), + docker.WithVersion("1.39"), + ) if err != nil { return errors.Wrap(err, "docker client") } diff --git a/e2e-tests/pkg/pbm/docker.go b/e2e-tests/pkg/pbm/docker.go index fb428e44d..0bac13144 100644 --- a/e2e-tests/pkg/pbm/docker.go +++ b/e2e-tests/pkg/pbm/docker.go @@ -1,7 +1,6 @@ package pbm import ( - "context" "io" "log" "strings" @@ -12,7 +11,9 @@ import ( "github.com/docker/docker/api/types/container" "github.com/docker/docker/api/types/filters" docker "github.com/docker/docker/client" - "github.com/pkg/errors" + + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/errors" ) type Docker struct { @@ -21,7 +22,10 @@ type Docker struct { } func NewDocker(ctx context.Context, host string) (*Docker, error) { - cn, err := docker.NewClient(host, "1.39", nil, nil) + cn, err := docker.NewClientWithOpts( + docker.WithHost(host), + docker.WithVersion("1.39"), + ) if err != nil { return nil, errors.Wrap(err, "docker client") } diff --git a/e2e-tests/pkg/pbm/mongo_pbm.go b/e2e-tests/pkg/pbm/mongo_pbm.go index 22e71c908..beb120650 100644 --- a/e2e-tests/pkg/pbm/mongo_pbm.go +++ b/e2e-tests/pkg/pbm/mongo_pbm.go @@ -1,15 +1,23 @@ package pbm import ( - "context" "time" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/lock" + 
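For reference (not part of the patch), the Docker client construction changed above in clock_skew.go and docker.go (and again in pbm_ctl.go below) moves from the removed docker.NewClient(host, version, nil, nil) constructor to functional options. A minimal sketch, with an assumed local socket as the host:

package main

import (
	"log"

	docker "github.com/docker/docker/client"
)

func main() {
	// WithHost and WithVersion replace the positional arguments of the
	// old NewClient constructor; the host below is only an example.
	cn, err := docker.NewClientWithOpts(
		docker.WithHost("unix:///var/run/docker.sock"),
		docker.WithVersion("1.39"),
	)
	if err != nil {
		log.Fatalln("docker client:", err)
	}
	defer cn.Close() //nolint:errcheck

	log.Println("docker API version in use:", cn.ClientVersion())
}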
"github.com/percona/percona-backup-mongodb/internal/query" + "github.com/percona/percona-backup-mongodb/internal/resync" + "github.com/percona/percona-backup-mongodb/internal/storage" + "github.com/percona/percona-backup-mongodb/internal/topo" + "github.com/percona/percona-backup-mongodb/internal/types" + "github.com/percona/percona-backup-mongodb/internal/util" "github.com/percona/percona-backup-mongodb/pbm" - "github.com/percona/percona-backup-mongodb/pbm/storage" ) type MongoPBM struct { @@ -31,49 +39,55 @@ func NewMongoPBM(ctx context.Context, connectionURI string) (*MongoPBM, error) { }, nil } -func (m *MongoPBM) SendCmd(cmd pbm.Cmd) error { - return m.p.SendCmd(cmd) +func (m *MongoPBM) SendCmd(cmd types.Cmd) error { + cmd.TS = time.Now().UTC().Unix() + _, err := m.p.Conn.CmdStreamCollection().InsertOne(m.ctx, cmd) + return err } -func (m *MongoPBM) BackupsList(limit int64) ([]pbm.BackupMeta, error) { - return m.p.BackupsList(limit) +func (m *MongoPBM) BackupsList(ctx context.Context, limit int64) ([]types.BackupMeta, error) { + return query.BackupsList(ctx, m.p.Conn, limit) } -func (m *MongoPBM) GetBackupMeta(bcpName string) (*pbm.BackupMeta, error) { - return m.p.GetBackupMeta(bcpName) +func (m *MongoPBM) GetBackupMeta(ctx context.Context, bcpName string) (*types.BackupMeta, error) { + return query.GetBackupMeta(ctx, m.p.Conn, bcpName) } -func (m *MongoPBM) DeleteBackup(bcpName string) error { - return m.p.DeleteBackup(bcpName, m.p.Logger().NewEvent(string(pbm.CmdDeleteBackup), "", "", primitive.Timestamp{})) +func (m *MongoPBM) DeleteBackup(ctx context.Context, bcpName string) error { + l := m.p.Logger().NewEvent(string(defs.CmdDeleteBackup), "", "", primitive.Timestamp{}) + return m.p.DeleteBackup(ctx, bcpName, l) } -func (m *MongoPBM) Storage() (storage.Storage, error) { - return m.p.GetStorage(m.p.Logger().NewEvent("", "", "", primitive.Timestamp{})) +func (m *MongoPBM) Storage(ctx context.Context) (storage.Storage, error) { + l := m.p.Logger().NewEvent("", "", "", primitive.Timestamp{}) + return util.GetStorage(ctx, m.p.Conn, l) } -func (m *MongoPBM) StoreResync() error { - return m.p.ResyncStorage(m.p.Logger().NewEvent(string(pbm.CmdResync), "", "", primitive.Timestamp{})) +func (m *MongoPBM) StoreResync(ctx context.Context) error { + l := m.p.Logger().NewEvent(string(defs.CmdResync), "", "", primitive.Timestamp{}) + return resync.ResyncStorage(ctx, m.p.Conn, l) } -func (m *MongoPBM) Conn() *mongo.Client { +func (m *MongoPBM) Conn() connect.Client { return m.p.Conn } // WaitOp waits up to waitFor duration until operations which acquires a given lock are finished -func (m *MongoPBM) WaitOp(lock *pbm.LockHeader, waitFor time.Duration) error { - return m.waitOp(lock, waitFor, m.p.GetLockData) +func (m *MongoPBM) WaitOp(ctx context.Context, lck *lock.LockHeader, waitFor time.Duration) error { + return m.waitOp(ctx, lck, waitFor, lock.GetLockData) } // WaitConcurentOp waits up to waitFor duration until operations which acquires a given lock are finished -func (m *MongoPBM) WaitConcurentOp(lock *pbm.LockHeader, waitFor time.Duration) error { - return m.waitOp(lock, waitFor, m.p.GetOpLockData) +func (m *MongoPBM) WaitConcurentOp(ctx context.Context, lck *lock.LockHeader, waitFor time.Duration) error { + return m.waitOp(ctx, lck, waitFor, lock.GetOpLockData) } // WaitOp waits up to waitFor duration until operations which acquires a given lock are finished func (m *MongoPBM) waitOp( - lock *pbm.LockHeader, + ctx context.Context, + lck *lock.LockHeader, waitFor time.Duration, - f 
func(*pbm.LockHeader) (pbm.LockData, error), + f func(ctx context.Context, m connect.Client, lh *lock.LockHeader) (lock.LockData, error), ) error { // just to be sure the check hasn't started before the lock were created time.Sleep(1 * time.Second) @@ -85,7 +99,7 @@ func (m *MongoPBM) waitOp( case <-tmr.C: return errors.Errorf("timeout reached") case <-tkr.C: - lock, err := f(lock) + lock, err := f(ctx, m.p.Conn, lck) if err != nil { // No lock, so operation has finished if errors.Is(err, mongo.ErrNoDocuments) { @@ -93,11 +107,11 @@ func (m *MongoPBM) waitOp( } return errors.Wrap(err, "get lock data") } - clusterTime, err := m.p.ClusterTime() + clusterTime, err := topo.GetClusterTime(ctx, m.p.Conn) if err != nil { return errors.Wrap(err, "read cluster time") } - if lock.Heartbeat.T+pbm.StaleFrameSec < clusterTime.T { + if lock.Heartbeat.T+defs.StaleFrameSec < clusterTime.T { return errors.Errorf("operation stale, last beat ts: %d", lock.Heartbeat.T) } } diff --git a/e2e-tests/pkg/pbm/mongod.go b/e2e-tests/pkg/pbm/mongod.go index d99f914f9..6cee0c751 100644 --- a/e2e-tests/pkg/pbm/mongod.go +++ b/e2e-tests/pkg/pbm/mongod.go @@ -1,12 +1,10 @@ package pbm import ( - "context" "fmt" "math/rand" "time" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" @@ -15,7 +13,9 @@ import ( "go.mongodb.org/mongo-driver/mongo/readpref" "go.mongodb.org/mongo-driver/mongo/writeconcern" - "github.com/percona/percona-backup-mongodb/pbm" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/topo" ) type Mongo struct { @@ -24,7 +24,7 @@ type Mongo struct { } func NewMongo(ctx context.Context, connectionURI string) (*Mongo, error) { - cn, err := connect(ctx, connectionURI, "e2e-tests") + cn, err := directConnect(ctx, connectionURI, "e2e-tests") if err != nil { return nil, errors.Wrap(err, "connect") } @@ -35,18 +35,13 @@ func NewMongo(ctx context.Context, connectionURI string) (*Mongo, error) { }, nil } -func connect(ctx context.Context, uri, appName string) (*mongo.Client, error) { - client, err := mongo.NewClient( - options.Client().ApplyURI(uri). - SetAppName(appName). - SetReadPreference(readpref.Primary()). - SetReadConcern(readconcern.Majority()). - SetWriteConcern(writeconcern.New(writeconcern.WMajority())), - ) - if err != nil { - return nil, errors.Wrap(err, "create mongo client") - } - err = client.Connect(ctx) +func directConnect(ctx context.Context, uri, appName string) (*mongo.Client, error) { + opts := options.Client().ApplyURI(uri). + SetAppName(appName). + SetReadPreference(readpref.Primary()). + SetReadConcern(readconcern.Majority()). 
+ SetWriteConcern(writeconcern.Majority()) + client, err := mongo.Connect(ctx, opts) if err != nil { return nil, errors.Wrap(err, "mongo connect") } @@ -284,8 +279,8 @@ func (m *Mongo) GetCounters() ([]Counter, error) { return data, nil } -func (m *Mongo) GetNodeInfo() (*pbm.NodeInfo, error) { - inf := &pbm.NodeInfo{} +func (m *Mongo) GetNodeInfo() (*topo.NodeInfo, error) { + inf := &topo.NodeInfo{} err := m.cn.Database("test").RunCommand(m.ctx, bson.M{"isMaster": 1}).Decode(inf) if err != nil { return nil, errors.Wrap(err, "run mongo command") diff --git a/e2e-tests/pkg/pbm/pbm_ctl.go b/e2e-tests/pkg/pbm/pbm_ctl.go index 7dcfa1ead..09afd7f1b 100644 --- a/e2e-tests/pkg/pbm/pbm_ctl.go +++ b/e2e-tests/pkg/pbm/pbm_ctl.go @@ -1,7 +1,6 @@ package pbm import ( - "context" "encoding/json" "fmt" "io" @@ -10,11 +9,13 @@ import ( "strings" "time" - "github.com/docker/docker/api/types" + dtypes "github.com/docker/docker/api/types" docker "github.com/docker/docker/client" - "github.com/pkg/errors" - "github.com/percona/percona-backup-mongodb/pbm" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/pbm/oplog" ) type Ctl struct { @@ -27,7 +28,10 @@ type Ctl struct { var backupNameRE = regexp.MustCompile(`Starting backup '([0-9\-\:TZ]+)'`) func NewCtl(ctx context.Context, host, pbmContainer string) (*Ctl, error) { - cn, err := docker.NewClient(host, "1.39", nil, nil) + cn, err := docker.NewClientWithOpts( + docker.WithHost(host), + docker.WithVersion("1.39"), + ) if err != nil { return nil, errors.Wrap(err, "docker client") } @@ -84,7 +88,7 @@ func (c *Ctl) Resync() error { return nil } -func (c *Ctl) Backup(typ pbm.BackupType, opts ...string) (string, error) { +func (c *Ctl) Backup(typ defs.BackupType, opts ...string) (string, error) { cmd := append([]string{"pbm", "backup", "--type", string(typ), "--compression", "s2"}, opts...) out, err := c.RunCmd(cmd...) 
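For reference (not part of the patch), the directConnect change above follows the driver's consolidated connection API: the two-step mongo.NewClient plus client.Connect becomes a single mongo.Connect call, and writeconcern.New(writeconcern.WMajority()) becomes writeconcern.Majority(). A minimal sketch with an illustrative URI:

package main

import (
	"context"
	"log"

	"go.mongodb.org/mongo-driver/mongo"
	"go.mongodb.org/mongo-driver/mongo/options"
	"go.mongodb.org/mongo-driver/mongo/readconcern"
	"go.mongodb.org/mongo-driver/mongo/readpref"
	"go.mongodb.org/mongo-driver/mongo/writeconcern"
)

func main() {
	ctx := context.Background()

	opts := options.Client().
		ApplyURI("mongodb://localhost:27017"). // illustrative URI
		SetAppName("e2e-tests").
		SetReadPreference(readpref.Primary()).
		SetReadConcern(readconcern.Majority()).
		SetWriteConcern(writeconcern.Majority())

	// mongo.Connect builds the client and connects in one call.
	client, err := mongo.Connect(ctx, opts)
	if err != nil {
		log.Fatalln("mongo connect:", err)
	}
	defer client.Disconnect(ctx) //nolint:errcheck
}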
if err != nil { @@ -109,17 +113,17 @@ type ListOut struct { } type SnapshotStat struct { - Name string `json:"name"` - Size int64 `json:"size,omitempty"` - Status pbm.Status `json:"status"` - Err string `json:"error,omitempty"` - RestoreTS int64 `json:"restoreTo"` - PBMVersion string `json:"pbmVersion"` + Name string `json:"name"` + Size int64 `json:"size,omitempty"` + Status defs.Status `json:"status"` + Err string `json:"error,omitempty"` + RestoreTS int64 `json:"restoreTo"` + PBMVersion string `json:"pbmVersion"` } type PitrRange struct { - Err string `json:"error,omitempty"` - Range pbm.Timeline `json:"range"` + Err string `json:"error,omitempty"` + Range oplog.Timeline `json:"range"` } func (c *Ctl) List() (*ListOut, error) { @@ -149,7 +153,7 @@ func skipCtl(str string) []byte { func (c *Ctl) CheckRestore(bcpName string, waitFor time.Duration) error { type rlist struct { Start int - Status pbm.Status + Status defs.Status Type string Name string Snapshot string @@ -190,9 +194,9 @@ func (c *Ctl) CheckRestore(bcpName string, waitFor time.Duration) error { } switch r.Status { - case pbm.StatusDone: + case defs.StatusDone: return nil - case pbm.StatusError: + case defs.StatusError: return errors.Errorf("failed with %s", r.Error) } } @@ -285,7 +289,7 @@ func (c *Ctl) PITRestoreClusterTime(t, i uint32) error { } func (c *Ctl) RunCmd(cmds ...string) (string, error) { - execConf := types.ExecConfig{ + execConf := dtypes.ExecConfig{ Env: c.env, Cmd: cmds, AttachStderr: true, @@ -296,13 +300,13 @@ func (c *Ctl) RunCmd(cmds ...string) (string, error) { return "", errors.Wrap(err, "ContainerExecCreate") } - container, err := c.cn.ContainerExecAttach(c.ctx, id.ID, types.ExecStartCheck{}) + container, err := c.cn.ContainerExecAttach(c.ctx, id.ID, dtypes.ExecStartCheck{}) if err != nil { return "", errors.Wrap(err, "attach to failed container") } defer container.Close() - tmr := time.NewTimer(time.Duration(float64(pbm.WaitBackupStart) * 1.5)) + tmr := time.NewTimer(time.Duration(float64(defs.WaitBackupStart) * 1.5)) tkr := time.NewTicker(500 * time.Millisecond) for { select { @@ -333,7 +337,7 @@ func (c *Ctl) RunCmd(cmds ...string) (string, error) { func (c *Ctl) ContainerLogs() (string, error) { r, err := c.cn.ContainerLogs( c.ctx, c.container, - types.ContainerLogsOptions{ + dtypes.ContainerLogsOptions{ ShowStderr: true, }) if err != nil { diff --git a/e2e-tests/pkg/tests/data.go b/e2e-tests/pkg/tests/data.go index 2a582abea..96458a8df 100644 --- a/e2e-tests/pkg/tests/data.go +++ b/e2e-tests/pkg/tests/data.go @@ -1,15 +1,17 @@ package tests import ( - "context" "math/rand" "runtime" "time" - "github.com/pkg/errors" + "github.com/percona/percona-backup-mongodb/internal/context" + "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/mongo" "golang.org/x/sync/errgroup" + + "github.com/percona/percona-backup-mongodb/internal/errors" ) // GenDBSpec describes a database to create @@ -36,7 +38,7 @@ type ShardingOptions struct { func Deploy(ctx context.Context, m *mongo.Client, dbs []GenDBSpec) error { ok, err := isMongos(ctx, m) if err != nil { - return errors.WithMessage(err, "ismongos") + return errors.Wrap(err, "ismongos") } if !ok { return errors.New("mongos connection required") @@ -48,7 +50,7 @@ func Deploy(ctx context.Context, m *mongo.Client, dbs []GenDBSpec) error { sharded := false if err := m.Database(db.Name).Drop(ctx); err != nil { - return errors.WithMessagef(err, "drop database: %q", db.Name) + return errors.Wrapf(err, "drop database: %q", db.Name) } for _, coll := range 
db.Collections { @@ -84,7 +86,7 @@ func Deploy(ctx context.Context, m *mongo.Client, dbs []GenDBSpec) error { func GenerateData(ctx context.Context, m *mongo.Client, dbs []GenDBSpec) error { ok, err := isMongos(ctx, m) if err != nil { - return errors.WithMessage(err, "ismongos") + return errors.Wrap(err, "ismongos") } if !ok { return errors.New("mongos connection required") diff --git a/e2e-tests/pkg/tests/sharded/backuper.go b/e2e-tests/pkg/tests/sharded/backuper.go index 2db22998b..8e23d33d8 100644 --- a/e2e-tests/pkg/tests/sharded/backuper.go +++ b/e2e-tests/pkg/tests/sharded/backuper.go @@ -5,6 +5,8 @@ import ( "math/rand" "time" + "github.com/percona/percona-backup-mongodb/internal/context" + pbmt "github.com/percona/percona-backup-mongodb/e2e-tests/pkg/pbm" ) @@ -34,7 +36,7 @@ func NewSnapshot(c *Cluster) *Snapshot { func (s *Snapshot) Backup() { s.bcpName = s.c.LogicalBackup() s.started <- struct{}{} - s.c.BackupWaitDone(s.bcpName) + s.c.BackupWaitDone(context.TODO(), s.bcpName) time.Sleep(time.Second * 1) s.done <- struct{}{} } @@ -44,7 +46,7 @@ func (s *Snapshot) WaitDone() { <-s.done } func (s *Snapshot) WaitStarted() { <-s.started } func (s *Snapshot) Restore() { - s.c.LogicalRestore(s.bcpName) + s.c.LogicalRestore(context.TODO(), s.bcpName) } type Pitr struct { @@ -70,7 +72,7 @@ func (p *Pitr) Backup() { bcpName := p.c.LogicalBackup() p.started <- struct{}{} p.c.pitrOn() - p.c.BackupWaitDone(bcpName) + p.c.BackupWaitDone(context.TODO(), bcpName) p.sdone <- struct{}{} ds := time.Second * 30 * time.Duration(rand.Int63n(5)+2) @@ -117,7 +119,7 @@ func NewPhysical(c *Cluster) *Physical { func (s *Physical) Backup() { s.bcpName = s.c.PhysicalBackup() s.started <- struct{}{} - s.c.BackupWaitDone(s.bcpName) + s.c.BackupWaitDone(context.TODO(), s.bcpName) time.Sleep(time.Second * 1) s.done <- struct{}{} } @@ -127,5 +129,5 @@ func (s *Physical) WaitDone() { <-s.done } func (s *Physical) WaitStarted() { <-s.started } func (s *Physical) Restore() { - s.c.PhysicalRestore(s.bcpName) + s.c.PhysicalRestore(context.TODO(), s.bcpName) } diff --git a/e2e-tests/pkg/tests/sharded/cluster.go b/e2e-tests/pkg/tests/sharded/cluster.go index c1bdf959b..6642a3703 100644 --- a/e2e-tests/pkg/tests/sharded/cluster.go +++ b/e2e-tests/pkg/tests/sharded/cluster.go @@ -1,19 +1,21 @@ package sharded import ( - "context" "encoding/json" "fmt" "log" "time" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" pbmt "github.com/percona/percona-backup-mongodb/e2e-tests/pkg/pbm" - "github.com/percona/percona-backup-mongodb/pbm" - "github.com/percona/percona-backup-mongodb/pbm/storage" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/lock" + "github.com/percona/percona-backup-mongodb/internal/storage" + "github.com/percona/percona-backup-mongodb/internal/types" ) type Cluster struct { @@ -80,7 +82,7 @@ func (c *Cluster) Reconnect() { } } -func (c *Cluster) ApplyConfig(file string) { +func (c *Cluster) ApplyConfig(ctx context.Context, file string) { log.Println("apply config") err := c.pbm.ApplyConfig(file) if err != nil { @@ -89,11 +91,9 @@ func (c *Cluster) ApplyConfig(file string) { } log.Println("waiting for the new storage to resync") - err = c.mongopbm.WaitOp(&pbm.LockHeader{ - Type: pbm.CmdResync, - }, - time.Minute*5, - ) + err = c.mongopbm.WaitOp(ctx, + 
&lock.LockHeader{Type: defs.CmdResync}, + time.Minute*5) if err != nil { log.Fatalf("waiting for the store resync: %v", err) } @@ -118,11 +118,11 @@ func (c *Cluster) DeleteBallast() { log.Printf("deleted %d documents", deleted) } -func (c *Cluster) LogicalRestore(bcpName string) { - c.LogicalRestoreWithParams(bcpName, []string{}) +func (c *Cluster) LogicalRestore(ctx context.Context, bcpName string) { + c.LogicalRestoreWithParams(ctx, bcpName, []string{}) } -func (c *Cluster) LogicalRestoreWithParams(bcpName string, options []string) { +func (c *Cluster) LogicalRestoreWithParams(_ context.Context, bcpName string, options []string) { log.Println("restoring the backup") _, err := c.pbm.Restore(bcpName, options) if err != nil { @@ -139,11 +139,11 @@ func (c *Cluster) LogicalRestoreWithParams(bcpName string, options []string) { log.Printf("restore finished '%s'\n", bcpName) } -func (c *Cluster) PhysicalRestore(bcpName string) { - c.PhysicalRestoreWithParams(bcpName, []string{}) +func (c *Cluster) PhysicalRestore(ctx context.Context, bcpName string) { + c.PhysicalRestoreWithParams(ctx, bcpName, []string{}) } -func (c *Cluster) PhysicalRestoreWithParams(bcpName string, options []string) { +func (c *Cluster) PhysicalRestoreWithParams(ctx context.Context, bcpName string, options []string) { log.Println("restoring the backup") name, err := c.pbm.Restore(bcpName, options) if err != nil { @@ -151,7 +151,7 @@ func (c *Cluster) PhysicalRestoreWithParams(bcpName string, options []string) { } log.Println("waiting for the restore", name) - err = c.waitPhyRestore(name, time.Minute*25) + err = c.waitPhyRestore(ctx, name, time.Minute*25) if err != nil { log.Fatalln("check backup restore:", err) } @@ -201,11 +201,9 @@ func (c *Cluster) PhysicalRestoreWithParams(bcpName string, options []string) { c.Reconnect() - err = c.mongopbm.WaitOp(&pbm.LockHeader{ - Type: pbm.CmdResync, - }, - time.Minute*5, - ) + err = c.mongopbm.WaitOp(ctx, + &lock.LockHeader{Type: defs.CmdResync}, + time.Minute*5) if err != nil { log.Fatalf("waiting for resync: %v", err) } @@ -215,13 +213,13 @@ func (c *Cluster) PhysicalRestoreWithParams(bcpName string, options []string) { log.Printf("restore finished '%s'\n", bcpName) } -func (c *Cluster) waitPhyRestore(name string, waitFor time.Duration) error { - stg, err := c.mongopbm.Storage() +func (c *Cluster) waitPhyRestore(ctx context.Context, name string, waitFor time.Duration) error { + stg, err := c.mongopbm.Storage(ctx) if err != nil { return errors.Wrap(err, "get storage") } - fname := fmt.Sprintf("%s/%s.json", pbm.PhysRestoresDir, name) + fname := fmt.Sprintf("%s/%s.json", defs.PhysRestoresDir, name) log.Println("checking", fname) tmr := time.NewTimer(waitFor) @@ -236,7 +234,7 @@ func (c *Cluster) waitPhyRestore(name string, waitFor time.Duration) error { return errors.Errorf("timeout reached. 
status:\n%s", list) case <-tkr.C: rmeta, err := getRestoreMetaStg(fname, stg) - if errors.Is(err, pbm.ErrNotFound) { + if errors.Is(err, errors.ErrNotFound) { continue } if err != nil { @@ -244,19 +242,19 @@ func (c *Cluster) waitPhyRestore(name string, waitFor time.Duration) error { } switch rmeta.Status { - case pbm.StatusDone: + case defs.StatusDone: return nil - case pbm.StatusError: + case defs.StatusError: return errors.Errorf("restore failed with: %s", rmeta.Error) } } } } -func getRestoreMetaStg(name string, stg storage.Storage) (*pbm.RestoreMeta, error) { +func getRestoreMetaStg(name string, stg storage.Storage) (*types.RestoreMeta, error) { _, err := stg.FileStat(name) if errors.Is(err, storage.ErrNotExist) { - return nil, pbm.ErrNotFound + return nil, errors.ErrNotFound } if err != nil { return nil, errors.Wrap(err, "get stat") @@ -267,7 +265,7 @@ func getRestoreMetaStg(name string, stg storage.Storage) (*pbm.RestoreMeta, erro return nil, errors.Wrapf(err, "get file %s", name) } - rmeta := &pbm.RestoreMeta{} + rmeta := &types.RestoreMeta{} err = json.NewDecoder(src).Decode(rmeta) if err != nil { return nil, errors.Wrapf(err, "decode meta %s", name) @@ -311,11 +309,11 @@ func (c *Cluster) PITRestore(t time.Time) { } func (c *Cluster) LogicalBackup() string { - return c.backup(pbm.LogicalBackup) + return c.backup(defs.LogicalBackup) } func (c *Cluster) PhysicalBackup() string { - return c.backup(pbm.PhysicalBackup) + return c.backup(defs.PhysicalBackup) } func (c *Cluster) ReplayOplog(a, b time.Time) { @@ -334,7 +332,7 @@ func (c *Cluster) ReplayOplog(a, b time.Time) { log.Printf("replay oplog from %v to %v finished", a, b) } -func (c *Cluster) backup(typ pbm.BackupType, opts ...string) string { +func (c *Cluster) backup(typ defs.BackupType, opts ...string) string { log.Println("starting backup") bcpName, err := c.pbm.Backup(typ, opts...) 
if err != nil { @@ -346,17 +344,17 @@ func (c *Cluster) backup(typ pbm.BackupType, opts ...string) string { return bcpName } -func (c *Cluster) BackupWaitDone(bcpName string) { +func (c *Cluster) BackupWaitDone(ctx context.Context, bcpName string) { log.Println("waiting for the backup") ts := time.Now() - err := c.checkBackup(bcpName, time.Minute*25) + err := c.checkBackup(ctx, bcpName, time.Minute*25) if err != nil { log.Fatalln("check backup state:", err) } // locks being released NOT immediately after the backup succeed // see https://github.com/percona/percona-backup-mongodb/blob/v1.1.3/agent/agent.go#L128-L143 - needToWait := pbm.WaitBackupStart + time.Second - time.Since(ts) + needToWait := defs.WaitBackupStart + time.Second - time.Since(ts) if needToWait > 0 { log.Printf("waiting for the lock to be released for %s", needToWait) time.Sleep(needToWait) @@ -402,12 +400,12 @@ func (c *Cluster) DataChecker() func() { // Flush removes all backups, restores and PITR chunks metadata from the PBM db func (c *Cluster) Flush() error { cols := []string{ - pbm.BcpCollection, - pbm.PITRChunksCollection, - pbm.RestoresCollection, + defs.BcpCollection, + defs.PITRChunksCollection, + defs.RestoresCollection, } for _, cl := range cols { - _, err := c.mongopbm.Conn().Database(pbm.DB).Collection(cl).DeleteMany(context.Background(), bson.M{}) + _, err := c.mongopbm.Conn().MongoClient().Database(defs.DB).Collection(cl).DeleteMany(context.Background(), bson.M{}) if err != nil { return errors.Wrapf(err, "delete many from %s", cl) } @@ -416,8 +414,8 @@ func (c *Cluster) Flush() error { return nil } -func (c *Cluster) FlushStorage() error { - stg, err := c.mongopbm.Storage() +func (c *Cluster) FlushStorage(ctx context.Context) error { + stg, err := c.mongopbm.Storage(ctx) if err != nil { return errors.Wrap(err, "get storage") } @@ -437,7 +435,7 @@ func (c *Cluster) FlushStorage() error { return nil } -func (c *Cluster) checkBackup(bcpName string, waitFor time.Duration) error { +func (c *Cluster) checkBackup(ctx context.Context, bcpName string, waitFor time.Duration) error { tmr := time.NewTimer(waitFor) tkr := time.NewTicker(500 * time.Millisecond) for { @@ -449,19 +447,19 @@ func (c *Cluster) checkBackup(bcpName string, waitFor time.Duration) error { } return errors.Errorf("timeout reached. 
pbm status:\n%s", sts) case <-tkr.C: - m, err := c.mongopbm.GetBackupMeta(bcpName) - if errors.Is(err, pbm.ErrNotFound) { + m, err := c.mongopbm.GetBackupMeta(ctx, bcpName) + if errors.Is(err, errors.ErrNotFound) { continue } if err != nil { return errors.Wrap(err, "get backup meta") } switch m.Status { - case pbm.StatusDone: + case defs.StatusDone: // to be sure the lock is released time.Sleep(time.Second * 3) return nil - case pbm.StatusError: + case defs.StatusError: return m.Error() } } diff --git a/e2e-tests/pkg/tests/sharded/test_backup_cancellation.go b/e2e-tests/pkg/tests/sharded/test_backup_cancellation.go index 62adf009c..40b58df8a 100644 --- a/e2e-tests/pkg/tests/sharded/test_backup_cancellation.go +++ b/e2e-tests/pkg/tests/sharded/test_backup_cancellation.go @@ -8,10 +8,13 @@ import ( "strings" "time" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/minio/minio-go" "gopkg.in/yaml.v2" - "github.com/percona/percona-backup-mongodb/pbm" + "github.com/percona/percona-backup-mongodb/internal/config" + "github.com/percona/percona-backup-mongodb/internal/defs" ) func (c *Cluster) BackupCancellation(storage string) { @@ -30,16 +33,16 @@ func (c *Cluster) BackupCancellation(storage string) { checkNoBackupFiles(bcpName, storage) log.Println("check backup state") - m, err := c.mongopbm.GetBackupMeta(bcpName) + m, err := c.mongopbm.GetBackupMeta(context.TODO(), bcpName) if err != nil { log.Fatalf("Error: get metadata for backup %s: %v", bcpName, err) } - if m.Status != pbm.StatusCancelled { - log.Fatalf("Error: wrong backup status, expect %s, got %v", pbm.StatusCancelled, m.Status) + if m.Status != defs.StatusCancelled { + log.Fatalf("Error: wrong backup status, expect %s, got %v", defs.StatusCancelled, m.Status) } - needToWait := pbm.WaitBackupStart + time.Second - time.Since(ts) + needToWait := defs.WaitBackupStart + time.Second - time.Since(ts) if needToWait > 0 { log.Printf("waiting for the lock to be released for %s", needToWait) time.Sleep(needToWait) @@ -53,7 +56,7 @@ func checkNoBackupFiles(backupName, conf string) { log.Fatalln("Error: unable to read config file:", err) } - var cfg pbm.Config + var cfg config.Config err = yaml.UnmarshalStrict(buf, &cfg) if err != nil { log.Fatalln("Error: unmarshal yaml:", err) diff --git a/e2e-tests/pkg/tests/sharded/test_basic.go b/e2e-tests/pkg/tests/sharded/test_basic.go index ec2f1de48..baa5a112b 100644 --- a/e2e-tests/pkg/tests/sharded/test_basic.go +++ b/e2e-tests/pkg/tests/sharded/test_basic.go @@ -3,13 +3,15 @@ package sharded import ( "log" - "github.com/percona/percona-backup-mongodb/pbm" + "github.com/percona/percona-backup-mongodb/internal/context" + + "github.com/percona/percona-backup-mongodb/internal/defs" ) -func (c *Cluster) BackupAndRestore(typ pbm.BackupType) { +func (c *Cluster) BackupAndRestore(typ defs.BackupType) { backup := c.LogicalBackup restore := c.LogicalRestore - if typ == pbm.PhysicalBackup { + if typ == defs.PhysicalBackup { backup = c.PhysicalBackup restore = c.PhysicalRestore } @@ -17,17 +19,17 @@ func (c *Cluster) BackupAndRestore(typ pbm.BackupType) { checkData := c.DataChecker() bcpName := backup() - c.BackupWaitDone(bcpName) + c.BackupWaitDone(context.TODO(), bcpName) c.DeleteBallast() // to be sure the backup didn't vanish after the resync // i.e. 
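The waitOp and checkBackup loops shown above share the same timer/ticker polling shape. A generic sketch of that shape (not part of the patch; poll is a stand-in for the lock or backup-meta check):

package waitop

import (
	"time"

	"github.com/percona/percona-backup-mongodb/internal/errors"
)

// waitUntilDone polls poll() every 500ms until it reports done, returns an
// error, or waitFor elapses -- the same shape as waitOp and checkBackup.
func waitUntilDone(waitFor time.Duration, poll func() (done bool, err error)) error {
	tmr := time.NewTimer(waitFor)
	defer tmr.Stop()

	tkr := time.NewTicker(500 * time.Millisecond)
	defer tkr.Stop()

	for {
		select {
		case <-tmr.C:
			return errors.New("timeout reached")
		case <-tkr.C:
			done, err := poll()
			if err != nil {
				return errors.Wrap(err, "poll")
			}
			if done {
				return nil
			}
		}
	}
}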
resync finished correctly log.Println("resync backup list") - err := c.mongopbm.StoreResync() + err := c.mongopbm.StoreResync(context.TODO()) if err != nil { log.Fatalln("Error: resync backup lists:", err) } - restore(bcpName) + restore(context.TODO(), bcpName) checkData() } diff --git a/e2e-tests/pkg/tests/sharded/test_bounds_check.go b/e2e-tests/pkg/tests/sharded/test_bounds_check.go index 34a227bea..9edec35e1 100644 --- a/e2e-tests/pkg/tests/sharded/test_bounds_check.go +++ b/e2e-tests/pkg/tests/sharded/test_bounds_check.go @@ -1,7 +1,6 @@ package sharded import ( - "context" "log" "math/rand" "time" @@ -10,7 +9,8 @@ import ( "golang.org/x/mod/semver" pbmt "github.com/percona/percona-backup-mongodb/e2e-tests/pkg/pbm" - "github.com/percona/percona-backup-mongodb/pbm" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" ) type scounter struct { @@ -19,18 +19,18 @@ type scounter struct { } func lte(t1, t2 primitive.Timestamp) bool { - return primitive.CompareTimestamp(t1, t2) <= 0 + return t1.Compare(t2) <= 0 } func lt(t1, t2 primitive.Timestamp) bool { - return primitive.CompareTimestamp(t1, t2) < 0 + return t1.Compare(t2) < 0 } -func (c *Cluster) BackupBoundsCheck(typ pbm.BackupType, mongoVersion string) { +func (c *Cluster) BackupBoundsCheck(typ defs.BackupType, mongoVersion string) { inRange := lte backup := c.LogicalBackup restore := c.LogicalRestore - if typ == pbm.PhysicalBackup { + if typ == defs.PhysicalBackup { backup = c.PhysicalBackup restore = c.PhysicalRestore @@ -56,14 +56,14 @@ func (c *Cluster) BackupBoundsCheck(typ pbm.BackupType, mongoVersion string) { bcpName := backup() - c.BackupWaitDone(bcpName) + c.BackupWaitDone(context.TODO(), bcpName) time.Sleep(time.Second * 1) for _, c := range counters { c.cancel() } - bcpMeta, err := c.mongopbm.GetBackupMeta(bcpName) + bcpMeta, err := c.mongopbm.GetBackupMeta(context.TODO(), bcpName) if err != nil { log.Fatalf("ERROR: get backup '%s' metadata: %v\n", bcpName, err) } @@ -74,7 +74,7 @@ func (c *Cluster) BackupBoundsCheck(typ pbm.BackupType, mongoVersion string) { c.bcheckClear(name, shard) } - restore(bcpName) + restore(context.TODO(), bcpName) for name, shard := range c.shards { c.bcheckCheck(name, shard, <-counters[name].data, bcpMeta.LastWriteTS, inRange) diff --git a/e2e-tests/pkg/tests/sharded/test_clock_skew.go b/e2e-tests/pkg/tests/sharded/test_clock_skew.go index 596f1ff4d..887097df2 100644 --- a/e2e-tests/pkg/tests/sharded/test_clock_skew.go +++ b/e2e-tests/pkg/tests/sharded/test_clock_skew.go @@ -4,10 +4,10 @@ import ( "log" pbmt "github.com/percona/percona-backup-mongodb/e2e-tests/pkg/pbm" - "github.com/percona/percona-backup-mongodb/pbm" + "github.com/percona/percona-backup-mongodb/internal/defs" ) -func (c *Cluster) ClockSkew(typ pbm.BackupType, mongoVersion string) { +func (c *Cluster) ClockSkew(typ defs.BackupType, mongoVersion string) { timeShifts := []string{ "+90m", "-195m", "+2d", "-7h", "+11m", "+42d", "-13h", } diff --git a/e2e-tests/pkg/tests/sharded/test_delete_backup.go b/e2e-tests/pkg/tests/sharded/test_delete_backup.go index 0126713c7..97dd10b6d 100644 --- a/e2e-tests/pkg/tests/sharded/test_delete_backup.go +++ b/e2e-tests/pkg/tests/sharded/test_delete_backup.go @@ -8,10 +8,14 @@ import ( "strings" "time" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/minio/minio-go" "gopkg.in/yaml.v2" - "github.com/percona/percona-backup-mongodb/pbm" + "github.com/percona/percona-backup-mongodb/internal/config" + 
"github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/lock" ) type backupDelete struct { @@ -35,7 +39,7 @@ func (c *Cluster) BackupDelete(storage string) { name: bcpName, ts: ts, } - c.BackupWaitDone(bcpName) + c.BackupWaitDone(context.TODO(), bcpName) time.Sleep(time.Minute) } @@ -48,7 +52,7 @@ func (c *Cluster) BackupDelete(storage string) { log.Fatalf("ERROR: delete backup %s: %v", backups[4].name, err) } log.Println("wait for delete") - err = c.mongopbm.WaitConcurentOp(&pbm.LockHeader{Type: pbm.CmdDeleteBackup}, time.Minute*5) + err = c.mongopbm.WaitConcurentOp(context.TODO(), &lock.LockHeader{Type: defs.CmdDeleteBackup}, time.Minute*5) if err != nil { log.Fatalf("waiting for the delete: %v", err) } @@ -61,7 +65,7 @@ func (c *Cluster) BackupDelete(storage string) { log.Fatalf("ERROR: delete backups older than %s: %v", backups[3].name, err) } log.Println("wait for delete") - err = c.mongopbm.WaitConcurentOp(&pbm.LockHeader{Type: pbm.CmdDeleteBackup}, time.Minute*5) + err = c.mongopbm.WaitConcurentOp(context.TODO(), &lock.LockHeader{Type: defs.CmdDeleteBackup}, time.Minute*5) if err != nil { log.Fatalf("waiting for the delete: %v", err) } @@ -77,7 +81,7 @@ func (c *Cluster) BackupDelete(storage string) { log.Println("should be only backups", left) checkArtefacts(storage, left) - blist, err := c.mongopbm.BackupsList(0) + blist, err := c.mongopbm.BackupsList(context.TODO(), 0) if err != nil { log.Fatalln("ERROR: get backups list", err) } @@ -110,7 +114,7 @@ func (c *Cluster) BackupDelete(storage string) { log.Fatalf("ERROR: delete pitr older than %s: %v", tsp.Format("2006-01-02T15:04:05"), err) } log.Println("wait for delete-pitr") - err = c.mongopbm.WaitConcurentOp(&pbm.LockHeader{Type: pbm.CmdDeletePITR}, time.Minute*5) + err = c.mongopbm.WaitConcurentOp(context.TODO(), &lock.LockHeader{Type: defs.CmdDeletePITR}, time.Minute*5) if err != nil { log.Fatalf("ERROR: waiting for the delete-pitr: %v", err) } @@ -140,7 +144,7 @@ func (c *Cluster) BackupDelete(storage string) { log.Fatalf("ERROR: delete all pitr: %v", err) } log.Println("wait for delete-pitr all") - err = c.mongopbm.WaitConcurentOp(&pbm.LockHeader{Type: pbm.CmdDeletePITR}, time.Minute*5) + err = c.mongopbm.WaitConcurentOp(context.TODO(), &lock.LockHeader{Type: defs.CmdDeletePITR}, time.Minute*5) if err != nil { log.Fatalf("ERROR: waiting for the delete-pitr: %v", err) } @@ -158,7 +162,7 @@ func (c *Cluster) BackupDelete(storage string) { log.Println("trying to restore from", backups[3]) c.DeleteBallast() - c.LogicalRestore(backups[3].name) + c.LogicalRestore(context.TODO(), backups[3].name) checkData() } @@ -173,7 +177,7 @@ func checkArtefacts(conf string, shouldStay map[string]struct{}) { log.Fatalln("ERROR: unable to read config file:", err) } - var cfg pbm.Config + var cfg config.Config err = yaml.UnmarshalStrict(buf, &cfg) if err != nil { log.Fatalln("ERROR: unmarshal yaml:", err) @@ -197,7 +201,7 @@ func checkArtefacts(conf string, shouldStay map[string]struct{}) { } for object := range mc.ListObjects(stg.S3.Bucket, stg.S3.Prefix, true, nil) { - if strings.Contains(object.Key, pbm.StorInitFile) || strings.Contains(object.Key, "/pbmPitr/") { + if strings.Contains(object.Key, defs.StorInitFile) || strings.Contains(object.Key, "/pbmPitr/") { continue } if object.Err != nil { @@ -229,7 +233,7 @@ func (c *Cluster) BackupNotDeleteRunning() { "Output: %s\nStderr:%v\nBackups list:\n%v\n%v", bcpName, o, err, list, lerr) } - c.BackupWaitDone(bcpName) + 
c.BackupWaitDone(context.TODO(), bcpName) time.Sleep(time.Second * 2) } diff --git a/e2e-tests/pkg/tests/sharded/test_dist_commit.go b/e2e-tests/pkg/tests/sharded/test_dist_commit.go index 9020e0378..7df84621f 100644 --- a/e2e-tests/pkg/tests/sharded/test_dist_commit.go +++ b/e2e-tests/pkg/tests/sharded/test_dist_commit.go @@ -1,9 +1,10 @@ package sharded import ( - "context" "log" + "github.com/percona/percona-backup-mongodb/internal/context" + "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" ) diff --git a/e2e-tests/pkg/tests/sharded/test_dr_restart_agents.go b/e2e-tests/pkg/tests/sharded/test_dr_restart_agents.go index 2de5398b4..b3107a16a 100644 --- a/e2e-tests/pkg/tests/sharded/test_dr_restart_agents.go +++ b/e2e-tests/pkg/tests/sharded/test_dr_restart_agents.go @@ -5,7 +5,9 @@ import ( "strings" "time" - "github.com/percona/percona-backup-mongodb/pbm" + "github.com/percona/percona-backup-mongodb/internal/context" + + "github.com/percona/percona-backup-mongodb/internal/defs" ) const ( @@ -34,20 +36,20 @@ func (c *Cluster) RestartAgents() { log.Println("Agents has stopped", rs) } - waitfor := time.Duration(pbm.StaleFrameSec+10) * time.Second + waitfor := time.Duration(defs.StaleFrameSec+10) * time.Second log.Println("Sleeping for", waitfor) time.Sleep(waitfor) - meta, err := c.mongopbm.GetBackupMeta(bcpName) + meta, err := c.mongopbm.GetBackupMeta(context.TODO(), bcpName) if err != nil { log.Fatalf("ERROR: get metadata for the backup %s: %v", bcpName, err) } - if meta.Status != pbm.StatusError || + if meta.Status != defs.StatusError || meta.Error() == nil || meta.Error().Error() != pbmLostAgentsErr && !strings.Contains(meta.Error().Error(), pbmLostShardErr) { log.Fatalf("ERROR: wrong state of the backup %s. Expect: %s/%s|...%s... 
Got: %s/%s", - bcpName, pbm.StatusError, pbmLostAgentsErr, pbmLostShardErr, meta.Status, meta.Error()) + bcpName, defs.StatusError, pbmLostAgentsErr, pbmLostShardErr, meta.Status, meta.Error()) } for rs := range c.shards { @@ -62,5 +64,5 @@ func (c *Cluster) RestartAgents() { log.Printf("Sleeping for %v for agents to report status", time.Second*7) time.Sleep(time.Second * 7) log.Println("Trying a new backup") - c.BackupAndRestore(pbm.LogicalBackup) + c.BackupAndRestore(defs.LogicalBackup) } diff --git a/e2e-tests/pkg/tests/sharded/test_incremental_backup.go b/e2e-tests/pkg/tests/sharded/test_incremental_backup.go index fe952823d..a29618f2d 100644 --- a/e2e-tests/pkg/tests/sharded/test_incremental_backup.go +++ b/e2e-tests/pkg/tests/sharded/test_incremental_backup.go @@ -5,9 +5,11 @@ import ( "math/rand" "time" + "github.com/percona/percona-backup-mongodb/internal/context" + "golang.org/x/mod/semver" - pbmt "github.com/percona/percona-backup-mongodb/pbm" + "github.com/percona/percona-backup-mongodb/internal/defs" ) func (c *Cluster) IncrementalBackup(mongoVersion string) { @@ -31,13 +33,13 @@ func (c *Cluster) IncrementalBackup(mongoVersion string) { } } - bcpName := c.backup(pbmt.IncrementalBackup, "--base") - c.BackupWaitDone(bcpName) + bcpName := c.backup(defs.IncrementalBackup, "--base") + c.BackupWaitDone(context.TODO(), bcpName) time.Sleep(time.Second * 1) for i := 0; i < 3; i++ { - bcpName = c.backup(pbmt.IncrementalBackup) - c.BackupWaitDone(bcpName) + bcpName = c.backup(defs.IncrementalBackup) + c.BackupWaitDone(context.TODO(), bcpName) time.Sleep(time.Second * 1) } @@ -48,7 +50,7 @@ func (c *Cluster) IncrementalBackup(mongoVersion string) { c.cancel() } - bcpMeta, err := c.mongopbm.GetBackupMeta(bcpName) + bcpMeta, err := c.mongopbm.GetBackupMeta(context.TODO(), bcpName) if err != nil { log.Fatalf("ERROR: get backup '%s' metadata: %v\n", bcpName, err) } @@ -59,7 +61,7 @@ func (c *Cluster) IncrementalBackup(mongoVersion string) { c.bcheckClear(name, shard) } - c.PhysicalRestore(bcpName) + c.PhysicalRestore(context.TODO(), bcpName) for name, shard := range c.shards { c.bcheckCheck(name, shard, <-counters[name].data, bcpMeta.LastWriteTS, inRange) diff --git a/e2e-tests/pkg/tests/sharded/test_leader_lag.go b/e2e-tests/pkg/tests/sharded/test_leader_lag.go index eadcf8dc3..65af14bff 100644 --- a/e2e-tests/pkg/tests/sharded/test_leader_lag.go +++ b/e2e-tests/pkg/tests/sharded/test_leader_lag.go @@ -4,8 +4,10 @@ import ( "log" "time" - "github.com/percona/percona-backup-mongodb/pbm" - "github.com/percona/percona-backup-mongodb/pbm/compress" + "github.com/percona/percona-backup-mongodb/internal/context" + + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/types" ) // LeaderLag checks if cluster deals with leader lag during backup start @@ -23,12 +25,12 @@ func (c *Cluster) LeaderLag() { bcpName := time.Now().UTC().Format(time.RFC3339) log.Println("Starting backup", bcpName) - err = c.mongopbm.SendCmd(pbm.Cmd{ - Cmd: pbm.CmdBackup, - Backup: &pbm.BackupCmd{ - Type: pbm.LogicalBackup, + err = c.mongopbm.SendCmd(types.Cmd{ + Cmd: defs.CmdBackup, + Backup: &types.BackupCmd{ + Type: defs.LogicalBackup, Name: bcpName, - Compression: compress.CompressionTypeS2, + Compression: defs.CompressionTypeS2, }, }) if err != nil { @@ -46,17 +48,17 @@ func (c *Cluster) LeaderLag() { } log.Println("Agents resumed", c.confsrv) - c.BackupWaitDone(bcpName) + c.BackupWaitDone(context.TODO(), bcpName) c.DeleteBallast() // to be sure the backup didn't 
vanish after the resync // i.e. resync finished correctly log.Println("resync backup list") - err = c.mongopbm.StoreResync() + err = c.mongopbm.StoreResync(context.TODO()) if err != nil { log.Fatalln("Error: resync backup lists:", err) } - c.LogicalRestore(bcpName) + c.LogicalRestore(context.TODO(), bcpName) checkData() } diff --git a/e2e-tests/pkg/tests/sharded/test_network_cut.go b/e2e-tests/pkg/tests/sharded/test_network_cut.go index c52c5d30c..bd57db0e9 100644 --- a/e2e-tests/pkg/tests/sharded/test_network_cut.go +++ b/e2e-tests/pkg/tests/sharded/test_network_cut.go @@ -5,7 +5,9 @@ import ( "strings" "time" - "github.com/percona/percona-backup-mongodb/pbm" + "github.com/percona/percona-backup-mongodb/internal/context" + + "github.com/percona/percona-backup-mongodb/internal/defs" ) func (c *Cluster) NetworkCut() { @@ -29,21 +31,21 @@ func (c *Cluster) NetworkCut() { log.Fatalf("ERROR: run tc netem on %s: %v", rs, err) } - waitfor := time.Duration(pbm.StaleFrameSec+10) * time.Second + waitfor := time.Duration(defs.StaleFrameSec+10) * time.Second log.Println("Sleeping for", waitfor) time.Sleep(waitfor) - meta, err := c.mongopbm.GetBackupMeta(bcpName) + meta, err := c.mongopbm.GetBackupMeta(context.TODO(), bcpName) if err != nil { log.Fatalf("ERROR: get metadata for the backup %s: %v", bcpName, err) } - if meta.Status != pbm.StatusError || + if meta.Status != defs.StatusError || meta.Error() == nil || meta.Error().Error() != pbmLostAgentsErr && !strings.Contains(meta.Error().Error(), pbmLostShardErr) { log.Fatalf("ERROR: wrong state of the backup %s. Expect: %s/%s|...%s... Got: %s/%s", - bcpName, pbm.StatusError, pbmLostAgentsErr, pbmLostShardErr, meta.Status, meta.Error()) + bcpName, defs.StatusError, pbmLostAgentsErr, pbmLostShardErr, meta.Status, meta.Error()) } log.Printf("Backup status %s/%s\n", meta.Status, meta.Error()) diff --git a/e2e-tests/pkg/tests/sharded/test_oplog_replay.go b/e2e-tests/pkg/tests/sharded/test_oplog_replay.go index f5642f00c..502771ac3 100644 --- a/e2e-tests/pkg/tests/sharded/test_oplog_replay.go +++ b/e2e-tests/pkg/tests/sharded/test_oplog_replay.go @@ -1,7 +1,6 @@ package sharded import ( - "context" "log" "math/rand" "time" @@ -9,6 +8,7 @@ import ( "go.mongodb.org/mongo-driver/bson/primitive" tpbm "github.com/percona/percona-backup-mongodb/e2e-tests/pkg/pbm" + "github.com/percona/percona-backup-mongodb/internal/context" ) func (c *Cluster) OplogReplay() { @@ -32,7 +32,7 @@ func (c *Cluster) OplogReplay() { } } - c.BackupWaitDone(bcpName) + c.BackupWaitDone(context.TODO(), bcpName) c.printBcpList() @@ -43,7 +43,7 @@ func (c *Cluster) OplogReplay() { firstt := getLastWriteTime(counters) bcp2 := c.LogicalBackup() - c.BackupWaitDone(bcp2) + c.BackupWaitDone(context.TODO(), bcp2) ds := time.Second * 30 * time.Duration(rand.Int63n(5)+2) log.Printf("Generating data for %v", ds) @@ -71,7 +71,7 @@ func (c *Cluster) OplogReplay() { c.printBcpList() // +1 sec since we are PITR restore done up to < time (not <=) - c.LogicalRestore(bcp2) + c.LogicalRestore(context.TODO(), bcp2) c.ReplayOplog( time.Unix(int64(firstt.T), 0), time.Unix(int64(lastt.T), 0).Add(time.Second*1)) @@ -89,7 +89,7 @@ func getLastWrittenCounter(counters map[string]shardCounter) tpbm.Counter { log.Printf("\tshard %s: %d [%v] | %v", name, cc.WriteTime.T, time.Unix(int64(cc.WriteTime.T), 0), cc) - if primitive.CompareTimestamp(rv.WriteTime, cc.WriteTime) == -1 { + if rv.WriteTime.Compare(cc.WriteTime) == -1 { rv = cc } } diff --git a/e2e-tests/pkg/tests/sharded/test_pitr_basic.go 
b/e2e-tests/pkg/tests/sharded/test_pitr_basic.go index 830e742a9..762c6c390 100644 --- a/e2e-tests/pkg/tests/sharded/test_pitr_basic.go +++ b/e2e-tests/pkg/tests/sharded/test_pitr_basic.go @@ -1,7 +1,6 @@ package sharded import ( - "context" "log" "math/rand" "sync" @@ -9,9 +8,10 @@ import ( "go.mongodb.org/mongo-driver/bson/primitive" - "github.com/percona/percona-backup-mongodb/pbm" - pbmt "github.com/percona/percona-backup-mongodb/e2e-tests/pkg/pbm" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/lock" ) func (c *Cluster) PITRbasic() { @@ -35,7 +35,7 @@ func (c *Cluster) PITRbasic() { } } - c.BackupWaitDone(bcpName) + c.BackupWaitDone(context.TODO(), bcpName) c.printBcpList() @@ -43,7 +43,7 @@ func (c *Cluster) PITRbasic() { log.Printf("Sleep for %v", time.Second*30) bcp2 := c.LogicalBackup() - c.BackupWaitDone(bcp2) + c.BackupWaitDone(context.TODO(), bcp2) ds := time.Second * 30 * time.Duration(rand.Int63n(5)+2) log.Printf("Generating data for %v", ds) @@ -69,7 +69,7 @@ func (c *Cluster) PITRbasic() { } log.Printf("Deleting backup %v", bcp2) - err := c.mongopbm.DeleteBackup(bcp2) + err := c.mongopbm.DeleteBackup(context.TODO(), bcp2) if err != nil { log.Fatalf("Error: delete backup %s: %v", bcp2, err) } @@ -107,11 +107,9 @@ func (c *Cluster) pitrOff() { } log.Println("Turning pitr off") log.Println("waiting for the pitr to stop") - err = c.mongopbm.WaitOp(&pbm.LockHeader{ - Type: pbm.CmdPITR, - }, - time.Minute*5, - ) + err = c.mongopbm.WaitOp(context.TODO(), + &lock.LockHeader{Type: defs.CmdPITR}, + time.Minute*5) if err != nil { log.Fatalf("ERROR: waiting for the pitr to stop: %v", err) } @@ -175,7 +173,7 @@ func (c *Cluster) pitrcCheck(name string, shard *pbmt.Mongo, data *[]pbmt.Counte log.Println(name, "checking restored counters") var lastc pbmt.Counter for i, d := range *data { - // if primitive.CompareTimestamp(d.WriteTime, bcpLastWrite) <= 0 { + // if d.WriteTime.Compare(bcpLastWrite) <= 0 { if d.WriteTime.T <= bcpLastWrite.T { if len(restored) <= i { log.Fatalf("ERROR: %s no record #%d/%d in restored (%d) | last: %v\n", diff --git a/e2e-tests/pkg/tests/sharded/test_remapping.go b/e2e-tests/pkg/tests/sharded/test_remapping.go index fc5728b3b..8424bfb0a 100644 --- a/e2e-tests/pkg/tests/sharded/test_remapping.go +++ b/e2e-tests/pkg/tests/sharded/test_remapping.go @@ -5,7 +5,9 @@ import ( "log" "strings" - "github.com/percona/percona-backup-mongodb/pbm" + "github.com/percona/percona-backup-mongodb/internal/context" + + "github.com/percona/percona-backup-mongodb/internal/defs" ) type RemappingEnvironment struct { @@ -14,9 +16,9 @@ type RemappingEnvironment struct { Remapping map[string]string } -func (re *RemappingEnvironment) prepareRestoreOptions(typ pbm.BackupType) []string { +func (re *RemappingEnvironment) prepareRestoreOptions(typ defs.BackupType) []string { var remappings []string - if typ == pbm.PhysicalBackup || len(re.Remapping) == 0 { + if typ == defs.PhysicalBackup || len(re.Remapping) == 0 { return []string{} } @@ -26,10 +28,10 @@ func (re *RemappingEnvironment) prepareRestoreOptions(typ pbm.BackupType) []stri return []string{"--replset-remapping", strings.Join(remappings, ",")} } -func (re *RemappingEnvironment) BackupAndRestore(typ pbm.BackupType) { +func (re *RemappingEnvironment) BackupAndRestore(typ defs.BackupType) { backup := re.Donor.LogicalBackup restore := re.Recipient.LogicalRestoreWithParams - if typ == pbm.PhysicalBackup { + if 
typ == defs.PhysicalBackup { backup = re.Donor.PhysicalBackup restore = re.Recipient.PhysicalRestoreWithParams } @@ -37,17 +39,17 @@ func (re *RemappingEnvironment) BackupAndRestore(typ pbm.BackupType) { checkData := re.DataChecker() bcpName := backup() - re.Donor.BackupWaitDone(bcpName) + re.Donor.BackupWaitDone(context.TODO(), bcpName) // to be sure the backup didn't vanish after the resync // i.e. resync finished correctly log.Println("resync backup list") - err := re.Recipient.mongopbm.StoreResync() + err := re.Recipient.mongopbm.StoreResync(context.TODO()) if err != nil { log.Fatalln("Error: resync backup lists:", err) } - restore(bcpName, re.prepareRestoreOptions(typ)) + restore(context.TODO(), bcpName, re.prepareRestoreOptions(typ)) checkData() } diff --git a/e2e-tests/pkg/tests/sharded/test_selective.go b/e2e-tests/pkg/tests/sharded/test_selective.go index c8b24ff08..fd775e7b5 100644 --- a/e2e-tests/pkg/tests/sharded/test_selective.go +++ b/e2e-tests/pkg/tests/sharded/test_selective.go @@ -4,8 +4,10 @@ import ( "log" "strings" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/e2e-tests/pkg/tests" - "github.com/percona/percona-backup-mongodb/pbm" + "github.com/percona/percona-backup-mongodb/internal/defs" ) var clusterSpec = []tests.GenDBSpec{ @@ -78,8 +80,8 @@ func (c *Cluster) SelectiveRestoreSharded() { return } - backupName := c.backup(pbm.LogicalBackup) - c.BackupWaitDone(backupName) + backupName := c.backup(defs.LogicalBackup) + c.BackupWaitDone(context.TODO(), backupName) // regenerate new data err = tests.GenerateData(ctx, mongos, clusterSpec) @@ -89,7 +91,7 @@ func (c *Cluster) SelectiveRestoreSharded() { } selected := []string{"db0.c00", "db0.c01", "db1.*"} - c.LogicalRestoreWithParams(backupName, []string{"--ns", strings.Join(selected, ","), "--wait"}) + c.LogicalRestoreWithParams(context.TODO(), backupName, []string{"--ns", strings.Join(selected, ","), "--wait"}) afterState, err := tests.ClusterState(ctx, mongos, creds) if err != nil { @@ -103,7 +105,7 @@ func (c *Cluster) SelectiveRestoreSharded() { } log.Printf("Deleting backup %v", backupName) - err = c.mongopbm.DeleteBackup(backupName) + err = c.mongopbm.DeleteBackup(context.TODO(), backupName) if err != nil { log.Fatalf("Error: delete backup %s: %v", backupName, err) } @@ -139,8 +141,8 @@ func (c *Cluster) SelectiveBackupSharded() { return } - backupName := c.backup(pbm.LogicalBackup, "--ns", "db0.*") - c.BackupWaitDone(backupName) + backupName := c.backup(defs.LogicalBackup, "--ns", "db0.*") + c.BackupWaitDone(context.TODO(), backupName) // regenerate new data err = tests.GenerateData(ctx, mongos, clusterSpec) @@ -150,7 +152,7 @@ func (c *Cluster) SelectiveBackupSharded() { } selected := []string{"db0.c00", "db0.c01"} - c.LogicalRestoreWithParams(backupName, []string{"--ns", strings.Join(selected, ","), "--wait"}) + c.LogicalRestoreWithParams(context.TODO(), backupName, []string{"--ns", strings.Join(selected, ","), "--wait"}) afterState, err := tests.ClusterState(ctx, mongos, creds) if err != nil { @@ -164,7 +166,7 @@ func (c *Cluster) SelectiveBackupSharded() { } log.Printf("Deleting backup %v", backupName) - err = c.mongopbm.DeleteBackup(backupName) + err = c.mongopbm.DeleteBackup(context.TODO(), backupName) if err != nil { log.Fatalf("Error: delete backup %s: %v", backupName, err) } diff --git a/e2e-tests/pkg/tests/sharded/test_timeseries.go b/e2e-tests/pkg/tests/sharded/test_timeseries.go index 1b75e32c9..fda7ac0a4 100644 --- 
a/e2e-tests/pkg/tests/sharded/test_timeseries.go +++ b/e2e-tests/pkg/tests/sharded/test_timeseries.go @@ -4,6 +4,8 @@ import ( "log" "time" + "github.com/percona/percona-backup-mongodb/internal/context" + pbmt "github.com/percona/percona-backup-mongodb/e2e-tests/pkg/pbm" ) @@ -20,7 +22,7 @@ func (c *Cluster) Timeseries() { c.pitrOn() defer c.pitrOff() - c.BackupWaitDone(bcpName) + c.BackupWaitDone(context.TODO(), bcpName) time.Sleep(time.Second) diff --git a/e2e-tests/pkg/tests/sharded/trx.go b/e2e-tests/pkg/tests/sharded/trx.go index 76a07747e..c603aec4b 100644 --- a/e2e-tests/pkg/tests/sharded/trx.go +++ b/e2e-tests/pkg/tests/sharded/trx.go @@ -1,7 +1,6 @@ package sharded import ( - "context" "log" "strings" "time" @@ -14,6 +13,7 @@ import ( "go.mongodb.org/mongo-driver/mongo/writeconcern" pbmt "github.com/percona/percona-backup-mongodb/e2e-tests/pkg/pbm" + "github.com/percona/percona-backup-mongodb/internal/context" ) const trxdb = "trx" @@ -27,7 +27,7 @@ func (c *Cluster) DistributedTransactions(bcp Backuper, col string) { conn := c.mongos.Conn() log.Println("Updating transactionLifetimeLimitSeconds to", trxLimitT) - err := c.mongopbm.Conn().Database("admin").RunCommand( + err := c.mongopbm.Conn().AdminCommand( ctx, bson.D{{"setParameter", 1}, {"transactionLifetimeLimitSeconds", trxLimitT}}, ).Err() @@ -79,8 +79,7 @@ func (c *Cluster) DistributedTransactions(bcp Backuper, col string) { SetDefaultReadPreference(readpref.Primary()). SetCausalConsistency(true). SetDefaultReadConcern(readconcern.Majority()). - SetDefaultWriteConcern(writeconcern.New(writeconcern.WMajority())), - ) + SetDefaultWriteConcern(writeconcern.Majority())) if err != nil { log.Fatalln("ERROR: start session:", err) } diff --git a/e2e-tests/pkg/tests/sharded/trx_phys.go b/e2e-tests/pkg/tests/sharded/trx_phys.go index 270c841a8..ae8fdcd94 100644 --- a/e2e-tests/pkg/tests/sharded/trx_phys.go +++ b/e2e-tests/pkg/tests/sharded/trx_phys.go @@ -1,7 +1,6 @@ package sharded import ( - "context" "log" "go.mongodb.org/mongo-driver/bson" @@ -10,6 +9,8 @@ import ( "go.mongodb.org/mongo-driver/mongo/readconcern" "go.mongodb.org/mongo-driver/mongo/readpref" "go.mongodb.org/mongo-driver/mongo/writeconcern" + + "github.com/percona/percona-backup-mongodb/internal/context" ) func (c *Cluster) DistributedTransactionsPhys(bcp Backuper, col string) { @@ -21,7 +22,7 @@ func (c *Cluster) DistributedTransactionsPhys(bcp Backuper, col string) { conn := c.mongos.Conn() log.Println("Updating transactionLifetimeLimitSeconds to", trxLimitT) - err := c.mongopbm.Conn().Database("admin").RunCommand( + err := c.mongopbm.Conn().AdminCommand( ctx, bson.D{{"setParameter", 1}, {"transactionLifetimeLimitSeconds", trxLimitT}}, ).Err() @@ -56,8 +57,7 @@ func (c *Cluster) DistributedTransactionsPhys(bcp Backuper, col string) { SetDefaultReadPreference(readpref.Primary()). SetCausalConsistency(true). SetDefaultReadConcern(readconcern.Majority()). 
- SetDefaultWriteConcern(writeconcern.New(writeconcern.WMajority())), - ) + SetDefaultWriteConcern(writeconcern.Majority())) if err != nil { log.Fatalln("ERROR: start session:", err) } diff --git a/e2e-tests/pkg/tests/state.go b/e2e-tests/pkg/tests/state.go index 51aaa099a..f1691060b 100644 --- a/e2e-tests/pkg/tests/state.go +++ b/e2e-tests/pkg/tests/state.go @@ -1,7 +1,6 @@ package tests import ( - "context" "crypto/md5" "encoding/hex" "fmt" @@ -11,15 +10,18 @@ import ( "strings" "sync" - "github.com/pkg/errors" + "github.com/percona/percona-backup-mongodb/internal/context" + "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" "go.mongodb.org/mongo-driver/mongo/options" "golang.org/x/sync/errgroup" - "github.com/percona/percona-backup-mongodb/pbm" - "github.com/percona/percona-backup-mongodb/pbm/sel" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/util" + + "github.com/percona/percona-backup-mongodb/internal/version" ) type ( @@ -123,7 +125,7 @@ func ExtractCredentionals(s string) *Credentials { func ClusterState(ctx context.Context, mongos *mongo.Client, creds *Credentials) (*clusterState, error) { ok, err := isMongos(ctx, mongos) if err != nil { - return nil, errors.WithMessage(err, "ismongos") + return nil, errors.Wrap(err, "ismongos") } if !ok { return nil, errors.New("mongos connection required") @@ -132,12 +134,12 @@ func ClusterState(ctx context.Context, mongos *mongo.Client, creds *Credentials) // get list of shards and configsvr URIs res := mongos.Database("admin").RunCommand(ctx, bson.D{{"getShardMap", 1}}) if err := res.Err(); err != nil { - return nil, errors.WithMessage(err, "getShardMap: query") + return nil, errors.Wrap(err, "getShardMap: query") } var shardMap struct{ Map map[ShardName]string } if err := res.Decode(&shardMap); err != nil { - return nil, errors.WithMessage(err, "getShardMap: decode") + return nil, errors.Wrap(err, "getShardMap: decode") } rv := &clusterState{ @@ -149,7 +151,7 @@ func ClusterState(ctx context.Context, mongos *mongo.Client, creds *Credentials) eg.Go(func() error { var err error rv.Counts, err = countDocuments(egc, mongos) - return errors.WithMessage(err, "count documents") + return errors.Wrap(err, "count documents") }) mu := sync.Mutex{} @@ -164,17 +166,17 @@ func ClusterState(ctx context.Context, mongos *mongo.Client, creds *Credentials) eg.Go(func() error { m, err := mongo.Connect(egc, options.Client().ApplyURI(uri)) if err != nil { - return errors.WithMessagef(err, "connect: %q", uri) + return errors.Wrapf(err, "connect: %q", uri) } if rs == "config" { rv.Config, err = getConfigState(egc, m) - return errors.WithMessagef(err, "config state: %q", uri) + return errors.Wrapf(err, "config state: %q", uri) } state, err := getShardState(egc, m) if err != nil { - return errors.WithMessagef(err, "shard state: %q", uri) + return errors.Wrapf(err, "shard state: %q", uri) } mu.Lock() @@ -199,7 +201,7 @@ func Compare(before, after *clusterState, nss []string) bool { } allowedDBs[db] = true } - selected := sel.MakeSelectedPred(nss) + selected := util.MakeSelectedPred(nss) for db, beforeDBState := range before.Config { if !allowedDBs[""] && !allowedDBs[db] { @@ -262,19 +264,19 @@ func Compare(before, after *clusterState, nss []string) bool { func isMongos(ctx context.Context, m *mongo.Client) (bool, error) { res := m.Database("admin").RunCommand(ctx, bson.D{{"hello", 1}}) if err := res.Err(); err != nil { - return false, 
errors.WithMessage(err, "query") + return false, errors.Wrap(err, "query") } var r struct{ Msg string } err := res.Decode(&r) - return r.Msg == "isdbgrid", errors.WithMessage(err, "decode") + return r.Msg == "isdbgrid", errors.Wrap(err, "decode") } func countDocuments(ctx context.Context, mongos *mongo.Client) (map[string]int64, error) { f := bson.D{{"name", bson.M{"$nin": bson.A{"admin", "config", "local"}}}} dbs, err := mongos.ListDatabaseNames(ctx, f) if err != nil { - return nil, errors.WithMessage(err, "list databases") + return nil, errors.Wrap(err, "list databases") } rv := make(map[NSName]int64) @@ -284,13 +286,13 @@ func countDocuments(ctx context.Context, mongos *mongo.Client) (map[string]int64 colls, err := db.ListCollectionNames(ctx, bson.D{}) if err != nil { - return nil, errors.WithMessagef(err, "list collections: %q", d) + return nil, errors.Wrapf(err, "list collections: %q", d) } for _, c := range colls { count, err := db.Collection(c).CountDocuments(ctx, bson.D{}) if err != nil { - return nil, errors.WithMessagef(err, "count: %q", d+"."+c) + return nil, errors.Wrapf(err, "count: %q", d+"."+c) } rv[d+"."+c] = count @@ -303,24 +305,24 @@ func countDocuments(ctx context.Context, mongos *mongo.Client) (map[string]int64 func getConfigDatabases(ctx context.Context, m *mongo.Client) ([]*dbSpec, error) { cur, err := m.Database("config").Collection("databases").Find(ctx, bson.D{}) if err != nil { - return nil, errors.WithMessage(err, "query") + return nil, errors.Wrap(err, "query") } rv := []*dbSpec{} err = cur.All(ctx, &rv) - return rv, errors.WithMessage(err, "cursor: all") + return rv, errors.Wrap(err, "cursor: all") } func getConfigCollections(ctx context.Context, m *mongo.Client) ([]*collSpec, error) { f := bson.D{{"_id", bson.M{"$regex": `^(?!(config|system)\.)`}}} cur, err := m.Database("config").Collection("collections").Find(ctx, f) if err != nil { - return nil, errors.WithMessage(err, "query") + return nil, errors.Wrap(err, "query") } rv := []*collSpec{} err = cur.All(ctx, &rv) - return rv, errors.WithMessage(err, "cursor: all") + return rv, errors.Wrap(err, "cursor: all") } func getConfigChunkHashes( @@ -356,7 +358,7 @@ func getConfigChunkHashes( cur, err := m.Database("config").Collection("chunks").Find(ctx, f) if err != nil { - return nil, errors.WithMessage(err, "query") + return nil, errors.Wrap(err, "query") } defer cur.Close(ctx) @@ -372,7 +374,7 @@ func getConfigChunkHashes( counts[id][cur.Current.Lookup("shard").StringValue()]++ } if err := cur.Err(); err != nil { - return nil, errors.WithMessage(err, "cursor") + return nil, errors.Wrap(err, "cursor") } rv := make(map[NSName]chunksState, len(hashes)) @@ -387,20 +389,20 @@ func getConfigChunkHashes( } func getConfigState(ctx context.Context, m *mongo.Client) (map[DBName]configDBState, error) { - ver, err := pbm.GetMongoVersion(ctx, m) + ver, err := version.GetMongoVersion(ctx, m) if err != nil { - return nil, errors.WithMessage(err, "get mongo version") + return nil, errors.Wrap(err, "get mongo version") } useUUID := ver.Major() >= 5 // since v5.0 dbs, err := getConfigDatabases(ctx, m) if err != nil { - return nil, errors.WithMessage(err, "databases") + return nil, errors.Wrap(err, "databases") } colls, err := getConfigCollections(ctx, m) if err != nil { - return nil, errors.WithMessage(err, "collections") + return nil, errors.Wrap(err, "collections") } u2c := make(map[string]CollName, len(colls)) @@ -416,7 +418,7 @@ func getConfigState(ctx context.Context, m *mongo.Client) (map[DBName]configDBSt chunks, err := 
getConfigChunkHashes(ctx, m, u2c, useUUID) if err != nil { - return nil, errors.WithMessage(err, "config chunk hashes") + return nil, errors.Wrap(err, "config chunk hashes") } rv := make(map[string]configDBState, len(dbs)) @@ -441,7 +443,7 @@ func getShardState(ctx context.Context, m *mongo.Client) (shardState, error) { f := bson.D{{"name", bson.M{"$nin": bson.A{"admin", "config", "local"}}}} res, err := m.ListDatabases(ctx, f) if err != nil { - return nil, errors.WithMessage(err, "list databases") + return nil, errors.Wrap(err, "list databases") } rv := make(map[NSName]*shardCollState, len(res.Databases)) @@ -451,17 +453,17 @@ func getShardState(ctx context.Context, m *mongo.Client) (shardState, error) { colls, err := db.ListCollectionSpecifications(ctx, bson.D{}) if err != nil { - return nil, errors.WithMessagef(err, "list collections: %q", name) + return nil, errors.Wrapf(err, "list collections: %q", name) } res := db.RunCommand(ctx, bson.D{{"dbHash", 1}}) if err := res.Err(); err != nil { - return nil, errors.WithMessagef(err, "dbHash: %q: query", name) + return nil, errors.Wrapf(err, "dbHash: %q: query", name) } dbHash := struct{ Collections map[CollName]string }{} if err := res.Decode(&dbHash); err != nil { - return nil, errors.WithMessagef(err, "dbHash: %q: decode", name) + return nil, errors.Wrapf(err, "dbHash: %q: decode", name) } for _, coll := range colls { diff --git a/e2e-tests/run-new-cluster b/e2e-tests/run-new-cluster index 15d407444..579f69f73 100755 --- a/e2e-tests/run-new-cluster +++ b/e2e-tests/run-new-cluster @@ -6,7 +6,7 @@ set -o xtrace test_dir=$(realpath $(dirname $0)) . ${test_dir}/functions -MONGO_VERSION=${MONGODB_VERSION:-"4.2"} +MONGO_VERSION=${MONGODB_VERSION:-"4.4"} desc 'RUN RESTORE ON NEW CLUSTER TEST' diff --git a/e2e-tests/run-remapping b/e2e-tests/run-remapping index 7fbfd069e..b9480433e 100755 --- a/e2e-tests/run-remapping +++ b/e2e-tests/run-remapping @@ -6,7 +6,7 @@ set -o xtrace test_dir=$(realpath $(dirname $0)) . ${test_dir}/functions -MONGO_VERSION=${MONGODB_VERSION:-"4.2"} +MONGO_VERSION=${MONGODB_VERSION:-"4.4"} desc 'RUN REMAPPING TESTS' diff --git a/e2e-tests/run-rs b/e2e-tests/run-rs index 476599006..ce10fd261 100755 --- a/e2e-tests/run-rs +++ b/e2e-tests/run-rs @@ -6,7 +6,7 @@ set -o xtrace test_dir=$(realpath $(dirname $0)) . ${test_dir}/functions -MONGO_VERSION=${MONGODB_VERSION:-"4.2"} +MONGO_VERSION=${MONGODB_VERSION:-"4.4"} desc 'RUN REPLICA SET TESTS' diff --git a/e2e-tests/run-sharded b/e2e-tests/run-sharded index 6412ab4da..39b9f63d7 100755 --- a/e2e-tests/run-sharded +++ b/e2e-tests/run-sharded @@ -6,7 +6,7 @@ set -o xtrace test_dir=$(realpath $(dirname $0)) . ${test_dir}/functions -MONGO_VERSION=${MONGODB_VERSION:-"4.2"} +MONGO_VERSION=${MONGODB_VERSION:-"4.4"} desc 'RUN SHARDED CLUTER TESTS' diff --git a/e2e-tests/run-single b/e2e-tests/run-single index c3886562a..1f68921b1 100755 --- a/e2e-tests/run-single +++ b/e2e-tests/run-single @@ -6,7 +6,7 @@ set -o xtrace test_dir=$(realpath $(dirname $0)) . ${test_dir}/functions -MONGO_VERSION=${MONGODB_VERSION:-"4.2"} +MONGO_VERSION=${MONGODB_VERSION:-"4.4"} desc 'RUN REPLICA SET TESTS' diff --git a/e2e-tests/start-cluster b/e2e-tests/start-cluster index c39a93148..2a95a12b9 100755 --- a/e2e-tests/start-cluster +++ b/e2e-tests/start-cluster @@ -6,7 +6,7 @@ set -o xtrace test_dir=$(realpath $(dirname $0)) . 
${test_dir}/functions -MONGO_VERSION=${MONGODB_VERSION:-"4.2"} +MONGO_VERSION=${MONGODB_VERSION:-"4.4"} desc 'Start cluster' start_cluster $MONGO_VERSION diff --git a/e2e-tests/start-replset b/e2e-tests/start-replset index ed41b0b41..3cc7af616 100755 --- a/e2e-tests/start-replset +++ b/e2e-tests/start-replset @@ -6,7 +6,7 @@ set -o xtrace test_dir=$(realpath $(dirname $0)) . ${test_dir}/functions -MONGO_VERSION=${MONGODB_VERSION:-"4.2"} +MONGO_VERSION=${MONGODB_VERSION:-"4.4"} desc 'Start replicaset' start_replset $MONGO_VERSION "$COMPOSE_RS_PATH" diff --git a/pbm/archive/archive.go b/internal/archive/archive.go similarity index 81% rename from pbm/archive/archive.go rename to internal/archive/archive.go index d28b21234..88013b44a 100644 --- a/pbm/archive/archive.go +++ b/internal/archive/archive.go @@ -7,10 +7,11 @@ import ( "github.com/mongodb/mongo-tools/common/archive" "github.com/mongodb/mongo-tools/common/db" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/x/bsonx/bsoncore" "golang.org/x/sync/errgroup" + + "github.com/percona/percona-backup-mongodb/internal/errors" ) type ( @@ -48,7 +49,7 @@ func DefaultDocFilter(string, bson.Raw) bool { return true } func Decompose(r io.Reader, newWriter NewWriter, nsFilter NSFilterFn, docFilter DocFilterFn) error { meta, err := readPrelude(r) if err != nil { - return errors.WithMessage(err, "prelude") + return errors.Wrap(err, "prelude") } if nsFilter == nil { @@ -60,7 +61,7 @@ func Decompose(r io.Reader, newWriter NewWriter, nsFilter NSFilterFn, docFilter c := newConsumer(newWriter, nsFilter, docFilter) if err := (&archive.Parser{In: r}).ReadAllBlocks(c); err != nil { - return errors.WithMessage(err, "archive parser") + return errors.Wrap(err, "archive parser") } // save metadata for selected namespaces only @@ -78,13 +79,13 @@ func Decompose(r io.Reader, newWriter NewWriter, nsFilter NSFilterFn, docFilter meta.Namespaces = nss err = writeMetadata(meta, newWriter) - return errors.WithMessage(err, "metadata") + return errors.Wrap(err, "metadata") } func Compose(w io.Writer, nsFilter NSFilterFn, newReader NewReader) error { meta, err := readMetadata(newReader) if err != nil { - return errors.WithMessage(err, "metadata") + return errors.Wrap(err, "metadata") } nss := make([]*Namespace, 0, len(meta.Namespaces)) @@ -97,20 +98,20 @@ func Compose(w io.Writer, nsFilter NSFilterFn, newReader NewReader) error { meta.Namespaces = nss if err := writePrelude(w, meta); err != nil { - return errors.WithMessage(err, "prelude") + return errors.Wrap(err, "prelude") } err = writeAllNamespaces(w, newReader, int(meta.Header.ConcurrentCollections), meta.Namespaces) - return errors.WithMessage(err, "write namespaces") + return errors.Wrap(err, "write namespaces") } func readPrelude(r io.Reader) (*archiveMeta, error) { prelude := archive.Prelude{} err := prelude.Read(r) if err != nil { - return nil, errors.WithMessage(err, "read") + return nil, errors.Wrap(err, "read") } m := &archiveMeta{Header: prelude.Header} @@ -130,7 +131,7 @@ func writePrelude(w io.Writer, m *archiveMeta) error { } err := prelude.Write(w) - return errors.WithMessage(err, "write") + return errors.Wrap(err, "write") } func writeAllNamespaces(w io.Writer, newReader NewReader, lim int, nss []*Namespace) error { @@ -146,13 +147,13 @@ func writeAllNamespaces(w io.Writer, newReader NewReader, lim int, nss []*Namesp mu.Lock() defer mu.Unlock() - return errors.WithMessage(closeChunk(w, ns), "close empty chunk") + return errors.Wrap(closeChunk(w, ns), "close empty 
chunk") } nss := NSify(ns.Database, ns.Collection) r, err := newReader(nss) if err != nil { - return errors.WithMessage(err, "new reader") + return errors.Wrap(err, "new reader") } defer r.Close() @@ -160,16 +161,16 @@ func writeAllNamespaces(w io.Writer, newReader NewReader, lim int, nss []*Namesp mu.Lock() defer mu.Unlock() - return errors.WithMessage(writeChunk(w, ns, b), "write chunk") + return errors.Wrap(writeChunk(w, ns, b), "write chunk") }) if err != nil { - return errors.WithMessage(err, "split") + return errors.Wrap(err, "split") } mu.Lock() defer mu.Unlock() - return errors.WithMessage(closeChunk(w, ns), "close chunk") + return errors.Wrap(closeChunk(w, ns), "close chunk") }) } @@ -192,14 +193,14 @@ func splitChunks(r io.Reader, size int, write func([]byte) error) error { } if !errors.Is(err, io.EOF) { - return errors.WithMessage(err, "read bson") + return errors.Wrap(err, "read bson") } if len(chunk) == 0 { return nil } - return errors.WithMessage(write(chunk), "last") + return errors.Wrap(write(chunk), "last") } // ReadBSONBuffer reads raw bson document from r reader using buf buffer @@ -208,7 +209,7 @@ func ReadBSONBuffer(r io.Reader, buf []byte) ([]byte, error) { _, err := io.ReadFull(r, l[:]) if err != nil { - return nil, errors.WithMessage(err, "length") + return nil, errors.Wrap(err, "length") } size := int(int32(l[0]) | int32(l[1])<<8 | int32(l[2])<<16 | int32(l[3])<<24) @@ -244,19 +245,19 @@ func writeChunk(w io.Writer, ns *Namespace, data []byte) error { header, err := bson.Marshal(nsHeader) if err != nil { - return errors.WithMessage(err, "marshal") + return errors.Wrap(err, "marshal") } if err := SecureWrite(w, header); err != nil { - return errors.WithMessage(err, "header") + return errors.Wrap(err, "header") } if err := SecureWrite(w, data); err != nil { - return errors.WithMessage(err, "data") + return errors.Wrap(err, "data") } err = SecureWrite(w, terminatorBytes) - return errors.WithMessage(err, "terminator") + return errors.Wrap(err, "terminator") } func closeChunk(w io.Writer, ns *Namespace) error { @@ -272,27 +273,27 @@ func closeChunk(w io.Writer, ns *Namespace) error { header, err := bson.Marshal(nsHeader) if err != nil { - return errors.WithMessage(err, "marshal") + return errors.Wrap(err, "marshal") } if err := SecureWrite(w, header); err != nil { - return errors.WithMessage(err, "header") + return errors.Wrap(err, "header") } err = SecureWrite(w, terminatorBytes) - return errors.WithMessage(err, "terminator") + return errors.Wrap(err, "terminator") } func writeMetadata(meta *archiveMeta, newWriter NewWriter) error { w, err := newWriter(MetaFile) if err != nil { - return errors.WithMessage(err, "new writer") + return errors.Wrap(err, "new writer") } defer w.Close() data, err := bson.MarshalExtJSONIndent(meta, true, true, "", "\t") if err != nil { - return errors.WithMessage(err, "marshal") + return errors.Wrap(err, "marshal") } return SecureWrite(w, data) @@ -301,7 +302,7 @@ func writeMetadata(meta *archiveMeta, newWriter NewWriter) error { func readMetadata(newReader NewReader) (*archiveMeta, error) { r, err := newReader(MetaFile) if err != nil { - return nil, errors.WithMessage(err, "new metafile reader") + return nil, errors.Wrap(err, "new metafile reader") } defer r.Close() @@ -311,12 +312,12 @@ func readMetadata(newReader NewReader) (*archiveMeta, error) { func ReadMetadata(r io.Reader) (*archiveMeta, error) { data, err := io.ReadAll(r) if err != nil { - return nil, errors.WithMessage(err, "read") + return nil, errors.Wrap(err, "read") } meta := 
&archiveMeta{} err = bson.UnmarshalExtJSON(data, true, meta) - return meta, errors.WithMessage(err, "unmarshal") + return meta, errors.Wrap(err, "unmarshal") } type consumer struct { @@ -343,7 +344,7 @@ func newConsumer(newWriter NewWriter, nsFilter NSFilterFn, docFilter DocFilterFn func (c *consumer) HeaderBSON(data []byte) error { h := &archive.NamespaceHeader{} if err := bson.Unmarshal(data, h); err != nil { - return errors.WithMessage(err, "unmarshal") + return errors.Wrap(err, "unmarshal") } ns := NSify(h.Database, h.Collection) @@ -366,7 +367,7 @@ func (c *consumer) HeaderBSON(data []byte) error { } delete(c.nss, ns) - return errors.WithMessage(w.Close(), "close") + return errors.Wrap(w.Close(), "close") } func (c *consumer) BodyBSON(data []byte) error { @@ -385,14 +386,14 @@ func (c *consumer) BodyBSON(data []byte) error { var err error w, err = c.open(ns) if err != nil { - return errors.WithMessagef(err, "open: %q", ns) + return errors.Wrapf(err, "open: %q", ns) } c.nss[ns] = w } c.size[ns] += int64(len(data)) - return errors.WithMessagef(SecureWrite(w, data), "%q", ns) + return errors.Wrapf(SecureWrite(w, data), "%q", ns) } func (c *consumer) End() error { @@ -401,7 +402,7 @@ func (c *consumer) End() error { for ns, w := range c.nss { ns, w := ns, w eg.Go(func() error { - return errors.WithMessagef(w.Close(), "close: %q", ns) + return errors.Wrapf(w.Close(), "close: %q", ns) }) } @@ -411,7 +412,7 @@ func (c *consumer) End() error { func SecureWrite(w io.Writer, data []byte) error { n, err := w.Write(data) if err != nil { - return errors.WithMessage(err, "write") + return errors.Wrap(err, "write") } if n != len(data) { return io.ErrShortWrite diff --git a/pbm/compress/compress.go b/internal/compress/compress.go similarity index 58% rename from pbm/compress/compress.go rename to internal/compress/compress.go index caeec5cc7..e2b7677e6 100644 --- a/pbm/compress/compress.go +++ b/internal/compress/compress.go @@ -11,78 +11,49 @@ import ( "github.com/klauspost/compress/zstd" "github.com/klauspost/pgzip" "github.com/pierrec/lz4" - "github.com/pkg/errors" -) - -type CompressionType string -const ( - CompressionTypeNone CompressionType = "none" - CompressionTypeGZIP CompressionType = "gzip" - CompressionTypePGZIP CompressionType = "pgzip" - CompressionTypeSNAPPY CompressionType = "snappy" - CompressionTypeLZ4 CompressionType = "lz4" - CompressionTypeS2 CompressionType = "s2" - CompressionTypeZstandard CompressionType = "zstd" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" ) func IsValidCompressionType(s string) bool { - switch CompressionType(s) { + switch defs.CompressionType(s) { case - CompressionTypeNone, - CompressionTypeGZIP, - CompressionTypePGZIP, - CompressionTypeSNAPPY, - CompressionTypeLZ4, - CompressionTypeS2, - CompressionTypeZstandard: + defs.CompressionTypeNone, + defs.CompressionTypeGZIP, + defs.CompressionTypePGZIP, + defs.CompressionTypeSNAPPY, + defs.CompressionTypeLZ4, + defs.CompressionTypeS2, + defs.CompressionTypeZstandard: return true } return false } -func (c CompressionType) Suffix() string { - switch c { - case CompressionTypeGZIP, CompressionTypePGZIP: - return ".gz" - case CompressionTypeLZ4: - return ".lz4" - case CompressionTypeSNAPPY: - return ".snappy" - case CompressionTypeS2: - return ".s2" - case CompressionTypeZstandard: - return ".zst" - case CompressionTypeNone: - fallthrough - default: - return "" - } -} - // FileCompression return compression alg based on given file 
extension -func FileCompression(ext string) CompressionType { +func FileCompression(ext string) defs.CompressionType { switch ext { default: - return CompressionTypeNone + return defs.CompressionTypeNone case "gz": - return CompressionTypePGZIP + return defs.CompressionTypePGZIP case "lz4": - return CompressionTypeLZ4 + return defs.CompressionTypeLZ4 case "snappy": - return CompressionTypeSNAPPY + return defs.CompressionTypeSNAPPY case "s2": - return CompressionTypeS2 + return defs.CompressionTypeS2 case "zst": - return CompressionTypeZstandard + return defs.CompressionTypeZstandard } } // Compress makes a compressed writer from the given one -func Compress(w io.Writer, compression CompressionType, level *int) (io.WriteCloser, error) { +func Compress(w io.Writer, compression defs.CompressionType, level *int) (io.WriteCloser, error) { switch compression { - case CompressionTypeGZIP: + case defs.CompressionTypeGZIP: if level == nil { level = aws.Int(gzip.DefaultCompression) } @@ -91,7 +62,7 @@ func Compress(w io.Writer, compression CompressionType, level *int) (io.WriteClo return nil, err } return gw, nil - case CompressionTypePGZIP: + case defs.CompressionTypePGZIP: if level == nil { level = aws.Int(pgzip.DefaultCompression) } @@ -108,15 +79,15 @@ func Compress(w io.Writer, compression CompressionType, level *int) (io.WriteClo return nil, err } return pgw, nil - case CompressionTypeLZ4: + case defs.CompressionTypeLZ4: lz4w := lz4.NewWriter(w) if level != nil { lz4w.Header.CompressionLevel = *level } return lz4w, nil - case CompressionTypeSNAPPY: + case defs.CompressionTypeSNAPPY: return snappy.NewBufferedWriter(w), nil - case CompressionTypeS2: + case defs.CompressionTypeS2: cc := runtime.NumCPU() / 3 if cc == 0 { cc = 1 @@ -133,13 +104,13 @@ func Compress(w io.Writer, compression CompressionType, level *int) (io.WriteClo } } return s2.NewWriter(w, writerOptions...), nil - case CompressionTypeZstandard: + case defs.CompressionTypeZstandard: encLevel := zstd.SpeedDefault if level != nil { encLevel = zstd.EncoderLevelFromZstd(*level) } return zstd.NewWriter(w, zstd.WithEncoderLevel(encLevel)) - case CompressionTypeNone: + case defs.CompressionTypeNone: fallthrough default: return nopWriteCloser{w}, nil @@ -147,21 +118,21 @@ func Compress(w io.Writer, compression CompressionType, level *int) (io.WriteClo } // Decompress wraps given reader by the decompressing io.ReadCloser -func Decompress(r io.Reader, c CompressionType) (io.ReadCloser, error) { +func Decompress(r io.Reader, c defs.CompressionType) (io.ReadCloser, error) { switch c { - case CompressionTypeGZIP, CompressionTypePGZIP: + case defs.CompressionTypeGZIP, defs.CompressionTypePGZIP: rr, err := gzip.NewReader(r) return rr, errors.Wrap(err, "gzip reader") - case CompressionTypeLZ4: + case defs.CompressionTypeLZ4: return io.NopCloser(lz4.NewReader(r)), nil - case CompressionTypeSNAPPY: + case defs.CompressionTypeSNAPPY: return io.NopCloser(snappy.NewReader(r)), nil - case CompressionTypeS2: + case defs.CompressionTypeS2: return io.NopCloser(s2.NewReader(r)), nil - case CompressionTypeZstandard: + case defs.CompressionTypeZstandard: rr, err := zstd.NewReader(r) return io.NopCloser(rr), errors.Wrap(err, "zstandard reader") - case CompressionTypeNone: + case defs.CompressionTypeNone: fallthrough default: return io.NopCloser(r), nil diff --git a/pbm/config.go b/internal/config/config.go similarity index 66% rename from pbm/config.go rename to internal/config/config.go index c66f5147e..2e3ed31c4 100644 --- a/pbm/config.go +++ 
b/internal/config/config.go @@ -1,7 +1,6 @@ -package pbm +package config import ( - "context" "fmt" "os" "reflect" @@ -9,22 +8,60 @@ import ( "strings" "time" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" "go.mongodb.org/mongo-driver/mongo/options" "gopkg.in/yaml.v2" - "github.com/percona/percona-backup-mongodb/pbm/compress" - "github.com/percona/percona-backup-mongodb/pbm/log" - "github.com/percona/percona-backup-mongodb/pbm/storage" - "github.com/percona/percona-backup-mongodb/pbm/storage/azure" - "github.com/percona/percona-backup-mongodb/pbm/storage/blackhole" - "github.com/percona/percona-backup-mongodb/pbm/storage/fs" - "github.com/percona/percona-backup-mongodb/pbm/storage/s3" + "github.com/percona/percona-backup-mongodb/internal/compress" + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/storage" + "github.com/percona/percona-backup-mongodb/internal/storage/azure" + "github.com/percona/percona-backup-mongodb/internal/storage/fs" + "github.com/percona/percona-backup-mongodb/internal/storage/s3" + "github.com/percona/percona-backup-mongodb/internal/topo" ) +type confMap map[string]reflect.Kind + +// _confmap is a list of config's valid keys and its types +var _confmap confMap + +//nolint:gochecknoinits +func init() { + _confmap = keys(reflect.TypeOf(Config{})) +} + +func keys(t reflect.Type) confMap { + v := make(confMap) + for i := 0; i < t.NumField(); i++ { + name := strings.TrimSpace(strings.Split(t.Field(i).Tag.Get("bson"), ",")[0]) + + typ := t.Field(i).Type + if typ.Kind() == reflect.Ptr { + typ = typ.Elem() + } + if typ.Kind() == reflect.Struct { + for n, t := range keys(typ) { + v[name+"."+n] = t + } + } else { + v[name] = typ.Kind() + } + } + return v +} + +// ValidateConfigKey checks if a config key valid +func ValidateConfigKey(k string) bool { + _, ok := _confmap[k] + return ok +} + // Config is a pbm config type Config struct { PITR PITRConf `bson:"pitr" json:"pitr" yaml:"pitr"` @@ -70,11 +107,11 @@ func (c Config) String() string { // //nolint:lll type PITRConf struct { - Enabled bool `bson:"enabled" json:"enabled" yaml:"enabled"` - OplogSpanMin float64 `bson:"oplogSpanMin" json:"oplogSpanMin" yaml:"oplogSpanMin"` - OplogOnly bool `bson:"oplogOnly,omitempty" json:"oplogOnly,omitempty" yaml:"oplogOnly,omitempty"` - Compression compress.CompressionType `bson:"compression,omitempty" json:"compression,omitempty" yaml:"compression,omitempty"` - CompressionLevel *int `bson:"compressionLevel,omitempty" json:"compressionLevel,omitempty" yaml:"compressionLevel,omitempty"` + Enabled bool `bson:"enabled" json:"enabled" yaml:"enabled"` + OplogSpanMin float64 `bson:"oplogSpanMin" json:"oplogSpanMin" yaml:"oplogSpanMin"` + OplogOnly bool `bson:"oplogOnly,omitempty" json:"oplogOnly,omitempty" yaml:"oplogOnly,omitempty"` + Compression defs.CompressionType `bson:"compression,omitempty" json:"compression,omitempty" yaml:"compression,omitempty"` + CompressionLevel *int `bson:"compressionLevel,omitempty" json:"compressionLevel,omitempty" yaml:"compressionLevel,omitempty"` } // StorageConf is a configuration of the backup storage @@ -154,10 +191,10 @@ type RestoreConf struct { //nolint:lll type BackupConf struct { - Priority map[string]float64 
`bson:"priority,omitempty" json:"priority,omitempty" yaml:"priority,omitempty"` - Timeouts *BackupTimeouts `bson:"timeouts,omitempty" json:"timeouts,omitempty" yaml:"timeouts,omitempty"` - Compression compress.CompressionType `bson:"compression,omitempty" json:"compression,omitempty" yaml:"compression,omitempty"` - CompressionLevel *int `bson:"compressionLevel,omitempty" json:"compressionLevel,omitempty" yaml:"compressionLevel,omitempty"` + Priority map[string]float64 `bson:"priority,omitempty" json:"priority,omitempty" yaml:"priority,omitempty"` + Timeouts *BackupTimeouts `bson:"timeouts,omitempty" json:"timeouts,omitempty" yaml:"timeouts,omitempty"` + Compression defs.CompressionType `bson:"compression,omitempty" json:"compression,omitempty" yaml:"compression,omitempty"` + CompressionLevel *int `bson:"compressionLevel,omitempty" json:"compressionLevel,omitempty" yaml:"compressionLevel,omitempty"` } type BackupTimeouts struct { @@ -169,52 +206,46 @@ type BackupTimeouts struct { // If not set or zero, returns default value (WaitBackupStart). func (t *BackupTimeouts) StartingStatus() time.Duration { if t == nil || t.Starting == nil || *t.Starting == 0 { - return WaitBackupStart + return defs.WaitBackupStart } return time.Duration(*t.Starting) * time.Second } -type confMap map[string]reflect.Kind - -// _confmap is a list of config's valid keys and its types -var _confmap confMap - -//nolint:gochecknoinits -func init() { - _confmap = keys(reflect.TypeOf(Config{})) -} +func GetConfig(ctx context.Context, m connect.Client) (Config, error) { + res := m.ConfigCollection().FindOne(ctx, bson.D{}) + if err := res.Err(); err != nil { + return Config{}, errors.Wrap(err, "get") + } -func keys(t reflect.Type) confMap { - v := make(confMap) - for i := 0; i < t.NumField(); i++ { - name := strings.TrimSpace(strings.Split(t.Field(i).Tag.Get("bson"), ",")[0]) + var c Config + if err := res.Decode(&c); err != nil { + return Config{}, errors.Wrap(err, "decode") + } - typ := t.Field(i).Type - if typ.Kind() == reflect.Ptr { - typ = typ.Elem() - } - if typ.Kind() == reflect.Struct { - for n, t := range keys(typ) { - v[name+"."+n] = t - } - } else { - v[name] = typ.Kind() - } + if c.Backup.Compression == "" { + c.Backup.Compression = defs.CompressionTypeS2 } - return v + if c.PITR.Compression == "" { + c.PITR.Compression = c.Backup.Compression + } + if c.PITR.CompressionLevel == nil { + c.PITR.CompressionLevel = c.Backup.CompressionLevel + } + + return c, nil } -func (p *PBM) SetConfigByte(buf []byte) error { +func SetConfigByte(ctx context.Context, m connect.Client, buf []byte) error { var cfg Config err := yaml.UnmarshalStrict(buf, &cfg) if err != nil { return errors.Wrap(err, "unmarshal yaml") } - return errors.Wrap(p.SetConfig(cfg), "write to db") + return errors.Wrap(SetConfig(ctx, m, cfg), "write to db") } -func (p *PBM) SetConfig(cfg Config) error { +func SetConfig(ctx context.Context, m connect.Client, cfg Config) error { switch cfg.Storage.Type { case storage.S3: err := cfg.Storage.S3.Cast() @@ -236,7 +267,7 @@ func (p *PBM) SetConfig(cfg Config) error { return errors.Errorf("unsupported compression type: %q", c) } - ct, err := p.ClusterTime() + ct, err := topo.GetClusterTime(ctx, m) if err != nil { return errors.Wrap(err, "get cluster time") } @@ -245,24 +276,24 @@ func (p *PBM) SetConfig(cfg Config) error { // TODO: if store or pitr changed - need to bump epoch // TODO: struct tags to config opts `pbm:"resync,epoch"`? 
- _, _ = p.GetConfig() + _, _ = GetConfig(ctx, m) - _, err = p.Conn.Database(DB).Collection(ConfigCollection).UpdateOne( - p.ctx, + _, err = m.ConfigCollection().UpdateOne( + ctx, bson.D{}, bson.M{"$set": cfg}, options.Update().SetUpsert(true), ) - return errors.Wrap(err, "mongo ConfigCollection UpdateOne") + return errors.Wrap(err, "mongo defs.ConfigCollection UpdateOne") } -func (p *PBM) SetConfigVar(key, val string) error { +func SetConfigVar(ctx context.Context, m connect.Client, key, val string) error { if !ValidateConfigKey(key) { return errors.New("invalid config key") } // just check if config was set - _, err := p.GetConfig() + _, err := GetConfig(ctx, m) if err != nil { if errors.Is(err, mongo.ErrNoDocuments) { return errors.New("config is not set") @@ -296,7 +327,7 @@ func (p *PBM) SetConfigVar(key, val string) error { // TODO: how to be with special case options like pitr.enabled switch key { case "pitr.enabled": - return errors.Wrap(p.confSetPITR(key, v.(bool)), "write to db") + return errors.Wrap(confSetPITR(ctx, m, key, v.(bool)), "write to db") case "pitr.compression": if c := v.(string); c != "" && !compress.IsValidCompressionType(c) { return errors.Errorf("unsupported compression type: %q", c) @@ -309,8 +340,8 @@ func (p *PBM) SetConfigVar(key, val string) error { s3.SDKLogLevel(v.(string), os.Stderr) } - _, err = p.Conn.Database(DB).Collection(ConfigCollection).UpdateOne( - p.ctx, + _, err = m.ConfigCollection().UpdateOne( + ctx, bson.D{}, bson.M{"$set": bson.M{key: v}}, ) @@ -318,12 +349,26 @@ func (p *PBM) SetConfigVar(key, val string) error { return errors.Wrap(err, "write to db") } -func (p *PBM) DeleteConfigVar(key string) error { +func confSetPITR(ctx context.Context, m connect.Client, k string, v bool) error { + ct, err := topo.GetClusterTime(ctx, m) + if err != nil { + return errors.Wrap(err, "get cluster time") + } + _, err = m.ConfigCollection().UpdateOne( + ctx, + bson.D{}, + bson.M{"$set": bson.M{k: v, "pitr.changed": time.Now().Unix(), "epoch": ct}}, + ) + + return err +} + +func DeleteConfigVar(ctx context.Context, m connect.Client, key string) error { if !ValidateConfigKey(key) { return errors.New("invalid config key") } - _, err := p.GetConfig() + _, err := GetConfig(ctx, m) if err != nil { if errors.Is(err, mongo.ErrNoDocuments) { return errors.New("config is not set") @@ -331,8 +376,8 @@ func (p *PBM) DeleteConfigVar(key string) error { return err } - _, err = p.Conn.Database(DB).Collection(ConfigCollection).UpdateOne( - p.ctx, + _, err = m.ConfigCollection().UpdateOne( + ctx, bson.D{}, bson.M{"$unset": bson.M{key: 1}}, ) @@ -340,27 +385,13 @@ func (p *PBM) DeleteConfigVar(key string) error { return errors.Wrap(err, "write to db") } -func (p *PBM) confSetPITR(k string, v bool) error { - ct, err := p.ClusterTime() - if err != nil { - return errors.Wrap(err, "get cluster time") - } - _, err = p.Conn.Database(DB).Collection(ConfigCollection).UpdateOne( - p.ctx, - bson.D{}, - bson.M{"$set": bson.M{k: v, "pitr.changed": time.Now().Unix(), "epoch": ct}}, - ) - - return err -} - // GetConfigVar returns value of given config vaiable -func (p *PBM) GetConfigVar(key string) (interface{}, error) { +func GetConfigVar(ctx context.Context, m connect.Client, key string) (interface{}, error) { if !ValidateConfigKey(key) { return nil, errors.New("invalid config key") } - bts, err := p.Conn.Database(DB).Collection(ConfigCollection).FindOne(p.ctx, bson.D{}).DecodeBytes() + bts, err := m.ConfigCollection().FindOne(ctx, bson.D{}).DecodeBytes() if err != nil { return nil, 
errors.Wrap(err, "get from db") } @@ -385,99 +416,48 @@ func (p *PBM) GetConfigVar(key string) (interface{}, error) { } } -// ValidateConfigKey checks if a config key valid -func ValidateConfigKey(k string) bool { - _, ok := _confmap[k] - return ok -} - -func (p *PBM) GetConfigYaml(fieldRedaction bool) ([]byte, error) { - c, err := p.GetConfig() +func IsPITREnabled(ctx context.Context, m connect.Client) (bool, bool, error) { + cfg, err := GetConfig(ctx, m) if err != nil { - return nil, errors.Wrap(err, "get from db") - } - - if fieldRedaction { - if c.Storage.S3.Credentials.AccessKeyID != "" { - c.Storage.S3.Credentials.AccessKeyID = "***" - } - if c.Storage.S3.Credentials.SecretAccessKey != "" { - c.Storage.S3.Credentials.SecretAccessKey = "***" - } - if c.Storage.S3.Credentials.SessionToken != "" { - c.Storage.S3.Credentials.SessionToken = "***" - } - if c.Storage.S3.Credentials.Vault.Secret != "" { - c.Storage.S3.Credentials.Vault.Secret = "***" - } - if c.Storage.S3.Credentials.Vault.Token != "" { - c.Storage.S3.Credentials.Vault.Token = "***" - } - if c.Storage.Azure.Credentials.Key != "" { - c.Storage.Azure.Credentials.Key = "***" + if errors.Is(err, mongo.ErrNoDocuments) { + return false, false, nil } + + return false, false, errors.Wrap(err, "get config") } - b, err := yaml.Marshal(c) - return b, errors.Wrap(err, "marshal yaml") + return cfg.PITR.Enabled, cfg.PITR.OplogOnly, nil } -func (p *PBM) GetConfig() (Config, error) { - return getPBMConfig(p.ctx, p.Conn) -} +type Epoch primitive.Timestamp -func getPBMConfig(ctx context.Context, m *mongo.Client) (Config, error) { - res := m.Database(DB).Collection(ConfigCollection).FindOne(ctx, bson.D{}) - if err := res.Err(); err != nil { - return Config{}, errors.WithMessage(err, "get") - } - - var c Config - if err := res.Decode(&c); err != nil { - return Config{}, errors.WithMessage(err, "decode") - } - - if c.Backup.Compression == "" { - c.Backup.Compression = compress.CompressionTypeS2 - } - if c.PITR.Compression == "" { - c.PITR.Compression = c.Backup.Compression - } - if c.PITR.CompressionLevel == nil { - c.PITR.CompressionLevel = c.Backup.CompressionLevel +func GetEpoch(ctx context.Context, m connect.Client) (Epoch, error) { + c, err := GetConfig(ctx, m) + if err != nil { + return Epoch{}, errors.Wrap(err, "get config") } - return c, nil + return Epoch(c.Epoch), nil } -// ErrStorageUndefined is an error for undefined storage -var ErrStorageUndefined = errors.New("storage undefined") +func ResetEpoch(m connect.Client) (Epoch, error) { + return ResetEpochWithContext(context.Background(), m) +} -// GetStorage reads current storage config and creates and -// returns respective storage.Storage object -func (p *PBM) GetStorage(l *log.Event) (storage.Storage, error) { - c, err := p.GetConfig() +func ResetEpochWithContext(ctx context.Context, m connect.Client) (Epoch, error) { + ct, err := topo.GetClusterTime(ctx, m) if err != nil { - return nil, errors.Wrap(err, "get config") + return Epoch{}, errors.Wrap(err, "get cluster time") } + _, err = m.ConfigCollection().UpdateOne( + ctx, + bson.D{}, + bson.M{"$set": bson.M{"epoch": ct}}, + ) - return Storage(c, l) + return Epoch(ct), err } -// Storage creates and returns a storage object based on a given config -func Storage(c Config, l *log.Event) (storage.Storage, error) { - switch c.Storage.Type { - case storage.S3: - return s3.New(c.Storage.S3, l) - case storage.Azure: - return azure.New(c.Storage.Azure, l) - case storage.Filesystem: - return fs.New(c.Storage.Filesystem) - case 
storage.BlackHole: - return blackhole.New(), nil - case storage.Undef: - return nil, ErrStorageUndefined - default: - return nil, errors.Errorf("unknown storage type %s", c.Storage.Type) - } +func (e Epoch) TS() primitive.Timestamp { + return primitive.Timestamp(e) } diff --git a/internal/connect/connect.go b/internal/connect/connect.go new file mode 100644 index 000000000..3fb5d63aa --- /dev/null +++ b/internal/connect/connect.go @@ -0,0 +1,187 @@ +package connect + +import ( + "net/url" + "strings" + + "go.mongodb.org/mongo-driver/mongo" + "go.mongodb.org/mongo-driver/mongo/options" + "go.mongodb.org/mongo-driver/mongo/readconcern" + "go.mongodb.org/mongo-driver/mongo/readpref" + "go.mongodb.org/mongo-driver/mongo/writeconcern" + + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" +) + +type ConnectOptions struct { + AppName string +} + +func connect(ctx context.Context, uri, appName string) (*mongo.Client, error) { + client, err := mongo.Connect(ctx, + options.Client().ApplyURI(uri). + SetAppName(appName). + SetReadPreference(readpref.Primary()). + SetReadConcern(readconcern.Majority()). + SetWriteConcern(writeconcern.Majority()), + ) + if err != nil { + return nil, errors.Wrap(err, "mongo connect") + } + + err = client.Ping(ctx, nil) + if err != nil { + return nil, errors.Wrap(err, "mongo ping") + } + + return client, nil +} + +type clientImpl struct { + client *mongo.Client +} + +func UnsafeClient(m *mongo.Client) Client { + return &clientImpl{m} +} + +func Connect(ctx context.Context, uri string, opts *ConnectOptions) (Client, error) { + if opts == nil { + opts = &ConnectOptions{} + } + + uri = "mongodb://" + strings.Replace(uri, "mongodb://", "", 1) + + client, err := connect(ctx, uri, opts.AppName) + if err != nil { + return nil, errors.Wrap(err, "create mongo connection") + } + + inf, err := getNodeInfo(ctx, client) + if err != nil { + return nil, errors.Wrap(err, "get NodeInfo") + } + if inf.isMongos() || inf.isConfigsvr() { + return &clientImpl{client: client}, nil + } + + inf.Opts, err = getMongodOpts(ctx, client, nil) + if err != nil { + return nil, errors.Wrap(err, "get mongod options") + } + + if !inf.isSharded() { + return &clientImpl{client: client}, nil + } + + csvr, err := getConfigsvrURI(ctx, client) + if err != nil { + return nil, errors.Wrap(err, "get config server connection URI") + } + // no need in this connection anymore, we need a new one with the ConfigServer + err = client.Disconnect(ctx) + if err != nil { + return nil, errors.Wrap(err, "disconnect old client") + } + + chost := strings.Split(csvr, "/") + if len(chost) < 2 { + return nil, errors.Wrapf(err, "define config server connection URI from %s", csvr) + } + + curi, err := url.Parse(uri) + if err != nil { + return nil, errors.Wrapf(err, "parse mongo-uri '%s'", uri) + } + + // Preserving the `replicaSet` parameter will cause an error + // while connecting to the ConfigServer (mismatched replicaset names) + query := curi.Query() + query.Del("replicaSet") + curi.RawQuery = query.Encode() + curi.Host = chost[1] + client, err = connect(ctx, curi.String(), opts.AppName) + if err != nil { + return nil, errors.Wrapf(err, "create mongo connection to configsvr with connection string '%s'", curi) + } + + return &clientImpl{client: client}, nil +} + +func (l *clientImpl) Disconnect(ctx context.Context) error { + return l.client.Disconnect(ctx) +} + +func (l *clientImpl) MongoClient() 
*mongo.Client { + return l.client +} + +func (l *clientImpl) ConfigDatabase() *mongo.Database { + return l.client.Database("config") +} + +func (l *clientImpl) AdminCommand(ctx context.Context, cmd any, opts ...*options.RunCmdOptions) *mongo.SingleResult { + return l.client.Database(defs.DB).RunCommand(ctx, cmd, opts...) +} + +func (l *clientImpl) LogCollection() *mongo.Collection { + return l.client.Database(defs.DB).Collection(defs.LogCollection) +} + +func (l *clientImpl) ConfigCollection() *mongo.Collection { + return l.client.Database(defs.DB).Collection(defs.ConfigCollection) +} + +func (l *clientImpl) LockCollection() *mongo.Collection { + return l.client.Database(defs.DB).Collection(defs.LockCollection) +} + +func (l *clientImpl) LockOpCollection() *mongo.Collection { + return l.client.Database(defs.DB).Collection(defs.LockOpCollection) +} + +func (l *clientImpl) BcpCollection() *mongo.Collection { + return l.client.Database(defs.DB).Collection(defs.BcpCollection) +} + +func (l *clientImpl) RestoresCollection() *mongo.Collection { + return l.client.Database(defs.DB).Collection(defs.RestoresCollection) +} + +func (l *clientImpl) CmdStreamCollection() *mongo.Collection { + return l.client.Database(defs.DB).Collection(defs.CmdStreamCollection) +} + +func (l *clientImpl) PITRChunksCollection() *mongo.Collection { + return l.client.Database(defs.DB).Collection(defs.PITRChunksCollection) +} + +func (l *clientImpl) PBMOpLogCollection() *mongo.Collection { + return l.client.Database(defs.DB).Collection(defs.PBMOpLogCollection) +} + +func (l *clientImpl) AgentsStatusCollection() *mongo.Collection { + return l.client.Database(defs.DB).Collection(defs.AgentsStatusCollection) +} + +type Client interface { + Disconnect(ctx context.Context) error + + MongoClient() *mongo.Client + + ConfigDatabase() *mongo.Database + AdminCommand(ctx context.Context, cmd any, opts ...*options.RunCmdOptions) *mongo.SingleResult + + LogCollection() *mongo.Collection + ConfigCollection() *mongo.Collection + LockCollection() *mongo.Collection + LockOpCollection() *mongo.Collection + BcpCollection() *mongo.Collection + RestoresCollection() *mongo.Collection + CmdStreamCollection() *mongo.Collection + PITRChunksCollection() *mongo.Collection + PBMOpLogCollection() *mongo.Collection + AgentsStatusCollection() *mongo.Collection +} diff --git a/internal/connect/todo.go b/internal/connect/todo.go new file mode 100644 index 000000000..12c7ba31a --- /dev/null +++ b/internal/connect/todo.go @@ -0,0 +1,81 @@ +package connect + +import ( + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/bson/primitive" + "go.mongodb.org/mongo-driver/mongo" + + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" +) + +// nodeInfo represents the mongo's node info +type nodeInfo struct { + Msg string `bson:"msg"` + SetName string `bson:"setName,omitempty"` + ConfigSvr int `bson:"configsvr,omitempty"` + ConfigServerState *struct { + OpTime *struct { + TS primitive.Timestamp `bson:"ts" json:"ts"` + Term int64 `bson:"t" json:"t"` + } `bson:"opTime"` + } `bson:"$configServerState,omitempty"` + Opts *mongodOpts `bson:"-"` +} + +// isSharded returns true is replset is part sharded cluster +func (i *nodeInfo) isSharded() bool { + return i.SetName != "" && (i.ConfigServerState != nil || i.Opts.Sharding.ClusterRole != "" || i.isConfigsvr()) +} + +// isConfigsvr returns replset role in sharded clister +func (i 
*nodeInfo) isConfigsvr() bool { + return i.ConfigSvr == 2 +} + +// IsSharded returns true is replset is part sharded cluster +func (i *nodeInfo) isMongos() bool { + return i.Msg == "isdbgrid" +} + +type mongodOpts struct { + Sharding struct { + ClusterRole string `bson:"clusterRole" json:"clusterRole" yaml:"-"` + } `bson:"sharding" json:"sharding" yaml:"-"` +} + +func getNodeInfo(ctx context.Context, m *mongo.Client) (*nodeInfo, error) { + res := m.Database(defs.DB).RunCommand(ctx, bson.D{{"isMaster", 1}}) + if err := res.Err(); err != nil { + return nil, errors.Wrap(err, "cmd: isMaster") + } + + n := &nodeInfo{} + err := res.Decode(&n) + return n, errors.Wrap(err, "decode") +} + +func getMongodOpts(ctx context.Context, m *mongo.Client, defaults *mongodOpts) (*mongodOpts, error) { + opts := struct { + Parsed mongodOpts `bson:"parsed" json:"parsed"` + }{} + if defaults != nil { + opts.Parsed = *defaults + } + err := m.Database("admin").RunCommand(ctx, bson.D{{"getCmdLineOpts", 1}}).Decode(&opts) + if err != nil { + return nil, errors.Wrap(err, "run mongo command") + } + return &opts.Parsed, nil +} + +func getConfigsvrURI(ctx context.Context, cn *mongo.Client) (string, error) { + csvr := struct { + URI string `bson:"configsvrConnectionString"` + }{} + err := cn.Database("admin").Collection("system.version"). + FindOne(ctx, bson.D{{"_id", "shardIdentity"}}).Decode(&csvr) + + return csvr.URI, err +} diff --git a/internal/context/context.go b/internal/context/context.go new file mode 100644 index 000000000..d4e074267 --- /dev/null +++ b/internal/context/context.go @@ -0,0 +1,39 @@ +package context + +import "context" + +type ( + Context = context.Context + CancelFunc = context.CancelFunc +) + +// since 1.20 +// type CancelCauseFunc = context.CancelCauseFunc + +var ( + Canceled = context.Canceled + DeadlineExceeded = context.DeadlineExceeded +) + +var ( + Background = context.Background + TODO = context.TODO + WithCancel = context.WithCancel + WithDeadline = context.WithDeadline + WithTimeout = context.WithTimeout + WithValue = context.WithValue +) + +// since 1.20 +// var ( +// Cause = context.Cause +// WithCancelCause = context.WithCancelCause +// ) + +// since 1.21 +// var ( +// WithoutCancel = context.WithoutCancel +// AfterFunc = context.AfterFunc +// WithDeadlineCause = context.WithDeadlineCause +// WithTimeoutCause = context.WithTimeoutCause +// ) diff --git a/internal/defs/cmd.go b/internal/defs/cmd.go new file mode 100644 index 000000000..242dc9721 --- /dev/null +++ b/internal/defs/cmd.go @@ -0,0 +1,42 @@ +package defs + +// Command represents actions that could be done on behalf of the client by the agents +type Command string + +const ( + CmdUndefined Command = "" + CmdBackup Command = "backup" + CmdRestore Command = "restore" + CmdReplay Command = "replay" + CmdCancelBackup Command = "cancelBackup" + CmdResync Command = "resync" + CmdPITR Command = "pitr" + CmdDeleteBackup Command = "delete" + CmdDeletePITR Command = "deletePitr" + CmdCleanup Command = "cleanup" +) + +func (c Command) String() string { + switch c { + case CmdBackup: + return "Snapshot backup" + case CmdRestore: + return "Snapshot restore" + case CmdReplay: + return "Oplog replay" + case CmdCancelBackup: + return "Backup cancellation" + case CmdResync: + return "Resync storage" + case CmdPITR: + return "PITR incremental backup" + case CmdDeleteBackup: + return "Delete" + case CmdDeletePITR: + return "Delete PITR chunks" + case CmdCleanup: + return "Cleanup backups and PITR chunks" + default: + return "Undefined" + } 
+} diff --git a/internal/defs/defs.go b/internal/defs/defs.go new file mode 100644 index 000000000..9101fa80d --- /dev/null +++ b/internal/defs/defs.go @@ -0,0 +1,165 @@ +package defs + +import "time" + +const ( + // DB is a name of the PBM database + DB = "admin" + // LogCollection is the name of the mongo collection that contains PBM logs + LogCollection = "pbmLog" + // ConfigCollection is the name of the mongo collection that contains PBM configs + ConfigCollection = "pbmConfig" + // LockCollection is the name of the mongo collection that is used + // by agents to coordinate mutually exclusive operations (e.g. backup/restore) + LockCollection = "pbmLock" + // LockOpCollection is the name of the mongo collection that is used + // by agents to coordinate operations that don't need to be + // mutually exclusive to other operation types (e.g. backup-delete) + LockOpCollection = "pbmLockOp" + // BcpCollection is a collection for backups metadata + BcpCollection = "pbmBackups" + // RestoresCollection is a collection for restores metadata + RestoresCollection = "pbmRestores" + // CmdStreamCollection is the name of the mongo collection that contains backup/restore commands stream + CmdStreamCollection = "pbmCmd" + // PITRChunksCollection contains index metadata of PITR chunks + PITRChunksCollection = "pbmPITRChunks" + // PBMOpLogCollection contains log of acquired locks (hence run ops) + PBMOpLogCollection = "pbmOpLog" + // AgentsStatusCollection is an agents registry with its status/health checks + AgentsStatusCollection = "pbmAgents" +) + +const ( + // TmpUsersCollection and TmpRoles are tmp collections used to avoid + // user related issues while resoring on new cluster and preserving UUID + // See https://jira.percona.com/browse/PBM-425, https://jira.percona.com/browse/PBM-636 + TmpUsersCollection = `pbmRUsers` + TmpRolesCollection = `pbmRRoles` +) + +const ( + PITRcheckRange = time.Second * 15 + AgentsStatCheckRange = time.Second * 5 +) + +var ( + WaitActionStart = time.Second * 15 + WaitBackupStart = WaitActionStart + PITRcheckRange*12/10 // 33 seconds +) + +type NodeHealth int + +const ( + NodeHealthDown NodeHealth = iota + NodeHealthUp +) + +type NodeState int + +const ( + NodeStateStartup NodeState = iota + NodeStatePrimary + NodeStateSecondary + NodeStateRecovering + NodeStateStartup2 + NodeStateUnknown + NodeStateArbiter + NodeStateDown + NodeStateRollback + NodeStateRemoved +) + +type BackupType string + +const ( + PhysicalBackup BackupType = "physical" + ExternalBackup BackupType = "external" + IncrementalBackup BackupType = "incremental" + LogicalBackup BackupType = "logical" +) + +// Status is a backup current status +type Status string + +const ( + StatusInit Status = "init" + StatusReady Status = "ready" + + // for phys restore, to indicate shards have been stopped + StatusDown Status = "down" + + StatusStarting Status = "starting" + StatusRunning Status = "running" + StatusDumpDone Status = "dumpDone" + StatusCopyReady Status = "copyReady" + StatusCopyDone Status = "copyDone" + StatusPartlyDone Status = "partlyDone" + StatusDone Status = "done" + StatusCancelled Status = "canceled" + StatusError Status = "error" + + // status to communicate last op timestamp if it's not set + // during external restore + StatusExtTS Status = "lastTS" +) + +type CompressionType string + +const ( + CompressionTypeNone CompressionType = "none" + CompressionTypeGZIP CompressionType = "gzip" + CompressionTypePGZIP CompressionType = "pgzip" + CompressionTypeSNAPPY CompressionType = "snappy" + 
CompressionTypeLZ4 CompressionType = "lz4" + CompressionTypeS2 CompressionType = "s2" + CompressionTypeZstandard CompressionType = "zstd" +) + +func (c CompressionType) Suffix() string { + switch c { + case CompressionTypeGZIP, CompressionTypePGZIP: + return ".gz" + case CompressionTypeLZ4: + return ".lz4" + case CompressionTypeSNAPPY: + return ".snappy" + case CompressionTypeS2: + return ".s2" + case CompressionTypeZstandard: + return ".zst" + case CompressionTypeNone: + fallthrough + default: + return "" + } +} + +type Operation string + +const ( + OperationInsert Operation = "i" + OperationNoop Operation = "n" + OperationUpdate Operation = "u" + OperationDelete Operation = "d" + OperationCommand Operation = "c" +) + +const StaleFrameSec uint32 = 30 + +const ( + // MetadataFileSuffix is a suffix for the metadata file on a storage + MetadataFileSuffix = ".pbm.json" + + ExternalRsMetaFile = "pbm.rsmeta.%s.json" + + StorInitFile = ".pbm.init" + PhysRestoresDir = ".pbm.restore" +) + +const ( + // PITRdefaultSpan oplog slicing time span + PITRdefaultSpan = time.Minute * 10 + // PITRfsPrefix is a prefix (folder) for PITR chunks on the storage + PITRfsPrefix = "pbmPitr" +) diff --git a/internal/errors/errors.go b/internal/errors/errors.go new file mode 100644 index 000000000..c9f239364 --- /dev/null +++ b/internal/errors/errors.go @@ -0,0 +1,46 @@ +package errors + +import ( + stderrors "errors" + + gerrs "github.com/pkg/errors" +) + +// ErrNotFound - object not found +var ErrNotFound = New("not found") + +func New(text string) error { + return stderrors.New(text) //nolint:goerr113 +} + +func Errorf(format string, args ...any) error { + return gerrs.Errorf(format, args...) +} + +func Wrap(cause error, text string) error { + return gerrs.WithMessage(cause, text) +} + +func Wrapf(cause error, format string, args ...any) error { + return gerrs.WithMessagef(cause, format, args...) +} + +func Is(cause, target error) bool { + return gerrs.Is(cause, target) +} + +func As(cause error, target interface{}) bool { + return gerrs.As(cause, target) +} + +func Unwrap(cause error) error { + return gerrs.Unwrap(cause) +} + +func Cause(err error) error { + return gerrs.Cause(err) +} + +// func Join(errs ...error) error { +// return stderrors.Join(errs...) 
+// } diff --git a/internal/lock/errors.go b/internal/lock/errors.go new file mode 100644 index 000000000..f0dbf405c --- /dev/null +++ b/internal/lock/errors.go @@ -0,0 +1,58 @@ +package lock + +import "fmt" + +// ConcurrentOpError means lock was already acquired by another node +type ConcurrentOpError struct { + Lock LockHeader +} + +func (e ConcurrentOpError) Error() string { + return fmt.Sprintf("another operation is running: %s '%s'", e.Lock.Type, e.Lock.OPID) +} + +func (ConcurrentOpError) Is(err error) bool { + if err == nil { + return false + } + + _, ok := err.(ConcurrentOpError) //nolint:errorlint + return ok +} + +// StaleLockError - the lock was already got but the operation seems to be staled (no hb from the node) +type StaleLockError struct { + Lock LockHeader +} + +func (e StaleLockError) Error() string { + return fmt.Sprintf("was stale lock: %s '%s'", e.Lock.Type, e.Lock.OPID) +} + +func (StaleLockError) Is(err error) bool { + if err == nil { + return false + } + + _, ok := err.(StaleLockError) //nolint:errorlint + return ok +} + +// DuplicatedOpError means the operation with the same ID +// alredy had been running +type DuplicatedOpError struct { + Lock LockHeader +} + +func (e DuplicatedOpError) Error() string { + return fmt.Sprintf("duplicate operation: %s [%s]", e.Lock.OPID, e.Lock.Type) +} + +func (DuplicatedOpError) Is(err error) bool { + if err == nil { + return false + } + + _, ok := err.(DuplicatedOpError) //nolint:errorlint + return ok +} diff --git a/pbm/lock.go b/internal/lock/lock.go similarity index 51% rename from pbm/lock.go rename to internal/lock/lock.go index a22b96409..51a83c897 100644 --- a/pbm/lock.go +++ b/internal/lock/lock.go @@ -1,24 +1,27 @@ -package pbm +package lock import ( - "context" - "fmt" "time" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" -) -const StaleFrameSec uint32 = 30 + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/log" + "github.com/percona/percona-backup-mongodb/internal/query" + "github.com/percona/percona-backup-mongodb/internal/topo" +) // LockHeader describes the lock. This data will be serialased into the mongo document. type LockHeader struct { - Type Command `bson:"type,omitempty" json:"type,omitempty"` - Replset string `bson:"replset,omitempty" json:"replset,omitempty"` - Node string `bson:"node,omitempty" json:"node,omitempty"` - OPID string `bson:"opid,omitempty" json:"opid,omitempty"` + Type defs.Command `bson:"type,omitempty" json:"type,omitempty"` + Replset string `bson:"replset,omitempty" json:"replset,omitempty"` + Node string `bson:"node,omitempty" json:"node,omitempty"` + OPID string `bson:"opid,omitempty" json:"opid,omitempty"` // should be a pointer so mongo find with empty epoch would work // otherwise it always set it at least to "epoch":{"$timestamp":{"t":0,"i":0}} Epoch *primitive.Timestamp `bson:"epoch,omitempty" json:"epoch,omitempty"` @@ -32,8 +35,8 @@ type LockData struct { // Lock is a lock for the PBM operation (e.g. 
backup, restore) type Lock struct { LockData - p *PBM - c *mongo.Collection + m connect.Client + coll *mongo.Collection cancel context.CancelFunc hbRate time.Duration staleSec uint32 @@ -41,72 +44,36 @@ type Lock struct { // NewLock creates a new Lock object from geven header. Returned lock has no state. // So Acquire() and Release() methods should be called. -func (p *PBM) NewLock(h LockHeader) *Lock { - return p.newLock(h, LockCollection) +func NewLock(m connect.Client, h LockHeader) *Lock { + return newLock(m, m.LockCollection(), h) } -// NewLockCol creates a new Lock object from geven header in given collection. +// NewOpLock creates a new Lock object from geven header in given op. // Returned lock has no state. So Acquire() and Release() methods should be called. -func (p *PBM) NewLockCol(h LockHeader, collection string) *Lock { - return p.newLock(h, collection) +func NewOpLock(m connect.Client, h LockHeader) *Lock { + return newLock(m, m.LockOpCollection(), h) } -func (p *PBM) newLock(h LockHeader, col string) *Lock { +func newLock(m connect.Client, coll *mongo.Collection, h LockHeader) *Lock { return &Lock{ LockData: LockData{ LockHeader: h, }, - p: p, - c: p.Conn.Database(DB).Collection(col), + m: m, + coll: coll, hbRate: time.Second * 5, - staleSec: StaleFrameSec, + staleSec: defs.StaleFrameSec, } } -// ConcurrentOpError means lock was already acquired by another node -type ConcurrentOpError struct { - Lock LockHeader -} - -func (e ConcurrentOpError) Error() string { - return fmt.Sprintf("another operation is running: %s '%s'", e.Lock.Type, e.Lock.OPID) -} - -func (ConcurrentOpError) Is(err error) bool { - if err == nil { - return false - } - - _, ok := err.(ConcurrentOpError) //nolint:errorlint - return ok -} - -// StaleLockError - the lock was already got but the operation seems to be staled (no hb from the node) -type StaleLockError struct { - Lock LockHeader -} - -func (e StaleLockError) Error() string { - return fmt.Sprintf("was stale lock: %s '%s'", e.Lock.Type, e.Lock.OPID) -} - -func (StaleLockError) Is(err error) bool { - if err == nil { - return false - } - - _, ok := err.(StaleLockError) //nolint:errorlint - return ok -} - // Rewrite tries to acquire the lock instead the `old` one. // It returns true in case of success and false if // a lock already acquired by another process or some error happened. // In case of concurrent lock exists is stale it will be deleted and // ErrWasStaleLock gonna be returned. A client shell mark respective operation // as stale and retry if it needs to -func (l *Lock) Rewrite(old *LockHeader) (bool, error) { - return l.try(old) +func (l *Lock) Rewrite(ctx context.Context, old *LockHeader) (bool, error) { + return l.try(ctx, old) } // Acquire tries to acquire the lock. @@ -115,18 +82,18 @@ func (l *Lock) Rewrite(old *LockHeader) (bool, error) { // In case of concurrent lock exists is stale it will be deleted and // ErrWasStaleLock gonna be returned. 
A client shell mark respective operation // as stale and retry if it needs to -func (l *Lock) Acquire() (bool, error) { - return l.try(nil) +func (l *Lock) Acquire(ctx context.Context) (bool, error) { + return l.try(ctx, nil) } -func (l *Lock) try(old *LockHeader) (bool, error) { +func (l *Lock) try(ctx context.Context, old *LockHeader) (bool, error) { var got bool var err error if old != nil { - got, err = l.rewrite(old) + got, err = l.rewrite(ctx, old) } else { - got, err = l.acquire() + got, err = l.acquire(ctx) } if err != nil { @@ -135,7 +102,7 @@ func (l *Lock) try(old *LockHeader) (bool, error) { if got { // log the operation. duplicate means error - err := l.log() + err := l.log(ctx) if err != nil { rerr := l.Release() if rerr != nil { @@ -147,12 +114,12 @@ func (l *Lock) try(old *LockHeader) (bool, error) { } // there is some concurrent lock - peer, err := l.p.getLockData(&LockHeader{Replset: l.Replset}, l.c) + peer, err := getLockData(ctx, &LockHeader{Replset: l.Replset}, l.coll) if err != nil { return false, errors.Wrap(err, "check for the peer") } - ts, err := l.p.ClusterTime() + ts, err := topo.GetClusterTime(ctx, l.m) if err != nil { return false, errors.Wrap(err, "read cluster time") } @@ -165,7 +132,7 @@ func (l *Lock) try(old *LockHeader) (bool, error) { return false, nil } - _, err = l.c.DeleteOne(l.p.Context(), peer.LockHeader) + _, err = l.coll.DeleteOne(ctx, peer.LockHeader) if err != nil { return false, errors.Wrap(err, "delete stale lock") } @@ -173,34 +140,15 @@ func (l *Lock) try(old *LockHeader) (bool, error) { return false, StaleLockError{Lock: peer.LockHeader} } -// DuplicatedOpError means the operation with the same ID -// alredy had been running -type DuplicatedOpError struct { - Lock LockHeader -} - -func (e DuplicatedOpError) Error() string { - return fmt.Sprintf("duplicate operation: %s [%s]", e.Lock.OPID, e.Lock.Type) -} - -func (DuplicatedOpError) Is(err error) bool { - if err == nil { - return false - } - - _, ok := err.(DuplicatedOpError) //nolint:errorlint - return ok -} - -func (l *Lock) log() error { +func (l *Lock) log(ctx context.Context) error { // PITR slicing technically speaking is not an OP but // long standing process. It souldn't be logged. Moreover // having no opid it would block all subsequent PITR events. 
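A minimal usage sketch of the relocated, context-aware lock API introduced above (internal/lock). The function name, replset, node address, and OPID below are placeholders, not values from the patch; error handling relies on the typed errors now defined in internal/lock/errors.go.

package example

import (
	"github.com/percona/percona-backup-mongodb/internal/connect"
	"github.com/percona/percona-backup-mongodb/internal/context"
	"github.com/percona/percona-backup-mongodb/internal/defs"
	"github.com/percona/percona-backup-mongodb/internal/errors"
	"github.com/percona/percona-backup-mongodb/internal/lock"
)

// acquireBackupLock is a hypothetical helper showing how a caller passes
// connect.Client and ctx explicitly instead of going through *pbm.PBM.
func acquireBackupLock(ctx context.Context, m connect.Client) (*lock.Lock, error) {
	l := lock.NewLock(m, lock.LockHeader{
		Type:    defs.CmdBackup,
		Replset: "rs0",          // placeholder
		Node:    "rs0:27017",    // placeholder
		OPID:    "000000000000", // placeholder
	})

	got, err := l.Acquire(ctx)
	if err != nil {
		if errors.Is(err, lock.StaleLockError{}) {
			// A stale lock from a lost agent was removed; the caller may retry.
			return nil, errors.Wrap(err, "stale lock removed, retry")
		}
		return nil, errors.Wrap(err, "acquire")
	}
	if !got {
		return nil, errors.New("another operation is running")
	}

	return l, nil // the caller is responsible for l.Release()
}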
- if l.LockHeader.Type == CmdPITR { + if l.LockHeader.Type == defs.CmdPITR { return nil } - _, err := l.p.Conn.Database(DB).Collection(PBMOpLogCollection).InsertOne(l.p.Context(), l.LockHeader) + _, err := l.m.PBMOpLogCollection().InsertOne(ctx, l.LockHeader) if err != nil { if se, ok := err.(mongo.ServerError); ok && se.HasErrorCode(11000) { //nolint:errorlint return DuplicatedOpError{l.LockHeader} @@ -211,34 +159,40 @@ func (l *Lock) log() error { return nil } -func (p *PBM) MarkBcpStale(opid string) error { - bcp, err := p.GetBackupByOPID(opid) +func MarkBcpStale(ctx context.Context, l *Lock, opid string) error { + bcp, err := query.GetBackupByOPID(ctx, l.m, opid) if err != nil { return errors.Wrap(err, "get backup meta") } // not to rewrite an error emitted by the agent - if bcp.Status == StatusError || bcp.Status == StatusDone { + if bcp.Status == defs.StatusError || bcp.Status == defs.StatusDone { return nil } - p.log.Debug(string(CmdBackup), "", opid, primitive.Timestamp{}, "mark stale meta") - return p.ChangeBackupStateOPID(opid, StatusError, "some of pbm-agents were lost during the backup") + if logger := log.GetLoggerFromContextOr(ctx, nil); logger != nil { + logger.Debug(string(defs.CmdBackup), "", opid, primitive.Timestamp{}, "mark stale meta") + } + return query.ChangeBackupStateOPID(l.m, opid, defs.StatusError, + "some of pbm-agents were lost during the backup") } -func (p *PBM) MarkRestoreStale(opid string) error { - r, err := p.GetRestoreMetaByOPID(opid) +func MarkRestoreStale(ctx context.Context, l *Lock, opid string) error { + r, err := query.GetRestoreMetaByOPID(ctx, l.m, opid) if err != nil { return errors.Wrap(err, "get retore meta") } // not to rewrite an error emitted by the agent - if r.Status == StatusError || r.Status == StatusDone { + if r.Status == defs.StatusError || r.Status == defs.StatusDone { return nil } - p.log.Debug(string(CmdRestore), "", opid, primitive.Timestamp{}, "mark stale meta") - return p.ChangeRestoreStateOPID(opid, StatusError, "some of pbm-agents were lost during the restore") + if logger := log.GetLoggerFromContextOr(ctx, nil); logger != nil { + logger.Debug(string(defs.CmdRestore), "", opid, primitive.Timestamp{}, "mark stale meta") + } + return query.ChangeRestoreStateOPID(ctx, l.m, opid, defs.StatusError, + "some of pbm-agents were lost during the restore") } // Release the lock @@ -247,18 +201,18 @@ func (l *Lock) Release() error { l.cancel() } - _, err := l.c.DeleteOne(l.p.Context(), l.LockHeader) + _, err := l.coll.DeleteOne(context.Background(), l.LockHeader) return errors.Wrap(err, "deleteOne") } -func (l *Lock) acquire() (bool, error) { +func (l *Lock) acquire(ctx context.Context) (bool, error) { var err error - l.Heartbeat, err = l.p.ClusterTime() + l.Heartbeat, err = topo.GetClusterTime(ctx, l.m) if err != nil { return false, errors.Wrap(err, "read cluster time") } - _, err = l.c.InsertOne(l.p.Context(), l.LockData) + _, err = l.coll.InsertOne(ctx, l.LockData) if err != nil { if se, ok := err.(mongo.ServerError); ok && se.HasErrorCode(11000) { //nolint:errorlint return false, nil @@ -266,26 +220,26 @@ func (l *Lock) acquire() (bool, error) { return false, errors.Wrap(err, "acquire lock") } - l.hb() + l.hb(ctx) return true, nil } // rewrite tries to rewrite the given lock with itself // it will transactionally delete the `old` lock // and acquire an istance of itself -func (l *Lock) rewrite(old *LockHeader) (bool, error) { +func (l *Lock) rewrite(ctx context.Context, old *LockHeader) (bool, error) { var err error - l.Heartbeat, err 
= l.p.ClusterTime() + l.Heartbeat, err = topo.GetClusterTime(ctx, l.m) if err != nil { return false, errors.Wrap(err, "read cluster time") } - _, err = l.c.DeleteOne(l.p.Context(), old) + _, err = l.coll.DeleteOne(ctx, old) if err != nil { return false, errors.Wrap(err, "rewrite: delete old") } - _, err = l.c.InsertOne(l.p.Context(), l.LockData) + _, err = l.coll.InsertOne(ctx, l.LockData) if err != nil { if se, ok := err.(mongo.ServerError); ok && se.HasErrorCode(11000) { //nolint:errorlint @@ -294,14 +248,15 @@ func (l *Lock) rewrite(old *LockHeader) (bool, error) { return false, errors.Wrap(err, "acquire lock") } - l.hb() + l.hb(ctx) return true, nil } // heartbeats for the lock -func (l *Lock) hb() { - var ctx context.Context - ctx, l.cancel = context.WithCancel(context.Background()) +func (l *Lock) hb(ctx context.Context) { + logger := log.GetLoggerFromContextOr(ctx, nil) + ctx, l.cancel = context.WithCancel(ctx) + go func() { tk := time.NewTicker(l.hbRate) defer tk.Stop() @@ -309,9 +264,9 @@ func (l *Lock) hb() { for { select { case <-tk.C: - err := l.beat() - if err != nil { - l.p.log.Error(string(l.Type), "", l.OPID, *l.Epoch, "send lock heartbeat: %v", err) + err := l.beat(ctx) + if err != nil && logger != nil { + logger.Error(string(l.Type), "", l.OPID, *l.Epoch, "send lock heartbeat: %v", err) } case <-ctx.Done(): return @@ -320,31 +275,31 @@ func (l *Lock) hb() { }() } -func (l *Lock) beat() error { - ts, err := l.p.ClusterTime() +func (l *Lock) beat(ctx context.Context) error { + ts, err := topo.GetClusterTime(ctx, l.m) if err != nil { return errors.Wrap(err, "read cluster time") } - _, err = l.c.UpdateOne( - l.p.Context(), + _, err = l.coll.UpdateOne( + ctx, l.LockHeader, bson.M{"$set": bson.M{"hb": ts}}, ) return errors.Wrap(err, "set timestamp") } -func (p *PBM) GetLockData(lh *LockHeader) (LockData, error) { - return p.getLockData(lh, p.Conn.Database(DB).Collection(LockCollection)) +func GetLockData(ctx context.Context, m connect.Client, lh *LockHeader) (LockData, error) { + return getLockData(ctx, lh, m.LockCollection()) } -func (p *PBM) GetOpLockData(lh *LockHeader) (LockData, error) { - return p.getLockData(lh, p.Conn.Database(DB).Collection(LockOpCollection)) +func GetOpLockData(ctx context.Context, m connect.Client, lh *LockHeader) (LockData, error) { + return getLockData(ctx, lh, m.LockOpCollection()) } -func (p *PBM) getLockData(lh *LockHeader, cl *mongo.Collection) (LockData, error) { +func getLockData(ctx context.Context, lh *LockHeader, cl *mongo.Collection) (LockData, error) { var l LockData - r := cl.FindOne(p.ctx, lh) + r := cl.FindOne(ctx, lh) if r.Err() != nil { return l, r.Err() } @@ -352,23 +307,23 @@ func (p *PBM) getLockData(lh *LockHeader, cl *mongo.Collection) (LockData, error return l, err } -func (p *PBM) GetLocks(lh *LockHeader) ([]LockData, error) { - return p.getLocks(lh, p.Conn.Database(DB).Collection(LockCollection)) +func GetLocks(ctx context.Context, m connect.Client, lh *LockHeader) ([]LockData, error) { + return getLocks(ctx, lh, m.LockCollection()) } -func (p *PBM) GetOpLocks(lh *LockHeader) ([]LockData, error) { - return p.getLocks(lh, p.Conn.Database(DB).Collection(LockOpCollection)) +func GetOpLocks(ctx context.Context, m connect.Client, lh *LockHeader) ([]LockData, error) { + return getLocks(ctx, lh, m.LockOpCollection()) } -func (p *PBM) getLocks(lh *LockHeader, cl *mongo.Collection) ([]LockData, error) { +func getLocks(ctx context.Context, lh *LockHeader, cl *mongo.Collection) ([]LockData, error) { var locks []LockData - cur, err := 
cl.Find(p.ctx, lh) + cur, err := cl.Find(ctx, lh) if err != nil { return nil, errors.Wrap(err, "get locks") } - for cur.Next(p.ctx) { + for cur.Next(ctx) { var l LockData err := cur.Decode(&l) if err != nil { diff --git a/internal/log/context.go b/internal/log/context.go new file mode 100644 index 000000000..d1245e671 --- /dev/null +++ b/internal/log/context.go @@ -0,0 +1,44 @@ +package log + +import "github.com/percona/percona-backup-mongodb/internal/context" + +const ( + logEventTag = "pbm:log:event" + loggerTag = "pbm:logger" +) + +func GetLogEventFromContextOr(ctx context.Context, fallback *Event) *Event { + val := ctx.Value(logEventTag) + if val == nil { + return fallback + } + + ev, ok := val.(*Event) + if !ok { + return fallback + } + + return ev +} + +func SetLogEventToContext(ctx context.Context, ev *Event) context.Context { + return context.WithValue(ctx, logEventTag, ev) +} + +func GetLoggerFromContextOr(ctx context.Context, fallback *Logger) *Logger { + val := ctx.Value(loggerTag) + if val == nil { + return fallback + } + + ev, ok := val.(*Logger) + if !ok { + return fallback + } + + return ev +} + +func SetLoggerToContext(ctx context.Context, ev *Logger) context.Context { + return context.WithValue(ctx, logEventTag, ev) +} diff --git a/pbm/log/log.go b/internal/log/log.go similarity index 90% rename from pbm/log/log.go rename to internal/log/log.go index 3886ce0f9..6f8180d8b 100644 --- a/pbm/log/log.go +++ b/internal/log/log.go @@ -1,7 +1,6 @@ package log import ( - "context" "encoding/json" "fmt" "io" @@ -11,11 +10,14 @@ import ( "sync/atomic" "time" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" "go.mongodb.org/mongo-driver/mongo/options" + + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/errors" ) type Logger struct { @@ -192,7 +194,7 @@ func (l *Logger) output( Msg: msg, } - err := l.Output(e) + err := l.Output(context.TODO(), e) if err != nil { log.Printf("[ERROR] wrting log: %v, entry: %s", err, e) } @@ -222,11 +224,11 @@ func (l *Logger) Fatal(event, obj, opid string, epoch primitive.Timestamp, msg s l.output(Fatal, event, obj, opid, epoch, msg, args...) 
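A short sketch of the context-carried logger helpers added in internal/log/context.go; the function names and the opid value are placeholders. Note that SetLoggerToContext as written above appears to store the value under logEventTag while GetLoggerFromContextOr reads loggerTag, so this sketch assumes the intended behavior of a matching loggerTag key.

package example

import (
	"go.mongodb.org/mongo-driver/bson/primitive"

	"github.com/percona/percona-backup-mongodb/internal/context"
	"github.com/percona/percona-backup-mongodb/internal/defs"
	"github.com/percona/percona-backup-mongodb/internal/log"
)

// withLogger attaches the logger once, near the top of a command handler.
func withLogger(ctx context.Context, logger *log.Logger) context.Context {
	return log.SetLoggerToContext(ctx, logger)
}

// doWork retrieves the logger deep in the call tree, falling back to nil
// and guarding, the same way lock.MarkBcpStale does in the diff above.
func doWork(ctx context.Context, opid string) {
	if logger := log.GetLoggerFromContextOr(ctx, nil); logger != nil {
		logger.Debug(string(defs.CmdBackup), "", opid, primitive.Timestamp{}, "doing work")
	}
}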
} -func (l *Logger) Output(e *Entry) error { +func (l *Logger) Output(ctx context.Context, e *Entry) error { var rerr error if l.cn != nil && atomic.LoadInt32(&l.pauseMgo) == 0 { - _, err := l.cn.InsertOne(context.TODO(), e) + _, err := l.cn.InsertOne(ctx, e) if err != nil { rerr = errors.Wrap(err, "db") } @@ -372,20 +374,26 @@ func buildLogFilter(r *LogRequest, exactSeverity bool) bson.D { return filter } -func Get(cn *mongo.Collection, r *LogRequest, limit int64, exactSeverity bool) (*Entries, error) { +func fetch( + ctx context.Context, + m connect.Client, + r *LogRequest, + limit int64, + exactSeverity bool, +) (*Entries, error) { filter := buildLogFilter(r, exactSeverity) - cur, err := cn.Find( - context.TODO(), + cur, err := m.LogCollection().Find( + ctx, filter, options.Find().SetLimit(limit).SetSort(bson.D{{"ts", -1}, {"ns", -1}}), ) if err != nil { return nil, errors.Wrap(err, "get list from mongo") } - defer cur.Close(context.TODO()) + defer cur.Close(ctx) e := &Entries{} - for cur.Next(context.TODO()) { + for cur.Next(ctx) { l := Entry{} err := cur.Decode(&l) if err != nil { @@ -417,7 +425,7 @@ func Follow( cur, err := coll.Find(ctx, filter, opt) if err != nil { - errC <- errors.WithMessage(err, "query") + errC <- errors.Wrap(err, "query") return } defer cur.Close(context.Background()) @@ -425,7 +433,7 @@ func Follow( for cur.Next(ctx) { e := &Entry{} if err := cur.Decode(e); err != nil { - errC <- errors.WithMessage(err, "decode") + errC <- errors.Wrap(err, "decode") return } @@ -436,3 +444,11 @@ func Follow( return outC, errC } + +func LogGet(ctx context.Context, m connect.Client, r *LogRequest, limit int64) (*Entries, error) { + return fetch(ctx, m, r, limit, false) +} + +func LogGetExactSeverity(ctx context.Context, m connect.Client, r *LogRequest, limit int64) (*Entries, error) { + return fetch(ctx, m, r, limit, true) +} diff --git a/pbm/bcp_nodes_priority.go b/internal/priority/priority.go similarity index 56% rename from pbm/bcp_nodes_priority.go rename to internal/priority/priority.go index e4de5d970..2fe052352 100644 --- a/pbm/bcp_nodes_priority.go +++ b/internal/priority/priority.go @@ -1,10 +1,14 @@ -package pbm +package priority import ( "sort" - "github.com/pkg/errors" - "go.mongodb.org/mongo-driver/bson" + "github.com/percona/percona-backup-mongodb/internal/config" + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/topo" ) const defaultScore = 1.0 @@ -35,23 +39,28 @@ func (n *NodesPriority) RS(rs string) [][]string { return n.m[rs].list() } -type agentScore func(AgentStat) float64 +type agentScore func(topo.AgentStat) float64 // BcpNodesPriority returns list nodes grouped by backup preferences // in descended order. First are nodes with the highest priority. // Custom coefficients might be passed. These will be ignored though // if the config is set. 
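A minimal sketch of calling the relocated node-priority helper, assuming agent stats were fetched beforehand; the function name and the "rs0" replset name are placeholders.

package example

import (
	"fmt"

	"github.com/percona/percona-backup-mongodb/internal/connect"
	"github.com/percona/percona-backup-mongodb/internal/context"
	"github.com/percona/percona-backup-mongodb/internal/priority"
	"github.com/percona/percona-backup-mongodb/internal/topo"
)

func pickCandidates(ctx context.Context, m connect.Client, agents []topo.AgentStat) error {
	// nil custom coefficients: priorities come from the config, or defaults apply.
	np, err := priority.BcpNodesPriority(ctx, m, nil, agents)
	if err != nil {
		return err
	}

	// Nodes for replset "rs0", grouped by score, highest priority first.
	for i, tier := range np.RS("rs0") {
		fmt.Println("tier", i, tier)
	}
	return nil
}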
-func (p *PBM) BcpNodesPriority(c map[string]float64, agents []AgentStat) (*NodesPriority, error) { - cfg, err := p.GetConfig() +func BcpNodesPriority( + ctx context.Context, + m connect.Client, + c map[string]float64, + agents []topo.AgentStat, +) (*NodesPriority, error) { + cfg, err := config.GetConfig(ctx, m) if err != nil { return nil, errors.Wrap(err, "get config") } // if cfg.Backup.Priority doesn't set apply defaults - f := func(a AgentStat) float64 { + f := func(a topo.AgentStat) float64 { if coeff, ok := c[a.Node]; ok && c != nil { return defaultScore * coeff - } else if a.State == NodeStatePrimary { + } else if a.State == defs.NodeStatePrimary { return defaultScore / 2 } else if a.Hidden { return defaultScore * 2 @@ -60,7 +69,7 @@ func (p *PBM) BcpNodesPriority(c map[string]float64, agents []AgentStat) (*Nodes } if cfg.Backup.Priority != nil || len(cfg.Backup.Priority) > 0 { - f = func(a AgentStat) float64 { + f = func(a topo.AgentStat) float64 { sc, ok := cfg.Backup.Priority[a.Node] if !ok || sc < 0 { return defaultScore @@ -73,7 +82,7 @@ func (p *PBM) BcpNodesPriority(c map[string]float64, agents []AgentStat) (*Nodes return bcpNodesPriority(agents, f), nil } -func bcpNodesPriority(agents []AgentStat, f agentScore) *NodesPriority { +func bcpNodesPriority(agents []topo.AgentStat, f agentScore) *NodesPriority { scores := NewNodesPriority() for _, a := range agents { @@ -110,54 +119,3 @@ func (s nodeScores) list() [][]string { return ret } - -func (p *PBM) SetRSNomination(bcpName, rs string) error { - n := BackupRsNomination{RS: rs, Nodes: []string{}} - _, err := p.Conn.Database(DB).Collection(BcpCollection). - UpdateOne( - p.ctx, - bson.D{{"name", bcpName}}, - bson.D{{"$addToSet", bson.M{"n": n}}}, - ) - - return errors.WithMessage(err, "query") -} - -func (p *PBM) GetRSNominees(bcpName, rsName string) (*BackupRsNomination, error) { - bcp, err := p.GetBackupMeta(bcpName) - if err != nil { - return nil, err - } - - for _, n := range bcp.Nomination { - if n.RS == rsName { - return &n, nil - } - } - - return nil, ErrNotFound -} - -func (p *PBM) SetRSNominees(bcpName, rsName string, nodes []string) error { - _, err := p.Conn.Database(DB).Collection(BcpCollection).UpdateOne( - p.ctx, - bson.D{{"name", bcpName}, {"n.rs", rsName}}, - bson.D{ - {"$set", bson.M{"n.$.n": nodes}}, - }, - ) - - return err -} - -func (p *PBM) SetRSNomineeACK(bcpName, rsName, node string) error { - _, err := p.Conn.Database(DB).Collection(BcpCollection).UpdateOne( - p.ctx, - bson.D{{"name", bcpName}, {"n.rs", rsName}}, - bson.D{ - {"$set", bson.M{"n.$.ack": node}}, - }, - ) - - return err -} diff --git a/internal/query/backup.go b/internal/query/backup.go new file mode 100644 index 000000000..e60b7e5a9 --- /dev/null +++ b/internal/query/backup.go @@ -0,0 +1,379 @@ +package query + +import ( + "time" + + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/bson/primitive" + "go.mongodb.org/mongo-driver/mongo" + "go.mongodb.org/mongo-driver/mongo/options" + + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/topo" + "github.com/percona/percona-backup-mongodb/internal/types" +) + +func GetBackupMeta(ctx context.Context, m connect.Client, name string) (*types.BackupMeta, error) { + return getBackupMeta(ctx, m, bson.D{{"name", name}}) +} + +func 
GetBackupByOPID(ctx context.Context, m connect.Client, opid string) (*types.BackupMeta, error) { + return getBackupMeta(ctx, m, bson.D{{"opid", opid}}) +} + +func getBackupMeta(ctx context.Context, m connect.Client, clause bson.D) (*types.BackupMeta, error) { + res := m.BcpCollection().FindOne(ctx, clause) + if err := res.Err(); err != nil { + if errors.Is(err, mongo.ErrNoDocuments) { + return nil, errors.ErrNotFound + } + return nil, errors.Wrap(err, "get") + } + + b := &types.BackupMeta{} + err := res.Decode(b) + return b, errors.Wrap(err, "decode") +} + +func ChangeBackupStateOPID(m connect.Client, opid string, s defs.Status, msg string) error { + return changeBackupState(context.Background(), m, bson.D{{"opid", opid}}, s, msg) +} + +func ChangeBackupState(m connect.Client, bcpName string, s defs.Status, msg string) error { + return changeBackupState(context.Background(), m, bson.D{{"name", bcpName}}, s, msg) +} + +func changeBackupState(ctx context.Context, m connect.Client, clause bson.D, s defs.Status, msg string) error { + ts := time.Now().UTC().Unix() + _, err := m.BcpCollection().UpdateOne( + ctx, + clause, + bson.D{ + {"$set", bson.M{"status": s}}, + {"$set", bson.M{"last_transition_ts": ts}}, + {"$set", bson.M{"error": msg}}, + {"$push", bson.M{"conditions": types.Condition{Timestamp: ts, Status: s, Error: msg}}}, + }, + ) + + return err +} + +func SetBackupMeta(ctx context.Context, m connect.Client, meta *types.BackupMeta) error { + meta.LastTransitionTS = meta.StartTS + meta.Conditions = append(meta.Conditions, types.Condition{ + Timestamp: meta.StartTS, + Status: meta.Status, + }) + + _, err := m.BcpCollection().InsertOne(ctx, meta) + + return err +} + +func BackupHB(ctx context.Context, m connect.Client, bcpName string) error { + ts, err := topo.GetClusterTime(ctx, m) + if err != nil { + return errors.Wrap(err, "read cluster time") + } + + _, err = m.BcpCollection().UpdateOne( + ctx, + bson.D{{"name", bcpName}}, + bson.D{ + {"$set", bson.M{"hb": ts}}, + }, + ) + + return errors.Wrap(err, "write into db") +} + +func SetSrcBackup(ctx context.Context, m connect.Client, bcpName, srcName string) error { + _, err := m.BcpCollection().UpdateOne( + ctx, + bson.D{{"name", bcpName}}, + bson.D{ + {"$set", bson.M{"src_backup": srcName}}, + }, + ) + + return err +} + +func SetFirstWrite(ctx context.Context, m connect.Client, bcpName string, first primitive.Timestamp) error { + _, err := m.BcpCollection().UpdateOne( + ctx, + bson.D{{"name", bcpName}}, + bson.D{ + {"$set", bson.M{"first_write_ts": first}}, + }, + ) + + return err +} + +func SetLastWrite(ctx context.Context, m connect.Client, bcpName string, last primitive.Timestamp) error { + _, err := m.BcpCollection().UpdateOne( + ctx, + bson.D{{"name", bcpName}}, + bson.D{ + {"$set", bson.M{"last_write_ts": last}}, + }, + ) + + return err +} + +func AddRSMeta(ctx context.Context, m connect.Client, bcpName string, rs types.BackupReplset) error { + rs.LastTransitionTS = rs.StartTS + rs.Conditions = append(rs.Conditions, types.Condition{ + Timestamp: rs.StartTS, + Status: rs.Status, + }) + _, err := m.BcpCollection().UpdateOne( + ctx, + bson.D{{"name", bcpName}}, + bson.D{{"$addToSet", bson.M{"replsets": rs}}}, + ) + + return err +} + +func ChangeRSState(m connect.Client, bcpName, rsName string, s defs.Status, msg string) error { + ts := time.Now().UTC().Unix() + _, err := m.BcpCollection().UpdateOne( + context.Background(), + bson.D{{"name", bcpName}, {"replsets.name", rsName}}, + bson.D{ + {"$set", bson.M{"replsets.$.status": s}}, + 
{"$set", bson.M{"replsets.$.last_transition_ts": ts}}, + {"$set", bson.M{"replsets.$.error": msg}}, + {"$push", bson.M{"replsets.$.conditions": types.Condition{Timestamp: ts, Status: s, Error: msg}}}, + }, + ) + + return err +} + +func IncBackupSize(ctx context.Context, m connect.Client, bcpName string, size int64) error { + _, err := m.BcpCollection().UpdateOne(ctx, + bson.D{{"name", bcpName}}, + bson.D{{"$inc", bson.M{"size": size}}}) + + return err +} + +func RSSetPhyFiles(ctx context.Context, m connect.Client, bcpName, rsName string, rs *types.BackupReplset) error { + _, err := m.BcpCollection().UpdateOne( + ctx, + bson.D{{"name", bcpName}, {"replsets.name", rsName}}, + bson.D{ + {"$set", bson.M{"replsets.$.files": rs.Files}}, + {"$set", bson.M{"replsets.$.journal": rs.Journal}}, + }, + ) + + return err +} + +func SetRSLastWrite(m connect.Client, bcpName, rsName string, ts primitive.Timestamp) error { + _, err := m.BcpCollection().UpdateOne( + context.Background(), + bson.D{{"name", bcpName}, {"replsets.name", rsName}}, + bson.D{ + {"$set", bson.M{"replsets.$.last_write_ts": ts}}, + }, + ) + + return err +} + +func LastIncrementalBackup(ctx context.Context, m connect.Client) (*types.BackupMeta, error) { + return getRecentBackup(ctx, m, nil, nil, -1, bson.D{{"type", string(defs.IncrementalBackup)}}) +} + +// GetLastBackup returns last successfully finished backup (non-selective and non-external) +// or nil if there is no such backup yet. If ts isn't nil it will +// search for the most recent backup that finished before specified timestamp +func GetLastBackup(ctx context.Context, m connect.Client, before *primitive.Timestamp) (*types.BackupMeta, error) { + return getRecentBackup(ctx, m, nil, before, -1, + bson.D{{"nss", nil}, {"type", bson.M{"$ne": defs.ExternalBackup}}}) +} + +func GetFirstBackup(ctx context.Context, m connect.Client, after *primitive.Timestamp) (*types.BackupMeta, error) { + return getRecentBackup(ctx, m, after, nil, 1, + bson.D{{"nss", nil}, {"type", bson.M{"$ne": defs.ExternalBackup}}}) +} + +func getRecentBackup( + ctx context.Context, + m connect.Client, + after, + before *primitive.Timestamp, + sort int, + opts bson.D, +) (*types.BackupMeta, error) { + q := append(bson.D{}, opts...) 
+ q = append(q, bson.E{"status", defs.StatusDone}) + if after != nil { + q = append(q, bson.E{"last_write_ts", bson.M{"$gte": after}}) + } + if before != nil { + q = append(q, bson.E{"last_write_ts", bson.M{"$lte": before}}) + } + + res := m.BcpCollection().FindOne( + ctx, + q, + options.FindOne().SetSort(bson.D{{"start_ts", sort}}), + ) + if err := res.Err(); err != nil { + if errors.Is(err, mongo.ErrNoDocuments) { + return nil, errors.ErrNotFound + } + return nil, errors.Wrap(err, "get") + } + + b := &types.BackupMeta{} + err := res.Decode(b) + return b, errors.Wrap(err, "decode") +} + +func BackupHasNext(ctx context.Context, m connect.Client, backup *types.BackupMeta) (bool, error) { + f := bson.D{ + {"nss", nil}, + {"type", bson.M{"$ne": defs.ExternalBackup}}, + {"start_ts", bson.M{"$gt": backup.LastWriteTS.T}}, + {"status", defs.StatusDone}, + } + o := options.FindOne().SetProjection(bson.D{{"_id", 1}}) + res := m.BcpCollection().FindOne(ctx, f, o) + if err := res.Err(); err != nil { + if errors.Is(err, mongo.ErrNoDocuments) { + return false, nil + } + return false, errors.Wrap(err, "query") + } + + return true, nil +} + +func BackupsList(ctx context.Context, m connect.Client, limit int64) ([]types.BackupMeta, error) { + cur, err := m.BcpCollection().Find( + ctx, + bson.M{}, + options.Find().SetLimit(limit).SetSort(bson.D{{"start_ts", -1}}), + ) + if err != nil { + return nil, errors.Wrap(err, "query mongo") + } + defer cur.Close(ctx) + + backups := []types.BackupMeta{} + for cur.Next(ctx) { + b := types.BackupMeta{} + err := cur.Decode(&b) + if err != nil { + return nil, errors.Wrap(err, "message decode") + } + if b.Type == "" { + b.Type = defs.LogicalBackup + } + backups = append(backups, b) + } + + return backups, cur.Err() +} + +func BackupsDoneList( + ctx context.Context, + m connect.Client, + after *primitive.Timestamp, + limit int64, + order int, +) ([]types.BackupMeta, error) { + q := bson.D{{"status", defs.StatusDone}} + if after != nil { + q = append(q, bson.E{"last_write_ts", bson.M{"$gte": after}}) + } + + cur, err := m.BcpCollection().Find( + ctx, + q, + options.Find().SetLimit(limit).SetSort(bson.D{{"last_write_ts", order}}), + ) + if err != nil { + return nil, errors.Wrap(err, "query mongo") + } + defer cur.Close(ctx) + + backups := []types.BackupMeta{} + for cur.Next(ctx) { + b := types.BackupMeta{} + err := cur.Decode(&b) + if err != nil { + return nil, errors.Wrap(err, "message decode") + } + backups = append(backups, b) + } + + return backups, cur.Err() +} + +func SetRSNomination(ctx context.Context, m connect.Client, bcpName, rs string) error { + n := types.BackupRsNomination{RS: rs, Nodes: []string{}} + _, err := m.BcpCollection(). 
+ UpdateOne( + ctx, + bson.D{{"name", bcpName}}, + bson.D{{"$addToSet", bson.M{"n": n}}}, + ) + + return errors.Wrap(err, "query") +} + +func GetRSNominees( + ctx context.Context, + m connect.Client, + bcpName, rsName string, +) (*types.BackupRsNomination, error) { + bcp, err := GetBackupMeta(ctx, m, bcpName) + if err != nil { + return nil, err + } + + for _, n := range bcp.Nomination { + if n.RS == rsName { + return &n, nil + } + } + + return nil, errors.ErrNotFound +} + +func SetRSNominees(ctx context.Context, m connect.Client, bcpName, rsName string, nodes []string) error { + _, err := m.BcpCollection().UpdateOne( + ctx, + bson.D{{"name", bcpName}, {"n.rs", rsName}}, + bson.D{ + {"$set", bson.M{"n.$.n": nodes}}, + }, + ) + + return err +} + +func SetRSNomineeACK(ctx context.Context, m connect.Client, bcpName, rsName, node string) error { + _, err := m.BcpCollection().UpdateOne( + ctx, + bson.D{{"name", bcpName}, {"n.rs", rsName}}, + bson.D{ + {"$set", bson.M{"n.$.ack": node}}, + }, + ) + + return err +} diff --git a/internal/query/restore.go b/internal/query/restore.go new file mode 100644 index 000000000..602578a3f --- /dev/null +++ b/internal/query/restore.go @@ -0,0 +1,259 @@ +package query + +import ( + "time" + + "github.com/mongodb/mongo-tools/common/db" + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/bson/primitive" + "go.mongodb.org/mongo-driver/mongo" + "go.mongodb.org/mongo-driver/mongo/options" + + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/topo" + "github.com/percona/percona-backup-mongodb/internal/types" +) + +func GetRestoreMetaByOPID(ctx context.Context, m connect.Client, opid string) (*types.RestoreMeta, error) { + return getRestoreMeta(ctx, m, bson.D{{"opid", opid}}) +} + +func GetRestoreMeta(ctx context.Context, m connect.Client, name string) (*types.RestoreMeta, error) { + return getRestoreMeta(ctx, m, bson.D{{"name", name}}) +} + +func getRestoreMeta(ctx context.Context, m connect.Client, clause bson.D) (*types.RestoreMeta, error) { + res := m.RestoresCollection().FindOne(ctx, clause) + if err := res.Err(); err != nil { + if errors.Is(err, mongo.ErrNoDocuments) { + return nil, errors.ErrNotFound + } + return nil, errors.Wrap(err, "get") + } + r := &types.RestoreMeta{} + err := res.Decode(r) + return r, errors.Wrap(err, "decode") +} + +func ChangeRestoreStateOPID(ctx context.Context, m connect.Client, opid string, s defs.Status, msg string) error { + return changeRestoreState(ctx, m, bson.D{{"name", opid}}, s, msg) +} + +func ChangeRestoreState(ctx context.Context, m connect.Client, name string, s defs.Status, msg string) error { + return changeRestoreState(ctx, m, bson.D{{"name", name}}, s, msg) +} + +func changeRestoreState(ctx context.Context, m connect.Client, clause bson.D, s defs.Status, msg string) error { + ts := time.Now().UTC().Unix() + _, err := m.RestoresCollection().UpdateOne( + ctx, + clause, + bson.D{ + {"$set", bson.M{"status": s}}, + {"$set", bson.M{"last_transition_ts": ts}}, + {"$set", bson.M{"error": msg}}, + {"$push", bson.M{"conditions": types.Condition{Timestamp: ts, Status: s, Error: msg}}}, + }, + ) + + return err +} + +func ChangeRestoreRSState( + ctx context.Context, + m connect.Client, + name, + rsName string, + s defs.Status, + msg string, +) error { + ts := 
time.Now().UTC().Unix() + _, err := m.RestoresCollection().UpdateOne( + ctx, + bson.D{{"name", name}, {"replsets.name", rsName}}, + bson.D{ + {"$set", bson.M{"replsets.$.status": s}}, + {"$set", bson.M{"replsets.$.last_transition_ts": ts}}, + {"$set", bson.M{"replsets.$.error": msg}}, + {"$push", bson.M{"replsets.$.conditions": types.Condition{Timestamp: ts, Status: s, Error: msg}}}, + }, + ) + + return err +} + +func RestoreSetRSTxn( + ctx context.Context, + m connect.Client, + name, rsName string, + txn []types.RestoreTxn, +) error { + _, err := m.RestoresCollection().UpdateOne( + ctx, + bson.D{{"name", name}, {"replsets.name", rsName}}, + bson.D{{"$set", bson.M{"replsets.$.committed_txn": txn, "replsets.$.txn_set": true}}}, + ) + + return err +} + +func RestoreSetRSStat( + ctx context.Context, + m connect.Client, + name, rsName string, + stat types.RestoreShardStat, +) error { + _, err := m.RestoresCollection().UpdateOne( + ctx, + bson.D{{"name", name}, {"replsets.name", rsName}}, + bson.D{{"$set", bson.M{"replsets.$.stat": stat}}}, + ) + + return err +} + +func RestoreSetStat(ctx context.Context, m connect.Client, name string, stat types.RestoreStat) error { + _, err := m.RestoresCollection().UpdateOne( + ctx, + bson.D{{"name", name}}, + bson.D{{"$set", bson.M{"stat": stat}}}, + ) + + return err +} + +func RestoreSetRSPartTxn(ctx context.Context, m connect.Client, name, rsName string, txn []db.Oplog) error { + _, err := m.RestoresCollection().UpdateOne( + ctx, + bson.D{{"name", name}, {"replsets.name", rsName}}, + bson.D{{"$set", bson.M{"replsets.$.partial_txn": txn}}}, + ) + + return err +} + +func SetCurrentOp(ctx context.Context, m connect.Client, name, rsName string, ts primitive.Timestamp) error { + _, err := m.RestoresCollection().UpdateOne( + ctx, + bson.D{{"name", name}, {"replsets.name", rsName}}, + bson.D{{"$set", bson.M{"replsets.$.op": ts}}}, + ) + + return err +} + +func SetRestoreMeta(ctx context.Context, m connect.Client, meta *types.RestoreMeta) error { + meta.LastTransitionTS = meta.StartTS + meta.Conditions = append(meta.Conditions, &types.Condition{ + Timestamp: meta.StartTS, + Status: meta.Status, + }) + + _, err := m.RestoresCollection().InsertOne(ctx, meta) + + return err +} + +// GetLastRestore returns last successfully finished restore +// and nil if there is no such restore yet. 
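A small sketch of reading backup and restore metadata through the new internal/query helpers shown above; the report function is hypothetical and the printed fields assume the usual types.BackupMeta/RestoreMeta Name and Status fields.

package example

import (
	"fmt"

	"github.com/percona/percona-backup-mongodb/internal/connect"
	"github.com/percona/percona-backup-mongodb/internal/context"
	"github.com/percona/percona-backup-mongodb/internal/errors"
	"github.com/percona/percona-backup-mongodb/internal/query"
)

func report(ctx context.Context, m connect.Client) error {
	// Most recent finished backup; nil means "no upper time bound".
	bcp, err := query.GetLastBackup(ctx, m, nil)
	if err != nil {
		if errors.Is(err, errors.ErrNotFound) {
			fmt.Println("no finished backups yet")
			return nil
		}
		return errors.Wrap(err, "get last backup")
	}
	fmt.Println("last backup:", bcp.Name, bcp.Status)

	// Most recent finished restore, if any.
	rst, err := query.GetLastRestore(ctx, m)
	if err != nil && !errors.Is(err, errors.ErrNotFound) {
		return errors.Wrap(err, "get last restore")
	}
	if rst != nil {
		fmt.Println("last restore:", rst.Name, rst.Status)
	}
	return nil
}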
+func GetLastRestore(ctx context.Context, m connect.Client) (*types.RestoreMeta, error) { + r := &types.RestoreMeta{} + + res := m.RestoresCollection().FindOne( + ctx, + bson.D{{"status", defs.StatusDone}}, + options.FindOne().SetSort(bson.D{{"start_ts", -1}}), + ) + if err := res.Err(); err != nil { + if errors.Is(err, mongo.ErrNoDocuments) { + return nil, errors.ErrNotFound + } + return nil, errors.Wrap(err, "get") + } + err := res.Decode(r) + return r, errors.Wrap(err, "decode") +} + +func AddRestoreRSMeta(ctx context.Context, m connect.Client, name string, rs types.RestoreReplset) error { + rs.LastTransitionTS = rs.StartTS + rs.Conditions = append(rs.Conditions, &types.Condition{ + Timestamp: rs.StartTS, + Status: rs.Status, + }) + _, err := m.RestoresCollection().UpdateOne( + ctx, + bson.D{{"name", name}}, + bson.D{{"$addToSet", bson.M{"replsets": rs}}}, + ) + + return err +} + +func RestoreHB(ctx context.Context, m connect.Client, name string) error { + ts, err := topo.GetClusterTime(ctx, m) + if err != nil { + return errors.Wrap(err, "read cluster time") + } + + _, err = m.RestoresCollection().UpdateOne( + ctx, + bson.D{{"name", name}}, + bson.D{ + {"$set", bson.M{"hb": ts}}, + }, + ) + + return errors.Wrap(err, "write into db") +} + +func SetRestoreBackup(ctx context.Context, m connect.Client, name, backupName string, nss []string) error { + d := bson.M{"backup": backupName} + if nss != nil { + d["nss"] = nss + } + + _, err := m.RestoresCollection().UpdateOne( + ctx, + bson.D{{"name", name}}, + bson.D{{"$set", d}}, + ) + + return err +} + +func SetOplogTimestamps(ctx context.Context, m connect.Client, name string, start, end int64) error { + _, err := m.RestoresCollection().UpdateOne( + ctx, + bson.M{"name": name}, + bson.M{"$set": bson.M{"start_pitr": start, "pitr": end}}, + ) + + return err +} + +func RestoresList(ctx context.Context, m connect.Client, limit int64) ([]types.RestoreMeta, error) { + cur, err := m.RestoresCollection().Find( + ctx, + bson.M{}, + options.Find().SetLimit(limit).SetSort(bson.D{{"start_ts", -1}}), + ) + if err != nil { + return nil, errors.Wrap(err, "query mongo") + } + defer cur.Close(ctx) + + restores := []types.RestoreMeta{} + for cur.Next(ctx) { + r := types.RestoreMeta{} + err := cur.Decode(&r) + if err != nil { + return nil, errors.Wrap(err, "message decode") + } + restores = append(restores, r) + } + + return restores, cur.Err() +} diff --git a/internal/query/setup.go b/internal/query/setup.go new file mode 100644 index 000000000..f207e7d42 --- /dev/null +++ b/internal/query/setup.go @@ -0,0 +1,129 @@ +package query + +import ( + "strings" + + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/mongo" + "go.mongodb.org/mongo-driver/mongo/options" + + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" +) + +const ( + cmdCollectionSizeBytes = 1 << 20 // 1Mb + pbmOplogCollectionSizeBytes = 10 << 20 // 10Mb + logsCollectionSizeBytes = 50 << 20 // 50Mb +) + +// setup a new DB for PBM +func SetupNewDB(ctx context.Context, m connect.Client) error { + err := m.AdminCommand( + ctx, + bson.D{{"create", defs.CmdStreamCollection}, {"capped", true}, {"size", cmdCollectionSizeBytes}}, + ).Err() + if err != nil && !strings.Contains(err.Error(), "already exists") { + return errors.Wrap(err, "ensure cmd collection") + } + + err = m.AdminCommand( + ctx, + 
bson.D{{"create", defs.LogCollection}, {"capped", true}, {"size", logsCollectionSizeBytes}}, + ).Err() + if err != nil && !strings.Contains(err.Error(), "already exists") { + return errors.Wrap(err, "ensure log collection") + } + + err = m.AdminCommand( + ctx, + bson.D{{"create", defs.LockCollection}}, + ).Err() + if err != nil && !strings.Contains(err.Error(), "already exists") { + return errors.Wrap(err, "ensure lock collection") + } + + // create indexes for the lock collections + _, err = m.LockCollection().Indexes().CreateOne( + ctx, + mongo.IndexModel{ + Keys: bson.D{{"replset", 1}}, + Options: options.Index(). + SetUnique(true). + SetSparse(true), + }, + ) + if err != nil && !strings.Contains(err.Error(), "already exists") { + return errors.Wrapf(err, "ensure lock index on %s", defs.LockCollection) + } + _, err = m.LockOpCollection().Indexes().CreateOne( + ctx, + mongo.IndexModel{ + Keys: bson.D{{"replset", 1}, {"type", 1}}, + Options: options.Index(). + SetUnique(true). + SetSparse(true), + }, + ) + if err != nil && !strings.Contains(err.Error(), "already exists") { + return errors.Wrapf(err, "ensure lock index on %s", defs.LockOpCollection) + } + + err = m.AdminCommand( + ctx, + bson.D{{"create", defs.PBMOpLogCollection}, {"capped", true}, {"size", pbmOplogCollectionSizeBytes}}, + ).Err() + if err != nil && !strings.Contains(err.Error(), "already exists") { + return errors.Wrap(err, "ensure log collection") + } + _, err = m.PBMOpLogCollection().Indexes().CreateOne( + ctx, + mongo.IndexModel{ + Keys: bson.D{{"opid", 1}, {"replset", 1}}, + Options: options.Index(). + SetUnique(true). + SetSparse(true), + }, + ) + if err != nil && !strings.Contains(err.Error(), "already exists") { + return errors.Wrapf(err, "ensure lock index on %s", defs.LockOpCollection) + } + + // create indexs for the pitr chunks + _, err = m.PITRChunksCollection().Indexes().CreateMany( + ctx, + []mongo.IndexModel{ + { + Keys: bson.D{{"rs", 1}, {"start_ts", 1}, {"end_ts", 1}}, + Options: options.Index(). + SetUnique(true). + SetSparse(true), + }, + { + Keys: bson.D{{"start_ts", 1}, {"end_ts", 1}}, + }, + }, + ) + if err != nil && !strings.Contains(err.Error(), "already exists") { + return errors.Wrap(err, "ensure pitr chunks index") + } + + _, err = m.BcpCollection().Indexes().CreateMany( + ctx, + []mongo.IndexModel{ + { + Keys: bson.D{{"name", 1}}, + Options: options.Index(). + SetUnique(true). 
+ SetSparse(true), + }, + { + Keys: bson.D{{"start_ts", 1}, {"status", 1}}, + }, + }, + ) + + return err +} diff --git a/pbm/rsync.go b/internal/resync/rsync.go similarity index 72% rename from pbm/rsync.go rename to internal/resync/rsync.go index f9c220b99..7420212c6 100644 --- a/pbm/rsync.go +++ b/internal/resync/rsync.go @@ -1,8 +1,7 @@ -package pbm +package resync import ( "bytes" - "context" "encoding/json" "io" "path" @@ -11,38 +10,39 @@ import ( "strings" "github.com/mongodb/mongo-tools/common/db" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/mongo/options" "golang.org/x/sync/errgroup" - "github.com/percona/percona-backup-mongodb/pbm/archive" - "github.com/percona/percona-backup-mongodb/pbm/log" - "github.com/percona/percona-backup-mongodb/pbm/storage" - "github.com/percona/percona-backup-mongodb/version" -) - -const ( - StorInitFile = ".pbm.init" - PhysRestoresDir = ".pbm.restore" + "github.com/percona/percona-backup-mongodb/internal/archive" + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/log" + "github.com/percona/percona-backup-mongodb/internal/storage" + "github.com/percona/percona-backup-mongodb/internal/types" + "github.com/percona/percona-backup-mongodb/internal/util" + "github.com/percona/percona-backup-mongodb/internal/version" + "github.com/percona/percona-backup-mongodb/pbm/oplog" ) // ResyncStorage updates PBM metadata (snapshots and pitr) according to the data in the storage -func (p *PBM) ResyncStorage(l *log.Event) error { - stg, err := p.GetStorage(l) +func ResyncStorage(ctx context.Context, m connect.Client, l *log.Event) error { + stg, err := util.GetStorage(ctx, m, l) if err != nil { return errors.Wrap(err, "unable to get backup store") } - _, err = stg.FileStat(StorInitFile) + _, err = stg.FileStat(defs.StorInitFile) if errors.Is(err, storage.ErrNotExist) { - err = stg.Save(StorInitFile, bytes.NewBufferString(version.Current().Version), 0) + err = stg.Save(defs.StorInitFile, bytes.NewBufferString(version.Current().Version), 0) } if err != nil { return errors.Wrap(err, "init storage") } - rstrs, err := stg.List(PhysRestoresDir, ".json") + rstrs, err := stg.List(defs.PhysRestoresDir, ".json") if err != nil { return errors.Wrap(err, "get physical restores list from the storage") } @@ -57,8 +57,8 @@ func (p *PBM) ResyncStorage(l *log.Event) error { } } - _, err = p.Conn.Database(DB).Collection(RestoresCollection).ReplaceOne( - p.ctx, + _, err = m.RestoresCollection().ReplaceOne( + ctx, bson.D{{"name", rmeta.Name}}, rmeta, options.Replace().SetUpsert(true), @@ -68,20 +68,20 @@ func (p *PBM) ResyncStorage(l *log.Event) error { } } - bcps, err := stg.List("", MetadataFileSuffix) + bcps, err := stg.List("", defs.MetadataFileSuffix) if err != nil { return errors.Wrap(err, "get a backups list from the storage") } l.Debug("got backups list: %v", len(bcps)) - _, err = p.Conn.Database(DB).Collection(BcpCollection).DeleteMany(p.ctx, bson.M{}) + _, err = m.BcpCollection().DeleteMany(ctx, bson.M{}) if err != nil { - return errors.Wrapf(err, "clean up %s", BcpCollection) + return errors.Wrapf(err, "clean up %s", defs.BcpCollection) } - _, err = p.Conn.Database(DB).Collection(PITRChunksCollection).DeleteMany(p.ctx, bson.M{}) + _, err = m.PITRChunksCollection().DeleteMany(ctx, bson.M{}) 
if err != nil { - return errors.Wrapf(err, "clean up %s", PITRChunksCollection) + return errors.Wrapf(err, "clean up %s", defs.PITRChunksCollection) } var ins []interface{} @@ -93,29 +93,29 @@ func (p *PBM) ResyncStorage(l *log.Event) error { return errors.Wrapf(err, "read meta for %v", b.Name) } - v := BackupMeta{} + v := types.BackupMeta{} err = json.NewDecoder(d).Decode(&v) d.Close() if err != nil { return errors.Wrapf(err, "unmarshal backup meta [%s]", b.Name) } - err = checkBackupFiles(p.ctx, &v, stg) + err = checkBackupFiles(ctx, &v, stg) if err != nil { l.Warning("skip snapshot %s: %v", v.Name, err) - v.Status = StatusError + v.Status = defs.StatusError v.Err = err.Error() } ins = append(ins, v) } if len(ins) != 0 { - _, err = p.Conn.Database(DB).Collection(BcpCollection).InsertMany(p.ctx, ins) + _, err = m.BcpCollection().InsertMany(ctx, ins) if err != nil { return errors.Wrap(err, "insert retrieved backups meta") } } - pitrf, err := stg.List(PITRfsPrefix, "") + pitrf, err := stg.List(defs.PITRfsPrefix, "") if err != nil { return errors.Wrap(err, "get list of pitr chunks") } @@ -125,12 +125,12 @@ func (p *PBM) ResyncStorage(l *log.Event) error { var pitr []interface{} for _, f := range pitrf { - stat, err := stg.FileStat(PITRfsPrefix + "/" + f.Name) + stat, err := stg.FileStat(defs.PITRfsPrefix + "/" + f.Name) if err != nil { - l.Warning("skip pitr chunk %s/%s because of %v", PITRfsPrefix, f.Name, err) + l.Warning("skip pitr chunk %s/%s because of %v", defs.PITRfsPrefix, f.Name, err) continue } - chnk := PITRmetaFromFName(f.Name) + chnk := oplog.PITRmetaFromFName(f.Name) if chnk != nil { chnk.Size = stat.Size pitr = append(pitr, chnk) @@ -141,7 +141,7 @@ func (p *PBM) ResyncStorage(l *log.Event) error { return nil } - _, err = p.Conn.Database(DB).Collection(PITRChunksCollection).InsertMany(p.ctx, pitr) + _, err = m.PITRChunksCollection().InsertMany(ctx, pitr) if err != nil { return errors.Wrap(err, "insert retrieved pitr meta") } @@ -149,9 +149,9 @@ func (p *PBM) ResyncStorage(l *log.Event) error { return nil } -func checkBackupFiles(ctx context.Context, bcp *BackupMeta, stg storage.Storage) error { +func checkBackupFiles(ctx context.Context, bcp *types.BackupMeta, stg storage.Storage) error { // !!! TODO: Check physical files ? 
- if bcp.Type != LogicalBackup { + if bcp.Type != defs.LogicalBackup { return nil } @@ -169,7 +169,7 @@ func checkBackupFiles(ctx context.Context, bcp *BackupMeta, stg storage.Storage) nss, err := ReadArchiveNamespaces(stg, rs.DumpName) if err != nil { - return errors.WithMessagef(err, "parse metafile %q", rs.DumpName) + return errors.Wrapf(err, "parse metafile %q", rs.DumpName) } for _, ns := range nss { @@ -190,13 +190,13 @@ func checkBackupFiles(ctx context.Context, bcp *BackupMeta, stg storage.Storage) func ReadArchiveNamespaces(stg storage.Storage, metafile string) ([]*archive.Namespace, error) { r, err := stg.SourceReader(metafile) if err != nil { - return nil, errors.WithMessagef(err, "open %q", metafile) + return nil, errors.Wrapf(err, "open %q", metafile) } defer r.Close() meta, err := archive.ReadMetadata(r) if err != nil { - return nil, errors.WithMessagef(err, "parse metafile %q", metafile) + return nil, errors.Wrapf(err, "parse metafile %q", metafile) } return meta.Namespaces, nil @@ -205,7 +205,7 @@ func ReadArchiveNamespaces(stg storage.Storage, metafile string) ([]*archive.Nam func checkFile(stg storage.Storage, filename string) error { f, err := stg.FileStat(filename) if err != nil { - return errors.WithMessagef(err, "file %q", filename) + return errors.Wrapf(err, "file %q", filename) } if f.Size == 0 { return errors.Errorf("%q is empty", filename) @@ -214,14 +214,14 @@ func checkFile(stg storage.Storage, filename string) error { return nil } -func GetPhysRestoreMeta(restore string, stg storage.Storage, l *log.Event) (*RestoreMeta, error) { - mjson := filepath.Join(PhysRestoresDir, restore) + ".json" +func GetPhysRestoreMeta(restore string, stg storage.Storage, l *log.Event) (*types.RestoreMeta, error) { + mjson := filepath.Join(defs.PhysRestoresDir, restore) + ".json" _, err := stg.FileStat(mjson) if err != nil && !errors.Is(err, storage.ErrNotExist) { return nil, errors.Wrapf(err, "get file %s", mjson) } - var rmeta *RestoreMeta + var rmeta *types.RestoreMeta if err == nil { src, err := stg.SourceReader(mjson) if err != nil { @@ -253,26 +253,26 @@ func GetPhysRestoreMeta(restore string, stg storage.Storage, l *log.Event) (*Res } rmeta.Hb = condsm.Hb rmeta.Conditions = condsm.Conditions - rmeta.Type = PhysicalBackup + rmeta.Type = defs.PhysicalBackup rmeta.Stat = condsm.Stat return rmeta, err } -// ParsePhysRestoreStatus parses phys restore's sync files and creates RestoreMeta. +// ParsePhysRestoreStatus parses phys restore's sync files and creates types.RestoreMeta. 
// // On files format, see comments for *PhysRestore.toState() in pbm/restore/physical.go -func ParsePhysRestoreStatus(restore string, stg storage.Storage, l *log.Event) (*RestoreMeta, error) { - rfiles, err := stg.List(PhysRestoresDir+"/"+restore, "") +func ParsePhysRestoreStatus(restore string, stg storage.Storage, l *log.Event) (*types.RestoreMeta, error) { + rfiles, err := stg.List(defs.PhysRestoresDir+"/"+restore, "") if err != nil { return nil, errors.Wrap(err, "get files") } - meta := RestoreMeta{Name: restore, Type: PhysicalBackup} + meta := types.RestoreMeta{Name: restore, Type: defs.PhysicalBackup} rss := make(map[string]struct { - rs RestoreReplset - nodes map[string]RestoreNode + rs types.RestoreReplset + nodes map[string]types.RestoreNode }) for _, f := range rfiles { @@ -292,7 +292,7 @@ func ParsePhysRestoreStatus(restore string, stg storage.Storage, l *log.Event) ( rs, ok := rss[rsName] if !ok { rs.rs.Name = rsName - rs.nodes = make(map[string]RestoreNode) + rs.nodes = make(map[string]types.RestoreNode) } p := strings.Split(rsparts[1], ".") @@ -330,7 +330,7 @@ func ParsePhysRestoreStatus(restore string, stg storage.Storage, l *log.Event) ( continue } if p[1] == "partTxn" { - src, err := stg.SourceReader(filepath.Join(PhysRestoresDir, restore, f.Name)) + src, err := stg.SourceReader(filepath.Join(defs.PhysRestoresDir, restore, f.Name)) if err != nil { l.Error("get partial txn file %s: %v", f.Name, err) break @@ -361,22 +361,22 @@ func ParsePhysRestoreStatus(restore string, stg storage.Storage, l *log.Event) ( rs.rs.Error = l.Error } case "stat": - src, err := stg.SourceReader(filepath.Join(PhysRestoresDir, restore, f.Name)) + src, err := stg.SourceReader(filepath.Join(defs.PhysRestoresDir, restore, f.Name)) if err != nil { l.Error("get stat file %s: %v", f.Name, err) break } if meta.Stat == nil { - meta.Stat = &RestoreStat{RS: make(map[string]map[string]RestoreRSMetrics)} + meta.Stat = &types.RestoreStat{RS: make(map[string]map[string]types.RestoreRSMetrics)} } - st := RestoreShardStat{} + st := types.RestoreShardStat{} err = json.NewDecoder(src).Decode(&st) if err != nil { l.Error("unmarshal stat file %s: %v", f.Name, err) break } if _, ok := meta.Stat.RS[rsName]; !ok { - meta.Stat.RS[rsName] = make(map[string]RestoreRSMetrics) + meta.Stat.RS[rsName] = make(map[string]types.RestoreRSMetrics) } nName := strings.Join(p[1:], ".") lstat := meta.Stat.RS[rsName][nName] @@ -415,7 +415,7 @@ func ParsePhysRestoreStatus(restore string, stg storage.Storage, l *log.Event) ( nodeErr := "" for _, node := range rs.nodes { rs.rs.Nodes = append(rs.rs.Nodes, node) - if node.Status != StatusError { + if node.Status != defs.StatusError { noerr++ } if node.Error != "" { @@ -423,11 +423,11 @@ func ParsePhysRestoreStatus(restore string, stg storage.Storage, l *log.Event) ( } } if noerr == 0 { - rs.rs.Status = StatusError + rs.rs.Status = defs.StatusError if rs.rs.Error == "" { rs.rs.Error = nodeErr } - meta.Status = StatusError + meta.Status = defs.StatusError if meta.Error == "" { meta.Error = nodeErr } @@ -438,11 +438,11 @@ func ParsePhysRestoreStatus(restore string, stg storage.Storage, l *log.Event) ( return &meta, nil } -func parsePhysRestoreCond(stg storage.Storage, fname, restore string) (*Condition, error) { +func parsePhysRestoreCond(stg storage.Storage, fname, restore string) (*types.Condition, error) { s := strings.Split(fname, ".") - cond := Condition{Status: Status(s[len(s)-1])} + cond := types.Condition{Status: defs.Status(s[len(s)-1])} - src, err := 
stg.SourceReader(filepath.Join(PhysRestoresDir, restore, fname)) + src, err := stg.SourceReader(filepath.Join(defs.PhysRestoresDir, restore, fname)) if err != nil { return nil, errors.Wrapf(err, "get file %s", fname) } @@ -451,7 +451,7 @@ func parsePhysRestoreCond(stg storage.Storage, fname, restore string) (*Conditio return nil, errors.Wrapf(err, "read file %s", fname) } - if cond.Status == StatusError || cond.Status == StatusExtTS { + if cond.Status == defs.StatusError || cond.Status == defs.StatusExtTS { estr := strings.SplitN(string(b), ":", 2) if len(estr) != 2 { return nil, errors.Errorf("malformatted data in %s: %s", fname, b) @@ -460,7 +460,7 @@ func parsePhysRestoreCond(stg storage.Storage, fname, restore string) (*Conditio if err != nil { return nil, errors.Wrapf(err, "read ts from %s", fname) } - if cond.Status == StatusError { + if cond.Status == defs.StatusError { cond.Error = estr[1] } return &cond, nil diff --git a/pbm/pitr/pitr.go b/internal/slicer/slicer.go similarity index 70% rename from pbm/pitr/pitr.go rename to internal/slicer/slicer.go index e333e3100..576872336 100644 --- a/pbm/pitr/pitr.go +++ b/internal/slicer/slicer.go @@ -1,50 +1,61 @@ -package pitr +package slicer import ( - "context" "fmt" "strconv" "strings" "sync/atomic" "time" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" - "github.com/percona/percona-backup-mongodb/pbm" - "github.com/percona/percona-backup-mongodb/pbm/backup" - "github.com/percona/percona-backup-mongodb/pbm/compress" - "github.com/percona/percona-backup-mongodb/pbm/log" + "github.com/percona/percona-backup-mongodb/internal/config" + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/lock" + "github.com/percona/percona-backup-mongodb/internal/log" + "github.com/percona/percona-backup-mongodb/internal/query" + "github.com/percona/percona-backup-mongodb/internal/storage" + "github.com/percona/percona-backup-mongodb/internal/topo" + "github.com/percona/percona-backup-mongodb/internal/types" + "github.com/percona/percona-backup-mongodb/internal/util" "github.com/percona/percona-backup-mongodb/pbm/oplog" - "github.com/percona/percona-backup-mongodb/pbm/sel" - "github.com/percona/percona-backup-mongodb/pbm/storage" ) // Slicer is an incremental backup object type Slicer struct { - pbm *pbm.PBM - node *pbm.Node - rs string - span int64 - lastTS primitive.Timestamp - storage storage.Storage - oplog *oplog.OplogBackup - l *log.Event - ep pbm.Epoch + leadClient connect.Client + node *mongo.Client + rs string + span int64 + lastTS primitive.Timestamp + storage storage.Storage + oplog *oplog.OplogBackup + l *log.Event + ep config.Epoch } // NewSlicer creates an incremental backup object -func NewSlicer(rs string, cn *pbm.PBM, node *pbm.Node, to storage.Storage, ep pbm.Epoch) *Slicer { +func NewSlicer( + rs string, + cn connect.Client, + node *mongo.Client, + to storage.Storage, + ep config.Epoch, + logger *log.Logger, +) *Slicer { return &Slicer{ - pbm: cn, - node: node, - rs: rs, - span: int64(pbm.PITRdefaultSpan), - storage: to, - oplog: oplog.NewOplogBackup(node.Session()), - l: cn.Logger().NewEvent(string(pbm.CmdPITR), "", "", ep.TS()), - ep: ep, + leadClient: cn, + node: node, + rs: rs, + span: int64(defs.PITRdefaultSpan), + storage: to, + 
oplog: oplog.NewOplogBackup(node), + l: logger.NewEvent(string(defs.CmdPITR), "", "", ep.TS()), + ep: ep, } } @@ -63,10 +74,10 @@ func (s *Slicer) GetSpan() time.Duration { // If there is a chunk behind the last backup it will try to fill the gaps from the chunk to the starting point. // While filling gaps it checks the oplog for sufficiency. It also checks if there is no restore intercepted // the timeline (hence there are no restores after the most recent backup) -func (s *Slicer) Catchup() error { +func (s *Slicer) Catchup(ctx context.Context) error { s.l.Debug("start_catchup") - baseBcp, err := s.pbm.GetLastBackup(nil) - if errors.Is(err, pbm.ErrNotFound) { + baseBcp, err := query.GetLastBackup(ctx, s.leadClient, nil) + if errors.Is(err, errors.ErrNotFound) { return errors.New("no backup found. full backup is required to start PITR") } if err != nil { @@ -77,16 +88,16 @@ func (s *Slicer) Catchup() error { s.l.Debug("lastTS set to %v %s", s.lastTS, formatts(s.lastTS)) }() - rstr, err := s.pbm.GetLastRestore() - if err != nil && !errors.Is(err, pbm.ErrNotFound) { + rstr, err := query.GetLastRestore(ctx, s.leadClient) + if err != nil && !errors.Is(err, errors.ErrNotFound) { return errors.Wrap(err, "get last restore") } if rstr != nil && rstr.StartTS > baseBcp.StartTS { return errors.Errorf("no backup found after the restored %s, a new backup is required to resume PITR", rstr.Backup) } - chnk, err := s.pbm.PITRLastChunkMeta(s.rs) - if err != nil && !errors.Is(err, pbm.ErrNotFound) { + chnk, err := oplog.PITRLastChunkMeta(ctx, s.leadClient, s.rs) + if err != nil && !errors.Is(err, errors.ErrNotFound) { return errors.Wrap(err, "get last slice") } @@ -97,7 +108,7 @@ func (s *Slicer) Catchup() error { } // PITR chunk after the recent backup is the most recent oplog slice - if primitive.CompareTimestamp(chnk.EndTS, baseBcp.LastWriteTS) >= 0 { + if chnk.EndTS.Compare(baseBcp.LastWriteTS) >= 0 { s.lastTS = chnk.EndTS return nil } @@ -107,7 +118,7 @@ func (s *Slicer) Catchup() error { return nil } - bl, err := s.pbm.BackupsDoneList(&chnk.EndTS, 0, -1) + bl, err := query.BackupsDoneList(ctx, s.leadClient, &chnk.EndTS, 0, -1) if err != nil { return errors.Wrapf(err, "get backups list from %v", chnk.EndTS) } @@ -119,7 +130,7 @@ func (s *Slicer) Catchup() error { // if there is a gap between chunk and the backup - fill it // failed gap shouldn't prevent further chunk creation - if primitive.CompareTimestamp(chnk.EndTS, baseBcp.FirstWriteTS) < 0 { + if chnk.EndTS.Compare(baseBcp.FirstWriteTS) < 0 { ok, err := s.oplog.IsSufficient(chnk.EndTS) if err != nil { s.l.Warning("check oplog sufficiency for %s: %v", chnk, err) @@ -130,12 +141,12 @@ func (s *Slicer) Catchup() error { return nil } - cfg, err := s.pbm.GetConfig() + cfg, err := config.GetConfig(ctx, s.leadClient) if err != nil { return errors.Wrap(err, "get config") } - err = s.upload(chnk.EndTS, baseBcp.FirstWriteTS, cfg.PITR.Compression, cfg.PITR.CompressionLevel) + err = s.upload(ctx, chnk.EndTS, baseBcp.FirstWriteTS, cfg.PITR.Compression, cfg.PITR.CompressionLevel) if err != nil { s.l.Warning("create last_chunk<->sanpshot slice: %v", err) // duplicate key means chunk is already created by probably another routine @@ -148,14 +159,14 @@ func (s *Slicer) Catchup() error { } } - if baseBcp.Type != pbm.LogicalBackup || sel.IsSelective(baseBcp.Namespaces) { + if baseBcp.Type != defs.LogicalBackup || util.IsSelective(baseBcp.Namespaces) { // the backup does not contain complete oplog to copy from // NOTE: the chunk' last op can be later than backup' 
first write ts s.lastTS = chnk.EndTS return nil } - err = s.copyFromBcp(baseBcp) + err = s.copyFromBcp(ctx, baseBcp) if err != nil { s.l.Warning("copy snapshot [%s] oplog: %v", baseBcp.Name, err) } else { @@ -166,7 +177,7 @@ func (s *Slicer) Catchup() error { } //nolint:nonamedreturns -func (s *Slicer) OplogOnlyCatchup() (err error) { +func (s *Slicer) OplogOnlyCatchup(ctx context.Context) (err error) { s.l.Debug("start_catchup [oplog only]") defer func() { @@ -175,8 +186,8 @@ func (s *Slicer) OplogOnlyCatchup() (err error) { } }() - chnk, err := s.pbm.PITRLastChunkMeta(s.rs) - if err != nil && !errors.Is(err, pbm.ErrNotFound) { + chnk, err := oplog.PITRLastChunkMeta(ctx, s.leadClient, s.rs) + if err != nil && !errors.Is(err, errors.ErrNotFound) { return errors.Wrap(err, "get last slice") } @@ -195,7 +206,7 @@ func (s *Slicer) OplogOnlyCatchup() (err error) { s.l.Info("insufficient range since %v", chnk.EndTS) } - ts, err := s.pbm.ClusterTime() + ts, err := topo.GetClusterTime(ctx, s.leadClient) if err != nil { return err } @@ -204,20 +215,20 @@ func (s *Slicer) OplogOnlyCatchup() (err error) { return nil } -func (s *Slicer) copyFromBcp(bcp *pbm.BackupMeta) error { - var oplog string +func (s *Slicer) copyFromBcp(ctx context.Context, bcp *types.BackupMeta) error { + var oplogName string for _, r := range bcp.Replsets { if r.Name == s.rs { - oplog = r.OplogName + oplogName = r.OplogName break } } - if oplog == "" { + if oplogName == "" { return errors.New("no data for shard") } n := s.chunkPath(bcp.FirstWriteTS, bcp.LastWriteTS, bcp.Compression) - err := s.storage.Copy(oplog, n) + err := s.storage.Copy(oplogName, n) if err != nil { return errors.Wrap(err, "storage copy") } @@ -226,7 +237,7 @@ func (s *Slicer) copyFromBcp(bcp *pbm.BackupMeta) error { return errors.Wrap(err, "file stat") } - meta := pbm.OplogChunk{ + meta := oplog.OplogChunk{ RS: s.rs, FName: n, Compression: bcp.Compression, @@ -234,7 +245,7 @@ func (s *Slicer) copyFromBcp(bcp *pbm.BackupMeta) error { EndTS: bcp.LastWriteTS, Size: stat.Size, } - err = s.pbm.PITRAddChunk(meta) + err = oplog.PITRAddChunk(ctx, s.leadClient, meta) if err != nil { return errors.Wrapf(err, "unable to save chunk meta %v", meta) } @@ -267,9 +278,10 @@ const LogStartMsg = "start_ok" // Stream streaming (saving) chunks of the oplog to the given storage func (s *Slicer) Stream( ctx context.Context, - backupSig <-chan *pbm.OPID, - compression compress.CompressionType, - level *int, timeouts *pbm.BackupTimeouts, + stopC <-chan struct{}, + backupSig <-chan *types.OPID, + compression defs.CompressionType, + level *int, timeouts *config.BackupTimeouts, ) error { if s.lastTS.T == 0 { return errors.New("no starting point defined") @@ -280,7 +292,7 @@ func (s *Slicer) Stream( tk := time.NewTicker(cspan) defer tk.Stop() - nodeInfo, err := s.node.GetInfo() + nodeInfo, err := topo.GetNodeInfoExt(ctx, s.node) if err != nil { return errors.Wrap(err, "get NodeInfo data") } @@ -297,7 +309,7 @@ func (s *Slicer) Stream( s.l.Debug(LogStartMsg) lastSlice := false - llock := &pbm.LockHeader{Replset: s.rs} + llock := &lock.LockHeader{Replset: s.rs} var sliceTo primitive.Timestamp for { @@ -305,7 +317,7 @@ func (s *Slicer) Stream( select { // wrapping up at the current point-in-time // upload the chunks up to the current time and return - case <-ctx.Done(): + case <-stopC: s.l.Info("got done signal, stopping") lastSlice = true // on wakeup or tick whatever comes first do the job @@ -313,14 +325,14 @@ func (s *Slicer) Stream( s.l.Info("got wake_up signal") if bcp != nil { 
s.l.Info("wake_up for bcp %s", bcp.String()) - sliceTo, err = s.backupStartTS(bcp.String(), timeouts.StartingStatus()) + sliceTo, err = s.backupStartTS(ctx, bcp.String(), timeouts.StartingStatus()) if err != nil { return errors.Wrap(err, "get backup start TS") } // it can happen that prevoius slice >= backup's fisrt_write // in that case we have to just back off. - if primitive.CompareTimestamp(s.lastTS, sliceTo) >= 0 { + if s.lastTS.Compare(sliceTo) >= 0 { s.l.Info("pausing/stopping with last_ts %v", time.Unix(int64(s.lastTS.T), 0).UTC()) return nil } @@ -332,11 +344,11 @@ func (s *Slicer) Stream( nextChunkT := time.Now().Add(cspan) // check if the node is still any good to make backups - ninf, err := s.node.GetInfo() + ninf, err := topo.GetNodeInfoExt(ctx, s.node) if err != nil { return errors.Wrap(err, "get node info") } - q, err := backup.NodeSuits(s.node, ninf) + q, err := topo.NodeSuits(ctx, s.node, ninf) if err != nil { return errors.Wrap(err, "node check") } @@ -355,35 +367,35 @@ func (s *Slicer) Stream( // and a new worker was elected; // - any other case (including no lock) is the undefined behavior - return. // - ld, err := s.getOpLock(llock, timeouts.StartingStatus()) + ld, err := s.getOpLock(ctx, llock, timeouts.StartingStatus()) if err != nil { return errors.Wrap(err, "check lock") } // in case there is a lock, even a legit one (our own, or backup's one) but it is stale // we should return so the slicer would get through the lock acquisition again. - ts, err := s.pbm.ClusterTime() + ts, err := topo.GetClusterTime(ctx, s.leadClient) if err != nil { return errors.Wrap(err, "read cluster time") } - if ld.Heartbeat.T+pbm.StaleFrameSec < ts.T { + if ld.Heartbeat.T+defs.StaleFrameSec < ts.T { return errors.Errorf("stale lock %#v, last beat ts: %d", ld.LockHeader, ld.Heartbeat.T) } switch ld.Type { - case pbm.CmdPITR: + case defs.CmdPITR: if ld.Node != nodeInfo.Me { return OpMovedError{ld.Node} } - sliceTo, err = s.oplog.LastWrite() + sliceTo, err = s.oplog.LastWrite(ctx) if err != nil { return errors.Wrap(err, "define last write timestamp") } - case pbm.CmdUndefined: + case defs.CmdUndefined: return errors.New("undefined behavior operation is running") - case pbm.CmdBackup: + case defs.CmdBackup: // continue only if we had `backupSig` - if !lastSlice || primitive.CompareTimestamp(s.lastTS, sliceTo) == 0 { + if !lastSlice || s.lastTS.Compare(sliceTo) == 0 { return errors.Errorf("another operation is running: %#v", ld) } default: @@ -392,16 +404,16 @@ func (s *Slicer) Stream( // if this is the last slice, epoch probably already changed (e.g. due to config changes) and that's ok if !lastSlice { - cep, err := s.pbm.GetEpoch() + cep, err := config.GetEpoch(ctx, s.leadClient) if err != nil { return errors.Wrap(err, "get epoch") } - if primitive.CompareTimestamp(s.ep.TS(), cep.TS()) != 0 { + if s.ep.TS().Compare(cep.TS()) != 0 { return errors.Errorf("epoch mismatch. Got sleep in %v, woke up in %v. 
Too old for that stuff.", s.ep.TS(), cep.TS()) } } - err = s.upload(s.lastTS, sliceTo, compression, level) + err = s.upload(ctx, s.lastTS, sliceTo, compression, level) if err != nil { return err } @@ -426,11 +438,16 @@ func (s *Slicer) Stream( } } -func (s *Slicer) upload(from, to primitive.Timestamp, compression compress.CompressionType, level *int) error { +func (s *Slicer) upload( + ctx context.Context, + from, to primitive.Timestamp, + compression defs.CompressionType, + level *int, +) error { s.oplog.SetTailingSpan(from, to) fname := s.chunkPath(from, to, compression) // if use parent ctx, upload will be canceled on the "done" signal - size, err := backup.Upload(context.Background(), s.oplog, s.storage, compression, level, fname, -1) + size, err := storage.Upload(ctx, s.oplog, s.storage, compression, level, fname, -1) if err != nil { // PITR chunks have no metadata to indicate any failed state and if something went // wrong during the data read we may end up with an already created file. Although @@ -444,7 +461,7 @@ func (s *Slicer) upload(from, to primitive.Timestamp, compression compress.Compr return errors.Wrapf(err, "unable to upload chunk %v.%v", from, to) } - meta := pbm.OplogChunk{ + meta := oplog.OplogChunk{ RS: s.rs, FName: fname, Compression: compression, @@ -452,7 +469,7 @@ func (s *Slicer) upload(from, to primitive.Timestamp, compression compress.Compr EndTS: to, Size: size, } - err = s.pbm.PITRAddChunk(meta) + err = oplog.PITRAddChunk(ctx, s.leadClient, meta) if err != nil { return errors.Wrapf(err, "unable to save chunk meta %v", meta) } @@ -464,34 +481,34 @@ func formatts(t primitive.Timestamp) string { return time.Unix(int64(t.T), 0).UTC().Format("2006-01-02T15:04:05") } -func (s *Slicer) getOpLock(l *pbm.LockHeader, t time.Duration) (pbm.LockData, error) { +func (s *Slicer) getOpLock(ctx context.Context, l *lock.LockHeader, t time.Duration) (lock.LockData, error) { tk := time.NewTicker(time.Second) defer tk.Stop() - var lock pbm.LockData + var lck lock.LockData for j := 0; j < int(t.Seconds()); j++ { var err error - lock, err = s.pbm.GetLockData(l) + lck, err = lock.GetLockData(ctx, s.leadClient, l) if err != nil && !errors.Is(err, mongo.ErrNoDocuments) { - return lock, errors.Wrap(err, "get") + return lck, errors.Wrap(err, "get") } - if lock.Type != pbm.CmdUndefined { - return lock, nil + if lck.Type != defs.CmdUndefined { + return lck, nil } <-tk.C } - return lock, nil + return lck, nil } -func (s *Slicer) backupStartTS(opid string, t time.Duration) (primitive.Timestamp, error) { +func (s *Slicer) backupStartTS(ctx context.Context, opid string, t time.Duration) (primitive.Timestamp, error) { var ts primitive.Timestamp tk := time.NewTicker(time.Second) defer tk.Stop() for j := 0; j < int(t.Seconds()); j++ { - b, err := s.pbm.GetBackupByOPID(opid) - if err != nil && !errors.Is(err, pbm.ErrNotFound) { + b, err := query.GetBackupByOPID(ctx, s.leadClient, opid) + if err != nil && !errors.Is(err, errors.ErrNotFound) { return ts, errors.Wrap(err, "get backup meta") } if b != nil && b.FirstWriteTS.T > 1 { @@ -504,17 +521,17 @@ func (s *Slicer) backupStartTS(opid string, t time.Duration) (primitive.Timestam } // !!! 
should be agreed with pbm.PITRmetaFromFName() -func (s *Slicer) chunkPath(first, last primitive.Timestamp, c compress.CompressionType) string { +func (s *Slicer) chunkPath(first, last primitive.Timestamp, c defs.CompressionType) string { return ChunkName(s.rs, first, last, c) } -func ChunkName(rs string, first, last primitive.Timestamp, c compress.CompressionType) string { +func ChunkName(rs string, first, last primitive.Timestamp, c defs.CompressionType) string { ft := time.Unix(int64(first.T), 0).UTC() lt := time.Unix(int64(last.T), 0).UTC() name := strings.Builder{} - if len(pbm.PITRfsPrefix) > 0 { - name.WriteString(pbm.PITRfsPrefix) + if len(defs.PITRfsPrefix) > 0 { + name.WriteString(defs.PITRfsPrefix) name.WriteString("/") } name.WriteString(rs) diff --git a/internal/storage/azure/azure.go b/internal/storage/azure/azure.go new file mode 100644 index 000000000..bfeefb0c1 --- /dev/null +++ b/internal/storage/azure/azure.go @@ -0,0 +1,272 @@ +package azure + +import ( + "fmt" + "io" + "net/http" + "path" + "runtime" + "strings" + "time" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" + "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob" + "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob" + + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/log" + "github.com/percona/percona-backup-mongodb/internal/storage" + "github.com/percona/percona-backup-mongodb/pbm/storage/azure" +) + +const ( + BlobURL = azure.BlobURL + + defaultUploadBuff = 10 << 20 // 10Mb + defaultUploadMaxBuff = 5 + + defaultRetries = 10 + + maxBlocks = 50_000 +) + +type Conf struct { + Account string `bson:"account" json:"account,omitempty" yaml:"account,omitempty"` + Container string `bson:"container" json:"container,omitempty" yaml:"container,omitempty"` + Prefix string `bson:"prefix" json:"prefix,omitempty" yaml:"prefix,omitempty"` + Credentials Credentials `bson:"credentials" json:"-" yaml:"credentials"` +} + +type Credentials struct { + Key string `bson:"key" json:"key,omitempty" yaml:"key,omitempty"` +} + +type Blob struct { + opts Conf + log *log.Event + // url *url.URL + c *azblob.Client +} + +func New(opts Conf, l *log.Event) (*Blob, error) { + b := &Blob{ + opts: opts, + log: l, + } + + var err error + b.c, err = b.client() + if err != nil { + return nil, errors.Wrap(err, "init container") + } + + return b, b.ensureContainer() +} + +func (*Blob) Type() storage.Type { + return storage.Azure +} + +func (b *Blob) Save(name string, data io.Reader, sizeb int64) error { + bufsz := defaultUploadBuff + if sizeb > 0 { + ps := int(sizeb / maxBlocks * 11 / 10) // add 10% just in case + if ps > bufsz { + bufsz = ps + } + } + + cc := runtime.NumCPU() / 2 + if cc == 0 { + cc = 1 + } + + if b.log != nil { + b.log.Debug("BufferSize is set to %d (~%dMb) | %d", bufsz, bufsz>>20, sizeb) + } + + _, err := b.c.UploadStream(context.TODO(), + b.opts.Container, + path.Join(b.opts.Prefix, name), + data, + &azblob.UploadStreamOptions{ + BlockSize: int64(bufsz), + Concurrency: cc, + }) + + return err +} + +func (b *Blob) List(prefix, suffix string) ([]storage.FileInfo, error) { + prfx := path.Join(b.opts.Prefix, prefix) + + if prfx != "" && !strings.HasSuffix(prfx, "/") { + prfx += "/" + } + + pager := b.c.NewListBlobsFlatPager(b.opts.Container, &azblob.ListBlobsFlatOptions{ + Prefix: &prfx, + }) + + var files []storage.FileInfo + for 
pager.More() { + l, err := pager.NextPage(context.TODO()) + if err != nil { + return nil, errors.Wrap(err, "list segment") + } + + for _, b := range l.Segment.BlobItems { + if b.Name == nil { + return files, errors.Errorf("blob returned nil Name for item %v", b) + } + var sz int64 + if b.Properties.ContentLength != nil { + sz = *b.Properties.ContentLength + } + f := *b.Name + f = strings.TrimPrefix(f, prfx) + if len(f) == 0 { + continue + } + if f[0] == '/' { + f = f[1:] + } + + if strings.HasSuffix(f, suffix) { + files = append(files, storage.FileInfo{ + Name: f, + Size: sz, + }) + } + } + } + + return files, nil +} + +func (b *Blob) FileStat(name string) (storage.FileInfo, error) { + inf := storage.FileInfo{} + + p, err := b.c.ServiceClient(). + NewContainerClient(b.opts.Container). + NewBlockBlobClient(path.Join(b.opts.Prefix, name)). + GetProperties(context.TODO(), nil) + if err != nil { + if isNotFound(err) { + return inf, storage.ErrNotExist + } + return inf, errors.Wrap(err, "get properties") + } + + inf.Name = name + if p.ContentLength != nil { + inf.Size = *p.ContentLength + } + + if inf.Size == 0 { + return inf, storage.ErrEmpty + } + + return inf, nil +} + +func (b *Blob) Copy(src, dst string) error { + to := b.c.ServiceClient().NewContainerClient(b.opts.Container).NewBlockBlobClient(path.Join(b.opts.Prefix, dst)) + from := b.c.ServiceClient().NewContainerClient(b.opts.Container).NewBlockBlobClient(path.Join(b.opts.Prefix, src)) + r, err := to.StartCopyFromURL(context.TODO(), from.BlobClient().URL(), nil) + if err != nil { + return errors.Wrap(err, "start copy") + } + + if r.CopyStatus == nil { + return errors.New("undefined copy status") + } + status := *r.CopyStatus + for status == blob.CopyStatusTypePending { + time.Sleep(time.Second * 2) + p, err := to.GetProperties(context.TODO(), nil) + if err != nil { + return errors.Wrap(err, "get copy status") + } + if r.CopyStatus == nil { + return errors.New("undefined copy status") + } + status = *p.CopyStatus + } + + switch status { + case blob.CopyStatusTypeSuccess: + return nil + + case blob.CopyStatusTypeAborted: + return errors.New("copy aborted") + case blob.CopyStatusTypeFailed: + return errors.New("copy failed") + default: + return errors.Errorf("undefined status") + } +} + +func (b *Blob) SourceReader(name string) (io.ReadCloser, error) { + o, err := b.c.DownloadStream(context.TODO(), b.opts.Container, path.Join(b.opts.Prefix, name), nil) + if err != nil { + if isNotFound(err) { + return nil, storage.ErrNotExist + } + return nil, errors.Wrap(err, "download object") + } + + return o.Body, nil +} + +func (b *Blob) Delete(name string) error { + _, err := b.c.DeleteBlob(context.TODO(), b.opts.Container, path.Join(b.opts.Prefix, name), nil) + if err != nil { + if isNotFound(err) { + return storage.ErrNotExist + } + return errors.Wrap(err, "delete object") + } + + return nil +} + +func (b *Blob) ensureContainer() error { + _, err := b.c.ServiceClient().NewContainerClient(b.opts.Container).GetProperties(context.TODO(), nil) + // container already exists + if err == nil { + return nil + } + + var stgErr *azcore.ResponseError + if errors.As(err, &stgErr) && stgErr.StatusCode != http.StatusNotFound { + return errors.Wrap(err, "check container") + } + + _, err = b.c.CreateContainer(context.TODO(), b.opts.Container, nil) + return err +} + +func (b *Blob) client() (*azblob.Client, error) { + cred, err := azblob.NewSharedKeyCredential(b.opts.Account, b.opts.Credentials.Key) + if err != nil { + return nil, errors.Wrap(err, "create 
credentials") + } + + opts := &azblob.ClientOptions{} + opts.Retry = policy.RetryOptions{ + MaxRetries: defaultRetries, + } + return azblob.NewClientWithSharedKeyCredential(fmt.Sprintf(BlobURL, b.opts.Account), cred, opts) +} + +func isNotFound(err error) bool { + var stgErr *azcore.ResponseError + if errors.As(err, &stgErr) { + return stgErr.StatusCode == http.StatusNotFound + } + + return false +} diff --git a/pbm/storage/blackhole/blackhole.go b/internal/storage/blackhole/blackhole.go similarity index 93% rename from pbm/storage/blackhole/blackhole.go rename to internal/storage/blackhole/blackhole.go index fe0d94276..87e4faad6 100644 --- a/pbm/storage/blackhole/blackhole.go +++ b/internal/storage/blackhole/blackhole.go @@ -3,7 +3,7 @@ package blackhole import ( "io" - "github.com/percona/percona-backup-mongodb/pbm/storage" + "github.com/percona/percona-backup-mongodb/internal/storage" ) type Blackhole struct{} diff --git a/pbm/storage/fs/fs.go b/internal/storage/fs/fs.go similarity index 90% rename from pbm/storage/fs/fs.go rename to internal/storage/fs/fs.go index a8357e8f5..6b62058ad 100644 --- a/pbm/storage/fs/fs.go +++ b/internal/storage/fs/fs.go @@ -7,9 +7,9 @@ import ( "path/filepath" "strings" - "github.com/pkg/errors" + "github.com/percona/percona-backup-mongodb/internal/errors" - "github.com/percona/percona-backup-mongodb/pbm/storage" + "github.com/percona/percona-backup-mongodb/internal/storage" ) type Conf struct { @@ -33,24 +33,24 @@ func New(opts Conf) (*FS, error) { if err != nil { if os.IsNotExist(err) { if err := os.MkdirAll(opts.Path, os.ModeDir|0o755); err != nil { - return nil, errors.WithMessagef(err, "mkdir %s", opts.Path) + return nil, errors.Wrapf(err, "mkdir %s", opts.Path) } return &FS{opts.Path}, nil } - return nil, errors.WithMessagef(err, "stat %s", opts.Path) + return nil, errors.Wrapf(err, "stat %s", opts.Path) } root := opts.Path if info.Mode()&os.ModeSymlink != 0 { root, err = filepath.EvalSymlinks(opts.Path) if err != nil { - return nil, errors.WithMessagef(err, "resolve link: %s", opts.Path) + return nil, errors.Wrapf(err, "resolve link: %s", opts.Path) } info, err = os.Lstat(root) if err != nil { - return nil, errors.WithMessagef(err, "stat %s", root) + return nil, errors.Wrapf(err, "stat %s", root) } } if !info.Mode().IsDir() { diff --git a/pbm/storage/s3/download.go b/internal/storage/s3/download.go similarity index 99% rename from pbm/storage/s3/download.go rename to internal/storage/s3/download.go index 54c97f1ce..082df7ce8 100644 --- a/pbm/storage/s3/download.go +++ b/internal/storage/s3/download.go @@ -15,9 +15,10 @@ import ( "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/awserr" "github.com/aws/aws-sdk-go/service/s3" - "github.com/pkg/errors" - "github.com/percona/percona-backup-mongodb/pbm/log" + "github.com/percona/percona-backup-mongodb/internal/errors" + + "github.com/percona/percona-backup-mongodb/internal/log" ) // Downloading objects from the storage. 
diff --git a/pbm/storage/s3/s3.go b/internal/storage/s3/s3.go similarity index 98% rename from pbm/storage/s3/s3.go rename to internal/storage/s3/s3.go index 54ed6370c..094031a5b 100644 --- a/pbm/storage/s3/s3.go +++ b/internal/storage/s3/s3.go @@ -28,10 +28,11 @@ import ( "github.com/aws/aws-sdk-go/service/sts" "github.com/minio/minio-go" "github.com/minio/minio-go/pkg/encrypt" - "github.com/pkg/errors" - "github.com/percona/percona-backup-mongodb/pbm/log" - "github.com/percona/percona-backup-mongodb/pbm/storage" + "github.com/percona/percona-backup-mongodb/internal/errors" + + "github.com/percona/percona-backup-mongodb/internal/log" + "github.com/percona/percona-backup-mongodb/internal/storage" ) const ( @@ -538,7 +539,7 @@ func (s *S3) session() (*session.Session, error) { awsSession, err := session.NewSession() if err != nil { - return nil, errors.WithMessage(err, "new session") + return nil, errors.Wrap(err, "new session") } // allow fetching credentials from env variables and ec2 metadata endpoint diff --git a/internal/storage/storage.go b/internal/storage/storage.go new file mode 100644 index 000000000..691244cfc --- /dev/null +++ b/internal/storage/storage.go @@ -0,0 +1,157 @@ +package storage + +import ( + "io" + + "github.com/percona/percona-backup-mongodb/internal/context" + + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + + "github.com/percona/percona-backup-mongodb/internal/compress" +) + +var ( + // ErrNotExist is returned when a file does not exist on the storage + ErrNotExist = errors.New("no such file") + ErrEmpty = errors.New("file is empty") +) + +// Type represents a type of the destination storage for backups +type Type string + +const ( + Undef Type = "" + S3 Type = "s3" + Azure Type = "azure" + Filesystem Type = "filesystem" + BlackHole Type = "blackhole" +) + +type FileInfo struct { + Name string // with path + Size int64 +} + +type Storage interface { + Type() Type + Save(name string, data io.Reader, size int64) error + SourceReader(name string) (io.ReadCloser, error) + // FileStat returns file info. It returns an error if the file is empty or does not exist. + FileStat(name string) (FileInfo, error) + // List scans path with prefix and returns all files with given suffix. + // Both prefix and suffix can be omitted. + List(prefix, suffix string) ([]FileInfo, error) + // Delete deletes given file. + // It returns storage.ErrNotExist if the file does not exist. + Delete(name string) error + // Copy makes a copy of the src object/file under the dst name + Copy(src, dst string) error +} + +// ParseType parses string and returns storage type +func ParseType(s string) Type { + switch s { + case string(S3): + return S3 + case string(Azure): + return Azure + case string(Filesystem): + return Filesystem + case string(BlackHole): + return BlackHole + default: + return Undef + } +} + +// rwError multierror for the read/compress/write-to-store operations set +type rwError struct { + read error + compress error + write error +} + +func (rwe rwError) Error() string { + var r string + if rwe.read != nil { + r += "read data: " + rwe.read.Error() + "." + } + if rwe.compress != nil { + r += "compress data: " + rwe.compress.Error() + "." + } + if rwe.write != nil { + r += "write data: " + rwe.write.Error() + "."
+ } + + return r +} + +func (rwe rwError) nil() bool { + return rwe.read == nil && rwe.compress == nil && rwe.write == nil +} + +type Source interface { + io.WriterTo +} + +type Canceller interface { + Cancel() +} + +// ErrCancelled means backup was canceled +var ErrCancelled = errors.New("backup canceled") + +// Upload writes data to dst from given src and returns an amount of written bytes +func Upload( + ctx context.Context, + src Source, + dst Storage, + compression defs.CompressionType, + compressLevel *int, + fname string, + sizeb int64, +) (int64, error) { + r, pw := io.Pipe() + + w, err := compress.Compress(pw, compression, compressLevel) + if err != nil { + return 0, err + } + + var rwErr rwError + var n int64 + go func() { + n, rwErr.read = src.WriteTo(w) + rwErr.compress = w.Close() + pw.Close() + }() + + saveDone := make(chan struct{}) + go func() { + rwErr.write = dst.Save(fname, r, sizeb) + saveDone <- struct{}{} + }() + + select { + case <-ctx.Done(): + if c, ok := src.(Canceller); ok { + c.Cancel() + } + + err := r.Close() + if err != nil { + return 0, errors.Wrap(err, "cancel backup: close reader") + } + return 0, ErrCancelled + case <-saveDone: + } + + r.Close() + + if !rwErr.nil() { + return 0, rwErr + } + + return n, nil +} diff --git a/pbm/agent_status.go b/internal/topo/agent.go similarity index 59% rename from pbm/agent_status.go rename to internal/topo/agent.go index 3eea11745..5ed7c64d7 100644 --- a/pbm/agent_status.go +++ b/internal/topo/agent.go @@ -1,23 +1,26 @@ -package pbm +package topo import ( - "context" "fmt" "strconv" "strings" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" - "go.mongodb.org/mongo-driver/mongo" "go.mongodb.org/mongo-driver/mongo/options" "golang.org/x/mod/semver" + + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/version" ) type AgentStat struct { Node string `bson:"n"` RS string `bson:"rs"` - State NodeState `bson:"s"` + State defs.NodeState `bson:"s"` StateStr string `bson:"str"` Hidden bool `bson:"hdn"` Passive bool `bson:"psv"` @@ -56,8 +59,8 @@ func (s *AgentStat) OK() (bool, []string) { return ok, errs } -func (s *AgentStat) MongoVersion() MongoVersion { - v := MongoVersion{ +func (s *AgentStat) MongoVersion() version.MongoVersion { + v := version.MongoVersion{ PSMDBVersion: s.PerconaVer, VersionString: s.MongoVer, } @@ -72,15 +75,15 @@ func (s *AgentStat) MongoVersion() MongoVersion { return v } -func (p *PBM) SetAgentStatus(stat AgentStat) error { - ct, err := p.ClusterTime() +func SetAgentStatus(ctx context.Context, m connect.Client, stat AgentStat) error { + ct, err := GetClusterTime(ctx, m) if err != nil { return errors.Wrap(err, "get cluster time") } stat.Heartbeat = ct - _, err = p.Conn.Database(DB).Collection(AgentsStatusCollection).ReplaceOne( - p.ctx, + _, err = m.AgentsStatusCollection().ReplaceOne( + ctx, bson.D{{"n", stat.Node}, {"rs", stat.RS}}, stat, options.Replace().SetUpsert(true), @@ -88,17 +91,17 @@ func (p *PBM) SetAgentStatus(stat AgentStat) error { return errors.Wrap(err, "write into db") } -func (p *PBM) RemoveAgentStatus(stat AgentStat) error { - _, err := p.Conn.Database(DB).Collection(AgentsStatusCollection). 
- DeleteOne(p.ctx, bson.D{{"n", stat.Node}, {"rs", stat.RS}}) - return errors.WithMessage(err, "query") +func RemoveAgentStatus(ctx context.Context, m connect.Client, stat AgentStat) error { + _, err := m.AgentsStatusCollection(). + DeleteOne(ctx, bson.D{{"n", stat.Node}, {"rs", stat.RS}}) + return errors.Wrap(err, "query") } // GetAgentStatus returns agent status by given node and rs // it's up to user how to handle ErrNoDocuments -func (p *PBM) GetAgentStatus(rs, node string) (AgentStat, error) { - res := p.Conn.Database(DB).Collection(AgentsStatusCollection).FindOne( - p.ctx, +func GetAgentStatus(ctx context.Context, m connect.Client, rs, node string) (AgentStat, error) { + res := m.AgentsStatusCollection().FindOne( + ctx, bson.D{{"n", node}, {"rs", rs}}, ) if res.Err() != nil { @@ -111,8 +114,8 @@ func (p *PBM) GetAgentStatus(rs, node string) (AgentStat, error) { } // AgentStatusGC cleans up stale agent statuses -func (p *PBM) AgentStatusGC() error { - ct, err := p.ClusterTime() +func AgentStatusGC(ctx context.Context, m connect.Client) error { + ct, err := GetClusterTime(ctx, m) if err != nil { return errors.Wrap(err, "get cluster time") } @@ -121,13 +124,13 @@ func (p *PBM) AgentStatusGC() error { // Which would lead to the false clamin "not found" in the status output. So stale range should at least 30 sec // (+5 just in case). // XXX: stalesec is const 15 secs which resolves to 35 secs - stalesec := AgentsStatCheckRange.Seconds() * 3 + stalesec := defs.AgentsStatCheckRange.Seconds() * 3 if stalesec < 35 { stalesec = 35 } ct.T -= uint32(stalesec) - _, err = p.Conn.Database(DB).Collection(AgentsStatusCollection).DeleteMany( - p.ctx, + _, err = m.AgentsStatusCollection().DeleteMany( + ctx, bson.M{"hb": bson.M{"$lt": ct}}, ) @@ -135,38 +138,21 @@ func (p *PBM) AgentStatusGC() error { } // ListAgentStatuses returns list of registered agents -func (p *PBM) ListAgentStatuses() ([]AgentStat, error) { - if err := p.AgentStatusGC(); err != nil { - return nil, errors.WithMessage(err, "remove stale statuses") +func ListAgentStatuses(ctx context.Context, m connect.Client) ([]AgentStat, error) { + if err := AgentStatusGC(ctx, m); err != nil { + return nil, errors.Wrap(err, "remove stale statuses") } - return p.ListAgents() + return ListAgents(ctx, m) } -func (p *PBM) ListAgents() ([]AgentStat, error) { - cur, err := p.Conn.Database(DB).Collection(AgentsStatusCollection).Find(p.ctx, bson.M{}) +func ListAgents(ctx context.Context, m connect.Client) ([]AgentStat, error) { + cur, err := m.AgentsStatusCollection().Find(ctx, bson.M{}) if err != nil { - return nil, errors.WithMessage(err, "query") + return nil, errors.Wrap(err, "query") } var agents []AgentStat - err = cur.All(p.ctx, &agents) - return agents, errors.WithMessage(err, "decode") -} - -// GetReplsetStatus returns `replSetGetStatus` for the replset -// or config server in case of sharded cluster -func (p *PBM) GetReplsetStatus() (*ReplsetStatus, error) { - return GetReplsetStatus(p.ctx, p.Conn) -} - -// GetReplsetStatus returns `replSetGetStatus` for the given connection -func GetReplsetStatus(ctx context.Context, cn *mongo.Client) (*ReplsetStatus, error) { - status := &ReplsetStatus{} - err := cn.Database("admin").RunCommand(ctx, bson.D{{"replSetGetStatus", 1}}).Decode(status) - if err != nil { - return nil, errors.WithMessage(err, "query adminCommand: replSetGetStatus") - } - - return status, nil + err = cur.All(ctx, &agents) + return agents, errors.Wrap(err, "decode") } diff --git a/internal/topo/cluster.go b/internal/topo/cluster.go new 
file mode 100644 index 000000000..b392505f3 --- /dev/null +++ b/internal/topo/cluster.go @@ -0,0 +1,174 @@ +package topo + +import ( + "strings" + + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/bson/primitive" + "go.mongodb.org/mongo-driver/mongo" + + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/errors" +) + +// Shard represents config.shard https://docs.mongodb.com/manual/reference/config-database/#config.shards +// _id may differ from the rs name, so extract rs name from the host (format like "rs2/localhost:27017") +// see https://jira.percona.com/browse/PBM-595 +type Shard struct { + ID string `bson:"_id"` + RS string `bson:"-"` + Host string `bson:"host"` +} + +// GetClusterTime returns mongo's current cluster time +func GetClusterTime(ctx context.Context, m connect.Client) (primitive.Timestamp, error) { + // Make a read to force the cluster timestamp update. + // Otherwise, cluster timestamp could remain the same between node info reads, + // while in fact time has been moved forward. + err := m.LockCollection().FindOne(ctx, bson.D{}).Err() + if err != nil && !errors.Is(err, mongo.ErrNoDocuments) { + return primitive.Timestamp{}, errors.Wrap(err, "void read") + } + + inf, err := GetNodeInfoExt(ctx, m.MongoClient()) + if err != nil { + return primitive.Timestamp{}, errors.Wrap(err, "get NodeInfo") + } + + if inf.ClusterTime == nil { + return primitive.Timestamp{}, errors.New("no clusterTime in response") + } + + return inf.ClusterTime.ClusterTime, nil +} + +func GetLastWrite(ctx context.Context, m *mongo.Client, majority bool) (primitive.Timestamp, error) { + inf, err := GetNodeInfo(ctx, m) + if err != nil { + return primitive.Timestamp{}, errors.Wrap(err, "get NodeInfo data") + } + lw := inf.LastWrite.MajorityOpTime.TS + if !majority { + lw = inf.LastWrite.OpTime.TS + } + if lw.T == 0 { + return primitive.Timestamp{}, errors.New("last write timestamp is nil") + } + return lw, nil +} + +// ClusterMembers returns a list of replica sets the current cluster consists of +// (shards + configserver). The list consists of a single rs if the cluster is +// a non-sharded rs. +func ClusterMembers(ctx context.Context, m *mongo.Client) ([]Shard, error) { + // it would be a config server in sharded cluster + inf, err := GetNodeInfo(ctx, m) + if err != nil { + return nil, errors.Wrap(err, "define cluster state") + } + + if inf.IsMongos() || inf.IsSharded() { + return getClusterMembersImpl(ctx, m) + } + + shards := []Shard{{ + RS: inf.SetName, + Host: inf.SetName + "/" + strings.Join(inf.Hosts, ","), + }} + return shards, nil + } + +func getClusterMembersImpl(ctx context.Context, m *mongo.Client) ([]Shard, error) { + res := m.Database("admin").RunCommand(ctx, bson.D{{"getShardMap", 1}}) + if err := res.Err(); err != nil { + return nil, errors.Wrap(err, "query") + } + + // the map field is mapping of shard names to replset uri + // if shard name is not set, mongodb will provide unique name for it + // (e.g.
the replset name of the shard) + // for configsvr, key name is "config" + var shardMap struct{ Map map[string]string } + if err := res.Decode(&shardMap); err != nil { + return nil, errors.Wrap(err, "decode") + } + + shards := make([]Shard, 0, len(shardMap.Map)) + for id, host := range shardMap.Map { + if id == "" || strings.ContainsAny(id, "/:") { + // till 4.2, map field is like connStrings (added in 4.4) + // and key is uri of the directly (w/o mongos) connected replset + // skip not shard name + continue + } + + rs, _, _ := strings.Cut(host, "/") + shards = append(shards, Shard{ + ID: id, + RS: rs, + Host: host, + }) + } + + return shards, nil +} + +type BalancerMode string + +const ( + BalancerModeOn BalancerMode = "full" + BalancerModeOff BalancerMode = "off" +) + +func (m BalancerMode) String() string { + switch m { + case BalancerModeOn: + return "on" + case BalancerModeOff: + return "off" + default: + return "unknown" + } +} + +type BalancerStatus struct { + Mode BalancerMode `bson:"mode" json:"mode"` + InBalancerRound bool `bson:"inBalancerRound" json:"inBalancerRound"` + NumBalancerRounds int64 `bson:"numBalancerRounds" json:"numBalancerRounds"` + Ok int `bson:"ok" json:"ok"` +} + +func (b *BalancerStatus) IsOn() bool { + return b.Mode == BalancerModeOn +} + +// SetBalancerStatus sets balancer status +func SetBalancerStatus(ctx context.Context, m connect.Client, mode BalancerMode) error { + var cmd string + + switch mode { + case BalancerModeOn: + cmd = "_configsvrBalancerStart" + case BalancerModeOff: + cmd = "_configsvrBalancerStop" + default: + return errors.Errorf("unknown mode %s", mode) + } + + err := m.AdminCommand(ctx, bson.D{{cmd, 1}}).Err() + if err != nil { + return errors.Wrap(err, "run mongo command") + } + return nil +} + +// GetBalancerStatus returns balancer status +func GetBalancerStatus(ctx context.Context, m connect.Client) (*BalancerStatus, error) { + inf := &BalancerStatus{} + err := m.AdminCommand(ctx, bson.D{{"_configsvrBalancerStatus", 1}}).Decode(inf) + if err != nil { + return nil, errors.Wrap(err, "run mongo command") + } + return inf, nil +} diff --git a/pbm/bsontypes.go b/internal/topo/node.go similarity index 66% rename from pbm/bsontypes.go rename to internal/topo/node.go index 01f1cfbfb..cecc15741 100644 --- a/pbm/bsontypes.go +++ b/internal/topo/node.go @@ -1,9 +1,24 @@ -package pbm +package topo import ( "time" + "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" + "go.mongodb.org/mongo-driver/mongo" + + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" +) + +// ReplsetRole is a replicaset role in sharded cluster +type ReplsetRole string + +const ( + RoleUnknown ReplsetRole = "unknown" + RoleShard ReplsetRole = "shard" + RoleConfigSrv ReplsetRole = "configsrv" ) type OpTime struct { @@ -19,6 +34,18 @@ type MongoLastWrite struct { MajorityWriteDate time.Time `bson:"majorityWriteDate"` } +type ClusterTime struct { + ClusterTime primitive.Timestamp `bson:"clusterTime"` + Signature struct { + Hash primitive.Binary `bson:"hash"` + KeyID int64 `bson:"keyId"` + } `bson:"signature"` +} + +type ConfigServerState struct { + OpTime *OpTime `bson:"opTime"` +} + // NodeInfo represents the mongo's node info type NodeInfo struct { Hosts []string `bson:"hosts,omitempty"` @@ -47,7 +74,7 @@ type NodeInfo struct { ClusterTime *ClusterTime `bson:"$clusterTime,omitempty"` ConfigServerState *ConfigServerState 
`bson:"$configServerState,omitempty"` OperationTime *primitive.Timestamp `bson:"operationTime,omitempty"` - opts MongodOpts + Opts MongodOpts `bson:"-"` } // IsSharded returns true is replset is part sharded cluster @@ -57,7 +84,7 @@ func (i *NodeInfo) IsMongos() bool { // IsSharded returns true is replset is part sharded cluster func (i *NodeInfo) IsSharded() bool { - return i.SetName != "" && (i.ConfigServerState != nil || i.opts.Sharding.ClusterRole != "" || i.ConfigSvr == 2) + return i.SetName != "" && (i.ConfigServerState != nil || i.Opts.Sharding.ClusterRole != "" || i.ConfigSvr == 2) } // IsLeader returns true if node can act as backup leader (it's configsrv or non shareded rs) @@ -93,150 +120,6 @@ func (i *NodeInfo) IsStandalone() bool { return i.SetName == "" } -type ClusterTime struct { - ClusterTime primitive.Timestamp `bson:"clusterTime"` - Signature struct { - Hash primitive.Binary `bson:"hash"` - KeyID int64 `bson:"keyId"` - } `bson:"signature"` -} - -type ConfigServerState struct { - OpTime *OpTime `bson:"opTime"` -} - -type Operation string - -const ( - OperationInsert Operation = "i" - OperationNoop Operation = "n" - OperationUpdate Operation = "u" - OperationDelete Operation = "d" - OperationCommand Operation = "c" -) - -type NodeHealth int - -const ( - NodeHealthDown NodeHealth = iota - NodeHealthUp -) - -type NodeState int - -const ( - NodeStateStartup NodeState = iota - NodeStatePrimary - NodeStateSecondary - NodeStateRecovering - NodeStateStartup2 - NodeStateUnknown - NodeStateArbiter - NodeStateDown - NodeStateRollback - NodeStateRemoved -) - -type StatusOpTimes struct { - LastCommittedOpTime *OpTime `bson:"lastCommittedOpTime" json:"lastCommittedOpTime"` - ReadConcernMajorityOpTime *OpTime `bson:"readConcernMajorityOpTime" json:"readConcernMajorityOpTime"` - AppliedOpTime *OpTime `bson:"appliedOpTime" json:"appliedOpTime"` - DurableOptime *OpTime `bson:"durableOpTime" json:"durableOpTime"` -} - -type NodeStatus struct { - ID int `bson:"_id" json:"_id"` - Name string `bson:"name" json:"name"` - Health NodeHealth `bson:"health" json:"health"` - State NodeState `bson:"state" json:"state"` - StateStr string `bson:"stateStr" json:"stateStr"` - Uptime int64 `bson:"uptime" json:"uptime"` - Optime *OpTime `bson:"optime" json:"optime"` - OptimeDate time.Time `bson:"optimeDate" json:"optimeDate"` - ConfigVersion int `bson:"configVersion" json:"configVersion"` - ElectionTime primitive.Timestamp `bson:"electionTime,omitempty" json:"electionTime,omitempty"` - ElectionDate time.Time `bson:"electionDate,omitempty" json:"electionDate,omitempty"` - InfoMessage string `bson:"infoMessage,omitempty" json:"infoMessage,omitempty"` - OptimeDurable *OpTime `bson:"optimeDurable,omitempty" json:"optimeDurable,omitempty"` - OptimeDurableDate time.Time `bson:"optimeDurableDate,omitempty" json:"optimeDurableDate,omitempty"` - LastHeartbeat time.Time `bson:"lastHeartbeat,omitempty" json:"lastHeartbeat,omitempty"` - LastHeartbeatRecv time.Time `bson:"lastHeartbeatRecv,omitempty" json:"lastHeartbeatRecv,omitempty"` - PingMs int64 `bson:"pingMs,omitempty" json:"pingMs,omitempty"` - Self bool `bson:"self,omitempty" json:"self,omitempty"` - SyncingTo string `bson:"syncingTo,omitempty" json:"syncingTo,omitempty"` -} - -type ReplsetStatus struct { - Set string `bson:"set" json:"set"` - Date time.Time `bson:"date" json:"date"` - MyState NodeState `bson:"myState" json:"myState"` - Members []NodeStatus `bson:"members" json:"members"` - Term int64 `bson:"term,omitempty" json:"term,omitempty"` - 
HeartbeatIntervalMillis int64 `bson:"heartbeatIntervalMillis,omitempty" json:"heartbeatIntervalMillis,omitempty"` //nolint:lll - Optimes *StatusOpTimes `bson:"optimes,omitempty" json:"optimes,omitempty"` - Errmsg string `bson:"errmsg,omitempty" json:"errmsg,omitempty"` - Ok int `bson:"ok" json:"ok"` - ClusterTime *ClusterTime `bson:"$clusterTime,omitempty" json:"$clusterTime,omitempty"` - ConfigServerState *ConfigServerState `bson:"$configServerState,omitempty" json:"$configServerState,omitempty"` - OperationTime *primitive.Timestamp `bson:"operationTime,omitempty" json:"operationTime,omitempty"` -} - -// Shard represent config.shard https://docs.mongodb.com/manual/reference/config-database/#config.shards -// _id may differ from the rs name, so extract rs name from the host (format like "rs2/localhost:27017") -// see https://jira.percona.com/browse/PBM-595 -type Shard struct { - ID string `bson:"_id"` - RS string `bson:"-"` - Host string `bson:"host"` -} - -type ConnectionStatus struct { - AuthInfo AuthInfo `bson:"authInfo" json:"authInfo"` -} - -type AuthInfo struct { - Users []AuthUser `bson:"authenticatedUsers" json:"authenticatedUsers"` - UserRoles []AuthUserRoles `bson:"authenticatedUserRoles" json:"authenticatedUserRoles"` -} - -type AuthUser struct { - User string `bson:"user" json:"user"` - DB string `bson:"db" json:"db"` -} -type AuthUserRoles struct { - Role string `bson:"role" json:"role"` - DB string `bson:"db" json:"db"` -} - -type BalancerMode string - -const ( - BalancerModeOn BalancerMode = "full" - BalancerModeOff BalancerMode = "off" -) - -func (m BalancerMode) String() string { - switch m { - case BalancerModeOn: - return "on" - case BalancerModeOff: - return "off" - default: - return "unknown" - } -} - -type BalancerStatus struct { - Mode BalancerMode `bson:"mode" json:"mode"` - InBalancerRound bool `bson:"inBalancerRound" json:"inBalancerRound"` - NumBalancerRounds int64 `bson:"numBalancerRounds" json:"numBalancerRounds"` - Ok int `bson:"ok" json:"ok"` -} - -func (b *BalancerStatus) IsOn() bool { - return b.Mode == BalancerModeOn -} - type MongodOpts struct { Net struct { BindIP string `bson:"bindIp" json:"bindIp" yaml:"bindIp"` @@ -314,6 +197,47 @@ func (stg *MongodOptsStorage) UnmarshalYAML(unmarshal func(interface{}) error) e return unmarshal((*rawStg)(stg)) } +// GetNodeInfoExt returns mongo node info with mongod options +func GetNodeInfoExt(ctx context.Context, m *mongo.Client) (*NodeInfo, error) { + i, err := GetNodeInfo(ctx, m) + if err != nil { + return nil, errors.Wrap(err, "get NodeInfo") + } + opts, err := GetMongodOpts(ctx, m, nil) + if err != nil { + return nil, errors.Wrap(err, "get mongod options") + } + if opts != nil { + i.Opts = *opts + } + return i, nil +} + +func GetNodeInfo(ctx context.Context, m *mongo.Client) (*NodeInfo, error) { + res := m.Database(defs.DB).RunCommand(ctx, bson.D{{"isMaster", 1}}) + if err := res.Err(); err != nil { + return nil, errors.Wrap(err, "cmd: isMaster") + } + + n := &NodeInfo{} + err := res.Decode(&n) + return n, errors.Wrap(err, "decode") +} + +func GetMongodOpts(ctx context.Context, m *mongo.Client, defaults *MongodOpts) (*MongodOpts, error) { + opts := struct { + Parsed MongodOpts `bson:"parsed" json:"parsed"` + }{} + if defaults != nil { + opts.Parsed = *defaults + } + err := m.Database("admin").RunCommand(ctx, bson.D{{"getCmdLineOpts", 1}}).Decode(&opts) + if err != nil { + return nil, errors.Wrap(err, "run mongo command") + } + return &opts.Parsed, nil +} + //nolint:lll type RSConfig struct { ID string 
`bson:"_id" json:"_id"` @@ -343,3 +267,27 @@ type RSMember struct { SecondaryDelaySecs int64 `bson:"secondaryDelaySecs,omitempty"` Votes int `bson:"votes" json:"votes"` } + +func GetReplSetConfig(ctx context.Context, m *mongo.Client) (*RSConfig, error) { + res := m.Database("admin").RunCommand(ctx, bson.D{{"replSetGetConfig", 1}}) + if err := res.Err(); err != nil { + return nil, errors.Wrap(err, "run command") + } + + val := struct{ Config *RSConfig }{} + if err := res.Decode(&val); err != nil { + return nil, errors.Wrap(err, "decode") + } + + return val.Config, nil +} + +func ConfSvrConn(ctx context.Context, cn *mongo.Client) (string, error) { + csvr := struct { + URI string `bson:"configsvrConnectionString"` + }{} + err := cn.Database("admin").Collection("system.version"). + FindOne(ctx, bson.D{{"_id", "shardIdentity"}}).Decode(&csvr) + + return csvr.URI, err +} diff --git a/internal/topo/status.go b/internal/topo/status.go new file mode 100644 index 000000000..96143b019 --- /dev/null +++ b/internal/topo/status.go @@ -0,0 +1,53 @@ +package topo + +import ( + "time" + + "go.mongodb.org/mongo-driver/bson/primitive" + + "github.com/percona/percona-backup-mongodb/internal/defs" +) + +type NodeStatus struct { + ID int `bson:"_id" json:"_id"` + Name string `bson:"name" json:"name"` + Health defs.NodeHealth `bson:"health" json:"health"` + State defs.NodeState `bson:"state" json:"state"` + StateStr string `bson:"stateStr" json:"stateStr"` + Uptime int64 `bson:"uptime" json:"uptime"` + Optime *OpTime `bson:"optime" json:"optime"` + OptimeDate time.Time `bson:"optimeDate" json:"optimeDate"` + ConfigVersion int `bson:"configVersion" json:"configVersion"` + ElectionTime primitive.Timestamp `bson:"electionTime,omitempty" json:"electionTime,omitempty"` + ElectionDate time.Time `bson:"electionDate,omitempty" json:"electionDate,omitempty"` + InfoMessage string `bson:"infoMessage,omitempty" json:"infoMessage,omitempty"` + OptimeDurable *OpTime `bson:"optimeDurable,omitempty" json:"optimeDurable,omitempty"` + OptimeDurableDate time.Time `bson:"optimeDurableDate,omitempty" json:"optimeDurableDate,omitempty"` + LastHeartbeat time.Time `bson:"lastHeartbeat,omitempty" json:"lastHeartbeat,omitempty"` + LastHeartbeatRecv time.Time `bson:"lastHeartbeatRecv,omitempty" json:"lastHeartbeatRecv,omitempty"` + PingMs int64 `bson:"pingMs,omitempty" json:"pingMs,omitempty"` + Self bool `bson:"self,omitempty" json:"self,omitempty"` + SyncingTo string `bson:"syncingTo,omitempty" json:"syncingTo,omitempty"` +} + +type StatusOpTimes struct { + LastCommittedOpTime *OpTime `bson:"lastCommittedOpTime" json:"lastCommittedOpTime"` + ReadConcernMajorityOpTime *OpTime `bson:"readConcernMajorityOpTime" json:"readConcernMajorityOpTime"` + AppliedOpTime *OpTime `bson:"appliedOpTime" json:"appliedOpTime"` + DurableOptime *OpTime `bson:"durableOpTime" json:"durableOpTime"` +} + +type ReplsetStatus struct { + Set string `bson:"set" json:"set"` + Date time.Time `bson:"date" json:"date"` + MyState defs.NodeState `bson:"myState" json:"myState"` + Members []NodeStatus `bson:"members" json:"members"` + Term int64 `bson:"term,omitempty" json:"term,omitempty"` + HeartbeatIntervalMillis int64 `bson:"heartbeatIntervalMillis,omitempty" json:"heartbeatIntervalMillis,omitempty"` //nolint:lll + Optimes *StatusOpTimes `bson:"optimes,omitempty" json:"optimes,omitempty"` + Errmsg string `bson:"errmsg,omitempty" json:"errmsg,omitempty"` + Ok int `bson:"ok" json:"ok"` + ClusterTime *ClusterTime `bson:"$clusterTime,omitempty" 
json:"$clusterTime,omitempty"` + ConfigServerState *ConfigServerState `bson:"$configServerState,omitempty" json:"$configServerState,omitempty"` + OperationTime *primitive.Timestamp `bson:"operationTime,omitempty" json:"operationTime,omitempty"` +} diff --git a/internal/topo/topo.go b/internal/topo/topo.go new file mode 100644 index 000000000..626566f7d --- /dev/null +++ b/internal/topo/topo.go @@ -0,0 +1,210 @@ +package topo + +import ( + "fmt" + "strings" + + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/bson/primitive" + "go.mongodb.org/mongo-driver/mongo" + + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/version" +) + +func CheckTopoForBackup(ctx context.Context, m connect.Client, type_ defs.BackupType) error { + members, err := ClusterMembers(ctx, m.MongoClient()) + if err != nil { + return errors.Wrap(err, "get cluster members") + } + + ts, err := GetClusterTime(ctx, m) + if err != nil { + return errors.Wrap(err, "get cluster time") + } + + agentList, err := ListAgents(ctx, m) + if err != nil { + return errors.Wrap(err, "list agents") + } + + agents := make(map[string]map[string]AgentStat) + for _, a := range agentList { + if agents[a.RS] == nil { + agents[a.RS] = make(map[string]AgentStat) + } + agents[a.RS][a.Node] = a + } + + return collectTopoCheckErrors(members, agents, ts, type_) +} + +type ( + ReplsetName = string + NodeURI = string +) + +type topoCheckError struct { + Replsets map[ReplsetName]map[NodeURI][]error + Missed []string +} + +func (r topoCheckError) hasError() bool { + return len(r.Missed) != 0 +} + +func (r topoCheckError) Error() string { + if !r.hasError() { + return "" + } + + return fmt.Sprintf("no available agent(s) on replsets: %s", strings.Join(r.Missed, ", ")) +} + +func collectTopoCheckErrors( + replsets []Shard, + agentsByRS map[ReplsetName]map[NodeURI]AgentStat, + ts primitive.Timestamp, + type_ defs.BackupType, +) error { + rv := topoCheckError{ + Replsets: make(map[string]map[NodeURI][]error), + Missed: make([]string, 0), + } + + for _, rs := range replsets { + rsName, uri, _ := strings.Cut(rs.Host, "/") + agents := agentsByRS[rsName] + if len(agents) == 0 { + rv.Missed = append(rv.Missed, rsName) + continue + } + + hosts := strings.Split(uri, ",") + members := make(map[NodeURI][]error, len(hosts)) + anyAvail := false + for _, host := range hosts { + a, ok := agents[host] + if !ok || a.Arbiter || a.Passive { + continue + } + + errs := []error{} + if a.Err != "" { + errs = append(errs, errors.New(a.Err)) + } + if ok, estrs := a.OK(); !ok { + for _, e := range estrs { + errs = append(errs, errors.New(e)) + } + } + + const maxReplicationLag uint32 = 35 + if ts.T-a.Heartbeat.T > maxReplicationLag { + errs = append(errs, errors.New("stale")) + } + if err := version.FeatureSupport(a.MongoVersion()).BackupType(type_); err != nil { + errs = append(errs, errors.Wrap(err, "unsupported backup type")) + } + + members[host] = errs + if len(errs) == 0 { + anyAvail = true + } + } + + rv.Replsets[rsName] = members + + if !anyAvail { + rv.Missed = append(rv.Missed, rsName) + } + } + + if rv.hasError() { + return rv + } + + return nil +} + +const maxReplicationLagTimeSec = 21 + +// NodeSuits checks if node can perform backup +func NodeSuits(ctx context.Context, m *mongo.Client, inf *NodeInfo) (bool, 
error) { + status, err := Status(ctx, m, inf.Me) + if err != nil { + return false, errors.Wrap(err, "get node status") + } + + replLag, err := ReplicationLag(ctx, m, inf.Me) + if err != nil { + return false, errors.Wrap(err, "get node replication lag") + } + + return replLag < maxReplicationLagTimeSec && status.Health == defs.NodeHealthUp && + (status.State == defs.NodeStatePrimary || status.State == defs.NodeStateSecondary), + nil +} + +func NodeSuitsExt(ctx context.Context, m *mongo.Client, inf *NodeInfo, t defs.BackupType) (bool, error) { + if ok, err := NodeSuits(ctx, m, inf); err != nil || !ok { + return false, err + } + + ver, err := version.GetMongoVersion(ctx, m) + if err != nil { + return false, errors.Wrap(err, "get mongo version") + } + + err = version.FeatureSupport(ver).BackupType(t) + return err == nil, err +} + +func Status(ctx context.Context, m *mongo.Client, self string) (*NodeStatus, error) { + s, err := GetReplsetStatus(ctx, m) + if err != nil { + return nil, errors.Wrap(err, "get replset status") + } + + for _, m := range s.Members { + if m.Name == self { + return &m, nil + } + } + + return nil, errors.ErrNotFound +} + +// GetReplsetStatus returns `replSetGetStatus` for the given connection +func GetReplsetStatus(ctx context.Context, m *mongo.Client) (*ReplsetStatus, error) { + status := &ReplsetStatus{} + err := m.Database("admin").RunCommand(ctx, bson.D{{"replSetGetStatus", 1}}).Decode(status) + if err != nil { + return nil, errors.Wrap(err, "query adminCommand: replSetGetStatus") + } + + return status, nil +} + +// ReplicationLag returns node replication lag in seconds +func ReplicationLag(ctx context.Context, m *mongo.Client, self string) (int, error) { + s, err := GetReplsetStatus(ctx, m) + if err != nil { + return -1, errors.Wrap(err, "get replset status") + } + + var primaryOptime, nodeOptime int + for _, m := range s.Members { + if m.Name == self { + nodeOptime = int(m.Optime.TS.T) + } + if m.StateStr == "PRIMARY" { + primaryOptime = int(m.Optime.TS.T) + } + } + + return primaryOptime - nodeOptime, nil +} diff --git a/internal/types/backup.go b/internal/types/backup.go new file mode 100644 index 000000000..3186f9500 --- /dev/null +++ b/internal/types/backup.go @@ -0,0 +1,145 @@ +package types + +import ( + "fmt" + "io" + "os" + + "go.mongodb.org/mongo-driver/bson/primitive" + + "github.com/percona/percona-backup-mongodb/internal/config" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/topo" +) + +// BackupMeta is a backup's metadata +type BackupMeta struct { + Type defs.BackupType `bson:"type" json:"type"` + OPID string `bson:"opid" json:"opid"` + Name string `bson:"name" json:"name"` + + // SrcBackup is the source for the incremental backups. The souce might be + // incremental as well. + // Empty means this is a full backup (and a base for further incremental bcps). + SrcBackup string `bson:"src_backup,omitempty" json:"src_backup,omitempty"` + + // ShardRemap is map of replset to shard names. + // If shard name is different from replset name, it will be stored in the map. + // If all shard names are the same as their replset names, the map is nil. 
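
The remap rule described above mirrors what Backup.Init does later in this patch: only shards whose _id differs from the replset name end up in the map. A minimal sketch of that rule (buildShardRemap is an illustrative name, not an existing helper), assuming the internal/topo package introduced here:

package example // illustrative sketch only

import "github.com/percona/percona-backup-mongodb/internal/topo"

// buildShardRemap keeps only the shards whose ID differs from the replset
// name; an empty result is returned as nil so the field is omitted from BSON.
func buildShardRemap(shards []topo.Shard) map[string]string {
	m := make(map[string]string)
	for _, s := range shards {
		if s.ID != s.RS {
			m[s.RS] = s.ID
		}
	}
	if len(m) == 0 {
		return nil
	}
	return m
}
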
+ ShardRemap map[string]string `bson:"shardRemap,omitempty" json:"shardRemap,omitempty"` + + Namespaces []string `bson:"nss,omitempty" json:"nss,omitempty"` + Replsets []BackupReplset `bson:"replsets" json:"replsets"` + Compression defs.CompressionType `bson:"compression" json:"compression"` + Store config.StorageConf `bson:"store" json:"store"` + Size int64 `bson:"size" json:"size"` + MongoVersion string `bson:"mongodb_version" json:"mongodb_version,omitempty"` + FCV string `bson:"fcv" json:"fcv"` + StartTS int64 `bson:"start_ts" json:"start_ts"` + LastTransitionTS int64 `bson:"last_transition_ts" json:"last_transition_ts"` + FirstWriteTS primitive.Timestamp `bson:"first_write_ts" json:"first_write_ts"` + LastWriteTS primitive.Timestamp `bson:"last_write_ts" json:"last_write_ts"` + Hb primitive.Timestamp `bson:"hb" json:"hb"` + Status defs.Status `bson:"status" json:"status"` + Conditions []Condition `bson:"conditions" json:"conditions"` + Nomination []BackupRsNomination `bson:"n" json:"n"` + Err string `bson:"error,omitempty" json:"error,omitempty"` + PBMVersion string `bson:"pbm_version,omitempty" json:"pbm_version,omitempty"` + BalancerStatus topo.BalancerMode `bson:"balancer" json:"balancer"` + runtimeError error +} + +func (b *BackupMeta) Error() error { + switch { + case b.runtimeError != nil: + return b.runtimeError + case b.Err != "": + return errors.New(b.Err) + default: + return nil + } +} + +func (b *BackupMeta) SetRuntimeError(err error) { + b.runtimeError = err + b.Status = defs.StatusError +} + +// RS returns the metadata of the replset with given name. +// It returns nil if no replset found. +func (b *BackupMeta) RS(name string) *BackupReplset { + for _, rs := range b.Replsets { + if rs.Name == name { + return &rs + } + } + return nil +} + +// BackupRsNomination is used to choose (nominate and elect) nodes for the backup +// within a replica set +type BackupRsNomination struct { + RS string `bson:"rs" json:"rs"` + Nodes []string `bson:"n" json:"n"` + Ack string `bson:"ack" json:"ack"` +} + +type Condition struct { + Timestamp int64 `bson:"timestamp" json:"timestamp"` + Status defs.Status `bson:"status" json:"status"` + Error string `bson:"error,omitempty" json:"error,omitempty"` +} + +type BackupReplset struct { + Name string `bson:"name" json:"name"` + // Journal is not used. left for backward compatibility + Journal []File `bson:"journal,omitempty" json:"journal,omitempty"` + Files []File `bson:"files,omitempty" json:"files,omitempty"` + DumpName string `bson:"dump_name,omitempty" json:"backup_name,omitempty"` + OplogName string `bson:"oplog_name,omitempty" json:"oplog_name,omitempty"` + StartTS int64 `bson:"start_ts" json:"start_ts"` + Status defs.Status `bson:"status" json:"status"` + IsConfigSvr *bool `bson:"iscs,omitempty" json:"iscs,omitempty"` + LastTransitionTS int64 `bson:"last_transition_ts" json:"last_transition_ts"` + FirstWriteTS primitive.Timestamp `bson:"first_write_ts" json:"first_write_ts"` + LastWriteTS primitive.Timestamp `bson:"last_write_ts" json:"last_write_ts"` + Node string `bson:"node" json:"node"` // node that performed backup + Error string `bson:"error,omitempty" json:"error,omitempty"` + Conditions []Condition `bson:"conditions" json:"conditions"` + MongodOpts *topo.MongodOpts `bson:"mongod_opts,omitempty" json:"mongod_opts,omitempty"` + + // CustomThisID is customized thisBackupName value for $backupCursor (in WT: "this_id"). + // If it is not set (empty), the default value was used. 
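
A small usage sketch for the accessors above (rsDone is an assumed name, not part of the package): an agent can look up its own replset entry in the metadata and check whether that shard has already finished its part of the backup.

package example // illustrative sketch only

import (
	"github.com/percona/percona-backup-mongodb/internal/defs"
	"github.com/percona/percona-backup-mongodb/internal/types"
)

// rsDone reports whether the named replset has completed its part of the
// backup according to the metadata; a missing entry counts as not done.
func rsDone(meta *types.BackupMeta, rsName string) bool {
	rs := meta.RS(rsName)
	return rs != nil && rs.Status == defs.StatusDone
}
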
+ CustomThisID string `bson:"this_id,omitempty" json:"this_id,omitempty"` +} + +type File struct { + Name string `bson:"filename" json:"filename"` + Off int64 `bson:"offset" json:"offset"` // offset for incremental backups + Len int64 `bson:"length" json:"length"` // length of chunk after the offset + Size int64 `bson:"fileSize" json:"fileSize"` + StgSize int64 `bson:"stgSize" json:"stgSize"` + Fmode os.FileMode `bson:"fmode" json:"fmode"` +} + +func (f File) String() string { + if f.Off == 0 && f.Len == 0 { + return f.Name + } + return fmt.Sprintf("%s [%d:%d]", f.Name, f.Off, f.Len) +} + +func (f *File) WriteTo(w io.Writer) (int64, error) { + fd, err := os.Open(f.Name) + if err != nil { + return 0, errors.Wrap(err, "open file for reading") + } + defer fd.Close() + + if f.Len == 0 && f.Off == 0 { + return io.Copy(w, fd) + } + + return io.Copy(w, io.NewSectionReader(fd, f.Off, f.Len)) +} diff --git a/internal/types/cmd.go b/internal/types/cmd.go new file mode 100644 index 000000000..18926f455 --- /dev/null +++ b/internal/types/cmd.go @@ -0,0 +1,142 @@ +package types + +import ( + "bytes" + "fmt" + "strconv" + + "go.mongodb.org/mongo-driver/bson/primitive" + + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/topo" +) + +type OPID primitive.ObjectID + +func OPIDfromStr(s string) (OPID, error) { + o, err := primitive.ObjectIDFromHex(s) + if err != nil { + return OPID(primitive.NilObjectID), err + } + return OPID(o), nil +} + +func NilOPID() OPID { return OPID(primitive.NilObjectID) } + +func (o OPID) String() string { + return primitive.ObjectID(o).Hex() +} + +func (o OPID) Obj() primitive.ObjectID { + return primitive.ObjectID(o) +} + +type Cmd struct { + Cmd defs.Command `bson:"cmd"` + Backup *BackupCmd `bson:"backup,omitempty"` + Restore *RestoreCmd `bson:"restore,omitempty"` + Replay *ReplayCmd `bson:"replay,omitempty"` + Delete *DeleteBackupCmd `bson:"delete,omitempty"` + DeletePITR *DeletePITRCmd `bson:"deletePitr,omitempty"` + Cleanup *CleanupCmd `bson:"cleanup,omitempty"` + TS int64 `bson:"ts"` + OPID OPID `bson:"-"` +} + +func (c Cmd) String() string { + var buf bytes.Buffer + + buf.WriteString(string(c.Cmd)) + switch c.Cmd { + case defs.CmdBackup: + buf.WriteString(" [") + buf.WriteString(c.Backup.String()) + buf.WriteString("]") + case defs.CmdRestore: + buf.WriteString(" [") + buf.WriteString(c.Restore.String()) + buf.WriteString("]") + } + buf.WriteString(" ") + return buf.String() +} + +type BackupCmd struct { + Type defs.BackupType `bson:"type"` + IncrBase bool `bson:"base"` + Name string `bson:"name"` + Namespaces []string `bson:"nss,omitempty"` + Compression defs.CompressionType `bson:"compression"` + CompressionLevel *int `bson:"level,omitempty"` +} + +func (b BackupCmd) String() string { + var level string + if b.CompressionLevel == nil { + level = "default" + } else { + level = strconv.Itoa(*b.CompressionLevel) + } + return fmt.Sprintf("name: %s, compression: %s (level: %s)", b.Name, b.Compression, level) +} + +type RestoreCmd struct { + Name string `bson:"name"` + BackupName string `bson:"backupName"` + Namespaces []string `bson:"nss,omitempty"` + RSMap map[string]string `bson:"rsMap,omitempty"` + + OplogTS primitive.Timestamp `bson:"oplogTS,omitempty"` + + External bool `bson:"external"` + ExtConf topo.ExternOpts `bson:"extConf"` + ExtTS primitive.Timestamp `bson:"extTS"` +} + +func (r RestoreCmd) String() string { + bcp := "" + if r.BackupName != "" { + bcp = "snapshot: " + r.BackupName + } + if r.External { 
+ bcp += "[external]" + } + if r.ExtTS.T > 0 { + bcp += fmt.Sprintf(" external ts: <%d,%d>", r.ExtTS.T, r.ExtTS.I) + } + if r.OplogTS.T > 0 { + bcp += fmt.Sprintf(" point-in-time: <%d,%d>", r.OplogTS.T, r.OplogTS.I) + } + + return fmt.Sprintf("name: %s, %s", r.Name, bcp) +} + +type ReplayCmd struct { + Name string `bson:"name"` + Start primitive.Timestamp `bson:"start,omitempty"` + End primitive.Timestamp `bson:"end,omitempty"` + RSMap map[string]string `bson:"rsMap,omitempty"` +} + +func (c ReplayCmd) String() string { + return fmt.Sprintf("name: %s, time: %d - %d", c.Name, c.Start, c.End) +} + +type DeleteBackupCmd struct { + Backup string `bson:"backup"` + OlderThan int64 `bson:"olderthan"` +} + +type DeletePITRCmd struct { + OlderThan int64 `bson:"olderthan"` +} + +type CleanupCmd struct { + OlderThan primitive.Timestamp `bson:"olderThan"` +} + +func (d DeleteBackupCmd) String() string { + return fmt.Sprintf("backup: %s, older than: %d", d.Backup, d.OlderThan) +} diff --git a/internal/types/restore.go b/internal/types/restore.go new file mode 100644 index 000000000..03bacd7bf --- /dev/null +++ b/internal/types/restore.go @@ -0,0 +1,153 @@ +package types + +import ( + "bytes" + "fmt" + "sort" + "strconv" + + "github.com/mongodb/mongo-tools/common/db" + "go.mongodb.org/mongo-driver/bson/primitive" + + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/storage/s3" +) + +type RestoreMeta struct { + Status defs.Status `bson:"status" json:"status"` + Error string `bson:"error,omitempty" json:"error,omitempty"` + Name string `bson:"name" json:"name"` + OPID string `bson:"opid" json:"opid"` + Backup string `bson:"backup" json:"backup"` + BcpChain []string `bson:"bcp_chain" json:"bcp_chain"` // for incremental + Namespaces []string `bson:"nss,omitempty" json:"nss,omitempty"` + StartPITR int64 `bson:"start_pitr" json:"start_pitr"` + PITR int64 `bson:"pitr" json:"pitr"` + Replsets []RestoreReplset `bson:"replsets" json:"replsets"` + Hb primitive.Timestamp `bson:"hb" json:"hb"` + StartTS int64 `bson:"start_ts" json:"start_ts"` + LastTransitionTS int64 `bson:"last_transition_ts" json:"last_transition_ts"` + Conditions Conditions `bson:"conditions" json:"conditions"` + Type defs.BackupType `bson:"type" json:"type"` + Leader string `bson:"l,omitempty" json:"l,omitempty"` + Stat *RestoreStat `bson:"stat,omitempty" json:"stat,omitempty"` +} + +type RestoreStat struct { + RS map[string]map[string]RestoreRSMetrics `bson:"rs,omitempty" json:"rs,omitempty"` +} +type RestoreRSMetrics struct { + DistTxn DistTxnStat `bson:"txn,omitempty" json:"txn,omitempty"` + Download s3.DownloadStat `bson:"download,omitempty" json:"download,omitempty"` +} + +type DistTxnStat struct { + // Partial is the num of transactions that were allied on other shards + // but can't be applied on this one since not all prepare messages got + // into the oplog (shouldn't happen). + Partial int `bson:"partial" json:"partial"` + // ShardUncommitted is the number of uncommitted transactions before + // the sync. Basically, the transaction is full but no commit message + // in the oplog of this shard. + ShardUncommitted int `bson:"shard_uncommitted" json:"shard_uncommitted"` + // LeftUncommitted is the num of transactions that remain uncommitted + // after the sync. The transaction is full but no commit message in the + // oplog of any shard. 
+ LeftUncommitted int `bson:"left_uncommitted" json:"left_uncommitted"` +} + +type RestoreShardStat struct { + Txn DistTxnStat `json:"txn"` + D *s3.DownloadStat `json:"d"` +} + +type RestoreReplset struct { + Name string `bson:"name" json:"name"` + StartTS int64 `bson:"start_ts" json:"start_ts"` + Status defs.Status `bson:"status" json:"status"` + CommittedTxn []RestoreTxn `bson:"committed_txn" json:"committed_txn"` + CommittedTxnSet bool `bson:"txn_set" json:"txn_set"` + PartialTxn []db.Oplog `bson:"partial_txn" json:"partial_txn"` + CurrentOp primitive.Timestamp `bson:"op" json:"op"` + LastTransitionTS int64 `bson:"last_transition_ts" json:"last_transition_ts"` + LastWriteTS primitive.Timestamp `bson:"last_write_ts" json:"last_write_ts"` + Nodes []RestoreNode `bson:"nodes,omitempty" json:"nodes,omitempty"` + Error string `bson:"error,omitempty" json:"error,omitempty"` + Conditions Conditions `bson:"conditions" json:"conditions"` + Hb primitive.Timestamp `bson:"hb" json:"hb"` + Stat RestoreShardStat `bson:"stat" json:"stat"` +} + +type Conditions []*Condition + +func (b Conditions) Len() int { return len(b) } +func (b Conditions) Less(i, j int) bool { return b[i].Timestamp < b[j].Timestamp } +func (b Conditions) Swap(i, j int) { b[i], b[j] = b[j], b[i] } + +// Insert keeps conditions asc sorted by Timestamp +func (b *Conditions) Insert(c *Condition) { + i := sort.Search(len(*b), func(i int) bool { return []*Condition(*b)[i].Timestamp >= c.Timestamp }) + *b = append(*b, &Condition{}) + copy([]*Condition(*b)[i+1:], []*Condition(*b)[i:]) + []*Condition(*b)[i] = c +} + +type RestoreNode struct { + Name string `bson:"name" json:"name"` + Status defs.Status `bson:"status" json:"status"` + LastTransitionTS int64 `bson:"last_transition_ts" json:"last_transition_ts"` + Error string `bson:"error,omitempty" json:"error,omitempty"` + Conditions Conditions `bson:"conditions" json:"conditions"` + Hb primitive.Timestamp `bson:"hb" json:"hb"` +} + +type TxnState string + +const ( + TxnCommit TxnState = "commit" + TxnPrepare TxnState = "prepare" + TxnAbort TxnState = "abort" + TxnUnknown TxnState = "" +) + +type RestoreTxn struct { + ID string `bson:"id" json:"id"` + Ctime primitive.Timestamp `bson:"ts" json:"ts"` // commit timestamp of the transaction + State TxnState `bson:"state" json:"state"` +} + +func (t RestoreTxn) Encode() []byte { + return []byte(fmt.Sprintf("txn:%d,%d:%s:%s", t.Ctime.T, t.Ctime.I, t.ID, t.State)) +} + +func (t *RestoreTxn) Decode(b []byte) error { + for k, v := range bytes.SplitN(bytes.TrimSpace(b), []byte{':'}, 4) { + switch k { + case 0: + case 1: + if si := bytes.SplitN(v, []byte{','}, 2); len(si) == 2 { + tt, err := strconv.ParseInt(string(si[0]), 10, 64) + if err != nil { + return errors.Wrap(err, "parse clusterTime T") + } + ti, err := strconv.ParseInt(string(si[1]), 10, 64) + if err != nil { + return errors.Wrap(err, "parse clusterTime I") + } + + t.Ctime = primitive.Timestamp{T: uint32(tt), I: uint32(ti)} + } + case 2: + t.ID = string(v) + case 3: + t.State = TxnState(string(v)) + } + } + + return nil +} + +func (t RestoreTxn) String() string { + return fmt.Sprintf("<%s> [%s] %v", t.ID, t.State, t.Ctime) +} diff --git a/internal/types/types.go b/internal/types/types.go new file mode 100644 index 000000000..1b1447c55 --- /dev/null +++ b/internal/types/types.go @@ -0,0 +1,20 @@ +package types + +type ConnectionStatus struct { + AuthInfo AuthInfo `bson:"authInfo" json:"authInfo"` +} + +type AuthInfo struct { + Users []AuthUser `bson:"authenticatedUsers" 
json:"authenticatedUsers"` + UserRoles []AuthUserRoles `bson:"authenticatedUserRoles" json:"authenticatedUserRoles"` +} + +type AuthUser struct { + User string `bson:"user" json:"user"` + DB string `bson:"db" json:"db"` +} + +type AuthUserRoles struct { + Role string `bson:"role" json:"role"` + DB string `bson:"db" json:"db"` +} diff --git a/pbm/rs_map.go b/internal/util/rs_map.go similarity index 97% rename from pbm/rs_map.go rename to internal/util/rs_map.go index 5371127ea..04459404c 100644 --- a/pbm/rs_map.go +++ b/internal/util/rs_map.go @@ -1,4 +1,4 @@ -package pbm +package util type RSMapFunc func(string) string diff --git a/pbm/sel/sel.go b/internal/util/sel.go similarity index 96% rename from pbm/sel/sel.go rename to internal/util/sel.go index 17aa576ad..94678f086 100644 --- a/pbm/sel/sel.go +++ b/internal/util/sel.go @@ -1,4 +1,4 @@ -package sel +package util import ( "encoding/hex" @@ -7,7 +7,7 @@ import ( "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" - "github.com/percona/percona-backup-mongodb/pbm/archive" + "github.com/percona/percona-backup-mongodb/internal/archive" ) func IsSelective(ids []string) bool { diff --git a/pbm/sel/sel_test.go b/internal/util/sel_test.go similarity index 88% rename from pbm/sel/sel_test.go rename to internal/util/sel_test.go index ec99abc63..56b604723 100644 --- a/pbm/sel/sel_test.go +++ b/internal/util/sel_test.go @@ -1,10 +1,10 @@ -package sel_test +package util_test import ( "reflect" "testing" - "github.com/percona/percona-backup-mongodb/pbm/sel" + "github.com/percona/percona-backup-mongodb/internal/util" ) func TestSelectedPred(t *testing.T) { @@ -32,7 +32,7 @@ func TestSelectedPred(t *testing.T) { } for _, c := range cases { - s := sel.MakeSelectedPred(c.s) + s := util.MakeSelectedPred(c.s) r := []string{} for _, ns := range nss { if s(ns) { diff --git a/internal/util/storage.go b/internal/util/storage.go new file mode 100644 index 000000000..ff3f52bb6 --- /dev/null +++ b/internal/util/storage.go @@ -0,0 +1,46 @@ +package util + +import ( + "github.com/percona/percona-backup-mongodb/internal/config" + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/log" + "github.com/percona/percona-backup-mongodb/internal/storage" + "github.com/percona/percona-backup-mongodb/internal/storage/azure" + "github.com/percona/percona-backup-mongodb/internal/storage/blackhole" + "github.com/percona/percona-backup-mongodb/internal/storage/fs" + "github.com/percona/percona-backup-mongodb/internal/storage/s3" +) + +// ErrStorageUndefined is an error for undefined storage +var ErrStorageUndefined = errors.New("storage undefined") + +// StorageFromConfig creates and returns a storage object based on a given config +func StorageFromConfig(c config.Config, l *log.Event) (storage.Storage, error) { + switch c.Storage.Type { + case storage.S3: + return s3.New(c.Storage.S3, l) + case storage.Azure: + return azure.New(c.Storage.Azure, l) + case storage.Filesystem: + return fs.New(c.Storage.Filesystem) + case storage.BlackHole: + return blackhole.New(), nil + case storage.Undef: + return nil, ErrStorageUndefined + default: + return nil, errors.Errorf("unknown storage type %s", c.Storage.Type) + } +} + +// GetStorage reads current storage config and creates and +// returns respective storage.Storage object +func GetStorage(ctx context.Context, m 
connect.Client, l *log.Event) (storage.Storage, error) { + c, err := config.GetConfig(ctx, m) + if err != nil { + return nil, errors.Wrap(err, "get config") + } + + return StorageFromConfig(c, l) +} diff --git a/internal/version/version.go b/internal/version/version.go new file mode 100644 index 000000000..a4b4c1566 --- /dev/null +++ b/internal/version/version.go @@ -0,0 +1,276 @@ +package version + +import ( + "encoding/json" + "fmt" + "runtime" + + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/mongo" + "golang.org/x/mod/semver" + + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" +) + +// current PBM version +const version = "2.3.0-next" + +var ( + platform string + gitCommit string + gitBranch string + buildTime string +) + +type Info struct { //nolint:musttag + Version string + Platform string + GitCommit string + GitBranch string + BuildTime string + GoVersion string +} + +const plain = `Version: %s +Platform: %s +GitCommit: %s +GitBranch: %s +BuildTime: %s +GoVersion: %s` + +func Current() Info { + v := Info{ + Version: version, + Platform: platform, + GitCommit: gitCommit, + GitBranch: gitBranch, + BuildTime: buildTime, + GoVersion: runtime.Version(), + } + if v.Platform == "" { + v.Platform = fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH) + } + + return v +} + +func (i Info) String() string { + return fmt.Sprintf(plain, + i.Version, + i.Platform, + i.GitCommit, + i.GitBranch, + i.BuildTime, + i.GoVersion, + ) +} + +func (i Info) Short() string { + return i.Version +} + +func (i Info) All(format string) string { + switch format { + case "": + return fmt.Sprintf(plain, + i.Version, + i.Platform, + i.GitCommit, + i.GitBranch, + i.BuildTime, + i.GoVersion, + ) + case "json": + v, _ := json.MarshalIndent(i, "", " ") //nolint:errchkjson + return string(v) + default: + return fmt.Sprintf("%#v", i) + } +} + +// CompatibleWith checks if a given version is compatible the current one. It +// is not compatible if the current is crossed the breaking ponit +// (version >= breakingVersion) and the given isn't (v < breakingVersion) +func CompatibleWith(v string, breakingv []string) bool { + return compatible(version, v, breakingv) +} + +func compatible(v1, v2 string, breakingv []string) bool { + if len(breakingv) == 0 { + return true + } + + v1 = majmin(v1) + v2 = majmin(v2) + + c := semver.Compare(v2, v1) + if c == 0 { + return true + } + + hV, lV := v1, v2 + if c == 1 { + hV, lV = lV, hV + } + + for i := len(breakingv) - 1; i >= 0; i-- { + cb := majmin(breakingv[i]) + if semver.Compare(hV, cb) >= 0 { + return semver.Compare(lV, cb) >= 0 + } + } + + return true +} + +func majmin(v string) string { + if len(v) == 0 { + return v + } + + if v[0] != 'v' { + v = "v" + v + } + + return semver.MajorMinor(v) +} + +func IsLegacyArchive(ver string) bool { + return semver.Compare(majmin(ver), "v2.0") == -1 +} + +// BreakingChangesMap map of versions introduced breaking changes to respective +// backup defs. +// !!! Versions should be sorted in the ascending order. 
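
The breaking-point rule is easier to see with concrete numbers. Below is a self-contained sketch that reuses the same majmin/compare logic described above; the sample values are illustrative and only golang.org/x/mod/semver is assumed:

package main

import (
	"fmt"

	"golang.org/x/mod/semver"
)

// majmin and compatible mirror the version gate described above: two PBM
// versions may interoperate unless exactly one of them is below the nearest
// breaking-change point.
func majmin(v string) string {
	if len(v) == 0 {
		return v
	}
	if v[0] != 'v' {
		v = "v" + v
	}
	return semver.MajorMinor(v)
}

func compatible(v1, v2 string, breakingv []string) bool {
	if len(breakingv) == 0 {
		return true
	}
	v1, v2 = majmin(v1), majmin(v2)
	c := semver.Compare(v2, v1)
	if c == 0 {
		return true
	}
	hi, lo := v1, v2
	if c == 1 {
		hi, lo = lo, hi
	}
	for i := len(breakingv) - 1; i >= 0; i-- {
		b := majmin(breakingv[i])
		if semver.Compare(hi, b) >= 0 {
			return semver.Compare(lo, b) >= 0
		}
	}
	return true
}

func main() {
	// 1.5.0 is listed as a breaking point for logical backups, so a 2.3.0
	// component cannot work with a 1.4.x one, while 1.6.x is fine.
	fmt.Println(compatible("2.3.0", "1.4.0", []string{"1.5.0"})) // false
	fmt.Println(compatible("2.3.0", "1.6.0", []string{"1.5.0"})) // true
}
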
+var BreakingChangesMap = map[defs.BackupType][]string{ + defs.LogicalBackup: {"1.5.0"}, + defs.IncrementalBackup: {"2.1.0"}, + defs.PhysicalBackup: {}, +} + +type MongoVersion struct { + PSMDBVersion string `bson:"psmdbVersion,omitempty"` + VersionString string `bson:"version"` + Version []int `bson:"versionArray"` +} + +func (v MongoVersion) Major() int { + if len(v.Version) == 0 { + return 0 + } + + return v.Version[0] +} + +func GetMongoVersion(ctx context.Context, m *mongo.Client) (MongoVersion, error) { + res := m.Database("admin").RunCommand(ctx, bson.D{{"buildInfo", 1}}) + if err := res.Err(); err != nil { + return MongoVersion{}, err + } + + var ver MongoVersion + if err := res.Decode(&ver); err != nil { + return MongoVersion{}, err + } + + return ver, nil +} + +type FeatureSupport MongoVersion + +func (f FeatureSupport) PBMSupport() error { + v := MongoVersion(f) + + if v.Version[0] == 4 && v.Version[1] == 4 { + return nil + } + + if (v.Version[0] == 5 || v.Version[0] == 6) && v.Version[1] == 0 { + return nil + } + + return errors.New("Unsupported MongoDB version. PBM works with v4.4, v5.0, v6.0") +} + +func (f FeatureSupport) FullPhysicalBackup() bool { + // PSMDB 4.2.15, 4.4.6 + v := MongoVersion(f) + if v.PSMDBVersion == "" { + return false + } + + switch { + case v.Version[0] == 4 && v.Version[1] == 2 && v.Version[2] >= 15: + fallthrough + case v.Version[0] == 4 && v.Version[1] == 4 && v.Version[2] >= 6: + fallthrough + case v.Version[0] >= 5: + return true + } + + return false +} + +func (f FeatureSupport) IncrementalPhysicalBackup() bool { + // PSMDB 4.2.24, 4.4.18, 5.0.14, 6.0.3 + v := MongoVersion(f) + if v.PSMDBVersion == "" { + return false + } + + switch { + case v.Version[0] == 4 && v.Version[1] == 2 && v.Version[2] >= 24: + fallthrough + case v.Version[0] == 4 && v.Version[1] == 4 && v.Version[2] >= 18: + fallthrough + case v.Version[0] == 5 && v.Version[1] == 0 && v.Version[2] >= 14: + fallthrough + case v.Version[0] == 6 && v.Version[1] == 0 && v.Version[2] >= 3: + fallthrough + case v.Version[0] >= 7: + return true + } + + return false +} + +func (f FeatureSupport) BackupType(t defs.BackupType) error { + switch t { + case defs.PhysicalBackup: + if !f.FullPhysicalBackup() { + return errors.New("full physical backup works since " + + "Percona Server for MongoDB 4.2.15, 4.4.6") + } + case defs.IncrementalBackup: + if !f.IncrementalPhysicalBackup() { + return errors.New("incremental physical backup works since " + + "Percona Server for MongoDB 4.2.24, 4.4.18, 5.0.14, 6.0.3") + } + case defs.ExternalBackup: + if !f.FullPhysicalBackup() { + return errors.New("external backup works since " + + "Percona Server for MongoDB 4.2.15, 4.4.6") + } + } + + return nil +} + +func GetFCV(ctx context.Context, m *mongo.Client) (string, error) { + res := m.Database("admin").RunCommand(ctx, bson.D{ + {"getParameter", 1}, + {"featureCompatibilityVersion", 1}, + }) + if err := res.Err(); err != nil { + return "", errors.Wrap(err, "query") + } + + var ver struct{ FeatureCompatibilityVersion struct{ Version string } } + if err := res.Decode(&ver); err != nil { + return "", errors.Wrap(err, "decode") + } + + return ver.FeatureCompatibilityVersion.Version, nil +} diff --git a/version/version_test.go b/internal/version/version_test.go similarity index 100% rename from version/version_test.go rename to internal/version/version_test.go diff --git a/pbm/backup/backup.go b/pbm/backup/backup.go index b90b87f56..10398ffe2 100644 --- a/pbm/backup/backup.go +++ b/pbm/backup/backup.go @@ -2,35 +2,40 
@@ package backup import ( "bytes" - "context" "encoding/json" - "io" "time" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" + "github.com/percona/percona-backup-mongodb/internal/config" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/lock" + "github.com/percona/percona-backup-mongodb/internal/log" + "github.com/percona/percona-backup-mongodb/internal/query" + "github.com/percona/percona-backup-mongodb/internal/storage" + "github.com/percona/percona-backup-mongodb/internal/topo" + "github.com/percona/percona-backup-mongodb/internal/types" + "github.com/percona/percona-backup-mongodb/internal/util" + "github.com/percona/percona-backup-mongodb/internal/version" "github.com/percona/percona-backup-mongodb/pbm" - "github.com/percona/percona-backup-mongodb/pbm/compress" - plog "github.com/percona/percona-backup-mongodb/pbm/log" - "github.com/percona/percona-backup-mongodb/pbm/storage" - "github.com/percona/percona-backup-mongodb/version" ) type Backup struct { cn *pbm.PBM node *pbm.Node - typ pbm.BackupType + typ defs.BackupType incrBase bool - timeouts *pbm.BackupTimeouts + timeouts *config.BackupTimeouts } func New(cn *pbm.PBM, node *pbm.Node) *Backup { return &Backup{ cn: cn, node: node, - typ: pbm.LogicalBackup, + typ: defs.LogicalBackup, } } @@ -38,7 +43,7 @@ func NewPhysical(cn *pbm.PBM, node *pbm.Node) *Backup { return &Backup{ cn: cn, node: node, - typ: pbm.PhysicalBackup, + typ: defs.PhysicalBackup, } } @@ -46,7 +51,7 @@ func NewExternal(cn *pbm.PBM, node *pbm.Node) *Backup { return &Backup{ cn: cn, node: node, - typ: pbm.ExternalBackup, + typ: defs.ExternalBackup, } } @@ -54,28 +59,30 @@ func NewIncremental(cn *pbm.PBM, node *pbm.Node, base bool) *Backup { return &Backup{ cn: cn, node: node, - typ: pbm.IncrementalBackup, + typ: defs.IncrementalBackup, incrBase: base, } } -func (b *Backup) SetTimeouts(t *pbm.BackupTimeouts) { +func (b *Backup) SetTimeouts(t *config.BackupTimeouts) { b.timeouts = t } func (b *Backup) Init( - bcp *pbm.BackupCmd, - opid pbm.OPID, - inf *pbm.NodeInfo, - store pbm.StorageConf, - balancer pbm.BalancerMode, + ctx context.Context, + bcp *types.BackupCmd, + opid types.OPID, + inf *topo.NodeInfo, + store config.StorageConf, + balancer topo.BalancerMode, + l *log.Event, ) error { - ts, err := b.cn.ClusterTime() + ts, err := topo.GetClusterTime(ctx, b.cn.Conn) if err != nil { return errors.Wrap(err, "read cluster time") } - meta := &pbm.BackupMeta{ + meta := &types.BackupMeta{ Type: b.typ, OPID: opid.String(), Name: bcp.Name, @@ -83,42 +90,44 @@ func (b *Backup) Init( Compression: bcp.Compression, Store: store, StartTS: time.Now().Unix(), - Status: pbm.StatusStarting, - Replsets: []pbm.BackupReplset{}, + Status: defs.StatusStarting, + Replsets: []types.BackupReplset{}, // the driver (mongo?) sets TS to the current wall clock if TS was 0, so have to init with 1 LastWriteTS: primitive.Timestamp{T: 1, I: 1}, // the driver (mongo?) 
sets TS to the current wall clock if TS was 0, so have to init with 1 FirstWriteTS: primitive.Timestamp{T: 1, I: 1}, PBMVersion: version.Current().Version, - Nomination: []pbm.BackupRsNomination{}, + Nomination: []types.BackupRsNomination{}, BalancerStatus: balancer, Hb: ts, } - cfg, err := b.cn.GetConfig() - if errors.Is(err, pbm.ErrStorageUndefined) { - return errors.New("backups cannot be saved because PBM storage configuration hasn't been set yet") - } else if err != nil { + cfg, err := config.GetConfig(ctx, b.cn.Conn) + if err != nil { return errors.Wrap(err, "unable to get PBM config settings") } + _, err = util.StorageFromConfig(cfg, l) + if errors.Is(err, util.ErrStorageUndefined) { + return errors.New("backups cannot be saved because PBM storage configuration hasn't been set yet") + } meta.Store = cfg.Storage - ver, err := b.node.GetMongoVersion() + ver, err := version.GetMongoVersion(ctx, b.node.Session()) if err != nil { - return errors.WithMessage(err, "get mongo version") + return errors.Wrap(err, "get mongo version") } meta.MongoVersion = ver.VersionString - fcv, err := b.node.GetFeatureCompatibilityVersion() + fcv, err := version.GetFCV(ctx, b.node.Session()) if err != nil { - return errors.WithMessage(err, "get featureCompatibilityVersion") + return errors.Wrap(err, "get featureCompatibilityVersion") } meta.FCV = fcv if inf.IsSharded() { - ss, err := b.cn.GetShards() + ss, err := b.cn.GetShards(ctx) if err != nil { - return errors.WithMessage(err, "get shards") + return errors.Wrap(err, "get shards") } shards := make(map[string]string) @@ -133,37 +142,37 @@ func (b *Backup) Init( } } - return b.cn.SetBackupMeta(meta) + return query.SetBackupMeta(ctx, b.cn.Conn, meta) } // Run runs backup. // TODO: describe flow // //nolint:nonamedreturns -func (b *Backup) Run(ctx context.Context, bcp *pbm.BackupCmd, opid pbm.OPID, l *plog.Event) (err error) { - inf, err := b.node.GetInfo() +func (b *Backup) Run(ctx context.Context, bcp *types.BackupCmd, opid types.OPID, l *log.Event) (err error) { + inf, err := topo.GetNodeInfoExt(ctx, b.node.Session()) if err != nil { return errors.Wrap(err, "get cluster info") } - rsMeta := pbm.BackupReplset{ + rsMeta := types.BackupReplset{ Name: inf.SetName, Node: inf.Me, StartTS: time.Now().UTC().Unix(), - Status: pbm.StatusRunning, - Conditions: []pbm.Condition{}, + Status: defs.StatusRunning, + Conditions: []types.Condition{}, FirstWriteTS: primitive.Timestamp{T: 1, I: 1}, } if v := inf.IsConfigSrv(); v { rsMeta.IsConfigSvr = &v } - stg, err := b.cn.GetStorage(l) + stg, err := util.GetStorage(ctx, b.cn.Conn, l) if err != nil { return errors.Wrap(err, "unable to get PBM storage configuration settings") } - bcpm, err := b.cn.GetBackupMeta(bcp.Name) + bcpm, err := query.GetBackupMeta(ctx, b.cn.Conn, bcp.Name) if err != nil { return errors.Wrap(err, "balancer status, get backup meta") } @@ -171,16 +180,16 @@ func (b *Backup) Run(ctx context.Context, bcp *pbm.BackupCmd, opid pbm.OPID, l * // on any error the RS' and the backup' (in case this is the backup leader) meta will be marked appropriately defer func() { if err != nil { - status := pbm.StatusError - if errors.Is(err, ErrCancelled) { - status = pbm.StatusCancelled + status := defs.StatusError + if errors.Is(err, storage.ErrCancelled) || errors.Is(err, context.Canceled) { + status = defs.StatusCancelled } - ferr := b.cn.ChangeRSState(bcp.Name, rsMeta.Name, status, err.Error()) + ferr := query.ChangeRSState(b.cn.Conn, bcp.Name, rsMeta.Name, status, err.Error()) l.Info("mark RS as %s `%v`: %v", status, 
err, ferr) if inf.IsLeader() { - ferr := b.cn.ChangeBackupState(bcp.Name, status, err.Error()) + ferr := query.ChangeBackupState(b.cn.Conn, bcp.Name, status, err.Error()) l.Info("mark backup as %s `%v`: %v", status, err, ferr) } } @@ -191,11 +200,11 @@ func (b *Backup) Run(ctx context.Context, bcp *pbm.BackupCmd, opid pbm.OPID, l * // And will try to turn it on again if so. So if the leader node went down after turning off // the balancer some other node will bring it back. // TODO: what if all agents went down. - if bcpm.BalancerStatus != pbm.BalancerModeOn { + if bcpm.BalancerStatus != topo.BalancerModeOn { return } - errd := b.cn.SetBalancerStatus(pbm.BalancerModeOn) + errd := topo.SetBalancerStatus(context.Background(), b.cn.Conn, topo.BalancerModeOn) if errd != nil { l.Error("set balancer ON: %v", errd) return @@ -207,7 +216,7 @@ func (b *Backup) Run(ctx context.Context, bcp *pbm.BackupCmd, opid pbm.OPID, l * hbstop := make(chan struct{}) defer close(hbstop) - err := b.cn.BackupHB(bcp.Name) + err := query.BackupHB(ctx, b.cn.Conn, bcp.Name) if err != nil { return errors.Wrap(err, "init heartbeat") } @@ -219,7 +228,7 @@ func (b *Backup) Run(ctx context.Context, bcp *pbm.BackupCmd, opid pbm.OPID, l * for { select { case <-tk.C: - err := b.cn.BackupHB(bcp.Name) + err := query.BackupHB(ctx, b.cn.Conn, bcp.Name) if err != nil { l.Error("send pbm heartbeat: %v", err) } @@ -229,27 +238,30 @@ func (b *Backup) Run(ctx context.Context, bcp *pbm.BackupCmd, opid pbm.OPID, l * } }() - if bcpm.BalancerStatus == pbm.BalancerModeOn { - err = b.cn.SetBalancerStatus(pbm.BalancerModeOff) + if bcpm.BalancerStatus == topo.BalancerModeOn { + err = topo.SetBalancerStatus(ctx, b.cn.Conn, topo.BalancerModeOff) if err != nil { return errors.Wrap(err, "set balancer OFF") } l.Debug("waiting for balancer off") - bs := waitForBalancerOff(b.cn, time.Second*30, l) + bs := waitForBalancerOff(ctx, b.cn, time.Second*30, l) l.Debug("balancer status: %s", bs) } } // Waiting for StatusStarting to move further. // In case some preparations has to be done before backup. 
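
Both waitForBalancerOff above and convergeClusterWithTimeout below follow the same poll-until-done-or-timeout shape. A generic sketch of that pattern (waitUntil is an assumed name; only the standard library is used):

package example // illustrative sketch only

import (
	"context"
	"errors"
	"time"
)

// waitUntil polls check every 500ms until it reports done, the timeout
// elapses, or the context is cancelled.
func waitUntil(ctx context.Context, timeout time.Duration, check func() (bool, error)) error {
	dn := time.NewTimer(timeout)
	defer dn.Stop()
	tk := time.NewTicker(500 * time.Millisecond)
	defer tk.Stop()

	for {
		select {
		case <-tk.C:
			done, err := check()
			if err != nil {
				return err
			}
			if done {
				return nil
			}
		case <-dn.C:
			return errors.New("timeout reached")
		case <-ctx.Done():
			return ctx.Err()
		}
	}
}
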
- err = b.waitForStatus(bcp.Name, pbm.StatusStarting, ref(b.timeouts.StartingStatus())) + err = b.waitForStatus(ctx, bcp.Name, defs.StatusStarting, ref(b.timeouts.StartingStatus())) if err != nil { return errors.Wrap(err, "waiting for start") } defer func() { - if !errors.Is(err, ErrCancelled) || !inf.IsLeader() { + if !inf.IsLeader() { + return + } + if !errors.Is(err, storage.ErrCancelled) || !errors.Is(err, context.Canceled) { return } @@ -259,9 +271,9 @@ func (b *Backup) Run(ctx context.Context, bcp *pbm.BackupCmd, opid pbm.OPID, l * }() switch b.typ { - case pbm.LogicalBackup: + case defs.LogicalBackup: err = b.doLogical(ctx, bcp, opid, &rsMeta, inf, stg, l) - case pbm.PhysicalBackup, pbm.IncrementalBackup, pbm.ExternalBackup: + case defs.PhysicalBackup, defs.IncrementalBackup, defs.ExternalBackup: err = b.doPhysical(ctx, bcp, opid, &rsMeta, inf, stg, l) default: return errors.New("undefined backup type") @@ -270,25 +282,25 @@ func (b *Backup) Run(ctx context.Context, bcp *pbm.BackupCmd, opid pbm.OPID, l * return err } - err = b.cn.ChangeRSState(bcp.Name, rsMeta.Name, pbm.StatusDone, "") + err = query.ChangeRSState(b.cn.Conn, bcp.Name, rsMeta.Name, defs.StatusDone, "") if err != nil { return errors.Wrap(err, "set shard's StatusDone") } if inf.IsLeader() { - epch, err := b.cn.ResetEpoch() + epch, err := config.ResetEpochWithContext(ctx, b.cn.Conn) if err != nil { l.Error("reset epoch") } else { l.Debug("epoch set to %v", epch) } - err = b.reconcileStatus(bcp.Name, opid.String(), pbm.StatusDone, nil) + err = b.reconcileStatus(ctx, bcp.Name, opid.String(), defs.StatusDone, nil) if err != nil { return errors.Wrap(err, "check cluster for backup done") } - bcpm, err = b.cn.GetBackupMeta(bcp.Name) + bcpm, err = query.GetBackupMeta(ctx, b.cn.Conn, bcp.Name) if err != nil { return errors.Wrap(err, "get backup metadata") } @@ -300,31 +312,31 @@ func (b *Backup) Run(ctx context.Context, bcp *pbm.BackupCmd, opid pbm.OPID, l * } // to be sure the locks released only after the "done" status had written - err = b.waitForStatus(bcp.Name, pbm.StatusDone, nil) + err = b.waitForStatus(ctx, bcp.Name, defs.StatusDone, nil) return errors.Wrap(err, "waiting for done") } -func waitForBalancerOff(cn *pbm.PBM, t time.Duration, l *plog.Event) pbm.BalancerMode { +func waitForBalancerOff(ctx context.Context, cn *pbm.PBM, t time.Duration, l *log.Event) topo.BalancerMode { dn := time.NewTimer(t) defer dn.Stop() tk := time.NewTicker(time.Millisecond * 500) defer tk.Stop() - var bs *pbm.BalancerStatus + var bs *topo.BalancerStatus var err error Loop: for { select { case <-tk.C: - bs, err = cn.GetBalancerStatus() + bs, err = topo.GetBalancerStatus(ctx, cn.Conn) if err != nil { l.Error("get balancer status: %v", err) continue } - if bs.Mode == pbm.BalancerModeOff { - return pbm.BalancerModeOff + if bs.Mode == topo.BalancerModeOff { + return topo.BalancerModeOff } case <-dn.C: break Loop @@ -332,144 +344,26 @@ Loop: } if bs != nil { - return pbm.BalancerMode("") + return topo.BalancerMode("") } return bs.Mode } -const maxReplicationLagTimeSec = 21 - -// NodeSuits checks if node can perform backup -func NodeSuits(node *pbm.Node, inf *pbm.NodeInfo) (bool, error) { - status, err := node.Status() - if err != nil { - return false, errors.Wrap(err, "get node status") - } - - replLag, err := node.ReplicationLag() - if err != nil { - return false, errors.Wrap(err, "get node replication lag") - } - - return replLag < maxReplicationLagTimeSec && status.Health == pbm.NodeHealthUp && - (status.State == pbm.NodeStatePrimary || 
status.State == pbm.NodeStateSecondary), - nil -} - -func NodeSuitsExt(node *pbm.Node, inf *pbm.NodeInfo, t pbm.BackupType) (bool, error) { - if ok, err := NodeSuits(node, inf); err != nil || !ok { - return false, err - } - - ver, err := node.GetMongoVersion() - if err != nil { - return false, errors.Wrap(err, "get mongo version") - } - - err = pbm.FeatureSupport(*ver).BackupType(t) - return err == nil, err -} - -// rwError multierror for the read/compress/write-to-store operations set -type rwError struct { - read error - compress error - write error -} - -func (rwe rwError) Error() string { - var r string - if rwe.read != nil { - r += "read data: " + rwe.read.Error() + "." - } - if rwe.compress != nil { - r += "compress data: " + rwe.compress.Error() + "." - } - if rwe.write != nil { - r += "write data: " + rwe.write.Error() + "." - } - - return r -} - -func (rwe rwError) nil() bool { - return rwe.read == nil && rwe.compress == nil && rwe.write == nil -} - -type Source interface { - io.WriterTo -} - -type Canceller interface { - Cancel() -} - -// ErrCancelled means backup was canceled -var ErrCancelled = errors.New("backup canceled") - -// Upload writes data to dst from given src and returns an amount of written bytes -func Upload( +func (b *Backup) toState( ctx context.Context, - src Source, - dst storage.Storage, - compression compress.CompressionType, - compressLevel *int, - fname string, - sizeb int64, -) (int64, error) { - r, pw := io.Pipe() - - w, err := compress.Compress(pw, compression, compressLevel) - if err != nil { - return 0, err - } - - var rwErr rwError - var n int64 - go func() { - n, rwErr.read = src.WriteTo(w) - rwErr.compress = w.Close() - pw.Close() - }() - - saveDone := make(chan struct{}) - go func() { - rwErr.write = dst.Save(fname, r, sizeb) - saveDone <- struct{}{} - }() - - select { - case <-ctx.Done(): - if c, ok := src.(Canceller); ok { - c.Cancel() - } - - err := r.Close() - if err != nil { - return 0, errors.Wrap(err, "cancel backup: close reader") - } - return 0, ErrCancelled - case <-saveDone: - } - - r.Close() - - if !rwErr.nil() { - return 0, rwErr - } - - return n, nil -} - -func (b *Backup) toState(status pbm.Status, bcp, opid string, inf *pbm.NodeInfo, wait *time.Duration) error { - err := b.cn.ChangeRSState(bcp, inf.SetName, status, "") + status defs.Status, + bcp, opid string, + inf *topo.NodeInfo, + wait *time.Duration, +) error { + err := query.ChangeRSState(b.cn.Conn, bcp, inf.SetName, status, "") if err != nil { return errors.Wrap(err, "set shard's status") } if inf.IsLeader() { - err = b.reconcileStatus(bcp, opid, status, wait) + err = b.reconcileStatus(ctx, bcp, opid, status, wait) if err != nil { if errors.Is(err, errConvergeTimeOut) { return errors.Wrap(err, "couldn't get response from all shards") @@ -478,7 +372,7 @@ func (b *Backup) toState(status pbm.Status, bcp, opid string, inf *pbm.NodeInfo, } } - err = b.waitForStatus(bcp, status, wait) + err = b.waitForStatus(ctx, bcp, status, wait) if err != nil { return errors.Wrapf(err, "waiting for %s", status) } @@ -486,35 +380,45 @@ func (b *Backup) toState(status pbm.Status, bcp, opid string, inf *pbm.NodeInfo, return nil } -func (b *Backup) reconcileStatus(bcpName, opid string, status pbm.Status, timeout *time.Duration) error { - shards, err := b.cn.ClusterMembers() +func (b *Backup) reconcileStatus( + ctx context.Context, + bcpName, opid string, + status defs.Status, + timeout *time.Duration, +) error { + shards, err := topo.ClusterMembers(ctx, b.cn.Conn.MongoClient()) if err != nil { return 
errors.Wrap(err, "get cluster members") } if timeout != nil { - return errors.Wrap(b.convergeClusterWithTimeout(bcpName, opid, shards, status, *timeout), + return errors.Wrap(b.convergeClusterWithTimeout(ctx, bcpName, opid, shards, status, *timeout), "convergeClusterWithTimeout") } - return errors.Wrap(b.convergeCluster(bcpName, opid, shards, status), "convergeCluster") + return errors.Wrap(b.convergeCluster(ctx, bcpName, opid, shards, status), "convergeCluster") } // convergeCluster waits until all given shards reached `status` and updates a cluster status -func (b *Backup) convergeCluster(bcpName, opid string, shards []pbm.Shard, status pbm.Status) error { +func (b *Backup) convergeCluster( + ctx context.Context, + bcpName, opid string, + shards []topo.Shard, + status defs.Status, +) error { tk := time.NewTicker(time.Second * 1) defer tk.Stop() for { select { case <-tk.C: - ok, err := b.converged(bcpName, opid, shards, status) + ok, err := b.converged(ctx, bcpName, opid, shards, status) if err != nil { return err } if ok { return nil } - case <-b.cn.Context().Done(): + case <-ctx.Done(): return nil } } @@ -525,10 +429,11 @@ var errConvergeTimeOut = errors.New("reached converge timeout") // convergeClusterWithTimeout waits up to the geiven timeout until // all given shards reached `status` and then updates the cluster status func (b *Backup) convergeClusterWithTimeout( + ctx context.Context, bcpName, opid string, - shards []pbm.Shard, - status pbm.Status, + shards []topo.Shard, + status defs.Status, t time.Duration, ) error { tk := time.NewTicker(time.Second * 1) @@ -540,7 +445,7 @@ func (b *Backup) convergeClusterWithTimeout( for { select { case <-tk.C: - ok, err := b.converged(bcpName, opid, shards, status) + ok, err := b.converged(ctx, bcpName, opid, shards, status) if err != nil { return err } @@ -549,20 +454,25 @@ func (b *Backup) convergeClusterWithTimeout( } case <-tout.C: return errConvergeTimeOut - case <-b.cn.Context().Done(): + case <-ctx.Done(): return nil } } } -func (b *Backup) converged(bcpName, opid string, shards []pbm.Shard, status pbm.Status) (bool, error) { +func (b *Backup) converged( + ctx context.Context, + bcpName, opid string, + shards []topo.Shard, + status defs.Status, +) (bool, error) { shardsToFinish := len(shards) - bmeta, err := b.cn.GetBackupMeta(bcpName) + bmeta, err := query.GetBackupMeta(ctx, b.cn.Conn, bcpName) if err != nil { return false, errors.Wrap(err, "get backup metadata") } - clusterTime, err := b.cn.ClusterTime() + clusterTime, err := topo.GetClusterTime(ctx, b.cn.Conn) if err != nil { return false, errors.Wrap(err, "read cluster time") } @@ -571,20 +481,20 @@ func (b *Backup) converged(bcpName, opid string, shards []pbm.Shard, status pbm. 
for _, shard := range bmeta.Replsets { if shard.Name == sh.RS { // check if node alive - lock, err := b.cn.GetLockData(&pbm.LockHeader{ - Type: pbm.CmdBackup, + lck, err := lock.GetLockData(ctx, b.cn.Conn, &lock.LockHeader{ + Type: defs.CmdBackup, OPID: opid, Replset: shard.Name, }) // nodes are cleaning its locks moving to the done status // so no lock is ok and no need to ckech the heartbeats - if status != pbm.StatusDone && !errors.Is(err, mongo.ErrNoDocuments) { + if status != defs.StatusDone && !errors.Is(err, mongo.ErrNoDocuments) { if err != nil { return false, errors.Wrapf(err, "unable to read lock for shard %s", shard.Name) } - if lock.Heartbeat.T+pbm.StaleFrameSec < clusterTime.T { - return false, errors.Errorf("lost shard %s, last beat ts: %d", shard.Name, lock.Heartbeat.T) + if lck.Heartbeat.T+defs.StaleFrameSec < clusterTime.T { + return false, errors.Errorf("lost shard %s, last beat ts: %d", shard.Name, lck.Heartbeat.T) } } @@ -592,9 +502,9 @@ func (b *Backup) converged(bcpName, opid string, shards []pbm.Shard, status pbm. switch shard.Status { case status: shardsToFinish-- - case pbm.StatusCancelled: - return false, ErrCancelled - case pbm.StatusError: + case defs.StatusCancelled: + return false, storage.ErrCancelled + case defs.StatusError: return false, errors.Errorf("backup on shard %s failed with: %s", shard.Name, bmeta.Error()) } } @@ -602,7 +512,7 @@ func (b *Backup) converged(bcpName, opid string, shards []pbm.Shard, status pbm. } if shardsToFinish == 0 { - err := b.cn.ChangeBackupState(bcpName, status, "") + err := query.ChangeBackupState(b.cn.Conn, bcpName, status, "") if err != nil { return false, errors.Wrapf(err, "update backup meta with %s", status) } @@ -612,7 +522,12 @@ func (b *Backup) converged(bcpName, opid string, shards []pbm.Shard, status pbm. return false, nil } -func (b *Backup) waitForStatus(bcpName string, status pbm.Status, waitFor *time.Duration) error { +func (b *Backup) waitForStatus( + ctx context.Context, + bcpName string, + status defs.Status, + waitFor *time.Duration, +) error { var tout <-chan time.Time if waitFor != nil { tmr := time.NewTimer(*waitFor) @@ -627,111 +542,114 @@ func (b *Backup) waitForStatus(bcpName string, status pbm.Status, waitFor *time. 
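Note: waitForStatus (continued in the next hunk) arms its timeout channel only when a duration is supplied; because a receive from a nil channel blocks forever, the `<-tout` case is simply never selected otherwise. A hedged, self-contained illustration of that idiom:

```go
package backuputil

import (
	"context"
	"errors"
	"time"
)

// waitWithOptionalTimeout blocks until done is closed, the optional timeout
// elapses, or ctx is cancelled. A nil timeout leaves tout as a nil channel,
// which disables the timeout case entirely.
func waitWithOptionalTimeout(ctx context.Context, done <-chan struct{}, timeout *time.Duration) error {
	var tout <-chan time.Time
	if timeout != nil {
		tmr := time.NewTimer(*timeout)
		defer tmr.Stop()
		tout = tmr.C
	}

	select {
	case <-done:
		return nil
	case <-tout:
		return errors.New("timed out")
	case <-ctx.Done():
		return ctx.Err()
	}
}
```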
for { select { case <-tk.C: - bmeta, err := b.cn.GetBackupMeta(bcpName) - if errors.Is(err, pbm.ErrNotFound) { + bmeta, err := query.GetBackupMeta(ctx, b.cn.Conn, bcpName) + if errors.Is(err, errors.ErrNotFound) { continue } if err != nil { return errors.Wrap(err, "get backup metadata") } - clusterTime, err := b.cn.ClusterTime() + clusterTime, err := topo.GetClusterTime(ctx, b.cn.Conn) if err != nil { return errors.Wrap(err, "read cluster time") } - if bmeta.Hb.T+pbm.StaleFrameSec < clusterTime.T { + if bmeta.Hb.T+defs.StaleFrameSec < clusterTime.T { return errors.Errorf("backup stuck, last beat ts: %d", bmeta.Hb.T) } switch bmeta.Status { case status: return nil - case pbm.StatusCancelled: - return ErrCancelled - case pbm.StatusError: + case defs.StatusCancelled: + return storage.ErrCancelled + case defs.StatusError: return errors.Errorf("cluster failed: %v", err) } case <-tout: return errors.New("no backup meta, looks like a leader failed to start") - case <-b.cn.Context().Done(): + case <-ctx.Done(): return nil } } } //nolint:nonamedreturns -func (b *Backup) waitForFirstLastWrite(bcpName string) (first, last primitive.Timestamp, err error) { +func (b *Backup) waitForFirstLastWrite( + ctx context.Context, + bcpName string, +) (first, last primitive.Timestamp, err error) { tk := time.NewTicker(time.Second * 1) defer tk.Stop() for { select { case <-tk.C: - bmeta, err := b.cn.GetBackupMeta(bcpName) + bmeta, err := query.GetBackupMeta(ctx, b.cn.Conn, bcpName) if err != nil { return first, last, errors.Wrap(err, "get backup metadata") } - clusterTime, err := b.cn.ClusterTime() + clusterTime, err := topo.GetClusterTime(ctx, b.cn.Conn) if err != nil { return first, last, errors.Wrap(err, "read cluster time") } - if bmeta.Hb.T+pbm.StaleFrameSec < clusterTime.T { + if bmeta.Hb.T+defs.StaleFrameSec < clusterTime.T { return first, last, errors.Errorf("backup stuck, last beat ts: %d", bmeta.Hb.T) } if bmeta.FirstWriteTS.T > 0 && bmeta.LastWriteTS.T > 0 { return bmeta.FirstWriteTS, bmeta.LastWriteTS, nil } - case <-b.cn.Context().Done(): + case <-ctx.Done(): return first, last, nil } } } -func writeMeta(stg storage.Storage, meta *pbm.BackupMeta) error { +func writeMeta(stg storage.Storage, meta *types.BackupMeta) error { b, err := json.MarshalIndent(meta, "", "\t") if err != nil { return errors.Wrap(err, "marshal data") } - err = stg.Save(meta.Name+pbm.MetadataFileSuffix, bytes.NewReader(b), -1) + err = stg.Save(meta.Name+defs.MetadataFileSuffix, bytes.NewReader(b), -1) return errors.Wrap(err, "write to store") } -func (b *Backup) setClusterFirstWrite(bcpName string) error { - bmeta, err := b.cn.GetBackupMeta(bcpName) +func (b *Backup) setClusterFirstWrite(ctx context.Context, bcpName string) error { + bmeta, err := query.GetBackupMeta(ctx, b.cn.Conn, bcpName) if err != nil { return errors.Wrap(err, "get backup metadata") } var fw primitive.Timestamp for _, rs := range bmeta.Replsets { - if fw.T == 0 || primitive.CompareTimestamp(fw, rs.FirstWriteTS) == 1 { + if fw.T == 0 || fw.Compare(rs.FirstWriteTS) == 1 { fw = rs.FirstWriteTS } } - err = b.cn.SetFirstWrite(bcpName, fw) + err = query.SetFirstWrite(ctx, b.cn.Conn, bcpName, fw) return errors.Wrap(err, "set timestamp") } -func (b *Backup) setClusterLastWrite(bcpName string) error { - bmeta, err := b.cn.GetBackupMeta(bcpName) +func (b *Backup) setClusterLastWrite(ctx context.Context, bcpName string) error { + bmeta, err := query.GetBackupMeta(ctx, b.cn.Conn, bcpName) if err != nil { return errors.Wrap(err, "get backup metadata") } var lw 
primitive.Timestamp for _, rs := range bmeta.Replsets { - if primitive.CompareTimestamp(lw, rs.LastWriteTS) == -1 { + if lw.Compare(rs.LastWriteTS) == -1 { lw = rs.LastWriteTS } } - err = b.cn.SetLastWrite(bcpName, lw) + err = query.SetLastWrite(ctx, b.cn.Conn, bcpName, lw) return errors.Wrap(err, "set timestamp") } diff --git a/pbm/backup/logical.go b/pbm/backup/logical.go index c27dcfbe0..0ce11e648 100644 --- a/pbm/backup/logical.go +++ b/pbm/backup/logical.go @@ -1,38 +1,43 @@ package backup import ( - "context" "io" "path" "strings" "sync" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/mongo" "golang.org/x/sync/errgroup" - "github.com/percona/percona-backup-mongodb/pbm" - "github.com/percona/percona-backup-mongodb/pbm/archive" - "github.com/percona/percona-backup-mongodb/pbm/compress" - plog "github.com/percona/percona-backup-mongodb/pbm/log" + "github.com/percona/percona-backup-mongodb/internal/archive" + "github.com/percona/percona-backup-mongodb/internal/config" + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + plog "github.com/percona/percona-backup-mongodb/internal/log" + "github.com/percona/percona-backup-mongodb/internal/query" + "github.com/percona/percona-backup-mongodb/internal/storage" + "github.com/percona/percona-backup-mongodb/internal/topo" + "github.com/percona/percona-backup-mongodb/internal/types" + "github.com/percona/percona-backup-mongodb/internal/util" + "github.com/percona/percona-backup-mongodb/internal/version" "github.com/percona/percona-backup-mongodb/pbm/oplog" - "github.com/percona/percona-backup-mongodb/pbm/sel" "github.com/percona/percona-backup-mongodb/pbm/snapshot" - "github.com/percona/percona-backup-mongodb/pbm/storage" ) func (b *Backup) doLogical( ctx context.Context, - bcp *pbm.BackupCmd, - opid pbm.OPID, - rsMeta *pbm.BackupReplset, - inf *pbm.NodeInfo, + bcp *types.BackupCmd, + opid types.OPID, + rsMeta *types.BackupReplset, + inf *topo.NodeInfo, stg storage.Storage, l *plog.Event, ) error { var db, coll string - if sel.IsSelective(bcp.Namespaces) { + if util.IsSelective(bcp.Namespaces) { // for selective backup, configsvr does not hold any data. 
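Note: throughout these hunks `github.com/pkg/errors` is dropped and wrapping goes through the new `internal/errors` package (`errors.WithMessage`/`WithMessagef` become `Wrap`/`Wrapf`, and `pbm.ErrNotFound` becomes `errors.ErrNotFound`). That package is not part of this section, so the following is only a guessed minimal shape built on the standard library's `%w` wrapping; the exported names are taken from the call sites visible in the diff.

```go
// Package errors is a hypothetical stand-in for internal/errors;
// the real package is not shown in this patch.
package errors

import (
	stderrors "errors"
	"fmt"
)

// ErrNotFound is the sentinel the diff compares with errors.Is
// (previously pbm.ErrNotFound).
var ErrNotFound = stderrors.New("not found")

// Thin re-exports so call sites need a single errors import.
var (
	New = stderrors.New
	Is  = stderrors.Is
	As  = stderrors.As
)

// Wrap annotates err while keeping the chain intact for Is/As.
func Wrap(err error, msg string) error {
	if err == nil {
		return nil
	}
	return fmt.Errorf("%s: %w", msg, err)
}

// Wrapf is Wrap with a format string.
func Wrapf(err error, format string, args ...any) error {
	if err == nil {
		return nil
	}
	return fmt.Errorf("%s: %w", fmt.Sprintf(format, args...), err)
}

// Errorf mirrors fmt.Errorf so existing errors.Errorf call sites read the same.
func Errorf(format string, args ...any) error {
	return fmt.Errorf(format, args...)
}
```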
// only some collections from config db is required to restore cluster state if inf.IsConfigSrv() { @@ -44,31 +49,31 @@ func (b *Backup) doLogical( nssSize, err := getNamespacesSize(ctx, b.node.Session(), db, coll) if err != nil { - return errors.WithMessage(err, "get namespaces size") + return errors.Wrap(err, "get namespaces size") } - if bcp.Compression == compress.CompressionTypeNone { + if bcp.Compression == defs.CompressionTypeNone { for n := range nssSize { nssSize[n] *= 4 } } oplog := oplog.NewOplogBackup(b.node.Session()) - oplogTS, err := oplog.LastWrite() + oplogTS, err := oplog.LastWrite(ctx) if err != nil { return errors.Wrap(err, "define oplog start position") } - rsMeta.Status = pbm.StatusRunning + rsMeta.Status = defs.StatusRunning rsMeta.FirstWriteTS = oplogTS rsMeta.OplogName = path.Join(bcp.Name, rsMeta.Name, "local.oplog.rs.bson") + bcp.Compression.Suffix() rsMeta.DumpName = path.Join(bcp.Name, rsMeta.Name, archive.MetaFile) - err = b.cn.AddRSMeta(bcp.Name, *rsMeta) + err = query.AddRSMeta(ctx, b.cn.Conn, bcp.Name, *rsMeta) if err != nil { return errors.Wrap(err, "add shard's metadata") } if inf.IsLeader() { - err := b.reconcileStatus(bcp.Name, opid.String(), pbm.StatusRunning, ref(b.timeouts.StartingStatus())) + err := b.reconcileStatus(ctx, bcp.Name, opid.String(), defs.StatusRunning, ref(b.timeouts.StartingStatus())) if err != nil { if errors.Is(err, errConvergeTimeOut) { return errors.Wrap(err, "couldn't get response from all shards") @@ -76,30 +81,30 @@ func (b *Backup) doLogical( return errors.Wrap(err, "check cluster for backup started") } - err = b.setClusterFirstWrite(bcp.Name) + err = b.setClusterFirstWrite(ctx, bcp.Name) if err != nil { return errors.Wrap(err, "set cluster first write ts") } } // Waiting for cluster's StatusRunning to move further. - err = b.waitForStatus(bcp.Name, pbm.StatusRunning, nil) + err = b.waitForStatus(ctx, bcp.Name, defs.StatusRunning, nil) if err != nil { return errors.Wrap(err, "waiting for running") } - if !sel.IsSelective(bcp.Namespaces) { + if !util.IsSelective(bcp.Namespaces) { // Save users and roles to the tmp collections so the restore would copy that data // to the system collections. Have to do this because of issues with the restore and preserverUUID. 
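Note: reconcileStatus above is fed `ref(b.timeouts.StartingStatus())`, and the timeout parameter is a `*time.Duration`, so `ref` is presumably just an address-of helper. It is not shown in this section; the generic one-liner below is an assumption about its shape, not the actual PBM code.

```go
package backuputil

// ref returns a pointer to v. Handy for passing literals or function
// results to APIs that take optional values as pointers,
// e.g. ref(30*time.Second) for a *time.Duration parameter.
// (Assumed shape; the real helper is not part of this hunk.)
func ref[T any](v T) *T {
	return &v
}
```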
// see: https://jira.percona.com/browse/PBM-636 and comments - lw, err := b.node.CopyUsersNRolles() + lw, err := b.node.CopyUsersNRolles(ctx) if err != nil { return errors.Wrap(err, "copy users and roles for the restore") } defer func() { l.Info("dropping tmp collections") - if err := b.node.DropTMPcoll(); err != nil { + if err := b.node.DropTMPcoll(context.Background()); err != nil { l.Warning("drop tmp users and roles: %v", err) } }() @@ -108,7 +113,7 @@ func (b *Backup) doLogical( // have replicated to the node we're about to take a backup from // *copying made on a primary but backup does a secondary node l.Debug("wait for tmp users %v", lw) - err = b.node.WaitForWrite(lw) + err = b.node.WaitForWrite(ctx, lw) if err != nil { return errors.Wrap(err, "wait for tmp users and roles replication") } @@ -124,17 +129,17 @@ func (b *Backup) doLogical( } } - cfg, err := b.cn.GetConfig() + cfg, err := config.GetConfig(ctx, b.cn.Conn) if err != nil { - return errors.WithMessage(err, "get config") + return errors.Wrap(err, "get config") } nsFilter := archive.DefaultNSFilter docFilter := archive.DefaultDocFilter - if inf.IsConfigSrv() && sel.IsSelective(bcp.Namespaces) { + if inf.IsConfigSrv() && util.IsSelective(bcp.Namespaces) { chunkSelector, err := createBackupChunkSelector(ctx, b.cn.Conn, bcp.Namespaces) if err != nil { - return errors.WithMessage(err, "fetch uuids") + return errors.Wrap(err, "fetch uuids") } nsFilter = makeConfigsvrNSFilter() @@ -143,9 +148,9 @@ func (b *Backup) doLogical( snapshotSize, err := snapshot.UploadDump(dump, func(ns, ext string, r io.Reader) error { - stg, err := pbm.Storage(cfg, l) + stg, err := util.StorageFromConfig(cfg, l) if err != nil { - return errors.WithMessage(err, "get storage") + return errors.Wrap(err, "get storage") } filepath := path.Join(bcp.Name, rsMeta.Name, ns+ext) @@ -162,39 +167,39 @@ func (b *Backup) doLogical( } l.Info("mongodump finished, waiting for the oplog") - err = b.cn.ChangeRSState(bcp.Name, rsMeta.Name, pbm.StatusDumpDone, "") + err = query.ChangeRSState(b.cn.Conn, bcp.Name, rsMeta.Name, defs.StatusDumpDone, "") if err != nil { return errors.Wrap(err, "set shard's StatusDumpDone") } - lwts, err := oplog.LastWrite() + lwts, err := oplog.LastWrite(ctx) if err != nil { return errors.Wrap(err, "get shard's last write ts") } - err = b.cn.SetRSLastWrite(bcp.Name, rsMeta.Name, lwts) + err = query.SetRSLastWrite(b.cn.Conn, bcp.Name, rsMeta.Name, lwts) if err != nil { return errors.Wrap(err, "set shard's last write ts") } if inf.IsLeader() { - err := b.reconcileStatus(bcp.Name, opid.String(), pbm.StatusDumpDone, nil) + err := b.reconcileStatus(ctx, bcp.Name, opid.String(), defs.StatusDumpDone, nil) if err != nil { return errors.Wrap(err, "check cluster for dump done") } - err = b.setClusterLastWrite(bcp.Name) + err = b.setClusterLastWrite(ctx, bcp.Name) if err != nil { return errors.Wrap(err, "set cluster last write ts") } } - err = b.waitForStatus(bcp.Name, pbm.StatusDumpDone, nil) + err = b.waitForStatus(ctx, bcp.Name, defs.StatusDumpDone, nil) if err != nil { return errors.Wrap(err, "waiting for dump done") } - fwTS, lwTS, err := b.waitForFirstLastWrite(bcp.Name) + fwTS, lwTS, err := b.waitForFirstLastWrite(ctx, bcp.Name) if err != nil { return errors.Wrap(err, "get cluster first & last write ts") } @@ -202,12 +207,12 @@ func (b *Backup) doLogical( l.Debug("set oplog span to %v / %v", fwTS, lwTS) oplog.SetTailingSpan(fwTS, lwTS) // size -1 - we're assuming oplog never exceed 97Gb (see comments in s3.Save method) - oplogSize, err := 
Upload(ctx, oplog, stg, bcp.Compression, bcp.CompressionLevel, rsMeta.OplogName, -1) + oplogSize, err := storage.Upload(ctx, oplog, stg, bcp.Compression, bcp.CompressionLevel, rsMeta.OplogName, -1) if err != nil { return errors.Wrap(err, "oplog") } - err = b.cn.IncBackupSize(ctx, bcp.Name, snapshotSize+oplogSize) + err = query.IncBackupSize(ctx, b.cn.Conn, bcp.Name, snapshotSize+oplogSize) if err != nil { return errors.Wrap(err, "inc backup size") } @@ -215,26 +220,26 @@ func (b *Backup) doLogical( return nil } -func createBackupChunkSelector(ctx context.Context, m *mongo.Client, nss []string) (sel.ChunkSelector, error) { - ver, err := pbm.GetMongoVersion(ctx, m) +func createBackupChunkSelector(ctx context.Context, m connect.Client, nss []string) (util.ChunkSelector, error) { + ver, err := version.GetMongoVersion(ctx, m.MongoClient()) if err != nil { - return nil, errors.WithMessage(err, "get mongo version") + return nil, errors.Wrap(err, "get mongo version") } - var chunkSelector sel.ChunkSelector + var chunkSelector util.ChunkSelector if ver.Major() >= 5 { - chunkSelector = sel.NewUUIDChunkSelector() + chunkSelector = util.NewUUIDChunkSelector() } else { - chunkSelector = sel.NewNSChunkSelector() + chunkSelector = util.NewNSChunkSelector() } - cur, err := m.Database("config").Collection("collections").Find(ctx, bson.D{}) + cur, err := m.ConfigDatabase().Collection("collections").Find(ctx, bson.D{}) if err != nil { - return nil, errors.WithMessage(err, "query") + return nil, errors.Wrap(err, "query") } defer cur.Close(ctx) - selected := sel.MakeSelectedPred(nss) + selected := util.MakeSelectedPred(nss) for cur.Next(ctx) { ns := cur.Current.Lookup("_id").StringValue() if selected(ns) { @@ -242,7 +247,7 @@ func createBackupChunkSelector(ctx context.Context, m *mongo.Client, nss []strin } } if err := cur.Err(); err != nil { - return nil, errors.WithMessage(err, "cursor") + return nil, errors.Wrap(err, "cursor") } return chunkSelector, nil @@ -261,8 +266,8 @@ func makeConfigsvrNSFilter() archive.NSFilterFn { } } -func makeConfigsvrDocFilter(nss []string, selector sel.ChunkSelector) archive.DocFilterFn { - selectedNS := sel.MakeSelectedPred(nss) +func makeConfigsvrDocFilter(nss []string, selector util.ChunkSelector) archive.DocFilterFn { + selectedNS := util.MakeSelectedPred(nss) allowedDBs := make(map[string]bool) for _, ns := range nss { db, _, _ := strings.Cut(ns, ".") @@ -294,7 +299,7 @@ func getNamespacesSize(ctx context.Context, m *mongo.Client, db, coll string) (m } dbs, err := m.ListDatabaseNames(ctx, q) if err != nil { - return nil, errors.WithMessage(err, "list databases") + return nil, errors.Wrap(err, "list databases") } if len(dbs) == 0 { return rv, nil @@ -313,7 +318,7 @@ func getNamespacesSize(ctx context.Context, m *mongo.Client, db, coll string) (m } res, err := m.Database(db).ListCollectionSpecifications(ctx, q) if err != nil { - return errors.WithMessagef(err, "list collections for %q", db) + return errors.Wrapf(err, "list collections for %q", db) } if len(res) == 0 { return nil @@ -332,7 +337,7 @@ func getNamespacesSize(ctx context.Context, m *mongo.Client, db, coll string) (m ns := db + "." 
+ coll.Name res := m.Database(db).RunCommand(ctx, bson.D{{"collStats", coll.Name}}) if err := res.Err(); err != nil { - return errors.WithMessagef(err, "collStats %q", ns) + return errors.Wrapf(err, "collStats %q", ns) } var doc struct { @@ -340,7 +345,7 @@ func getNamespacesSize(ctx context.Context, m *mongo.Client, db, coll string) (m } if err := res.Decode(&doc); err != nil { - return errors.WithMessagef(err, "decode %q", ns) + return errors.Wrapf(err, "decode %q", ns) } mu.Lock() diff --git a/pbm/backup/physical.go b/pbm/backup/physical.go index 53ab05adf..1efbce2f5 100644 --- a/pbm/backup/physical.go +++ b/pbm/backup/physical.go @@ -2,7 +2,6 @@ package backup import ( "bytes" - "context" "encoding/json" "fmt" "os" @@ -12,17 +11,21 @@ import ( "time" "github.com/google/uuid" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/bsontype" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" "go.mongodb.org/mongo-driver/x/bsonx/bsoncore" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + plog "github.com/percona/percona-backup-mongodb/internal/log" + "github.com/percona/percona-backup-mongodb/internal/query" + "github.com/percona/percona-backup-mongodb/internal/storage" + "github.com/percona/percona-backup-mongodb/internal/topo" + "github.com/percona/percona-backup-mongodb/internal/types" "github.com/percona/percona-backup-mongodb/pbm" - "github.com/percona/percona-backup-mongodb/pbm/compress" - plog "github.com/percona/percona-backup-mongodb/pbm/log" - "github.com/percona/percona-backup-mongodb/pbm/storage" ) const cursorCreateRetries = 10 @@ -43,7 +46,7 @@ type BCoplogTS struct { // see https://www.percona.com/blog/2021/06/07/experimental-feature-backupcursorextend-in-percona-server-for-mongodb/ type BackupCursorData struct { Meta *Meta - Data []pbm.File + Data []types.File } type BackupCursor struct { @@ -118,12 +121,12 @@ func (bc *BackupCursor) Data(ctx context.Context) (_ *BackupCursorData, err erro } defer func() { if err != nil { - cur.Close(ctx) + cur.Close(context.Background()) } }() var m *Meta - var files []pbm.File + var files []types.File for cur.TryNext(ctx) { // metadata is the first if m == nil { @@ -138,7 +141,7 @@ func (bc *BackupCursor) Data(ctx context.Context) (_ *BackupCursorData, err erro continue } - var d pbm.File + var d types.File err = cur.Decode(&d) if err != nil { return nil, errors.Wrap(err, "decode filename") @@ -169,7 +172,7 @@ func (bc *BackupCursor) Data(ctx context.Context) (_ *BackupCursorData, err erro return &BackupCursorData{m, files}, nil } -func (bc *BackupCursor) Journals(upto primitive.Timestamp) ([]pbm.File, error) { +func (bc *BackupCursor) Journals(upto primitive.Timestamp) ([]types.File, error) { ctx := context.Background() cur, err := bc.n.Session().Database("admin").Aggregate(ctx, mongo.Pipeline{ @@ -180,7 +183,7 @@ func (bc *BackupCursor) Journals(upto primitive.Timestamp) ([]pbm.File, error) { } defer cur.Close(ctx) - var j []pbm.File + var j []types.File err = cur.All(ctx, &j) return j, err @@ -192,24 +195,28 @@ func (bc *BackupCursor) Close() { } } +func backupCursorName(s string) string { + return strings.NewReplacer("-", "", ":", "").Replace(s) +} + func (b *Backup) doPhysical( ctx context.Context, - bcp *pbm.BackupCmd, - opid pbm.OPID, - rsMeta *pbm.BackupReplset, - inf *pbm.NodeInfo, + bcp *types.BackupCmd, + opid types.OPID, + 
rsMeta *types.BackupReplset, + inf *topo.NodeInfo, stg storage.Storage, l *plog.Event, ) error { currOpts := bson.D{} - if b.typ == pbm.IncrementalBackup { + if b.typ == defs.IncrementalBackup { currOpts = bson.D{ // thisBackupName can be customized on retry - {"thisBackupName", pbm.BackupCursorName(bcp.Name)}, + {"thisBackupName", backupCursorName(bcp.Name)}, {"incrementalBackup", true}, } if !b.incrBase { - src, err := b.cn.LastIncrementalBackup() + src, err := query.LastIncrementalBackup(ctx, b.cn.Conn) if err != nil { return errors.Wrap(err, "define source backup") } @@ -219,7 +226,7 @@ func (b *Backup) doPhysical( // ? should be done during Init()? if inf.IsLeader() { - err := b.cn.SetSrcBackup(bcp.Name, src.Name) + err := query.SetSrcBackup(ctx, b.cn.Conn, bcp.Name, src.Name) if err != nil { return errors.Wrap(err, "set source backup in meta") } @@ -235,7 +242,7 @@ func (b *Backup) doPhysical( } if realSrcID == "" { // no custom thisBackupName was used. fallback to default - realSrcID = pbm.BackupCursorName(src.Name) + realSrcID = backupCursorName(src.Name) } currOpts = append(currOpts, bson.E{"srcBackupName", realSrcID}) @@ -261,7 +268,7 @@ func (b *Backup) doPhysical( bcur, err := cursor.Data(ctx) if err != nil { - if b.typ == pbm.IncrementalBackup && strings.Contains(err.Error(), "(UnknownError) 2: No such file or directory") { + if b.typ == defs.IncrementalBackup && strings.Contains(err.Error(), "(UnknownError) 2: No such file or directory") { return errors.New("can't find incremental backup history." + " Previous backup was made on another node." + " You can make a new base incremental backup to start a new history.") @@ -271,36 +278,36 @@ func (b *Backup) doPhysical( l.Debug("backup cursor id: %s", bcur.Meta.ID) - lwts, err := pbm.LastWrite(b.node.Session(), true) + lwts, err := topo.GetLastWrite(ctx, b.node.Session(), true) if err != nil { return errors.Wrap(err, "get shard's last write ts") } - defOpts := &pbm.MongodOpts{} + defOpts := &topo.MongodOpts{} defOpts.Storage.WiredTiger.EngineConfig.JournalCompressor = "snappy" defOpts.Storage.WiredTiger.CollectionConfig.BlockCompressor = "snappy" defOpts.Storage.WiredTiger.IndexConfig.PrefixCompression = true - mopts, err := b.node.GetOpts(defOpts) + mopts, err := topo.GetMongodOpts(ctx, b.node.Session(), defOpts) if err != nil { return errors.Wrap(err, "get mongod options") } rsMeta.MongodOpts = mopts - rsMeta.Status = pbm.StatusRunning + rsMeta.Status = defs.StatusRunning rsMeta.FirstWriteTS = bcur.Meta.OplogEnd.TS rsMeta.LastWriteTS = lwts if cursor.CustomThisID != "" { // custom thisBackupName was used rsMeta.CustomThisID = cursor.CustomThisID } - err = b.cn.AddRSMeta(bcp.Name, *rsMeta) + err = query.AddRSMeta(ctx, b.cn.Conn, bcp.Name, *rsMeta) if err != nil { return errors.Wrap(err, "add shard's metadata") } if inf.IsLeader() { - err := b.reconcileStatus(bcp.Name, opid.String(), pbm.StatusRunning, ref(b.timeouts.StartingStatus())) + err := b.reconcileStatus(ctx, bcp.Name, opid.String(), defs.StatusRunning, ref(b.timeouts.StartingStatus())) if err != nil { if errors.Is(err, errConvergeTimeOut) { return errors.Wrap(err, "couldn't get response from all shards") @@ -308,24 +315,24 @@ func (b *Backup) doPhysical( return errors.Wrap(err, "check cluster for backup started") } - err = b.setClusterFirstWrite(bcp.Name) + err = b.setClusterFirstWrite(ctx, bcp.Name) if err != nil { return errors.Wrap(err, "set cluster first write ts") } - err = b.setClusterLastWrite(bcp.Name) + err = b.setClusterLastWrite(ctx, bcp.Name) if err != nil { 
return errors.Wrap(err, "set cluster last write ts") } } // Waiting for cluster's StatusRunning to move further. - err = b.waitForStatus(bcp.Name, pbm.StatusRunning, nil) + err = b.waitForStatus(ctx, bcp.Name, defs.StatusRunning, nil) if err != nil { return errors.Wrap(err, "waiting for running") } - _, lwTS, err := b.waitForFirstLastWrite(bcp.Name) + _, lwTS, err := b.waitForFirstLastWrite(ctx, bcp.Name) if err != nil { return errors.Wrap(err, "get cluster first & last write ts") } @@ -347,21 +354,22 @@ func (b *Backup) doPhysical( data = append(data, *stgb) } - if b.typ == pbm.ExternalBackup { - return b.handleExternal(bcp, rsMeta, data, jrnls, bcur.Meta.DBpath, opid, inf, l) + if b.typ == defs.ExternalBackup { + return b.handleExternal(ctx, bcp, rsMeta, data, jrnls, bcur.Meta.DBpath, opid, inf, l) } return b.uploadPhysical(ctx, bcp, rsMeta, data, jrnls, bcur.Meta.DBpath, stg, l) } func (b *Backup) handleExternal( - bcp *pbm.BackupCmd, - rsMeta *pbm.BackupReplset, + ctx context.Context, + bcp *types.BackupCmd, + rsMeta *types.BackupReplset, data, - jrnls []pbm.File, + jrnls []types.File, dbpath string, - opid pbm.OPID, - inf *pbm.NodeInfo, + opid types.OPID, + inf *topo.NodeInfo, l *plog.Event, ) error { for _, f := range append(data, jrnls...) { @@ -374,15 +382,15 @@ func (b *Backup) handleExternal( // original LastWriteTS in the meta stored on PBM storage. As rsMeta might // be used outside of this method. fsMeta := *rsMeta - bmeta, err := b.cn.GetBackupMeta(bcp.Name) + bmeta, err := query.GetBackupMeta(ctx, b.cn.Conn, bcp.Name) if err == nil { fsMeta.LastWriteTS = bmeta.LastWriteTS } else { l.Warning("define LastWriteTS: get backup meta: %v", err) } // save rs meta along with the data files so it can be used during the restore - metaf := fmt.Sprintf(pbm.ExternalRsMetaFile, fsMeta.Name) - fsMeta.Files = append(fsMeta.Files, pbm.File{ + metaf := fmt.Sprintf(defs.ExternalRsMetaFile, fsMeta.Name) + fsMeta.Files = append(fsMeta.Files, types.File{ Name: metaf, }) metadst := filepath.Join(dbpath, metaf) @@ -392,20 +400,20 @@ func (b *Backup) handleExternal( l.Warning("failed to save rs meta file <%s>: %v", metadst, err) } - err = b.cn.RSSetPhyFiles(bcp.Name, rsMeta.Name, rsMeta) + err = query.RSSetPhyFiles(ctx, b.cn.Conn, bcp.Name, rsMeta.Name, rsMeta) if err != nil { return errors.Wrap(err, "set shard's files list") } - err = b.toState(pbm.StatusCopyReady, bcp.Name, opid.String(), inf, nil) + err = b.toState(ctx, defs.StatusCopyReady, bcp.Name, opid.String(), inf, nil) if err != nil { - return errors.Wrapf(err, "converge to %s", pbm.StatusCopyReady) + return errors.Wrapf(err, "converge to %s", defs.StatusCopyReady) } l.Info("waiting for the datadir to be copied") - err = b.waitForStatus(bcp.Name, pbm.StatusCopyDone, nil) + err = b.waitForStatus(ctx, bcp.Name, defs.StatusCopyDone, nil) if err != nil { - return errors.Wrapf(err, "waiting for %s", pbm.StatusCopyDone) + return errors.Wrapf(err, "waiting for %s", defs.StatusCopyDone) } err = os.Remove(metadst) @@ -416,7 +424,7 @@ func (b *Backup) handleExternal( return nil } -func writeRSmetaToDisk(fname string, rsMeta *pbm.BackupReplset) error { +func writeRSmetaToDisk(fname string, rsMeta *types.BackupReplset) error { fw, err := os.OpenFile(fname, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o600) if err != nil { return errors.Wrapf(err, "create/open") @@ -439,10 +447,10 @@ func writeRSmetaToDisk(fname string, rsMeta *pbm.BackupReplset) error { func (b *Backup) uploadPhysical( ctx context.Context, - bcp *pbm.BackupCmd, - rsMeta *pbm.BackupReplset, + 
bcp *types.BackupCmd, + rsMeta *types.BackupReplset, data, - jrnls []pbm.File, + jrnls []types.File, dbpath string, stg storage.Storage, l *plog.Event, @@ -450,7 +458,7 @@ func (b *Backup) uploadPhysical( var err error l.Info("uploading data") rsMeta.Files, err = uploadFiles(ctx, data, bcp.Name+"/"+rsMeta.Name, dbpath, - b.typ == pbm.IncrementalBackup, stg, bcp.Compression, bcp.CompressionLevel, l) + b.typ == defs.IncrementalBackup, stg, bcp.Compression, bcp.CompressionLevel, l) if err != nil { return err } @@ -465,7 +473,7 @@ func (b *Backup) uploadPhysical( l.Info("uploading journals done") rsMeta.Files = append(rsMeta.Files, ju...) - err = b.cn.RSSetPhyFiles(bcp.Name, rsMeta.Name, rsMeta) + err = query.RSSetPhyFiles(ctx, b.cn.Conn, bcp.Name, rsMeta.Name, rsMeta) if err != nil { return errors.Wrap(err, "set shard's files list") } @@ -475,7 +483,7 @@ func (b *Backup) uploadPhysical( size += f.StgSize } - err = b.cn.IncBackupSize(ctx, bcp.Name, size) + err = query.IncBackupSize(ctx, b.cn.Conn, bcp.Name, size) if err != nil { return errors.Wrap(err, "inc backup size") } @@ -485,7 +493,7 @@ func (b *Backup) uploadPhysical( const storagebson = "storage.bson" -func getStorageBSON(dbpath string) (*pbm.File, error) { +func getStorageBSON(dbpath string) (*types.File, error) { f, err := os.Stat(path.Join(dbpath, storagebson)) if err != nil { if errors.Is(err, os.ErrNotExist) { @@ -494,7 +502,7 @@ func getStorageBSON(dbpath string) (*pbm.File, error) { return nil, err } - return &pbm.File{ + return &types.File{ Name: path.Join(dbpath, storagebson), Len: f.Size(), Size: f.Size(), @@ -507,12 +515,12 @@ type UUID struct{ uuid.UUID } // MarshalBSONValue implements the bson.ValueMarshaler interface. func (id UUID) MarshalBSONValue() (bsontype.Type, []byte, error) { - return bsontype.Binary, bsoncore.AppendBinary(nil, 4, id.UUID[:]), nil + return bson.TypeBinary, bsoncore.AppendBinary(nil, 4, id.UUID[:]), nil } // UnmarshalBSONValue implements the bson.ValueUnmarshaler interface. func (id *UUID) UnmarshalBSONValue(t bsontype.Type, raw []byte) error { - if t != bsontype.Binary { + if t != bson.TypeBinary { return errors.New("invalid format on unmarshal bson value") } @@ -539,15 +547,15 @@ func (id *UUID) IsZero() bool { // what files shouldn't be restored (those which isn't in the target backup). 
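Note: the UUID wrapper in physical.go switches from the deprecated `bsontype.Binary` constant to `bson.TypeBinary`. Below is a compact, self-contained version of the same value (un)marshalling; treat it as an illustration of the API change, not the PBM type — the unmarshal body, including the `bsoncore.ReadBinary` call, is assumed because the hunk is cut off here.

```go
package backuputil

import (
	"fmt"

	"github.com/google/uuid"
	"go.mongodb.org/mongo-driver/bson"
	"go.mongodb.org/mongo-driver/bson/bsontype"
	"go.mongodb.org/mongo-driver/x/bsonx/bsoncore"
)

// UUID wraps uuid.UUID so it round-trips as BSON binary, subtype 4.
type UUID struct{ uuid.UUID }

// MarshalBSONValue stores the UUID as BSON binary using the
// non-deprecated bson.TypeBinary constant.
func (id UUID) MarshalBSONValue() (bsontype.Type, []byte, error) {
	return bson.TypeBinary, bsoncore.AppendBinary(nil, 4, id.UUID[:]), nil
}

// UnmarshalBSONValue accepts only BSON binary and copies the payload back.
func (id *UUID) UnmarshalBSONValue(t bsontype.Type, raw []byte) error {
	if t != bson.TypeBinary {
		return fmt.Errorf("unexpected BSON type %s for UUID", t)
	}
	_, data, _, ok := bsoncore.ReadBinary(raw)
	if !ok || len(data) != 16 {
		return fmt.Errorf("malformed BSON binary for UUID")
	}
	copy(id.UUID[:], data)
	return nil
}
```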
func uploadFiles( ctx context.Context, - files []pbm.File, + files []types.File, subdir string, trimPrefix string, incr bool, stg storage.Storage, - comprT compress.CompressionType, + comprT defs.CompressionType, comprL *int, l *plog.Event, -) ([]pbm.File, error) { +) ([]types.File, error) { if len(files) == 0 { return nil, nil } @@ -559,11 +567,11 @@ func uploadFiles( } wfile := files[0] - data := []pbm.File{} + data := []types.File{} for _, file := range files[1:] { select { case <-ctx.Done(): - return nil, ErrCancelled + return nil, storage.ErrCancelled default: } @@ -616,13 +624,13 @@ func uploadFiles( func writeFile( ctx context.Context, - src pbm.File, + src types.File, dst string, stg storage.Storage, - compression compress.CompressionType, + compression defs.CompressionType, compressLevel *int, l *plog.Event, -) (*pbm.File, error) { +) (*types.File, error) { fstat, err := os.Stat(src.Name) if err != nil { return nil, errors.Wrap(err, "get file stat") @@ -641,7 +649,7 @@ func writeFile( } l.Debug("uploading: %s %s", src, fmtSize(sz)) - _, err = Upload(ctx, &src, stg, compression, compressLevel, dst, sz) + _, err = storage.Upload(ctx, &src, stg, compression, compressLevel, dst, sz) if err != nil { return nil, errors.Wrap(err, "upload file") } @@ -651,7 +659,7 @@ func writeFile( return nil, errors.Wrapf(err, "get storage file stat %s", dst) } - return &pbm.File{ + return &types.File{ Name: src.Name, Size: fstat.Size(), Fmode: fstat.Mode(), diff --git a/pbm/cleanup.go b/pbm/cleanup.go index 01f1aa310..a385ab8ae 100644 --- a/pbm/cleanup.go +++ b/pbm/cleanup.go @@ -1,32 +1,36 @@ package pbm import ( - "context" - - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" "go.mongodb.org/mongo-driver/mongo/options" - "github.com/percona/percona-backup-mongodb/pbm/sel" + "github.com/percona/percona-backup-mongodb/internal/config" + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/types" + "github.com/percona/percona-backup-mongodb/internal/util" + "github.com/percona/percona-backup-mongodb/pbm/oplog" ) type CleanupInfo struct { - Backups []BackupMeta `json:"backups"` - Chunks []OplogChunk `json:"chunks"` + Backups []types.BackupMeta `json:"backups"` + Chunks []oplog.OplogChunk `json:"chunks"` } -func MakeCleanupInfo(ctx context.Context, m *mongo.Client, ts primitive.Timestamp) (CleanupInfo, error) { +func MakeCleanupInfo(ctx context.Context, m connect.Client, ts primitive.Timestamp) (CleanupInfo, error) { backups, err := listBackupsBefore(ctx, m, primitive.Timestamp{T: ts.T + 1}) if err != nil { - return CleanupInfo{}, errors.WithMessage(err, "list backups before") + return CleanupInfo{}, errors.Wrap(err, "list backups before") } exclude := true if l := len(backups) - 1; l != -1 && backups[l].LastWriteTS.T == ts.T { // there is a backup at the `ts` - if backups[l].Status == StatusDone && !sel.IsSelective(backups[l].Namespaces) { + if backups[l].Status == defs.StatusDone && !util.IsSelective(backups[l].Namespaces) { // it can be used to fully restore data to the `ts` state. 
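Note: uploadFiles checks the context between files with a non-blocking select, returning the cancellation sentinel (now `storage.ErrCancelled`) rather than starting the next upload; an in-flight file still finishes. A generic sketch of that between-items cancellation check (names are illustrative):

```go
package backuputil

import (
	"context"
	"errors"
)

// ErrCancelled mirrors the sentinel the backup code returns when the context
// is cancelled between work items (illustrative; the patch keeps the real one
// in internal/storage).
var ErrCancelled = errors.New("backup canceled")

// processAll runs fn for each item, checking for cancellation between items;
// the non-blocking default keeps the loop moving while ctx is still live.
func processAll[T any](ctx context.Context, items []T, fn func(T) error) error {
	for _, it := range items {
		select {
		case <-ctx.Done():
			return ErrCancelled
		default:
		}
		if err := fn(it); err != nil {
			return err
		}
	}
	return nil
}
```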
// no need to exclude any base snapshot and chunks before the `ts` exclude = false @@ -39,12 +43,12 @@ func MakeCleanupInfo(ctx context.Context, m *mongo.Client, ts primitive.Timestam // exclude the last incremental backups if it is required for following (after the `ts`) backups, err = extractLastIncrementalChain(ctx, m, backups) if err != nil { - return CleanupInfo{}, errors.WithMessage(err, "extract last incremental chain") + return CleanupInfo{}, errors.Wrap(err, "extract last incremental chain") } chunks, err := listChunksBefore(ctx, m, ts) if err != nil { - return CleanupInfo{}, errors.WithMessage(err, "list chunks before") + return CleanupInfo{}, errors.Wrap(err, "list chunks before") } if !exclude { // all chunks can be deleted. there is a backup to fully restore data @@ -73,9 +77,9 @@ func MakeCleanupInfo(ctx context.Context, m *mongo.Client, ts primitive.Timestam excluded := false origin := chunks - chunks = []OplogChunk{} + chunks = []oplog.OplogChunk{} for i := range origin { - if primitive.CompareTimestamp(backups[baseIndex].LastWriteTS, origin[i].EndTS) != -1 { + if backups[baseIndex].LastWriteTS.Compare(origin[i].EndTS) != -1 { chunks = append(chunks, origin[i]) } else { excluded = true @@ -93,28 +97,28 @@ func MakeCleanupInfo(ctx context.Context, m *mongo.Client, ts primitive.Timestam } // listBackupsBefore returns backups with restore cluster time less than or equals to ts -func listBackupsBefore(ctx context.Context, m *mongo.Client, ts primitive.Timestamp) ([]BackupMeta, error) { +func listBackupsBefore(ctx context.Context, m connect.Client, ts primitive.Timestamp) ([]types.BackupMeta, error) { f := bson.D{{"last_write_ts", bson.M{"$lt": ts}}} o := options.Find().SetSort(bson.D{{"last_write_ts", 1}}) - cur, err := m.Database(DB).Collection(BcpCollection).Find(ctx, f, o) + cur, err := m.BcpCollection().Find(ctx, f, o) if err != nil { - return nil, errors.WithMessage(err, "query") + return nil, errors.Wrap(err, "query") } - rv := []BackupMeta{} + rv := []types.BackupMeta{} err = cur.All(ctx, &rv) - return rv, errors.WithMessage(err, "cursor: all") + return rv, errors.Wrap(err, "cursor: all") } -func canDeleteBaseSnapshot(ctx context.Context, m *mongo.Client, lw primitive.Timestamp) (bool, error) { +func canDeleteBaseSnapshot(ctx context.Context, m connect.Client, lw primitive.Timestamp) (bool, error) { f := bson.D{ {"last_write_ts", bson.M{"$gte": lw}}, {"nss", nil}, - {"type", bson.M{"$ne": ExternalBackup}}, - {"status", StatusDone}, + {"type", bson.M{"$ne": defs.ExternalBackup}}, + {"status", defs.StatusDone}, } o := options.FindOne().SetProjection(bson.D{{"last_write_ts", 1}}) - err := m.Database(DB).Collection(BcpCollection).FindOne(ctx, f, o).Err() + err := m.BcpCollection().FindOne(ctx, f, o).Err() if err == nil { // there is a base snapshot after `lw` return true, nil @@ -124,7 +128,7 @@ func canDeleteBaseSnapshot(ctx context.Context, m *mongo.Client, lw primitive.Ti return false, err } - enabled, oplogOnly, err := isPITREnabled(ctx, m) + enabled, oplogOnly, err := config.IsPITREnabled(ctx, m) if err != nil { return false, err } @@ -135,24 +139,28 @@ func canDeleteBaseSnapshot(ctx context.Context, m *mongo.Client, lw primitive.Ti } // listChunksBefore returns oplog chunks that contain an op at the ts -func listChunksBefore(ctx context.Context, m *mongo.Client, ts primitive.Timestamp) ([]OplogChunk, error) { +func listChunksBefore(ctx context.Context, m connect.Client, ts primitive.Timestamp) ([]oplog.OplogChunk, error) { f := bson.D{{"start_ts", bson.M{"$lt": ts}}} o 
:= options.Find().SetSort(bson.D{{"start_ts", 1}}) - cur, err := m.Database(DB).Collection(PITRChunksCollection).Find(ctx, f, o) + cur, err := m.PITRChunksCollection().Find(ctx, f, o) if err != nil { - return nil, errors.WithMessage(err, "query") + return nil, errors.Wrap(err, "query") } - rv := []OplogChunk{} + rv := []oplog.OplogChunk{} err = cur.All(ctx, &rv) - return rv, errors.WithMessage(err, "cursor: all") + return rv, errors.Wrap(err, "cursor: all") } -func extractLastIncrementalChain(ctx context.Context, m *mongo.Client, bcps []BackupMeta) ([]BackupMeta, error) { +func extractLastIncrementalChain( + ctx context.Context, + m connect.Client, + bcps []types.BackupMeta, +) ([]types.BackupMeta, error) { // lookup for the last incremental i := len(bcps) - 1 for ; i != -1; i-- { - if bcps[i].Type == IncrementalBackup { + if bcps[i].Type == defs.IncrementalBackup { break } } @@ -163,13 +171,13 @@ func extractLastIncrementalChain(ctx context.Context, m *mongo.Client, bcps []Ba // check if there is an increment based on the backup f := bson.D{{"src_backup", bcps[i].Name}} - res := m.Database(DB).Collection(BcpCollection).FindOne(ctx, f) + res := m.BcpCollection().FindOne(ctx, f) if err := res.Err(); err != nil { if errors.Is(err, mongo.ErrNoDocuments) { // the backup is the last increment in the chain err = nil } - return bcps, errors.WithMessage(err, "query") + return bcps, errors.Wrap(err, "query") } for base := bcps[i].Name; i != -1; i-- { @@ -191,7 +199,7 @@ func extractLastIncrementalChain(ctx context.Context, m *mongo.Client, bcps []Ba return bcps, nil } -func findLastBaseSnapshotIndex(bcps []BackupMeta) int { +func findLastBaseSnapshotIndex(bcps []types.BackupMeta) int { for i := len(bcps) - 1; i != -1; i-- { if isBaseSnapshot(&bcps[i]) { return i @@ -201,11 +209,11 @@ func findLastBaseSnapshotIndex(bcps []BackupMeta) int { return -1 } -func isBaseSnapshot(bcp *BackupMeta) bool { - if bcp.Status != StatusDone { +func isBaseSnapshot(bcp *types.BackupMeta) bool { + if bcp.Status != defs.StatusDone { return false } - if bcp.Type == ExternalBackup || sel.IsSelective(bcp.Namespaces) { + if bcp.Type == defs.ExternalBackup || util.IsSelective(bcp.Namespaces) { return false } diff --git a/pbm/cmd.go b/pbm/cmd.go deleted file mode 100644 index 3cae4c63a..000000000 --- a/pbm/cmd.go +++ /dev/null @@ -1,99 +0,0 @@ -package pbm - -import ( - "time" - - "github.com/pkg/errors" - "go.mongodb.org/mongo-driver/bson" -) - -type CursorClosedError struct { - Err error -} - -func (c CursorClosedError) Error() string { - return "cursor was closed with:" + c.Err.Error() -} - -func (c CursorClosedError) Is(err error) bool { - if err == nil { - return false - } - - _, ok := err.(CursorClosedError) //nolint:errorlint - return ok -} - -func (c CursorClosedError) Unwrap() error { - return c.Err -} - -func (p *PBM) ListenCmd(cl <-chan struct{}) (<-chan Cmd, <-chan error) { - cmd := make(chan Cmd) - errc := make(chan error) - - go func() { - defer close(cmd) - defer close(errc) - - ts := time.Now().UTC().Unix() - var lastTS int64 - var lastCmd Command - for { - select { - case <-cl: - return - default: - } - cur, err := p.Conn.Database(DB).Collection(CmdStreamCollection).Find( - p.ctx, - bson.M{"ts": bson.M{"$gte": ts}}, - ) - if err != nil { - errc <- errors.Wrap(err, "watch the cmd stream") - continue - } - - for cur.Next(p.ctx) { - c := Cmd{} - err := cur.Decode(&c) - if err != nil { - errc <- errors.Wrap(err, "message decode") - continue - } - - if c.Cmd == lastCmd && c.TS == lastTS { - continue - } - - opid, 
ok := cur.Current.Lookup("_id").ObjectIDOK() - if !ok { - errc <- errors.New("unable to get operation ID") - continue - } - - c.OPID = OPID(opid) - - lastCmd = c.Cmd - lastTS = c.TS - cmd <- c - ts = time.Now().UTC().Unix() - } - if err := cur.Err(); err != nil { - errc <- CursorClosedError{err} - cur.Close(p.ctx) - return - } - cur.Close(p.ctx) - time.Sleep(time.Second * 1) - } - }() - - return cmd, errc -} - -func (p *PBM) SendCmd(cmd Cmd) error { - cmd.TS = time.Now().UTC().Unix() - _, err := p.Conn.Database(DB).Collection(CmdStreamCollection).InsertOne(p.ctx, cmd) - return err -} diff --git a/pbm/delete.go b/pbm/delete.go index e2b790b51..9662f56de 100644 --- a/pbm/delete.go +++ b/pbm/delete.go @@ -4,36 +4,42 @@ import ( "fmt" "time" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" "golang.org/x/sync/errgroup" - "github.com/percona/percona-backup-mongodb/pbm/log" - "github.com/percona/percona-backup-mongodb/pbm/sel" - "github.com/percona/percona-backup-mongodb/pbm/storage" - "github.com/percona/percona-backup-mongodb/version" + "github.com/percona/percona-backup-mongodb/internal/config" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/log" + "github.com/percona/percona-backup-mongodb/internal/query" + "github.com/percona/percona-backup-mongodb/internal/storage" + "github.com/percona/percona-backup-mongodb/internal/types" + "github.com/percona/percona-backup-mongodb/internal/util" + "github.com/percona/percona-backup-mongodb/internal/version" + "github.com/percona/percona-backup-mongodb/pbm/oplog" ) // DeleteBackup deletes backup with the given name from the current storage // and pbm database -func (p *PBM) DeleteBackup(name string, l *log.Event) error { - meta, err := p.GetBackupMeta(name) +func (p *PBM) DeleteBackup(ctx context.Context, name string, l *log.Event) error { + meta, err := query.GetBackupMeta(ctx, p.Conn, name) if err != nil { return errors.Wrap(err, "get backup meta") } - tlns, err := p.PITRTimelines() + tlns, err := oplog.PITRTimelines(ctx, p.Conn) if err != nil { return errors.Wrap(err, "get PITR chunks") } - err = p.probeDelete(meta, tlns) + err = p.probeDelete(ctx, meta, tlns) if err != nil { return err } - stg, err := p.GetStorage(l) + stg, err := util.GetStorage(ctx, p.Conn, l) if err != nil { return errors.Wrap(err, "get storage") } @@ -43,7 +49,7 @@ func (p *PBM) DeleteBackup(name string, l *log.Event) error { return errors.Wrap(err, "delete files from storage") } - _, err = p.Conn.Database(DB).Collection(BcpCollection).DeleteOne(p.ctx, bson.M{"name": meta.Name}) + _, err = p.Conn.BcpCollection().DeleteOne(ctx, bson.M{"name": meta.Name}) if err != nil { return errors.Wrap(err, "delete metadata from db") } @@ -51,15 +57,15 @@ func (p *PBM) DeleteBackup(name string, l *log.Event) error { return nil } -func (p *PBM) probeDelete(backup *BackupMeta, tlns []Timeline) error { +func (p *PBM) probeDelete(ctx context.Context, backup *types.BackupMeta, tlns []oplog.Timeline) error { // check if backup isn't running switch backup.Status { - case StatusDone, StatusCancelled, StatusError: + case defs.StatusDone, defs.StatusCancelled, defs.StatusError: default: return errors.Errorf("unable to delete backup in %s state", backup.Status) } - if backup.Type == ExternalBackup || sel.IsSelective(backup.Namespaces) { + if backup.Type == 
defs.ExternalBackup || util.IsSelective(backup.Namespaces) { return nil } @@ -70,7 +76,7 @@ func (p *PBM) probeDelete(backup *BackupMeta, tlns []Timeline) error { } } - ispitr, err := p.IsPITR() + ispitr, _, err := config.IsPITREnabled(ctx, p.Conn) if err != nil { return errors.Wrap(err, "unable check pitr state") } @@ -80,7 +86,7 @@ func (p *PBM) probeDelete(backup *BackupMeta, tlns []Timeline) error { return nil } - has, err := p.BackupHasNext(backup) + has, err := query.BackupHasNext(ctx, p.Conn, backup) if err != nil { return errors.Wrap(err, "check next backup") } @@ -92,11 +98,11 @@ func (p *PBM) probeDelete(backup *BackupMeta, tlns []Timeline) error { } // DeleteBackupFiles removes backup's artifacts from storage -func (p *PBM) DeleteBackupFiles(meta *BackupMeta, stg storage.Storage) error { +func (p *PBM) DeleteBackupFiles(meta *types.BackupMeta, stg storage.Storage) error { switch meta.Type { - case PhysicalBackup, IncrementalBackup: + case defs.PhysicalBackup, defs.IncrementalBackup: return p.deletePhysicalBackupFiles(meta, stg) - case LogicalBackup: + case defs.LogicalBackup: fallthrough default: var err error @@ -111,7 +117,7 @@ func (p *PBM) DeleteBackupFiles(meta *BackupMeta, stg storage.Storage) error { } // DeleteBackupFiles removes backup's artifacts from storage -func (p *PBM) deletePhysicalBackupFiles(meta *BackupMeta, stg storage.Storage) error { +func (p *PBM) deletePhysicalBackupFiles(meta *types.BackupMeta, stg storage.Storage) error { for _, r := range meta.Replsets { for _, f := range r.Files { fname := meta.Name + "/" + r.Name + "/" + f.Name + meta.Compression.Suffix() @@ -135,7 +141,7 @@ func (p *PBM) deletePhysicalBackupFiles(meta *BackupMeta, stg storage.Storage) e } } - err := stg.Delete(meta.Name + MetadataFileSuffix) + err := stg.Delete(meta.Name + defs.MetadataFileSuffix) if errors.Is(err, storage.ErrNotExist) { return nil } @@ -144,7 +150,7 @@ func (p *PBM) deletePhysicalBackupFiles(meta *BackupMeta, stg storage.Storage) e } // deleteLogicalBackupFiles removes backup's artifacts from storage -func (p *PBM) deleteLogicalBackupFiles(meta *BackupMeta, stg storage.Storage) error { +func (p *PBM) deleteLogicalBackupFiles(meta *types.BackupMeta, stg storage.Storage) error { if stg.Type() == storage.Filesystem { return p.deleteLogicalBackupFilesFromFS(stg, meta.Name) } @@ -152,36 +158,36 @@ func (p *PBM) deleteLogicalBackupFiles(meta *BackupMeta, stg storage.Storage) er prefix := meta.Name + "/" files, err := stg.List(prefix, "") if err != nil { - return errors.WithMessagef(err, "get file list: %q", prefix) + return errors.Wrapf(err, "get file list: %q", prefix) } eg := errgroup.Group{} for _, f := range files { ns := prefix + f.Name eg.Go(func() error { - return errors.WithMessagef(stg.Delete(ns), "delete %q", ns) + return errors.Wrapf(stg.Delete(ns), "delete %q", ns) }) } if err := eg.Wait(); err != nil { return err } - bcpMF := meta.Name + MetadataFileSuffix - return errors.WithMessagef(stg.Delete(bcpMF), "delete %q", bcpMF) + bcpMF := meta.Name + defs.MetadataFileSuffix + return errors.Wrapf(stg.Delete(bcpMF), "delete %q", bcpMF) } // deleteLogicalBackupFiles removes backup's artifacts from storage func (p *PBM) deleteLogicalBackupFilesFromFS(stg storage.Storage, bcpName string) error { if err := stg.Delete(bcpName); err != nil { - return errors.WithMessagef(err, "delete %q", bcpName) + return errors.Wrapf(err, "delete %q", bcpName) } - bcpMetafile := bcpName + MetadataFileSuffix - return errors.WithMessagef(stg.Delete(bcpMetafile), "delete %q", bcpMetafile) 
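Note: deleteLogicalBackupFiles fans the per-file deletions out through `errgroup` and only then removes the backup's metadata file. A minimal sketch of that fan-out against an assumed `Delete(name string) error` storage method (matching the calls in the diff):

```go
package backuputil

import (
	"fmt"

	"golang.org/x/sync/errgroup"
)

// deleter is the slice of the storage API this sketch needs.
type deleter interface {
	Delete(name string) error
}

// deleteAll removes every object concurrently and returns the first error.
func deleteAll(stg deleter, names []string) error {
	eg := errgroup.Group{}
	for _, name := range names {
		name := name // capture per-iteration value for the closure (pre-Go 1.22 loop semantics)
		eg.Go(func() error {
			if err := stg.Delete(name); err != nil {
				return fmt.Errorf("delete %q: %w", name, err)
			}
			return nil
		})
	}
	return eg.Wait()
}
```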
+ bcpMetafile := bcpName + defs.MetadataFileSuffix + return errors.Wrapf(stg.Delete(bcpMetafile), "delete %q", bcpMetafile) } // deleteLegacyLogicalBackupFiles removes backup's artifacts from storage -func (p *PBM) deleteLegacyLogicalBackupFiles(meta *BackupMeta, stg storage.Storage) error { +func (p *PBM) deleteLegacyLogicalBackupFiles(meta *types.BackupMeta, stg storage.Storage) error { for _, r := range meta.Replsets { err := stg.Delete(r.OplogName) if err != nil && !errors.Is(err, storage.ErrNotExist) { @@ -193,7 +199,7 @@ func (p *PBM) deleteLegacyLogicalBackupFiles(meta *BackupMeta, stg storage.Stora } } - err := stg.Delete(meta.Name + MetadataFileSuffix) + err := stg.Delete(meta.Name + defs.MetadataFileSuffix) if errors.Is(err, storage.ErrNotExist) { return nil } @@ -202,19 +208,19 @@ func (p *PBM) deleteLegacyLogicalBackupFiles(meta *BackupMeta, stg storage.Stora } // DeleteOlderThan deletes backups which older than given Time -func (p *PBM) DeleteOlderThan(t time.Time, l *log.Event) error { - stg, err := p.GetStorage(l) +func (p *PBM) DeleteOlderThan(ctx context.Context, t time.Time, l *log.Event) error { + stg, err := util.GetStorage(ctx, p.Conn, l) if err != nil { return errors.Wrap(err, "get storage") } - tlns, err := p.PITRTimelines() + tlns, err := oplog.PITRTimelines(ctx, p.Conn) if err != nil { return errors.Wrap(err, "get PITR chunks") } - cur, err := p.Conn.Database(DB).Collection(BcpCollection).Find( - p.ctx, + cur, err := p.Conn.BcpCollection().Find( + ctx, bson.M{ "start_ts": bson.M{"$lt": t.Unix()}, }, @@ -222,16 +228,16 @@ func (p *PBM) DeleteOlderThan(t time.Time, l *log.Event) error { if err != nil { return errors.Wrap(err, "get backups list") } - defer cur.Close(p.ctx) + defer cur.Close(ctx) - for cur.Next(p.ctx) { - m := &BackupMeta{} + for cur.Next(ctx) { + m := &types.BackupMeta{} err := cur.Decode(m) if err != nil { return errors.Wrap(err, "decode backup meta") } - err = p.probeDelete(m, tlns) + err = p.probeDelete(ctx, m, tlns) if err != nil { l.Info("deleting %s: %v", m.Name, err) continue @@ -242,7 +248,7 @@ func (p *PBM) DeleteOlderThan(t time.Time, l *log.Event) error { return errors.Wrap(err, "delete backup files from storage") } - _, err = p.Conn.Database(DB).Collection(BcpCollection).DeleteOne(p.ctx, bson.M{"name": m.Name}) + _, err = p.Conn.BcpCollection().DeleteOne(ctx, bson.M{"name": m.Name}) if err != nil { return errors.Wrap(err, "delete backup meta from db") } @@ -261,38 +267,43 @@ func (p *PBM) DeleteOlderThan(t time.Time, l *log.Event) error { // backup is `10` it will leave `11` and `12` chunks as well since `13` won't be restorable // without `11` and `12` (contiguous timeline from the backup). // It deletes all chunks if `until` is nil. 
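Note: DeletePITR, just below, turns the wall-clock cutoff into a cluster timestamp by putting the Unix seconds into `T` with ordinal `0`, and other hunks in this patch replace the deprecated `primitive.CompareTimestamp` helper with the `Timestamp.Compare` method. A small illustration of both (the helper names here are mine):

```go
package backuputil

import (
	"time"

	"go.mongodb.org/mongo-driver/bson/primitive"
)

// tsFromTime builds the lowest cluster timestamp within the given second,
// matching the conversion used by DeletePITR (T = Unix seconds, I = 0).
func tsFromTime(t time.Time) primitive.Timestamp {
	return primitive.Timestamp{T: uint32(t.Unix()), I: 0}
}

// isNotLater reports whether a happened at or before b, using the
// Timestamp.Compare method (-1 before, 0 equal, 1 after) that this patch
// uses instead of primitive.CompareTimestamp.
func isNotLater(a, b primitive.Timestamp) bool {
	return a.Compare(b) <= 0
}
```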
-func (p *PBM) DeletePITR(until *time.Time, l *log.Event) error { - stg, err := p.GetStorage(l) +func (p *PBM) DeletePITR(ctx context.Context, until *time.Time, l *log.Event) error { + stg, err := util.GetStorage(ctx, p.Conn, l) if err != nil { return errors.Wrap(err, "get storage") } var zerots primitive.Timestamp if until == nil { - return p.deleteChunks(zerots, zerots, stg, l) + return p.deleteChunks(ctx, zerots, zerots, stg, l) } t := primitive.Timestamp{T: uint32(until.Unix()), I: 0} - bcp, err := p.GetLastBackup(&t) - if errors.Is(err, ErrNotFound) { - return p.deleteChunks(zerots, t, stg, l) + bcp, err := query.GetLastBackup(ctx, p.Conn, &t) + if errors.Is(err, errors.ErrNotFound) { + return p.deleteChunks(ctx, zerots, t, stg, l) } if err != nil { return errors.Wrap(err, "get recent backup") } - return p.deleteChunks(zerots, bcp.LastWriteTS, stg, l) + return p.deleteChunks(ctx, zerots, bcp.LastWriteTS, stg, l) } -func (p *PBM) deleteChunks(start, until primitive.Timestamp, stg storage.Storage, l *log.Event) error { - var chunks []OplogChunk +func (p *PBM) deleteChunks( + ctx context.Context, + start, until primitive.Timestamp, + stg storage.Storage, + l *log.Event, +) error { + var chunks []oplog.OplogChunk var err error if until.T > 0 { - chunks, err = p.PITRGetChunksSliceUntil("", until) + chunks, err = oplog.PITRGetChunksSliceUntil(ctx, p.Conn, "", until) } else { - chunks, err = p.PITRGetChunksSlice("", start, until) + chunks, err = oplog.PITRGetChunksSlice(ctx, p.Conn, "", start, until) } if err != nil { return errors.Wrap(err, "get pitr chunks") @@ -307,8 +318,8 @@ func (p *PBM) deleteChunks(start, until primitive.Timestamp, stg storage.Storage return errors.Wrapf(err, "delete pitr chunk '%s' (%v) from storage", chnk.FName, chnk) } - _, err = p.Conn.Database(DB).Collection(PITRChunksCollection).DeleteOne( - p.ctx, + _, err = p.Conn.PITRChunksCollection().DeleteOne( + ctx, bson.D{ {"rs", chnk.RS}, {"start_ts", chnk.StartTS}, diff --git a/pbm/node.go b/pbm/node.go index a78cfb7f4..8885424ef 100644 --- a/pbm/node.go +++ b/pbm/node.go @@ -1,55 +1,41 @@ package pbm import ( - "context" "fmt" "strings" "time" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" "go.mongodb.org/mongo-driver/mongo/options" - "go.mongodb.org/mongo-driver/mongo/readconcern" + + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/topo" + "github.com/percona/percona-backup-mongodb/internal/types" ) type Node struct { rs string me string - ctx context.Context cn *mongo.Client curi string dumpConns int } -// ReplsetRole is a replicaset role in sharded cluster -type ReplsetRole string - -const ( - RoleUnknown ReplsetRole = "unknown" - RoleShard ReplsetRole = "shard" - RoleConfigSrv ReplsetRole = "configsrv" - - // TmpUsersCollection and TmpRoles are tmp collections used to avoid - // user related issues while resoring on new cluster and preserving UUID - // See https://jira.percona.com/browse/PBM-425, https://jira.percona.com/browse/PBM-636 - TmpUsersCollection = `pbmRUsers` - TmpRolesCollection = `pbmRRoles` -) - func NewNode(ctx context.Context, curi string, dumpConns int) (*Node, error) { n := &Node{ - ctx: ctx, curi: curi, dumpConns: dumpConns, } - err := n.Connect() + err := n.Connect(ctx) if err != nil { return nil, errors.Wrap(err, 
"connect") } - nodeInfo, err := n.GetInfo() + nodeInfo, err := topo.GetNodeInfoExt(ctx, n.Session()) if err != nil { return nil, errors.Wrap(err, "get node info") } @@ -73,14 +59,14 @@ func (n *Node) Name() string { return n.me } -func (n *Node) Connect() error { - conn, err := n.connect(true) +func (n *Node) Connect(ctx context.Context) error { + conn, err := n.connect(ctx, true) if err != nil { return err } if n.cn != nil { - err = n.cn.Disconnect(n.ctx) + err = n.cn.Disconnect(ctx) if err != nil { return errors.Wrap(err, "close existing connection") } @@ -90,17 +76,16 @@ func (n *Node) Connect() error { return nil } -func (n *Node) connect(direct bool) (*mongo.Client, error) { - conn, err := mongo.NewClient(options.Client().ApplyURI(n.curi).SetAppName("pbm-agent-exec").SetDirect(direct)) - if err != nil { - return nil, errors.Wrap(err, "create mongo client") - } - err = conn.Connect(n.ctx) +func (n *Node) connect(ctx context.Context, direct bool) (*mongo.Client, error) { + opts := options.Client().ApplyURI(n.curi). + SetAppName("pbm-agent-exec"). + SetDirect(direct) + conn, err := mongo.Connect(ctx, opts) if err != nil { return nil, errors.Wrap(err, "connect") } - err = conn.Ping(n.ctx, nil) + err = conn.Ping(ctx, nil) if err != nil { return nil, errors.Wrap(err, "ping") } @@ -108,21 +93,6 @@ func (n *Node) connect(direct bool) (*mongo.Client, error) { return conn, nil } -func (n *Node) GetInfo() (*NodeInfo, error) { - i, err := GetNodeInfo(n.ctx, n.cn) - if err != nil { - return nil, errors.Wrap(err, "get NodeInfo") - } - opts, err := n.GetOpts(nil) - if err != nil { - return nil, errors.Wrap(err, "get mongod options") - } - if opts != nil { - i.opts = *opts - } - return i, nil -} - // DBSize returns the total size in bytes of a specific db files on disk on replicaset. // If db is empty string, returns total size for all databases. func (n *Node) DBSize(ctx context.Context, db string) (int64, error) { @@ -140,8 +110,8 @@ func (n *Node) DBSize(ctx context.Context, db string) (int64, error) { } // IsSharded return true if node is part of the sharded cluster (in shard or configsrv replset). 
-func (n *Node) IsSharded() (bool, error) { - i, err := n.GetInfo() +func (n *Node) IsSharded(ctx context.Context) (bool, error) { + i, err := topo.GetNodeInfoExt(ctx, n.Session()) if err != nil { return false, err } @@ -149,38 +119,8 @@ func (n *Node) IsSharded() (bool, error) { return i.IsSharded(), nil } -func (n *Node) GetMongoVersion() (*MongoVersion, error) { - ver, err := GetMongoVersion(n.ctx, n.cn) - return &ver, err -} - -func (n *Node) GetFeatureCompatibilityVersion() (string, error) { - return getFeatureCompatibilityVersion(n.ctx, n.cn) -} - -func getFeatureCompatibilityVersion(ctx context.Context, m *mongo.Client) (string, error) { - res := m.Database("admin").RunCommand(ctx, bson.D{ - {"getParameter", 1}, - {"featureCompatibilityVersion", 1}, - }) - if err := res.Err(); err != nil { - return "", errors.WithMessage(err, "query") - } - - var ver struct{ FeatureCompatibilityVersion struct{ Version string } } - if err := res.Decode(&ver); err != nil { - return "", errors.WithMessage(err, "decode") - } - - return ver.FeatureCompatibilityVersion.Version, nil -} - -func (n *Node) GetReplsetStatus() (*ReplsetStatus, error) { - return GetReplsetStatus(n.ctx, n.cn) -} - -func (n *Node) Status() (*NodeStatus, error) { - s, err := n.GetReplsetStatus() +func (n *Node) Status(ctx context.Context) (*topo.NodeStatus, error) { + s, err := topo.GetReplsetStatus(ctx, n.Session()) if err != nil { return nil, errors.Wrap(err, "get replset status") } @@ -193,29 +133,12 @@ func (n *Node) Status() (*NodeStatus, error) { } } - return nil, ErrNotFound + return nil, errors.ErrNotFound } // ReplicationLag returns node replication lag in seconds -func (n *Node) ReplicationLag() (int, error) { - s, err := n.GetReplsetStatus() - if err != nil { - return -1, errors.Wrap(err, "get replset status") - } - - name := n.Name() - - var primaryOptime, nodeOptime int - for _, m := range s.Members { - if m.Name == name { - nodeOptime = int(m.Optime.TS.T) - } - if m.StateStr == "PRIMARY" { - primaryOptime = int(m.Optime.TS.T) - } - } - - return primaryOptime - nodeOptime, nil +func (n *Node) ReplicationLag(ctx context.Context) (int, error) { + return topo.ReplicationLag(ctx, n.Session(), n.Name()) } func (n *Node) ConnURI() string { @@ -230,9 +153,9 @@ func (n *Node) Session() *mongo.Client { return n.cn } -func (n *Node) CurrentUser() (*AuthInfo, error) { - c := &ConnectionStatus{} - err := n.cn.Database(DB).RunCommand(n.ctx, bson.D{{"connectionStatus", 1}}).Decode(c) +func (n *Node) CurrentUser(ctx context.Context) (*types.AuthInfo, error) { + c := &types.ConnectionStatus{} + err := n.cn.Database(defs.DB).RunCommand(ctx, bson.D{{"connectionStatus", 1}}).Decode(c) if err != nil { return nil, errors.Wrap(err, "run mongo command connectionStatus") } @@ -240,14 +163,14 @@ func (n *Node) CurrentUser() (*AuthInfo, error) { return &c.AuthInfo, nil } -func (n *Node) DropTMPcoll() error { - cn, err := n.connect(false) +func (n *Node) DropTMPcoll(ctx context.Context) error { + cn, err := n.connect(ctx, false) if err != nil { return errors.Wrap(err, "connect to primary") } - defer cn.Disconnect(n.ctx) //nolint:errcheck + defer cn.Disconnect(ctx) //nolint:errcheck - err = DropTMPcoll(n.ctx, cn) + err = DropTMPcoll(ctx, cn) if err != nil { return err } @@ -256,26 +179,26 @@ func (n *Node) DropTMPcoll() error { } func DropTMPcoll(ctx context.Context, cn *mongo.Client) error { - err := cn.Database(DB).Collection(TmpRolesCollection).Drop(ctx) + err := cn.Database(defs.DB).Collection(defs.TmpRolesCollection).Drop(ctx) if err != nil 
{ - return errors.Wrapf(err, "drop collection %s", TmpRolesCollection) + return errors.Wrapf(err, "drop collection %s", defs.TmpRolesCollection) } - err = cn.Database(DB).Collection(TmpUsersCollection).Drop(ctx) + err = cn.Database(defs.DB).Collection(defs.TmpUsersCollection).Drop(ctx) if err != nil { - return errors.Wrapf(err, "drop collection %s", TmpUsersCollection) + return errors.Wrapf(err, "drop collection %s", defs.TmpUsersCollection) } return nil } -func (n *Node) WaitForWrite(ts primitive.Timestamp) error { +func (n *Node) WaitForWrite(ctx context.Context, ts primitive.Timestamp) error { var lw primitive.Timestamp var err error for i := 0; i < 21; i++ { - lw, err = LastWrite(n.cn, false) - if err == nil && primitive.CompareTimestamp(lw, ts) >= 0 { + lw, err = topo.GetLastWrite(ctx, n.Session(), false) + if err == nil && lw.Compare(ts) >= 0 { return nil } time.Sleep(time.Second * 1) @@ -288,138 +211,63 @@ func (n *Node) WaitForWrite(ts primitive.Timestamp) error { return errors.New("run out of time") } -func LastWrite(cn *mongo.Client, majority bool) (primitive.Timestamp, error) { - inf, err := GetNodeInfo(context.TODO(), cn) - if err != nil { - return primitive.Timestamp{}, errors.Wrap(err, "get NodeInfo data") - } - lw := inf.LastWrite.MajorityOpTime.TS - if !majority { - lw = inf.LastWrite.OpTime.TS - } - if lw.T == 0 { - return primitive.Timestamp{}, errors.New("last write timestamp is nil") - } - return lw, nil -} - -// OplogStartTime returns either the oldest active transaction timestamp or the -// current oplog time if there are no active transactions. -// taken from https://github.com/mongodb/mongo-tools/blob/1b496c4a8ff7415abc07b9621166d8e1fac00c91/mongodump/oplog_dump.go#L68 -// -//nolint:lll -func (n *Node) OplogStartTime() (primitive.Timestamp, error) { - coll := n.cn.Database("config").Collection("transactions", options.Collection().SetReadConcern(readconcern.Local())) - filter := bson.D{{"state", bson.D{{"$in", bson.A{"prepared", "inProgress"}}}}} - opts := options.FindOne().SetSort(bson.D{{"startOpTime", 1}}) - - var result bson.Raw - res := coll.FindOne(context.Background(), filter, opts) - err := res.Decode(&result) - if err != nil { - if errors.Is(err, mongo.ErrNoDocuments) { - return LastWrite(n.cn, true) - } - return primitive.Timestamp{}, fmt.Errorf("config.transactions.findOne error: %w", err) - } - - rawTS, err := result.LookupErr("startOpTime", "ts") - if err != nil { - return primitive.Timestamp{}, errors.New("config.transactions row had no startOpTime.ts field") - } - - t, i, ok := rawTS.TimestampOK() - if !ok { - return primitive.Timestamp{}, errors.New("config.transactions startOpTime.ts was not a BSON timestamp") - } - - return primitive.Timestamp{T: t, I: i}, nil -} - //nolint:nonamedreturns -func (n *Node) CopyUsersNRolles() (lastWrite primitive.Timestamp, err error) { - cn, err := n.connect(false) +func (n *Node) CopyUsersNRolles(ctx context.Context) (lastWrite primitive.Timestamp, err error) { + cn, err := n.connect(ctx, false) if err != nil { return lastWrite, errors.Wrap(err, "connect to primary") } - defer cn.Disconnect(n.ctx) //nolint:errcheck + defer cn.Disconnect(ctx) //nolint:errcheck - err = DropTMPcoll(n.ctx, cn) + err = DropTMPcoll(ctx, cn) if err != nil { return lastWrite, errors.Wrap(err, "drop tmp collections before copy") } - _, err = CopyColl(n.ctx, + err = copyColl(ctx, cn.Database("admin").Collection("system.roles"), - cn.Database(DB).Collection(TmpRolesCollection), + cn.Database(defs.DB).Collection(defs.TmpRolesCollection), 
bson.M{}, ) if err != nil { return lastWrite, errors.Wrap(err, "copy admin.system.roles") } - _, err = CopyColl(n.ctx, + err = copyColl(ctx, cn.Database("admin").Collection("system.users"), - cn.Database(DB).Collection(TmpUsersCollection), + cn.Database(defs.DB).Collection(defs.TmpUsersCollection), bson.M{}, ) if err != nil { return lastWrite, errors.Wrap(err, "copy admin.system.users") } - return LastWrite(cn, false) + return topo.GetLastWrite(ctx, cn, false) } -func (n *Node) GetOpts(defaults *MongodOpts) (*MongodOpts, error) { - opts := struct { - Parsed MongodOpts `bson:"parsed" json:"parsed"` - }{} - if defaults != nil { - opts.Parsed = *defaults - } - err := n.cn.Database("admin").RunCommand(n.ctx, bson.D{{"getCmdLineOpts", 1}}).Decode(&opts) +// copyColl copy documents matching the given filter and return number of copied documents +func copyColl(ctx context.Context, from, to *mongo.Collection, filter any) error { + cur, err := from.Find(ctx, filter) if err != nil { - return nil, errors.Wrap(err, "run mongo command") + return errors.Wrap(err, "create cursor") } - return &opts.Parsed, nil -} + defer cur.Close(ctx) -func (n *Node) GetRSconf() (*RSConfig, error) { - return GetReplSetConfig(n.ctx, n.cn) -} + n := 0 + for cur.Next(ctx) { + _, err = to.InsertOne(ctx, cur.Current) + if err != nil { + return errors.Wrap(err, "insert document") + } + n++ + } -func (n *Node) ConfSvrConn() (string, error) { - return ConfSvrConn(n.ctx, n.cn) + return nil } -func (n *Node) Shutdown() error { - err := n.cn.Database("admin").RunCommand(n.ctx, bson.D{{"shutdown", 1}}).Err() +func (n *Node) Shutdown(ctx context.Context) error { + err := n.cn.Database("admin").RunCommand(ctx, bson.D{{"shutdown", 1}}).Err() if err == nil || strings.Contains(err.Error(), "socket was unexpectedly closed") { return nil } return err } - -func GetNodeInfo(ctx context.Context, m *mongo.Client) (*NodeInfo, error) { - res := m.Database(DB).RunCommand(ctx, bson.D{{"isMaster", 1}}) - if err := res.Err(); err != nil { - return nil, errors.WithMessage(err, "cmd: isMaster") - } - - n := &NodeInfo{} - err := res.Decode(&n) - return n, errors.WithMessage(err, "decode") -} - -func GetReplSetConfig(ctx context.Context, m *mongo.Client) (*RSConfig, error) { - res := m.Database("admin").RunCommand(ctx, bson.D{{"replSetGetConfig", 1}}) - if err := res.Err(); err != nil { - return nil, errors.WithMessage(err, "run command") - } - - val := struct{ Config *RSConfig }{} - if err := res.Decode(&val); err != nil { - return nil, errors.WithMessage(err, "decode") - } - - return val.Config, nil -} diff --git a/pbm/oplog/backup.go b/pbm/oplog/backup.go index 121b9254c..61b38450b 100644 --- a/pbm/oplog/backup.go +++ b/pbm/oplog/backup.go @@ -1,17 +1,19 @@ package oplog import ( - "context" "fmt" "io" - "github.com/pkg/errors" + "github.com/percona/percona-backup-mongodb/internal/context" + "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" "go.mongodb.org/mongo-driver/mongo/options" - "github.com/percona/percona-backup-mongodb/pbm" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/topo" ) // OplogBackup is used for reading the Mongodb oplog @@ -111,12 +113,12 @@ func (ot *OplogBackup) WriteTo(w io.Writer) (int64, error) { rcheck = true } - if primitive.CompareTimestamp(ot.end, opts) == -1 { + if ot.end.Compare(opts) == -1 { return written, nil } // skip noop 
operations - if cur.Current.Lookup("op").String() == string(pbm.OperationNoop) { + if cur.Current.Lookup("op").String() == string(defs.OperationNoop) { continue } @@ -156,6 +158,6 @@ func (ot *OplogBackup) IsSufficient(from primitive.Timestamp) (bool, error) { } // LastWrite returns a timestamp of the last write operation readable by majority reads -func (ot *OplogBackup) LastWrite() (primitive.Timestamp, error) { - return pbm.LastWrite(ot.cl, true) +func (ot *OplogBackup) LastWrite(ctx context.Context) (primitive.Timestamp, error) { + return topo.GetLastWrite(ctx, ot.cl, true) } diff --git a/pbm/pitr.go b/pbm/oplog/chunk.go similarity index 68% rename from pbm/pitr.go rename to pbm/oplog/chunk.go index a71626325..7f61ea504 100644 --- a/pbm/pitr.go +++ b/pbm/oplog/chunk.go @@ -1,7 +1,6 @@ -package pbm +package oplog import ( - "context" "fmt" "path" "sort" @@ -9,96 +8,48 @@ import ( "strings" "time" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" "go.mongodb.org/mongo-driver/mongo/options" - "github.com/percona/percona-backup-mongodb/pbm/compress" -) - -const ( - // PITRdefaultSpan oplog slicing time span - PITRdefaultSpan = time.Minute * 10 - // PITRfsPrefix is a prefix (folder) for PITR chunks on the storage - PITRfsPrefix = "pbmPitr" + "github.com/percona/percona-backup-mongodb/internal/compress" + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/topo" ) // OplogChunk is index metadata for the oplog chunks type OplogChunk struct { - RS string `bson:"rs"` - FName string `bson:"fname"` - Compression compress.CompressionType `bson:"compression"` - StartTS primitive.Timestamp `bson:"start_ts"` - EndTS primitive.Timestamp `bson:"end_ts"` - Size int64 `bson:"size"` -} - -// IsPITR checks if PITR is enabled -func (p *PBM) IsPITR() (bool, error) { - enabled, _, err := isPITREnabled(p.ctx, p.Conn) - return enabled, err -} - -func isPITREnabled(ctx context.Context, m *mongo.Client) (bool, bool, error) { - cfg, err := getPBMConfig(ctx, m) - if err != nil { - if errors.Is(err, mongo.ErrNoDocuments) { - return false, false, nil - } - - return false, false, errors.Wrap(err, "get config") - } - - return cfg.PITR.Enabled, cfg.PITR.OplogOnly, nil -} - -// PITRrun checks if PITR slicing is running. It looks for PITR locks -// and returns true if there is at least one not stale. 
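
The oplog reader above also switches from the package-level primitive.CompareTimestamp helper to the Timestamp.Compare method, and reads the last majority write via topo.GetLastWrite with an explicit context. A tiny sketch of the comparison change, assuming a driver version that still ships the deprecated helper alongside the method:

    package main

    import (
        "fmt"

        "go.mongodb.org/mongo-driver/bson/primitive"
    )

    func main() {
        a := primitive.Timestamp{T: 100, I: 1}
        b := primitive.Timestamp{T: 100, I: 2}

        // Both forms order timestamps by T, then I, returning -1/0/1.
        fmt.Println(primitive.CompareTimestamp(a, b)) // -1, old helper
        fmt.Println(a.Compare(b))                     // -1, method used after this patch
    }
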
-func (p *PBM) PITRrun() (bool, error) { - l, err := p.GetLocks(&LockHeader{Type: CmdPITR}) - if errors.Is(err, mongo.ErrNoDocuments) || len(l) == 0 { - return false, nil - } - - if err != nil { - return false, errors.Wrap(err, "get locks") - } - - ct, err := p.ClusterTime() - if err != nil { - return false, errors.Wrap(err, "get cluster time") - } - - for _, lk := range l { - if lk.Heartbeat.T+StaleFrameSec >= ct.T { - return true, nil - } - } - - return false, nil + RS string `bson:"rs"` + FName string `bson:"fname"` + Compression defs.CompressionType `bson:"compression"` + StartTS primitive.Timestamp `bson:"start_ts"` + EndTS primitive.Timestamp `bson:"end_ts"` + Size int64 `bson:"size"` } // PITRLastChunkMeta returns the most recent PITR chunk for the given Replset -func (p *PBM) PITRLastChunkMeta(rs string) (*OplogChunk, error) { - return p.pitrChunk(rs, -1) +func PITRLastChunkMeta(ctx context.Context, m connect.Client, rs string) (*OplogChunk, error) { + return pitrChunk(ctx, m, rs, -1) } // PITRFirstChunkMeta returns the oldest PITR chunk for the given Replset -func (p *PBM) PITRFirstChunkMeta(rs string) (*OplogChunk, error) { - return p.pitrChunk(rs, 1) +func PITRFirstChunkMeta(ctx context.Context, m connect.Client, rs string) (*OplogChunk, error) { + return pitrChunk(ctx, m, rs, 1) } -func (p *PBM) pitrChunk(rs string, sort int) (*OplogChunk, error) { - res := p.Conn.Database(DB).Collection(PITRChunksCollection).FindOne( - p.ctx, +func pitrChunk(ctx context.Context, m connect.Client, rs string, sort int) (*OplogChunk, error) { + res := m.PITRChunksCollection().FindOne( + ctx, bson.D{{"rs", rs}}, options.FindOne().SetSort(bson.D{{"start_ts", sort}}), ) if err := res.Err(); err != nil { if errors.Is(err, mongo.ErrNoDocuments) { - return nil, ErrNotFound + return nil, errors.ErrNotFound } return nil, errors.Wrap(err, "get") } @@ -108,7 +59,7 @@ func (p *PBM) pitrChunk(rs string, sort int) (*OplogChunk, error) { return chnk, errors.Wrap(err, "decode") } -func (p *PBM) AllOplogRSNames(ctx context.Context, from, to primitive.Timestamp) ([]string, error) { +func AllOplogRSNames(ctx context.Context, m connect.Client, from, to primitive.Timestamp) ([]string, error) { q := bson.M{ "start_ts": bson.M{"$lte": to}, } @@ -116,7 +67,7 @@ func (p *PBM) AllOplogRSNames(ctx context.Context, from, to primitive.Timestamp) q["end_ts"] = bson.M{"$gte": from} } - res, err := p.Conn.Database(DB).Collection(PITRChunksCollection).Distinct(ctx, "rs", q) + res, err := m.PITRChunksCollection().Distinct(ctx, "rs", q) if err != nil { return nil, errors.Wrapf(err, "query") } @@ -131,7 +82,12 @@ func (p *PBM) AllOplogRSNames(ctx context.Context, from, to primitive.Timestamp) // PITRGetChunksSlice returns slice of PITR oplog chunks which Start TS // lies in a given time frame. Returns all chunks if `to` is 0. -func (p *PBM) PITRGetChunksSlice(rs string, from, to primitive.Timestamp) ([]OplogChunk, error) { +func PITRGetChunksSlice( + ctx context.Context, + m connect.Client, + rs string, + from, to primitive.Timestamp, +) ([]OplogChunk, error) { q := bson.D{} if rs != "" { q = bson.D{{"rs", rs}} @@ -144,11 +100,16 @@ func (p *PBM) PITRGetChunksSlice(rs string, from, to primitive.Timestamp) ([]Opl }...) 
} - return p.pitrGetChunksSlice(q) + return pitrGetChunksSlice(ctx, m, q) } // PITRGetChunksSliceUntil returns slice of PITR oplog chunks that starts up until timestamp (exclusively) -func (p *PBM) PITRGetChunksSliceUntil(rs string, t primitive.Timestamp) ([]OplogChunk, error) { +func PITRGetChunksSliceUntil( + ctx context.Context, + m connect.Client, + rs string, + t primitive.Timestamp, +) ([]OplogChunk, error) { q := bson.D{} if rs != "" { q = bson.D{{"rs", rs}} @@ -156,22 +117,22 @@ func (p *PBM) PITRGetChunksSliceUntil(rs string, t primitive.Timestamp) ([]Oplog q = append(q, bson.E{"start_ts", bson.M{"$lt": t}}) - return p.pitrGetChunksSlice(q) + return pitrGetChunksSlice(ctx, m, q) } -func (p *PBM) pitrGetChunksSlice(q bson.D) ([]OplogChunk, error) { - cur, err := p.Conn.Database(DB).Collection(PITRChunksCollection).Find( - p.ctx, +func pitrGetChunksSlice(ctx context.Context, m connect.Client, q bson.D) ([]OplogChunk, error) { + cur, err := m.PITRChunksCollection().Find( + ctx, q, options.Find().SetSort(bson.D{{"start_ts", 1}}), ) if err != nil { return nil, errors.Wrap(err, "get cursor") } - defer cur.Close(p.ctx) + defer cur.Close(ctx) chnks := []OplogChunk{} - for cur.Next(p.ctx) { + for cur.Next(ctx) { var chnk OplogChunk err := cur.Decode(&chnk) if err != nil { @@ -186,9 +147,14 @@ func (p *PBM) pitrGetChunksSlice(q bson.D) ([]OplogChunk, error) { // PITRGetChunkStarts returns a pitr slice chunk that belongs to the // given replica set and start from the given timestamp -func (p *PBM) PITRGetChunkStarts(rs string, ts primitive.Timestamp) (*OplogChunk, error) { - res := p.Conn.Database(DB).Collection(PITRChunksCollection).FindOne( - p.ctx, +func PITRGetChunkStarts( + ctx context.Context, + m connect.Client, + rs string, + ts primitive.Timestamp, +) (*OplogChunk, error) { + res := m.PITRChunksCollection().FindOne( + ctx, bson.D{ {"rs", rs}, {"start_ts", ts}, @@ -204,8 +170,8 @@ func (p *PBM) PITRGetChunkStarts(rs string, ts primitive.Timestamp) (*OplogChunk } // PITRAddChunk stores PITR chunk metadata -func (p *PBM) PITRAddChunk(c OplogChunk) error { - _, err := p.Conn.Database(DB).Collection(PITRChunksCollection).InsertOne(p.ctx, c) +func PITRAddChunk(ctx context.Context, m connect.Client, c OplogChunk) error { + _, err := m.PITRChunksCollection().InsertOne(ctx, c) return err } @@ -230,16 +196,21 @@ func (t Timeline) String() string { // any saved chunk already belongs to some valid timeline, // the slice wouldn't be done otherwise. // `flist` is a cache of chunk sizes. 
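
From here on, the PITR chunk helpers stop being methods on PBM and become package-level functions in pbm/oplog that take a context plus a connect.Client. A rough usage sketch, written as repo-internal code so the internal/context and internal/connect imports resolve; the package name, function name, and AppName value are illustrative only, and error handling is abbreviated:

    package example

    import (
        "fmt"

        "go.mongodb.org/mongo-driver/bson/primitive"

        "github.com/percona/percona-backup-mongodb/internal/connect"
        "github.com/percona/percona-backup-mongodb/internal/context"
        "github.com/percona/percona-backup-mongodb/pbm/oplog"
    )

    func listChunks(ctx context.Context, uri string) error {
        m, err := connect.Connect(ctx, uri, &connect.ConnectOptions{AppName: "example"})
        if err != nil {
            return err
        }

        // Zero `to` means "no upper bound", per the PITRGetChunksSlice doc comment.
        chunks, err := oplog.PITRGetChunksSlice(ctx, m, "", primitive.Timestamp{}, primitive.Timestamp{})
        if err != nil {
            return err
        }
        for _, c := range chunks {
            fmt.Println(c.RS, c.FName, c.StartTS.T, c.EndTS.T)
        }
        return nil
    }

Passing connect.Client explicitly keeps these helpers free of PBM state, so agent and CLI code can call them with whatever connection they already hold.
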
-func (p *PBM) PITRGetValidTimelines(rs string, until primitive.Timestamp) ([]Timeline, error) { - fch, err := p.PITRFirstChunkMeta(rs) - if err != nil && !errors.Is(err, ErrNotFound) { +func PITRGetValidTimelines( + ctx context.Context, + m connect.Client, + rs string, + until primitive.Timestamp, +) ([]Timeline, error) { + fch, err := PITRFirstChunkMeta(ctx, m, rs) + if err != nil && !errors.Is(err, errors.ErrNotFound) { return nil, errors.Wrap(err, "get the oldest chunk") } if fch == nil { return nil, nil } - slices, err := p.PITRGetChunksSlice(rs, fch.StartTS, until) + slices, err := PITRGetChunksSlice(ctx, m, rs, fch.StartTS, until) if err != nil { return nil, errors.Wrap(err, "get slice") } @@ -248,20 +219,20 @@ func (p *PBM) PITRGetValidTimelines(rs string, until primitive.Timestamp) ([]Tim } // PITRTimelines returns cluster-wide time ranges valid for PITR restore -func (p *PBM) PITRTimelines() ([]Timeline, error) { - shards, err := p.ClusterMembers() +func PITRTimelines(ctx context.Context, m connect.Client) ([]Timeline, error) { + shards, err := topo.ClusterMembers(ctx, m.MongoClient()) if err != nil { return nil, errors.Wrap(err, "get cluster members") } - now, err := p.ClusterTime() + now, err := topo.GetClusterTime(ctx, m) if err != nil { return nil, errors.Wrap(err, "get cluster time") } var tlns [][]Timeline for _, s := range shards { - t, err := p.PITRGetValidTimelines(s.RS, now) + t, err := PITRGetValidTimelines(ctx, m, s.RS, now) if err != nil { return nil, errors.Wrapf(err, "get PITR timelines for %s replset", s.RS) } @@ -279,7 +250,7 @@ func gettimelines(slices []OplogChunk) []Timeline { tlines := []Timeline{} for _, s := range slices { - if prevEnd.T != 0 && primitive.CompareTimestamp(prevEnd, s.StartTS) == -1 { + if prevEnd.T != 0 && prevEnd.Compare(s.StartTS) == -1 { tlines = append(tlines, tl) tl = Timeline{} } @@ -428,7 +399,7 @@ func PITRmetaFromFName(f string) *OplogChunk { } chnk := &OplogChunk{} chnk.RS = ppath[0] - chnk.FName = path.Join(PITRfsPrefix, f) + chnk.FName = path.Join(defs.PITRfsPrefix, f) fname := ppath[len(ppath)-1] fparts := strings.Split(fname, ".") @@ -438,7 +409,7 @@ func PITRmetaFromFName(f string) *OplogChunk { if len(fparts) == 4 { chnk.Compression = compress.FileCompression(fparts[3]) } else { - chnk.Compression = compress.CompressionTypeNone + chnk.Compression = defs.CompressionTypeNone } start := pitrParseTS(fparts[0]) diff --git a/pbm/pitr_test.go b/pbm/oplog/chunk_test.go similarity index 99% rename from pbm/pitr_test.go rename to pbm/oplog/chunk_test.go index bc2f2fe0e..f06f27eab 100644 --- a/pbm/pitr_test.go +++ b/pbm/oplog/chunk_test.go @@ -1,4 +1,4 @@ -package pbm +package oplog import ( "fmt" diff --git a/pbm/oplog/restore.go b/pbm/oplog/restore.go index 614dd6df8..911ceecb7 100644 --- a/pbm/oplog/restore.go +++ b/pbm/oplog/restore.go @@ -8,7 +8,6 @@ package oplog import ( - "context" "encoding/base64" "encoding/json" "fmt" @@ -22,12 +21,15 @@ import ( "github.com/mongodb/mongo-tools/common/idx" "github.com/mongodb/mongo-tools/common/txn" "github.com/mongodb/mongo-tools/mongorestore/ns" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" - "github.com/percona/percona-backup-mongodb/pbm" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/types" + 
"github.com/percona/percona-backup-mongodb/internal/version" "github.com/percona/percona-backup-mongodb/pbm/snapshot" ) @@ -41,8 +43,8 @@ func DefaultOpFilter(*Record) bool { return true } var excludeFromOplog = []string{ "config.rangeDeletions", - pbm.DB + "." + pbm.TmpUsersCollection, - pbm.DB + "." + pbm.TmpRolesCollection, + defs.DB + "." + defs.TmpUsersCollection, + defs.DB + "." + defs.TmpRolesCollection, } var knownCommands = map[string]struct{}{ @@ -104,7 +106,7 @@ type OplogRestore struct { // the queue of last N committed transactions txnCommit *cqueue - txn chan pbm.RestoreTxn + txn chan types.RestoreTxn txnSyncErr chan error // The `T` part of the last applied op's Timestamp. // Keeping just `T` allows atomic use as we only care @@ -126,10 +128,10 @@ const saveLastDistTxns = 100 func NewOplogRestore( dst *mongo.Client, ic *idx.IndexCatalog, - sv *pbm.MongoVersion, + sv *version.MongoVersion, unsafe, preserveUUID bool, - ctxn chan pbm.RestoreTxn, + ctxn chan types.RestoreTxn, txnErr chan error, ) (*OplogRestore, error) { m, err := ns.NewMatcher(append(snapshot.ExcludeFromRestore, excludeFromOplog...)) @@ -205,12 +207,12 @@ func (o *OplogRestore) Apply(src io.ReadCloser) (primitive.Timestamp, error) { } // skip if operation happened before the desired time frame - if primitive.CompareTimestamp(o.startTS, oe.Timestamp) == 1 { + if o.startTS.Compare(oe.Timestamp) == 1 { continue } // finish if operation happened after the desired time frame (oe.Timestamp > to) - if o.endTS.T > 0 && primitive.CompareTimestamp(oe.Timestamp, o.endTS) == 1 { + if o.endTS.T > 0 && oe.Timestamp.Compare(o.endTS) == 1 { return lts, nil } @@ -286,7 +288,7 @@ func (o *OplogRestore) LastOpTS() uint32 { func (o *OplogRestore) handleOp(oe db.Oplog) error { // skip if operation happened after the desired time frame (oe.Timestamp > o.lastTS) - if o.endTS.T > 0 && primitive.CompareTimestamp(oe.Timestamp, o.endTS) == 1 { + if o.endTS.T > 0 && oe.Timestamp.Compare(o.endTS) == 1 { return nil } @@ -449,10 +451,10 @@ func (o *OplogRestore) handleTxnOp(meta txn.Meta, op db.Oplog) error { } } - o.txnCommit.push(pbm.RestoreTxn{ + o.txnCommit.push(types.RestoreTxn{ ID: txnID, Ctime: cts, - State: pbm.TxnCommit, + State: types.TxnCommit, }) } @@ -581,7 +583,7 @@ func (o *OplogRestore) applyTxn(id string) error { } //nolint:nonamedreturns -func (o *OplogRestore) TxnLeftovers() (uncommitted map[string]Txn, lastCommits []pbm.RestoreTxn) { +func (o *OplogRestore) TxnLeftovers() (uncommitted map[string]Txn, lastCommits []types.RestoreTxn) { return o.txnData, o.txnCommit.s } @@ -756,7 +758,7 @@ func (o *OplogRestore) handleNonTxnOp(op db.Oplog) error { op2 := op op2.Object = bson.D{{"drop", collName}} if err := o.handleNonTxnOp(op2); err != nil { - return errors.WithMessage(err, "oplog: drop collection before create") + return errors.Wrap(err, "oplog: drop collection before create") } } } @@ -778,15 +780,15 @@ func (o *OplogRestore) handleNonTxnOp(op db.Oplog) error { } type cqueue struct { - s []pbm.RestoreTxn + s []types.RestoreTxn c int } func newCQueue(capacity int) *cqueue { - return &cqueue{s: make([]pbm.RestoreTxn, 0, capacity), c: capacity} + return &cqueue{s: make([]types.RestoreTxn, 0, capacity), c: capacity} } -func (c *cqueue) push(v pbm.RestoreTxn) { +func (c *cqueue) push(v types.RestoreTxn) { if len(c.s) == c.c { c.s = c.s[1:] } @@ -794,7 +796,7 @@ func (c *cqueue) push(v pbm.RestoreTxn) { c.s = append(c.s, v) } -func (c *cqueue) last() *pbm.RestoreTxn { +func (c *cqueue) last() *types.RestoreTxn { if len(c.s) == 0 
{ return nil } diff --git a/pbm/pbm.go b/pbm/pbm.go index 1d187dcef..ed6302091 100644 --- a/pbm/pbm.go +++ b/pbm/pbm.go @@ -1,256 +1,21 @@ package pbm import ( - "bytes" - "context" - "fmt" - "io" - "net/url" - "os" - "strconv" "strings" - "time" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson" - "go.mongodb.org/mongo-driver/bson/primitive" - "go.mongodb.org/mongo-driver/mongo" - "go.mongodb.org/mongo-driver/mongo/options" - "go.mongodb.org/mongo-driver/mongo/readconcern" - "go.mongodb.org/mongo-driver/mongo/readpref" - "go.mongodb.org/mongo-driver/mongo/writeconcern" - "github.com/percona/percona-backup-mongodb/pbm/compress" - "github.com/percona/percona-backup-mongodb/pbm/log" + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/log" + "github.com/percona/percona-backup-mongodb/internal/query" + "github.com/percona/percona-backup-mongodb/internal/topo" ) -const ( - // DB is a name of the PBM database - DB = "admin" - // LogCollection is the name of the mongo collection that contains PBM logs - LogCollection = "pbmLog" - // ConfigCollection is the name of the mongo collection that contains PBM configs - ConfigCollection = "pbmConfig" - // LockCollection is the name of the mongo collection that is used - // by agents to coordinate mutually exclusive operations (e.g. backup/restore) - LockCollection = "pbmLock" - // LockOpCollection is the name of the mongo collection that is used - // by agents to coordinate operations that don't need to be - // mutually exclusive to other operation types (e.g. backup-delete) - LockOpCollection = "pbmLockOp" - // BcpCollection is a collection for backups metadata - BcpCollection = "pbmBackups" - // RestoresCollection is a collection for restores metadata - RestoresCollection = "pbmRestores" - // CmdStreamCollection is the name of the mongo collection that contains backup/restore commands stream - CmdStreamCollection = "pbmCmd" - // PITRChunksCollection contains index metadata of PITR chunks - PITRChunksCollection = "pbmPITRChunks" - // PBMOpLogCollection contains log of acquired locks (hence run ops) - PBMOpLogCollection = "pbmOpLog" - // AgentsStatusCollection is an agents registry with its status/health checks - AgentsStatusCollection = "pbmAgents" - - // MetadataFileSuffix is a suffix for the metadata file on a storage - MetadataFileSuffix = ".pbm.json" -) - -// ErrNotFound - object not found -var ErrNotFound = errors.New("not found") - -// Command represents actions that could be done on behalf of the client by the agents -type Command string - -const ( - CmdUndefined Command = "" - CmdBackup Command = "backup" - CmdRestore Command = "restore" - CmdReplay Command = "replay" - CmdCancelBackup Command = "cancelBackup" - CmdResync Command = "resync" - CmdPITR Command = "pitr" - CmdDeleteBackup Command = "delete" - CmdDeletePITR Command = "deletePitr" - CmdCleanup Command = "cleanup" -) - -func (c Command) String() string { - switch c { - case CmdBackup: - return "Snapshot backup" - case CmdRestore: - return "Snapshot restore" - case CmdReplay: - return "Oplog replay" - case CmdCancelBackup: - return "Backup cancellation" - case CmdResync: - return "Resync storage" - case CmdPITR: - return "PITR incremental backup" - case CmdDeleteBackup: - return "Delete" - case CmdDeletePITR: - return "Delete PITR chunks" - case CmdCleanup: - return "Cleanup backups and 
PITR chunks" - default: - return "Undefined" - } -} - -type OPID primitive.ObjectID - -type Cmd struct { - Cmd Command `bson:"cmd"` - Backup *BackupCmd `bson:"backup,omitempty"` - Restore *RestoreCmd `bson:"restore,omitempty"` - Replay *ReplayCmd `bson:"replay,omitempty"` - Delete *DeleteBackupCmd `bson:"delete,omitempty"` - DeletePITR *DeletePITRCmd `bson:"deletePitr,omitempty"` - Cleanup *CleanupCmd `bson:"cleanup,omitempty"` - TS int64 `bson:"ts"` - OPID OPID `bson:"-"` -} - -func OPIDfromStr(s string) (OPID, error) { - o, err := primitive.ObjectIDFromHex(s) - if err != nil { - return OPID(primitive.NilObjectID), err - } - return OPID(o), nil -} - -func NilOPID() OPID { return OPID(primitive.NilObjectID) } - -func (o OPID) String() string { - return primitive.ObjectID(o).Hex() -} - -func (o OPID) Obj() primitive.ObjectID { - return primitive.ObjectID(o) -} - -func (c Cmd) String() string { - var buf bytes.Buffer - - buf.WriteString(string(c.Cmd)) - switch c.Cmd { - case CmdBackup: - buf.WriteString(" [") - buf.WriteString(c.Backup.String()) - buf.WriteString("]") - case CmdRestore: - buf.WriteString(" [") - buf.WriteString(c.Restore.String()) - buf.WriteString("]") - } - buf.WriteString(" ") - return buf.String() -} - -type BackupCmd struct { - Type BackupType `bson:"type"` - IncrBase bool `bson:"base"` - Name string `bson:"name"` - Namespaces []string `bson:"nss,omitempty"` - Compression compress.CompressionType `bson:"compression"` - CompressionLevel *int `bson:"level,omitempty"` -} - -func (b BackupCmd) String() string { - var level string - if b.CompressionLevel == nil { - level = "default" - } else { - level = strconv.Itoa(*b.CompressionLevel) - } - return fmt.Sprintf("name: %s, compression: %s (level: %s)", b.Name, b.Compression, level) -} - -type RestoreCmd struct { - Name string `bson:"name"` - BackupName string `bson:"backupName"` - Namespaces []string `bson:"nss,omitempty"` - RSMap map[string]string `bson:"rsMap,omitempty"` - - OplogTS primitive.Timestamp `bson:"oplogTS,omitempty"` - - External bool `bson:"external"` - ExtConf ExternOpts `bson:"extConf"` - ExtTS primitive.Timestamp `bson:"extTS"` -} - -func (r RestoreCmd) String() string { - bcp := "" - if r.BackupName != "" { - bcp = "snapshot: " + r.BackupName - } - if r.External { - bcp += "[external]" - } - if r.ExtTS.T > 0 { - bcp += fmt.Sprintf(" external ts: <%d,%d>", r.ExtTS.T, r.ExtTS.I) - } - if r.OplogTS.T > 0 { - bcp += fmt.Sprintf(" point-in-time: <%d,%d>", r.OplogTS.T, r.OplogTS.I) - } - - return fmt.Sprintf("name: %s, %s", r.Name, bcp) -} - -type ReplayCmd struct { - Name string `bson:"name"` - Start primitive.Timestamp `bson:"start,omitempty"` - End primitive.Timestamp `bson:"end,omitempty"` - RSMap map[string]string `bson:"rsMap,omitempty"` -} - -func (c ReplayCmd) String() string { - return fmt.Sprintf("name: %s, time: %d - %d", c.Name, c.Start, c.End) -} - -type DeleteBackupCmd struct { - Backup string `bson:"backup"` - OlderThan int64 `bson:"olderthan"` -} - -type DeletePITRCmd struct { - OlderThan int64 `bson:"olderthan"` -} - -type CleanupCmd struct { - OlderThan primitive.Timestamp `bson:"olderThan"` -} - -func (d DeleteBackupCmd) String() string { - return fmt.Sprintf("backup: %s, older than: %d", d.Backup, d.OlderThan) -} - -const ( - PITRcheckRange = time.Second * 15 - AgentsStatCheckRange = time.Second * 5 -) - -var ( - WaitActionStart = time.Second * 15 - WaitBackupStart = WaitActionStart + PITRcheckRange*12/10 // 33 seconds -) - -// OpLog represents log of started operation. 
-// Operation progress can be get from logs by OPID. -// Basically it is a log of all ever taken locks. With the -// uniqueness by rs + opid -type OpLog struct { - LockHeader `bson:",inline" json:",inline"` -} - type PBM struct { - Conn *mongo.Client + Conn connect.Client log *log.Logger - ctx context.Context } // New creates a new PBM object. @@ -259,733 +24,34 @@ type PBM struct { // If agent's or ctl's local node is not a member of ConfigServer, // after discovering current topology connection will be established to ConfigServer. func New(ctx context.Context, uri, appName string) (*PBM, error) { - uri = "mongodb://" + strings.Replace(uri, "mongodb://", "", 1) - - client, err := connect(ctx, uri, appName) + c, err := connect.Connect(ctx, uri, &connect.ConnectOptions{AppName: appName}) if err != nil { return nil, errors.Wrap(err, "create mongo connection") } - pbm := &PBM{ - Conn: client, - ctx: ctx, - } - inf, err := pbm.GetNodeInfo() - if err != nil { - return nil, errors.Wrap(err, "get topology") - } - - if !inf.IsSharded() || inf.ReplsetRole() == RoleConfigSrv { - return pbm, errors.Wrap(pbm.setupNewDB(), "setup a new backups db") - } - - csvr, err := ConfSvrConn(ctx, client) - if err != nil { - return nil, errors.Wrap(err, "get config server connection URI") - } - // no need in this connection anymore, we need a new one with the ConfigServer - err = client.Disconnect(ctx) - if err != nil { - return nil, errors.Wrap(err, "disconnect old client") - } - - chost := strings.Split(csvr, "/") - if len(chost) < 2 { - return nil, errors.Wrapf(err, "define config server connection URI from %s", csvr) - } - - curi, err := url.Parse(uri) - if err != nil { - return nil, errors.Wrapf(err, "parse mongo-uri '%s'", uri) - } - - // Preserving the `replicaSet` parameter will cause an error - // while connecting to the ConfigServer (mismatched replicaset names) - query := curi.Query() - query.Del("replicaSet") - curi.RawQuery = query.Encode() - curi.Host = chost[1] - pbm.Conn, err = connect(ctx, curi.String(), appName) - if err != nil { - return nil, errors.Wrapf(err, "create mongo connection to configsvr with connection string '%s'", curi) - } - - return pbm, errors.Wrap(pbm.setupNewDB(), "setup a new backups db") + pbm := &PBM{Conn: c} + return pbm, errors.Wrap(query.SetupNewDB(ctx, c), "setup a new backups db") } func (p *PBM) InitLogger(rs, node string) { - p.log = log.New(p.Conn.Database(DB).Collection(LogCollection), rs, node) + p.log = log.New(p.Conn.LogCollection(), rs, node) } func (p *PBM) Logger() *log.Logger { return p.log } -const ( - cmdCollectionSizeBytes = 1 << 20 // 1Mb - pbmOplogCollectionSizeBytes = 10 << 20 // 10Mb - logsCollectionSizeBytes = 50 << 20 // 50Mb -) - -// setup a new DB for PBM -func (p *PBM) setupNewDB() error { - err := p.Conn.Database(DB).RunCommand( - p.ctx, - bson.D{{"create", CmdStreamCollection}, {"capped", true}, {"size", cmdCollectionSizeBytes}}, - ).Err() - if err != nil && !strings.Contains(err.Error(), "already exists") { - return errors.Wrap(err, "ensure cmd collection") - } - - err = p.Conn.Database(DB).RunCommand( - p.ctx, - bson.D{{"create", LogCollection}, {"capped", true}, {"size", logsCollectionSizeBytes}}, - ).Err() - if err != nil && !strings.Contains(err.Error(), "already exists") { - return errors.Wrap(err, "ensure log collection") - } - - err = p.Conn.Database(DB).RunCommand( - p.ctx, - bson.D{{"create", LockCollection}}, - ).Err() - if err != nil && !strings.Contains(err.Error(), "already exists") { - return errors.Wrap(err, "ensure lock 
collection") - } - - // create indexes for the lock collections - _, err = p.Conn.Database(DB).Collection(LockCollection).Indexes().CreateOne( - p.ctx, - mongo.IndexModel{ - Keys: bson.D{{"replset", 1}}, - Options: options.Index(). - SetUnique(true). - SetSparse(true), - }, - ) - if err != nil && !strings.Contains(err.Error(), "already exists") { - return errors.Wrapf(err, "ensure lock index on %s", LockCollection) - } - _, err = p.Conn.Database(DB).Collection(LockOpCollection).Indexes().CreateOne( - p.ctx, - mongo.IndexModel{ - Keys: bson.D{{"replset", 1}, {"type", 1}}, - Options: options.Index(). - SetUnique(true). - SetSparse(true), - }, - ) - if err != nil && !strings.Contains(err.Error(), "already exists") { - return errors.Wrapf(err, "ensure lock index on %s", LockOpCollection) - } - - err = p.Conn.Database(DB).RunCommand( - p.ctx, - bson.D{{"create", PBMOpLogCollection}, {"capped", true}, {"size", pbmOplogCollectionSizeBytes}}, - ).Err() - if err != nil && !strings.Contains(err.Error(), "already exists") { - return errors.Wrap(err, "ensure log collection") - } - _, err = p.Conn.Database(DB).Collection(PBMOpLogCollection).Indexes().CreateOne( - p.ctx, - mongo.IndexModel{ - Keys: bson.D{{"opid", 1}, {"replset", 1}}, - Options: options.Index(). - SetUnique(true). - SetSparse(true), - }, - ) - if err != nil && !strings.Contains(err.Error(), "already exists") { - return errors.Wrapf(err, "ensure lock index on %s", LockOpCollection) - } - - // create indexs for the pitr chunks - _, err = p.Conn.Database(DB).Collection(PITRChunksCollection).Indexes().CreateMany( - p.ctx, - []mongo.IndexModel{ - { - Keys: bson.D{{"rs", 1}, {"start_ts", 1}, {"end_ts", 1}}, - Options: options.Index(). - SetUnique(true). - SetSparse(true), - }, - { - Keys: bson.D{{"start_ts", 1}, {"end_ts", 1}}, - }, - }, - ) - if err != nil && !strings.Contains(err.Error(), "already exists") { - return errors.Wrap(err, "ensure pitr chunks index") - } - - _, err = p.Conn.Database(DB).Collection(BcpCollection).Indexes().CreateMany( - p.ctx, - []mongo.IndexModel{ - { - Keys: bson.D{{"name", 1}}, - Options: options.Index(). - SetUnique(true). - SetSparse(true), - }, - { - Keys: bson.D{{"start_ts", 1}, {"status", 1}}, - }, - }, - ) - - return err -} - -func connect(ctx context.Context, uri, appName string) (*mongo.Client, error) { - client, err := mongo.NewClient( - options.Client().ApplyURI(uri). - SetAppName(appName). - SetReadPreference(readpref.Primary()). - SetReadConcern(readconcern.Majority()). - SetWriteConcern(writeconcern.New(writeconcern.WMajority())), - ) - if err != nil { - return nil, errors.Wrap(err, "create mongo client") - } - err = client.Connect(ctx) - if err != nil { - return nil, errors.Wrap(err, "mongo connect") - } - - err = client.Ping(ctx, nil) - if err != nil { - return nil, errors.Wrap(err, "mongo ping") - } - - return client, nil -} - -type BackupType string - -const ( - PhysicalBackup BackupType = "physical" - ExternalBackup BackupType = "external" - IncrementalBackup BackupType = "incremental" - LogicalBackup BackupType = "logical" -) - -// BackupMeta is a backup's metadata -type BackupMeta struct { - Type BackupType `bson:"type" json:"type"` - OPID string `bson:"opid" json:"opid"` - Name string `bson:"name" json:"name"` - - // SrcBackup is the source for the incremental backups. The souce might be - // incremental as well. - // Empty means this is a full backup (and a base for further incremental bcps). 
- SrcBackup string `bson:"src_backup,omitempty" json:"src_backup,omitempty"` - - // ShardRemap is map of replset to shard names. - // If shard name is different from replset name, it will be stored in the map. - // If all shard names are the same as their replset names, the map is nil. - ShardRemap map[string]string `bson:"shardRemap,omitempty" json:"shardRemap,omitempty"` - - Namespaces []string `bson:"nss,omitempty" json:"nss,omitempty"` - Replsets []BackupReplset `bson:"replsets" json:"replsets"` - Compression compress.CompressionType `bson:"compression" json:"compression"` - Store StorageConf `bson:"store" json:"store"` - Size int64 `bson:"size" json:"size"` - MongoVersion string `bson:"mongodb_version" json:"mongodb_version,omitempty"` - FCV string `bson:"fcv" json:"fcv"` - StartTS int64 `bson:"start_ts" json:"start_ts"` - LastTransitionTS int64 `bson:"last_transition_ts" json:"last_transition_ts"` - FirstWriteTS primitive.Timestamp `bson:"first_write_ts" json:"first_write_ts"` - LastWriteTS primitive.Timestamp `bson:"last_write_ts" json:"last_write_ts"` - Hb primitive.Timestamp `bson:"hb" json:"hb"` - Status Status `bson:"status" json:"status"` - Conditions []Condition `bson:"conditions" json:"conditions"` - Nomination []BackupRsNomination `bson:"n" json:"n"` - Err string `bson:"error,omitempty" json:"error,omitempty"` - PBMVersion string `bson:"pbm_version,omitempty" json:"pbm_version,omitempty"` - BalancerStatus BalancerMode `bson:"balancer" json:"balancer"` - runtimeError error -} - -func (b *BackupMeta) Error() error { - switch { - case b.runtimeError != nil: - return b.runtimeError - case b.Err != "": - return errors.New(b.Err) - default: - return nil - } -} - -func (b *BackupMeta) SetRuntimeError(err error) { - b.runtimeError = err - b.Status = StatusError -} - -// BackupRsNomination is used to choose (nominate and elect) nodes for the backup -// within a replica set -type BackupRsNomination struct { - RS string `bson:"rs" json:"rs"` - Nodes []string `bson:"n" json:"n"` - Ack string `bson:"ack" json:"ack"` -} - -type Condition struct { - Timestamp int64 `bson:"timestamp" json:"timestamp"` - Status Status `bson:"status" json:"status"` - Error string `bson:"error,omitempty" json:"error,omitempty"` -} - -type BackupReplset struct { - Name string `bson:"name" json:"name"` - // Journal is not used. left for backward compatibility - Journal []File `bson:"journal,omitempty" json:"journal,omitempty"` - Files []File `bson:"files,omitempty" json:"files,omitempty"` - DumpName string `bson:"dump_name,omitempty" json:"backup_name,omitempty"` - OplogName string `bson:"oplog_name,omitempty" json:"oplog_name,omitempty"` - StartTS int64 `bson:"start_ts" json:"start_ts"` - Status Status `bson:"status" json:"status"` - IsConfigSvr *bool `bson:"iscs,omitempty" json:"iscs,omitempty"` - LastTransitionTS int64 `bson:"last_transition_ts" json:"last_transition_ts"` - FirstWriteTS primitive.Timestamp `bson:"first_write_ts" json:"first_write_ts"` - LastWriteTS primitive.Timestamp `bson:"last_write_ts" json:"last_write_ts"` - Node string `bson:"node" json:"node"` // node that performed backup - Error string `bson:"error,omitempty" json:"error,omitempty"` - Conditions []Condition `bson:"conditions" json:"conditions"` - MongodOpts *MongodOpts `bson:"mongod_opts,omitempty" json:"mongod_opts,omitempty"` - - // CustomThisID is customized thisBackupName value for $backupCursor (in WT: "this_id"). - // If it is not set (empty), the default value was used. 
- CustomThisID string `bson:"this_id,omitempty" json:"this_id,omitempty"` -} - -type File struct { - Name string `bson:"filename" json:"filename"` - Off int64 `bson:"offset" json:"offset"` // offset for incremental backups - Len int64 `bson:"length" json:"length"` // length of chunk after the offset - Size int64 `bson:"fileSize" json:"fileSize"` - StgSize int64 `bson:"stgSize" json:"stgSize"` - Fmode os.FileMode `bson:"fmode" json:"fmode"` -} - -func (f File) String() string { - if f.Off == 0 && f.Len == 0 { - return f.Name - } - return fmt.Sprintf("%s [%d:%d]", f.Name, f.Off, f.Len) -} - -func (f *File) WriteTo(w io.Writer) (int64, error) { - fd, err := os.Open(f.Name) - if err != nil { - return 0, errors.Wrap(err, "open file for reading") - } - defer fd.Close() - - if f.Len == 0 && f.Off == 0 { - return io.Copy(w, fd) - } - - return io.Copy(w, io.NewSectionReader(fd, f.Off, f.Len)) -} - -// Status is a backup current status -type Status string - -const ( - StatusInit Status = "init" - StatusReady Status = "ready" - - // for phys restore, to indicate shards have been stopped - StatusDown Status = "down" - - StatusStarting Status = "starting" - StatusRunning Status = "running" - StatusDumpDone Status = "dumpDone" - StatusCopyReady Status = "copyReady" - StatusCopyDone Status = "copyDone" - StatusPartlyDone Status = "partlyDone" - StatusDone Status = "done" - StatusCancelled Status = "canceled" - StatusError Status = "error" - - // status to communicate last op timestamp if it's not set - // during external restore - StatusExtTS Status = "lastTS" -) - -func (p *PBM) SetBackupMeta(m *BackupMeta) error { - m.LastTransitionTS = m.StartTS - m.Conditions = append(m.Conditions, Condition{ - Timestamp: m.StartTS, - Status: m.Status, - }) - - _, err := p.Conn.Database(DB).Collection(BcpCollection).InsertOne(p.ctx, m) - - return err -} - -// RS returns the metadata of the replset with given name. -// It returns nil if no replset found. 
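
The backup-metadata accessors removed just below resurface as package-level functions; within this patch only query.GetLastBackup and query.SetupNewDB are visible, so the sketch sticks to those. It assumes the returned value keeps the field names of the BackupMeta struct being removed above, and that a nil timestamp still means "no upper time bound" as in the old GetLastBackup contract:

    package example

    import (
        "github.com/percona/percona-backup-mongodb/internal/connect"
        "github.com/percona/percona-backup-mongodb/internal/context"
        "github.com/percona/percona-backup-mongodb/internal/errors"
        "github.com/percona/percona-backup-mongodb/internal/query"
    )

    func lastBackupName(ctx context.Context, m connect.Client) (string, error) {
        // nil upper bound: most recent finished backup, matching how the old
        // GetLastBackup treated its *primitive.Timestamp argument.
        bcp, err := query.GetLastBackup(ctx, m, nil)
        if errors.Is(err, errors.ErrNotFound) {
            return "", errors.New("no finished backups yet")
        }
        if err != nil {
            return "", err
        }
        return bcp.Name, nil
    }
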
-func (b *BackupMeta) RS(name string) *BackupReplset { - for _, rs := range b.Replsets { - if rs.Name == name { - return &rs - } - } - return nil -} - -func (p *PBM) ChangeBackupStateOPID(opid string, s Status, msg string) error { - return p.changeBackupState(bson.D{{"opid", opid}}, s, msg) -} - -func (p *PBM) ChangeBackupState(bcpName string, s Status, msg string) error { - return p.changeBackupState(bson.D{{"name", bcpName}}, s, msg) -} - -func (p *PBM) changeBackupState(clause bson.D, s Status, msg string) error { - ts := time.Now().UTC().Unix() - _, err := p.Conn.Database(DB).Collection(BcpCollection).UpdateOne( - p.ctx, - clause, - bson.D{ - {"$set", bson.M{"status": s}}, - {"$set", bson.M{"last_transition_ts": ts}}, - {"$set", bson.M{"error": msg}}, - {"$push", bson.M{"conditions": Condition{Timestamp: ts, Status: s, Error: msg}}}, - }, - ) - - return err -} - -func (p *PBM) BackupHB(bcpName string) error { - ts, err := p.ClusterTime() - if err != nil { - return errors.Wrap(err, "read cluster time") - } - - _, err = p.Conn.Database(DB).Collection(BcpCollection).UpdateOne( - p.ctx, - bson.D{{"name", bcpName}}, - bson.D{ - {"$set", bson.M{"hb": ts}}, - }, - ) - - return errors.Wrap(err, "write into db") -} - -func (p *PBM) SetSrcBackup(bcpName, srcName string) error { - _, err := p.Conn.Database(DB).Collection(BcpCollection).UpdateOne( - p.ctx, - bson.D{{"name", bcpName}}, - bson.D{ - {"$set", bson.M{"src_backup": srcName}}, - }, - ) - - return err -} - -func (p *PBM) SetFirstWrite(bcpName string, first primitive.Timestamp) error { - _, err := p.Conn.Database(DB).Collection(BcpCollection).UpdateOne( - p.ctx, - bson.D{{"name", bcpName}}, - bson.D{ - {"$set", bson.M{"first_write_ts": first}}, - }, - ) - - return err -} - -func (p *PBM) SetLastWrite(bcpName string, last primitive.Timestamp) error { - _, err := p.Conn.Database(DB).Collection(BcpCollection).UpdateOne( - p.ctx, - bson.D{{"name", bcpName}}, - bson.D{ - {"$set", bson.M{"last_write_ts": last}}, - }, - ) - - return err -} - -func (p *PBM) AddRSMeta(bcpName string, rs BackupReplset) error { - rs.LastTransitionTS = rs.StartTS - rs.Conditions = append(rs.Conditions, Condition{ - Timestamp: rs.StartTS, - Status: rs.Status, - }) - _, err := p.Conn.Database(DB).Collection(BcpCollection).UpdateOne( - p.ctx, - bson.D{{"name", bcpName}}, - bson.D{{"$addToSet", bson.M{"replsets": rs}}}, - ) - - return err -} - -func (p *PBM) ChangeRSState(bcpName, rsName string, s Status, msg string) error { - ts := time.Now().UTC().Unix() - _, err := p.Conn.Database(DB).Collection(BcpCollection).UpdateOne( - p.ctx, - bson.D{{"name", bcpName}, {"replsets.name", rsName}}, - bson.D{ - {"$set", bson.M{"replsets.$.status": s}}, - {"$set", bson.M{"replsets.$.last_transition_ts": ts}}, - {"$set", bson.M{"replsets.$.error": msg}}, - {"$push", bson.M{"replsets.$.conditions": Condition{Timestamp: ts, Status: s, Error: msg}}}, - }, - ) - - return err -} - -func (p *PBM) IncBackupSize(ctx context.Context, bcpName string, size int64) error { - _, err := p.Conn.Database(DB).Collection(BcpCollection).UpdateOne(ctx, - bson.D{{"name", bcpName}}, - bson.D{{"$inc", bson.M{"size": size}}}) - - return err -} - -func (p *PBM) RSSetPhyFiles(bcpName, rsName string, rs *BackupReplset) error { - _, err := p.Conn.Database(DB).Collection(BcpCollection).UpdateOne( - p.ctx, - bson.D{{"name", bcpName}, {"replsets.name", rsName}}, - bson.D{ - {"$set", bson.M{"replsets.$.files": rs.Files}}, - {"$set", bson.M{"replsets.$.journal": rs.Journal}}, - }, - ) - - return err -} - -func (p *PBM) 
SetRSLastWrite(bcpName, rsName string, ts primitive.Timestamp) error { - _, err := p.Conn.Database(DB).Collection(BcpCollection).UpdateOne( - p.ctx, - bson.D{{"name", bcpName}, {"replsets.name", rsName}}, - bson.D{ - {"$set", bson.M{"replsets.$.last_write_ts": ts}}, - }, - ) - - return err -} - -func (p *PBM) GetBackupMeta(name string) (*BackupMeta, error) { - return p.getBackupMeta(bson.D{{"name", name}}) -} - -func (p *PBM) GetBackupByOPID(opid string) (*BackupMeta, error) { - return p.getBackupMeta(bson.D{{"opid", opid}}) -} - -func (p *PBM) getBackupMeta(clause bson.D) (*BackupMeta, error) { - res := p.Conn.Database(DB).Collection(BcpCollection).FindOne(p.ctx, clause) - if err := res.Err(); err != nil { - if errors.Is(err, mongo.ErrNoDocuments) { - return nil, ErrNotFound - } - return nil, errors.Wrap(err, "get") - } - - b := &BackupMeta{} - err := res.Decode(b) - return b, errors.Wrap(err, "decode") -} - -func (p *PBM) LastIncrementalBackup() (*BackupMeta, error) { - return p.getRecentBackup(nil, nil, -1, bson.D{{"type", string(IncrementalBackup)}}) -} - -// GetLastBackup returns last successfully finished backup (non-selective and non-external) -// or nil if there is no such backup yet. If ts isn't nil it will -// search for the most recent backup that finished before specified timestamp -func (p *PBM) GetLastBackup(before *primitive.Timestamp) (*BackupMeta, error) { - return p.getRecentBackup(nil, before, -1, - bson.D{{"nss", nil}, {"type", bson.M{"$ne": ExternalBackup}}}) -} - -func (p *PBM) GetFirstBackup(after *primitive.Timestamp) (*BackupMeta, error) { - return p.getRecentBackup(after, nil, 1, - bson.D{{"nss", nil}, {"type", bson.M{"$ne": ExternalBackup}}}) -} - -func (p *PBM) getRecentBackup(after, before *primitive.Timestamp, sort int, opts bson.D) (*BackupMeta, error) { - q := append(bson.D{}, opts...) 
- q = append(q, bson.E{"status", StatusDone}) - if after != nil { - q = append(q, bson.E{"last_write_ts", bson.M{"$gte": after}}) - } - if before != nil { - q = append(q, bson.E{"last_write_ts", bson.M{"$lte": before}}) - } - - res := p.Conn.Database(DB).Collection(BcpCollection).FindOne( - p.ctx, - q, - options.FindOne().SetSort(bson.D{{"start_ts", sort}}), - ) - if err := res.Err(); err != nil { - if errors.Is(err, mongo.ErrNoDocuments) { - return nil, ErrNotFound - } - return nil, errors.Wrap(err, "get") - } - - b := &BackupMeta{} - err := res.Decode(b) - return b, errors.Wrap(err, "decode") -} - -func (p *PBM) BackupHasNext(backup *BackupMeta) (bool, error) { - f := bson.D{ - {"nss", nil}, - {"type", bson.M{"$ne": ExternalBackup}}, - {"start_ts", bson.M{"$gt": backup.LastWriteTS.T}}, - {"status", StatusDone}, - } - o := options.FindOne().SetProjection(bson.D{{"_id", 1}}) - res := p.Conn.Database(DB).Collection(BcpCollection).FindOne(p.ctx, f, o) - if err := res.Err(); err != nil { - if errors.Is(err, mongo.ErrNoDocuments) { - return false, nil - } - return false, errors.WithMessage(err, "query") - } - - return true, nil -} - -func (p *PBM) BackupsList(limit int64) ([]BackupMeta, error) { - cur, err := p.Conn.Database(DB).Collection(BcpCollection).Find( - p.ctx, - bson.M{}, - options.Find().SetLimit(limit).SetSort(bson.D{{"start_ts", -1}}), - ) - if err != nil { - return nil, errors.Wrap(err, "query mongo") - } - defer cur.Close(p.ctx) - - backups := []BackupMeta{} - for cur.Next(p.ctx) { - b := BackupMeta{} - err := cur.Decode(&b) - if err != nil { - return nil, errors.Wrap(err, "message decode") - } - if b.Type == "" { - b.Type = LogicalBackup - } - backups = append(backups, b) - } - - return backups, cur.Err() -} - -func (p *PBM) BackupsDoneList(after *primitive.Timestamp, limit int64, order int) ([]BackupMeta, error) { - q := bson.D{{"status", StatusDone}} - if after != nil { - q = append(q, bson.E{"last_write_ts", bson.M{"$gte": after}}) - } - - cur, err := p.Conn.Database(DB).Collection(BcpCollection).Find( - p.ctx, - q, - options.Find().SetLimit(limit).SetSort(bson.D{{"last_write_ts", order}}), - ) - if err != nil { - return nil, errors.Wrap(err, "query mongo") - } - defer cur.Close(p.ctx) - - backups := []BackupMeta{} - for cur.Next(p.ctx) { - b := BackupMeta{} - err := cur.Decode(&b) - if err != nil { - return nil, errors.Wrap(err, "message decode") - } - backups = append(backups, b) - } - - return backups, cur.Err() -} - -// ClusterMembers returns list of replicasets current cluster consists of -// (shards + configserver). The list would consist of on rs if cluster is -// a non-sharded rs. -func (p *PBM) ClusterMembers() ([]Shard, error) { - // it would be a config server in sharded cluster - inf, err := p.GetNodeInfo() - if err != nil { - return nil, errors.Wrap(err, "define cluster state") - } - - if inf.IsMongos() || inf.IsSharded() { - return getClusterMembersImpl(p.ctx, p.Conn) - } - - shards := []Shard{{ - RS: inf.SetName, - Host: inf.SetName + "/" + strings.Join(inf.Hosts, ","), - }} - return shards, nil -} - -func getClusterMembersImpl(ctx context.Context, m *mongo.Client) ([]Shard, error) { - res := m.Database("admin").RunCommand(ctx, bson.D{{"getShardMap", 1}}) - if err := res.Err(); err != nil { - return nil, errors.WithMessage(err, "query") - } - - // the map field is mapping of shard names to replset uri - // if shard name is not set, mongodb will provide unique name for it - // (e.g. 
the replset name of the shard) - // for configsvr, key name is "config" - var shardMap struct{ Map map[string]string } - if err := res.Decode(&shardMap); err != nil { - return nil, errors.WithMessage(err, "decode") - } - - shards := make([]Shard, 0, len(shardMap.Map)) - for id, host := range shardMap.Map { - if id == "" || strings.ContainsAny(id, "/:") { - // till 4.2, map field is like connStrings (added in 4.4) - // and key is uri of the directly (w/o mongos) connected replset - // skip not shard name - continue - } - - rs, _, _ := strings.Cut(host, "/") - shards = append(shards, Shard{ - ID: id, - RS: rs, - Host: host, - }) - } - - return shards, nil -} - // GetShards gets list of shards -func (p *PBM) GetShards() ([]Shard, error) { - cur, err := p.Conn.Database("config").Collection("shards").Find(p.ctx, bson.M{}) +func (p *PBM) GetShards(ctx context.Context) ([]topo.Shard, error) { + cur, err := p.Conn.ConfigDatabase().Collection("shards").Find(ctx, bson.M{}) if err != nil { return nil, errors.Wrap(err, "query mongo") } - defer cur.Close(p.ctx) + defer cur.Close(ctx) - shards := []Shard{} - for cur.Next(p.ctx) { - s := Shard{} + shards := []topo.Shard{} + for cur.Next(ctx) { + s := topo.Shard{} err := cur.Decode(&s) if err != nil { return nil, errors.Wrap(err, "message decode") @@ -1002,158 +68,3 @@ func (p *PBM) GetShards() ([]Shard, error) { return shards, cur.Err() } - -// Context returns object context -func (p *PBM) Context() context.Context { - return p.ctx -} - -// GetNodeInfo returns mongo node info -func (p *PBM) GetNodeInfo() (*NodeInfo, error) { - inf, err := GetNodeInfo(p.ctx, p.Conn) - if err != nil { - return nil, errors.Wrap(err, "get NodeInfo") - } - if inf.IsMongos() { - return inf, nil - } - - opts := struct { - Parsed MongodOpts `bson:"parsed" json:"parsed"` - }{} - err = p.Conn.Database("admin").RunCommand(p.ctx, bson.D{{"getCmdLineOpts", 1}}).Decode(&opts) - if err != nil { - return nil, errors.Wrap(err, "get mongod options") - } - inf.opts = opts.Parsed - - return inf, nil -} - -// GetNodeInfo returns mongo node info -func (p *PBM) GetFeatureCompatibilityVersion() (string, error) { - return getFeatureCompatibilityVersion(p.ctx, p.Conn) -} - -// ClusterTime returns mongo's current cluster time -func (p *PBM) ClusterTime() (primitive.Timestamp, error) { - // Make a read to force the cluster timestamp update. - // Otherwise, cluster timestamp could remain the same between node info reads, - // while in fact time has been moved forward. 
- err := p.Conn.Database(DB).Collection(LockCollection).FindOne(p.ctx, bson.D{}).Err() - if err != nil && !errors.Is(err, mongo.ErrNoDocuments) { - return primitive.Timestamp{}, errors.Wrap(err, "void read") - } - - inf, err := p.GetNodeInfo() - if err != nil { - return primitive.Timestamp{}, errors.Wrap(err, "get NodeInfo") - } - - if inf.ClusterTime == nil { - return primitive.Timestamp{}, errors.Wrap(err, "no clusterTime in response") - } - - return inf.ClusterTime.ClusterTime, nil -} - -func (p *PBM) LogGet(r *log.LogRequest, limit int64) (*log.Entries, error) { - return log.Get(p.Conn.Database(DB).Collection(LogCollection), r, limit, false) -} - -func (p *PBM) LogGetExactSeverity(r *log.LogRequest, limit int64) (*log.Entries, error) { - return log.Get(p.Conn.Database(DB).Collection(LogCollection), r, limit, true) -} - -// SetBalancerStatus sets balancer status -func (p *PBM) SetBalancerStatus(m BalancerMode) error { - var cmd string - - switch m { - case BalancerModeOn: - cmd = "_configsvrBalancerStart" - case BalancerModeOff: - cmd = "_configsvrBalancerStop" - default: - return errors.Errorf("unknown mode %s", m) - } - - err := p.Conn.Database("admin").RunCommand(p.ctx, bson.D{{cmd, 1}}).Err() - if err != nil { - return errors.Wrap(err, "run mongo command") - } - return nil -} - -// GetBalancerStatus returns balancer status -func (p *PBM) GetBalancerStatus() (*BalancerStatus, error) { - inf := &BalancerStatus{} - err := p.Conn.Database("admin").RunCommand(p.ctx, bson.D{{"_configsvrBalancerStatus", 1}}).Decode(inf) - if err != nil { - return nil, errors.Wrap(err, "run mongo command") - } - return inf, nil -} - -type Epoch primitive.Timestamp - -func (p *PBM) GetEpoch() (Epoch, error) { - c, err := p.GetConfig() - if err != nil { - return Epoch{}, errors.Wrap(err, "get config") - } - - return Epoch(c.Epoch), nil -} - -func (p *PBM) ResetEpoch() (Epoch, error) { - ct, err := p.ClusterTime() - if err != nil { - return Epoch{}, errors.Wrap(err, "get cluster time") - } - _, err = p.Conn.Database(DB).Collection(ConfigCollection).UpdateOne( - p.ctx, - bson.D{}, - bson.M{"$set": bson.M{"epoch": ct}}, - ) - - return Epoch(ct), err -} - -func (e Epoch) TS() primitive.Timestamp { - return primitive.Timestamp(e) -} - -// CopyColl copy documents matching the given filter and return number of copied documents -func CopyColl(ctx context.Context, from, to *mongo.Collection, filter interface{}) (int, error) { - cur, err := from.Find(ctx, filter) - if err != nil { - return 0, errors.Wrap(err, "create cursor") - } - defer cur.Close(ctx) - - n := 0 - for cur.Next(ctx) { - _, err = to.InsertOne(ctx, cur.Current) - if err != nil { - return 0, errors.Wrap(err, "insert document") - } - n++ - } - - return n, nil -} - -func BackupCursorName(s string) string { - return strings.NewReplacer("-", "", ":", "").Replace(s) -} - -func ConfSvrConn(ctx context.Context, cn *mongo.Client) (string, error) { - csvr := struct { - URI string `bson:"configsvrConnectionString"` - }{} - err := cn.Database("admin").Collection("system.version"). 
- FindOne(ctx, bson.D{{"_id", "shardIdentity"}}).Decode(&csvr) - - return csvr.URI, err -} diff --git a/pbm/restore.go b/pbm/restore.go deleted file mode 100644 index 092348be8..000000000 --- a/pbm/restore.go +++ /dev/null @@ -1,382 +0,0 @@ -package pbm - -import ( - "bytes" - "fmt" - "sort" - "strconv" - "time" - - "github.com/mongodb/mongo-tools/common/db" - "github.com/pkg/errors" - "go.mongodb.org/mongo-driver/bson" - "go.mongodb.org/mongo-driver/bson/primitive" - "go.mongodb.org/mongo-driver/mongo" - "go.mongodb.org/mongo-driver/mongo/options" - - "github.com/percona/percona-backup-mongodb/pbm/storage/s3" -) - -const ExternalRsMetaFile = "pbm.rsmeta.%s.json" - -type RestoreMeta struct { - Status Status `bson:"status" json:"status"` - Error string `bson:"error,omitempty" json:"error,omitempty"` - Name string `bson:"name" json:"name"` - OPID string `bson:"opid" json:"opid"` - Backup string `bson:"backup" json:"backup"` - BcpChain []string `bson:"bcp_chain" json:"bcp_chain"` // for incremental - Namespaces []string `bson:"nss,omitempty" json:"nss,omitempty"` - StartPITR int64 `bson:"start_pitr" json:"start_pitr"` - PITR int64 `bson:"pitr" json:"pitr"` - Replsets []RestoreReplset `bson:"replsets" json:"replsets"` - Hb primitive.Timestamp `bson:"hb" json:"hb"` - StartTS int64 `bson:"start_ts" json:"start_ts"` - LastTransitionTS int64 `bson:"last_transition_ts" json:"last_transition_ts"` - Conditions Conditions `bson:"conditions" json:"conditions"` - Type BackupType `bson:"type" json:"type"` - Leader string `bson:"l,omitempty" json:"l,omitempty"` - Stat *RestoreStat `bson:"stat,omitempty" json:"stat,omitempty"` -} - -type RestoreStat struct { - RS map[string]map[string]RestoreRSMetrics `bson:"rs,omitempty" json:"rs,omitempty"` -} -type RestoreRSMetrics struct { - DistTxn DistTxnStat `bson:"txn,omitempty" json:"txn,omitempty"` - Download s3.DownloadStat `bson:"download,omitempty" json:"download,omitempty"` -} - -type DistTxnStat struct { - // Partial is the num of transactions that were allied on other shards - // but can't be applied on this one since not all prepare messages got - // into the oplog (shouldn't happen). - Partial int `bson:"partial" json:"partial"` - // ShardUncommitted is the number of uncommitted transactions before - // the sync. Basically, the transaction is full but no commit message - // in the oplog of this shard. - ShardUncommitted int `bson:"shard_uncommitted" json:"shard_uncommitted"` - // LeftUncommitted is the num of transactions that remain uncommitted - // after the sync. The transaction is full but no commit message in the - // oplog of any shard. 
- LeftUncommitted int `bson:"left_uncommitted" json:"left_uncommitted"` -} - -type RestoreShardStat struct { - Txn DistTxnStat `json:"txn"` - D *s3.DownloadStat `json:"d"` -} - -type RestoreReplset struct { - Name string `bson:"name" json:"name"` - StartTS int64 `bson:"start_ts" json:"start_ts"` - Status Status `bson:"status" json:"status"` - CommittedTxn []RestoreTxn `bson:"committed_txn" json:"committed_txn"` - CommittedTxnSet bool `bson:"txn_set" json:"txn_set"` - PartialTxn []db.Oplog `bson:"partial_txn" json:"partial_txn"` - CurrentOp primitive.Timestamp `bson:"op" json:"op"` - LastTransitionTS int64 `bson:"last_transition_ts" json:"last_transition_ts"` - LastWriteTS primitive.Timestamp `bson:"last_write_ts" json:"last_write_ts"` - Nodes []RestoreNode `bson:"nodes,omitempty" json:"nodes,omitempty"` - Error string `bson:"error,omitempty" json:"error,omitempty"` - Conditions Conditions `bson:"conditions" json:"conditions"` - Hb primitive.Timestamp `bson:"hb" json:"hb"` - Stat RestoreShardStat `bson:"stat" json:"stat"` -} - -type Conditions []*Condition - -func (b Conditions) Len() int { return len(b) } -func (b Conditions) Less(i, j int) bool { return b[i].Timestamp < b[j].Timestamp } -func (b Conditions) Swap(i, j int) { b[i], b[j] = b[j], b[i] } - -// Insert keeps conditions asc sorted by Timestamp -func (b *Conditions) Insert(c *Condition) { - i := sort.Search(len(*b), func(i int) bool { return []*Condition(*b)[i].Timestamp >= c.Timestamp }) - *b = append(*b, &Condition{}) - copy([]*Condition(*b)[i+1:], []*Condition(*b)[i:]) - []*Condition(*b)[i] = c -} - -type RestoreNode struct { - Name string `bson:"name" json:"name"` - Status Status `bson:"status" json:"status"` - LastTransitionTS int64 `bson:"last_transition_ts" json:"last_transition_ts"` - Error string `bson:"error,omitempty" json:"error,omitempty"` - Conditions Conditions `bson:"conditions" json:"conditions"` - Hb primitive.Timestamp `bson:"hb" json:"hb"` -} - -type TxnState string - -const ( - TxnCommit TxnState = "commit" - TxnPrepare TxnState = "prepare" - TxnAbort TxnState = "abort" - TxnUnknown TxnState = "" -) - -type RestoreTxn struct { - ID string `bson:"id" json:"id"` - Ctime primitive.Timestamp `bson:"ts" json:"ts"` // commit timestamp of the transaction - State TxnState `bson:"state" json:"state"` -} - -func (t RestoreTxn) Encode() []byte { - return []byte(fmt.Sprintf("txn:%d,%d:%s:%s", t.Ctime.T, t.Ctime.I, t.ID, t.State)) -} - -func (t *RestoreTxn) Decode(b []byte) error { - for k, v := range bytes.SplitN(bytes.TrimSpace(b), []byte{':'}, 4) { - switch k { - case 0: - case 1: - if si := bytes.SplitN(v, []byte{','}, 2); len(si) == 2 { - tt, err := strconv.ParseInt(string(si[0]), 10, 64) - if err != nil { - return errors.Wrap(err, "parse clusterTime T") - } - ti, err := strconv.ParseInt(string(si[1]), 10, 64) - if err != nil { - return errors.Wrap(err, "parse clusterTime I") - } - - t.Ctime = primitive.Timestamp{T: uint32(tt), I: uint32(ti)} - } - case 2: - t.ID = string(v) - case 3: - t.State = TxnState(string(v)) - } - } - - return nil -} - -func (t RestoreTxn) String() string { - return fmt.Sprintf("<%s> [%s] %v", t.ID, t.State, t.Ctime) -} - -func (p *PBM) RestoreSetRSTxn(name, rsName string, txn []RestoreTxn) error { - _, err := p.Conn.Database(DB).Collection(RestoresCollection).UpdateOne( - p.ctx, - bson.D{{"name", name}, {"replsets.name", rsName}}, - bson.D{{"$set", bson.M{"replsets.$.committed_txn": txn, "replsets.$.txn_set": true}}}, - ) - - return err -} - -func (p *PBM) RestoreSetRSStat(name, rsName 
string, stat RestoreShardStat) error { - _, err := p.Conn.Database(DB).Collection(RestoresCollection).UpdateOne( - p.ctx, - bson.D{{"name", name}, {"replsets.name", rsName}}, - bson.D{{"$set", bson.M{"replsets.$.stat": stat}}}, - ) - - return err -} - -func (p *PBM) RestoreSetStat(name string, stat RestoreStat) error { - _, err := p.Conn.Database(DB).Collection(RestoresCollection).UpdateOne( - p.ctx, - bson.D{{"name", name}}, - bson.D{{"$set", bson.M{"stat": stat}}}, - ) - - return err -} - -func (p *PBM) RestoreSetRSPartTxn(name, rsName string, txn []db.Oplog) error { - _, err := p.Conn.Database(DB).Collection(RestoresCollection).UpdateOne( - p.ctx, - bson.D{{"name", name}, {"replsets.name", rsName}}, - bson.D{{"$set", bson.M{"replsets.$.partial_txn": txn}}}, - ) - - return err -} - -func (p *PBM) SetCurrentOp(name, rsName string, ts primitive.Timestamp) error { - _, err := p.Conn.Database(DB).Collection(RestoresCollection).UpdateOne( - p.ctx, - bson.D{{"name", name}, {"replsets.name", rsName}}, - bson.D{{"$set", bson.M{"replsets.$.op": ts}}}, - ) - - return err -} - -func (p *PBM) SetRestoreMeta(m *RestoreMeta) error { - m.LastTransitionTS = m.StartTS - m.Conditions = append(m.Conditions, &Condition{ - Timestamp: m.StartTS, - Status: m.Status, - }) - - _, err := p.Conn.Database(DB).Collection(RestoresCollection).InsertOne(p.ctx, m) - - return err -} - -func (p *PBM) GetRestoreMetaByOPID(opid string) (*RestoreMeta, error) { - return p.getRestoreMeta(bson.D{{"opid", opid}}) -} - -func (p *PBM) GetRestoreMeta(name string) (*RestoreMeta, error) { - return p.getRestoreMeta(bson.D{{"name", name}}) -} - -func (p *PBM) getRestoreMeta(clause bson.D) (*RestoreMeta, error) { - res := p.Conn.Database(DB).Collection(RestoresCollection).FindOne(p.ctx, clause) - if err := res.Err(); err != nil { - if errors.Is(err, mongo.ErrNoDocuments) { - return nil, ErrNotFound - } - return nil, errors.Wrap(err, "get") - } - r := &RestoreMeta{} - err := res.Decode(r) - return r, errors.Wrap(err, "decode") -} - -// GetLastRestore returns last successfully finished restore -// and nil if there is no such restore yet. 
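
A caller-side sketch of the "no such restore yet" case described in the comment above, written against the method as it stood before this patch. It assumes the ErrNotFound sentinel that getRestoreMeta in this file returns when nothing matches, plus an fmt import; the helper name is hypothetical:

    // Hypothetical caller: ErrNotFound means "nothing restored so far", not a failure.
    func reportLastRestore(p *PBM) error {
        last, err := p.GetLastRestore()
        switch {
        case errors.Is(err, ErrNotFound):
            fmt.Println("no finished restore yet")
            return nil
        case err != nil:
            return errors.Wrap(err, "get last restore")
        }
        fmt.Printf("last restore %s finished, pitr=%d\n", last.Name, last.PITR)
        return nil
    }
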
-func (p *PBM) GetLastRestore() (*RestoreMeta, error) { - r := &RestoreMeta{} - - res := p.Conn.Database(DB).Collection(RestoresCollection).FindOne( - p.ctx, - bson.D{{"status", StatusDone}}, - options.FindOne().SetSort(bson.D{{"start_ts", -1}}), - ) - if err := res.Err(); err != nil { - if errors.Is(err, mongo.ErrNoDocuments) { - return nil, ErrNotFound - } - return nil, errors.Wrap(err, "get") - } - err := res.Decode(r) - return r, errors.Wrap(err, "decode") -} - -func (p *PBM) AddRestoreRSMeta(name string, rs RestoreReplset) error { - rs.LastTransitionTS = rs.StartTS - rs.Conditions = append(rs.Conditions, &Condition{ - Timestamp: rs.StartTS, - Status: rs.Status, - }) - _, err := p.Conn.Database(DB).Collection(RestoresCollection).UpdateOne( - p.ctx, - bson.D{{"name", name}}, - bson.D{{"$addToSet", bson.M{"replsets": rs}}}, - ) - - return err -} - -func (p *PBM) RestoreHB(name string) error { - ts, err := p.ClusterTime() - if err != nil { - return errors.Wrap(err, "read cluster time") - } - - _, err = p.Conn.Database(DB).Collection(RestoresCollection).UpdateOne( - p.ctx, - bson.D{{"name", name}}, - bson.D{ - {"$set", bson.M{"hb": ts}}, - }, - ) - - return errors.Wrap(err, "write into db") -} - -func (p *PBM) ChangeRestoreStateOPID(opid string, s Status, msg string) error { - return p.changeRestoreState(bson.D{{"name", opid}}, s, msg) -} - -func (p *PBM) ChangeRestoreState(name string, s Status, msg string) error { - return p.changeRestoreState(bson.D{{"name", name}}, s, msg) -} - -func (p *PBM) changeRestoreState(clause bson.D, s Status, msg string) error { - ts := time.Now().UTC().Unix() - _, err := p.Conn.Database(DB).Collection(RestoresCollection).UpdateOne( - p.ctx, - clause, - bson.D{ - {"$set", bson.M{"status": s}}, - {"$set", bson.M{"last_transition_ts": ts}}, - {"$set", bson.M{"error": msg}}, - {"$push", bson.M{"conditions": Condition{Timestamp: ts, Status: s, Error: msg}}}, - }, - ) - - return err -} - -func (p *PBM) SetRestoreBackup(name, backupName string, nss []string) error { - d := bson.M{"backup": backupName} - if nss != nil { - d["nss"] = nss - } - - _, err := p.Conn.Database(DB).Collection(RestoresCollection).UpdateOne( - p.ctx, - bson.D{{"name", name}}, - bson.D{{"$set", d}}, - ) - - return err -} - -func (p *PBM) SetOplogTimestamps(name string, start, end int64) error { - _, err := p.Conn.Database(DB).Collection(RestoresCollection).UpdateOne( - p.ctx, - bson.M{"name": name}, - bson.M{"$set": bson.M{"start_pitr": start, "pitr": end}}, - ) - - return err -} - -func (p *PBM) ChangeRestoreRSState(name, rsName string, s Status, msg string) error { - ts := time.Now().UTC().Unix() - _, err := p.Conn.Database(DB).Collection(RestoresCollection).UpdateOne( - p.ctx, - bson.D{{"name", name}, {"replsets.name", rsName}}, - bson.D{ - {"$set", bson.M{"replsets.$.status": s}}, - {"$set", bson.M{"replsets.$.last_transition_ts": ts}}, - {"$set", bson.M{"replsets.$.error": msg}}, - {"$push", bson.M{"replsets.$.conditions": Condition{Timestamp: ts, Status: s, Error: msg}}}, - }, - ) - - return err -} - -func (p *PBM) RestoresList(limit int64) ([]RestoreMeta, error) { - cur, err := p.Conn.Database(DB).Collection(RestoresCollection).Find( - p.ctx, - bson.M{}, - options.Find().SetLimit(limit).SetSort(bson.D{{"start_ts", -1}}), - ) - if err != nil { - return nil, errors.Wrap(err, "query mongo") - } - defer cur.Close(p.ctx) - - restores := []RestoreMeta{} - for cur.Next(p.ctx) { - r := RestoreMeta{} - err := cur.Decode(&r) - if err != nil { - return nil, errors.Wrap(err, "message decode") - } - 
restores = append(restores, r) - } - - return restores, cur.Err() -} diff --git a/pbm/restore/logical.go b/pbm/restore/logical.go index 60f3882dc..efa0fee3c 100644 --- a/pbm/restore/logical.go +++ b/pbm/restore/logical.go @@ -2,7 +2,6 @@ package restore import ( "bytes" - "context" "fmt" "io" "path" @@ -13,20 +12,28 @@ import ( "github.com/mongodb/mongo-tools/common/db" "github.com/mongodb/mongo-tools/common/idx" "github.com/mongodb/mongo-tools/mongorestore" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" + "github.com/percona/percona-backup-mongodb/internal/archive" + "github.com/percona/percona-backup-mongodb/internal/compress" + "github.com/percona/percona-backup-mongodb/internal/config" + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/lock" + "github.com/percona/percona-backup-mongodb/internal/log" + "github.com/percona/percona-backup-mongodb/internal/query" + "github.com/percona/percona-backup-mongodb/internal/storage" + "github.com/percona/percona-backup-mongodb/internal/topo" + "github.com/percona/percona-backup-mongodb/internal/types" + "github.com/percona/percona-backup-mongodb/internal/util" + "github.com/percona/percona-backup-mongodb/internal/version" "github.com/percona/percona-backup-mongodb/pbm" - "github.com/percona/percona-backup-mongodb/pbm/archive" - "github.com/percona/percona-backup-mongodb/pbm/compress" - "github.com/percona/percona-backup-mongodb/pbm/log" "github.com/percona/percona-backup-mongodb/pbm/oplog" - "github.com/percona/percona-backup-mongodb/pbm/sel" "github.com/percona/percona-backup-mongodb/pbm/snapshot" - "github.com/percona/percona-backup-mongodb/pbm/storage" - "github.com/percona/percona-backup-mongodb/version" ) type Restore struct { @@ -34,7 +41,7 @@ type Restore struct { cn *pbm.PBM node *pbm.Node stopHB chan struct{} - nodeInfo *pbm.NodeInfo + nodeInfo *topo.NodeInfo stg storage.Storage // Shards to participate in restore. Num of shards in bcp could // be less than in the cluster and this is ok. Only these shards @@ -42,7 +49,7 @@ type Restore struct { // status checks etc.) // // Only the restore leader would have this info. - shards []pbm.Shard + shards []topo.Shard // rsMap is mapping between old and new replset names. used for data restore. 
// empty if all replset names are the same rsMap map[string]string @@ -79,9 +86,9 @@ func (r *Restore) Close() { } } -func (r *Restore) exit(err error, l *log.Event) { +func (r *Restore) exit(ctx context.Context, err error, l *log.Event) { if err != nil && !errors.Is(err, ErrNoDataForShard) { - ferr := r.MarkFailed(err) + ferr := r.MarkFailed(ctx, err) if ferr != nil { l.Error("mark restore as failed `%v`: %v", err, ferr) } @@ -93,35 +100,35 @@ func (r *Restore) exit(err error, l *log.Event) { // Snapshot do the snapshot's (mongo dump) restore // //nolint:nonamedreturns -func (r *Restore) Snapshot(cmd *pbm.RestoreCmd, opid pbm.OPID, l *log.Event) (err error) { - defer func() { r.exit(err, l) }() +func (r *Restore) Snapshot(ctx context.Context, cmd *types.RestoreCmd, opid types.OPID, l *log.Event) (err error) { + defer func() { r.exit(context.Background(), err, l) }() - bcp, err := SnapshotMeta(r.cn, cmd.BackupName, r.stg) + bcp, err := SnapshotMeta(ctx, r.cn, cmd.BackupName, r.stg) if err != nil { return err } - err = r.init(cmd.Name, opid, l) + err = r.init(ctx, cmd.Name, opid, l) if err != nil { return err } nss := cmd.Namespaces - if !sel.IsSelective(nss) { + if !util.IsSelective(nss) { nss = bcp.Namespaces } - err = r.cn.SetRestoreBackup(r.name, cmd.BackupName, nss) + err = query.SetRestoreBackup(ctx, r.cn.Conn, r.name, cmd.BackupName, nss) if err != nil { return errors.Wrap(err, "set backup name") } - err = r.checkSnapshot(bcp) + err = r.checkSnapshot(ctx, bcp) if err != nil { return err } - err = r.setShards(bcp) + err = r.setShards(ctx, bcp) if err != nil { return err } @@ -130,35 +137,35 @@ func (r *Restore) Snapshot(cmd *pbm.RestoreCmd, opid pbm.OPID, l *log.Event) (er r.sMap = r.getShardMapping(bcp) } - dump, oplog, err := r.snapshotObjects(bcp) + dump, oplogName, err := r.snapshotObjects(bcp) if err != nil { return err } - err = r.toState(pbm.StatusRunning, &pbm.WaitActionStart) + err = r.toState(ctx, defs.StatusRunning, &defs.WaitActionStart) if err != nil { return err } - err = r.RunSnapshot(dump, bcp, nss) + err = r.RunSnapshot(ctx, dump, bcp, nss) if err != nil { return err } - err = r.toState(pbm.StatusDumpDone, nil) + err = r.toState(ctx, defs.StatusDumpDone, nil) if err != nil { return err } oplogOption := &applyOplogOption{nss: nss} - if r.nodeInfo.IsConfigSrv() && sel.IsSelective(nss) { + if r.nodeInfo.IsConfigSrv() && util.IsSelective(nss) { oplogOption.nss = []string{"config.databases"} oplogOption.filter = newConfigsvrOpFilter(nss) } - err = r.applyOplog([]pbm.OplogChunk{{ + err = r.applyOplog(ctx, []oplog.OplogChunk{{ RS: r.nodeInfo.SetName, - FName: oplog, + FName: oplogName, Compression: bcp.Compression, StartTS: bcp.FirstWriteTS, EndTS: bcp.LastWriteTS, @@ -167,21 +174,21 @@ func (r *Restore) Snapshot(cmd *pbm.RestoreCmd, opid pbm.OPID, l *log.Event) (er return err } - err = r.restoreIndexes(oplogOption.nss) + err = r.restoreIndexes(ctx, oplogOption.nss) if err != nil { - return errors.WithMessage(err, "restore indexes") + return errors.Wrap(err, "restore indexes") } - if err = r.updateRouterConfig(r.cn.Context()); err != nil { - return errors.WithMessage(err, "update router config") + if err = r.updateRouterConfig(ctx); err != nil { + return errors.Wrap(err, "update router config") } - return r.Done() + return r.Done(ctx) } // newConfigsvrOpFilter filters out not needed ops during selective backup on configsvr func newConfigsvrOpFilter(nss []string) oplog.OpFilter { - selected := sel.MakeSelectedPred(nss) + selected := util.MakeSelectedPred(nss) return func(r 
*oplog.Record) bool { if r.Namespace != "config.databases" { @@ -205,15 +212,15 @@ func newConfigsvrOpFilter(nss []string) oplog.OpFilter { // PITR do the Point-in-Time Recovery // //nolint:nonamedreturns -func (r *Restore) PITR(cmd *pbm.RestoreCmd, opid pbm.OPID, l *log.Event) (err error) { - defer func() { r.exit(err, l) }() +func (r *Restore) PITR(ctx context.Context, cmd *types.RestoreCmd, opid types.OPID, l *log.Event) (err error) { + defer func() { r.exit(context.Background(), err, l) }() - err = r.init(cmd.Name, opid, l) + err = r.init(ctx, cmd.Name, opid, l) if err != nil { return err } - bcp, err := SnapshotMeta(r.cn, cmd.BackupName, r.stg) + bcp, err := SnapshotMeta(ctx, r.cn, cmd.BackupName, r.stg) if err != nil { return errors.Wrap(err, "get base backup") } @@ -228,23 +235,23 @@ func (r *Restore) PITR(cmd *pbm.RestoreCmd, opid pbm.OPID, l *log.Event) (err er } if r.nodeInfo.IsLeader() { - err = r.cn.SetOplogTimestamps(r.name, 0, int64(cmd.OplogTS.T)) + err = query.SetOplogTimestamps(ctx, r.cn.Conn, r.name, 0, int64(cmd.OplogTS.T)) if err != nil { return errors.Wrap(err, "set PITR timestamp") } } - err = r.cn.SetRestoreBackup(r.name, bcp.Name, nss) + err = query.SetRestoreBackup(ctx, r.cn.Conn, r.name, bcp.Name, nss) if err != nil { return errors.Wrap(err, "set backup name") } - err = r.checkSnapshot(bcp) + err = r.checkSnapshot(ctx, bcp) if err != nil { return err } - err = r.setShards(bcp) + err = r.setShards(ctx, bcp) if err != nil { return err } @@ -254,75 +261,75 @@ func (r *Restore) PITR(cmd *pbm.RestoreCmd, opid pbm.OPID, l *log.Event) (err er bcpShards[i] = bcp.Replsets[i].Name } - if !Contains(bcpShards, pbm.MakeReverseRSMapFunc(r.rsMap)(r.nodeInfo.SetName)) { - return r.Done() // skip. no backup for current rs + if !Contains(bcpShards, util.MakeReverseRSMapFunc(r.rsMap)(r.nodeInfo.SetName)) { + return r.Done(ctx) // skip. 
no backup for current rs } if r.nodeInfo.IsConfigSrv() { r.sMap = r.getShardMapping(bcp) } - chunks, err := r.chunks(bcp.LastWriteTS, cmd.OplogTS) + chunks, err := r.chunks(ctx, bcp.LastWriteTS, cmd.OplogTS) if err != nil { return err } - dump, oplog, err := r.snapshotObjects(bcp) + dump, oplogName, err := r.snapshotObjects(bcp) if err != nil { return err } - err = r.toState(pbm.StatusRunning, &pbm.WaitActionStart) + err = r.toState(ctx, defs.StatusRunning, &defs.WaitActionStart) if err != nil { return err } - err = r.RunSnapshot(dump, bcp, nss) + err = r.RunSnapshot(ctx, dump, bcp, nss) if err != nil { return err } - err = r.toState(pbm.StatusDumpDone, nil) + err = r.toState(ctx, defs.StatusDumpDone, nil) if err != nil { return err } - snapshotChunk := pbm.OplogChunk{ + snapshotChunk := oplog.OplogChunk{ RS: r.nodeInfo.SetName, - FName: oplog, + FName: oplogName, Compression: bcp.Compression, StartTS: bcp.FirstWriteTS, EndTS: bcp.LastWriteTS, } oplogOption := applyOplogOption{end: &cmd.OplogTS, nss: nss} - if r.nodeInfo.IsConfigSrv() && sel.IsSelective(nss) { + if r.nodeInfo.IsConfigSrv() && util.IsSelective(nss) { oplogOption.nss = []string{"config.databases"} oplogOption.filter = newConfigsvrOpFilter(nss) } - err = r.applyOplog(append([]pbm.OplogChunk{snapshotChunk}, chunks...), &oplogOption) + err = r.applyOplog(ctx, append([]oplog.OplogChunk{snapshotChunk}, chunks...), &oplogOption) if err != nil { return err } - err = r.restoreIndexes(oplogOption.nss) + err = r.restoreIndexes(ctx, oplogOption.nss) if err != nil { - return errors.WithMessage(err, "restore indexes") + return errors.Wrap(err, "restore indexes") } - if err = r.updateRouterConfig(r.cn.Context()); err != nil { - return errors.WithMessage(err, "update router config") + if err = r.updateRouterConfig(ctx); err != nil { + return errors.Wrap(err, "update router config") } - return r.Done() + return r.Done(ctx) } //nolint:nonamedreturns -func (r *Restore) ReplayOplog(cmd *pbm.ReplayCmd, opid pbm.OPID, l *log.Event) (err error) { - defer func() { r.exit(err, l) }() +func (r *Restore) ReplayOplog(ctx context.Context, cmd *types.ReplayCmd, opid types.OPID, l *log.Event) (err error) { + defer func() { r.exit(context.Background(), err, l) }() - if err = r.init(cmd.Name, opid, l); err != nil { + if err = r.init(ctx, cmd.Name, opid, l); err != nil { return errors.Wrap(err, "init") } @@ -330,38 +337,38 @@ func (r *Restore) ReplayOplog(cmd *pbm.ReplayCmd, opid pbm.OPID, l *log.Event) ( return errors.Errorf("%q is not primary", r.nodeInfo.SetName) } - r.shards, err = r.cn.ClusterMembers() + r.shards, err = topo.ClusterMembers(ctx, r.cn.Conn.MongoClient()) if err != nil { return errors.Wrap(err, "get cluster members") } if r.nodeInfo.IsLeader() { - err := r.cn.SetOplogTimestamps(r.name, int64(cmd.Start.T), int64(cmd.End.T)) + err := query.SetOplogTimestamps(ctx, r.cn.Conn, r.name, int64(cmd.Start.T), int64(cmd.End.T)) if err != nil { return errors.Wrap(err, "set oplog timestamps") } } - oplogShards, err := r.cn.AllOplogRSNames(r.cn.Context(), cmd.Start, cmd.End) + oplogShards, err := oplog.AllOplogRSNames(ctx, r.cn.Conn, cmd.Start, cmd.End) if err != nil { return err } err = r.checkTopologyForOplog(r.shards, oplogShards) if err != nil { - return errors.WithMessage(err, "topology") + return errors.Wrap(err, "topology") } - if !Contains(oplogShards, pbm.MakeReverseRSMapFunc(r.rsMap)(r.nodeInfo.SetName)) { - return r.Done() // skip. 
no oplog for current rs + if !Contains(oplogShards, util.MakeReverseRSMapFunc(r.rsMap)(r.nodeInfo.SetName)) { + return r.Done(ctx) // skip. no oplog for current rs } - opChunks, err := r.chunks(cmd.Start, cmd.End) + opChunks, err := r.chunks(ctx, cmd.Start, cmd.End) if err != nil { return err } - err = r.toState(pbm.StatusRunning, &pbm.WaitActionStart) + err = r.toState(ctx, defs.StatusRunning, &defs.WaitActionStart) if err != nil { return err } @@ -371,18 +378,18 @@ func (r *Restore) ReplayOplog(cmd *pbm.ReplayCmd, opid pbm.OPID, l *log.Event) ( end: &cmd.End, unsafe: true, } - if err = r.applyOplog(opChunks, &oplogOption); err != nil { + if err = r.applyOplog(ctx, opChunks, &oplogOption); err != nil { return err } - return r.Done() + return r.Done(ctx) } -func (r *Restore) init(name string, opid pbm.OPID, l *log.Event) error { +func (r *Restore) init(ctx context.Context, name string, opid types.OPID, l *log.Event) error { r.log = l var err error - r.nodeInfo, err = r.node.GetInfo() + r.nodeInfo, err = topo.GetNodeInfoExt(ctx, r.node.Session()) if err != nil { return errors.Wrap(err, "get node data") } @@ -393,21 +400,21 @@ func (r *Restore) init(name string, opid pbm.OPID, l *log.Event) error { r.name = name r.opid = opid.String() if r.nodeInfo.IsLeader() { - ts, err := r.cn.ClusterTime() + ts, err := topo.GetClusterTime(ctx, r.cn.Conn) if err != nil { return errors.Wrap(err, "init restore meta, read cluster time") } - meta := &pbm.RestoreMeta{ - Type: pbm.LogicalBackup, + meta := &types.RestoreMeta{ + Type: defs.LogicalBackup, OPID: r.opid, Name: r.name, StartTS: time.Now().Unix(), - Status: pbm.StatusStarting, - Replsets: []pbm.RestoreReplset{}, + Status: defs.StatusStarting, + Replsets: []types.RestoreReplset{}, Hb: ts, } - err = r.cn.SetRestoreMeta(meta) + err = query.SetRestoreMeta(ctx, r.cn.Conn, meta) if err != nil { return errors.Wrap(err, "write backup meta to db") } @@ -420,7 +427,7 @@ func (r *Restore) init(name string, opid pbm.OPID, l *log.Event) error { for { select { case <-tk.C: - err := r.cn.RestoreHB(r.name) + err := query.RestoreHB(ctx, r.cn.Conn, r.name) if err != nil { l.Error("send heartbeat: %v", err) } @@ -433,24 +440,24 @@ func (r *Restore) init(name string, opid pbm.OPID, l *log.Event) error { // Waiting for StatusStarting to move further. // In case some preparations has to be done before the restore. 
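
The waitForStatus call below blocks until the leader moves the restore to the requested status. A rough sketch of that polling pattern, using only names introduced by this patch (query.GetRestoreMeta, connect.Client, defs.Status); the helper itself, its name, and the ticker interval are illustrative, the real implementation lives elsewhere in this package:

    // Sketch: poll restore metadata until it reaches the wanted status or fails.
    func waitRestoreStatus(ctx context.Context, conn connect.Client, name string, want defs.Status) error {
        tk := time.NewTicker(time.Second)
        defer tk.Stop()
        for {
            select {
            case <-ctx.Done():
                return ctx.Err()
            case <-tk.C:
                meta, err := query.GetRestoreMeta(ctx, conn, name)
                if err != nil {
                    return errors.Wrap(err, "get restore metadata")
                }
                switch meta.Status {
                case want:
                    return nil
                case defs.StatusError:
                    return errors.Errorf("restore failed: %s", meta.Error)
                }
            }
        }
    }
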
- err = r.waitForStatus(pbm.StatusStarting) + err = r.waitForStatus(ctx, defs.StatusStarting) if err != nil { return errors.Wrap(err, "waiting for start") } - rsMeta := pbm.RestoreReplset{ + rsMeta := types.RestoreReplset{ Name: r.nodeInfo.SetName, StartTS: time.Now().UTC().Unix(), - Status: pbm.StatusStarting, - Conditions: pbm.Conditions{}, + Status: defs.StatusStarting, + Conditions: types.Conditions{}, } - err = r.cn.AddRestoreRSMeta(r.name, rsMeta) + err = query.AddRestoreRSMeta(ctx, r.cn.Conn, r.name, rsMeta) if err != nil { return errors.Wrap(err, "add shard's metadata") } - r.stg, err = r.cn.GetStorage(r.log) + r.stg, err = util.GetStorage(ctx, r.cn.Conn, r.log) if err != nil { return errors.Wrap(err, "get backup storage") } @@ -458,8 +465,8 @@ func (r *Restore) init(name string, opid pbm.OPID, l *log.Event) error { return nil } -func (r *Restore) checkTopologyForOplog(currShards []pbm.Shard, oplogShards []string) error { - mapRS, mapRevRS := pbm.MakeRSMapFunc(r.rsMap), pbm.MakeReverseRSMapFunc(r.rsMap) +func (r *Restore) checkTopologyForOplog(currShards []topo.Shard, oplogShards []string) error { + mapRS, mapRevRS := util.MakeRSMapFunc(r.rsMap), util.MakeReverseRSMapFunc(r.rsMap) shards := make(map[string]struct{}, len(currShards)) for i := range r.shards { @@ -486,13 +493,13 @@ func (r *Restore) checkTopologyForOplog(currShards []pbm.Shard, oplogShards []st // chunks defines chunks of oplog slice in given range, ensures its integrity (timeline // is contiguous - there are no gaps), checks for respective files on storage and returns // chunks list if all checks passed -func (r *Restore) chunks(from, to primitive.Timestamp) ([]pbm.OplogChunk, error) { - return chunks(r.cn, r.stg, from, to, r.nodeInfo.SetName, r.rsMap) +func (r *Restore) chunks(ctx context.Context, from, to primitive.Timestamp) ([]oplog.OplogChunk, error) { + return chunks(ctx, r.cn, r.stg, from, to, r.nodeInfo.SetName, r.rsMap) } -func SnapshotMeta(cn *pbm.PBM, backupName string, stg storage.Storage) (*pbm.BackupMeta, error) { - bcp, err := cn.GetBackupMeta(backupName) - if errors.Is(err, pbm.ErrNotFound) { +func SnapshotMeta(ctx context.Context, cn *pbm.PBM, backupName string, stg storage.Storage) (*types.BackupMeta, error) { + bcp, err := query.GetBackupMeta(ctx, cn.Conn, backupName) + if errors.Is(err, errors.ErrNotFound) { bcp, err = GetMetaFromStore(stg, backupName) } if err != nil { @@ -505,18 +512,18 @@ func SnapshotMeta(cn *pbm.PBM, backupName string, stg storage.Storage) (*pbm.Bac // setShards defines and set shards participating in the restore // cluster migth have more shards then the backup and it's ok. But all // backup's shards must have respective destination on the target cluster. 
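
The rule stated in the comment above (the cluster may have more shards than the backup, but every backup replset must map onto an existing one) condenses into a small check. A sketch under the assumption that types.BackupMeta, topo.Shard and util.MakeRSMapFunc behave as used throughout this patch; the helper name is hypothetical and a strings import is assumed:

    // Sketch: every backup replset, after rsMap renaming, needs a destination replset.
    func checkShardsMapped(bcp *types.BackupMeta, members []topo.Shard, rsMap map[string]string) error {
        mapRS := util.MakeRSMapFunc(rsMap)
        have := make(map[string]struct{}, len(members))
        for _, m := range members {
            have[m.RS] = struct{}{}
        }
        var missing []string
        for _, rs := range bcp.Replsets {
            if _, ok := have[mapRS(rs.Name)]; !ok {
                missing = append(missing, rs.Name)
            }
        }
        if len(missing) != 0 {
            return errors.Errorf("no destination replset for backup shard(s): %s",
                strings.Join(missing, ", "))
        }
        return nil
    }
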
-func (r *Restore) setShards(bcp *pbm.BackupMeta) error { - s, err := r.cn.ClusterMembers() +func (r *Restore) setShards(ctx context.Context, bcp *types.BackupMeta) error { + s, err := topo.ClusterMembers(ctx, r.cn.Conn.MongoClient()) if err != nil { return errors.Wrap(err, "get cluster members") } - fl := make(map[string]pbm.Shard, len(s)) + fl := make(map[string]topo.Shard, len(s)) for _, rs := range s { fl[rs.RS] = rs } - mapRS, mapRevRS := pbm.MakeRSMapFunc(r.rsMap), pbm.MakeReverseRSMapFunc(r.rsMap) + mapRS, mapRevRS := util.MakeRSMapFunc(r.rsMap), util.MakeReverseRSMapFunc(r.rsMap) var nors []string for _, sh := range bcp.Replsets { @@ -543,8 +550,8 @@ func (r *Restore) setShards(bcp *pbm.BackupMeta) error { var ErrNoDataForShard = errors.New("no data for shard") //nolint:nonamedreturns -func (r *Restore) snapshotObjects(bcp *pbm.BackupMeta) (dump, oplog string, err error) { - mapRS := pbm.MakeRSMapFunc(r.rsMap) +func (r *Restore) snapshotObjects(bcp *types.BackupMeta) (dump, oplog string, err error) { + mapRS := util.MakeRSMapFunc(r.rsMap) var ok bool for _, v := range bcp.Replsets { @@ -577,21 +584,21 @@ func (r *Restore) snapshotObjects(bcp *pbm.BackupMeta) (dump, oplog string, err return dump, oplog, nil } -func (r *Restore) checkSnapshot(bcp *pbm.BackupMeta) error { - if bcp.Status != pbm.StatusDone { +func (r *Restore) checkSnapshot(ctx context.Context, bcp *types.BackupMeta) error { + if bcp.Status != defs.StatusDone { return errors.Errorf("backup wasn't successful: status: %s, error: %s", bcp.Status, bcp.Error()) } - if !version.CompatibleWith(version.Current().Version, pbm.BreakingChangesMap[bcp.Type]) { + if !version.CompatibleWith(version.Current().Version, version.BreakingChangesMap[bcp.Type]) { return errors.Errorf("backup PBM v%s is incompatible with the running PBM v%s", bcp.PBMVersion, version.Current().Version) } if bcp.FCV != "" { - fcv, err := r.node.GetFeatureCompatibilityVersion() + fcv, err := version.GetFCV(ctx, r.node.Session()) if err != nil { - return errors.WithMessage(err, "get featureCompatibilityVersion") + return errors.Wrap(err, "get featureCompatibilityVersion") } if bcp.FCV != fcv { @@ -600,9 +607,9 @@ func (r *Restore) checkSnapshot(bcp *pbm.BackupMeta) error { return nil } } else { - ver, err := r.node.GetMongoVersion() + ver, err := version.GetMongoVersion(ctx, r.node.Session()) if err != nil { - return errors.WithMessage(err, "get mongo version") + return errors.Wrap(err, "get mongo version") } if majmin(bcp.MongoVersion) != majmin(ver.VersionString) { @@ -616,12 +623,12 @@ func (r *Restore) checkSnapshot(bcp *pbm.BackupMeta) error { return nil } -func (r *Restore) toState(status pbm.Status, wait *time.Duration) error { +func (r *Restore) toState(ctx context.Context, status defs.Status, wait *time.Duration) error { r.log.Info("moving to state %s", status) - return toState(r.cn, status, r.name, r.nodeInfo, r.reconcileStatus, wait) + return toState(ctx, r.cn, status, r.name, r.nodeInfo, r.reconcileStatus, wait) } -func (r *Restore) RunSnapshot(dump string, bcp *pbm.BackupMeta, nss []string) error { +func (r *Restore) RunSnapshot(ctx context.Context, dump string, bcp *types.BackupMeta, nss []string) error { var rdr io.ReadCloser var err error @@ -637,34 +644,34 @@ func (r *Restore) RunSnapshot(dump string, bcp *pbm.BackupMeta, nss []string) er return errors.Wrapf(err, "decompress object %s", dump) } } else { - if !sel.IsSelective(nss) { + if !util.IsSelective(nss) { nss = bcp.Namespaces } - if !sel.IsSelective(nss) { + if !util.IsSelective(nss) { 
nss = []string{"*.*"} } - mapRS := pbm.MakeReverseRSMapFunc(r.rsMap) - if r.nodeInfo.IsConfigSrv() && sel.IsSelective(nss) { + mapRS := util.MakeReverseRSMapFunc(r.rsMap) + if r.nodeInfo.IsConfigSrv() && util.IsSelective(nss) { // restore cluster specific configs only - return r.configsvrRestore(bcp, nss, mapRS) + return r.configsvrRestore(ctx, bcp, nss, mapRS) } - var cfg pbm.Config + var cfg config.Config // get pbm.Config for creating a storage.Storage later. // while r.stg is already created storage for the restore, // it triggers data race warnings during concurrent file downloading/reading. // for that, it's better to create a new storage for each file - cfg, err = r.cn.GetConfig() + cfg, err = config.GetConfig(ctx, r.cn.Conn) if err != nil { - return errors.WithMessage(err, "get config") + return errors.Wrap(err, "get config") } rdr, err = snapshot.DownloadDump( func(ns string) (io.ReadCloser, error) { - stg, err := pbm.Storage(cfg, r.log) + stg, err := util.StorageFromConfig(cfg, r.log) if err != nil { - return nil, errors.WithMessage(err, "get storage") + return nil, errors.Wrap(err, "get storage") } // while importing backup made by RS with another name // that current RS we can't use our r.node.RS() to point files @@ -682,7 +689,7 @@ func (r *Restore) RunSnapshot(dump string, bcp *pbm.BackupMeta, nss []string) er err = r.loadIndexesFrom(bytes.NewReader(data)) if err != nil { - return nil, errors.WithMessage(err, "load indexes") + return nil, errors.Wrap(err, "load indexes") } rdr = io.NopCloser(bytes.NewReader(data)) @@ -691,7 +698,7 @@ func (r *Restore) RunSnapshot(dump string, bcp *pbm.BackupMeta, nss []string) er return rdr, nil }, bcp.Compression, - sel.MakeSelectedPred(nss)) + util.MakeSelectedPred(nss)) } if err != nil { return err @@ -699,27 +706,27 @@ func (r *Restore) RunSnapshot(dump string, bcp *pbm.BackupMeta, nss []string) er defer rdr.Close() // Restore snapshot (mongorestore) - err = r.snapshot(rdr) + err = r.snapshot(ctx, rdr) if err != nil { return errors.Wrap(err, "mongorestore") } - if sel.IsSelective(nss) { + if util.IsSelective(nss) { return nil } r.log.Info("restoring users and roles") - cusr, err := r.node.CurrentUser() + cusr, err := r.node.CurrentUser(ctx) if err != nil { return errors.Wrap(err, "get current user") } - err = r.swapUsers(r.cn.Context(), cusr) + err = r.swapUsers(ctx, cusr) if err != nil { return errors.Wrap(err, "swap users 'n' roles") } - err = pbm.DropTMPcoll(r.cn.Context(), r.node.Session()) + err = pbm.DropTMPcoll(ctx, r.node.Session()) if err != nil { r.log.Warning("drop tmp collections: %v", err) } @@ -730,14 +737,14 @@ func (r *Restore) RunSnapshot(dump string, bcp *pbm.BackupMeta, nss []string) er func (r *Restore) loadIndexesFrom(rdr io.Reader) error { meta, err := archive.ReadMetadata(rdr) if err != nil { - return errors.WithMessage(err, "read metadata") + return errors.Wrap(err, "read metadata") } for _, ns := range meta.Namespaces { var md mongorestore.Metadata err := bson.UnmarshalExtJSON([]byte(ns.Metadata), true, &md) if err != nil { - return errors.WithMessagef(err, "unmarshal %s.%s metadata", + return errors.Wrapf(err, "unmarshal %s.%s metadata", ns.Database, ns.Collection) } @@ -761,10 +768,10 @@ func (r *Restore) loadIndexesFrom(rdr io.Reader) error { return nil } -func (r *Restore) restoreIndexes(nss []string) error { +func (r *Restore) restoreIndexes(ctx context.Context, nss []string) error { r.log.Debug("building indexes up") - isSelected := sel.MakeSelectedPred(nss) + isSelected := util.MakeSelectedPred(nss) for _, 
ns := range r.indexCatalog.Namespaces() { if ns := archive.NSify(ns.DB, ns.Collection); !isSelected(ns) { r.log.Debug("skip restore indexes for %q", ns) @@ -801,9 +808,9 @@ func (r *Restore) restoreIndexes(nss []string) error { r.log.Info("restoring indexes for %s.%s: %s", ns.DB, ns.Collection, strings.Join(indexNames, ", ")) - err := r.node.Session().Database(ns.DB).RunCommand(r.cn.Context(), rawCommand).Err() + err := r.node.Session().Database(ns.DB).RunCommand(ctx, rawCommand).Err() if err != nil { - return errors.WithMessagef(err, "createIndexes for %s.%s", ns.DB, ns.Collection) + return errors.Wrapf(err, "createIndexes for %s.%s", ns.DB, ns.Collection) } } @@ -822,24 +829,24 @@ func (r *Restore) updateRouterConfig(ctx context.Context) error { } } - res := r.cn.Conn.Database(pbm.DB).RunCommand(ctx, primitive.M{"flushRouterConfig": 1}) - return errors.WithMessage(res.Err(), "flushRouterConfig") + res := r.cn.Conn.AdminCommand(ctx, primitive.M{"flushRouterConfig": 1}) + return errors.Wrap(res.Err(), "flushRouterConfig") } -func updateRouterTables(ctx context.Context, m *mongo.Client, sMap map[string]string) error { +func updateRouterTables(ctx context.Context, m connect.Client, sMap map[string]string) error { if err := updateDatabasesRouterTable(ctx, m, sMap); err != nil { - return errors.WithMessage(err, "databases") + return errors.Wrap(err, "databases") } if err := updateChunksRouterTable(ctx, m, sMap); err != nil { - return errors.WithMessage(err, "chunks") + return errors.Wrap(err, "chunks") } return nil } -func updateDatabasesRouterTable(ctx context.Context, m *mongo.Client, sMap map[string]string) error { - coll := m.Database("config").Collection("databases") +func updateDatabasesRouterTable(ctx context.Context, m connect.Client, sMap map[string]string) error { + coll := m.ConfigDatabase().Collection("databases") oldNames := make(primitive.A, 0, len(sMap)) for k := range sMap { @@ -849,7 +856,7 @@ func updateDatabasesRouterTable(ctx context.Context, m *mongo.Client, sMap map[s q := primitive.M{"primary": primitive.M{"$in": oldNames}} cur, err := coll.Find(ctx, q) if err != nil { - return errors.WithMessage(err, "query") + return errors.Wrap(err, "query") } models := make([]mongo.WriteModel, 0) @@ -859,7 +866,7 @@ func updateDatabasesRouterTable(ctx context.Context, m *mongo.Client, sMap map[s Primary string `bson:"primary"` } if err := cur.Decode(&doc); err != nil { - return errors.WithMessage(err, "decode") + return errors.Wrap(err, "decode") } m := mongo.NewUpdateOneModel() @@ -869,18 +876,18 @@ func updateDatabasesRouterTable(ctx context.Context, m *mongo.Client, sMap map[s models = append(models, m) } if err := cur.Err(); err != nil { - return errors.WithMessage(err, "cursor") + return errors.Wrap(err, "cursor") } if len(models) == 0 { return nil } _, err = coll.BulkWrite(ctx, models) - return errors.WithMessage(err, "bulk write") + return errors.Wrap(err, "bulk write") } -func updateChunksRouterTable(ctx context.Context, m *mongo.Client, sMap map[string]string) error { - coll := m.Database("config").Collection("chunks") +func updateChunksRouterTable(ctx context.Context, m connect.Client, sMap map[string]string) error { + coll := m.ConfigDatabase().Collection("chunks") oldNames := make(primitive.A, 0, len(sMap)) for k := range sMap { @@ -890,7 +897,7 @@ func updateChunksRouterTable(ctx context.Context, m *mongo.Client, sMap map[stri q := primitive.M{"history.shard": primitive.M{"$in": oldNames}} cur, err := coll.Find(ctx, q) if err != nil { - return errors.WithMessage(err, 
"query") + return errors.Wrap(err, "query") } models := make([]mongo.WriteModel, 0) @@ -903,7 +910,7 @@ func updateChunksRouterTable(ctx context.Context, m *mongo.Client, sMap map[stri } `bson:"history"` } if err := cur.Decode(&doc); err != nil { - return errors.WithMessage(err, "decode") + return errors.Wrap(err, "decode") } updates := primitive.M{} @@ -923,21 +930,21 @@ func updateChunksRouterTable(ctx context.Context, m *mongo.Client, sMap map[stri models = append(models, m) } if err := cur.Err(); err != nil { - return errors.WithMessage(err, "cursor") + return errors.Wrap(err, "cursor") } if len(models) == 0 { return nil } _, err = coll.BulkWrite(ctx, models) - return errors.WithMessage(err, "bulk write") + return errors.Wrap(err, "bulk write") } -func (r *Restore) setcommittedTxn(txn []pbm.RestoreTxn) error { - return r.cn.RestoreSetRSTxn(r.name, r.nodeInfo.SetName, txn) +func (r *Restore) setcommittedTxn(ctx context.Context, txn []types.RestoreTxn) error { + return query.RestoreSetRSTxn(ctx, r.cn.Conn, r.name, r.nodeInfo.SetName, txn) } -func (r *Restore) getcommittedTxn() (map[string]primitive.Timestamp, error) { +func (r *Restore) getcommittedTxn(ctx context.Context) (map[string]primitive.Timestamp, error) { txn := make(map[string]primitive.Timestamp) shards := make(map[string]struct{}) @@ -946,12 +953,12 @@ func (r *Restore) getcommittedTxn() (map[string]primitive.Timestamp, error) { } for len(shards) > 0 { - bmeta, err := r.cn.GetRestoreMeta(r.name) + bmeta, err := query.GetRestoreMeta(ctx, r.cn.Conn, r.name) if err != nil { return nil, errors.Wrap(err, "get restore metadata") } - clusterTime, err := r.cn.ClusterTime() + clusterTime, err := topo.GetClusterTime(ctx, r.cn.Conn) if err != nil { return nil, errors.Wrap(err, "read cluster time") } @@ -963,8 +970,8 @@ func (r *Restore) getcommittedTxn() (map[string]primitive.Timestamp, error) { continue } // check if node alive - lock, err := r.cn.GetLockData(&pbm.LockHeader{ - Type: pbm.CmdRestore, + lck, err := lock.GetLockData(ctx, r.cn.Conn, &lock.LockHeader{ + Type: defs.CmdRestore, OPID: r.opid, Replset: shard.Name, }) @@ -975,18 +982,18 @@ func (r *Restore) getcommittedTxn() (map[string]primitive.Timestamp, error) { if err != nil { return nil, errors.Wrapf(err, "unable to read lock for shard %s", shard.Name) } - if lock.Heartbeat.T+pbm.StaleFrameSec < clusterTime.T { - return nil, errors.Errorf("lost shard %s, last beat ts: %d", shard.Name, lock.Heartbeat.T) + if lck.Heartbeat.T+defs.StaleFrameSec < clusterTime.T { + return nil, errors.Errorf("lost shard %s, last beat ts: %d", shard.Name, lck.Heartbeat.T) } } - if shard.Status == pbm.StatusError { + if shard.Status == defs.StatusError { return nil, errors.Errorf("shard %s failed with: %v", shard.Name, shard.Error) } if shard.CommittedTxnSet { for _, t := range shard.CommittedTxn { - if t.State == pbm.TxnCommit { + if t.State == types.TxnCommit { txn[t.ID] = t.Ctime } } @@ -999,15 +1006,15 @@ func (r *Restore) getcommittedTxn() (map[string]primitive.Timestamp, error) { return txn, nil } -func (r *Restore) applyOplog(chunks []pbm.OplogChunk, options *applyOplogOption) error { - mgoV, err := r.node.GetMongoVersion() +func (r *Restore) applyOplog(ctx context.Context, chunks []oplog.OplogChunk, options *applyOplogOption) error { + mgoV, err := version.GetMongoVersion(ctx, r.node.Session()) if err != nil || len(mgoV.Version) < 1 { return errors.Wrap(err, "define mongo version") } - stat := pbm.RestoreShardStat{} - partial, err := applyOplog(r.node.Session(), chunks, options, 
r.nodeInfo.IsSharded(), + stat := types.RestoreShardStat{} + partial, err := applyOplog(ctx, r.node.Session(), chunks, options, r.nodeInfo.IsSharded(), r.indexCatalog, r.setcommittedTxn, r.getcommittedTxn, &stat.Txn, - mgoV, r.stg, r.log) + &mgoV, r.stg, r.log) if err != nil { return errors.Wrap(err, "reply oplog") } @@ -1018,13 +1025,13 @@ func (r *Restore) applyOplog(chunks []pbm.OplogChunk, options *applyOplogOption) tops = append(tops, t.Oplog...) } - err = r.cn.RestoreSetRSPartTxn(r.name, r.nodeInfo.SetName, tops) + err = query.RestoreSetRSPartTxn(ctx, r.cn.Conn, r.name, r.nodeInfo.SetName, tops) if err != nil { return errors.Wrap(err, "set partial transactions") } } - err = r.cn.RestoreSetRSStat(r.name, r.nodeInfo.SetName, stat) + err = query.RestoreSetRSStat(ctx, r.cn.Conn, r.name, r.nodeInfo.SetName, stat) if err != nil { r.log.Warning("applyOplog: failed to set stat: %v", err) } @@ -1032,8 +1039,8 @@ func (r *Restore) applyOplog(chunks []pbm.OplogChunk, options *applyOplogOption) return nil } -func (r *Restore) snapshot(input io.Reader) error { - cfg, err := r.cn.GetConfig() +func (r *Restore) snapshot(ctx context.Context, input io.Reader) error { + cfg, err := config.GetConfig(ctx, r.cn.Conn) if err != nil { return errors.Wrap(err, "unable to get PBM config settings") } @@ -1049,19 +1056,19 @@ func (r *Restore) snapshot(input io.Reader) error { // Done waits for the replicas to finish the job // and marks restore as done -func (r *Restore) Done() error { - err := r.cn.ChangeRestoreRSState(r.name, r.nodeInfo.SetName, pbm.StatusDone, "") +func (r *Restore) Done(ctx context.Context) error { + err := query.ChangeRestoreRSState(ctx, r.cn.Conn, r.name, r.nodeInfo.SetName, defs.StatusDone, "") if err != nil { return errors.Wrap(err, "set shard's StatusDone") } if r.nodeInfo.IsLeader() { - err = r.reconcileStatus(pbm.StatusDone, nil) + err = r.reconcileStatus(ctx, defs.StatusDone, nil) if err != nil { return errors.Wrap(err, "check cluster for the restore done") } - m, err := r.cn.GetRestoreMeta(r.name) + m, err := query.GetRestoreMeta(ctx, r.cn.Conn, r.name) if err != nil { return errors.Wrap(err, "update stat: get restore meta") } @@ -1069,11 +1076,11 @@ func (r *Restore) Done() error { return nil } - stat := make(map[string]map[string]pbm.RestoreRSMetrics) + stat := make(map[string]map[string]types.RestoreRSMetrics) for _, rs := range m.Replsets { - stat[rs.Name] = map[string]pbm.RestoreRSMetrics{ - "_primary": {DistTxn: pbm.DistTxnStat{ + stat[rs.Name] = map[string]types.RestoreRSMetrics{ + "_primary": {DistTxn: types.DistTxnStat{ Partial: rs.Stat.Txn.Partial, ShardUncommitted: rs.Stat.Txn.ShardUncommitted, LeftUncommitted: rs.Stat.Txn.LeftUncommitted, @@ -1081,7 +1088,7 @@ func (r *Restore) Done() error { } } - err = r.cn.RestoreSetStat(r.name, pbm.RestoreStat{RS: stat}) + err = query.RestoreSetStat(ctx, r.cn.Conn, r.name, types.RestoreStat{RS: stat}) if err != nil { return errors.Wrap(err, "set restore stat") } @@ -1090,7 +1097,7 @@ func (r *Restore) Done() error { return nil } -func (r *Restore) swapUsers(ctx context.Context, exclude *pbm.AuthInfo) error { +func (r *Restore) swapUsers(ctx context.Context, exclude *types.AuthInfo) error { rolesC := r.node.Session().Database("admin").Collection("system.roles") eroles := []string{} @@ -1098,7 +1105,7 @@ func (r *Restore) swapUsers(ctx context.Context, exclude *pbm.AuthInfo) error { eroles = append(eroles, r.DB+"."+r.Role) } - curr, err := r.node.Session().Database(pbm.DB).Collection(pbm.TmpRolesCollection). 
+ curr, err := r.node.Session().Database(defs.DB).Collection(defs.TmpRolesCollection). Find(ctx, bson.M{"_id": bson.M{"$nin": eroles}}) if err != nil { return errors.Wrap(err, "create cursor for tmpRoles") @@ -1126,7 +1133,7 @@ func (r *Restore) swapUsers(ctx context.Context, exclude *pbm.AuthInfo) error { if len(exclude.Users) > 0 { user = exclude.Users[0].DB + "." + exclude.Users[0].User } - cur, err := r.node.Session().Database(pbm.DB).Collection(pbm.TmpUsersCollection). + cur, err := r.node.Session().Database(defs.DB).Collection(defs.TmpUsersCollection). Find(ctx, bson.M{"_id": bson.M{"$ne": user}}) if err != nil { return errors.Wrap(err, "create cursor for tmpUsers") @@ -1154,26 +1161,26 @@ func (r *Restore) swapUsers(ctx context.Context, exclude *pbm.AuthInfo) error { return nil } -func (r *Restore) reconcileStatus(status pbm.Status, timeout *time.Duration) error { +func (r *Restore) reconcileStatus(ctx context.Context, status defs.Status, timeout *time.Duration) error { if timeout != nil { - err := convergeClusterWithTimeout(r.cn, r.name, r.opid, r.shards, status, *timeout) + err := convergeClusterWithTimeout(ctx, r.cn, r.name, r.opid, r.shards, status, *timeout) return errors.Wrap(err, "convergeClusterWithTimeout") } - err := convergeCluster(r.cn, r.name, r.opid, r.shards, status) + err := convergeCluster(ctx, r.cn, r.name, r.opid, r.shards, status) return errors.Wrap(err, "convergeCluster") } -func (r *Restore) waitForStatus(status pbm.Status) error { +func (r *Restore) waitForStatus(ctx context.Context, status defs.Status) error { r.log.Debug("waiting for '%s' status", status) - return waitForStatus(r.cn, r.name, status) + return waitForStatus(ctx, r.cn, r.name, status) } // MarkFailed sets the restore and rs state as failed with the given message -func (r *Restore) MarkFailed(e error) error { - err := r.cn.ChangeRestoreState(r.name, pbm.StatusError, e.Error()) +func (r *Restore) MarkFailed(ctx context.Context, e error) error { + err := query.ChangeRestoreState(ctx, r.cn.Conn, r.name, defs.StatusError, e.Error()) if err != nil { return errors.Wrap(err, "set restore state") } - err = r.cn.ChangeRestoreRSState(r.name, r.nodeInfo.SetName, pbm.StatusError, e.Error()) + err = query.ChangeRestoreRSState(ctx, r.cn.Conn, r.name, r.nodeInfo.SetName, defs.StatusError, e.Error()) return errors.Wrap(err, "set replset state") } diff --git a/pbm/restore/physical.go b/pbm/restore/physical.go index 51795a6b6..1c9db4f33 100644 --- a/pbm/restore/physical.go +++ b/pbm/restore/physical.go @@ -2,7 +2,6 @@ package restore import ( "bytes" - "context" "encoding/json" "fmt" "io" @@ -19,7 +18,6 @@ import ( "time" "github.com/mongodb/mongo-tools/common/db" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" @@ -27,12 +25,23 @@ import ( "golang.org/x/mod/semver" "gopkg.in/yaml.v2" + "github.com/percona/percona-backup-mongodb/internal/compress" + "github.com/percona/percona-backup-mongodb/internal/config" + "github.com/percona/percona-backup-mongodb/internal/connect" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/log" + "github.com/percona/percona-backup-mongodb/internal/query" + "github.com/percona/percona-backup-mongodb/internal/resync" + "github.com/percona/percona-backup-mongodb/internal/storage" + 
"github.com/percona/percona-backup-mongodb/internal/storage/s3" + "github.com/percona/percona-backup-mongodb/internal/topo" + "github.com/percona/percona-backup-mongodb/internal/types" + "github.com/percona/percona-backup-mongodb/internal/util" + "github.com/percona/percona-backup-mongodb/internal/version" "github.com/percona/percona-backup-mongodb/pbm" - "github.com/percona/percona-backup-mongodb/pbm/compress" - "github.com/percona/percona-backup-mongodb/pbm/log" - "github.com/percona/percona-backup-mongodb/pbm/storage" - "github.com/percona/percona-backup-mongodb/pbm/storage/s3" - "github.com/percona/percona-backup-mongodb/version" + "github.com/percona/percona-backup-mongodb/pbm/oplog" ) const ( @@ -49,8 +58,8 @@ const ( type files struct { BcpName string - Cmpr compress.CompressionType - Data []pbm.File + Cmpr defs.CompressionType + Data []types.File // dbpath to cut from destination if there is any (see PBM-1058) dbpath string @@ -63,21 +72,21 @@ type PhysRestore struct { // an ephemeral port to restart mongod on during the restore tmpPort int tmpConf *os.File - rsConf *pbm.RSConfig // original replset config + rsConf *topo.RSConfig // original replset config shards map[string]string // original shards list on config server cfgConn string // shardIdentity configsvrConnectionString startTS int64 - secOpts *pbm.MongodOptsSec + secOpts *topo.MongodOptsSec name string opid string - nodeInfo *pbm.NodeInfo + nodeInfo *topo.NodeInfo stg storage.Storage - bcp *pbm.BackupMeta + bcp *types.BackupMeta files []files restoreTS primitive.Timestamp - confOpts pbm.RestoreConf + confOpts config.RestoreConf mongod string // location of mongod used for internal restarts @@ -100,15 +109,21 @@ type PhysRestore struct { rsMap map[string]string } -func NewPhysical(cn *pbm.PBM, node *pbm.Node, inf *pbm.NodeInfo, rsMap map[string]string) (*PhysRestore, error) { - opts, err := node.GetOpts(nil) +func NewPhysical( + ctx context.Context, + cn *pbm.PBM, + node *pbm.Node, + inf *topo.NodeInfo, + rsMap map[string]string, +) (*PhysRestore, error) { + opts, err := topo.GetMongodOpts(ctx, node.Session(), nil) if err != nil { return nil, errors.Wrap(err, "get mongo options") } p := opts.Storage.DBpath if p == "" { switch inf.ReplsetRole() { - case pbm.RoleConfigSrv: + case topo.RoleConfigSrv: p = defaultCSRSdbpath default: p = defaultRSdbpath @@ -119,7 +134,7 @@ func NewPhysical(cn *pbm.PBM, node *pbm.Node, inf *pbm.NodeInfo, rsMap map[strin opts.Net.Port = defaultPort } - rcf, err := node.GetRSconf() + rcf, err := topo.GetReplSetConfig(ctx, node.Session()) if err != nil { return nil, errors.Wrap(err, "get replset config") } @@ -127,9 +142,9 @@ func NewPhysical(cn *pbm.PBM, node *pbm.Node, inf *pbm.NodeInfo, rsMap map[strin var shards map[string]string var csvr string if inf.IsSharded() { - ss, err := cn.GetShards() + ss, err := cn.GetShards(ctx) if err != nil { - return nil, errors.WithMessage(err, "get shards") + return nil, errors.Wrap(err, "get shards") } shards = make(map[string]string) @@ -137,8 +152,8 @@ func NewPhysical(cn *pbm.PBM, node *pbm.Node, inf *pbm.NodeInfo, rsMap map[strin shards[s.ID] = s.Host } - if inf.ReplsetRole() != pbm.RoleConfigSrv { - csvr, err = node.ConfSvrConn() + if inf.ReplsetRole() != topo.RoleConfigSrv { + csvr, err = topo.ConfSvrConn(ctx, node.Session()) if err != nil { return nil, errors.Wrap(err, "get configsvrConnectionString") } @@ -207,7 +222,7 @@ func (r *PhysRestore) close(noerr, cleanup bool) { r.log.Warning("remove tmp mongod logs %s: %v", path.Join(r.dbpath, 
internalMongodLog), err) } extMeta := filepath.Join(r.dbpath, - fmt.Sprintf(pbm.ExternalRsMetaFile, pbm.MakeReverseRSMapFunc(r.rsMap)(r.nodeInfo.SetName))) + fmt.Sprintf(defs.ExternalRsMetaFile, util.MakeReverseRSMapFunc(r.rsMap)(r.nodeInfo.SetName))) err = os.Remove(extMeta) if err != nil && !errors.Is(err, os.ErrNotExist) { r.log.Warning("remove external rs meta <%s>: %v", extMeta, err) @@ -224,30 +239,30 @@ func (r *PhysRestore) close(noerr, cleanup bool) { } } -func (r *PhysRestore) flush() error { +func (r *PhysRestore) flush(ctx context.Context) error { r.log.Debug("shutdown server") - rsStat, err := r.node.GetReplsetStatus() + rsStat, err := topo.GetReplsetStatus(ctx, r.node.Session()) if err != nil { return errors.Wrap(err, "get replset status") } if r.nodeInfo.IsConfigSrv() { r.log.Debug("waiting for shards to shutdown") - _, err := r.waitFiles(pbm.StatusDown, r.syncPathDataShards, false) + _, err := r.waitFiles(defs.StatusDown, r.syncPathDataShards, false) if err != nil { return errors.Wrap(err, "wait for datashards to shutdown") } } for { - inf, err := r.node.GetInfo() + inf, err := topo.GetNodeInfoExt(ctx, r.node.Session()) if err != nil { return errors.Wrap(err, "get node info") } // single-node replica set won't stepdown do secondary // so we have to shut it down despite of role if !inf.IsPrimary || len(rsStat.Members) == 1 { - err = r.node.Shutdown() + err = r.node.Shutdown(ctx) if err != nil && strings.Contains(err.Error(), // wait a bit and let the node to stepdown "(ConflictingOperationInProgress) This node is already in the process of stepping down") { @@ -266,7 +281,7 @@ func (r *PhysRestore) flush() error { } if r.nodeInfo.IsPrimary { - err = r.stg.Save(r.syncPathRS+"."+string(pbm.StatusDown), + err = r.stg.Save(r.syncPathRS+"."+string(defs.StatusDown), okStatus(), -1) if err != nil { return errors.Wrap(err, "write replset StatusDown") @@ -355,18 +370,18 @@ func waitMgoShutdown(dbpath string) error { // │ └── rs.starting // //nolint:lll,nonamedreturns -func (r *PhysRestore) toState(status pbm.Status) (_ pbm.Status, err error) { +func (r *PhysRestore) toState(status defs.Status) (_ defs.Status, err error) { defer func() { if err != nil { - if r.nodeInfo.IsPrimary && status != pbm.StatusDone { - serr := r.stg.Save(r.syncPathRS+"."+string(pbm.StatusError), + if r.nodeInfo.IsPrimary && status != defs.StatusDone { + serr := r.stg.Save(r.syncPathRS+"."+string(defs.StatusError), errStatus(err), -1) if serr != nil { r.log.Error("toState: write replset error state `%v`: %v", err, serr) } } - if r.nodeInfo.IsClusterLeader() && status != pbm.StatusDone { - serr := r.stg.Save(r.syncPathCluster+"."+string(pbm.StatusError), + if r.nodeInfo.IsClusterLeader() && status != defs.StatusDone { + serr := r.stg.Save(r.syncPathCluster+"."+string(defs.StatusError), errStatus(err), -1) if serr != nil { r.log.Error("toState: write cluster error state `%v`: %v", err, serr) @@ -380,41 +395,41 @@ func (r *PhysRestore) toState(status pbm.Status) (_ pbm.Status, err error) { err = r.stg.Save(r.syncPathNode+"."+string(status), okStatus(), -1) if err != nil { - return pbm.StatusError, errors.Wrap(err, "write node state") + return defs.StatusError, errors.Wrap(err, "write node state") } - if r.nodeInfo.IsPrimary || status == pbm.StatusDone { + if r.nodeInfo.IsPrimary || status == defs.StatusDone { r.log.Info("waiting for `%s` status in rs %v", status, r.syncPathPeers) cstat, err := r.waitFiles(status, copyMap(r.syncPathPeers), false) if err != nil { - return pbm.StatusError, errors.Wrap(err, "wait for 
nodes in rs") + return defs.StatusError, errors.Wrap(err, "wait for nodes in rs") } err = r.stg.Save(r.syncPathRS+"."+string(cstat), okStatus(), -1) if err != nil { - return pbm.StatusError, errors.Wrap(err, "write replset state") + return defs.StatusError, errors.Wrap(err, "write replset state") } } - if r.nodeInfo.IsClusterLeader() || status == pbm.StatusDone { + if r.nodeInfo.IsClusterLeader() || status == defs.StatusDone { r.log.Info("waiting for shards %v", r.syncPathShards) cstat, err := r.waitFiles(status, copyMap(r.syncPathShards), true) if err != nil { - return pbm.StatusError, errors.Wrap(err, "wait for shards") + return defs.StatusError, errors.Wrap(err, "wait for shards") } err = r.stg.Save(r.syncPathCluster+"."+string(cstat), okStatus(), -1) if err != nil { - return pbm.StatusError, errors.Wrap(err, "write replset state") + return defs.StatusError, errors.Wrap(err, "write replset state") } } r.log.Info("waiting for cluster") cstat, err := r.waitFiles(status, map[string]struct{}{r.syncPathCluster: {}}, true) if err != nil { - return pbm.StatusError, errors.Wrap(err, "wait for cluster") + return defs.StatusError, errors.Wrap(err, "wait for cluster") } r.log.Debug("converged to state %s", cstat) @@ -423,7 +438,7 @@ func (r *PhysRestore) toState(status pbm.Status) (_ pbm.Status, err error) { } func (r *PhysRestore) getTSFromSyncFile(path string) (primitive.Timestamp, error) { - res, err := r.stg.SourceReader(path + "." + string(pbm.StatusExtTS)) + res, err := r.stg.SourceReader(path + "." + string(defs.StatusExtTS)) if err != nil { return primitive.Timestamp{}, errors.Wrap(err, "get timestamp") } @@ -485,12 +500,12 @@ func copyMap[K comparable, V any](m map[K]V) map[K]V { } func (r *PhysRestore) waitFiles( - status pbm.Status, + status defs.Status, objs map[string]struct{}, cluster bool, -) (pbm.Status, error) { +) (defs.Status, error) { if len(objs) == 0 { - return pbm.StatusError, errors.New("empty objects maps") + return defs.StatusError, errors.New("empty objects maps") } tk := time.NewTicker(time.Second * 5) @@ -502,25 +517,25 @@ func (r *PhysRestore) waitFiles( var haveDone bool for range tk.C { for f := range objs { - errFile := f + "." + string(pbm.StatusError) + errFile := f + "." + string(defs.StatusError) _, err := r.stg.FileStat(errFile) if err != nil && !errors.Is(err, storage.ErrNotExist) { - return pbm.StatusError, errors.Wrapf(err, "get file %s", errFile) + return defs.StatusError, errors.Wrapf(err, "get file %s", errFile) } if err == nil { r, err := r.stg.SourceReader(errFile) if err != nil { - return pbm.StatusError, errors.Wrapf(err, "open error file %s", errFile) + return defs.StatusError, errors.Wrapf(err, "open error file %s", errFile) } b, err := io.ReadAll(r) r.Close() if err != nil { - return pbm.StatusError, errors.Wrapf(err, "read error file %s", errFile) + return defs.StatusError, errors.Wrapf(err, "read error file %s", errFile) } - if status != pbm.StatusDone { - return pbm.StatusError, nodeError{filepath.Base(f), string(b)} + if status != defs.StatusDone { + return defs.StatusError, nodeError{filepath.Base(f), string(b)} } curErr = nodeError{filepath.Base(f), string(b)} delete(objs, f) @@ -530,8 +545,8 @@ func (r *PhysRestore) waitFiles( err = r.checkHB(f + "." 
+ syncHbSuffix) if err != nil { curErr = errors.Wrapf(err, "check heartbeat in %s.%s", f, syncHbSuffix) - if status != pbm.StatusDone { - return pbm.StatusError, curErr + if status != defs.StatusDone { + return defs.StatusError, curErr } delete(objs, f) continue @@ -539,23 +554,24 @@ func (r *PhysRestore) waitFiles( ok, err := checkFile(f+"."+string(status), r.stg) if err != nil { - return pbm.StatusError, errors.Wrapf(err, "check file %s", f+"."+string(status)) + return defs.StatusError, errors.Wrapf(err, "check file %s", f+"."+string(status)) } if !ok { - if status != pbm.StatusDone { + if status != defs.StatusDone { continue } - ok, err := checkFile(f+"."+string(pbm.StatusPartlyDone), r.stg) + ok, err := checkFile(f+"."+string(defs.StatusPartlyDone), r.stg) if err != nil { - return pbm.StatusError, errors.Wrapf(err, "check file %s", f+"."+string(pbm.StatusPartlyDone)) + return defs.StatusError, errors.Wrapf(err, + "check file %s", f+"."+string(defs.StatusPartlyDone)) } if !ok { continue } - retStatus = pbm.StatusPartlyDone + retStatus = defs.StatusPartlyDone } haveDone = true @@ -568,14 +584,14 @@ func (r *PhysRestore) waitFiles( } if haveDone && !cluster { - return pbm.StatusPartlyDone, nil + return defs.StatusPartlyDone, nil } - return pbm.StatusError, curErr + return defs.StatusError, curErr } } - return pbm.StatusError, storage.ErrNotExist + return defs.StatusError, storage.ErrNotExist } func checkFile(f string, stg storage.Storage) (bool, error) { @@ -677,23 +693,24 @@ func (l *logBuff) Flush() error { // //nolint:nonamedreturns func (r *PhysRestore) Snapshot( - cmd *pbm.RestoreCmd, + ctx context.Context, + cmd *types.RestoreCmd, pitr primitive.Timestamp, - opid pbm.OPID, + opid types.OPID, l *log.Event, stopAgentC chan<- struct{}, pauseHB func(), ) (err error) { l.Debug("port: %d", r.tmpPort) - meta := &pbm.RestoreMeta{ - Type: pbm.PhysicalBackup, + meta := &types.RestoreMeta{ + Type: defs.PhysicalBackup, OPID: opid.String(), Name: cmd.Name, Backup: cmd.BackupName, StartTS: time.Now().Unix(), - Status: pbm.StatusInit, - Replsets: []pbm.RestoreReplset{{Name: r.nodeInfo.Me}}, + Status: defs.StatusInit, + Replsets: []types.RestoreReplset{{Name: r.nodeInfo.Me}}, } if r.nodeInfo.IsClusterLeader() { meta.Leader = r.nodeInfo.Me + "/" + r.rsConf.ID @@ -710,7 +727,7 @@ func (r *PhysRestore) Snapshot( r.close(err == nil, progress.is(restoreStared) && !progress.is(restoreDone)) }() - err = r.init(cmd.Name, opid, l) + err = r.init(ctx, cmd.Name, opid, l) if err != nil { return errors.Wrap(err, "init") } @@ -720,7 +737,7 @@ func (r *PhysRestore) Snapshot( } if cmd.BackupName != "" { - err = r.prepareBackup(cmd.BackupName) + err = r.prepareBackup(ctx, cmd.BackupName) if err != nil { return err } @@ -730,45 +747,45 @@ func (r *PhysRestore) Snapshot( r.restoreTS = cmd.ExtTS } if cmd.External { - meta.Type = pbm.ExternalBackup + meta.Type = defs.ExternalBackup } else { meta.Type = r.bcp.Type } - var opChunks []pbm.OplogChunk + var opChunks []oplog.OplogChunk if !pitr.IsZero() { - opChunks, err = chunks(r.cn, r.stg, r.restoreTS, pitr, r.rsConf.ID, r.rsMap) + opChunks, err = chunks(ctx, r.cn, r.stg, r.restoreTS, pitr, r.rsConf.ID, r.rsMap) if err != nil { return err } } - if meta.Type == pbm.IncrementalBackup { + if meta.Type == defs.IncrementalBackup { meta.BcpChain = make([]string, 0, len(r.files)) for i := len(r.files) - 1; i >= 0; i-- { meta.BcpChain = append(meta.BcpChain, r.files[i].BcpName) } } - _, err = r.toState(pbm.StatusStarting) + _, err = r.toState(defs.StatusStarting) if err != nil { 
return errors.Wrap(err, "move to running state") } - l.Debug("%s", pbm.StatusStarting) + l.Debug("%s", defs.StatusStarting) // don't write logs to the mongo anymore // but dump it on storage r.cn.Logger().SefBuffer(&logBuff{ buf: &bytes.Buffer{}, - path: fmt.Sprintf("%s/%s/rs.%s/log/%s", pbm.PhysRestoresDir, r.name, r.rsConf.ID, r.nodeInfo.Me), + path: fmt.Sprintf("%s/%s/rs.%s/log/%s", defs.PhysRestoresDir, r.name, r.rsConf.ID, r.nodeInfo.Me), limit: 1 << 20, // 1Mb write: func(name string, data io.Reader) error { return r.stg.Save(name, data, -1) }, }) r.cn.Logger().PauseMgo() - _, err = r.toState(pbm.StatusRunning) + _, err = r.toState(defs.StatusRunning) if err != nil { - return errors.Wrapf(err, "moving to state %s", pbm.StatusRunning) + return errors.Wrapf(err, "moving to state %s", defs.StatusRunning) } // On this stage, the agent has to be closed on any outcome as mongod @@ -784,7 +801,7 @@ func (r *PhysRestore) Snapshot( pauseHB() l.Info("stopping mongod and flushing old data") - err = r.flush() + err = r.flush(ctx) if err != nil { return err } @@ -800,28 +817,28 @@ func (r *PhysRestore) Snapshot( // own (which sets the no-return point). progress |= restoreStared - var excfg *pbm.MongodOpts - var stats pbm.RestoreShardStat + var excfg *topo.MongodOpts + var stats types.RestoreShardStat if cmd.External { - _, err = r.toState(pbm.StatusCopyReady) + _, err = r.toState(defs.StatusCopyReady) if err != nil { - return errors.Wrapf(err, "moving to state %s", pbm.StatusCopyReady) + return errors.Wrapf(err, "moving to state %s", defs.StatusCopyReady) } l.Info("waiting for the datadir to be copied") - _, err := r.waitFiles(pbm.StatusCopyDone, map[string]struct{}{r.syncPathCluster: {}}, true) + _, err := r.waitFiles(defs.StatusCopyDone, map[string]struct{}{r.syncPathCluster: {}}, true) if err != nil { - return errors.Wrapf(err, "check %s state", pbm.StatusCopyDone) + return errors.Wrapf(err, "check %s state", defs.StatusCopyDone) } // try to read replset meta from the backup and use its data - setName := pbm.MakeReverseRSMapFunc(r.rsMap)(r.nodeInfo.SetName) - rsMetaF := filepath.Join(r.dbpath, fmt.Sprintf(pbm.ExternalRsMetaFile, setName)) + setName := util.MakeReverseRSMapFunc(r.rsMap)(r.nodeInfo.SetName) + rsMetaF := filepath.Join(r.dbpath, fmt.Sprintf(defs.ExternalRsMetaFile, setName)) conff, err := os.Open(rsMetaF) - var needFiles []pbm.File + var needFiles []types.File if err == nil { - rsMeta := &pbm.BackupReplset{} + rsMeta := &types.BackupReplset{} err := json.NewDecoder(conff).Decode(rsMeta) if err != nil { return errors.Wrap(err, "decode replset meta from the backup") @@ -896,9 +913,9 @@ func (r *PhysRestore) Snapshot( // next. 
progress |= restoreDone - stat, err := r.toState(pbm.StatusDone) + stat, err := r.toState(defs.StatusDone) if err != nil { - return errors.Wrapf(err, "moving to state %s", pbm.StatusDone) + return errors.Wrapf(err, "moving to state %s", defs.StatusDone) } err = r.writeStat(stats) @@ -924,7 +941,7 @@ var rmFromDatadir = map[string]struct{}{ } // removes obsolete files from the datadir -func (r *PhysRestore) cleanupDatadir(bcpFiles []pbm.File) error { +func (r *PhysRestore) cleanupDatadir(bcpFiles []types.File) error { var rm func(f string) bool needFiles := bcpFiles @@ -983,8 +1000,8 @@ func (r *PhysRestore) writeStat(stat any) error { return nil } -func (r *PhysRestore) dumpMeta(meta *pbm.RestoreMeta, s pbm.Status, msg string) error { - name := fmt.Sprintf("%s/%s.json", pbm.PhysRestoresDir, meta.Name) +func (r *PhysRestore) dumpMeta(meta *types.RestoreMeta, s defs.Status, msg string) error { + name := fmt.Sprintf("%s/%s.json", defs.PhysRestoresDir, meta.Name) _, err := r.stg.FileStat(name) if err == nil { r.log.Warning("meta `%s` already exists, trying write %s status with '%s'", name, s, msg) @@ -999,7 +1016,7 @@ func (r *PhysRestore) dumpMeta(meta *pbm.RestoreMeta, s pbm.Status, msg string) // The meta generated here is more for debugging porpuses (just in case). // `pbm status` and `resync` will always rebuild it from agents' reports // not relying solely on this file. - condsm, err := pbm.GetPhysRestoreMeta(meta.Name, r.stg, r.log) + condsm, err := resync.GetPhysRestoreMeta(meta.Name, r.stg, r.log) if err == nil { meta.Replsets = condsm.Replsets meta.Status = condsm.Status @@ -1008,10 +1025,10 @@ func (r *PhysRestore) dumpMeta(meta *pbm.RestoreMeta, s pbm.Status, msg string) meta.Hb = condsm.Hb meta.Conditions = condsm.Conditions } - if err != nil || s == pbm.StatusError { + if err != nil || s == defs.StatusError { ts := time.Now().Unix() meta.Status = s - meta.Conditions = append(meta.Conditions, &pbm.Condition{Timestamp: ts, Status: s}) + meta.Conditions = append(meta.Conditions, &types.Condition{Timestamp: ts, Status: s}) meta.LastTransitionTS = ts meta.Error = fmt.Sprintf("%s/%s: %s", r.nodeInfo.SetName, r.nodeInfo.Me, msg) } @@ -1046,7 +1063,7 @@ func (r *PhysRestore) copyFiles() (*s3.DownloadStat, error) { }() } - setName := pbm.MakeReverseRSMapFunc(r.rsMap)(r.nodeInfo.SetName) + setName := util.MakeReverseRSMapFunc(r.rsMap)(r.nodeInfo.SetName) cpbuf := make([]byte, 32*1024) for i := len(r.files) - 1; i >= 0; i-- { set := r.files[i] @@ -1126,7 +1143,7 @@ func (r *PhysRestore) getLasOpTime() (primitive.Timestamp, error) { return primitive.Timestamp{}, errors.Wrap(err, "connect to mongo") } - ctx := context.Background() + ctx := context.TODO() res := c.Database("local").Collection("oplog.rs").FindOne( ctx, @@ -1210,7 +1227,7 @@ func (r *PhysRestore) prepareData() error { } func shutdown(c *mongo.Client, dbpath string) error { - err := c.Database("admin").RunCommand(context.Background(), bson.D{{"shutdown", 1}}).Err() + err := c.Database("admin").RunCommand(context.TODO(), bson.D{{"shutdown", 1}}).Err() if err != nil && !strings.Contains(err.Error(), "socket was unexpectedly closed") { return err } @@ -1246,8 +1263,8 @@ func (r *PhysRestore) recoverStandalone() error { func (r *PhysRestore) replayOplog( from, to primitive.Timestamp, - opChunks []pbm.OplogChunk, - stat *pbm.RestoreShardStat, + opChunks []oplog.OplogChunk, + stat *types.RestoreShardStat, ) error { err := r.startMongo("--dbpath", r.dbpath, "--setParameter", "disableLogicalSessionCacheRefresh=true") @@ -1262,11 +1279,11 
@@ func (r *PhysRestore) replayOplog( ctx := context.Background() _, err = c.Database("local").Collection("system.replset").InsertOne(ctx, - pbm.RSConfig{ + topo.RSConfig{ ID: r.rsConf.ID, CSRS: r.nodeInfo.IsConfigSrv(), Version: 1, - Members: []pbm.RSMember{{ + Members: []topo.RSMember{{ ID: 0, Host: "localhost:" + strconv.Itoa(r.tmpPort), Votes: 1, @@ -1303,7 +1320,7 @@ func (r *PhysRestore) replayOplog( return errors.Wrap(err, "connect to mongo rs") } - mgoV, err := pbm.GetMongoVersion(ctx, c) + mgoV, err := version.GetMongoVersion(ctx, c) if err != nil || len(mgoV.Version) < 1 { return errors.Wrap(err, "define mongo version") } @@ -1313,7 +1330,7 @@ func (r *PhysRestore) replayOplog( end: &to, unsafe: true, } - partial, err := applyOplog(c, opChunks, &oplogOption, r.nodeInfo.IsSharded(), + partial, err := applyOplog(ctx, c, opChunks, &oplogOption, r.nodeInfo.IsSharded(), nil, r.setcommittedTxn, r.getcommittedTxn, &stat.Txn, &mgoV, r.stg, r.log) if err != nil { @@ -1357,7 +1374,7 @@ func (r *PhysRestore) resetRS() error { return errors.Wrap(err, "connect to mongo") } - ctx := context.Background() + ctx := context.TODO() if r.nodeInfo.IsConfigSrv() { _, err = c.Database("config").Collection("mongos").DeleteMany(ctx, bson.D{}) @@ -1371,7 +1388,7 @@ func (r *PhysRestore) resetRS() error { cur, err := c.Database("config").Collection("shards").Find(ctx, bson.D{}) if err != nil { - return errors.WithMessage(err, "find: config.shards") + return errors.Wrap(err, "find: config.shards") } var docs []struct { @@ -1380,11 +1397,11 @@ func (r *PhysRestore) resetRS() error { R map[string]any `bson:",inline"` } if err := cur.All(ctx, &docs); err != nil { - return errors.WithMessage(err, "decode: config.shards") + return errors.Wrap(err, "decode: config.shards") } sMap := r.getShardMapping(r.bcp) - mapS := pbm.MakeRSMapFunc(sMap) + mapS := util.MakeRSMapFunc(sMap) ms := []mongo.WriteModel{&mongo.DeleteManyModel{Filter: bson.D{}}} for _, doc := range docs { doc.I = mapS(doc.I) @@ -1399,8 +1416,8 @@ func (r *PhysRestore) resetRS() error { if len(sMap) != 0 { r.log.Debug("updating router config") - if err := updateRouterTables(ctx, c, sMap); err != nil { - return errors.WithMessage(err, "update router tables") + if err := updateRouterTables(ctx, connect.UnsafeClient(c), sMap); err != nil { + return errors.Wrap(err, "update router tables") } } } else { @@ -1428,7 +1445,7 @@ func (r *PhysRestore) resetRS() error { colls, err := c.Database("config").ListCollectionNames(ctx, bson.D{{"name", bson.M{"$regex": `^cache\.`}}}) if err != nil { - return errors.WithMessage(err, "list cache collections") + return errors.Wrap(err, "list cache collections") } for _, coll := range colls { _, err := c.Database("config").Collection(coll).DeleteMany(ctx, bson.D{}) @@ -1456,7 +1473,7 @@ func (r *PhysRestore) resetRS() error { } _, err = c.Database("local").Collection("system.replset").InsertOne(ctx, - pbm.RSConfig{ + topo.RSConfig{ ID: r.rsConf.ID, CSRS: r.nodeInfo.IsConfigSrv(), Version: 1, @@ -1474,7 +1491,7 @@ func (r *PhysRestore) resetRS() error { // restore and chunks made after the backup. So it would successfully start slicing // and overwrites chunks after the backup. 
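// Illustrative sketch only, not part of the patch: the rsMap-based renaming
// that resetRS and getShardMapping below rely on (util.MakeRSMapFunc maps
// source replset names to target names, util.MakeReverseRSMapFunc inverts the
// map so a target node can locate its data in the backup; unknown names map
// to themselves). The helpers here are a hypothetical re-implementation for
// illustration, not the actual util package code.
package sketch

// makeRSMapFunc returns a rename function with identity fallback.
func makeRSMapFunc(m map[string]string) func(string) string {
	return func(name string) string {
		if to, ok := m[name]; ok {
			return to
		}
		return name
	}
}

// makeReverseRSMapFunc inverts the mapping first, then renames the same way,
// e.g. with rsMap {"rs0": "rsA"} the reverse function turns "rsA" back into "rs0".
func makeReverseRSMapFunc(m map[string]string) func(string) string {
	rev := make(map[string]string, len(m))
	for k, v := range m {
		rev[v] = k
	}
	return makeRSMapFunc(rev)
}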
if r.nodeInfo.IsLeader() { - _, err = c.Database(pbm.DB).Collection(pbm.ConfigCollection).UpdateOne(ctx, bson.D{}, + _, err = c.Database(defs.DB).Collection(defs.ConfigCollection).UpdateOne(ctx, bson.D{}, bson.D{{"$set", bson.M{"pitr.enabled": false}}}, ) if err != nil { @@ -1490,7 +1507,7 @@ func (r *PhysRestore) resetRS() error { return nil } -func (r *PhysRestore) getShardMapping(bcp *pbm.BackupMeta) map[string]string { +func (r *PhysRestore) getShardMapping(bcp *types.BackupMeta) map[string]string { source := make(map[string]string) if bcp != nil && bcp.ShardRemap != nil { for i := range bcp.Replsets { @@ -1501,7 +1518,7 @@ func (r *PhysRestore) getShardMapping(bcp *pbm.BackupMeta) map[string]string { } } - mapRevRS := pbm.MakeReverseRSMapFunc(r.rsMap) + mapRevRS := util.MakeReverseRSMapFunc(r.rsMap) rv := make(map[string]string) for targetS, uri := range r.shards { targetRS, _, _ := strings.Cut(uri, "/") @@ -1536,13 +1553,13 @@ func (r *PhysRestore) agreeCommonRestoreTS() (primitive.Timestamp, error) { )) // saving straight for RS as backup for nodes in the RS the same, // hence TS would be the same as well - err = r.stg.Save(r.syncPathRS+"."+string(pbm.StatusExtTS), bts, -1) + err = r.stg.Save(r.syncPathRS+"."+string(defs.StatusExtTS), bts, -1) if err != nil { return ts, errors.Wrap(err, "write RS timestamp") } if r.nodeInfo.IsClusterLeader() { - _, err := r.waitFiles(pbm.StatusExtTS, copyMap(r.syncPathShards), true) + _, err := r.waitFiles(defs.StatusExtTS, copyMap(r.syncPathShards), true) if err != nil { return ts, errors.Wrap(err, "wait for shards timestamp") } @@ -1553,20 +1570,20 @@ func (r *PhysRestore) agreeCommonRestoreTS() (primitive.Timestamp, error) { return ts, errors.Wrapf(err, "get timestamp for RS %s", sh) } - if mints.IsZero() || primitive.CompareTimestamp(ts, mints) == -1 { + if mints.IsZero() || ts.Compare(mints) == -1 { mints = ts } } bts := bytes.NewReader([]byte( fmt.Sprintf("%d:%d,%d", time.Now().Unix(), mints.T, mints.I), )) - err = r.stg.Save(r.syncPathCluster+"."+string(pbm.StatusExtTS), bts, -1) + err = r.stg.Save(r.syncPathCluster+"."+string(defs.StatusExtTS), bts, -1) if err != nil { return ts, errors.Wrap(err, "write") } } - _, err = r.waitFiles(pbm.StatusExtTS, map[string]struct{}{r.syncPathCluster: {}}, false) + _, err = r.waitFiles(defs.StatusExtTS, map[string]struct{}{r.syncPathCluster: {}}, false) if err != nil { return ts, errors.Wrap(err, "wait for cluster timestamp") } @@ -1579,9 +1596,9 @@ func (r *PhysRestore) agreeCommonRestoreTS() (primitive.Timestamp, error) { return ts, nil } -func (r *PhysRestore) setcommittedTxn(txn []pbm.RestoreTxn) error { +func (r *PhysRestore) setcommittedTxn(_ context.Context, txn []types.RestoreTxn) error { if txn == nil { - txn = []pbm.RestoreTxn{} + txn = []types.RestoreTxn{} } var b bytes.Buffer err := json.NewEncoder(&b).Encode(txn) @@ -1593,12 +1610,12 @@ func (r *PhysRestore) setcommittedTxn(txn []pbm.RestoreTxn) error { ) } -func (r *PhysRestore) getcommittedTxn() (map[string]primitive.Timestamp, error) { +func (r *PhysRestore) getcommittedTxn(context.Context) (map[string]primitive.Timestamp, error) { shards := copyMap(r.syncPathShards) txn := make(map[string]primitive.Timestamp) for len(shards) > 0 { for f := range shards { - dr, err := r.stg.FileStat(f + "." + string(pbm.StatusDone)) + dr, err := r.stg.FileStat(f + "." 
+ string(defs.StatusDone)) if err != nil && !errors.Is(err, storage.ErrNotExist) { return nil, errors.Wrapf(err, "check done for <%s>", f) } @@ -1614,13 +1631,13 @@ func (r *PhysRestore) getcommittedTxn() (map[string]primitive.Timestamp, error) if err != nil { return nil, errors.Wrapf(err, "get txns <%s>", f) } - txns := []pbm.RestoreTxn{} + txns := []types.RestoreTxn{} err = json.NewDecoder(txnr).Decode(&txns) if err != nil { return nil, errors.Wrapf(err, "deconde txns <%s>", f) } for _, t := range txns { - if t.State == pbm.TxnCommit { + if t.State == types.TxnCommit { txn[t.ID] = t.Ctime } } @@ -1685,12 +1702,7 @@ func conn(port int, tout time.Duration) (*mongo.Client, error) { SetConnectTimeout(time.Second * 120). SetServerSelectionTimeout(tout) - conn, err := mongo.NewClient(opts) - if err != nil { - return nil, errors.Wrap(err, "create mongo client") - } - - err = conn.Connect(ctx) + conn, err := mongo.Connect(ctx, opts) if err != nil { return nil, errors.Wrap(err, "connect") } @@ -1745,13 +1757,13 @@ func (r *PhysRestore) startMongo(opts ...string) error { const hbFrameSec = 60 * 2 -func (r *PhysRestore) init(name string, opid pbm.OPID, l *log.Event) error { - cfg, err := r.cn.GetConfig() +func (r *PhysRestore) init(ctx context.Context, name string, opid types.OPID, l *log.Event) error { + cfg, err := config.GetConfig(ctx, r.cn.Conn) if err != nil { return errors.Wrap(err, "get pbm config") } - r.stg, err = pbm.Storage(cfg, l) + r.stg, err = util.StorageFromConfig(cfg, l) if err != nil { return errors.Wrap(err, "get storage") } @@ -1773,33 +1785,33 @@ func (r *PhysRestore) init(name string, opid pbm.OPID, l *log.Event) error { r.startTS = time.Now().Unix() - r.syncPathNode = fmt.Sprintf("%s/%s/rs.%s/node.%s", pbm.PhysRestoresDir, r.name, r.rsConf.ID, r.nodeInfo.Me) - r.syncPathNodeStat = fmt.Sprintf("%s/%s/rs.%s/stat.%s", pbm.PhysRestoresDir, r.name, r.rsConf.ID, r.nodeInfo.Me) - r.syncPathRS = fmt.Sprintf("%s/%s/rs.%s/rs", pbm.PhysRestoresDir, r.name, r.rsConf.ID) - r.syncPathCluster = fmt.Sprintf("%s/%s/cluster", pbm.PhysRestoresDir, r.name) + r.syncPathNode = fmt.Sprintf("%s/%s/rs.%s/node.%s", defs.PhysRestoresDir, r.name, r.rsConf.ID, r.nodeInfo.Me) + r.syncPathNodeStat = fmt.Sprintf("%s/%s/rs.%s/stat.%s", defs.PhysRestoresDir, r.name, r.rsConf.ID, r.nodeInfo.Me) + r.syncPathRS = fmt.Sprintf("%s/%s/rs.%s/rs", defs.PhysRestoresDir, r.name, r.rsConf.ID) + r.syncPathCluster = fmt.Sprintf("%s/%s/cluster", defs.PhysRestoresDir, r.name) r.syncPathPeers = make(map[string]struct{}) for _, m := range r.rsConf.Members { if !m.ArbiterOnly { - r.syncPathPeers[fmt.Sprintf("%s/%s/rs.%s/node.%s", pbm.PhysRestoresDir, r.name, r.rsConf.ID, m.Host)] = struct{}{} + r.syncPathPeers[fmt.Sprintf("%s/%s/rs.%s/node.%s", defs.PhysRestoresDir, r.name, r.rsConf.ID, m.Host)] = struct{}{} } } - dsh, err := r.cn.ClusterMembers() + dsh, err := topo.ClusterMembers(ctx, r.cn.Conn.MongoClient()) if err != nil { return errors.Wrap(err, "get shards") } r.syncPathShards = make(map[string]struct{}) for _, s := range dsh { - r.syncPathShards[fmt.Sprintf("%s/%s/rs.%s/rs", pbm.PhysRestoresDir, r.name, s.RS)] = struct{}{} + r.syncPathShards[fmt.Sprintf("%s/%s/rs.%s/rs", defs.PhysRestoresDir, r.name, s.RS)] = struct{}{} } - sh, err := r.cn.GetShards() + sh, err := r.cn.GetShards(ctx) if err != nil { return errors.Wrap(err, "get data shards") } r.syncPathDataShards = make(map[string]struct{}) for _, s := range sh { - r.syncPathDataShards[fmt.Sprintf("%s/%s/rs.%s/rs", pbm.PhysRestoresDir, r.name, s.RS)] = struct{}{} + 
r.syncPathDataShards[fmt.Sprintf("%s/%s/rs.%s/rs", defs.PhysRestoresDir, r.name, s.RS)] = struct{}{} } err = r.hb() @@ -1895,13 +1907,13 @@ func (r *PhysRestore) checkHB(file string) error { return nil } -func (r *PhysRestore) setTmpConf(xopts *pbm.MongodOpts) error { - opts := &pbm.MongodOpts{} - opts.Storage = *pbm.NewMongodOptsStorage() +func (r *PhysRestore) setTmpConf(xopts *topo.MongodOpts) error { + opts := &topo.MongodOpts{} + opts.Storage = *topo.NewMongodOptsStorage() if xopts != nil { opts.Storage = xopts.Storage } else if r.bcp != nil { - setName := pbm.MakeReverseRSMapFunc(r.rsMap)(r.nodeInfo.SetName) + setName := util.MakeReverseRSMapFunc(r.rsMap)(r.nodeInfo.SetName) for _, v := range r.bcp.Replsets { if v.Name == setName { if v.MongodOpts != nil { @@ -1954,10 +1966,10 @@ const bcpDir = "__dir__" // // The restore should be done in reverse order. Applying files (diffs) // starting from the base and moving forward in time up to the target backup. -func (r *PhysRestore) setBcpFiles() error { +func (r *PhysRestore) setBcpFiles(ctx context.Context) error { bcp := r.bcp - setName := pbm.MakeReverseRSMapFunc(r.rsMap)(r.nodeInfo.SetName) + setName := util.MakeReverseRSMapFunc(r.rsMap)(r.nodeInfo.SetName) rs := getRS(bcp, setName) if rs == nil { return errors.Errorf("no data in the backup for the replica set %s", setName) @@ -1972,7 +1984,7 @@ func (r *PhysRestore) setBcpFiles() error { data := files{ BcpName: bcp.Name, Cmpr: bcp.Compression, - Data: []pbm.File{}, + Data: []types.File{}, } // PBM-1058 var is1058 bool @@ -1998,7 +2010,7 @@ func (r *PhysRestore) setBcpFiles() error { r.log.Debug("get src %s", bcp.SrcBackup) var err error - bcp, err = r.cn.GetBackupMeta(bcp.SrcBackup) + bcp, err = query.GetBackupMeta(ctx, r.cn.Conn, bcp.SrcBackup) if err != nil { return errors.Wrapf(err, "get source backup") } @@ -2013,13 +2025,13 @@ func (r *PhysRestore) setBcpFiles() error { // it runs with `directoryPerDB` option. Namely fails to create a directory // for the collections. So we have to detect and create these directories // during the restore. - var dirs []pbm.File + var dirs []types.File dirsm := make(map[string]struct{}) for f, was := range targetFiles { if !was { dir := path.Dir(f) if _, ok := dirsm[dir]; dir != "." 
&& !ok { - dirs = append(dirs, pbm.File{ + dirs = append(dirs, types.File{ Name: f, Off: -1, Len: -1, @@ -2064,7 +2076,7 @@ func findDBpath(fname string) (bool, string) { return is, prefix } -func getRS(bcp *pbm.BackupMeta, rs string) *pbm.BackupReplset { +func getRS(bcp *types.BackupMeta, rs string) *types.BackupReplset { for _, r := range bcp.Replsets { if r.Name == rs { return &r @@ -2073,10 +2085,10 @@ func getRS(bcp *pbm.BackupMeta, rs string) *pbm.BackupReplset { return nil } -func (r *PhysRestore) prepareBackup(backupName string) error { +func (r *PhysRestore) prepareBackup(ctx context.Context, backupName string) error { var err error - r.bcp, err = r.cn.GetBackupMeta(backupName) - if errors.Is(err, pbm.ErrNotFound) { + r.bcp, err = query.GetBackupMeta(ctx, r.cn.Conn, backupName) + if errors.Is(err, errors.ErrNotFound) { r.bcp, err = GetMetaFromStore(r.stg, backupName) } if err != nil { @@ -2087,21 +2099,21 @@ func (r *PhysRestore) prepareBackup(backupName string) error { return errors.New("snapshot name doesn't set") } - err = r.cn.SetRestoreBackup(r.name, r.bcp.Name, nil) + err = query.SetRestoreBackup(ctx, r.cn.Conn, r.name, r.bcp.Name, nil) if err != nil { return errors.Wrap(err, "set backup name") } - if r.bcp.Status != pbm.StatusDone { + if r.bcp.Status != defs.StatusDone { return errors.Errorf("backup wasn't successful: status: %s, error: %s", r.bcp.Status, r.bcp.Error()) } - if !version.CompatibleWith(r.bcp.PBMVersion, pbm.BreakingChangesMap[r.bcp.Type]) { + if !version.CompatibleWith(r.bcp.PBMVersion, version.BreakingChangesMap[r.bcp.Type]) { return errors.Errorf("backup version (v%s) is not compatible with PBM v%s", r.bcp.PBMVersion, version.Current().Version) } - mgoV, err := r.node.GetMongoVersion() + mgoV, err := version.GetMongoVersion(ctx, r.node.Session()) if err != nil || len(mgoV.Version) < 1 { return errors.Wrap(err, "define mongo version") } @@ -2117,18 +2129,18 @@ func (r *PhysRestore) prepareBackup(backupName string) error { } r.log.Debug("mongod binary: %s, version: %s", r.mongod, mv) - err = r.setBcpFiles() + err = r.setBcpFiles(ctx) if err != nil { return errors.Wrap(err, "get data for restore") } - s, err := r.cn.ClusterMembers() + s, err := topo.ClusterMembers(ctx, r.cn.Conn.MongoClient()) if err != nil { return errors.Wrap(err, "get cluster members") } - mapRevRS := pbm.MakeReverseRSMapFunc(r.rsMap) - fl := make(map[string]pbm.Shard, len(s)) + mapRevRS := util.MakeReverseRSMapFunc(r.rsMap) + fl := make(map[string]topo.Shard, len(s)) for _, rs := range s { fl[mapRevRS(rs.RS)] = rs } @@ -2193,21 +2205,21 @@ func (r *PhysRestore) checkMongod(needVersion string) (version string, err error } // MarkFailed sets the restore and rs state as failed with the given message -func (r *PhysRestore) MarkFailed(meta *pbm.RestoreMeta, e error, markCluster bool) { +func (r *PhysRestore) MarkFailed(meta *types.RestoreMeta, e error, markCluster bool) { var nerr nodeError if errors.As(e, &nerr) { e = nerr - meta.Replsets = []pbm.RestoreReplset{{ + meta.Replsets = []types.RestoreReplset{{ Name: nerr.node, - Status: pbm.StatusError, + Status: defs.StatusError, Error: nerr.msg, }} } else if len(meta.Replsets) > 0 { - meta.Replsets[0].Status = pbm.StatusError + meta.Replsets[0].Status = defs.StatusError meta.Replsets[0].Error = e.Error() } - err := r.stg.Save(r.syncPathNode+"."+string(pbm.StatusError), + err := r.stg.Save(r.syncPathNode+"."+string(defs.StatusError), errStatus(e), -1) if err != nil { r.log.Error("write error state `%v` to storage: %v", e, err) @@ -2217,14 +2229,14 
@@ func (r *PhysRestore) MarkFailed(meta *pbm.RestoreMeta, e error, markCluster boo // (in `toState` method). // Here we are not aware of partlyDone etc so leave it to the `toState`. if r.nodeInfo.IsPrimary && markCluster { - serr := r.stg.Save(r.syncPathRS+"."+string(pbm.StatusError), + serr := r.stg.Save(r.syncPathRS+"."+string(defs.StatusError), errStatus(e), -1) if serr != nil { r.log.Error("MarkFailed: write replset error state `%v`: %v", e, serr) } } if r.nodeInfo.IsClusterLeader() && markCluster { - serr := r.stg.Save(r.syncPathCluster+"."+string(pbm.StatusError), + serr := r.stg.Save(r.syncPathCluster+"."+string(defs.StatusError), errStatus(e), -1) if serr != nil { r.log.Error("MarkFailed: write cluster error state `%v`: %v", e, serr) diff --git a/pbm/restore/restore.go b/pbm/restore/restore.go index 50fc5da26..f30e2c5ea 100644 --- a/pbm/restore/restore.go +++ b/pbm/restore/restore.go @@ -6,45 +6,54 @@ import ( "github.com/golang/snappy" "github.com/mongodb/mongo-tools/common/idx" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" + "github.com/percona/percona-backup-mongodb/internal/compress" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/lock" + "github.com/percona/percona-backup-mongodb/internal/log" + "github.com/percona/percona-backup-mongodb/internal/query" + "github.com/percona/percona-backup-mongodb/internal/storage" + "github.com/percona/percona-backup-mongodb/internal/topo" + "github.com/percona/percona-backup-mongodb/internal/types" + "github.com/percona/percona-backup-mongodb/internal/util" + "github.com/percona/percona-backup-mongodb/internal/version" "github.com/percona/percona-backup-mongodb/pbm" - "github.com/percona/percona-backup-mongodb/pbm/compress" - "github.com/percona/percona-backup-mongodb/pbm/log" "github.com/percona/percona-backup-mongodb/pbm/oplog" - "github.com/percona/percona-backup-mongodb/pbm/storage" ) -func GetMetaFromStore(stg storage.Storage, bcpName string) (*pbm.BackupMeta, error) { - rd, err := stg.SourceReader(bcpName + pbm.MetadataFileSuffix) +func GetMetaFromStore(stg storage.Storage, bcpName string) (*types.BackupMeta, error) { + rd, err := stg.SourceReader(bcpName + defs.MetadataFileSuffix) if err != nil { return nil, errors.Wrap(err, "get from store") } defer rd.Close() - b := &pbm.BackupMeta{} + b := &types.BackupMeta{} err = json.NewDecoder(rd).Decode(b) return b, errors.Wrap(err, "decode") } func toState( + ctx context.Context, cn *pbm.PBM, - status pbm.Status, + status defs.Status, bcp string, - inf *pbm.NodeInfo, + inf *topo.NodeInfo, reconcileFn reconcileStatus, wait *time.Duration, ) error { - err := cn.ChangeRestoreRSState(bcp, inf.SetName, status, "") + err := query.ChangeRestoreRSState(ctx, cn.Conn, bcp, inf.SetName, status, "") if err != nil { return errors.Wrap(err, "set shard's status") } if inf.IsLeader() { - err = reconcileFn(status, wait) + err = reconcileFn(ctx, status, wait) if err != nil { if errors.Is(err, errConvergeTimeOut) { return errors.Wrap(err, "couldn't get response from all shards") @@ -53,7 +62,7 @@ func toState( } } - err = waitForStatus(cn, bcp, status) + err = waitForStatus(ctx, cn, bcp, status) if err != nil { return errors.Wrapf(err, "waiting for %s", status) } @@ -61,24 +70,30 @@ func toState( return nil } -type reconcileStatus func(status pbm.Status, timeout 
*time.Duration) error +type reconcileStatus func(ctx context.Context, status defs.Status, timeout *time.Duration) error // convergeCluster waits until all participating shards reached `status` and updates a cluster status -func convergeCluster(cn *pbm.PBM, name, opid string, shards []pbm.Shard, status pbm.Status) error { +func convergeCluster( + ctx context.Context, + cn *pbm.PBM, + name, opid string, + shards []topo.Shard, + status defs.Status, +) error { tk := time.NewTicker(time.Second * 1) defer tk.Stop() for { select { case <-tk.C: - ok, err := converged(cn, name, opid, shards, status) + ok, err := converged(ctx, cn, name, opid, shards, status) if err != nil { return err } if ok { return nil } - case <-cn.Context().Done(): + case <-ctx.Done(): return nil } } @@ -89,11 +104,12 @@ var errConvergeTimeOut = errors.New("reached converge timeout") // convergeClusterWithTimeout waits up to the geiven timeout until all participating shards reached // `status` and then updates the cluster status func convergeClusterWithTimeout( + ctx context.Context, cn *pbm.PBM, name, opid string, - shards []pbm.Shard, - status pbm.Status, + shards []topo.Shard, + status defs.Status, t time.Duration, ) error { tk := time.NewTicker(time.Second * 1) @@ -106,7 +122,7 @@ func convergeClusterWithTimeout( select { case <-tk.C: var ok bool - ok, err := converged(cn, name, opid, shards, status) + ok, err := converged(ctx, cn, name, opid, shards, status) if err != nil { return err } @@ -115,20 +131,26 @@ func convergeClusterWithTimeout( } case <-tout.C: return errConvergeTimeOut - case <-cn.Context().Done(): + case <-ctx.Done(): return nil } } } -func converged(cn *pbm.PBM, name, opid string, shards []pbm.Shard, status pbm.Status) (bool, error) { +func converged( + ctx context.Context, + cn *pbm.PBM, + name, opid string, + shards []topo.Shard, + status defs.Status, +) (bool, error) { shardsToFinish := len(shards) - bmeta, err := cn.GetRestoreMeta(name) + bmeta, err := query.GetRestoreMeta(ctx, cn.Conn, name) if err != nil { return false, errors.Wrap(err, "get backup metadata") } - clusterTime, err := cn.ClusterTime() + clusterTime, err := topo.GetClusterTime(ctx, cn.Conn) if err != nil { return false, errors.Wrap(err, "read cluster time") } @@ -137,20 +159,20 @@ func converged(cn *pbm.PBM, name, opid string, shards []pbm.Shard, status pbm.St for _, shard := range bmeta.Replsets { if shard.Name == sh.RS { // check if node alive - lock, err := cn.GetLockData(&pbm.LockHeader{ - Type: pbm.CmdRestore, + lck, err := lock.GetLockData(ctx, cn.Conn, &lock.LockHeader{ + Type: defs.CmdRestore, OPID: opid, Replset: shard.Name, }) // nodes are cleaning its locks moving to the done status // so no lock is ok and not need to ckech the heartbeats - if status != pbm.StatusDone && !errors.Is(err, mongo.ErrNoDocuments) { + if status != defs.StatusDone && !errors.Is(err, mongo.ErrNoDocuments) { if err != nil { return false, errors.Wrapf(err, "unable to read lock for shard %s", shard.Name) } - if lock.Heartbeat.T+pbm.StaleFrameSec < clusterTime.T { - return false, errors.Errorf("lost shard %s, last beat ts: %d", shard.Name, lock.Heartbeat.T) + if lck.Heartbeat.T+defs.StaleFrameSec < clusterTime.T { + return false, errors.Errorf("lost shard %s, last beat ts: %d", shard.Name, lck.Heartbeat.T) } } @@ -158,8 +180,8 @@ func converged(cn *pbm.PBM, name, opid string, shards []pbm.Shard, status pbm.St switch shard.Status { case status: shardsToFinish-- - case pbm.StatusError: - bmeta.Status = pbm.StatusError + case defs.StatusError: + 
bmeta.Status = defs.StatusError bmeta.Error = shard.Error return false, errors.Errorf("restore on the shard %s failed with: %s", shard.Name, shard.Error) } @@ -168,7 +190,7 @@ func converged(cn *pbm.PBM, name, opid string, shards []pbm.Shard, status pbm.St } if shardsToFinish == 0 { - err := cn.ChangeRestoreState(name, status, "") + err := query.ChangeRestoreState(ctx, cn.Conn, name, status, "") if err != nil { return false, errors.Wrapf(err, "update backup meta with %s", status) } @@ -178,37 +200,37 @@ func converged(cn *pbm.PBM, name, opid string, shards []pbm.Shard, status pbm.St return false, nil } -func waitForStatus(cn *pbm.PBM, name string, status pbm.Status) error { +func waitForStatus(ctx context.Context, cn *pbm.PBM, name string, status defs.Status) error { tk := time.NewTicker(time.Second * 1) defer tk.Stop() for { select { case <-tk.C: - meta, err := cn.GetRestoreMeta(name) - if errors.Is(err, pbm.ErrNotFound) { + meta, err := query.GetRestoreMeta(ctx, cn.Conn, name) + if errors.Is(err, errors.ErrNotFound) { continue } if err != nil { return errors.Wrap(err, "get restore metadata") } - clusterTime, err := cn.ClusterTime() + clusterTime, err := topo.GetClusterTime(ctx, cn.Conn) if err != nil { return errors.Wrap(err, "read cluster time") } - if meta.Hb.T+pbm.StaleFrameSec < clusterTime.T { + if meta.Hb.T+defs.StaleFrameSec < clusterTime.T { return errors.Errorf("restore stuck, last beat ts: %d", meta.Hb.T) } switch meta.Status { case status: return nil - case pbm.StatusError: + case defs.StatusError: return errors.Errorf("cluster failed: %s", meta.Error) } - case <-cn.Context().Done(): + case <-ctx.Done(): return nil } } @@ -218,15 +240,16 @@ func waitForStatus(cn *pbm.PBM, name string, status pbm.Status) error { // is contiguous - there are no gaps), checks for respective files on storage and returns // chunks list if all checks passed func chunks( + ctx context.Context, cn *pbm.PBM, stg storage.Storage, from, to primitive.Timestamp, rsName string, rsMap map[string]string, -) ([]pbm.OplogChunk, error) { - mapRevRS := pbm.MakeReverseRSMapFunc(rsMap) - chunks, err := cn.PITRGetChunksSlice(mapRevRS(rsName), from, to) +) ([]oplog.OplogChunk, error) { + mapRevRS := util.MakeReverseRSMapFunc(rsMap) + chunks, err := oplog.PITRGetChunksSlice(ctx, cn.Conn, mapRevRS(rsName), from, to) if err != nil { return nil, errors.Wrap(err, "get chunks index") } @@ -235,7 +258,7 @@ func chunks( return nil, errors.New("no chunks found") } - if primitive.CompareTimestamp(chunks[len(chunks)-1].EndTS, to) == -1 { + if chunks[len(chunks)-1].EndTS.Compare(to) == -1 { return nil, errors.Errorf( "no chunk with the target time, the last chunk ends on %v", chunks[len(chunks)-1].EndTS) @@ -243,7 +266,7 @@ func chunks( last := from for _, c := range chunks { - if primitive.CompareTimestamp(last, c.StartTS) == -1 { + if last.Compare(c.StartTS) == -1 { return nil, errors.Errorf( "integrity vilolated, expect chunk with start_ts %v, but got %v", last, c.StartTS) @@ -270,8 +293,8 @@ type applyOplogOption struct { } type ( - setcommittedTxnFn func(txn []pbm.RestoreTxn) error - getcommittedTxnFn func() (map[string]primitive.Timestamp, error) + setcommittedTxnFn func(ctx context.Context, txn []types.RestoreTxn) error + getcommittedTxnFn func(ctx context.Context) (map[string]primitive.Timestamp, error) ) // By looking at just transactions in the oplog we can't tell which shards @@ -297,14 +320,24 @@ type ( // should report it in logs and describe-restore. 
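// Illustrative sketch only, not part of the patch: the cross-shard transaction
// reconciliation enabled by the setcommittedTxnFn/getcommittedTxnFn hooks that
// applyOplog (below) receives. Each shard uploads the transactions it saw
// committed; a shard left with uncommitted transactions then applies only the
// ones confirmed by another shard and abandons the rest. The types here are
// simplified, hypothetical stand-ins for the actual PBM types.
package sketch

import "go.mongodb.org/mongo-driver/bson/primitive"

type restoreTxn struct {
	ID    string
	Ctime primitive.Timestamp
}

// resolveLeftovers splits local uncommitted transactions into those reported
// as committed elsewhere (to be committed here too) and abandoned ones.
func resolveLeftovers(
	uncommitted []restoreTxn,
	committedElsewhere map[string]primitive.Timestamp,
) (commit, abandon []restoreTxn) {
	for _, t := range uncommitted {
		if ts, ok := committedElsewhere[t.ID]; ok {
			t.Ctime = ts
			commit = append(commit, t)
		} else {
			abandon = append(abandon, t)
		}
	}
	return commit, abandon
}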
// //nolint:nonamedreturns -func applyOplog(node *mongo.Client, chunks []pbm.OplogChunk, options *applyOplogOption, sharded bool, - ic *idx.IndexCatalog, setTxn setcommittedTxnFn, getTxn getcommittedTxnFn, stat *pbm.DistTxnStat, - mgoV *pbm.MongoVersion, stg storage.Storage, log *log.Event, +func applyOplog( + ctx context.Context, + node *mongo.Client, + chunks []oplog.OplogChunk, + options *applyOplogOption, + sharded bool, + ic *idx.IndexCatalog, + setTxn setcommittedTxnFn, + getTxn getcommittedTxnFn, + stat *types.DistTxnStat, + mgoV *version.MongoVersion, + stg storage.Storage, + log *log.Event, ) (partial []oplog.Txn, err error) { log.Info("starting oplog replay") var ( - ctxn chan pbm.RestoreTxn + ctxn chan types.RestoreTxn txnSyncErr chan error ) @@ -341,7 +374,7 @@ func applyOplog(node *mongo.Client, chunks []pbm.OplogChunk, options *applyOplog // and restore will fail with snappy: corrupt input. So we try S2 in such a case. lts, err = replayChunk(chnk.FName, oplogRestore, stg, chnk.Compression) if err != nil && errors.Is(err, snappy.ErrCorrupt) { - lts, err = replayChunk(chnk.FName, oplogRestore, stg, compress.CompressionTypeS2) + lts, err = replayChunk(chnk.FName, oplogRestore, stg, defs.CompressionTypeS2) } if err != nil { return nil, errors.Wrapf(err, "replay chunk %v.%v", chnk.StartTS.T, chnk.EndTS.T) @@ -353,13 +386,13 @@ func applyOplog(node *mongo.Client, chunks []pbm.OplogChunk, options *applyOplog uc, c := oplogRestore.TxnLeftovers() stat.ShardUncommitted = len(uc) go func() { - err := setTxn(c) + err := setTxn(ctx, c) if err != nil { log.Error("write last committed txns %v", err) } }() if len(uc) > 0 { - commits, err := getTxn() + commits, err := getTxn(ctx) if err != nil { return nil, errors.Wrap(err, "get committed txns on other shards") } @@ -384,7 +417,7 @@ func replayChunk( file string, oplog *oplog.OplogRestore, stg storage.Storage, - c compress.CompressionType, + c defs.CompressionType, ) (primitive.Timestamp, error) { or, err := stg.SourceReader(file) if err != nil { diff --git a/pbm/restore/selective.go b/pbm/restore/selective.go index bf77aa09c..5d5fbdf7d 100644 --- a/pbm/restore/selective.go +++ b/pbm/restore/selective.go @@ -5,15 +5,18 @@ import ( "path" "strings" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/mongo" - "github.com/percona/percona-backup-mongodb/pbm" - "github.com/percona/percona-backup-mongodb/pbm/archive" - "github.com/percona/percona-backup-mongodb/pbm/compress" - "github.com/percona/percona-backup-mongodb/pbm/sel" - "github.com/percona/percona-backup-mongodb/pbm/storage" + "github.com/percona/percona-backup-mongodb/internal/archive" + "github.com/percona/percona-backup-mongodb/internal/compress" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/resync" + "github.com/percona/percona-backup-mongodb/internal/storage" + "github.com/percona/percona-backup-mongodb/internal/types" + "github.com/percona/percona-backup-mongodb/internal/util" + "github.com/percona/percona-backup-mongodb/internal/version" ) const ( @@ -25,39 +28,44 @@ const ( const maxBulkWriteCount = 500 // configsvrRestore restores for selected namespaces -func (r *Restore) configsvrRestore(bcp *pbm.BackupMeta, nss []string, mapRS pbm.RSMapFunc) error { - mapS := pbm.MakeRSMapFunc(r.sMap) +func (r *Restore) configsvrRestore( + ctx context.Context, + bcp *types.BackupMeta, + nss []string, + mapRS util.RSMapFunc, 
+) error { + mapS := util.MakeRSMapFunc(r.sMap) available, err := fetchAvailability(bcp, r.stg) if err != nil { return err } if available[databasesNS] { - if err := r.configsvrRestoreDatabases(bcp, nss, mapRS, mapS); err != nil { - return errors.WithMessage(err, "restore config.databases") + if err := r.configsvrRestoreDatabases(ctx, bcp, nss, mapRS, mapS); err != nil { + return errors.Wrap(err, "restore config.databases") } } - var chunkSelector sel.ChunkSelector + var chunkSelector util.ChunkSelector if available[collectionsNS] { var err error - chunkSelector, err = r.configsvrRestoreCollections(bcp, nss, mapRS) + chunkSelector, err = r.configsvrRestoreCollections(ctx, bcp, nss, mapRS) if err != nil { - return errors.WithMessage(err, "restore config.collections") + return errors.Wrap(err, "restore config.collections") } } if available[chunksNS] { - if err := r.configsvrRestoreChunks(bcp, chunkSelector, mapRS, mapS); err != nil { - return errors.WithMessage(err, "restore config.chunks") + if err := r.configsvrRestoreChunks(ctx, bcp, chunkSelector, mapRS, mapS); err != nil { + return errors.Wrap(err, "restore config.chunks") } } return nil } -func fetchAvailability(bcp *pbm.BackupMeta, stg storage.Storage) (map[string]bool, error) { - var cfgRS *pbm.BackupReplset +func fetchAvailability(bcp *types.BackupMeta, stg storage.Storage) (map[string]bool, error) { + var cfgRS *types.BackupReplset for i := range bcp.Replsets { rs := &bcp.Replsets[i] if rs.IsConfigSvr != nil && *rs.IsConfigSvr { @@ -69,9 +77,9 @@ func fetchAvailability(bcp *pbm.BackupMeta, stg storage.Storage) (map[string]boo return nil, errors.New("no configsvr replset metadata found") } - nss, err := pbm.ReadArchiveNamespaces(stg, cfgRS.DumpName) + nss, err := resync.ReadArchiveNamespaces(stg, cfgRS.DumpName) if err != nil { - return nil, errors.WithMessagef(err, "read archive namespaces %q", cfgRS.DumpName) + return nil, errors.Wrapf(err, "read archive namespaces %q", cfgRS.DumpName) } rv := make(map[string]bool) @@ -85,13 +93,13 @@ func fetchAvailability(bcp *pbm.BackupMeta, stg storage.Storage) (map[string]boo return rv, nil } -func (r *Restore) getShardMapping(bcp *pbm.BackupMeta) map[string]string { +func (r *Restore) getShardMapping(bcp *types.BackupMeta) map[string]string { source := bcp.ShardRemap if source == nil { source = make(map[string]string) } - mapRevRS := pbm.MakeReverseRSMapFunc(r.rsMap) + mapRevRS := util.MakeReverseRSMapFunc(r.rsMap) rv := make(map[string]string) for _, s := range r.shards { sourceRS := mapRevRS(s.RS) @@ -109,7 +117,12 @@ func (r *Restore) getShardMapping(bcp *pbm.BackupMeta) map[string]string { // configsvrRestoreDatabases upserts config.databases documents // for selected databases -func (r *Restore) configsvrRestoreDatabases(bcp *pbm.BackupMeta, nss []string, mapRS, mapS pbm.RSMapFunc) error { +func (r *Restore) configsvrRestoreDatabases( + ctx context.Context, + bcp *types.BackupMeta, + nss []string, + mapRS, mapS util.RSMapFunc, +) error { filepath := path.Join(bcp.Name, mapRS(r.node.RS()), "config.databases"+bcp.Compression.Suffix()) rdr, err := r.stg.SourceReader(filepath) if err != nil { @@ -147,7 +160,7 @@ func (r *Restore) configsvrRestoreDatabases(bcp *pbm.BackupMeta, nss []string, m doc := bson.D{} if err = bson.Unmarshal(buf, &doc); err != nil { - return errors.WithMessage(err, "unmarshal") + return errors.Wrap(err, "unmarshal") } for i, a := range doc { @@ -168,28 +181,29 @@ func (r *Restore) configsvrRestoreDatabases(bcp *pbm.BackupMeta, nss []string, m return nil } - coll := 
r.cn.Conn.Database("config").Collection("databases") - _, err = coll.BulkWrite(r.cn.Context(), models) - return errors.WithMessage(err, "update config.databases") + coll := r.cn.Conn.ConfigDatabase().Collection("databases") + _, err = coll.BulkWrite(ctx, models) + return errors.Wrap(err, "update config.databases") } // configsvrRestoreCollections upserts config.collections documents // for selected namespaces func (r *Restore) configsvrRestoreCollections( - bcp *pbm.BackupMeta, + ctx context.Context, + bcp *types.BackupMeta, nss []string, - mapRS pbm.RSMapFunc, -) (sel.ChunkSelector, error) { - ver, err := pbm.GetMongoVersion(r.cn.Context(), r.node.Session()) + mapRS util.RSMapFunc, +) (util.ChunkSelector, error) { + ver, err := version.GetMongoVersion(ctx, r.node.Session()) if err != nil { - return nil, errors.WithMessage(err, "get mongo version") + return nil, errors.Wrap(err, "get mongo version") } - var chunkSelector sel.ChunkSelector + var chunkSelector util.ChunkSelector if ver.Major() >= 5 { - chunkSelector = sel.NewUUIDChunkSelector() + chunkSelector = util.NewUUIDChunkSelector() } else { - chunkSelector = sel.NewNSChunkSelector() + chunkSelector = util.NewNSChunkSelector() } filepath := path.Join(bcp.Name, mapRS(r.node.RS()), "config.collections"+bcp.Compression.Suffix()) @@ -202,7 +216,7 @@ func (r *Restore) configsvrRestoreCollections( return nil, err } - selected := sel.MakeSelectedPred(nss) + selected := util.MakeSelectedPred(nss) models := []mongo.WriteModel{} buf := make([]byte, archive.MaxBSONSize) @@ -226,7 +240,7 @@ func (r *Restore) configsvrRestoreCollections( doc := bson.D{} err = bson.Unmarshal(buf, &doc) if err != nil { - return nil, errors.WithMessage(err, "unmarshal") + return nil, errors.Wrap(err, "unmarshal") } model := mongo.NewReplaceOneModel() @@ -240,9 +254,9 @@ func (r *Restore) configsvrRestoreCollections( return chunkSelector, nil } - coll := r.cn.Conn.Database("config").Collection("collections") - if _, err = coll.BulkWrite(r.cn.Context(), models); err != nil { - return nil, errors.WithMessage(err, "update config.collections") + coll := r.cn.Conn.ConfigDatabase().Collection("collections") + if _, err = coll.BulkWrite(ctx, models); err != nil { + return nil, errors.Wrap(err, "update config.collections") } return chunkSelector, nil @@ -250,10 +264,11 @@ func (r *Restore) configsvrRestoreCollections( // configsvrRestoreChunks upserts config.chunks documents for selected namespaces func (r *Restore) configsvrRestoreChunks( - bcp *pbm.BackupMeta, - selector sel.ChunkSelector, + ctx context.Context, + bcp *types.BackupMeta, + selector util.ChunkSelector, mapRS, - mapS pbm.RSMapFunc, + mapS util.RSMapFunc, ) error { filepath := path.Join(bcp.Name, mapRS(r.node.RS()), "config.chunks"+bcp.Compression.Suffix()) rdr, err := r.stg.SourceReader(filepath) @@ -265,8 +280,8 @@ func (r *Restore) configsvrRestoreChunks( return err } - coll := r.cn.Conn.Database("config").Collection("chunks") - _, err = coll.DeleteMany(r.cn.Context(), selector.BuildFilter()) + coll := r.cn.Conn.ConfigDatabase().Collection("chunks") + _, err = coll.DeleteMany(ctx, selector.BuildFilter()) if err != nil { return err } @@ -293,7 +308,7 @@ func (r *Restore) configsvrRestoreChunks( doc := bson.D{} if err := bson.Unmarshal(buf, &doc); err != nil { - return errors.WithMessage(err, "unmarshal") + return errors.Wrap(err, "unmarshal") } for i, a := range doc { @@ -322,9 +337,9 @@ func (r *Restore) configsvrRestoreChunks( return nil } - _, err = coll.BulkWrite(r.cn.Context(), models) + _, err = 
coll.BulkWrite(ctx, models) if err != nil { - return errors.WithMessage(err, "update config.chunks") + return errors.Wrap(err, "update config.chunks") } models = models[:0] diff --git a/pbm/snapshot/backup.go b/pbm/snapshot/backup.go index 0fdc1e6e0..43353b860 100644 --- a/pbm/snapshot/backup.go +++ b/pbm/snapshot/backup.go @@ -1,7 +1,6 @@ package snapshot import ( - "context" "io" "log" "time" @@ -11,9 +10,10 @@ import ( "github.com/mongodb/mongo-tools/common/options" "github.com/mongodb/mongo-tools/common/progress" "github.com/mongodb/mongo-tools/mongodump" - "github.com/pkg/errors" - "github.com/percona/percona-backup-mongodb/version" + "github.com/percona/percona-backup-mongodb/internal/context" + "github.com/percona/percona-backup-mongodb/internal/errors" + "github.com/percona/percona-backup-mongodb/internal/version" ) type backuper struct { diff --git a/pbm/snapshot/dump.go b/pbm/snapshot/dump.go index 85a672860..0e648fd05 100644 --- a/pbm/snapshot/dump.go +++ b/pbm/snapshot/dump.go @@ -5,14 +5,15 @@ import ( "sync" "sync/atomic" - "github.com/pkg/errors" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" - "github.com/percona/percona-backup-mongodb/pbm/archive" - "github.com/percona/percona-backup-mongodb/pbm/compress" + "github.com/percona/percona-backup-mongodb/internal/archive" + "github.com/percona/percona-backup-mongodb/internal/compress" ) type UploadDumpOptions struct { - Compression compress.CompressionType + Compression defs.CompressionType CompressionLevel *int // NSFilter checks whether a namespace is selected for backup. @@ -34,7 +35,7 @@ func UploadDump(wt io.WriterTo, upload UploadFunc, opts UploadDumpOptions) (int6 go func() { _, err := wt.WriteTo(pw) - pw.CloseWithError(errors.WithMessage(err, "write to")) + pw.CloseWithError(errors.Wrap(err, "write to")) }() newWriter := func(ns string) (io.WriteCloser, error) { @@ -52,7 +53,7 @@ func UploadDump(wt io.WriterTo, upload UploadFunc, opts UploadDumpOptions) (int6 rc := &readCounter{r: pr} err := upload(ns, ext, rc) if err != nil { - pr.CloseWithError(errors.WithMessagef(err, "upload: %q", ns)) + pr.CloseWithError(errors.Wrapf(err, "upload: %q", ns)) } atomic.AddInt64(&size, rc.n) @@ -64,19 +65,19 @@ func UploadDump(wt io.WriterTo, upload UploadFunc, opts UploadDumpOptions) (int6 w, err := compress.Compress(pw, opts.Compression, opts.CompressionLevel) dwc := io.WriteCloser(&delegatedWriteCloser{w, pw}) - return dwc, errors.WithMessagef(err, "create compressor: %q", ns) + return dwc, errors.Wrapf(err, "create compressor: %q", ns) } err := archive.Decompose(pr, newWriter, opts.NSFilter, opts.DocFilter) wg.Wait() - return size, errors.WithMessage(err, "decompose") + return size, errors.Wrap(err, "decompose") } type DownloadFunc func(filename string) (io.ReadCloser, error) func DownloadDump( download DownloadFunc, - compression compress.CompressionType, + compression defs.CompressionType, match archive.NSFilterFn, ) (io.ReadCloser, error) { pr, pw := io.Pipe() @@ -89,7 +90,7 @@ func DownloadDump( r, err := download(ns) if err != nil { - return nil, errors.WithMessagef(err, "download: %q", ns) + return nil, errors.Wrapf(err, "download: %q", ns) } if ns == archive.MetaFile { @@ -97,11 +98,11 @@ func DownloadDump( } r, err = compress.Decompress(r, compression) - return r, errors.WithMessagef(err, "create decompressor: %q", ns) + return r, errors.Wrapf(err, "create decompressor: %q", ns) } err := archive.Compose(pw, match, newReader) - 
pw.CloseWithError(errors.WithMessage(err, "compose")) + pw.CloseWithError(errors.Wrap(err, "compose")) }() return pr, nil diff --git a/pbm/snapshot/restore.go b/pbm/snapshot/restore.go index b5ba9f749..64116f33f 100644 --- a/pbm/snapshot/restore.go +++ b/pbm/snapshot/restore.go @@ -5,10 +5,11 @@ import ( "github.com/mongodb/mongo-tools/common/options" "github.com/mongodb/mongo-tools/mongorestore" - "github.com/pkg/errors" "go.mongodb.org/mongo-driver/mongo/writeconcern" - "github.com/percona/percona-backup-mongodb/pbm" + "github.com/percona/percona-backup-mongodb/internal/config" + "github.com/percona/percona-backup-mongodb/internal/defs" + "github.com/percona/percona-backup-mongodb/internal/errors" ) const ( @@ -19,16 +20,16 @@ const ( ) var ExcludeFromRestore = []string{ - pbm.DB + "." + pbm.CmdStreamCollection, - pbm.DB + "." + pbm.LogCollection, - pbm.DB + "." + pbm.ConfigCollection, - pbm.DB + "." + pbm.BcpCollection, - pbm.DB + "." + pbm.RestoresCollection, - pbm.DB + "." + pbm.LockCollection, - pbm.DB + "." + pbm.LockOpCollection, - pbm.DB + "." + pbm.PITRChunksCollection, - pbm.DB + "." + pbm.AgentsStatusCollection, - pbm.DB + "." + pbm.PBMOpLogCollection, + defs.DB + "." + defs.CmdStreamCollection, + defs.DB + "." + defs.LogCollection, + defs.DB + "." + defs.ConfigCollection, + defs.DB + "." + defs.BcpCollection, + defs.DB + "." + defs.RestoresCollection, + defs.DB + "." + defs.LockCollection, + defs.DB + "." + defs.LockOpCollection, + defs.DB + "." + defs.PITRChunksCollection, + defs.DB + "." + defs.AgentsStatusCollection, + defs.DB + "." + defs.PBMOpLogCollection, "config.version", "config.mongos", "config.lockpings", @@ -44,13 +45,13 @@ var ExcludeFromRestore = []string{ "config.system.indexBuilds", // deprecated PBM collections, keep it here not to bring back from old backups - pbm.DB + ".pbmBackups.old", - pbm.DB + ".pbmPITRChunks.old", + defs.DB + ".pbmBackups.old", + defs.DB + ".pbmPITRChunks.old", } type restorer struct{ *mongorestore.MongoRestore } -func NewRestore(uri string, cfg *pbm.Config) (io.ReaderFrom, error) { +func NewRestore(uri string, cfg *config.Config) (io.ReaderFrom, error) { topts := options.New("mongorestore", "0.0.1", "none", @@ -74,7 +75,7 @@ func NewRestore(uri string, cfg *pbm.Config) (io.ReaderFrom, error) { } topts.Direct = true - topts.WriteConcern = writeconcern.New(writeconcern.WMajority()) + topts.WriteConcern = writeconcern.Majority() batchSize := batchSizeDefault if cfg.Restore.BatchSize > 0 { diff --git a/pbm/storage/azure/azure.go b/pbm/storage/azure/azure.go index cde0983bc..6b6afbbf9 100644 --- a/pbm/storage/azure/azure.go +++ b/pbm/storage/azure/azure.go @@ -1,271 +1,4 @@ +// TODO: temporary. 
remove later package azure -import ( - "context" - "fmt" - "io" - "net/http" - "path" - "runtime" - "strings" - "time" - - "github.com/Azure/azure-sdk-for-go/sdk/azcore" - "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" - "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob" - "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob" - "github.com/pkg/errors" - - "github.com/percona/percona-backup-mongodb/pbm/log" - "github.com/percona/percona-backup-mongodb/pbm/storage" -) - -const ( - BlobURL = "https://%s.blob.core.windows.net" - - defaultUploadBuff = 10 << 20 // 10Mb - defaultUploadMaxBuff = 5 - - defaultRetries = 10 - - maxBlocks = 50_000 -) - -type Conf struct { - Account string `bson:"account" json:"account,omitempty" yaml:"account,omitempty"` - Container string `bson:"container" json:"container,omitempty" yaml:"container,omitempty"` - Prefix string `bson:"prefix" json:"prefix,omitempty" yaml:"prefix,omitempty"` - Credentials Credentials `bson:"credentials" json:"-" yaml:"credentials"` -} - -type Credentials struct { - Key string `bson:"key" json:"key,omitempty" yaml:"key,omitempty"` -} - -type Blob struct { - opts Conf - log *log.Event - // url *url.URL - c *azblob.Client -} - -func New(opts Conf, l *log.Event) (*Blob, error) { - b := &Blob{ - opts: opts, - log: l, - } - - var err error - b.c, err = b.client() - if err != nil { - return nil, errors.Wrap(err, "init container") - } - - return b, b.ensureContainer() -} - -func (*Blob) Type() storage.Type { - return storage.Azure -} - -func (b *Blob) Save(name string, data io.Reader, sizeb int64) error { - bufsz := defaultUploadBuff - if sizeb > 0 { - ps := int(sizeb / maxBlocks * 11 / 10) // add 10% just in case - if ps > bufsz { - bufsz = ps - } - } - - cc := runtime.NumCPU() / 2 - if cc == 0 { - cc = 1 - } - - if b.log != nil { - b.log.Debug("BufferSize is set to %d (~%dMb) | %d", bufsz, bufsz>>20, sizeb) - } - - _, err := b.c.UploadStream(context.TODO(), - b.opts.Container, - path.Join(b.opts.Prefix, name), - data, - &azblob.UploadStreamOptions{ - BlockSize: int64(bufsz), - Concurrency: cc, - }) - - return err -} - -func (b *Blob) List(prefix, suffix string) ([]storage.FileInfo, error) { - prfx := path.Join(b.opts.Prefix, prefix) - - if prfx != "" && !strings.HasSuffix(prfx, "/") { - prfx += "/" - } - - pager := b.c.NewListBlobsFlatPager(b.opts.Container, &azblob.ListBlobsFlatOptions{ - Prefix: &prfx, - }) - - var files []storage.FileInfo - for pager.More() { - l, err := pager.NextPage(context.TODO()) - if err != nil { - return nil, errors.Wrap(err, "list segment") - } - - for _, b := range l.Segment.BlobItems { - if b.Name == nil { - return files, errors.Errorf("blob returned nil Name for item %v", b) - } - var sz int64 - if b.Properties.ContentLength != nil { - sz = *b.Properties.ContentLength - } - f := *b.Name - f = strings.TrimPrefix(f, prfx) - if len(f) == 0 { - continue - } - if f[0] == '/' { - f = f[1:] - } - - if strings.HasSuffix(f, suffix) { - files = append(files, storage.FileInfo{ - Name: f, - Size: sz, - }) - } - } - } - - return files, nil -} - -func (b *Blob) FileStat(name string) (storage.FileInfo, error) { - inf := storage.FileInfo{} - - p, err := b.c.ServiceClient(). - NewContainerClient(b.opts.Container). - NewBlockBlobClient(path.Join(b.opts.Prefix, name)). 
- GetProperties(context.TODO(), nil) - if err != nil { - if isNotFound(err) { - return inf, storage.ErrNotExist - } - return inf, errors.Wrap(err, "get properties") - } - - inf.Name = name - if p.ContentLength != nil { - inf.Size = *p.ContentLength - } - - if inf.Size == 0 { - return inf, storage.ErrEmpty - } - - return inf, nil -} - -func (b *Blob) Copy(src, dst string) error { - to := b.c.ServiceClient().NewContainerClient(b.opts.Container).NewBlockBlobClient(path.Join(b.opts.Prefix, dst)) - from := b.c.ServiceClient().NewContainerClient(b.opts.Container).NewBlockBlobClient(path.Join(b.opts.Prefix, src)) - r, err := to.StartCopyFromURL(context.TODO(), from.BlobClient().URL(), nil) - if err != nil { - return errors.Wrap(err, "start copy") - } - - if r.CopyStatus == nil { - return errors.New("undefined copy status") - } - status := *r.CopyStatus - for status == blob.CopyStatusTypePending { - time.Sleep(time.Second * 2) - p, err := to.GetProperties(context.TODO(), nil) - if err != nil { - return errors.Wrap(err, "get copy status") - } - if r.CopyStatus == nil { - return errors.New("undefined copy status") - } - status = *p.CopyStatus - } - - switch status { - case blob.CopyStatusTypeSuccess: - return nil - - case blob.CopyStatusTypeAborted: - return errors.New("copy aborted") - case blob.CopyStatusTypeFailed: - return errors.New("copy failed") - default: - return errors.Errorf("undefined status") - } -} - -func (b *Blob) SourceReader(name string) (io.ReadCloser, error) { - o, err := b.c.DownloadStream(context.TODO(), b.opts.Container, path.Join(b.opts.Prefix, name), nil) - if err != nil { - if isNotFound(err) { - return nil, storage.ErrNotExist - } - return nil, errors.Wrap(err, "download object") - } - - return o.Body, nil -} - -func (b *Blob) Delete(name string) error { - _, err := b.c.DeleteBlob(context.TODO(), b.opts.Container, path.Join(b.opts.Prefix, name), nil) - if err != nil { - if isNotFound(err) { - return storage.ErrNotExist - } - return errors.Wrap(err, "delete object") - } - - return nil -} - -func (b *Blob) ensureContainer() error { - _, err := b.c.ServiceClient().NewContainerClient(b.opts.Container).GetProperties(context.TODO(), nil) - // container already exists - if err == nil { - return nil - } - - var stgErr *azcore.ResponseError - if errors.As(err, &stgErr) && stgErr.StatusCode != http.StatusNotFound { - return errors.Wrap(err, "check container") - } - - _, err = b.c.CreateContainer(context.TODO(), b.opts.Container, nil) - return err -} - -func (b *Blob) client() (*azblob.Client, error) { - cred, err := azblob.NewSharedKeyCredential(b.opts.Account, b.opts.Credentials.Key) - if err != nil { - return nil, errors.Wrap(err, "create credentials") - } - - opts := &azblob.ClientOptions{} - opts.Retry = policy.RetryOptions{ - MaxRetries: defaultRetries, - } - return azblob.NewClientWithSharedKeyCredential(fmt.Sprintf(BlobURL, b.opts.Account), cred, opts) -} - -func isNotFound(err error) bool { - var stgErr *azcore.ResponseError - if errors.As(err, &stgErr) { - return stgErr.StatusCode == http.StatusNotFound - } - - return false -} +const BlobURL = "https://%s.blob.core.windows.net" diff --git a/pbm/storage/storage.go b/pbm/storage/storage.go deleted file mode 100644 index 069479b06..000000000 --- a/pbm/storage/storage.go +++ /dev/null @@ -1,60 +0,0 @@ -package storage - -import ( - "errors" - "io" -) - -var ( - // ErrNotExist is an error for file doesn't exists on storage - ErrNotExist = errors.New("no such file") - ErrEmpty = errors.New("file is empty") -) - -// Type 
represents a type of the destination storage for backups -type Type string - -const ( - Undef Type = "" - S3 Type = "s3" - Azure Type = "azure" - Filesystem Type = "filesystem" - BlackHole Type = "blackhole" -) - -type FileInfo struct { - Name string // with path - Size int64 -} - -type Storage interface { - Type() Type - Save(name string, data io.Reader, size int64) error - SourceReader(name string) (io.ReadCloser, error) - // FileStat returns file info. It returns error if file is empty or not exists. - FileStat(name string) (FileInfo, error) - // List scans path with prefix and returns all files with given suffix. - // Both prefix and suffix can be omitted. - List(prefix, suffix string) ([]FileInfo, error) - // Delete deletes given file. - // It returns storage.ErrNotExist if a file doesn't exists. - Delete(name string) error - // Copy makes a copy of the src objec/file under dst name - Copy(src, dst string) error -} - -// ParseType parses string and returns storage type -func ParseType(s string) Type { - switch s { - case string(S3): - return S3 - case string(Azure): - return Azure - case string(Filesystem): - return Filesystem - case string(BlackHole): - return BlackHole - default: - return Undef - } -} diff --git a/pbm/topo.go b/pbm/topo.go deleted file mode 100644 index e252c6cc2..000000000 --- a/pbm/topo.go +++ /dev/null @@ -1,124 +0,0 @@ -package pbm - -import ( - "fmt" - "strings" - - "github.com/pkg/errors" - "go.mongodb.org/mongo-driver/bson/primitive" -) - -func CheckTopoForBackup(cn *PBM, type_ BackupType) error { - members, err := cn.ClusterMembers() - if err != nil { - return errors.WithMessage(err, "get cluster members") - } - - ts, err := cn.ClusterTime() - if err != nil { - return errors.Wrap(err, "get cluster time") - } - - agentList, err := cn.ListAgents() - if err != nil { - return errors.WithMessage(err, "list agents") - } - - agents := make(map[string]map[string]AgentStat) - for _, a := range agentList { - if agents[a.RS] == nil { - agents[a.RS] = make(map[string]AgentStat) - } - agents[a.RS][a.Node] = a - } - - return collectTopoCheckErrors(members, agents, ts, type_) -} - -type ( - ReplsetName = string - NodeURI = string -) - -type topoCheckError struct { - Replsets map[ReplsetName]map[NodeURI][]error - Missed []string -} - -func (r topoCheckError) hasError() bool { - return len(r.Missed) != 0 -} - -func (r topoCheckError) Error() string { - if !r.hasError() { - return "" - } - - return fmt.Sprintf("no available agent(s) on replsets: %s", strings.Join(r.Missed, ", ")) -} - -func collectTopoCheckErrors( - replsets []Shard, - agentsByRS map[ReplsetName]map[NodeURI]AgentStat, - ts primitive.Timestamp, - type_ BackupType, -) error { - rv := topoCheckError{ - Replsets: make(map[string]map[NodeURI][]error), - Missed: make([]string, 0), - } - - for _, rs := range replsets { - rsName, uri, _ := strings.Cut(rs.Host, "/") - agents := agentsByRS[rsName] - if len(agents) == 0 { - rv.Missed = append(rv.Missed, rsName) - continue - } - - hosts := strings.Split(uri, ",") - members := make(map[NodeURI][]error, len(hosts)) - anyAvail := false - for _, host := range hosts { - a, ok := agents[host] - if !ok || a.Arbiter || a.Passive { - continue - } - - errs := []error{} - if a.Err != "" { - errs = append(errs, errors.New(a.Err)) - } - if ok, estrs := a.OK(); !ok { - for _, e := range estrs { - errs = append(errs, errors.New(e)) - } - } - - const maxReplicationLag uint32 = 35 - if ts.T-a.Heartbeat.T > maxReplicationLag { - errs = append(errs, errors.New("stale")) - } - if err := 
FeatureSupport(a.MongoVersion()).BackupType(type_); err != nil { - errs = append(errs, errors.WithMessage(err, "unsupported backup type")) - } - - members[host] = errs - if len(errs) == 0 { - anyAvail = true - } - } - - rv.Replsets[rsName] = members - - if !anyAvail { - rv.Missed = append(rv.Missed, rsName) - } - } - - if rv.hasError() { - return rv - } - - return nil -} diff --git a/pbm/version.go b/pbm/version.go deleted file mode 100644 index d01467b2a..000000000 --- a/pbm/version.go +++ /dev/null @@ -1,126 +0,0 @@ -package pbm - -import ( - "context" - - "github.com/pkg/errors" - "go.mongodb.org/mongo-driver/bson" - "go.mongodb.org/mongo-driver/mongo" -) - -// BreakingChangesMap map of versions introduced breaking changes to respective -// backup types. -// !!! Versions should be sorted in the ascending order. -var BreakingChangesMap = map[BackupType][]string{ - LogicalBackup: {"1.5.0"}, - IncrementalBackup: {"2.1.0"}, - PhysicalBackup: {}, -} - -type MongoVersion struct { - PSMDBVersion string `bson:"psmdbVersion,omitempty"` - VersionString string `bson:"version"` - Version []int `bson:"versionArray"` -} - -func (v MongoVersion) Major() int { - if len(v.Version) == 0 { - return 0 - } - - return v.Version[0] -} - -func GetMongoVersion(ctx context.Context, m *mongo.Client) (MongoVersion, error) { - res := m.Database("admin").RunCommand(ctx, bson.D{{"buildInfo", 1}}) - if err := res.Err(); err != nil { - return MongoVersion{}, err - } - - var ver MongoVersion - if err := res.Decode(&ver); err != nil { - return MongoVersion{}, err - } - - return ver, nil -} - -type FeatureSupport MongoVersion - -func (f FeatureSupport) PBMSupport() error { - v := MongoVersion(f) - - if v.Version[0] == 4 && v.Version[1] == 4 { - return nil - } - - if (v.Version[0] == 5 || v.Version[0] == 6) && v.Version[1] == 0 { - return nil - } - - return errors.New("Unsupported MongoDB version. 
PBM works with v4.4, v5.0, v6.0") -} - -func (f FeatureSupport) FullPhysicalBackup() bool { - // PSMDB 4.2.15, 4.4.6 - v := MongoVersion(f) - if v.PSMDBVersion == "" { - return false - } - - switch { - case v.Version[0] == 4 && v.Version[1] == 2 && v.Version[2] >= 15: - fallthrough - case v.Version[0] == 4 && v.Version[1] == 4 && v.Version[2] >= 6: - fallthrough - case v.Version[0] >= 5: - return true - } - - return false -} - -func (f FeatureSupport) IncrementalPhysicalBackup() bool { - // PSMDB 4.2.24, 4.4.18, 5.0.14, 6.0.3 - v := MongoVersion(f) - if v.PSMDBVersion == "" { - return false - } - - switch { - case v.Version[0] == 4 && v.Version[1] == 2 && v.Version[2] >= 24: - fallthrough - case v.Version[0] == 4 && v.Version[1] == 4 && v.Version[2] >= 18: - fallthrough - case v.Version[0] == 5 && v.Version[1] == 0 && v.Version[2] >= 14: - fallthrough - case v.Version[0] == 6 && v.Version[1] == 0 && v.Version[2] >= 3: - fallthrough - case v.Version[0] >= 7: - return true - } - - return false -} - -func (f FeatureSupport) BackupType(t BackupType) error { - switch t { - case PhysicalBackup: - if !f.FullPhysicalBackup() { - return errors.New("full physical backup works since " + - "Percona Server for MongoDB 4.2.15, 4.4.6") - } - case IncrementalBackup: - if !f.IncrementalPhysicalBackup() { - return errors.New("incremental physical backup works since " + - "Percona Server for MongoDB 4.2.24, 4.4.18, 5.0.14, 6.0.3") - } - case ExternalBackup: - if !f.FullPhysicalBackup() { - return errors.New("external backup works since " + - "Percona Server for MongoDB 4.2.15, 4.4.6") - } - } - - return nil -} diff --git a/version/version.go b/version/version.go deleted file mode 100644 index 960be7828..000000000 --- a/version/version.go +++ /dev/null @@ -1,136 +0,0 @@ -package version - -import ( - "encoding/json" - "fmt" - "runtime" - - "golang.org/x/mod/semver" -) - -// current PBM version -const version = "2.3.0-next" - -var ( - platform string - gitCommit string - gitBranch string - buildTime string -) - -type Info struct { //nolint:musttag - Version string - Platform string - GitCommit string - GitBranch string - BuildTime string - GoVersion string -} - -const plain = `Version: %s -Platform: %s -GitCommit: %s -GitBranch: %s -BuildTime: %s -GoVersion: %s` - -func Current() Info { - v := Info{ - Version: version, - Platform: platform, - GitCommit: gitCommit, - GitBranch: gitBranch, - BuildTime: buildTime, - GoVersion: runtime.Version(), - } - if v.Platform == "" { - v.Platform = fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH) - } - - return v -} - -func (i Info) String() string { - return fmt.Sprintf(plain, - i.Version, - i.Platform, - i.GitCommit, - i.GitBranch, - i.BuildTime, - i.GoVersion, - ) -} - -func (i Info) Short() string { - return i.Version -} - -func (i Info) All(format string) string { - switch format { - case "": - return fmt.Sprintf(plain, - i.Version, - i.Platform, - i.GitCommit, - i.GitBranch, - i.BuildTime, - i.GoVersion, - ) - case "json": - v, _ := json.MarshalIndent(i, "", " ") //nolint:errchkjson - return string(v) - default: - return fmt.Sprintf("%#v", i) - } -} - -// CompatibleWith checks if a given version is compatible the current one. 
It -// is not compatible if the current is crossed the breaking ponit -// (version >= breakingVersion) and the given isn't (v < breakingVersion) -func CompatibleWith(v string, breakingv []string) bool { - return compatible(version, v, breakingv) -} - -func compatible(v1, v2 string, breakingv []string) bool { - if len(breakingv) == 0 { - return true - } - - v1 = majmin(v1) - v2 = majmin(v2) - - c := semver.Compare(v2, v1) - if c == 0 { - return true - } - - hV, lV := v1, v2 - if c == 1 { - hV, lV = lV, hV - } - - for i := len(breakingv) - 1; i >= 0; i-- { - cb := majmin(breakingv[i]) - if semver.Compare(hV, cb) >= 0 { - return semver.Compare(lV, cb) >= 0 - } - } - - return true -} - -func majmin(v string) string { - if len(v) == 0 { - return v - } - - if v[0] != 'v' { - v = "v" + v - } - - return semver.MajorMinor(v) -} - -func IsLegacyArchive(ver string) bool { - return semver.Compare(majmin(ver), "v2.0") == -1 -}