From 0e7f15eb76e2d6e2179740b0e79407b367d361dd Mon Sep 17 00:00:00 2001 From: Dmytro Zghoba Date: Tue, 12 Sep 2023 20:08:26 +0300 Subject: [PATCH] enhance PITR base-snapshot support --- .dockerignore | 1 + .gitignore | 2 +- agent/restore.go | 12 +++++++----- cli/delete.go | 2 +- cli/restore.go | 31 +++++++++++++++++++++---------- pbm/agent_status.go | 1 + pbm/cleanup.go | 4 ++-- pbm/delete.go | 5 +++++ pbm/pbm.go | 10 +++++++--- pbm/restore/logical.go | 7 +++++-- pbm/restore/restore.go | 31 ------------------------------- 11 files changed, 51 insertions(+), 55 deletions(-) diff --git a/.dockerignore b/.dockerignore index b5e3e73a6..abb8f85cd 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,3 +1,4 @@ /bin/ /dev/ +/.dev e2e-tests/docker/backups/pbm/ diff --git a/.gitignore b/.gitignore index fcb955a61..de8ab492f 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,7 @@ !.vscode/settings.json .idea/ /bin/ -/dev/ +/.dev e2e-tests/docker/keyFile e2e-tests/docker/backups/pbm e2e-tests/docker/pbm.conf.yaml diff --git a/agent/restore.go b/agent/restore.go index 46ff41445..0da3dc555 100644 --- a/agent/restore.go +++ b/agent/restore.go @@ -322,15 +322,17 @@ func (a *Agent) Restore(r *pbm.RestoreCmd, opid pbm.OPID, ep pbm.Epoch) { bcpType = pbm.ExternalBackup } else { l.Info("backup: %s", r.BackupName) - if !r.OplogTS.IsZero() { - bcp, err = restore.GetBaseBackup(a.pbm, r.BackupName, r.OplogTS, stg) - } else { - bcp, err = restore.SnapshotMeta(a.pbm, r.BackupName, stg) - } + bcp, err = restore.SnapshotMeta(a.pbm, r.BackupName, stg) if err != nil { l.Error("define base backup: %v", err) return } + + if !r.OplogTS.IsZero() && bcp.LastWriteTS.Compare(r.OplogTS) >= 0 { + l.Error("snapshot's last write is later than the target time. " + + "Try to set an earlier snapshot. Or leave the snapshot empty so PBM will choose one.") + return + } bcpType = bcp.Type } diff --git a/cli/delete.go b/cli/delete.go index 49c193c28..8d42e1a50 100644 --- a/cli/delete.go +++ b/cli/delete.go @@ -335,7 +335,7 @@ func askConfirmation(question string) error { if err != nil { return errors.WithMessage(err, "stat stdin") } - if (fi.Mode() & os.ModeCharDevice) != 0 { + if (fi.Mode() & os.ModeCharDevice) == 0 { return errors.New("no tty") } diff --git a/cli/restore.go b/cli/restore.go index dc8bf78ca..9d98f9e16 100644 --- a/cli/restore.go +++ b/cli/restore.go @@ -237,21 +237,32 @@ func checkBackup(cn *pbm.PBM, o *restoreOpts, nss []string) (string, pbm.BackupT return "", pbm.ExternalBackup, nil } - if o.pitr != "" && o.pitrBase == "" { - return "", pbm.LogicalBackup, nil - } - b := o.bcp - if o.pitrBase != "" { + if o.pitr != "" && o.pitrBase != "" { b = o.pitrBase } - bcp, err := cn.GetBackupMeta(b) - if errors.Is(err, pbm.ErrNotFound) { - return "", "", errors.Errorf("backup '%s' not found", b) + var err error + var bcp *pbm.BackupMeta + if b != "" { + bcp, err = cn.GetBackupMeta(b) + if errors.Is(err, pbm.ErrNotFound) { + return "", "", errors.Errorf("backup '%s' not found", b) + } + } else { + var ts primitive.Timestamp + ts, err = parseTS(o.pitr) + if err != nil { + return "", "", errors.WithMessage(err, "parse pitr") + } + + bcp, err = cn.GetLastBackup(&primitive.Timestamp{T: ts.T + 1, I: 0}) + if errors.Is(err, pbm.ErrNotFound) { + return "", "", errors.New("no base snapshot found") + } } if err != nil { - return "", "", errors.Wrap(err, "get backup data") + return "", "", errors.WithMessage(err, "get backup data") } if len(nss) != 0 && bcp.Type != pbm.LogicalBackup { return "", "", errors.New("--ns flag is only allowed for logical restore") @@ -260,7 +271,7 @@ func checkBackup(cn *pbm.PBM, o *restoreOpts, nss []string) (string, pbm.BackupT return "", "", errors.Errorf("backup '%s' didn't finish successfully", b) } - return b, bcp.Type, nil + return bcp.Name, bcp.Type, nil } func restore( diff --git a/pbm/agent_status.go b/pbm/agent_status.go index 76e9003f6..86b31fe7b 100644 --- a/pbm/agent_status.go +++ b/pbm/agent_status.go @@ -98,6 +98,7 @@ func (p *PBM) AgentStatusGC() error { // may stuck for 30 sec on ping (trying to connect), it's HB became stale and it would be collected. // Which would lead to the false clamin "not found" in the status output. So stale range should at least 30 sec // (+5 just in case). + // XXX: stalesec is const 15 secs which resolves to 35 secs stalesec := AgentsStatCheckRange.Seconds() * 3 if stalesec < 35 { stalesec = 35 diff --git a/pbm/cleanup.go b/pbm/cleanup.go index e686a168d..01f1aa310 100644 --- a/pbm/cleanup.go +++ b/pbm/cleanup.go @@ -110,7 +110,7 @@ func canDeleteBaseSnapshot(ctx context.Context, m *mongo.Client, lw primitive.Ti f := bson.D{ {"last_write_ts", bson.M{"$gte": lw}}, {"nss", nil}, - {"type", LogicalBackup}, + {"type", bson.M{"$ne": ExternalBackup}}, {"status", StatusDone}, } o := options.FindOne().SetProjection(bson.D{{"last_write_ts", 1}}) @@ -205,7 +205,7 @@ func isBaseSnapshot(bcp *BackupMeta) bool { if bcp.Status != StatusDone { return false } - if bcp.Type != LogicalBackup || sel.IsSelective(bcp.Namespaces) { + if bcp.Type == ExternalBackup || sel.IsSelective(bcp.Namespaces) { return false } diff --git a/pbm/delete.go b/pbm/delete.go index f68a6b47e..e2b790b51 100644 --- a/pbm/delete.go +++ b/pbm/delete.go @@ -10,6 +10,7 @@ import ( "golang.org/x/sync/errgroup" "github.com/percona/percona-backup-mongodb/pbm/log" + "github.com/percona/percona-backup-mongodb/pbm/sel" "github.com/percona/percona-backup-mongodb/pbm/storage" "github.com/percona/percona-backup-mongodb/version" ) @@ -58,6 +59,10 @@ func (p *PBM) probeDelete(backup *BackupMeta, tlns []Timeline) error { return errors.Errorf("unable to delete backup in %s state", backup.Status) } + if backup.Type == ExternalBackup || sel.IsSelective(backup.Namespaces) { + return nil + } + // if backup isn't a base for any PITR timeline for _, t := range tlns { if backup.LastWriteTS.T == t.Start { diff --git a/pbm/pbm.go b/pbm/pbm.go index 935411493..db1e870fb 100644 --- a/pbm/pbm.go +++ b/pbm/pbm.go @@ -804,15 +804,17 @@ func (p *PBM) LastIncrementalBackup() (*BackupMeta, error) { return p.getRecentBackup(nil, nil, -1, bson.D{{"type", string(IncrementalBackup)}}) } -// GetLastBackup returns last successfully finished backup +// GetLastBackup returns last successfully finished backup (non-selective and non-external) // or nil if there is no such backup yet. If ts isn't nil it will // search for the most recent backup that finished before specified timestamp func (p *PBM) GetLastBackup(before *primitive.Timestamp) (*BackupMeta, error) { - return p.getRecentBackup(nil, before, -1, bson.D{{"nss", nil}, {"type", string(LogicalBackup)}}) + return p.getRecentBackup(nil, before, -1, + bson.D{{"nss", nil}, {"type", bson.M{"$ne": ExternalBackup}}}) } func (p *PBM) GetFirstBackup(after *primitive.Timestamp) (*BackupMeta, error) { - return p.getRecentBackup(after, nil, 1, bson.D{{"nss", nil}, {"type", string(LogicalBackup)}}) + return p.getRecentBackup(after, nil, 1, + bson.D{{"nss", nil}, {"type", bson.M{"$ne": ExternalBackup}}}) } func (p *PBM) getRecentBackup(after, before *primitive.Timestamp, sort int, opts bson.D) (*BackupMeta, error) { @@ -844,6 +846,8 @@ func (p *PBM) getRecentBackup(after, before *primitive.Timestamp, sort int, opts func (p *PBM) BackupHasNext(backup *BackupMeta) (bool, error) { f := bson.D{ + {"nss", nil}, + {"type", bson.M{"$ne": ExternalBackup}}, {"start_ts", bson.M{"$gt": backup.LastWriteTS.T}}, {"status", StatusDone}, } diff --git a/pbm/restore/logical.go b/pbm/restore/logical.go index fda4f5423..60f3882dc 100644 --- a/pbm/restore/logical.go +++ b/pbm/restore/logical.go @@ -213,11 +213,14 @@ func (r *Restore) PITR(cmd *pbm.RestoreCmd, opid pbm.OPID, l *log.Event) (err er return err } - // tsTo := primitive.Timestamp{T: uint32(cmd.TS), I: uint32(cmd.I)} - bcp, err := GetBaseBackup(r.cn, cmd.BackupName, cmd.OplogTS, r.stg) + bcp, err := SnapshotMeta(r.cn, cmd.BackupName, r.stg) if err != nil { return errors.Wrap(err, "get base backup") } + if bcp.LastWriteTS.Compare(cmd.OplogTS) >= 0 { + return errors.New("snapshot's last write is later than the target time. " + + "Try to set an earlier snapshot. Or leave the snapshot empty so PBM will choose one.") + } nss := cmd.Namespaces if len(nss) == 0 { diff --git a/pbm/restore/restore.go b/pbm/restore/restore.go index dd0232672..50fc5da26 100644 --- a/pbm/restore/restore.go +++ b/pbm/restore/restore.go @@ -214,37 +214,6 @@ func waitForStatus(cn *pbm.PBM, name string, status pbm.Status) error { } } -func GetBaseBackup( - cn *pbm.PBM, - bcpName string, - tsTo primitive.Timestamp, - stg storage.Storage, -) (*pbm.BackupMeta, error) { - var bcp *pbm.BackupMeta - var err error - if bcpName == "" { - bcp, err = cn.GetLastBackup(&tsTo) - if errors.Is(err, pbm.ErrNotFound) { - return nil, errors.Errorf("no backup found before ts %v", tsTo) - } - if err != nil { - return nil, errors.Wrap(err, "define last backup") - } - return bcp, nil - } - - bcp, err = SnapshotMeta(cn, bcpName, stg) - if err != nil { - return nil, err - } - if primitive.CompareTimestamp(bcp.LastWriteTS, tsTo) >= 0 { - return nil, errors.New("snapshot's last write is later than the target time. " + - "Try to set an earlier snapshot. Or leave the snapshot empty so PBM will choose one.") - } - - return bcp, nil -} - // chunks defines chunks of oplog slice in given range, ensures its integrity (timeline // is contiguous - there are no gaps), checks for respective files on storage and returns // chunks list if all checks passed