* feat(schema): drop sequential restore run tracking

  Right now SM restores location by location, manifest by manifest, table by table. That's why it tracks restore progress by keeping location/manifest/table in the DB. We are moving away from the sequential restore approach in favor of restoring from all locations/manifests/tables at the same time.

* feat(restore): adjust model to dropped sequential restore run tracking

* refactor(backupspec): include the newest file version in ListVersionedFiles

  There is no need to iterate versioned files (ListVersionedFiles) and non-versioned files (buildFilesSizesCache) separately. Doing it in a single iteration is faster, and it allows storing all size information in a single place.

* feat(restore): add workload indexing

  This commit introduces the structure of the restore workload. The workload is divided per location->table->remote sstable directory. This changes the hierarchy established by manifests (location->node->table->remote sstable dir). It also aggregates files into actual sstables, extracts their IDs, aggregates their sizes, and keeps track of sstable versioning.

* feat(restore): index, support resume

  The indexed workload won't contain sstables that were already restored during a previous restore run.

* feat(restore): index, support metrics init

* feat(restore): add primitive batching using indexed workload

  This is a temporary implementation used for integrating workload indexing with the rest of the code. It will be improved as a part of #3979.

* feat(restore): integrate new indexing and batching with codebase

  This commit makes use of the new indexing and batching approaches and uses them in the restore tables codebase.

* fix(restore): handle download of fully versioned batch

  Recent commits changed versioned batch download so that if any sstable component is versioned, then all sstable components are downloaded as versioned files. It was done that way to allow easier versioned progress calculation (we don't store per-file size, only the whole sstable size). This brought to light a bug (that existed before, but was more difficult to hit), in which restoring a batch failed when the whole batch was versioned, as calling RcloneSyncCopyPaths with an empty paths parameter resulted in a broken download. We could just skip the RcloneSyncCopyPaths call when the whole batch is versioned, but this would leave us without the agentJobID, which is a part of the sort key in RestoreRunProgress. Without it, we could potentially overwrite one restore run progress with another - if both of them happened on the same RemoteSSTableDir, by the same Host, and were fully versioned. It would also introduce a different path for restoring a regular batch and a fully versioned batch, which is not desirable. That's why I decided to modify the rclone server to allow an empty path parameter, so that it still generates the agentJobID but doesn't do anything except for that.

* feat(restore): index, log workload info

  Workload info contains, per location/table/remote sstable dir: sstable count, total size, and max and average sstable size.
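The batch dispatcher added below leans on workload index types (LocationWorkload, TableWorkload, RemoteDirWorkload, RemoteSSTable) that are defined in other files of this commit. As a reading aid, here is a minimal sketch of their likely shape, inferred purely from how the dispatcher uses them; the field sets are assumptions, not the actual definitions:

package restore

import (
	. "github.com/scylladb/scylla-manager/v3/pkg/service/backup/backupspec"
)

// Hypothetical shapes inferred from usage in the diff below;
// the exact definitions live in other files of this commit.

// LocationWorkload groups everything restorable from one backup location.
type LocationWorkload struct {
	Location // embedded backupspec.Location

	Size   int64 // remaining bytes to restore from this location
	Tables []TableWorkload
}

// TableWorkload groups a single table's remote sstable directories.
type TableWorkload struct {
	Location
	TableName // embedded keyspace/table pair

	Size       int64 // remaining bytes to restore for this table
	RemoteDirs []RemoteDirWorkload
}

// RemoteDirWorkload is the unit that batches are carved from.
// Location/Keyspace/Table come via the embedded types.
type RemoteDirWorkload struct {
	TableName
	*ManifestInfo

	RemoteSSTableDir string
	Size             int64 // remaining bytes in this remote sstable dir
	SSTables         []RemoteSSTable
}

// RemoteSSTable aggregates all component files of a single sstable.
type RemoteSSTable struct {
	ID        string
	Size      int64 // summed size of all component files
	Versioned bool  // true if any component file is versioned
}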
1 parent 5048128 · commit 14aef7b
Showing 14 changed files with 746 additions and 616 deletions.
@@ -0,0 +1,175 @@
// Copyright (C) 2024 ScyllaDB

package restore

import (
	"slices"
	"sync"

	"github.com/pkg/errors"
	. "github.com/scylladb/scylla-manager/v3/pkg/service/backup/backupspec"
)

type batchDispatcher struct {
	mu            sync.Mutex
	workload      []LocationWorkload
	batchSize     int
	locationHosts map[Location][]string
}

func newBatchDispatcher(workload []LocationWorkload, batchSize int, locationHosts map[Location][]string) *batchDispatcher {
	return &batchDispatcher{
		mu:            sync.Mutex{},
		workload:      workload,
		batchSize:     batchSize,
		locationHosts: locationHosts,
	}
}

type batch struct {
	TableName
	*ManifestInfo

	RemoteSSTableDir string
	Size             int64
	SSTables         []RemoteSSTable
}

func (b batch) NotVersionedSSTables() []RemoteSSTable {
	var ssts []RemoteSSTable
	for _, sst := range b.SSTables {
		if !sst.Versioned {
			ssts = append(ssts, sst)
		}
	}
	return ssts
}

func (b batch) VersionedSSTables() []RemoteSSTable {
	var ssts []RemoteSSTable
	for _, sst := range b.SSTables {
		if sst.Versioned {
			ssts = append(ssts, sst)
		}
	}
	return ssts
}

func (b batch) VersionedSize() int64 {
	var size int64
	for _, sst := range b.SSTables {
		if sst.Versioned {
			size += sst.Size
		}
	}
	return size
}

func (b batch) IDs() []string {
	var ids []string
	for _, sst := range b.SSTables {
		ids = append(ids, sst.ID)
	}
	return ids
}

// ValidateAllDispatched returns an error if not all sstables were dispatched.
func (b *batchDispatcher) ValidateAllDispatched() error {
	for _, lw := range b.workload {
		if lw.Size != 0 {
			for _, tw := range lw.Tables {
				if tw.Size != 0 {
					for _, dw := range tw.RemoteDirs {
						if dw.Size != 0 || len(dw.SSTables) != 0 {
							return errors.Errorf("expected all data to be restored, missing sstable ids from location %s table %s.%s: %v (%d bytes)",
								dw.Location, dw.Keyspace, dw.Table, dw.SSTables, dw.Size)
						}
					}
					return errors.Errorf("expected all data to be restored, missing table from location %s: %s.%s (%d bytes)",
						tw.Location, tw.Keyspace, tw.Table, tw.Size)
				}
			}
			return errors.Errorf("expected all data to be restored, missing location: %s (%d bytes)",
				lw.Location, lw.Size)
		}
	}
	return nil
}

// DispatchBatch returns the next batch to be restored, or false when there is no more work to do.
func (b *batchDispatcher) DispatchBatch(host string) (batch, bool) {
	b.mu.Lock()
	defer b.mu.Unlock()

	l := b.chooseLocation(host)
	if l == nil {
		return batch{}, false
	}
	t := b.chooseTable(l)
	if t == nil {
		return batch{}, false
	}
	dir := b.chooseRemoteDir(t)
	if dir == nil {
		return batch{}, false
	}
	out := b.createBatch(l, t, dir)
	return out, true
}

// Returns location for which batch should be created.
func (b *batchDispatcher) chooseLocation(host string) *LocationWorkload {
	for i := range b.workload {
		if b.workload[i].Size == 0 {
			continue
		}
		if slices.Contains(b.locationHosts[b.workload[i].Location], host) {
			return &b.workload[i]
		}
	}
	return nil
}

// Returns table for which batch should be created.
func (b *batchDispatcher) chooseTable(location *LocationWorkload) *TableWorkload {
	for i := range location.Tables {
		if location.Tables[i].Size == 0 {
			continue
		}
		return &location.Tables[i]
	}
	return nil
}

// Returns remote dir for which batch should be created.
func (b *batchDispatcher) chooseRemoteDir(table *TableWorkload) *RemoteDirWorkload {
	for i := range table.RemoteDirs {
		if table.RemoteDirs[i].Size == 0 {
			continue
		}
		return &table.RemoteDirs[i]
	}
	return nil
}

// Returns batch and updates RemoteDirWorkload and its parents.
func (b *batchDispatcher) createBatch(l *LocationWorkload, t *TableWorkload, dir *RemoteDirWorkload) batch {
	i := min(b.batchSize, len(dir.SSTables))
	sstables := dir.SSTables[:i]
	dir.SSTables = dir.SSTables[i:]

	var size int64
	for _, sst := range sstables {
		size += sst.Size
	}
	dir.Size -= size
	t.Size -= size
	l.Size -= size
	return batch{
		TableName:        dir.TableName,
		ManifestInfo:     dir.ManifestInfo,
		RemoteSSTableDir: dir.RemoteSSTableDir,
		Size:             size,
		SSTables:         sstables,
	}
}
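
For context, wiring the dispatcher into per-host workers could look roughly like the sketch below (same package as the file above). restoreAll, restoreBatch, and the hosts argument are hypothetical stand-ins; the real integration, including error handling and progress tracking, lives in the restore tables codebase touched by this commit:

// Hypothetical wiring of batchDispatcher into per-host workers.
// restoreBatch is a stand-in for the actual download-and-load logic.
func restoreAll(workload []LocationWorkload, batchSize int, locationHosts map[Location][]string, hosts []string) error {
	bd := newBatchDispatcher(workload, batchSize, locationHosts)

	var wg sync.WaitGroup
	for _, h := range hosts {
		wg.Add(1)
		go func(host string) {
			defer wg.Done()
			// Each host keeps pulling batches until no work is left
			// in any location it has access to.
			for {
				b, ok := bd.DispatchBatch(host)
				if !ok {
					return
				}
				restoreBatch(host, b)
			}
		}(h)
	}
	wg.Wait()

	// Every sstable should have been handed out by now.
	return bd.ValidateAllDispatched()
}

Because DispatchBatch takes the dispatcher's mutex, concurrent workers can safely drain the same shared workload, and ValidateAllDispatched serves as a final consistency check that the Size counters decremented in createBatch all reached zero.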