Skip to content

Commit

Permalink
support repair snapshot (#172)
Browse files Browse the repository at this point in the history
* support repair snapshot

* add flag to control record snapshot journal

---------

Co-authored-by: KamiD <[email protected]>
  • Loading branch information
giskook and KamiD authored Jun 25, 2023
1 parent 0022603 commit fabf2de
Show file tree
Hide file tree
Showing 8 changed files with 48 additions and 20 deletions.
3 changes: 2 additions & 1 deletion app/repair_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ func repairStateOnStart(ctx *server.Context) {
func RepairState(ctx *server.Context, onStart bool) {
sm.SetIgnoreSmbCheck(true)
iavl.SetIgnoreVersionCheck(true)
rootmulti.SetRepair()
global.SetRepairState(true)
defer global.SetRepairState(false)

// load latest block height
dataDir := filepath.Join(ctx.Config.RootDir, "data")
Expand Down
4 changes: 0 additions & 4 deletions cmd/okbchaind/repair_data.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package main

import (
"fmt"
"github.com/okx/okbchain/libs/cosmos-sdk/store/mpt"
"log"
"net/http"
_ "net/http/pprof"
Expand Down Expand Up @@ -64,7 +63,4 @@ func setExternalPackageValue() {
tmiavl.SetEnableFastStorage(true)
tmiavl.SetIgnoreAutoUpgrade(true)
}
if !viper.GetBool(tmiavl.FlagIavlDiscardFastStorage) {
mpt.SetSnapshotRebuild(true)
}
}
1 change: 1 addition & 0 deletions libs/cosmos-sdk/server/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -362,4 +362,5 @@ func SetExternalPackageValue(cmd *cobra.Command) {
mptstore.TrieAsyncDBInitCap = viper.GetInt(mptstore.FlagTrieAsyncDBInitCap)
mptstore.TrieAsyncDBAutoPruningOff = viper.GetBool(mptstore.FlagTrieAsyncDBAutoPruningOff)
mptstore.TrieAsyncDBSyncPruning = viper.GetBool(mptstore.FlagTrieAsyncDBSyncPruning)
mptstore.SetSnapshotJournal(viper.GetBool(mptstore.FlagTrieEnableSnapshotJournal))
}
1 change: 1 addition & 0 deletions libs/cosmos-sdk/server/start_okchain.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ func RegisterServerFlags(cmd *cobra.Command) *cobra.Command {
cmd.Flags().Int(mpt.FlagTrieAsyncDBInitCap, 200_0000, "Init cap of trie async db")
cmd.Flags().Bool(mpt.FlagTrieAsyncDBAutoPruningOff, false, "Disable auto prune of trie async db")
cmd.Flags().Bool(mpt.FlagTrieAsyncDBSyncPruning, false, "if auto pruning is off and this is on, trie async db will be pruned every block in sync mode")
cmd.Flags().Bool(mpt.FlagTrieEnableSnapshotJournal, false, "Enable record snapshot's journal. So that snapshot can be repaired within certain version")

cmd.Flags().Int64(FlagCommitGapHeight, 10, "Block interval to commit cached data into db, affects iavl & mpt")
cmd.Flags().Int64(FlagFastSyncGap, 20, "Block height interval to switch fast-sync mode")
Expand Down
2 changes: 2 additions & 0 deletions libs/cosmos-sdk/store/mpt/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ const (
FlagTrieAsyncDBInitCap = "trie.asyncdb.init-cap"
FlagTrieAsyncDBAutoPruningOff = "trie.asyncdb.auto-pruning-off"
FlagTrieAsyncDBSyncPruning = "trie.asyncdb.sync-pruning"

FlagTrieEnableSnapshotJournal = "trie.enable-snapshot-journal"
)

var (
Expand Down
31 changes: 29 additions & 2 deletions libs/cosmos-sdk/store/mpt/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,24 @@ import (
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/state/snapshot"
mpttypes "github.com/okx/okbchain/libs/cosmos-sdk/store/mpt/types"
"github.com/okx/okbchain/libs/tendermint/global"
)

var (
	// gDisableSnapshot, when true, makes openSnapshot return an error
	// instead of opening the snapshot tree.
	gDisableSnapshot bool

	// gSnapshotRebuild is assigned by SetSnapshotRebuild and forwarded to
	// snapshot.NewCustom when the snapshot tree is opened.
	gSnapshotRebuild bool

	// gEnableSnapshotJournal turns on recording of the snapshot journal in
	// commitSnap, so that the snapshot can be repaired within
	// snapshotMemoryLayerCount.
	gEnableSnapshotJournal bool
)

const (
// snapshotMemoryLayerCount is the number of in-memory snapshot layers.
// As we don't roll back transactions, we only keep 1 memory layer.
snapshotMemoryLayerCount = 1
// snapshotMemoryLayerCount is the layer count passed to snaps.Cap in commitSnap.
// It also controls the snapshot journal height: if the repair start-height is
// lower than the snapshot journal height, the snapshot can no longer be repaired.
snapshotMemoryLayerCount = 10
)

func DisableSnapshot() {
Expand All @@ -27,6 +34,14 @@ func SetSnapshotRebuild(rebuild bool) {
gSnapshotRebuild = rebuild
}

// SetSnapshotJournal switches recording of the snapshot journal on or off.
// The value is stored in gEnableSnapshotJournal and is read back through
// checkSnapshotJournal when a snapshot is committed.
func SetSnapshotJournal(enable bool) {
	gEnableSnapshotJournal = enable
}

// checkSnapshotJournal reports whether snapshot journaling is currently
// enabled, i.e. the value most recently stored in gEnableSnapshotJournal.
func checkSnapshotJournal() bool {
	enabled := gEnableSnapshotJournal
	return enabled
}

func (ms *MptStore) openSnapshot() error {
if ms == nil || ms.db == nil || ms.trie == nil || ms.db.TrieDB().DiskDB() == nil || gDisableSnapshot {
return fmt.Errorf("mpt store is nil or mpt trie is nil")
Expand All @@ -42,6 +57,9 @@ func (ms *MptStore) openSnapshot() error {
ms.logger.Error("Enabling snapshot recovery", "chainhead", version, "diskbase", *layer)
recovery = true
}
if global.GetRepairState() {
recovery = true
}
var err error
ms.snaps, err = snapshot.NewCustom(ms.db.TrieDB().DiskDB(), ms.db.TrieDB(), 256, ms.originalRoot, false, gSnapshotRebuild, recovery, ms.retriever)
if err != nil {
Expand Down Expand Up @@ -74,6 +92,8 @@ func (ms *MptStore) prepareSnap(root common.Hash) {
ms.snapDestructs = make(map[common.Hash]struct{})
ms.snapAccounts = make(map[common.Hash][]byte)
ms.snapStorage = make(map[common.Hash]map[common.Hash][]byte)
} else {
ms.logger.Error("prepare snapshot error", "root", root)
}
}

Expand All @@ -94,6 +114,13 @@ func (ms *MptStore) commitSnap(root common.Hash) {
if err := ms.snaps.Cap(root, snapshotMemoryLayerCount); err != nil {
ms.logger.Error("Failed to cap snapshot tree", "root", root, "layers", snapshotMemoryLayerCount, "err", err)
}

// record snapshot journal
if checkSnapshotJournal() {
if _, err := ms.snaps.Journal(root); err != nil {
ms.logger.Error("Failed to journal snapshot tree", "root", root, "err", err)
}
}
}
ms.snap, ms.snapDestructs, ms.snapAccounts, ms.snapStorage = nil, nil, nil, nil

Expand Down
15 changes: 2 additions & 13 deletions libs/cosmos-sdk/store/rootmulti/rootmulti_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package rootmulti
import (
"encoding/binary"
"fmt"
"github.com/okx/okbchain/libs/tendermint/global"
"io"
"log"
"path/filepath"
Expand Down Expand Up @@ -47,18 +48,6 @@ const (
maxPruneHeightsLength = 100
)

// repairing is false until SetRepair is called. loadVersion consults it
// (via getRepair) when choosing the upgrade version for a store.
var repairing bool

// SetRepair switches the package into repair mode. The flag is only ever
// set to true here; nothing in this block resets it.
func SetRepair() {
	repairing = true
}

// getRepair reports whether SetRepair has been called.
func getRepair() bool {
	active := repairing
	return active
}

// Store is composed of many CommitStores. Name contrasts with
// cacheMultiStore which is for cache-wrapping other MultiStores. It implements
// the CommitMultiStore interface.
Expand Down Expand Up @@ -408,7 +397,7 @@ func (rs *Store) loadVersion(ver int64, upgrades *types.StoreUpgrades) error {
// we cannot get the upgrade version before the expected height,
// and we should not use the original 0 either, because 0 means the latest height,
// so when we repair data before the milestone, we open an empty tree at the current version.
if getRepair() && version == 0 {
if global.GetRepairState() && version == 0 {
param.upgradeVersion = uint64(ver)
}
rs.storesParams[key] = param
Expand Down
11 changes: 11 additions & 0 deletions libs/tendermint/global/status.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package global

// repairState is the process-wide repair flag. It is a plain package
// variable with no locking in this file, and is false until SetRepairState
// stores a different value.
var repairState bool

// SetRepairState stores state as the process-wide repair flag.
func SetRepairState(state bool) {
	repairState = state
}

// GetRepairState returns the flag most recently stored by SetRepairState,
// or false if it has never been called.
func GetRepairState() bool {
	current := repairState
	return current
}

0 comments on commit fabf2de

Please sign in to comment.