Skip to content

Commit

Permalink
Deliver Twice Bug Fix (#13)
Browse files Browse the repository at this point in the history
Deliver Twice Bug Fix

In this scenario a decision is delivered twice. Once by the synchronizer and a second time by the view data received during a view change.

The reason this is even possible is twofold.
First of all, after a view change timeout, we do not stop the running view (since a sync may created a good view), and so the view and the view change are running concurrently.
It is feasible for a decision to be made during the concurrent view and a view change to occur while the new view message doesn't include that decision.
That decision will be delivered to a node during a sync.
Secondly, when the sync returns with the decision belonging to an old view we do not update the checkpoint.
And so when there is another view change and a view data msg with that decision is received the node thinks this is a new decision (because its checkpoint was not updated).
Finally, the node delivers that decision again from the view data msg (for the second time).

The fix includes an update of the checkpoint also when the synchronizer returns with an old view, and aborting a view before sending a view data msg to avoid running concurrent view and view change.

Signed-off-by: Hagar Meir <[email protected]>
  • Loading branch information
HagarMeir authored Jan 17, 2024
1 parent 9a64645 commit a126c0e
Show file tree
Hide file tree
Showing 5 changed files with 464 additions and 91 deletions.
137 changes: 63 additions & 74 deletions internal/bft/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -592,104 +592,93 @@ func (c *Controller) sync() (viewNum uint64, seq uint64, decisions uint64) {
c.ViewChanger.close()
}

decision := syncResponse.Latest
if decision.Proposal.Metadata == nil {
c.Logger.Infof("Synchronizer returned with proposal metadata nil")
response := c.fetchState()
if response == nil {
return 0, 0, 0
// The synchronizer returns a response which includes the latest decision with its proposal metadata.
// This proposal may be empty (its metadata is empty), meaning the synchronizer is not aware of any decisions made.
// Otherwise, the latest proposal sequence returned may be higher than our latest sequence, meaning we should
// update the checkpoint.
// In other cases we should not update the checkpoint.
// However, we always must fetch the latest state from other nodes,
// since the view may have advanced without this node and with no decisions.

var newViewNum, newProposalSequence, newDecisionsInView uint64

latestDecision := syncResponse.Latest
var latestDecisionSeq, latestDecisionViewNum, latestDecisionDecisions uint64
var latestDecisionMetadata *protos.ViewMetadata
if len(latestDecision.Proposal.Metadata) == 0 {
c.Logger.Infof("Synchronizer returned with an empty proposal metadata")
latestDecisionMetadata = nil
} else {
md := &protos.ViewMetadata{}
if err := proto.Unmarshal(latestDecision.Proposal.Metadata, md); err != nil {
c.Logger.Panicf("Controller was unable to unmarshal the proposal metadata returned by the Synchronizer")
}
if response.View > 0 && response.Seq == 1 {
c.Logger.Infof("The collected state is with view %d and sequence %d", response.View, response.Seq)
newViewToSave := &protos.SavedMessage{
Content: &protos.SavedMessage_NewView{
NewView: &protos.ViewMetadata{
ViewId: response.View,
LatestSequence: 0,
DecisionsInView: 0,
},
},
}
if err := c.State.Save(newViewToSave); err != nil {
c.Logger.Panicf("Failed to save message to state, error: %v", err)
}
c.ViewChanger.InformNewView(response.View)
return response.View, 1, 0
}
return 0, 0, 0
latestDecisionSeq = md.LatestSequence
latestDecisionViewNum = md.ViewId
latestDecisionDecisions = md.DecisionsInView
latestDecisionMetadata = md
}
md := &protos.ViewMetadata{}
if err := proto.Unmarshal(decision.Proposal.Metadata, md); err != nil {
c.Logger.Panicf("Controller was unable to unmarshal the proposal metadata returned by the Synchronizer")

controllerSequence := c.latestSeq()
newProposalSequence = controllerSequence + 1

controllerViewNum := c.currViewNumber
newViewNum = controllerViewNum

if latestDecisionSeq > controllerSequence {
c.Logger.Infof("Synchronizer returned with sequence %d while the controller is at sequence %d", latestDecisionSeq, controllerSequence)
c.Logger.Debugf("Node %d is setting the checkpoint after sync returned with view %d and seq %d", c.ID, latestDecisionViewNum, latestDecisionSeq)
c.Checkpoint.Set(latestDecision.Proposal, latestDecision.Signatures)
c.verificationSequence = uint64(latestDecision.Proposal.VerificationSequence)
newProposalSequence = latestDecisionSeq + 1
newDecisionsInView = latestDecisionDecisions + 1
}

latestSequence := c.latestSeq()
if latestDecisionViewNum > controllerViewNum {
c.Logger.Infof("Synchronizer returned with view number %d while the controller is at view number %d", latestDecisionViewNum, controllerViewNum)
newViewNum = latestDecisionViewNum
}

if md.ViewId < c.currViewNumber {
c.Logger.Infof("Synchronizer returned with view number %d but the controller is at view number %d", md.ViewId, c.currViewNumber)
response := c.fetchState()
if response == nil {
c.Logger.Infof("Fetching state failed")
response := c.fetchState()
if response == nil {
c.Logger.Infof("Fetching state failed")
if latestDecisionMetadata == nil || latestDecisionViewNum < controllerViewNum {
// And the synchronizer did not return a new view
return 0, 0, 0
}
if response.View > c.currViewNumber && response.Seq == md.LatestSequence+1 {
c.Logger.Infof("Collected state with view %d and sequence %d", response.View, response.Seq)
} else {
if response.View <= controllerViewNum && latestDecisionViewNum < controllerViewNum {
return 0, 0, 0 // no new view to report
}
if response.View > newViewNum && response.Seq == latestDecisionSeq+1 {
c.Logger.Infof("Node %d collected state with view %d and sequence %d", c.ID, response.View, response.Seq)
newViewToSave := &protos.SavedMessage{
Content: &protos.SavedMessage_NewView{
NewView: &protos.ViewMetadata{
ViewId: response.View,
LatestSequence: md.LatestSequence,
LatestSequence: latestDecisionSeq,
DecisionsInView: 0,
},
},
}
if err := c.State.Save(newViewToSave); err != nil {
c.Logger.Panicf("Failed to save message to state, error: %v", err)
}
c.ViewChanger.InformNewView(response.View)
return response.View, response.Seq, 0
newViewNum = response.View
newDecisionsInView = 0
}
return 0, 0, 0
}

c.Logger.Infof("Replicated decisions from view %d and seq %d up to view %d and sequence %d with verification sequence %d",
c.currViewNumber, latestSequence, md.ViewId, md.LatestSequence, decision.Proposal.VerificationSequence)

c.maybePruneInFlight(md)

view := md.ViewId
newView := false

response := c.fetchState()
if response != nil {
if response.View > md.ViewId && response.Seq == md.LatestSequence+1 {
c.Logger.Infof("Node %d collected state with view %d and sequence %d", c.ID, response.View, response.Seq)
view = response.View
newViewToSave := &protos.SavedMessage{
Content: &protos.SavedMessage_NewView{
NewView: &protos.ViewMetadata{
ViewId: view,
LatestSequence: md.LatestSequence,
DecisionsInView: 0,
},
},
}
if err := c.State.Save(newViewToSave); err != nil {
c.Logger.Panicf("Failed to save message to state, error: %v", err)
}
newView = true
}
if latestDecisionMetadata != nil {
c.maybePruneInFlight(latestDecisionMetadata)
}

c.Logger.Debugf("Node %d is setting the checkpoint after sync to view %d and seq %d", c.ID, md.ViewId, md.LatestSequence)
c.Checkpoint.Set(decision.Proposal, decision.Signatures)
c.verificationSequence = uint64(decision.Proposal.VerificationSequence)
c.Logger.Debugf("Node %d is informing the view changer after sync of view %d and seq %d", c.ID, md.ViewId, md.LatestSequence)
c.ViewChanger.InformNewView(view)
if md.LatestSequence == 0 || newView {
return view, md.LatestSequence + 1, 0
if newViewNum > controllerViewNum {
c.Logger.Debugf("Node %d is informing the view changer of view %d after sync of view %d and seq %d", c.ID, newViewNum, latestDecisionViewNum, latestDecisionSeq)
c.ViewChanger.InformNewView(newViewNum)
}
return view, md.LatestSequence + 1, md.DecisionsInView + 1

return newViewNum, newProposalSequence, newDecisionsInView
}

func (c *Controller) maybePruneInFlight(syncResultViewMD *protos.ViewMetadata) {
Expand Down
Loading

0 comments on commit a126c0e

Please sign in to comment.