diff --git a/examples/C/src/leader-election/NRP_FD_NobodyFails.lf b/examples/C/src/leader-election/NRP_FD_NobodyFails.lf new file mode 100644 index 00000000..f720fdae --- /dev/null +++ b/examples/C/src/leader-election/NRP_FD_NobodyFails.lf @@ -0,0 +1,36 @@ +/** + * This version of NRP_FD has no programmed failures: no node or switch is given a fails_at_time, so + * the primary (node1) and the backup (node2) keep running and switch 1 remains the NRP. Use this + * version to manually kill federate processes and observe how the protocol responds. + * + * @author Edward A. Lee + * @author Marjan Sirjani + */ +target C { + coordination: decentralized, + timeout: 1000 hours +} +import Switch, Node from "NRP_FD.lf" + +federated reactor(heartbeat_period: time = 1 s, delay: time = 10 ms) { + node1 = new Node(heartbeat_period=heartbeat_period, id=1) + node2 = new Node(heartbeat_period=heartbeat_period, id=2) + + switch1 = new Switch(id=1) + switch2 = new Switch(id=2) + switch3 = new Switch(id=3) + switch4 = new Switch(id=4) + + node1.out -> switch1.in1, switch3.in1 after delay + switch1.out1, switch3.out1 -> node1.in after delay + + switch1.out2 -> switch2.in2 after delay + switch2.out2 -> switch1.in2 after delay + + switch2.out1, switch4.out1 -> node2.in after delay + node2.out -> switch2.in1, switch4.in1 after delay + + switch3.out2 -> switch4.in2 after delay + switch4.out2 -> switch3.in2 after delay +} diff --git a/examples/C/src/leader-election/NRP_FD_notModal.lf b/examples/C/src/leader-election/NRP_FD_notModal.lf new file mode 100644 index 00000000..e269c36d --- /dev/null +++ b/examples/C/src/leader-election/NRP_FD_notModal.lf @@ -0,0 +1,520 @@ +/** + * This program implements a redundant fault-tolerant system where a primary node, if and when it + * fails, is replaced by a backup node. The protocol is described in this paper: + * + * Bjarne Johansson; Mats Rågberger; Alessandro V. Papadopoulos; Thomas Nolte, "Consistency Before + * Availability: Network Reference Point based Failure Detection for Controller Redundancy," + * Emerging Technologies and Factory Automation (ETFA), 12-15 September 2023, DOI: + * 10.1109/ETFA54631.2023.10275664 + * + * The key idea in this protocol is that when a backup fails to detect the heartbeats of a primary + * node, it becomes primary only if it has access to the Network Reference Point (NRP), which is a point + * in the network. This way, if the network becomes partitioned, only a backup that is on the side + * of the partition that still has access to the NRP can become a primary. If a primary loses access + * to the NRP, then it relinquishes its primary role because it is now on the wrong side of a + * network partition. A backup on the right side of the partition will take over. + * + * This implementation omits some details in the paper. See NOTEs in the comments. + * + * This version follows the Rebeca design, avoiding modal models and more closely emulating the + * Rebeca code. + * + * This version has switch1 failing at 3s, node1 failing at 10s, and node2 failing at 15s. + * + * @author Edward A. Lee + * @author Marjan Sirjani + */ +target C { + tracing: true, + timeout: 20 s +} + +preamble {= + #ifndef NRF_FD + #define NRF_FD + #include "platform.h" // Defines PRINTF_TIME + + // Paper calls for manual intervention to set initial primary ID and NRP network. + // Here, we just hardwire this choice using #define.
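+ // INITIAL_PRIMARY_ID must match the id parameter of one of the Node instances in the federation, and
+ // INITIAL_NRP_NETWORK selects which of the two network channels (0 or 1) carries the initial NRP ping.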
+ #define INITIAL_PRIMARY_ID 1 + #define INITIAL_NRP_NETWORK 0 + + enum node_mode { + waiting, + primary, + backup, + failed + }; + + enum message_type { + heartbeat, + ping_NRP, + ping_NRP_response, + request_new_NRP, + new_NRP + }; + typedef struct message_t { + enum message_type type; + int source; + int destination; + int payload; + } message_t; + #endif // NRF_FD +=} + +reactor Node( + id: int = 0, + heartbeat_period: time = 1 s, + routine_ping_offset: time = 1 ms, // Time after heartbeat to ping NRP. + max_missed_heartbeats: int = 2, + fails_at_time: time = 0, // For testing. 0 for no failure. + ping_timeout: time = 500 ms, // Time until ping is deemed to have failed. + // Time until new NRP request is deemed to have failed. + nrp_timeout: time = 500 ms) { + // There are two network interfaces: + @side("east") + input[2] in: message_t + output[2] out: message_t + + timer node_fails(fails_at_time) + + state my_mode: {= enum node_mode =} = {= waiting =} + state heartbeats_missed: int[2] = {0} + state primary: int = 0 // The known primary node. + state ping_pending: bool = false // Ping has been issued and not responded to. + state ping_timeout_pending: bool = false // Ping timeout timer hasn't expired. + state become_primary_on_ping_response: bool = false + state NRP_network: int = {= INITIAL_NRP_NETWORK =} + state NRP_switch_id: int = 0 // 0 means not known. + + logical action ping_timed_out(ping_timeout) + logical action new_NRP_request_timed_out(nrp_timeout) + + timer heartbeat(heartbeat_period, heartbeat_period) + timer ping_NRP_timer(routine_ping_offset, heartbeat_period) + + // This is what the Rebeca code calls the runMe message handler. + reaction(startup) -> out {= + if (self->my_mode == waiting) { + // If I am the initial primary, broadcast a ping on network 1. + // The first switch to get this will respond. + if (self->id == INITIAL_PRIMARY_ID) { + message_t ping_message = {ping_NRP, self->id, 0, 0}; + lf_set(out[INITIAL_NRP_NETWORK], ping_message); + // Instead of scheduling ping_timed_out, we just continue waiting until a ping response arrives. + lf_print(PRINTF_TIME ": ---- Node %d is the initial primary. Waiting for ping response.", lf_time_logical_elapsed(), self->id); + } + } + =} + + reaction(in) -> out, ping_timed_out, new_NRP_request_timed_out {= + switch (self->my_mode) { + case waiting: + // Iterate over input channels. + for (int c = 0; c < in_width; c++) { + if (in[c]->is_present) { + // In this mode, primary is waiting for a ping response and backup for a new NRP. + if (self->id == INITIAL_PRIMARY_ID && in[c]->value.type == ping_NRP_response) { + // Become primary. + self->primary = self->id; + self->my_mode = primary; + + lf_print(PRINTF_TIME ": Initial primary node %d received ping response on network %d. " + "Making switch %d the NRP.", lf_time_logical_elapsed(), self->id, c, in[c]->value.source + ); + self->NRP_network = c; + self->NRP_switch_id = in[c]->value.source; + // Notify the backup of the NRP. Destination 0 here means broadcast. + message_t message = {new_NRP, self->id, 0, in[c]->value.source}; + // Send new NRP message on all networks. + for (int i = 0; i < out_width; i++) lf_set(out[i], message); + } else if (in[c]->value.type == new_NRP) { + if (in[c]->value.payload != self->NRP_switch_id) { + // Message is not redundant (new_NRP sent on both networks). + // Become backup. Source of the message is the primary. + lf_print(PRINTF_TIME ": Waiting node %d received new NRP %d on network %d. 
" + "Becoming backup.", lf_time_logical_elapsed(), self->id, in[c]->value.payload, + c + ); + self->primary = in[c]->value.source; + self->NRP_switch_id = in[c]->value.payload; + self->NRP_network = c; + self->my_mode = backup; + lf_print(PRINTF_TIME ": ---- Node %d becomes backup.", lf_time_logical_elapsed(), self->id); + } + } + } + } + break; + case primary: + // Iterate over input channels. + for (int c = 0; c < in_width; c++) { + if (in[c]->is_present) { + if (in[c]->value.type == request_new_NRP) { + // Backup is asking for a new NRP. Invalidate current NRP. + self->NRP_switch_id = 0; + + // Switch networks. + if (self->NRP_network == 0) self->NRP_network = 1; + else self->NRP_network = 0; + + lf_print(PRINTF_TIME ": Primary node %d looking for new NRP on network %d.", + lf_time_logical_elapsed(), self->id, self->NRP_network + ); + message_t message = {ping_NRP, self->id, 0, 0}; + lf_set(out[self->NRP_network], message); + self->ping_pending = true; + self->ping_timeout_pending = true; + lf_schedule(ping_timed_out, 0); + } else if (in[c]->value.type == ping_NRP_response) { + lf_print(PRINTF_TIME ": Primary node %d received ping response on network %d. NRP is %d.", + lf_time_logical_elapsed(), self->id, c, in[c]->value.source + ); + self->ping_pending = false; + if (self->NRP_switch_id == 0) { + // This is a new NRP. + self->NRP_switch_id = in[c]->value.source; + self->NRP_network = c; + // Notify the backup of the NRP on the NRP's network. + message_t message = {new_NRP, self->id, 0, self->NRP_switch_id}; + lf_set(out[c], message); + lf_print(PRINTF_TIME ": Primary node %d notifies backup of new NRP %d on network %d.", + lf_time_logical_elapsed(), self->id, self->NRP_switch_id, c + ); + // NOTE: Should the primary get some confirmation from the backup? + } + } + } + } + break; + case backup: + // Iterate over input channels. + for (int c = 0; c < in_width; c++) { + if (in[c]->is_present) { + if (in[c]->value.type == heartbeat) { + lf_print(PRINTF_TIME ": Backup node %d received heartbeat from node %d on network %d.", + lf_time_logical_elapsed(), self->id, in[c]->value.source, c + ); + self->heartbeats_missed[c] = 0; + } else if (in[c]->value.type == ping_NRP_response && in[c]->value.destination == self->id) { + // Got a response from the NRP to a ping we sent. + lf_print(PRINTF_TIME ": Backup node %d received ping response on network %d from NRP on switch %d.", + lf_time_logical_elapsed(), self->id, c, in[c]->value.source + ); + self->NRP_switch_id = in[c]->value.source; + // If there was a timeout on both networks that was not simultaneous, then + // we tried pinging the NRP before becoming primary. + if (self->become_primary_on_ping_response) { + self->my_mode = primary; + lf_print(PRINTF_TIME ": ---- Node %d becomes primary.", lf_time_logical_elapsed(), self->id); + self->become_primary_on_ping_response = false; + } + self->ping_pending = false; + } else if (in[c]->value.type == new_NRP) { + // NOTE: Should ping the new NRP and send confirmation back to primary. 
+ lf_print(PRINTF_TIME ": Backup node %d received new NRP %d on network %d.", + lf_time_logical_elapsed(), self->id, in[c]->value.payload, c + ); + self->NRP_network = c; + self->NRP_switch_id = in[c]->value.payload; + } + } + } + break; + case failed: + break; + } + =} + + reaction(node_fails) {= + if(lf_time_logical_elapsed() > 0LL) { + self->my_mode = failed; + lf_print(PRINTF_TIME ": #### Node %d fails.", lf_time_logical_elapsed(), self->id); + } + =} + + reaction(heartbeat) -> out, ping_timed_out {= + if (self->my_mode == primary) { + lf_print(PRINTF_TIME ": Primary node %d sends heartbeat on both networks.", + lf_time_logical_elapsed(), self->id + ); + message_t message = {heartbeat, self->id, 0, 0}; + for (int i = 0; i < out_width; i++) lf_set(out[i], message); + } else if (self->my_mode == backup) { + if (self->heartbeats_missed[0] > self->max_missed_heartbeats + && self->heartbeats_missed[1] > self->max_missed_heartbeats) { + // Simultaneous heartbeat misses. + // In the paper, this is tmoAllNotSimul. + // For the tmoAllSimul optimization in the paper, we assume that if + // self->heartbeats_missed[0] == self->heartbeats_missed[1], then most likely, it is + // the primary that failed, and not the network, so can immediately become the primary. + // Otherwise, it is possible that one network failed, and then the other failed, in which + // case, we may have a partitioned network. + lf_print(PRINTF_TIME ": **** Backup node %d detects missing heartbeats on both networks.", + lf_time_logical_elapsed(), self->id + ); + if (self->heartbeats_missed[0] == self->heartbeats_missed[1]) { + lf_print(PRINTF_TIME ": **** Missing heartbeats on both networks were simultaneous. " + "Assume the primary failed.", + lf_time_logical_elapsed() + ); + self->my_mode = primary; + lf_print(PRINTF_TIME ": ---- Node %d becomes primary.", lf_time_logical_elapsed(), self->id); + } else if (self->NRP_switch_id != 0) { + // Ping the NRP because if we can't access it, we are on the wrong side of + // a network partition and could end up with two primaries. + message_t message = {ping_NRP, self->id, self->NRP_switch_id, 0}; + lf_set(out[self->NRP_network], message); + // Wait for a response before becoming primary. + self->become_primary_on_ping_response = true; + self->ping_pending = true; + self->ping_timeout_pending = true; + lf_schedule(ping_timed_out, 0); + } else { + lf_print_warning(PRINTF_TIME "**** Do not know which switch is the NRP! Cannot become primary.", + lf_time_logical_elapsed() + ); + } + self->heartbeats_missed[0] = 0; // Prevent detecting again immediately. + self->heartbeats_missed[1] = 0; + } else if (self->heartbeats_missed[0] > self->max_missed_heartbeats + || self->heartbeats_missed[1] > self->max_missed_heartbeats) { + // Heartbeat missed on one network but not yet on the other. + // Ping the NRP to make sure we retain access to it so that we can be an effective backup. + // This corresponds to tmoSomeNotAll in the paper. + lf_print(PRINTF_TIME ": **** Backup node %d detects missing heartbeats on one network.", + lf_time_logical_elapsed(), self->id + ); + // Ping the NRP. 
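+ // message_t fields are {type, source, destination, payload}; the destination here is the NRP switch id.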
+ message_t message = {ping_NRP, self->id, self->NRP_switch_id, 0}; + if (!self->ping_pending && !self->ping_timeout_pending && self->NRP_switch_id != 0) { + lf_set(out[self->NRP_network], message); + lf_print(PRINTF_TIME ": Backup node %d pings NRP on network %d, switch %d", + lf_time_logical_elapsed(), self->id, self->NRP_network, self->NRP_switch_id + ); + self->ping_pending = true; + self->ping_timeout_pending = true; + lf_schedule(ping_timed_out, 0); + } + } + // Increment the counters so if they are not reset to 0 by the next time, + // we detect the missed heartbeat. + self->heartbeats_missed[0]++; + self->heartbeats_missed[1]++; + } + =} + + reaction(ping_NRP_timer) -> out, ping_timed_out {= + if (self->my_mode == primary) { + // Ping the NRP if there is one and there isn't a ping timeout pending. + if (self->NRP_switch_id != 0 && !self->ping_timeout_pending) { + lf_print(PRINTF_TIME ": Primary node %d pings NRP %d (routine).", + lf_time_logical_elapsed(), self->id, self->NRP_switch_id + ); + message_t ping = {ping_NRP, self->id, self->NRP_switch_id, 0}; + lf_set(out[self->NRP_network], ping); + self->ping_pending = true; + self->ping_timeout_pending = true; + lf_schedule(ping_timed_out, 0); + } + } + =} + + reaction(ping_timed_out) -> out, ping_timed_out, new_NRP_request_timed_out {= + if (self->my_mode == primary) { + self->ping_timeout_pending = false; + if (self->ping_pending) { + // Ping timed out. + self->ping_pending = false; + lf_print(PRINTF_TIME ": Primary node %d gets no response from ping.", + lf_time_logical_elapsed(), self->id + ); + if (self->NRP_switch_id == 0) { + // Failed to get a new NRP. Declare failure. + self->my_mode = failed; + lf_print(PRINTF_TIME ": #### Node %d failed to get new NRP. Failing.", lf_time_logical_elapsed(), self->id); + } else { + // Invalidate current NRP. + self->NRP_switch_id = 0; + + // Switch networks. + if (self->NRP_network == 0) self->NRP_network = 1; + else self->NRP_network = 0; + + lf_print(PRINTF_TIME ": Primary node %d looking for new NRP on network %d.", + lf_time_logical_elapsed(), self->id, self->NRP_network + ); + message_t message = {ping_NRP, self->id, 0, 0}; + lf_set(out[self->NRP_network], message); + self->ping_pending = true; + self->ping_timeout_pending = true; + lf_schedule(ping_timed_out, 0); + } + } + } else if (self->my_mode == backup) { + self->ping_timeout_pending = false; + if (self->ping_pending) { + // Ping timed out. + lf_print(PRINTF_TIME ": Backup node %d gets no response from ping.", lf_time_logical_elapsed(), self->id); + if (self->NRP_switch_id != 0) { + // Send request for new NRP on the other network. + lf_print(PRINTF_TIME ": Backup node %d requests new NRP.", lf_time_logical_elapsed(), self->id); + + // Invalidate current NRP. + self->NRP_switch_id = 0; + + // Switch networks. + if (self->NRP_network == 0) self->NRP_network = 1; + else self->NRP_network = 0; + + message_t message = {request_new_NRP, self->id, self->primary, 0}; + lf_set(out[self->NRP_network], message); + + lf_schedule(new_NRP_request_timed_out, 0); + } else { + // Failed to connect to new NRP. + self->my_mode = failed; + lf_print(PRINTF_TIME ": #### Node %d failed to connect to new NRP.", lf_time_logical_elapsed(), self->id); + } + self->ping_pending = false; + } + } + =} + + reaction(new_NRP_request_timed_out) {= + if (self->my_mode == backup && self->NRP_switch_id == 0) { + lf_print(PRINTF_TIME ": Backup node %d new NRP request timed out. 
Will not function as backup.", + lf_time_logical_elapsed(), self->id + ); + if (self->become_primary_on_ping_response) { + lf_print(PRINTF_TIME ": Network is likely partitioned. Remaining as (non-functional) backup.", + lf_time_logical_elapsed() + ); + self->become_primary_on_ping_response = false; + } + } + =} +} + +/** + * Switch with two interfaces. When a ping_NRP message arrives on either interface, if the + * destination matches the ID of this switch or the destination is 0, then the switch responds on + * the same interface with a ping_NRP_response message. When any other message arrives on either + * interface, the switch forwards a copy of the message to the other interface. If any two messages + * would be simultaneous on an output, one will be sent one microstep later. + */ +reactor Switch( + id: int = 0, + // For testing. 0 for no failure. + fails_at_time: time = 0) { + input in1: message_t + @side("east") + input in2: message_t + @side("west") + output out1: message_t + output out2: message_t + + logical action pending_out1: message_t + logical action pending_out2: message_t + + state failed: bool = false + + timer switch_fails(fails_at_time) + + reaction(switch_fails) {= + if(lf_time_logical_elapsed() > 0LL) { + self->failed = true; + lf_print(PRINTF_TIME ": ==== Switch %d fails.", lf_time_logical_elapsed(), self->id); + } + =} + + reaction(pending_out1) -> out1 {= + lf_set(out1, pending_out1->value); + =} + + reaction(pending_out2) -> out2 {= + lf_set(out2, pending_out2->value); + =} + + reaction(in1, in2) -> out1, out2, pending_out1, pending_out2 {= + if (in1->is_present) { + if (in1->value.type == ping_NRP) { + if (in1->value.destination == self->id || in1->value.destination == 0) { + lf_print(PRINTF_TIME ": ==== Switch %d pinged by node %d. Responding.", lf_time_logical_elapsed(), self->id, in1->value.source); + // Respond to the ping. + message_t message = {ping_NRP_response, self->id, in1->value.source}; + if (!out1->is_present) { + lf_set(out1, message); + } else { + lf_schedule_copy(pending_out1, 0, &message, 1); + } + } else { + // Forward the ping. + if (!out2->is_present) { + lf_set(out2, in1->value); + } else { + lf_schedule_copy(pending_out2, 0, &in1->value, 1); + } + } + } else { + // Forward the message. + if (!out2->is_present) { + lf_set(out2, in1->value); + } else { + lf_schedule_copy(pending_out2, 0, &in1->value, 1); + } + } + } + if (in2->is_present) { + if (in2->value.type == ping_NRP) { + if (in2->value.destination == self->id) { + lf_print(PRINTF_TIME ": ==== Switch %d pinged by node %d. Responding.", lf_time_logical_elapsed(), self->id, in2->value.source); + // Construct a response to the ping. + message_t message = {ping_NRP_response, self->id, in2->value.source}; + // Respond to the ping if out2 is available. + if (!out2->is_present) { + lf_set(out2, message); + } else { + lf_schedule_copy(pending_out2, 0, &message, 1); + } + } else { + // Forward the ping to out1 if out1 is available. + if (!out1->is_present) { + lf_set(out1, in2->value); + } else { + lf_schedule_copy(pending_out1, 0, &in2->value, 1); + } + } + } else { + // Forward the message if out1 is available. 
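+ // If out1 already has a value at this tag, defer the copy by one microstep via the pending_out1 action.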
+ if (!out1->is_present) { + lf_set(out1, in2->value); + } else { + lf_schedule_copy(pending_out1, 0, &in2->value, 1); + } + } + } + =} +} + +federated reactor(heartbeat_period: time = 1 s, delay: time = 1 ms) { + node1 = new Node(heartbeat_period=heartbeat_period, id=1, fails_at_time = 10 s) + switch1 = new Switch(id=1, fails_at_time = 3 s) + switch3 = new Switch(id=3) + + node2 = new Node(heartbeat_period=heartbeat_period, id=2, fails_at_time = 15 s) + switch2 = new Switch(id=2) + switch4 = new Switch(id=4) + + node1.out -> switch1.in1, switch3.in1 after delay + switch1.out1, switch3.out1 -> node1.in after delay + + switch1.out2 -> switch2.in2 after delay + switch2.out2 -> switch1.in2 after delay + + switch2.out1, switch4.out1 -> node2.in after delay + node2.out -> switch2.in1, switch4.in1 after delay + + switch3.out2 -> switch4.in2 after delay + switch4.out2 -> switch3.in2 after delay +} diff --git a/examples/C/src/leader-election/README.md b/examples/C/src/leader-election/README.md index d2cce2c9..9f3b919f 100644 --- a/examples/C/src/leader-election/README.md +++ b/examples/C/src/leader-election/README.md @@ -33,4 +33,12 @@ To run these programs, you are required to first [install the RTI](https://www.l NRP_FD_Partitioning NRP_FD_Partitioning.lf : This version partitions the network and shows that the protocol prevents the backup from becoming primary, thereby preventing two primaries. + + NRP_FD_NobodyFails + NRP_FD_NobodyFails.lf : This version has no failures at all. Use it to manually kill processes. + + + NRP_FD_notModal + NRP_FD_notModal.lf : This version does not use modal models to better align with the Rebeca version on which it is based. + diff --git a/examples/C/src/leader-election/Rebeca/src/NRP_FD.property b/examples/C/src/leader-election/Rebeca/src/NRP_FD.property new file mode 100644 index 00000000..e41ee16d --- /dev/null +++ b/examples/C/src/leader-election/Rebeca/src/NRP_FD.property @@ -0,0 +1,45 @@ +property { + define { + node1Waiting = (node1.mode ==0); + node1Primary = (node1.mode ==1); + node1Backup = (node1.mode ==2); + node1Failed = (node1.mode ==3); + + node2Waiting = (node2.mode ==0); + node2Primary = (node2.mode ==1); + node2Backup = (node2.mode ==2); + node2Failed = (node2.mode ==3); + + switch1Failed = (switch1.failed); + switch2Failed = (switch2.failed); + switch3Failed = (switch3.failed); + switch4Failed = (switch4.failed); + +// node1NRP1 = (node1.NRP_switch_id == 1); +// node1NRP2 = (node1.NRP_switch_id == 2); +// node2NRP1 = (node2.NRP_switch_id == 1); +// node2NRP2 = (node2.NRP_switch_id == 2); + switch1NRP = (node1.NRP_switch_id==1 && node2.NRP_switch_id==1); + switch3NRP = (node1.NRP_switch_id==3 && node2.NRP_switch_id==3); + switch2NRP = (node2.NRP_switch_id==2 && node1.NRP_switch_id==2); + switch4NRP = (node2.NRP_switch_id==4 && node1.NRP_switch_id==4); + + + + net1miss1 = (node2.heartbeats_missed_1 == 1); + net1miss2 = (node2.heartbeats_missed_1 == 2); + net1miss3 = (node2.heartbeats_missed_1 == 3); + net1miss4 = (node2.heartbeats_missed_1 == 4); + net1miss5 = (node2.heartbeats_missed_1 == 5); + + net2miss1 = (node2.heartbeats_missed_2 == 1); + net2miss2 = (node2.heartbeats_missed_2 == 2); + net2miss3 = (node2.heartbeats_missed_2 == 3); + net2miss4 = (node2.heartbeats_missed_2 == 4); + net2miss5 = (node2.heartbeats_missed_2 == 5); + } +Assertion{ + Assertion0:!(node1Primary && node2Primary); + Assertion1:!(node1.primary!=-1 && node2.primary!=-1 && node1.primary!=node2.primary); +} +} diff --git 
a/examples/C/src/leader-election/Rebeca/src/NRP_FD.rebeca b/examples/C/src/leader-election/Rebeca/src/NRP_FD.rebeca new file mode 100644 index 00000000..5d6bee99 --- /dev/null +++ b/examples/C/src/leader-election/Rebeca/src/NRP_FD.rebeca @@ -0,0 +1,514 @@ + +// +// This program models a redundant fault-tolerant system where a primary node, if and when it fails, +// is replaced by a backup node. The protocol is described in this paper: +// +// Bjarne Johansson; Mats Rågberger; Alessandro V. Papadopoulos; Thomas Nolte, "Consistency Before +// Availability: Network Reference Point based Failure Detection for Controller Redundancy," paper +// draft 8/15/23. +// +// The key idea in this protocol is that when a backup fails to detect the heartbeats of a primary +// node, it becomes primary only if it has access to the Network Reference Point (NRP), which is a point +// in the network. +// +// In the paper, when a backup node detects that it can no longer reach the NRP, then the node +// returns to the WAITING state and requires manual restart. In this version, it just remains a +// non-functional backup. Manual restart will still be required to get things working again. +// +// The Primary sends heartbeats on two networks. +// If the Backup receives the heartbeats from both networks, then all is fine. +// If it receives the heartbeat only from one network, the Backup pings the NRP; if the NRP replies, all is fine, +// if not ... +// If the Backup misses heartbeats on both networks, then it assumes that the Primary failed and pings the NRP; +// if the NRP replies, the Backup becomes the Primary, +// if not ... +// +// The Rebeca code is adapted from the LF code by Edward Lee and Marjan Sirjani + +env int INITIAL_PRIMARY_ID = 10; // ID of the initial primary. +env int INITIAL_NRP_NETWORK = 1; // The network on which to appoint the initial NRP. + +env int heartbeat_period = 1000; +env int max_missed_heartbeats = 2; +env int ping_timeout = 500; +env int nrp_timeout = 500; +// Node Modes +env byte WAITING = 0; +env byte PRIMARY = 1; +env byte BACKUP = 2; +env byte FAILED = 3; + +env byte NumberOfNetworks = 2; + +// Message types +env byte HEARTBEAT = 0; +env byte PING_NRP = 1; +env byte PING_NRP_RESPONSE = 2; +env byte REQUEST_NEW_NRP = 3; +env byte NEW_NRP = 4; + +// For testing, set a time for nodes or switches to fail. +// A value 0 means no failure, whereas -1 means nondeterministic failure. +env int switch1failtime = 0; +env int switch2failtime = 2500; +env int switch3failtime = 0; +env int switch4failtime = 0; + +env int node10failtime = 0; +env int node20failtime = 0; + + +env int network_delay = 1; + +reactiveclass Node (4) { + + knownrebecs { + Switch net1, net2; + } + + statevars { + byte mode; + int id; + int heartbeats_missed_1; + int heartbeats_missed_2; + int NRP_network; // Will be 1 or 2 for net1 or net2 network. + int NRP_switch_id; + boolean become_primary_on_ping_response; + int primary; + boolean ping_pending; + boolean ping_timeout_pending; + boolean nondeterministic_fail; + } + + Node (int my_id, int fail_time) { + id = my_id; + mode = WAITING; + + // NRP setup. + // Manual configuration here is that the initial NRP network is 1. + NRP_network = INITIAL_NRP_NETWORK; + // Unknown switch for NRP. + NRP_switch_id = 0; // Means unknown. + ping_pending = false; + ping_timeout_pending = false; + + become_primary_on_ping_response = false; + primary = 0; // Initially unknown. + + // Configure failure.
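+ // fail_time > 0 schedules a failure at that time, fail_time < 0 selects nondeterministic failure, and 0 means never fail.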
+ if (fail_time > 0) { + fail() after(fail_time); + nondeterministic_fail = false; + } else if (fail_time < 0) { + nondeterministic_fail = true; + } else { + nondeterministic_fail = false; + } + + heartbeats_missed_1 = 0; + heartbeats_missed_2 = 0; + + startup(); + } + + void startup() { + // If I am the initial primary, broadcast a ping on network 1. + // The first switch to get this will respond. + if (id == INITIAL_PRIMARY_ID) { + primary = id; + net1.message(PING_NRP, id, 0, 0) after(network_delay); + ping_pending = true; + // Instead of scheduling ping_timed_out, we just continue waiting until a ping response arrives. + } + } + + void fail() { + mode = FAILED; + } + + void send_message(byte message_type, int source, int destination, int payload) { + net1.message(message_type, source, destination, payload); + net2.message(message_type, source, destination, payload); + } + + void sendHeartbeat() { + net1.heartbeat(0, id) after(network_delay); + net2.heartbeat(1, id) after(network_delay); + } + + msgsrv message(byte message_type, int source, int destination, int payload) { + NRP_switch_id = switch_id; + switch(mode) { + case 0: // WAITING: + // If this is to be the primary, then expecting a ping response. + // Otherwise, expecting a new NRP message. + // Ignore anything else. + if (id == primary && message_type == PING_NRP_RESPONSE) { + mode = PRIMARY; + NRP_switch_id = source; + // Destination 0 here means broadcast: + send_message(NEW_NRP, id, 0, source); + sendHeartbeat(); + } else if (message_type == NEW_NRP) { + mode = BACKUP; + primary = source; + checkForHeartbeat() after(heartbeat_period); + } + break; + case 1: // PRIMARY: + if (message_type == PING_NRP_RESPONSE) { + ping_pending = false; + if (source != NRP_switch_id) { + // Have a new NRP. + NRP_switch_id = source; + // Destination 0 here means broadcast: + send_message(NEW_NRP, id, 0, source); + } + } else if (message_type == REQUEST_NEW_NRP) { + NRP_switch_id = 0; // Pending NRP. + if (NRP_network == 1) { + // Try the other network. + NRP_network = 2; + net2.message(PING_NRP, id, 0, 0); + } else { + NRP_network = 1; + net1.message(PING_NRP, id, 0, 0); + } + ping_pending = true; + break; + case 2: // BACKUP: + if (become_primary_on_ping_response) { + mode == PRIMARY; + become_primary_on_ping_response = false; + sendHeartbeat(); + } + ping_pending = false; + break; + case 3: // FAILED: + // Do nothing. + break; + } + } + + @Priority(5) msgsrv heartbeat(byte networkId, int senderid) { + // Only the backup cares about heartbeats. + if (mode == BACKUP) { + if (networkId == 1) { + heartbeats_missed_1 = 0; + } else { + heartbeats_missed_2 = 0; + } + } + } + + @Priority(6) msgsrv checkForHeartbeat() { + // Only the backup checks for heartbeats. + if (mode == BACKUP) { + if (heartbeats_missed_1 > max_missed_heartbeats + && heartbeats_missed_2 > max_missed_heartbeats) { + + // Simultaneous heartbeat misses. + // In the paper, this is tmoAllNotSimul. + // For the tmoAllSimul optimization in the paper, we assume that if + // self->heartbeats_missed_1 == self->heartbeats_missed_2, then most likely, it is + // the primary that failed, and not the network, so can immediately become the primary. + // Otherwise, it is possible that one network failed, and then the other failed, in which + // case, we may have a partitioned network. + if (heartbeats_missed_1 == heartbeats_missed_2) { + // Assume the primary failed. Become primary immediately. 
+ mode = PRIMARY; + primary=id; + sendHeartbeat(); + } else { + // Ping the NRP because if we can't access it, we are on the wrong side of + // a network partition and could end up with two primaries. + if (NRP_network == 1) { + net1.pingNRP(id, NRP_switch_id) after(network_delay); + } else { + net2.pingNRP(id, NRP_switch_id) after(network_delay); + } + ping_pending = true; + become_primary_on_ping_response = true; + pingTimedOut() after(ping_timeout); + // Prevent detecting again immediately. + heartbeats_missed_1 = 0; + heartbeats_missed_2 = 0; + } + } else if (heartbeats_missed_1 > max_missed_heartbeats + || heartbeats_missed_2 > max_missed_heartbeats) { + // Heartbeat missed on one network but not yet on the other. + // Ping the NRP to make sure we retain access to it so that we can be an effective backup. + // This corresponds to tmoSomeNotAll in the paper. + if (!ping_pending) { + if (NRP_network == 1) { + net1.pingNRP(id, NRP_switch_id) after(network_delay); + ping_pending = true; + pingTimedOut() after(ping_timeout); + } else if (NRP_network == 2) { + net2.pingNRP(id, NRP_switch_id) after(network_delay); + ping_pending = true; + pingTimedOut() after(ping_timeout); + } + } + } + // Increment the counters so if they are not reset to 0 by the next time, + // we detect the missed heartbeat. + heartbeats_missed_1++; + heartbeats_missed_2++; + // Schedule the next check. + checkForHeartbeat() after(heartbeat_period); + } + } + + @Priority(7) msgsrv pingTimedOut() { + ping_timeout_pending = false; + switch(mode) { + case 0: // WAITING: + // Ignore timeout. Just keep waiting. + break; + case 1: // PRIMARY: + if (NRP_switch_id == 0) { + // Failed to get a new NRP. Declare failure. + fail(); + } else { + // Invalidate current NRP. + NRP_switch_id = 0; + + // Switch networks. + if (self->NRP_network == 0) self->NRP_network = 1; + else self->NRP_network = 0; + net1.new_NRP(id, NRP_network, NRP_switch_id) after(network_delay); + + message_t message = {ping_NRP, self->id, 0, 0}; + lf_set(out[self->NRP_network], message); + self->ping_pending = true; + self->ping_timeout_pending = true; + lf_schedule(ping_timed_out, 0); + + break; + case 2: // BACKUP: + if (ping_pending) { + ping_pending = false; + if (NRP_switch_id != 0) { + // Invalidate current NRP. + NRP_switch_id = 0; + // Send request for new NRP on the other network. + if (NRP_network == 1) { + net2.new_NRP(id, NRP_network, NRP_switch_id) after(network_delay); + NRP_network = 2; + } else { + net1.new_NRP(id, NRP_network, NRP_switch_id) after(network_delay); + NRP_network = 1; + } + } + } + break; + case 3: // FAILED: + // Do nothing. 
+ break; + } + } + + // logical action new_NRP_request_timed_out(nrp_timeout) + msgsrv new_NRP_request_timed_out() { + if (mode == BACKUP) { + if (NRP_pending) { + NRP_pending = false; + if (become_primary_on_ping_response) { + become_primary_on_ping_response = false; + } + } + } + } + + msgsrv request_new_NRP(int iddd) { + if (mode == PRIMARY) { + NRP_network++; // FIXME + NRP_switch_id = NRPCandidates[NRP_network]; // FIXME + net1.new_NRP(id, NRP_network, NRP_switch_id) after(network_delay); + net2.new_NRP(id, NRP_network, NRP_switch_id) after(network_delay); + NRP_pending = true; + } + } + + msgsrv new_NRP(int mid, int mNRP_network, int mNRP_switch_id) { + NRP_network = mNRP_network; + NRP_switch_id = mNRP_switch_id; + } + + msgsrv runMe() { + if (nondeterministic_fail && ?(true,false)) fail(); + switch(mode) { + case 0: // WAITING : + if (init) { + if (id == primary) { + mode = PRIMARY; + NRP_network++; + if (NRP_network < NumberOfNetworks) { + NRP_switch_id = NRPCandidates[NRP_network]; // FIXME + if (NRP_network == 1) { + net1.new_NRP(id, NRP_switch_id); + } else { + net2.new_NRP(id, NRP_switch_id); + } + } else { + NRP_network=NumberOfNetworks; + } + } else { + mode =BACKUP; + } + init=false; + } + break; + + case 1: // PRIMARY : +// FIXME: Why are these commented out? +// net1.heartbeat(0, id) after(network_delay); +// net2.heartbeat(1, id) after(network_delay); + if (NRP_network == 1) { + ping_pending = true; + net1.pingNRP(id, NRP_switch_id) after(5); + pingTimedOut() after(ping_timeout); + } else { + ping_pending = true; + net2.pingNRP(id, NRP_switch_id) after(5); + pingTimedOut() after(ping_timeout); + } + NRP_pending = true; + + break; + case 2: // BACKUP : + heartbeats_missed_1++; + heartbeats_missed_2++; + + if (heartbeats_missed_1 > max_missed_heartbeats + && heartbeats_missed_2 > max_missed_heartbeats) { + + if (heartbeats_missed_1==heartbeats_missed_2 && heartbeats_missed_2==max_missed_heartbeats+1) { + mode = WAITING; + NRP_network = 1; + + heartbeats_missed_1 = 0; // Prevent detecting again immediately. + heartbeats_missed_2 = 0; + primary=id; + NRP_pending = true; + } else { + heartbeats_missed_1 = (heartbeats_missed_1>max_missed_heartbeats+2)?max_missed_heartbeats+2:heartbeats_missed_1; + heartbeats_missed_2 = (heartbeats_missed_2>max_missed_heartbeats+2)?max_missed_heartbeats+2:heartbeats_missed_2; + if (NRP_network == 1) { + ping_pending = true; + NRP_network = -1; + net1.pingNRP(id, NRP_switch_id) after(5); + pingTimedOut() after(ping_timeout); + } else { + ping_pending = true; + NRP_network=-1; + net2.pingNRP(id, NRP_switch_id) after(5); + pingTimedOut() after(ping_timeout); + } + NRP_pending = true; + } + } else if (heartbeats_missed_1 > max_missed_heartbeats + || heartbeats_missed_2 > max_missed_heartbeats) { + + if (NRP_network==1 && heartbeats_missed_1 > max_missed_heartbeats) { + ping_pending = true; + net1.pingNRP(id, NRP_switch_id) after(5); + pingTimedOut() after(ping_timeout); + } else if (heartbeats_missed_2 > max_missed_heartbeats) { + ping_pending = true; + net2.pingNRP(id, NRP_switch_id) after(5); + pingTimedOut() after(ping_timeout); + } + heartbeats_missed_1 = (heartbeats_missed_1>max_missed_heartbeats+2)?max_missed_heartbeats+2:heartbeats_missed_1; + heartbeats_missed_2 = (heartbeats_missed_2>max_missed_heartbeats+2)?max_missed_heartbeats+2:heartbeats_missed_2; + } + break; + + case 3: // FAILED : + break; + + } + + self.runMe() after(heartbeat_period); + } + + +} + +/** + * A Switch has a connected Node and a connected Switch. 
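+ * A PING_NRP message whose destination is this switch's id (or 0, meaning broadcast) is answered with a
+ * PING_NRP_RESPONSE; all other messages are forwarded toward the connected node or switch.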
+ * Note that this will only work for a very specific network topology. + */ +reactiveclass Switch(10) { + knownrebecs { + Node connected_node; + Switch connected_switch; + } + + statevars { + int id; + boolean failed; + boolean nondeterministic_fail; + } + + Switch (int my_id, int fail_time) { + id = my_id; + failed = false; + if (fail_time > 0) { + switchFail() after(fail_time); + nondeterministic_fail = false; + } else if (fail_time < 0) { + nondeterministic_fail = true; + } else { + nondeterministic_fail = false; + } + } + + void fail() { + failed = true; + } + + msgsrv message(byte message_type, int source, int destination) { + if (nondeterministic_fail && ?(true,false)) fail(); + if (!failed) { + if (message_type == PING_NRP) { + if (destination == id || destination == 0) { + // I am being pinged or it's a general ping. + // Respond to the switch or node that is sending. + if (source <= 9) { + // Source is a switch (shouldn't happen). + connected_switch.message(PING_NRP_RESPONSE, id, source) after(network_delay); + } else { + // Source is a node. + connected_node.message(PING_NRP_RESPONSE, id, source) after(network_delay); + } + } else { + // I am not the NRP and it's not a general ping. + // Forward to the next switch. + connected_switch.message(message_type, source, destination) after(network_delay); + } + } else { + // Forward the message. + if (source <= 9) { + // Source is a switch. Send to the connected node. + connected_node.message(message_type, source, destination); + } else { + // Source is a node. Send to the connected switch. + connected_switch.message(message_type, source, destination); + } + } + } + } +} + +main { + // Switches have IDs less than 10. + @Priority(1) Switch switch1(node10, switch2):(1, switch1failtime); + @Priority(1) Switch switch2(node20, switch1):(2, switch2failtime); + @Priority(1) Switch switch3(node10, switch4):(3, switch3failtime); + @Priority(1) Switch switch4(node20, switch3):(4, switch4failtime); + // Nodes have IDs larger than 10. + @Priority(2) Node node10(switch1, switch3):(10, node10failtime); + @Priority(2) Node node20(switch2, switch4):(20, node20failtime); +} diff --git a/examples/C/src/leader-election/img/NRP_FD_NobodyFails.png b/examples/C/src/leader-election/img/NRP_FD_NobodyFails.png new file mode 100644 index 00000000..72c3f751 Binary files /dev/null and b/examples/C/src/leader-election/img/NRP_FD_NobodyFails.png differ diff --git a/examples/C/src/leader-election/img/NRP_FD_notModal.png b/examples/C/src/leader-election/img/NRP_FD_notModal.png new file mode 100644 index 00000000..b6d015df Binary files /dev/null and b/examples/C/src/leader-election/img/NRP_FD_notModal.png differ