Delayed RPC Send Using Tokens #5923

Open

ackintosh wants to merge 66 commits into unstable from delayed-rpc-response

Changes from 32 commits

Commits (66)
0154359  trickle responses (ackintosh, Jun 7, 2024)
d5fe64e  pruning (ackintosh, Jun 9, 2024)
aab59f5  cargo fmt (ackintosh, Jun 10, 2024)
e00e679  Test that the receiver delays the responses (ackintosh, Jun 10, 2024)
670ec96  Add doc comments (ackintosh, Jun 11, 2024)
c0ae632  Fix typo (ackintosh, Jun 13, 2024)
7e0c630  Add inbound request size limiter (ackintosh, Jun 14, 2024)
6322210  Merge branch 'refs/heads/unstable' into delayed-rpc-response (ackintosh, Jun 20, 2024)
3947bf6  Fix compile error (ackintosh, Jun 20, 2024)
933dc00  Add doc comment and rename (ackintosh, Jun 20, 2024)
b62537f  Extract a function that calculates tau and t from the quota (ackintosh, Jun 20, 2024)
86cf8fb  unwrap (ackintosh, Jun 22, 2024)
8fd37c5  Remove unused limiter (ackintosh, Jun 22, 2024)
6c1015e  Restrict more than two requests from running simultaneously on the sa… (ackintosh, Jun 26, 2024)
817ce97  Rename from self_limiter to outbound_request_limiter (ackintosh, Jun 29, 2024)
7e42568  Fix clippy errors (ackintosh, Jun 29, 2024)
94c2493  Merge branch 'refs/heads/unstable' into delayed-rpc-response (ackintosh, Jul 1, 2024)
9ad4eb7  Fix import (ackintosh, Jul 1, 2024)
de9d943  Fix clippy errors (ackintosh, Jul 6, 2024)
7adb142  Merge branch 'refs/heads/unstable' into delayed-rpc-response (ackintosh, Jul 7, 2024)
627fd33  Merge branch 'refs/heads/unstable' into delayed-rpc-response (ackintosh, Jul 11, 2024)
b55ffca  Update request_id with AppRequestId (ackintosh, Jul 11, 2024)
73e9879  Merge branch 'unstable' into delayed-rpc-response (ackintosh, Jul 13, 2024)
cdef58d  Update beacon_node/lighthouse_network/src/rpc/active_requests_limiter.rs (ackintosh, Jul 23, 2024)
3190d9a  Update beacon_node/lighthouse_network/src/rpc/mod.rs (ackintosh, Jul 23, 2024)
19fe6b0  Merge branch 'unstable' into delayed-rpc-response (ackintosh, Jul 25, 2024)
5a9237f  Remove the RequestSizeLimiter and check if the count of requested blo… (ackintosh, Jul 29, 2024)
4609624  Revert extracting `tau_and_t()` because no longer need to do that (ackintosh, Jul 29, 2024)
a325438  Remove Instant from the requests field (ackintosh, Sep 9, 2024)
3b6edab  Remove unused field (ackintosh, Sep 9, 2024)
2ab853c  Merge branch 'unstable' into delayed-rpc-response (ackintosh, Sep 9, 2024)
2621ce8  Add DataColumnsBy*** (ackintosh, Sep 9, 2024)
51247e3  Merge branch 'unstable' into delayed-rpc-response (ackintosh, Sep 26, 2024)
5ed47b7  Fix the mistakes made during the merge (ackintosh, Sep 27, 2024)
cbfb2ea  cargo fmt (ackintosh, Sep 27, 2024)
9f6177d  Update beacon_node/lighthouse_network/src/rpc/active_requests_limiter.rs (ackintosh, Sep 27, 2024)
9008d3e  Merge branch 'unstable' into delayed-rpc-response (ackintosh, Oct 1, 2024)
bd9f13c  merge unstable (ackintosh, Oct 1, 2024)
5dbac58  Move the response limiter logic from handler to behaviour (ackintosh, Oct 5, 2024)
ae67804  Remove Mutex from response_limiter (ackintosh, Oct 5, 2024)
4852b20  Fix clippy error (ackintosh, Oct 7, 2024)
156565c  Add the request back to active requests if the response is not a stre… (ackintosh, Oct 7, 2024)
0e1e58b  Add ResponseLimiter to make RPC cleaner (ackintosh, Oct 10, 2024)
cb87af0  Remove pending responses on disconnect (ackintosh, Oct 11, 2024)
023c542  Add ConnectionId to Request (ackintosh, Oct 18, 2024)
14ffeec  Add a comment (ackintosh, Oct 20, 2024)
5c9e063  Return early if the request is too large (ackintosh, Oct 20, 2024)
3c058b3  Remove ActiveRequestsLimiter (ackintosh, Oct 21, 2024)
a9a675a  Merge branch 'unstable' into delayed-rpc-response (ackintosh, Oct 22, 2024)
5d70573  Merge branch 'unstable' into delayed-rpc-response (ackintosh, Oct 23, 2024)
4e872a0  merge unstable (ackintosh, Oct 23, 2024)
450326c  Tweak for readability (ackintosh, Oct 29, 2024)
14fb84c  Merge branch 'unstable' into delayed-rpc-response (ackintosh, Nov 19, 2024)
636224c  Limit concurrent requests on self-limiter (ackintosh, Nov 23, 2024)
f6fd85b  Make the self-limiter mandatory, and make the rate-limiter optional w… (ackintosh, Nov 23, 2024)
95f8378  Inform the limiter that a response has been received (ackintosh, Nov 25, 2024)
9d2b263  Remove active requests belonging to the peer that disconnected (ackintosh, Nov 26, 2024)
2d7a679  Fix unused variable error (ackintosh, Nov 27, 2024)
b73a336  Fix clippy errors (ackintosh, Nov 29, 2024)
dfd092d  Merge branch 'unstable' into delayed-rpc-response (ackintosh, Dec 1, 2024)
810c5de  Fix clippy errors (ackintosh, Dec 1, 2024)
d46cbe8  Update test (ackintosh, Dec 1, 2024)
540436c  Adding a slight margin to the elapsed time check to account for poten… (ackintosh, Dec 1, 2024)
3d39f2c  Merge branch 'unstable' into delayed-rpc-response (ackintosh, Dec 4, 2024)
60c9900  Remove an active request when it ends with an error (ackintosh, Dec 8, 2024)
eec6b4a  Merge branch 'unstable' into delayed-rpc-response (ackintosh, Dec 10, 2024)
121 changes: 121 additions & 0 deletions beacon_node/lighthouse_network/src/rpc/active_requests_limiter.rs
@@ -0,0 +1,121 @@
use crate::rpc::{Protocol, SubstreamId};
use libp2p::swarm::ConnectionId;
use libp2p::PeerId;
use std::collections::hash_map::Entry;
use std::collections::HashMap;

/// Restricts more than two inbound requests from running simultaneously on the same protocol per peer.
Review comment (Member):

This seems to be restricting more than one inbound request, not two.

I'm not completely sure about the intuition for allowing 2 concurrent streams; allowing a single stream per protocol makes more sense to me. I have asked in the spec PR ethereum/consensus-specs#3767 (comment).

pub(super) struct ActiveRequestsLimiter {
requests: HashMap<PeerId, Vec<(Protocol, ConnectionId, SubstreamId)>>,
}

impl ActiveRequestsLimiter {
pub(super) fn new() -> Self {
Self {
requests: HashMap::new(),
}
}

/// Allows the request if there is no active request on the same protocol.
pub(super) fn allows(
&mut self,
peer_id: PeerId,
protocol: Protocol,
connection_id: &ConnectionId,
substream_id: &SubstreamId,
) -> bool {
match self.requests.entry(peer_id) {
Entry::Occupied(mut entry) => {
for (p, _cid, _sid) in entry.get().iter() {
// Check if there is a request on the same protocol.
if p == &protocol {
return false;
}
}

// Request on the same protocol was not found.
entry
.get_mut()
.push((protocol, *connection_id, *substream_id));
true
}
Entry::Vacant(entry) => {
// No active requests for the peer.
entry.insert(vec![(protocol, *connection_id, *substream_id)]);
true
}
}
}
Review comment (Member):

Since this function is now just this, wdyt of having this function at the Behaviour level, removing ActiveRequestsLimiter and therefore eliminating the need for a duplicated HashMap with requests?

Reply (Member Author): That makes sense. 💡 Thanks!

Reply (Member Author, Oct 23, 2024): Removed ActiveRequestsLimiter in 3c058b3
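Note: as a rough illustration of the suggestion above, here is a hypothetical sketch of what an inline, behaviour-level check could look like. The type and field names are invented for illustration; see 3c058b3 for what actually landed.

```rust
// Hypothetical sketch only: a per-protocol concurrency check kept directly
// on the behaviour, so no separate limiter type (and no second HashMap) is
// needed. Names are illustrative, not Lighthouse's actual code.
use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq, Eq, Hash)]
struct PeerId(u64); // stand-in for libp2p::PeerId

#[derive(Clone, Copy, PartialEq, Eq)]
enum Protocol {
    Status,
    BlocksByRange,
}

#[derive(Default)]
struct Behaviour {
    // Active inbound requests, tracked once, at the behaviour level.
    active_inbound: HashMap<PeerId, Vec<Protocol>>,
}

impl Behaviour {
    /// Returns false if `peer` already has an active request on `protocol`;
    /// otherwise records the request and returns true.
    fn allows(&mut self, peer: PeerId, protocol: Protocol) -> bool {
        let entries = self.active_inbound.entry(peer).or_default();
        if entries.contains(&protocol) {
            return false;
        }
        entries.push(protocol);
        true
    }
}
```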


/// Removes the request with the given SubstreamId.
pub(super) fn remove_request(
&mut self,
peer_id: PeerId,
connection_id: &ConnectionId,
substream_id: &SubstreamId,
) {
if let Some(requests) = self.requests.get_mut(&peer_id) {
// Keep every request except the one matching both the connection and substream ids.
requests.retain(|(_protocol, cid, sid)| !(cid == connection_id && sid == substream_id));
}
}

/// Removes the requests with the given PeerId.
pub(super) fn remove_peer(&mut self, peer_id: &PeerId) {
self.requests.remove(peer_id);
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_limiter() {
let mut limiter = ActiveRequestsLimiter::new();
let peer_id = PeerId::random();
let connection_id = ConnectionId::new_unchecked(1);
let substream_id = SubstreamId::new(1);

assert!(limiter.allows(peer_id, Protocol::Status, &connection_id, &substream_id));
// Not allowed since a request for the same protocol is in progress.
assert!(!limiter.allows(peer_id, Protocol::Status, &connection_id, &substream_id));
// Allowed since there is no BlocksByRange request in the active requests.
assert!(limiter.allows(
peer_id,
Protocol::BlocksByRange,
&connection_id,
&SubstreamId::new(2)
));
// Allowed since there is no request from the peer in the active requests.
assert!(limiter.allows(
PeerId::random(),
Protocol::Status,
&connection_id,
&substream_id
));

// Remove the Status request.
limiter.remove_request(peer_id, &connection_id, &substream_id);
assert!(limiter.allows(
peer_id,
Protocol::Status,
&connection_id,
&SubstreamId::new(3)
));

// Remove the peer.
limiter.remove_peer(&peer_id);
assert!(limiter.allows(
peer_id,
Protocol::Status,
&connection_id,
&SubstreamId::new(4)
));
assert!(limiter.allows(
peer_id,
Protocol::BlocksByRange,
&connection_id,
&SubstreamId::new(5)
));
}
}
172 changes: 167 additions & 5 deletions beacon_node/lighthouse_network/src/rpc/handler.rs
@@ -7,6 +7,7 @@ use super::protocol::{InboundOutput, InboundRequest, Protocol, RPCError, RPCProt
use super::{RPCReceived, RPCSend, ReqId};
use crate::rpc::outbound::{OutboundFramed, OutboundRequest};
use crate::rpc::protocol::InboundFramed;
use crate::rpc::rate_limiter::{RPCRateLimiter, RateLimitedErr};
use fnv::FnvHashMap;
use futures::prelude::*;
use futures::SinkExt;
@@ -15,6 +16,8 @@ use libp2p::swarm::handler::{
FullyNegotiatedInbound, FullyNegotiatedOutbound, StreamUpgradeError, SubstreamProtocol,
};
use libp2p::swarm::Stream;
use libp2p::PeerId;
use parking_lot::Mutex;
use slog::{crit, debug, trace};
use smallvec::SmallVec;
use std::{
@@ -137,8 +140,18 @@ where
/// Logger for handling RPC streams
log: slog::Logger,

/// Timeout that will me used for inbound and outbound responses.
/// Timeout that will be used for inbound and outbound responses.
resp_timeout: Duration,

/// Rate limiter for our responses and the PeerId that this handler interacts with.
/// The PeerId is necessary since the rate limiter manages rate limiting per peer.
response_limiter: Option<(PeerId, Arc<Mutex<RPCRateLimiter>>)>,

/// Responses queued for sending. These responses are stored when the response limiter rejects them.
delayed_responses: FnvHashMap<Protocol, VecDeque<QueuedResponse<E>>>,

/// Per-protocol delays that must elapse before the next queued response can be sent.
next_response: DelayQueue<Protocol>,
}
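Note: `delayed_responses` plus `next_response` form a small per-protocol retry scheduler. Below is a minimal, self-contained sketch of that same pattern, assuming `tokio_util`'s `DelayQueue` and an illustrative `String` payload; it is a sketch of the idea, not the handler code itself.

```rust
// Minimal sketch of the per-protocol delay pattern used by the handler:
// rejected responses queue up per protocol, and a DelayQueue entry wakes the
// task when that protocol may try again. Payload and names are illustrative.
use std::collections::{HashMap, VecDeque};
use std::time::Duration;

use futures::StreamExt;
use tokio_util::time::DelayQueue;

#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
enum Protocol {
    Status,
    BlocksByRange,
}

#[derive(Default)]
struct DelayedResponses {
    queued: HashMap<Protocol, VecDeque<String>>,
    retries: DelayQueue<Protocol>,
}

impl DelayedResponses {
    fn delay(&mut self, protocol: Protocol, response: String, wait: Duration) {
        // Arm the timer only for the first queued response of a protocol;
        // later responses join the back of the queue to preserve ordering.
        if !self.queued.contains_key(&protocol) {
            self.retries.insert(protocol, wait);
        }
        self.queued.entry(protocol).or_default().push_back(response);
    }

    async fn run(&mut self) {
        // A real handler polls this inside poll(); this loop simply ends once
        // the queue drains. The real code also re-checks the rate limiter
        // here and re-arms the timer if sending is still too soon.
        while let Some(expired) = self.retries.next().await {
            let protocol = expired.into_inner();
            if let Some(queue) = self.queued.get_mut(&protocol) {
                if let Some(response) = queue.pop_front() {
                    println!("sending delayed {response} on {protocol:?}");
                }
                if queue.is_empty() {
                    self.queued.remove(&protocol);
                }
            }
        }
    }
}
```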

enum HandlerState {
@@ -213,6 +226,12 @@ pub enum OutboundSubstreamState<E: EthSpec> {
Poisoned,
}

struct QueuedResponse<E: EthSpec> {
response: RPCCodedResponse<E>,
protocol: Protocol,
inbound_id: SubstreamId,
}

impl<Id, E> RPCHandler<Id, E>
where
E: EthSpec,
@@ -222,6 +241,7 @@ where
fork_context: Arc<ForkContext>,
log: &slog::Logger,
resp_timeout: Duration,
response_limiter: Option<(PeerId, Arc<Mutex<RPCRateLimiter>>)>,
) -> Self {
RPCHandler {
listen_protocol,
Expand All @@ -241,6 +261,9 @@ where
waker: None,
log: log.clone(),
resp_timeout,
response_limiter,
delayed_responses: FnvHashMap::default(),
next_response: DelayQueue::default(),
}
}

@@ -288,6 +311,36 @@
}
}

/// Checks if the response limiter allows the response. If the response should be delayed, the
/// duration to wait is returned.
fn try_response_limiter(
limiter: &mut Arc<Mutex<RPCRateLimiter>>,
peer_id: &PeerId,
protocol: Protocol,
response: RPCCodedResponse<E>,
log: &slog::Logger,
) -> Result<(), Duration> {
match limiter.lock().allows(peer_id, &(response, protocol)) {
Ok(()) => Ok(()),
Err(e) => match e {
RateLimitedErr::TooLarge => {
// This should never happen with default parameters. Let's just send the response.
// Log a crit since this is a config issue.
crit!(
log,
"Response rate limiting error for a batch that will never fit. Sending response anyway. Check configuration parameters.";
"protocol" => %protocol
);
Ok(())
}
RateLimitedErr::TooSoon(wait_time) => {
debug!(log, "Response rate limiting"; "protocol" => %protocol, "wait_time_ms" => wait_time.as_millis(), "peer_id" => %peer_id);
Err(wait_time)
}
},
}
}
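Note: the limiter consulted above is token based (the PR title and the tau/t commits point at a token/GCRA-style quota). The following is a minimal token-bucket sketch of where a `TooSoon(wait_time)`-shaped result comes from; it is illustrative only, not the actual `RPCRateLimiter` implementation.

```rust
// Illustrative token bucket, not Lighthouse's RPCRateLimiter. A request
// spends `cost` tokens; when too few tokens remain, the caller is told how
// long to wait, mirroring RateLimitedErr::TooSoon above.
use std::time::{Duration, Instant};

enum Limit {
    TooLarge,
    TooSoon(Duration),
}

struct TokenBucket {
    capacity: f64,       // maximum tokens (burst size)
    tokens: f64,         // tokens currently available
    refill_per_sec: f64, // replenish rate
    last_refill: Instant,
}

impl TokenBucket {
    fn allows(&mut self, cost: f64) -> Result<(), Limit> {
        if cost > self.capacity {
            // Can never fit, mirroring RateLimitedErr::TooLarge above.
            return Err(Limit::TooLarge);
        }
        // Replenish tokens for the time elapsed since the last check.
        let elapsed = self.last_refill.elapsed().as_secs_f64();
        self.tokens = (self.tokens + elapsed * self.refill_per_sec).min(self.capacity);
        self.last_refill = Instant::now();

        if self.tokens >= cost {
            self.tokens -= cost;
            Ok(())
        } else {
            // Time until enough tokens have accumulated.
            let missing = cost - self.tokens;
            Err(Limit::TooSoon(Duration::from_secs_f64(
                missing / self.refill_per_sec,
            )))
        }
    }
}
```

For example, with `capacity` 2.0 and `refill_per_sec` 2.0, a third back-to-back request of cost 1.0 would receive `TooSoon` of roughly half a second.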

/// Sends a response to a peer's request.
// NOTE: If the substream has closed due to inactivity, or the substream is in the
// wrong state a response will fail silently.
@@ -301,21 +354,84 @@
}
return;
};

if let Some((peer_id, limiter)) = self.response_limiter.as_mut() {
// First check that there are not already other responses waiting to be sent.
if let Some(queued_responses) = self.delayed_responses.get_mut(&inbound_info.protocol) {
queued_responses.push_back(QueuedResponse {
response,
protocol: inbound_info.protocol,
inbound_id,
});
return;
}

match Self::try_response_limiter(
limiter,
peer_id,
inbound_info.protocol,
response.clone(),
&self.log,
) {
Ok(()) => {
Self::send_response_inner(
inbound_id,
inbound_info,
response,
&mut self.events_out,
&self.state,
&self.log,
);
}
Err(wait_time) => {
self.next_response.insert(inbound_info.protocol, wait_time);
self.delayed_responses
.entry(inbound_info.protocol)
.or_default()
.push_back(QueuedResponse {
response,
protocol: inbound_info.protocol,
inbound_id,
});
}
}
} else {
Self::send_response_inner(
inbound_id,
inbound_info,
response,
&mut self.events_out,
&self.state,
&self.log,
);
}
}

/// Sends a response to a peer's request.
fn send_response_inner(
inbound_id: SubstreamId,
inbound_info: &mut InboundInfo<E>,
response: RPCCodedResponse<E>,
events_out: &mut SmallVec<[HandlerEvent<Id, E>; 4]>,
handler_state: &HandlerState,
log: &slog::Logger,
) {
// If the response we are sending is an error, report back for handling
if let RPCCodedResponse::Error(ref code, ref reason) = response {
self.events_out.push(HandlerEvent::Err(HandlerErr::Inbound {
events_out.push(HandlerEvent::Err(HandlerErr::Inbound {
error: RPCError::ErrorResponse(*code, reason.to_string()),
proto: inbound_info.protocol,
id: inbound_id,
}));
}

if matches!(self.state, HandlerState::Deactivated) {
if matches!(handler_state, HandlerState::Deactivated) {
// we no longer send responses after the handler is deactivated
debug!(self.log, "Response not sent. Deactivated handler";
"response" => %response, "id" => inbound_id);
debug!(log, "Response not sent. Deactivated handler";
"response" => %response, "id" => inbound_id);
return;
}

inbound_info.pending_items.push_back(response);
}
}
@@ -388,6 +504,52 @@
};
}

if let Some((peer_id, limiter)) = self.response_limiter.as_mut() {
// Process delayed responses that are ready to be sent.
if let Poll::Ready(Some(expired)) = self.next_response.poll_expired(cx) {
let protocol = expired.into_inner();
if let Entry::Occupied(mut entry) = self.delayed_responses.entry(protocol) {
let queued_responses = entry.get_mut();
while let Some(res) = queued_responses.pop_front() {
let Some(inbound_info) = self.inbound_substreams.get_mut(&res.inbound_id)
else {
debug!(self.log, "The inbound stream has expired. The delayed response was not sent."; "protocol" => %protocol, "peer_id" => %peer_id, "inbound_id" => res.inbound_id);
continue;
};
match Self::try_response_limiter(
limiter,
peer_id,
res.protocol,
res.response.clone(),
&self.log,
) {
Ok(()) => {
debug!(self.log, "The waiting time for response rate-limiting is over. Sending the response."; "protocol" => %protocol, "peer_id" => %peer_id, "inbound_id" => res.inbound_id);
Self::send_response_inner(
res.inbound_id,
inbound_info,
res.response,
&mut self.events_out,
&self.state,
&self.log,
);
}
Err(wait_time) => {
self.next_response.insert(protocol, wait_time);
queued_responses.push_front(res);
// If one fails just wait for the next window that allows sending responses.
break;
}
}
}

if queued_responses.is_empty() {
entry.remove();
}
}
}
}

// purge expired inbound substreams and send an error

while let Poll::Ready(Some(inbound_id)) = self.inbound_substreams_delay.poll_expired(cx) {