Skip to content

Commit

Permalink
Merge branch 'main' into rename_redis_feature_to_valkey
Browse files Browse the repository at this point in the history
  • Loading branch information
rukai authored Nov 26, 2024
2 parents 5c6dee4 + 071e0f5 commit f4d49c3
Show file tree
Hide file tree
Showing 6 changed files with 155 additions and 41 deletions.
24 changes: 12 additions & 12 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion shotover/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ aws-sdk-kms = { version = "1.1.0", optional = true }
chacha20poly1305 = { version = "0.10.0", features = ["std"], optional = true }
generic-array = { version = "0.14", features = ["serde"], optional = true }
kafka-protocol = { version = "0.13.0", optional = true, default-features = false, features = ["messages_enums", "broker", "client"] }
rustls = { version = "0.23.0", default-features = false, features = ["tls12"] }
rustls = { version = "0.23.18", default-features = false, features = ["tls12"] }
tokio-rustls = { version = "0.26", default-features = false, features = ["ring"] }
rustls-pemfile = "2.0.0"
rustls-pki-types = "1.0.1"
Expand Down
62 changes: 62 additions & 0 deletions shotover/src/transforms/kafka/sink_cluster/api_versions.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
use kafka_protocol::{messages::ApiKey, protocol::VersionRange};

// This table defines which api versions KafkaSinkCluster supports.
// * When adding a new message type:
// + Make sure you have implemented routing logic for the message type
// + Make sure any fields referring to internal cluster details such as broker ids or addresses are rewritten to refer to shotover nodes
// * When adding a new message type version:
// + Make sure any new fields do not break any of the requirements listed above
pub(crate) fn versions_supported_by_key(api_key: i16) -> Option<VersionRange> {
match ApiKey::try_from(api_key) {
Ok(ApiKey::ProduceKey) => Some(VersionRange { min: 0, max: 11 }),
Ok(ApiKey::FetchKey) => Some(VersionRange { min: 0, max: 16 }),
Ok(ApiKey::ListOffsetsKey) => Some(VersionRange { min: 0, max: 8 }),
Ok(ApiKey::MetadataKey) => Some(VersionRange { min: 0, max: 12 }),
Ok(ApiKey::OffsetCommitKey) => Some(VersionRange { min: 0, max: 9 }),
Ok(ApiKey::OffsetFetchKey) => Some(VersionRange { min: 0, max: 9 }),
Ok(ApiKey::FindCoordinatorKey) => Some(VersionRange { min: 0, max: 5 }),
Ok(ApiKey::JoinGroupKey) => Some(VersionRange { min: 0, max: 9 }),
Ok(ApiKey::HeartbeatKey) => Some(VersionRange { min: 0, max: 4 }),
Ok(ApiKey::LeaveGroupKey) => Some(VersionRange { min: 0, max: 5 }),
Ok(ApiKey::SyncGroupKey) => Some(VersionRange { min: 0, max: 5 }),
Ok(ApiKey::DescribeGroupsKey) => Some(VersionRange { min: 0, max: 5 }),
Ok(ApiKey::ListGroupsKey) => Some(VersionRange { min: 0, max: 5 }),
Ok(ApiKey::SaslHandshakeKey) => Some(VersionRange { min: 0, max: 1 }),
Ok(ApiKey::ApiVersionsKey) => Some(VersionRange { min: 0, max: 3 }),
Ok(ApiKey::CreateTopicsKey) => Some(VersionRange { min: 0, max: 7 }),
Ok(ApiKey::DeleteTopicsKey) => Some(VersionRange { min: 0, max: 6 }),
Ok(ApiKey::DeleteRecordsKey) => Some(VersionRange { min: 0, max: 2 }),
Ok(ApiKey::InitProducerIdKey) => Some(VersionRange { min: 0, max: 5 }),
Ok(ApiKey::OffsetForLeaderEpochKey) => Some(VersionRange { min: 0, max: 4 }),
Ok(ApiKey::AddPartitionsToTxnKey) => Some(VersionRange { min: 0, max: 5 }),
Ok(ApiKey::AddOffsetsToTxnKey) => Some(VersionRange { min: 0, max: 4 }),
Ok(ApiKey::EndTxnKey) => Some(VersionRange { min: 0, max: 4 }),
Ok(ApiKey::TxnOffsetCommitKey) => Some(VersionRange { min: 0, max: 4 }),
Ok(ApiKey::CreateAclsKey) => Some(VersionRange { min: 0, max: 3 }),
Ok(ApiKey::DescribeConfigsKey) => Some(VersionRange { min: 0, max: 4 }),
Ok(ApiKey::AlterConfigsKey) => Some(VersionRange { min: 0, max: 2 }),
Ok(ApiKey::DescribeLogDirsKey) => Some(VersionRange { min: 0, max: 4 }),
Ok(ApiKey::SaslAuthenticateKey) => Some(VersionRange { min: 0, max: 2 }),
Ok(ApiKey::CreatePartitionsKey) => Some(VersionRange { min: 0, max: 3 }),
Ok(ApiKey::DeleteGroupsKey) => Some(VersionRange { min: 0, max: 2 }),
Ok(ApiKey::ElectLeadersKey) => Some(VersionRange { min: 0, max: 2 }),
Ok(ApiKey::AlterPartitionReassignmentsKey) => Some(VersionRange { min: 0, max: 0 }),
Ok(ApiKey::ListPartitionReassignmentsKey) => Some(VersionRange { min: 0, max: 0 }),
Ok(ApiKey::OffsetDeleteKey) => Some(VersionRange { min: 0, max: 0 }),
Ok(ApiKey::AlterPartitionKey) => Some(VersionRange { min: 0, max: 3 }),
Ok(ApiKey::DescribeClusterKey) => Some(VersionRange { min: 0, max: 1 }),
Ok(ApiKey::DescribeProducersKey) => Some(VersionRange { min: 0, max: 0 }),
Ok(ApiKey::DescribeTransactionsKey) => Some(VersionRange { min: 0, max: 0 }),
Ok(ApiKey::ListTransactionsKey) => Some(VersionRange { min: 0, max: 1 }),
Ok(ApiKey::ConsumerGroupHeartbeatKey) => Some(VersionRange { min: 0, max: 0 }),
Ok(ApiKey::ConsumerGroupDescribeKey) => Some(VersionRange { min: 0, max: 0 }),
// This message type has very little documentation available and kafka responds to it with an error code 35 UNSUPPORTED_VERSION
// So its not clear at all how to implement this and its not even possible to test it.
// Instead lets just ask the client to not send it at all.
// We can consider supporting it when kafka itself starts to support it but we will need to be very
// careful to correctly implement the pagination/cursor logic.
Ok(ApiKey::DescribeTopicPartitionsKey) => None,
Ok(_) => None,
Err(_) => None,
}
}
84 changes: 59 additions & 25 deletions shotover/src/transforms/kafka/sink_cluster/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ use std::time::{Duration, Instant};
use tokio::sync::RwLock;
use uuid::Uuid;

mod api_versions;
mod connections;
mod kafka_node;
mod scram_over_mtls;
Expand Down Expand Up @@ -887,8 +888,11 @@ impl KafkaSinkCluster {
if !topic_names.is_empty()
|| !topic_ids.is_empty()
|| self.controller_broker.get().is_none()
|| self.nodes.is_empty()
{
let mut metadata = self.get_metadata_of_topics(topic_names, topic_ids).await?;
let mut metadata = self
.get_metadata_of_topics_with_retry(topic_names, topic_ids)
.await?;
match metadata.frame() {
Some(Frame::Kafka(KafkaFrame::Response {
body: ResponseBody::Metadata(metadata),
Expand Down Expand Up @@ -1978,10 +1982,43 @@ The connection to the client has been closed."
}
}

async fn get_metadata_of_topics(
/// Retry if we get an empty brokers list
/// We dont actually retry on failure since thats not a known failure mode for this request.
async fn get_metadata_of_topics_with_retry(
&mut self,
topic_names: Vec<TopicName>,
topic_ids: Vec<Uuid>,
) -> Result<Message> {
for _ in 0..3 {
let mut response = self
.get_metadata_of_topics(&topic_names, &topic_ids)
.await?;

match response.frame() {
Some(Frame::Kafka(KafkaFrame::Response {
body: ResponseBody::Metadata(metadata),
..
})) => {
if metadata.brokers.is_empty() {
tracing::info!("Metadata response from broker contained an empty list of brokers, this likely indicates the cluster is still starting up, will retry metadata request after a delay.");
tokio::time::sleep(Duration::from_millis(200)).await;
continue;
} else {
// cluster is ready, return the response
return Ok(response);
}
}
response => return Err(anyhow!("Expected metadata response but was {response:?}")),
}
}

Err(anyhow!("Broker returned empty list of brokers"))
}

async fn get_metadata_of_topics(
&mut self,
topic_names: &[TopicName],
topic_ids: &[Uuid],
) -> Result<Message> {
let api_version = if topic_ids.is_empty() { 4 } else { 12 };
let request = Message::from_frame(Frame::Kafka(KafkaFrame::Request {
Expand All @@ -1992,12 +2029,12 @@ The connection to the client has been closed."
body: RequestBody::Metadata(
MetadataRequest::default().with_topics(Some(
topic_names
.into_iter()
.map(|name| MetadataRequestTopic::default().with_name(Some(name)))
.chain(topic_ids.into_iter().map(|id| {
.iter()
.map(|name| MetadataRequestTopic::default().with_name(Some(name.clone())))
.chain(topic_ids.iter().map(|id| {
MetadataRequestTopic::default()
.with_name(None)
.with_topic_id(id)
.with_topic_id(*id)
}))
.collect(),
)),
Expand Down Expand Up @@ -3223,25 +3260,22 @@ The connection to the client has been closed."
body: ResponseBody::ApiVersions(api_versions),
..
})) => {
let original_size = api_versions.api_keys.len();

// List of keys that shotover doesnt support and so should be removed from supported keys list
let disable_keys = [
// This message type has very little documentation available and kafka responds to it with an error code 35 UNSUPPORTED_VERSION
// So its not clear at all how to implement this and its not even possible to test it.
// Instead lets just ask the client to not send it at all.
// We can consider supporting it when kafka itself starts to support it but we will need to be very
// careful to correctly implement the pagination/cursor logic.
ApiKey::DescribeTopicPartitionsKey as i16,
];
api_versions
.api_keys
.retain(|x| !disable_keys.contains(&x.api_key));

if original_size != api_versions.api_keys.len() {
// only invalidate the cache if we actually removed anything
response.invalidate_cache();
}
api_versions.api_keys.retain_mut(|api_key| {
match api_versions::versions_supported_by_key(api_key.api_key) {
Some(version) => {
if api_key.max_version > version.max {
api_key.max_version = version.max;
}
if api_key.min_version < version.min {
api_key.min_version = version.min;
}
true
}
None => false,
}
});

response.invalidate_cache();
}
_ => {}
}
Expand Down
2 changes: 1 addition & 1 deletion test-helpers/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ docker-compose-runner = "0.3.0"
j4rs = "0.21.0"
futures-util = "0.3.28"
http = "1.1.0"
rustls = { version = "0.23.2", default-features = false }
rustls = { version = "0.23.18", default-features = false }
rustls-pki-types = "1.0.1"
rustls-pemfile = "2.0.0"
tokio-tungstenite = { version = "0.24", features = ["rustls-tls-native-roots"] }
Expand Down
22 changes: 20 additions & 2 deletions test-helpers/src/connection/kafka/python/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from kafka import KafkaAdminClient
from kafka import KafkaProducer
from kafka.admin import NewTopic
from kafka.errors import UnknownTopicOrPartitionError
from time import sleep
import sys

def main():
Expand All @@ -17,9 +19,12 @@ def main():
replication_factor=1
)
])

producer = KafkaProducer(**config)
producer.send('python_test_topic', b'some_message_bytes').get(timeout=30)

# send first message with retry since the topic may not be created yet.
retry_if_not_ready(lambda : producer.send('python_test_topic', b'some_message_bytes').get(timeout=30))

# send second message without retry, it has no reason to fail.
producer.send('python_test_topic', b'another_message').get(timeout=30)

consumer = KafkaConsumer('python_test_topic', auto_offset_reset='earliest', **config)
Expand All @@ -36,6 +41,19 @@ def main():

print("kafka-python script passed all test cases")

def retry_if_not_ready(attempt):
tries = 0
while True:
try:
attempt()
return
except UnknownTopicOrPartitionError:
tries += 1
sleep(0.1)
# fail after 10s worth of attempts
if tries > 100:
raise Exception("Timedout, hit UnknownTopicOrPartitionError 100 times in a row")


if __name__ == "__main__":
main()

0 comments on commit f4d49c3

Please sign in to comment.