From 65bca7de8761cb5e2fea7984b4bf5aacbf3a583d Mon Sep 17 00:00:00 2001 From: Lucas Kent Date: Wed, 13 Mar 2024 11:43:35 +1100 Subject: [PATCH 1/4] Implement java producer + consumer (#1521) --- shotover-proxy/tests/kafka_int_tests/mod.rs | 33 +- .../tests/kafka_int_tests/test_cases.rs | 4 +- test-helpers/src/connection/kafka/java.rs | 296 ++++++++++++++---- test-helpers/src/connection/kafka/mod.rs | 2 +- 4 files changed, 256 insertions(+), 79 deletions(-) diff --git a/shotover-proxy/tests/kafka_int_tests/mod.rs b/shotover-proxy/tests/kafka_int_tests/mod.rs index 34672307a..fd4418f60 100644 --- a/shotover-proxy/tests/kafka_int_tests/mod.rs +++ b/shotover-proxy/tests/kafka_int_tests/mod.rs @@ -9,7 +9,7 @@ use test_helpers::docker_compose::docker_compose; #[rstest] #[cfg_attr(feature = "rdkafka-driver-tests", case::cpp(KafkaDriver::Cpp))] #[case::java(KafkaDriver::Java)] -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] // multi_thread is needed since java driver will block when consuming, causing shotover logs to not appear async fn passthrough_standard(#[case] driver: KafkaDriver) { let _docker_compose = docker_compose("tests/test-configs/kafka/passthrough/docker-compose.yaml"); @@ -31,7 +31,7 @@ async fn passthrough_standard(#[case] driver: KafkaDriver) { #[rstest] #[cfg_attr(feature = "rdkafka-driver-tests", case::cpp(KafkaDriver::Cpp))] #[case::java(KafkaDriver::Java)] -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] // multi_thread is needed since java driver will block when consuming, causing shotover logs to not appear async fn passthrough_tls(#[case] driver: KafkaDriver) { test_helpers::cert::generate_kafka_test_certs(); @@ -52,10 +52,11 @@ async fn passthrough_tls(#[case] driver: KafkaDriver) { .expect("Shotover did not shutdown within 10s"); } +#[cfg(feature = "rdkafka-driver-tests")] // temporarily needed to avoid a warning #[rstest] #[cfg_attr(feature = "rdkafka-driver-tests", case::cpp(KafkaDriver::Cpp))] -#[case::java(KafkaDriver::Java)] -#[tokio::test] +// #[case::java(KafkaDriver::Java)] +#[tokio::test(flavor = "multi_thread")] // multi_thread is needed since java driver will block when consuming, causing shotover logs to not appear async fn cluster_tls(#[case] driver: KafkaDriver) { test_helpers::cert::generate_kafka_test_certs(); @@ -80,7 +81,7 @@ async fn cluster_tls(#[case] driver: KafkaDriver) { #[rstest] #[cfg_attr(feature = "rdkafka-driver-tests", case::cpp(KafkaDriver::Cpp))] #[case::java(KafkaDriver::Java)] -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] // multi_thread is needed since java driver will block when consuming, causing shotover logs to not appear async fn passthrough_encode(#[case] driver: KafkaDriver) { let _docker_compose = docker_compose("tests/test-configs/kafka/passthrough/docker-compose.yaml"); @@ -97,7 +98,7 @@ async fn passthrough_encode(#[case] driver: KafkaDriver) { #[rstest] #[cfg_attr(feature = "rdkafka-driver-tests", case::cpp(KafkaDriver::Cpp))] #[case::java(KafkaDriver::Java)] -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] // multi_thread is needed since java driver will block when consuming, causing shotover logs to not appear async fn passthrough_sasl(#[case] driver: KafkaDriver) { let _docker_compose = docker_compose("tests/test-configs/kafka/passthrough-sasl/docker-compose.yaml"); @@ -116,7 +117,7 @@ async fn passthrough_sasl(#[case] driver: KafkaDriver) { #[rstest] #[cfg_attr(feature = "rdkafka-driver-tests", case::cpp(KafkaDriver::Cpp))] #[case::java(KafkaDriver::Java)] -#[tokio::test] 
+#[tokio::test(flavor = "multi_thread")] // multi_thread is needed since java driver will block when consuming, causing shotover logs to not appear async fn passthrough_sasl_encode(#[case] driver: KafkaDriver) { let _docker_compose = docker_compose("tests/test-configs/kafka/passthrough-sasl/docker-compose.yaml"); @@ -132,10 +133,11 @@ async fn passthrough_sasl_encode(#[case] driver: KafkaDriver) { shotover.shutdown_and_then_consume_events(&[]).await; } +#[cfg(feature = "rdkafka-driver-tests")] // temporarily needed to avoid a warning #[rstest] #[cfg_attr(feature = "rdkafka-driver-tests", case::cpp(KafkaDriver::Cpp))] -#[case::java(KafkaDriver::Java)] -#[tokio::test] +// #[case::java(KafkaDriver::Java)] +#[tokio::test(flavor = "multi_thread")] // multi_thread is needed since java driver will block when consuming, causing shotover logs to not appear async fn cluster_1_rack_single_shotover(#[case] driver: KafkaDriver) { let _docker_compose = docker_compose("tests/test-configs/kafka/cluster-1-rack/docker-compose.yaml"); @@ -154,9 +156,10 @@ async fn cluster_1_rack_single_shotover(#[case] driver: KafkaDriver) { .expect("Shotover did not shutdown within 10s"); } +#[cfg(feature = "rdkafka-driver-tests")] // temporarily needed to avoid a warning #[rstest] #[cfg_attr(feature = "rdkafka-driver-tests", case::cpp(KafkaDriver::Cpp))] -#[case::java(KafkaDriver::Java)] +// #[case::java(KafkaDriver::Java)] #[tokio::test(flavor = "multi_thread")] async fn cluster_1_rack_multi_shotover(#[case] driver: KafkaDriver) { let _docker_compose = @@ -189,10 +192,11 @@ async fn cluster_1_rack_multi_shotover(#[case] driver: KafkaDriver) { } } +#[cfg(feature = "rdkafka-driver-tests")] // temporarily needed to avoid a warning #[rstest] #[cfg_attr(feature = "rdkafka-driver-tests", case::cpp(KafkaDriver::Cpp))] -#[case::java(KafkaDriver::Java)] -#[tokio::test] +// #[case::java(KafkaDriver::Java)] +#[tokio::test(flavor = "multi_thread")] // multi_thread is needed since java driver will block when consuming, causing shotover logs to not appear async fn cluster_2_racks_single_shotover(#[case] driver: KafkaDriver) { let _docker_compose = docker_compose("tests/test-configs/kafka/cluster-2-racks/docker-compose.yaml"); @@ -212,10 +216,11 @@ async fn cluster_2_racks_single_shotover(#[case] driver: KafkaDriver) { .expect("Shotover did not shutdown within 10s"); } +#[cfg(feature = "rdkafka-driver-tests")] // temporarily needed to avoid a warning #[rstest] #[cfg_attr(feature = "rdkafka-driver-tests", case::cpp(KafkaDriver::Cpp))] -#[case::java(KafkaDriver::Java)] -#[tokio::test] +//#[case::java(KafkaDriver::Java)] +#[tokio::test(flavor = "multi_thread")] // multi_thread is needed since java driver will block when consuming, causing shotover logs to not appear async fn cluster_2_racks_multi_shotover(#[case] driver: KafkaDriver) { let _docker_compose = docker_compose("tests/test-configs/kafka/cluster-2-racks/docker-compose.yaml"); diff --git a/shotover-proxy/tests/kafka_int_tests/test_cases.rs b/shotover-proxy/tests/kafka_int_tests/test_cases.rs index edff3e199..0a9436be7 100644 --- a/shotover-proxy/tests/kafka_int_tests/test_cases.rs +++ b/shotover-proxy/tests/kafka_int_tests/test_cases.rs @@ -81,7 +81,7 @@ async fn produce_consume(connection_builder: &KafkaConnectionBuilder, topic_name ) .await; - let consumer = connection_builder.connect_consumer(topic_name).await; + let mut consumer = connection_builder.connect_consumer(topic_name).await; consumer .assert_consume(ExpectedResponse { @@ -118,7 +118,7 @@ async fn 
produce_consume_acks0(connection_builder: &KafkaConnectionBuilder) { .await; } - let consumer = connection_builder.connect_consumer(topic_name).await; + let mut consumer = connection_builder.connect_consumer(topic_name).await; for j in 0..10 { consumer diff --git a/test-helpers/src/connection/kafka/java.rs b/test-helpers/src/connection/kafka/java.rs index 9fbd6db58..1682ae52d 100644 --- a/test-helpers/src/connection/kafka/java.rs +++ b/test-helpers/src/connection/kafka/java.rs @@ -1,6 +1,9 @@ use super::{AlterConfig, ExpectedResponse, NewPartition, NewTopic, Record, ResourceSpecifier}; use j4rs::{errors::J4RsError, Instance, InvocationArg, Jvm, JvmBuilder, MavenArtifact}; -use std::{collections::HashMap, rc::Rc}; +use std::{ + collections::{HashMap, VecDeque}, + rc::Rc, +}; fn properties(jvm: &Jvm, props: &HashMap) -> Instance { let properties = jvm.create_instance("java.util.Properties", &[]).unwrap(); @@ -73,13 +76,52 @@ impl KafkaConnectionBuilderJava { &[properties.into()], ) .unwrap(); - KafkaProducerJava { - _producer: producer, - } + + let jvm = self.jvm.clone(); + KafkaProducerJava { jvm, producer } } - pub async fn connect_consumer(&self, _topic_name: &str) -> KafkaConsumerJava { - KafkaConsumerJava {} + pub async fn connect_consumer(&self, topic_name: &str) -> KafkaConsumerJava { + let mut config = self.base_config.clone(); + config.insert("group.id".to_owned(), "some_group".to_owned()); + config.insert("session.timeout.ms".to_owned(), "6000".to_owned()); + config.insert("auto.offset.reset".to_owned(), "earliest".to_owned()); + config.insert("enable.auto.commit".to_owned(), "false".to_owned()); + config.insert( + "key.deserializer".to_owned(), + "org.apache.kafka.common.serialization.StringDeserializer".to_owned(), + ); + config.insert( + "value.deserializer".to_owned(), + "org.apache.kafka.common.serialization.StringDeserializer".to_owned(), + ); + + let properties = properties(&self.jvm, &config); + let consumer = self + .jvm + .create_instance( + "org.apache.kafka.clients.consumer.KafkaConsumer", + &[properties.into()], + ) + .unwrap(); + self.jvm + .invoke( + &consumer, + "subscribe", + &[self + .jvm + .java_list("java.lang.String", vec![topic_name]) + .unwrap() + .into()], + ) + .unwrap(); + + let jvm = self.jvm.clone(); + KafkaConsumerJava { + jvm, + consumer, + waiting_records: VecDeque::new(), + } } pub async fn connect_admin(&self) -> KafkaAdminJava { @@ -97,20 +139,156 @@ impl KafkaConnectionBuilderJava { } } pub struct KafkaProducerJava { - _producer: Instance, + jvm: Rc, + producer: Instance, } impl KafkaProducerJava { - pub async fn assert_produce(&self, _record: Record<'_>, _expected_offset: Option) { - tracing::error!("Unimplemented assert"); + pub async fn assert_produce(&self, record: Record<'_>, expected_offset: Option) { + let record = match record.key { + Some(key) => self + .jvm + .create_instance( + "org.apache.kafka.clients.producer.ProducerRecord", + &[ + InvocationArg::try_from(record.topic_name).unwrap(), + InvocationArg::try_from(key).unwrap(), + InvocationArg::try_from(record.payload).unwrap(), + ], + ) + .unwrap(), + None => self + .jvm + .create_instance( + "org.apache.kafka.clients.producer.ProducerRecord", + &[ + InvocationArg::try_from(record.topic_name).unwrap(), + InvocationArg::try_from(record.payload).unwrap(), + ], + ) + .unwrap(), + }; + let result = self + .jvm + .invoke_async(&self.producer, "send", &[record.into()]) + .await + .unwrap(); + let actual_offset: i64 = self + .jvm + .chain(&result) + .unwrap() + 
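+            // send() returns Future<RecordMetadata>, which invoke_async resolves
+            // to an untyped Instance; cast it to RecordMetadata so that offset()
+            // can be invoked and the resulting long marshalled back into an i64.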
.cast("org.apache.kafka.clients.producer.RecordMetadata") + .unwrap() + .invoke("offset", &[]) + .unwrap() + .to_rust() + .unwrap(); + + if let Some(expected_offset) = expected_offset { + assert_eq!(expected_offset, actual_offset); + } } } -pub struct KafkaConsumerJava {} +pub struct KafkaConsumerJava { + jvm: Rc, + consumer: Instance, + waiting_records: VecDeque, +} impl KafkaConsumerJava { - pub async fn assert_consume(&self, _response: ExpectedResponse<'_>) { - tracing::error!("Unimplemented assert"); + pub async fn assert_consume(&mut self, expected_response: ExpectedResponse<'_>) { + // This method asserts that we have consumed a single record from the broker. + // Internally we may have actually received multiple records from the broker. + // But that is hidden from the test by storing any extra messages for use in the next call to `assert_consume` + + if self.waiting_records.is_empty() { + self.fetch_from_broker(); + } + + self.process_one_fetched_record(expected_response); + } + + fn fetch_from_broker(&mut self) { + let timeout = self + .jvm + .invoke_static( + "java.time.Duration", + "ofSeconds", + &[InvocationArg::try_from(30_i64) + .unwrap() + .into_primitive() + .unwrap()], + ) + .unwrap(); + + let result = tokio::task::block_in_place(|| { + self.jvm + .invoke(&self.consumer, "poll", &[timeout.into()]) + .unwrap() + }); + + let iterator = JavaIterator::new(self.jvm.invoke(&result, "iterator", &[]).unwrap()); + while let Some(record) = iterator.next(&self.jvm) { + let record = self + .jvm + .cast(&record, "org.apache.kafka.clients.consumer.ConsumerRecord") + .unwrap(); + self.waiting_records.push_front(record); + } + } + + fn process_one_fetched_record(&mut self, expected_response: ExpectedResponse<'_>) { + let record = self + .waiting_records + .pop_back() + .expect("KafkaConsumer.poll timed out"); + + let offset: i64 = self + .jvm + .chain(&record) + .unwrap() + .invoke("offset", &[]) + .unwrap() + .to_rust() + .unwrap(); + assert_eq!(expected_response.offset, offset); + + let topic: String = self + .jvm + .chain(&record) + .unwrap() + .invoke("topic", &[]) + .unwrap() + .to_rust() + .unwrap(); + assert_eq!(expected_response.topic_name, topic); + + let value: String = self + .jvm + .chain(&record) + .unwrap() + .invoke("value", &[]) + .unwrap() + .to_rust() + .unwrap(); + assert_eq!(expected_response.message, value); + + let key: Option = self + .jvm + .chain(&record) + .unwrap() + .invoke("key", &[]) + .unwrap() + .to_rust() + .unwrap(); + assert_eq!(expected_response.key, key.as_deref()); + } +} + +impl Drop for KafkaConsumerJava { + fn drop(&mut self) { + tokio::task::block_in_place(|| self.jvm.invoke(&self.consumer, "close", &[]).unwrap()); } } @@ -162,16 +340,11 @@ impl KafkaAdminJava { .java_list("org.apache.kafka.clients.admin.NewTopic", topics) .unwrap(); - self.jvm - .chain(&self.admin) - .unwrap() - .invoke("createTopics", &[topics.into()]) - .unwrap() - .invoke("all", &[]) - .unwrap() - .invoke("get", &[]) - .unwrap() - .collect(); + let result = self + .jvm + .invoke(&self.admin, "createTopics", &[topics.into()]) + .unwrap(); + self.jvm.invoke_async(&result, "all", &[]).await.unwrap(); } pub async fn delete_topics(&self, to_delete: &[&str]) { @@ -180,16 +353,11 @@ impl KafkaAdminJava { .java_list("java.lang.String", to_delete.to_vec()) .unwrap(); - self.jvm - .chain(&self.admin) - .unwrap() - .invoke("deleteTopics", &[topics.into()]) - .unwrap() - .invoke("all", &[]) - .unwrap() - .invoke("get", &[]) - .unwrap() - .collect(); + let result = self + .jvm + 
.invoke(&self.admin, "deleteTopics", &[topics.into()]) + .unwrap(); + self.jvm.invoke_async(&result, "all", &[]).await.unwrap(); } pub async fn create_partitions(&self, partitions: &[NewPartition<'_>]) { @@ -218,16 +386,11 @@ impl KafkaAdminJava { ) .unwrap(); - self.jvm - .chain(&self.admin) - .unwrap() - .invoke("createPartitions", &[partitions.into()]) - .unwrap() - .invoke("all", &[]) - .unwrap() - .invoke("get", &[]) - .unwrap() - .collect(); + let result = self + .jvm + .invoke(&self.admin, "createPartitions", &[partitions.into()]) + .unwrap(); + self.jvm.invoke_async(&result, "all", &[]).await.unwrap(); } pub async fn describe_configs(&self, resources: &[ResourceSpecifier<'_>]) { @@ -256,16 +419,11 @@ impl KafkaAdminJava { .java_list("org.apache.kafka.common.config.ConfigResource", resources) .unwrap(); - self.jvm - .chain(&self.admin) - .unwrap() - .invoke("describeConfigs", &[resources.into()]) - .unwrap() - .invoke("all", &[]) - .unwrap() - .invoke("get", &[]) - .unwrap() - .collect(); + let result = self + .jvm + .invoke(&self.admin, "describeConfigs", &[resources.into()]) + .unwrap(); + self.jvm.invoke_async(&result, "all", &[]).await.unwrap(); } pub async fn alter_configs(&self, alter_configs: &[AlterConfig<'_>]) { @@ -318,16 +476,11 @@ impl KafkaAdminJava { let alter_configs = self.java_map(alter_configs); - self.jvm - .chain(&self.admin) - .unwrap() - .invoke("alterConfigs", &[alter_configs.into()]) - .unwrap() - .invoke("all", &[]) - .unwrap() - .invoke("get", &[]) - .unwrap() - .collect(); + let result = self + .jvm + .invoke(&self.admin, "alterConfigs", &[alter_configs.into()]) + .unwrap(); + self.jvm.invoke_async(&result, "all", &[]).await.unwrap(); } fn java_map(&self, key_values: Vec<(Instance, Instance)>) -> Instance { @@ -338,3 +491,22 @@ impl KafkaAdminJava { map } } + +struct JavaIterator(Instance); + +impl JavaIterator { + fn new(iterator_instance: Instance) -> Self { + JavaIterator(iterator_instance) + } + + fn next(&self, jvm: &Jvm) -> Option { + let has_next: bool = jvm + .to_rust(jvm.invoke(&self.0, "hasNext", &[]).unwrap()) + .unwrap(); + if has_next { + Some(jvm.invoke(&self.0, "next", &[]).unwrap()) + } else { + None + } + } +} diff --git a/test-helpers/src/connection/kafka/mod.rs b/test-helpers/src/connection/kafka/mod.rs index e270dff6e..bbce3e1dc 100644 --- a/test-helpers/src/connection/kafka/mod.rs +++ b/test-helpers/src/connection/kafka/mod.rs @@ -97,7 +97,7 @@ pub enum KafkaConsumer { } impl KafkaConsumer { - pub async fn assert_consume(&self, response: ExpectedResponse<'_>) { + pub async fn assert_consume(&mut self, response: ExpectedResponse<'_>) { match self { #[cfg(feature = "rdkafka-driver-tests")] Self::Cpp(cpp) => cpp.assert_consume(response).await, From 2bef5e7bb56ec561b846d322ee8ebab87083e983 Mon Sep 17 00:00:00 2001 From: Lucas Kent Date: Wed, 13 Mar 2024 22:11:55 +1100 Subject: [PATCH 2/4] Move windsock to separate repo (#1523) --- .github/workflows/windsock_benches.yaml | 3 - Cargo.lock | 4 +- Cargo.toml | 1 - shotover-proxy/Cargo.toml | 2 +- windsock/Cargo.toml | 23 - windsock/build.rs | 7 - .../examples/cassandra-docker-compose.yaml | 26 - windsock/examples/cassandra.rs | 163 ---- windsock/readme.md | 206 ---- windsock/src/bench.rs | 343 ------- windsock/src/cli.rs | 198 ---- windsock/src/cloud.rs | 102 -- windsock/src/data.rs | 18 - windsock/src/filter.rs | 50 - windsock/src/lib.rs | 328 ------- windsock/src/list.rs | 26 - windsock/src/report.rs | 549 ----------- windsock/src/tables.rs | 915 ------------------ 18 files changed, 3 
insertions(+), 2961 deletions(-) delete mode 100644 windsock/Cargo.toml delete mode 100644 windsock/build.rs delete mode 100644 windsock/examples/cassandra-docker-compose.yaml delete mode 100644 windsock/examples/cassandra.rs delete mode 100644 windsock/readme.md delete mode 100644 windsock/src/bench.rs delete mode 100644 windsock/src/cli.rs delete mode 100644 windsock/src/cloud.rs delete mode 100644 windsock/src/data.rs delete mode 100644 windsock/src/filter.rs delete mode 100644 windsock/src/lib.rs delete mode 100644 windsock/src/list.rs delete mode 100644 windsock/src/report.rs delete mode 100644 windsock/src/tables.rs diff --git a/.github/workflows/windsock_benches.yaml b/.github/workflows/windsock_benches.yaml index a3595f3cf..b2e5cc5e1 100644 --- a/.github/workflows/windsock_benches.yaml +++ b/.github/workflows/windsock_benches.yaml @@ -41,9 +41,6 @@ jobs: cargo windsock local-run --bench-length-seconds 5 --operations-per-second 100 --profilers samply name=cassandra,compression=none,connection_count=1,driver=scylla,operation=read_i64,protocol=v4,shotover=standard,topology=single cargo windsock local-run --bench-length-seconds 5 --operations-per-second 100 --profilers sys_monitor name=kafka,shotover=standard,size=1B,topology=single cargo windsock local-run --bench-length-seconds 5 --operations-per-second 100 --profilers shotover_metrics name=redis,encryption=none,operation=get,shotover=standard,topology=single - - # windsock/examples/cassandra.rs - this can stay here until windsock is moved to its own repo - cargo run --release --example cassandra -- local-run --bench-length-seconds 5 --operations-per-second 100 - name: Ensure that tests did not create or modify any files that arent .gitignore'd run: | if [ -n "$(git status --porcelain)" ]; then diff --git a/Cargo.lock b/Cargo.lock index cc6efe129..c559f0b64 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5639,6 +5639,8 @@ checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" [[package]] name = "windsock" version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5730a5bb3421d2422b41fc14e51e8dd191294f3de9eeccc6568eebf9644ee027" dependencies = [ "anyhow", "async-trait", @@ -5646,8 +5648,6 @@ dependencies = [ "clap", "console", "copy_dir", - "docker-compose-runner", - "scylla", "serde", "strum 0.26.1", "time", diff --git a/Cargo.toml b/Cargo.toml index 75519017b..8f60f193d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,5 @@ [workspace] members = [ - "windsock", "shotover", "shotover-proxy", "test-helpers", diff --git a/shotover-proxy/Cargo.toml b/shotover-proxy/Cargo.toml index c36e4d72f..7a7ee8d94 100644 --- a/shotover-proxy/Cargo.toml +++ b/shotover-proxy/Cargo.toml @@ -47,7 +47,7 @@ tokio-bin-process.workspace = true rustls-pemfile = "2.0.0" rustls-pki-types = "1.1.0" aws-throwaway.workspace = true -windsock = { path = "../windsock" } +windsock = "0.1.0" regex = "1.7.0" cql-ws = { git = "https://github.com/shotover/cql-ws" } opensearch = "2.1.0" diff --git a/windsock/Cargo.toml b/windsock/Cargo.toml deleted file mode 100644 index 178da7444..000000000 --- a/windsock/Cargo.toml +++ /dev/null @@ -1,23 +0,0 @@ -[package] -name = "windsock" -version = "0.1.0" -edition = "2021" -license = "Apache-2.0" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -anyhow.workspace = true -async-trait = "0.1.68" -bincode.workspace = true -clap.workspace = true -console = "0.15.5" -copy_dir = "0.1.2" -serde = { workspace = 
true, features = ["derive"] } -strum = { version = "0.26.0", features = ["derive"] } -time = { version = "0.3.25", features = ["serde"] } -tokio.workspace = true - -[dev-dependencies] -scylla = { version = "0.12.0", features = ["ssl"] } -docker-compose-runner = "0.3.0" diff --git a/windsock/build.rs b/windsock/build.rs deleted file mode 100644 index a566c2b4c..000000000 --- a/windsock/build.rs +++ /dev/null @@ -1,7 +0,0 @@ -use std::env; - -fn main() { - let profile = env::var("PROFILE").unwrap(); - println!("cargo:rustc-env=PROFILE={profile}"); - println!("cargo:rerun-if-changed=build.rs"); -} diff --git a/windsock/examples/cassandra-docker-compose.yaml b/windsock/examples/cassandra-docker-compose.yaml deleted file mode 100644 index 6993ff13c..000000000 --- a/windsock/examples/cassandra-docker-compose.yaml +++ /dev/null @@ -1,26 +0,0 @@ -version: "3.3" - -networks: - cassandra_subnet: - name: cassandra_subnet - driver: bridge - ipam: - driver: default - config: - - subnet: 172.16.1.0/24 - gateway: 172.16.1.1 - -services: - cassandra-one: - image: bitnami/cassandra:4.0.6 - networks: - cassandra_subnet: - ipv4_address: 172.16.1.2 - environment: - &environment - MAX_HEAP_SIZE: "400M" - MIN_HEAP_SIZE: "400M" - HEAP_NEWSIZE: "48M" - volumes: - - type: tmpfs - target: /var/lib/cassandra diff --git a/windsock/examples/cassandra.rs b/windsock/examples/cassandra.rs deleted file mode 100644 index 9b28183d6..000000000 --- a/windsock/examples/cassandra.rs +++ /dev/null @@ -1,163 +0,0 @@ -use anyhow::Result; -use async_trait::async_trait; -use docker_compose_runner::{DockerCompose, Image}; -use scylla::SessionBuilder; -use scylla::{transport::Compression, Session}; -use std::{ - collections::HashMap, - path::Path, - sync::Arc, - time::{Duration, Instant}, -}; -use tokio::sync::mpsc::UnboundedSender; -use windsock::cloud::NoCloud; -use windsock::{Bench, BenchParameters, BenchTask, Profiling, Report, Windsock}; - -fn main() { - set_working_dir(); - Windsock::new( - vec![ - Box::new(CassandraBench::new(Some(Compression::Lz4))), - Box::new(CassandraBench::new(None)), - ], - NoCloud::new_boxed(), - &["release"], - ) - .run(); -} - -struct CassandraBench { - compression: Option, -} - -impl CassandraBench { - fn new(compression: Option) -> Self { - CassandraBench { compression } - } -} - -#[async_trait] -impl Bench for CassandraBench { - type CloudResourcesRequired = (); - type CloudResources = (); - fn tags(&self) -> HashMap { - [ - ("name".to_owned(), "cassandra".to_owned()), - ("topology".to_owned(), "single".to_owned()), - ("message_type".to_owned(), "write1000bytes".to_owned()), - ( - "compression".to_owned(), - match &self.compression { - Some(Compression::Lz4) => "LZ4".to_owned(), - Some(Compression::Snappy) => "Snappy".to_owned(), - None => "None".to_owned(), - }, - ), - ] - .into_iter() - .collect() - } - - async fn orchestrate_cloud( - &self, - _resources: (), - _running_in_release: bool, - _profiling: Profiling, - _bench_parameters: BenchParameters, - ) -> Result<()> { - todo!() - } - - async fn orchestrate_local( - &self, - _running_in_release: bool, - _profiling: Profiling, - parameters: BenchParameters, - ) -> Result<()> { - let _docker_compose = docker_compose("examples/cassandra-docker-compose.yaml"); - let address = "127.0.0.1:9042"; - - self.execute_run(address, ¶meters).await; - - Ok(()) - } - - async fn run_bencher( - &self, - _resources: &str, - parameters: BenchParameters, - reporter: UnboundedSender, - ) { - let session = Arc::new( - SessionBuilder::new() - 
.known_nodes(["172.16.1.2:9042"]) - // We do not need to refresh metadata as there is nothing else fiddling with the topology or schema. - // By default the metadata refreshes every 60s and that can cause performance issues so we disable it by using an absurdly high refresh interval - .cluster_metadata_refresh_interval(Duration::from_secs(10000000000)) - .user("cassandra", "cassandra") - .compression(self.compression) - .build() - .await - .unwrap(), - ); - - let tasks = BenchTaskCassandra { session } - .spawn_tasks(reporter.clone(), parameters.operations_per_second) - .await; - - let start = Instant::now(); - reporter.send(Report::Start).unwrap(); - - for _ in 0..parameters.runtime_seconds { - let second = Instant::now(); - tokio::time::sleep(Duration::from_secs(1)).await; - reporter - .send(Report::SecondPassed(second.elapsed())) - .unwrap(); - } - - reporter.send(Report::FinishedIn(start.elapsed())).unwrap(); - - // make sure the tasks complete before we drop the database they are connecting to - for task in tasks { - task.await.unwrap(); - } - } -} - -#[derive(Clone)] -struct BenchTaskCassandra { - session: Arc, -} - -#[async_trait] -impl BenchTask for BenchTaskCassandra { - async fn run_one_operation(&self) -> Result<(), String> { - self.session - .query("SELECT * FROM system.peers", ()) - .await - .map_err(|err| format!("{err:?}")) - .map(|_| ()) - } -} - -fn docker_compose(file_path: &str) -> DockerCompose { - DockerCompose::new(&IMAGE_WAITERS, |_| {}, file_path) -} - -static IMAGE_WAITERS: [Image; 1] = [Image { - name: "bitnami/cassandra:4.0.6", - log_regex_to_wait_for: r"Startup complete", - timeout: Duration::from_secs(120), -}]; - -fn set_working_dir() { - // tests and benches will set the directory to the directory of the crate, we are acting as a benchmark so we do the same - std::env::set_current_dir( - Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap() - .join(env!("CARGO_PKG_NAME")), - ) - .unwrap(); -} diff --git a/windsock/readme.md b/windsock/readme.md deleted file mode 100644 index 8d072d031..000000000 --- a/windsock/readme.md +++ /dev/null @@ -1,206 +0,0 @@ -# Windsock - A DB benchmarking framework - -Windsock is a generic DB benchmarking framework. - -What you do: - -* Bring your own rust async compatible DB driver -* Define your benchmark logic which reports some simple stats back to windsock -* Define your pool of benchmarks - -What windsock does: - -* Provides a CLI from which you can: - * Query available benchmarks - * Selectively run benchmarks - * Process benchmark results into readable tables -* Generates a webpage from which you can explore graphed results - -Windsock is suitable for: - -* Iteratively testing performance during development of a database or service (for microbenchmarks you will need a different tool though) -* Investigating performance of different workloads on a database you intend to use. -* Generating a webpage of graphs to show off the performance of your released database. 
(not yet implemented) - -## Define benches - -To use windsock create a rust crate that imports windsock: - -```toml -windsock = { git = "https://github.com/shotover/shotover-proxy" } -``` - -And then implement the crate like this (simplified): - -```rust -fn main() { - // Define our benchmarks and give them to windsock - Windsock::new(vec![ - Box::new(CassandraBench { topology: Topology::Cluster3 }), - Box::new(CassandraBench { topology: Topology::Single }) - ]) - // Hand control of the app over to windsock - // Windsock processes CLI args, possibly running benchmarks and then terminates. - .run(); -} - -pub struct CassandraBench { topology: Topology } - -#[async_trait] -impl Bench for CassandraBench { - // define tags that windsock will use to filter and name the benchmark instance - fn tags(&self) -> HashMap { - [ - ("name".to_owned(), "cassandra".to_owned()), - ( - "topology".to_owned(), - match &self.topology { - Topology::Single => "single".to_owned(), - Topology::Cluster3 => "cluster3".to_owned(), - }, - ), - ] - .into_iter() - .collect() - } - - // the benchmark logic for this benchmark instance - async fn run(&self, runtime_seconds: usize, operations_per_second: Option, reporter: UnboundedSender) { - // bring up the DB - let _handle = init_cassandra(); - - // create the DB driver session - let session = init_session().await; - - // spawn tokio tasks to concurrently hit the database - // The exact query is defined in `run_one_operation` below - BenchTaskCassandra { session }.spawn_tasks(reporter.clone(), operations_per_second).await; - - // tell windsock to begin benchmarking - reporter.send(Report::Start).unwrap(); - let start = Instant::now(); - - // run the bench for the time requested by the user on the CLI (defaults to 15s) - tokio::time::sleep(Duration::from_secs(runtime_seconds)).await; - - // tell windsock to finalize the benchmark - reporter.send(Report::FinishedIn(start.elapsed())).unwrap(); - } -} - -// This struct is cloned once for each tokio task it will be run in. -#[derive(Clone)] -struct BenchTaskCassandra { - session: Arc, -} - -#[async_trait] -impl BenchTask for BenchTaskCassandra { - async fn run_one_operation(&self) -> Result<(), String> { - self.session.query("SELECT * FROM table").await - } -} -``` - -**TODO:** document running windsock as both a standalone crate and as a cargo bench. - -This example is simplified for demonstration purposes, refer to `examples/cassandra.rs` for a full working example. - -## Running benches - -Then we run our crate to run the benchmarks and view results like: - -```none -> cargo windsock run-local -... 
benchmark running logs -> cargo windsock results -Results for cassandra - topology ──single ─cluster3 -Measurements ═══════════════════════════ - Operations Total 750762 372624 - Operations Per Sec 83418 41403 - Min 0.255ms 0.229ms - 1 0.389ms 0.495ms - 2 0.411ms 0.571ms - 5 0.460ms 0.714ms - 10 0.567ms 0.876ms - 25 1.131ms 1.210ms - 50 1.306ms 1.687ms - 75 1.519ms 2.600ms - 90 1.763ms 4.881ms - 95 2.132ms 7.542ms - 98 2.588ms 14.008ms - 99 2.951ms 19.297ms - 99.9 7.952ms 40.896ms - 99.99 25.559ms 80.692ms -``` - -TODO: make this into a comparison to make it more flashy and use an image to include the coloring - -and graphs: TODO - -## How to perform various tasks in windsock - -### Just run every bench - -```shell -> cargo windsock run-local -``` - -### Run benches with matching tags and view all the results in one table - -```shell -> cargo windsock run-local db=kafka OPS=1000 topology=single # run benchmarks matching some tags -> cargo windsock results # view the results of the benchmarks with the same tags in a single table -``` - -### Iteratively compare results against a previous implementation - -```shell -> git checkout main # checkout original implementation -> cargo windsock run-local # run all benchmarks -> cargo windsock baseline-set # set the last benchmark run as the baseline -> vim src/main.rs # modify implementation -> cargo windsock run-local # run all benchmarks, every result is compared against the baseline -> cargo windsock results # view those results in a nice table -> vim src/main.rs # modify implementation again -> cargo windsock run-local # run all benchmarks, every result is compared against the baseline -``` - -### Run benchmarks in the cloud (simple) - -```shell -# create cloud resources, run benchmarks and then cleanup - all in one command -> cargo windsock cloud-setup-run-cleanup -``` - -### Iteratively compare results against a previous implementation (running in a remote cloud) - -```shell -# Setup the cloud resources and then form a baseline -> git checkout main # checkout original implementation -> cargo windsock cloud-setup db=kafka # setup the cloud resources required to run all kafka benchmarks -> cargo windsock cloud-run db=kafka # run all the kafka benchmarks in the cloud -> cargo windsock baseline-set # set the last benchmark run as the baseline - -# Make a change and and measure the effect -> vim src/main.rs # modify implementation -> cargo windsock cloud-run db=kafka # run all benchmarks, every result is compared against the baseline -> cargo windsock results # view those results in a nice table, compared against the baseline - -# And again -> vim src/main.rs # modify implementation again -> cargo windsock cloud-run db=kafka # run all benchmarks, every result is compared against the baseline - -# And finally... 
-> cargo windsock cloud-cleanup # Terminate all the cloud resources now that we are done -``` - -### Generate graph webpage - -TODO: not yet implemented - -```shell -> cargo windsock local-run # run all benches -> cargo windsock generate-webpage # generate a webpage from the results -``` diff --git a/windsock/src/bench.rs b/windsock/src/bench.rs deleted file mode 100644 index fa8fe1ce4..000000000 --- a/windsock/src/bench.rs +++ /dev/null @@ -1,343 +0,0 @@ -use crate::cli::RunArgs; -use crate::report::{report_builder, Report, ReportArchive}; -use crate::tables::ReportColumn; -use anyhow::Result; -use async_trait::async_trait; -use serde::{Deserialize, Serialize}; -use std::collections::{HashMap, HashSet}; -use std::fmt::Write; -use std::path::PathBuf; -use std::time::{Duration, Instant}; -use tokio::sync::mpsc::UnboundedSender; -use tokio::task::JoinHandle; - -pub struct BenchState { - bench: Box>, - pub(crate) tags: Tags, - pub(crate) supported_profilers: Vec, -} - -impl BenchState { - pub fn new( - bench: Box< - dyn Bench, - >, - ) -> Self { - let tags = Tags(bench.tags()); - let supported_profilers = bench.supported_profilers(); - BenchState { - bench, - tags, - supported_profilers, - } - } - - pub async fn orchestrate( - &mut self, - args: &RunArgs, - running_in_release: bool, - cloud_resources: Option, - ) { - let name = self.tags.get_name(); - println!("Running {:?}", name); - - let profilers_to_use = args.profilers.clone(); - let results_path = if !profilers_to_use.is_empty() { - let path = crate::data::windsock_path() - .join("profiler_results") - .join(&name); - std::fs::create_dir_all(&path).unwrap(); - path - } else { - PathBuf::new() - }; - - if let Some(cloud_resources) = cloud_resources { - self.bench - .orchestrate_cloud( - cloud_resources, - running_in_release, - Profiling { - results_path, - profilers_to_use, - }, - BenchParameters::from_args(args), - ) - .await - .unwrap(); - } else { - self.bench - .orchestrate_local( - running_in_release, - Profiling { - results_path, - profilers_to_use, - }, - BenchParameters::from_args(args), - ) - .await - .unwrap(); - } - - crate::tables::display_results_table(&[ReportColumn { - baseline: ReportArchive::load_baseline(&name).unwrap(), - current: ReportArchive::load(&name).unwrap(), - }]); - } - - pub async fn run(&mut self, args: &RunArgs, running_in_release: bool, resources: &str) { - let (tx, rx) = tokio::sync::mpsc::unbounded_channel(); - let process = tokio::spawn(report_builder( - self.tags.clone(), - rx, - args.operations_per_second, - running_in_release, - )); - - self.bench - .run_bencher(resources, BenchParameters::from_args(args), tx) - .await; - - process.await.unwrap(); - } - - // TODO: will return None when running in non-local setup - pub fn cores_required(&self) -> Option { - Some(self.bench.cores_required()) - } - - pub fn required_cloud_resources(&self) -> ResourcesRequired { - self.bench.required_cloud_resources() - } -} - -/// Implement this to define your benchmarks -/// A single implementation of `Bench` can represent multiple benchmarks by initializing it multiple times with different state that returns unique tags. 
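For example, a minimal sketch combining the readme snippet with the full `Windsock::new` signature from `examples/cassandra.rs` above (the `Topology` enum is illustrative; the names come from `Tags::get_name` below):

```rust
// One Bench impl registered twice; the distinct tag values yield the
// distinct benchmark names "name=cassandra,topology=cluster3" and
// "name=cassandra,topology=single".
Windsock::new(
    vec![
        Box::new(CassandraBench { topology: Topology::Cluster3 }),
        Box::new(CassandraBench { topology: Topology::Single }),
    ],
    NoCloud::new_boxed(),
    &["release"],
)
.run();
```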
-#[async_trait] -pub trait Bench { - type CloudResourcesRequired; - type CloudResources; - - /// Returns tags that are used for forming comparisons, graphs and naming the benchmark - fn tags(&self) -> HashMap; - - /// Returns the names of profilers that this bench can be run with - fn supported_profilers(&self) -> Vec { - vec![] - } - - /// Specifies the cloud resources that should be provided to this bench - fn required_cloud_resources(&self) -> Self::CloudResourcesRequired { - unimplemented!("To support running in cloud this bench needs to implement `Bench::required_cloud_resources`"); - } - - /// How many cores to assign the async runtime in which the bench runs. - fn cores_required(&self) -> usize { - 1 - } - - /// Windsock will call this method to orchestrate the bench in cloud mode. - /// It must setup cloud resources to run the bench in a cloud and then start the bench returning the results on conclusion - async fn orchestrate_cloud( - &self, - cloud: Self::CloudResources, - running_in_release: bool, - profiling: Profiling, - bench_parameters: BenchParameters, - ) -> Result<()>; - - /// Windsock will call this method to orchestrate the bench in local mode. - /// It must setup local resources to run the bench locally and then start the bench returning the results on conclusion - async fn orchestrate_local( - &self, - running_in_release: bool, - profiling: Profiling, - bench_parameters: BenchParameters, - ) -> Result<()>; - - /// Windsock will call this method to run the bencher. - /// But the implementation of `orchestrate_local` or `orchestrate_cloud` must run the bencher through windsock in some way. - /// This will be: - /// * In the case of `orchestrate_cloud`, the windsock binary must be uploaded to a cloud VM and `windsock --internal-run` executed there. - /// * In the case of `orchestrate_local`, call `Bench::execute_run` to indirectly call this method by executing another instance of the windsock executable. - /// - /// The `resources` arg is a string that is passed in from the argument to `--internal-run` or at a higher level the argument to `Bench::execute_run`. - /// Use this string to instruct the bencher where to find the resources it needs. e.g. the IP address of the DB to benchmark. - /// To pass in multiple resources it is recommended to use a serialization method such as `serde-json`. 
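A sketch of that recommendation, assuming `serde_json`; the `AddressList` type and its fields are illustrative only:

```rust
#[derive(serde::Serialize, serde::Deserialize)]
struct AddressList {
    db: String,
    shotover: String,
}

// In orchestrate_local: pack everything the bencher needs into one string
// and hand it to the execute_run helper defined further down this trait.
let resources = serde_json::to_string(&AddressList {
    db: "127.0.0.1:9042".to_owned(),
    shotover: "127.0.0.1:9043".to_owned(),
})
.unwrap();
self.execute_run(&resources, &bench_parameters).await;

// In run_bencher: decode the same string to recover the addresses.
let addresses: AddressList = serde_json::from_str(resources).unwrap();
```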
- async fn run_bencher( - &self, - resources: &str, - bench_parameters: BenchParameters, - reporter: UnboundedSender, - ); - - /// Call within `Bench::orchestrate_local` to call `Bench::run` - async fn execute_run(&self, resources: &str, bench_parameters: &BenchParameters) { - let name_and_resources = format!("{} {}", self.name(), resources); - let output = tokio::process::Command::new(std::env::current_exe().unwrap().as_os_str()) - .args(run_args_vec(name_and_resources, bench_parameters)) - .output() - .await - .unwrap(); - if !output.status.success() { - let stdout = String::from_utf8(output.stdout).unwrap(); - let stderr = String::from_utf8(output.stderr).unwrap(); - panic!("Bench run failed:\nstdout:\n{stdout}\nstderr:\n{stderr}") - } - } - - /// Call within `Bench::orchestrate_cloud` to determine how to invoke the uploaded windsock executable - fn run_args(&self, resources: &str, bench_parameters: &BenchParameters) -> String { - let name_and_resources = format!("\"{} {}\"", self.name(), resources); - run_args_vec(name_and_resources, bench_parameters).join(" ") - } - - fn name(&self) -> String { - Tags(self.tags()).get_name() - } -} - -fn run_args_vec(name_and_resources: String, bench_parameters: &BenchParameters) -> Vec { - let mut args = vec![]; - args.push("internal-run".to_owned()); - args.push("--bench-length-seconds".to_owned()); - args.push(bench_parameters.runtime_seconds.to_string()); - - if let Some(ops) = bench_parameters.operations_per_second { - args.push("--operations-per-second".to_owned()); - args.push(ops.to_string()); - }; - - args.push(name_and_resources); - - args -} - -pub struct BenchParameters { - pub runtime_seconds: u32, - pub operations_per_second: Option, -} - -impl BenchParameters { - fn from_args(args: &RunArgs) -> Self { - BenchParameters { - runtime_seconds: args.bench_length_seconds.unwrap_or(15), - operations_per_second: args.operations_per_second, - } - } -} - -pub struct Profiling { - pub results_path: PathBuf, - pub profilers_to_use: Vec, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub(crate) struct Tags(pub HashMap); - -impl Tags { - pub fn get_name(&self) -> String { - let mut result = String::new(); - - let mut tags: Vec<(&String, &String)> = self.0.iter().collect(); - tags.sort_by_key(|x| x.0); - for (key, value) in tags { - if !result.is_empty() { - write!(result, ",").unwrap(); - } - write!(result, "{key}={value}").unwrap(); - } - result - } - - /// Does not handle invalid names, only use on internally generated names - pub fn from_name(name: &str) -> Self { - let mut map = HashMap::new(); - for tag in name.split(',') { - if tag.contains('=') { - let mut pair = tag.split('='); - let key = pair.next().unwrap().to_owned(); - let value = pair.next().unwrap().to_owned(); - map.insert(key, value); - } else { - panic!("tag without an '=' was found") - } - } - Tags(map) - } - - /// returns the set wise intersection of two `Tags`s - pub(crate) fn intersection(&self, other: &Tags) -> Self { - let mut intersection = HashMap::new(); - for (key, value) in &self.0 { - if other.0.get(key).map(|x| x == value).unwrap_or(false) { - intersection.insert(key.clone(), value.clone()); - } - } - Tags(intersection) - } - - pub(crate) fn keys(&self) -> HashSet { - self.0.keys().cloned().collect() - } -} - -/// An optional helper trait for defining benchmarks. -/// Usually you have an async rust DB driver that you need to call across multiple tokio tasks -/// This helper will spawn these tasks and send the required `Report::QueryCompletedIn`. 
-/// -/// To use this helper: -/// 1. implement `BenchTask` for a struct that contains the required db resources -/// 2. have run_one_operation use those resources to perform a single operation -/// 3. call spawn_tasks on an instance of BenchTask, it will clone your BenchTask instance once for each task it generates -#[async_trait] -pub trait BenchTask: Clone + Send + Sync + 'static { - async fn run_one_operation(&self) -> Result<(), String>; - - async fn spawn_tasks( - &self, - reporter: UnboundedSender, - operations_per_second: Option, - ) -> Vec> { - let mut tasks = vec![]; - // 100 is a generally nice amount of tasks to have, but if we have more tasks than OPS the throughput is very unstable - let task_count = operations_per_second.map(|x| x.min(100)).unwrap_or(100); - - let allocated_time_per_op = operations_per_second - .map(|ops| (Duration::from_secs(1) * task_count as u32) / ops as u32); - for i in 0..task_count { - let task = self.clone(); - let reporter = reporter.clone(); - tasks.push(tokio::spawn(async move { - // spread load out over a second - tokio::time::sleep(Duration::from_nanos((1_000_000_000 / task_count) * i)).await; - - let mut interval = allocated_time_per_op.map(tokio::time::interval); - - loop { - if let Some(interval) = &mut interval { - interval.tick().await; - } - - let operation_start = Instant::now(); - let report = match task.run_one_operation().await { - Ok(()) => Report::QueryCompletedIn(operation_start.elapsed()), - Err(message) => Report::QueryErrored { - completed_in: operation_start.elapsed(), - message, - }, - }; - if reporter.send(report).is_err() { - // The benchmark has completed and the reporter no longer wants to receive reports so just shutdown - return; - } - } - })); - } - - // sleep until all tasks have started running - tokio::time::sleep(Duration::from_secs(1)).await; - - tasks - } -} diff --git a/windsock/src/cli.rs b/windsock/src/cli.rs deleted file mode 100644 index a5e09e32b..000000000 --- a/windsock/src/cli.rs +++ /dev/null @@ -1,198 +0,0 @@ -use anyhow::{anyhow, Error}; -use clap::{Args, Parser, Subcommand}; - -const ABOUT: &str = r#"Bench Names: - Each benchmark has a unique name, this name is used by many options listed below. - The name is derived from an alphabetical sorting of its tags so you wont find it directly in the bench - implementation but it will be listed in the `list` command. - -Tag Filters: - Many options below take tag filters that specify which benches to include. - Tag filters specify which benches to include and the filter results are unioned. - - So: - * The filter "foo=some_value" will include only benches with the tag key `foo` and the tag value `some_value` - * The filter "foo=some_value bar=another_value" will include only benches that match "foo=some_value" and "bar=another_value" - * The filter "" will include all benches - - A filters tags can also be separated by commas allowing names to function as filters. - So: foo=some_value,bar=another_value is a name but it can also be used where a filter is accepted."#; - -#[derive(Subcommand, Clone)] -pub enum Command { - /// List the name of every bench. - #[clap(verbatim_doc_comment)] - List, - - /// Create cloud resources for running benches - #[clap(verbatim_doc_comment)] - CloudSetup { - /// e.g. 
"db=kafka connection_count=100" - #[clap(verbatim_doc_comment)] - filter: String, - }, - - /// Run benches in the cloud using the resources created by cloud-setup - #[clap(verbatim_doc_comment)] - CloudRun(RunArgs), - - /// cleanup cloud resources created by cloud-setup - /// Make sure to call this when your benchmarking session is finished! - #[clap(verbatim_doc_comment)] - CloudCleanup, - - /// cloud-setup, cloud-run and cloud-cleanup combined into a single command. - /// Convenient for getting a quick understanding of performance. - /// However, if you are performing optimization work prefer the individual commands as you will get: - /// * more stable results (same cloud instance) - /// * faster results (skip recreating and destroying cloud resources) - #[clap(verbatim_doc_comment)] - CloudSetupRunCleanup(RunArgs), - - /// Run benches entirely on your local machine - #[clap(verbatim_doc_comment)] - LocalRun(RunArgs), - - /// The results of the last benchmarks run becomes the new baseline from which future benchmark runs will be compared. - #[clap(verbatim_doc_comment)] - BaselineSet, - - /// Removes the stored baseline. Following runs will no longer compare against a baseline. - #[clap(verbatim_doc_comment)] - BaselineClear, - - /// Generate graphs webpage from the last benchmarks run. - #[clap(verbatim_doc_comment)] - GenerateWebpage, - - /// Display results from the last benchmark run by: - /// Listing bench results matching tag filters. - /// - /// Usage: Provide tag filters - #[clap(verbatim_doc_comment)] - // TODO: get trailing_var_arg(true) working so user can avoid having to wrap in "" - Results { - /// Do not compare against the set baseline. - #[clap(long, verbatim_doc_comment)] - ignore_baseline: bool, - - /// e.g. "db=kafka connection_count=100" - #[clap(verbatim_doc_comment)] - filter: Option, - }, - - /// Display results from the last benchmark run by: - /// Comparing various benches against a specific base bench. - /// - /// Usage: First provide the base benchmark name then provide benchmark names to compare against the base. - /// "base_name other_name1 other_name2" - #[clap(verbatim_doc_comment)] - CompareByName { filter: String }, - - /// Display results from the last benchmark run by: - /// Comparing benches matching tag filters against a specific base bench. - /// - /// Usage: First provide the base benchmark name then provide tag filters - /// "base_name db=kafka connection_count=10" - #[clap(verbatim_doc_comment)] - CompareByTags { filter: String }, - - /// Not for human use. Call this from your bench orchestration method to launch your bencher. - #[clap(verbatim_doc_comment)] - InternalRun(RunArgs), -} - -#[derive(Args, Clone)] -pub struct RunArgs { - /// Instruct benches to profile the application under test with the specified profilers. - /// Benches that do not support the specified profilers will be skipped. - #[clap(long, verbatim_doc_comment, value_delimiter = ',')] - pub profilers: Vec, - - /// How long in seconds to run each bench for. - /// By default benches will run for 15 seconds. - #[clap(long, verbatim_doc_comment)] - pub bench_length_seconds: Option, - - /// Instruct the benches to cap their operations per second to the specified amount. - /// By default the benches will run with unlimited operations per second. - #[clap(long, verbatim_doc_comment)] - pub operations_per_second: Option, - - /// Run all benches that match the specified tag key/values. 
- /// `tag_key=tag_value foo=bar` - #[clap(verbatim_doc_comment)] - pub filter: Option, -} - -impl RunArgs { - pub fn filter(&self) -> String { - match &self.filter { - // convert a name into a filter by swapping commas for spaces - Some(filter) => filter.replace(',', " "), - // If not provided use the empty filter - None => String::new(), - } - } -} - -#[derive(Parser)] -#[clap(about=ABOUT)] -pub struct WindsockArgs { - #[command(subcommand)] - pub command: Option, - - #[clap(long, hide(true))] - list: bool, - - #[clap(long, hide(true))] - format: Option, - - #[clap(long, hide(true))] - ignored: bool, - - #[clap(long, hide(true))] - pub exact: Option, - - #[clap(long, hide(true))] - nocapture: bool, -} - -#[derive(clap::ValueEnum, Clone, Copy)] -enum NextestFormat { - Terse, -} - -impl WindsockArgs { - pub fn nextest_list(&self) -> bool { - self.list - } - - pub fn nextest_list_all(&self) -> bool { - self.list && matches!(&self.format, Some(NextestFormat::Terse)) && !self.ignored - } - - pub fn nextest_list_ignored(&self) -> bool { - self.list && matches!(&self.format, Some(NextestFormat::Terse)) && self.ignored - } - - pub fn nextest_run_by_name(&self) -> Option<&str> { - if self.nocapture { - self.exact.as_deref() - } else { - None - } - } - - pub fn nextest_invalid_args(&self) -> Option { - if self.format.is_some() && self.list { - Some(anyhow!("`--format` only exists for nextest compatibility and is not supported without `--list`")) - } else if self.nocapture && self.exact.is_none() { - Some(anyhow!("`--nocapture` only exists for nextest compatibility and is not supported without `--exact`")) - } else if self.exact.is_some() && !self.nocapture { - Some(anyhow!("`--exact` only exists for nextest compatibility and is not supported without `--nocapture`")) - } else { - None - } - } -} diff --git a/windsock/src/cloud.rs b/windsock/src/cloud.rs deleted file mode 100644 index 82b4dd379..000000000 --- a/windsock/src/cloud.rs +++ /dev/null @@ -1,102 +0,0 @@ -use std::path::Path; - -use async_trait::async_trait; - -/// Implement this to give windsock some control over your cloud. -/// Currently the only thing windsock needs is the ability to cleanup resources since resource creation should happen within your own benches. -#[async_trait(?Send)] -pub trait Cloud { - /// Each bench creates an instance of this to provide its resource requirements - type CloudResourcesRequired; - /// This is given to the benches and contains methods or data required to access the instances. - type CloudResources; - - /// Cleanup all cloud resources created by windsock. - /// You should destroy not just resources created during this bench run but also resources created in past bench runs that might have missed cleanup due to a panic. - async fn cleanup_resources(&mut self); - - /// This is called once at start up before running any benches. - /// The implementation must return an object containing all the requested cloud resources. - /// The `required_resources` contains the `CloudResourcesRequired` returned by each bench that will be executed in this run. - /// - /// benches_will_run: - /// * true - the benches will be run, ensure they have everything they need to complete succesfully. 
- /// * false - the benches will not be run, due to `--store-cloud-resources-file`, you can skip uploading anything that will be reuploaded when restoring the resources - async fn create_resources( - &mut self, - required_resources: Vec, - benches_will_run: bool, - ) -> Self::CloudResources; - - /// Construct a file at the provided path that will allow restoring the passed resources - /// - /// It is gauranteed this will be called after all the benches have completed. - async fn store_resources_file(&mut self, path: &Path, resources: Self::CloudResources); - - /// Restore the resources from the data in the passed file. - /// It is the same file path that was passed to [`Cloud::store_resources_file`] - /// - /// The implementation should panic when the loaded messages cannot meet the requirements of the passed `required_sources`. - /// This is done rather than loading the required resources from disk as this case usually represents a user error. - /// Loading from disk is used for more consistent results across benches but the user cannot hope to get consistent results while changing the benches that will be run. - /// They are better off recreating the resources from scratch in this case. - async fn load_resources_file( - &mut self, - path: &Path, - required_resources: Vec, - ) -> Self::CloudResources; - - /// This is called once at start up before running any benches. - /// The returned Vec specifies the order in which to run benches. - fn order_benches( - &mut self, - benches: Vec>, - ) -> Vec> { - benches - } - - /// This is called before running each bench. - /// Use it to destroy or create resources as needed. - /// However, this method will not be called when `--save-resources-file` or `--load-resources-file` is set. - /// - /// It is recommended to create all resources within create_resources for faster completion time, but it may be desirable in some circumstances to create some of them here. - /// It is recommended to always destroy resources that will never be used again here. - async fn adjust_resources( - &mut self, - _benches: &[BenchInfo], - _bench_index: usize, - _resources: &mut Self::CloudResources, - ) { - } -} - -pub struct BenchInfo { - pub name: String, - pub resources: CloudResourceRequest, -} - -/// A dummy cloud instance for when the user isnt using windsock cloud functionality -pub struct NoCloud; -impl NoCloud { - pub fn new_boxed() -> Box> - where - Self: Sized, - { - Box::new(NoCloud) - } -} - -#[async_trait(?Send)] -impl Cloud for NoCloud { - type CloudResourcesRequired = (); - type CloudResources = (); - async fn cleanup_resources(&mut self) {} - async fn create_resources(&mut self, _requests: Vec<()>, _benches_will_run: bool) {} - async fn store_resources_file(&mut self, _path: &Path, _resources: ()) {} - async fn load_resources_file( - &mut self, - _path: &Path, - _required_resources: Vec, - ) { - } -} diff --git a/windsock/src/data.rs b/windsock/src/data.rs deleted file mode 100644 index 76242e92c..000000000 --- a/windsock/src/data.rs +++ /dev/null @@ -1,18 +0,0 @@ -use std::path::PathBuf; - -pub fn windsock_path() -> PathBuf { - // If we are run via cargo (we are in a target directory) use the target directory for storage. - // Otherwise just fallback to the current working directory. 
- let mut path = std::env::current_exe().unwrap(); - while path.pop() { - if path.file_name().map(|x| x == "target").unwrap_or(false) { - return path.join("windsock_data"); - } - } - - PathBuf::from("windsock_data") -} - -pub fn cloud_resources_path() -> PathBuf { - windsock_path().join("cloud_resources") -} diff --git a/windsock/src/filter.rs b/windsock/src/filter.rs deleted file mode 100644 index 5d3e730c9..000000000 --- a/windsock/src/filter.rs +++ /dev/null @@ -1,50 +0,0 @@ -use crate::bench::Tags; -use anyhow::{anyhow, Result}; - -struct FilterTag { - key: String, - values: Vec, -} - -pub(crate) struct Filter { - filter: Vec, -} - -impl Filter { - pub(crate) fn from_query(query: &str) -> Result { - let mut filter = vec![]; - for pair in query.split_whitespace() { - let mut iter = pair.split('='); - let key = iter.next().unwrap().to_owned(); - let values = match iter.next() { - Some(rhs) => rhs.split('|').map(|x| x.to_owned()).collect(), - None => { - return Err(anyhow!( - "Expected exactly one '=' but found no '=' in tag {pair:?}" - )) - } - }; - if iter.next().is_some() { - return Err(anyhow!( - "Expected exactly one '=' but found multiple '=' in tag {pair:?}" - )); - } - filter.push(FilterTag { key, values }) - } - Ok(Filter { filter }) - } - - pub(crate) fn matches(&self, tags: &Tags) -> bool { - for FilterTag { key, values } in &self.filter { - match tags.0.get(key) { - Some(check_value) => { - if !values.contains(check_value) { - return false; - } - } - None => return false, - } - } - true - } -} diff --git a/windsock/src/lib.rs b/windsock/src/lib.rs deleted file mode 100644 index 1096e309c..000000000 --- a/windsock/src/lib.rs +++ /dev/null @@ -1,328 +0,0 @@ -mod bench; -mod cli; -pub mod cloud; -mod data; -mod filter; -mod list; -mod report; -mod tables; - -pub use bench::{Bench, BenchParameters, BenchTask, Profiling}; -use data::cloud_resources_path; -pub use report::{ - ExternalReport, LatencyPercentile, Metric, OperationsReport, PubSubReport, Report, - ReportArchive, -}; -pub use tables::Goal; - -use anyhow::{anyhow, Result}; -use bench::BenchState; -use clap::{CommandFactory, Parser}; -use cli::{Command, RunArgs, WindsockArgs}; -use cloud::{BenchInfo, Cloud}; -use filter::Filter; -use std::process::exit; -use tokio::runtime::Runtime; - -pub struct Windsock { - benches: Vec>, - cloud: Box>, - running_in_release: bool, -} - -impl Windsock { - /// The benches will be run and filtered out according to the CLI arguments - /// - /// Run order: - /// * Locally: The benches that are run will always be done so in the order they are listed, this allows tricks to avoid recreating DB's for every bench. - /// e.g. the database handle can be put behind a mutex and only resetup when actually neccessary - /// * Cloud: The benches will be run in an order optimized according to its required cloud resources. - /// - /// `release_profiles` specifies which cargo profiles Windsock will run under, if a different profile is used windsock will refuse to run. - pub fn new( - benches: Vec< - Box>, - >, - cloud: Box< - dyn Cloud, - >, - release_profiles: &[&str], - ) -> Self { - let running_in_release = release_profiles.contains(&env!("PROFILE")); - - Windsock { - benches: benches.into_iter().map(BenchState::new).collect(), - cloud, - running_in_release, - } - } - - // Hands control of the process over to windsock, this method will never return - // Windsock processes CLI arguments and then runs benchmarks as instructed by the user. - pub fn run(self) -> ! 
{ - match self.run_inner() { - Ok(()) => exit(0), - Err(err) => { - eprintln!("{:?}", err); - exit(1); - } - } - } - - fn run_inner(mut self) -> Result<()> { - let args = WindsockArgs::parse(); - - let running_in_release = self.running_in_release; - if let Some(command) = args.command { - match command { - Command::List => list::list(&self.benches), - Command::BaselineSet => { - ReportArchive::set_baseline(); - println!("Baseline set"); - } - Command::BaselineClear => { - ReportArchive::clear_baseline(); - println!("Baseline cleared"); - } - Command::GenerateWebpage => { - println!("Webpage generation is not implemented yet!") - } - Command::Results { - ignore_baseline, - filter, - } => tables::results( - ignore_baseline, - &filter.unwrap_or_default().replace(',', " "), - )?, - Command::CompareByName { filter } => tables::compare_by_name(&filter)?, - Command::CompareByTags { filter } => tables::compare_by_tags(&filter)?, - Command::CloudSetup { filter } => { - create_runtime(None).block_on(self.cloud_setup(filter))? - } - Command::CloudRun(args) => { - create_runtime(None).block_on(self.cloud_run(args, running_in_release))?; - } - Command::CloudCleanup => { - create_runtime(None).block_on(self.cloud_cleanup()); - } - Command::CloudSetupRunCleanup(args) => { - create_runtime(None) - .block_on(self.cloud_setup_run_cleanup(args, running_in_release))?; - } - Command::LocalRun(args) => { - create_runtime(None).block_on(self.local_run(args, running_in_release))?; - } - Command::InternalRun(args) => self.internal_run(&args, running_in_release)?, - } - } else if args.nextest_list() { - list::nextest_list(&args, &self.benches); - } else if let Some(name) = args.nextest_run_by_name() { - create_runtime(None).block_on(self.run_nextest(name, running_in_release))?; - } else if let Some(err) = args.nextest_invalid_args() { - return Err(err); - } else { - WindsockArgs::command().print_help().unwrap(); - } - - Ok(()) - } - - async fn cloud_run(&mut self, args: RunArgs, running_in_release: bool) -> Result<()> { - let bench_infos = self.bench_infos(&args.filter(), &args.profilers)?; - let resources = self.load_cloud_from_disk(&bench_infos).await?; - self.run_filtered_benches_cloud(args, running_in_release, bench_infos, resources) - .await?; - println!("Cloud resources have not been cleaned up."); - println!("Make sure to use `cloud-cleanup` when you are finished with them."); - Ok(()) - } - - async fn cloud_setup_run_cleanup( - &mut self, - args: RunArgs, - running_in_release: bool, - ) -> Result<()> { - let bench_infos = self.bench_infos(&args.filter(), &args.profilers)?; - let resources = self.temp_setup_cloud(&bench_infos).await?; - self.run_filtered_benches_cloud(args, running_in_release, bench_infos, resources) - .await?; - self.cloud_cleanup().await; - Ok(()) - } - - fn internal_run(&mut self, args: &RunArgs, running_in_release: bool) -> Result<()> { - let name_and_resources = args - .filter - .as_ref() - .expect("Filter arg must be provided for internal-run"); - let (name, resources) = - name_and_resources.split_at(name_and_resources.find(' ').unwrap() + 1); - let name = name.trim(); - match self.benches.iter_mut().find(|x| x.tags.get_name() == name) { - Some(bench) => { - if args - .profilers - .iter() - .all(|x| bench.supported_profilers.contains(x)) - { - create_runtime(bench.cores_required()).block_on(async { - bench.run(args, running_in_release, resources).await; - }); - Ok(()) - } else { - Err(anyhow!("Specified bench {name:?} was requested to run with the profilers {:?} but it only supports 
the profilers {:?}", args.profilers, bench.supported_profilers)) - } - } - None => Err(anyhow!("Specified bench {name:?} does not exist.")), - } - } - - async fn run_nextest(&mut self, name: &str, running_in_release: bool) -> Result<()> { - let args = RunArgs { - profilers: vec![], - // This is not a real bench we are just testing that it works, - // so set some really minimal runtime values - bench_length_seconds: Some(2), - operations_per_second: Some(100), - filter: Some(name.to_string()), - }; - - self.local_run(args, running_in_release).await - } - - fn bench_infos( - &mut self, - filter: &str, - profilers_enabled: &[String], - ) -> Result>> { - let filter = Filter::from_query(filter) - .map_err(|err| anyhow!("Failed to parse FILTER {filter:?}\n{err}"))?; - let mut bench_infos = vec![]; - for bench in &mut self.benches { - if filter.matches(&bench.tags) - && profilers_enabled - .iter() - .all(|x| bench.supported_profilers.contains(x)) - { - bench_infos.push(BenchInfo { - resources: bench.required_cloud_resources(), - name: bench.tags.get_name(), - }); - } - } - Ok(self.cloud.order_benches(bench_infos)) - } - - async fn load_cloud_from_disk( - &mut self, - bench_infos: &[BenchInfo], - ) -> Result { - if !bench_infos.is_empty() { - let resources = bench_infos.iter().map(|x| x.resources.clone()).collect(); - Ok(self - .cloud - .load_resources_file(&cloud_resources_path(), resources) - .await) - } else { - Err(anyhow!("No benches found with the specified filter")) - } - } - - async fn cloud_setup(&mut self, filter: String) -> Result<()> { - let bench_infos = self.bench_infos(&filter, &[])?; - - let resources = if !bench_infos.is_empty() { - let resources = bench_infos.iter().map(|x| x.resources.clone()).collect(); - self.cloud.create_resources(resources, false).await - } else { - return Err(anyhow!("No benches found with the specified filter")); - }; - - self.cloud - .store_resources_file(&cloud_resources_path(), resources) - .await; - - println!( - "Cloud resources have been created in preparation for running the following benches:" - ); - for bench in bench_infos { - println!(" {}", bench.name); - } - println!("Make sure to use `cloud-cleanup` when you are finished with these resources"); - - Ok(()) - } - - async fn temp_setup_cloud( - &mut self, - bench_infos: &[BenchInfo], - ) -> Result { - let resources = if !bench_infos.is_empty() { - let resources = bench_infos.iter().map(|x| x.resources.clone()).collect(); - self.cloud.create_resources(resources, true).await - } else { - return Err(anyhow!("No benches found with the specified filter")); - }; - - Ok(resources) - } - - async fn run_filtered_benches_cloud( - &mut self, - args: RunArgs, - running_in_release: bool, - bench_infos: Vec>, - mut resources: Resources, - ) -> Result<()> { - ReportArchive::clear_last_run(); - - for (i, bench_info) in bench_infos.iter().enumerate() { - for bench in &mut self.benches { - if bench.tags.get_name() == bench_info.name { - self.cloud - .adjust_resources(&bench_infos, i, &mut resources) - .await; - bench - .orchestrate(&args, running_in_release, Some(resources.clone())) - .await; - break; - } - } - } - - Ok(()) - } - - async fn cloud_cleanup(&mut self) { - std::fs::remove_file(cloud_resources_path()).ok(); - self.cloud.cleanup_resources().await; - } - - async fn local_run(&mut self, args: RunArgs, running_in_release: bool) -> Result<()> { - ReportArchive::clear_last_run(); - let filter = args.filter(); - let filter = Filter::from_query(&filter) - .map_err(|err| anyhow!("Failed to parse FILTER 
{:?}\n{err}", filter))?; - - for bench in &mut self.benches { - if filter.matches(&bench.tags) - && args - .profilers - .iter() - .all(|x| bench.supported_profilers.contains(x)) - { - bench.orchestrate(&args, running_in_release, None).await; - } - } - Ok(()) - } -} - -fn create_runtime(worker_threads: Option<usize>) -> Runtime { - let mut runtime_builder = tokio::runtime::Builder::new_multi_thread(); - runtime_builder.enable_all().thread_name("Windsock-Thread"); - if let Some(worker_threads) = worker_threads { - runtime_builder.worker_threads(worker_threads); - } - runtime_builder.build().unwrap() -} diff --git a/windsock/src/list.rs b/windsock/src/list.rs deleted file mode 100644 index 109297f65..000000000 --- a/windsock/src/list.rs +++ /dev/null @@ -1,26 +0,0 @@ -use crate::{bench::BenchState, cli::WindsockArgs}; - -pub fn list(benches: &[BenchState]) { - // regular usage - println!("Benches:"); - for bench in benches { - println!("{}", bench.tags.get_name()); - } -} - -pub fn nextest_list( - args: &WindsockArgs, - benches: &[BenchState], -) { - if args.nextest_list_all() { - // list all windsock benches in nextest format - for bench in benches { - println!("{}: benchmark", bench.tags.get_name()); - } - } else if args.nextest_list_ignored() { - // windsock does not support ignored tests - } else { - // in case the user accidentally runs `--list`, just give them the regular `list` output. - list(benches); - } -} diff --git a/windsock/src/report.rs b/windsock/src/report.rs deleted file mode 100644 index f31e04228..000000000 --- a/windsock/src/report.rs +++ /dev/null @@ -1,549 +0,0 @@ -use crate::{bench::Tags, data::windsock_path, Goal}; -use anyhow::{anyhow, Result}; -use serde::{Deserialize, Serialize}; -use std::{io::ErrorKind, path::PathBuf, time::Duration}; -use strum::{EnumCount, EnumIter, IntoEnumIterator}; -use time::OffsetDateTime; -use tokio::sync::mpsc::UnboundedReceiver; - -#[derive(Debug, Serialize, Deserialize)] -pub enum Report { - /// Indicates the warmup is over and the benchmark has begun. - /// Any Completed/Errored events received before this are considered warmups and discarded. - Start, - - /// Indicates a response came back from the service. - /// The Duration should be the time between the request being sent and the response being received - QueryCompletedIn(Duration), - - /// Indicates an error response came back from the service. - QueryErrored { - /// The time between the request being sent and the response being received - completed_in: Duration, - /// The error message received from the service or the local error that occurred while trying to communicate with the service. - message: String, - }, - - /// Indicates a pubsub produce ack came back from the service. - /// The Duration should be the time between the request being sent and the response being received - ProduceCompletedIn(Duration), - - /// Indicates a pubsub produce error response came back from the service. - ProduceErrored { - completed_in: Duration, - message: String, - }, - /// Indicates a pubsub consume response came back from the service. - ConsumeCompleted, - - /// Indicates a pubsub consume error response came back from the service. - ConsumeErrored { message: String }, - - /// Indicates a second has passed for the benchmarker - SecondPassed(Duration), - - /// Contains the time that the test ran for - FinishedIn(Duration), - - /// Adds a note that will be visible to the user when viewing the benchmark results.
- AddInfoMessage(String), - - /// Ignore all other reports and use the ManualReport as the only source of benchmark metrics. - /// Do not use this under normal circumstances. - /// Instead this should only be used if you have an independent benchmarker that you want to call from windsock and include in windsocks results. - ExternalBenchmark(Box), -} - -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct ExternalReport { - pub bench_started_at: OffsetDateTime, - pub operations_report: Option, - pub pubsub_report: Option, - pub error_messages: Vec, -} - -#[derive(EnumIter, EnumCount)] -pub enum Percentile { - Min = 0, - P1, - P2, - P5, - P10, - P25, - P50, - P75, - P90, - P95, - P98, - P99, - P99_9, - P99_99, - Max, -} - -impl Percentile { - pub fn value(&self) -> f64 { - match self { - Percentile::Min => 0.0, - Percentile::P1 => 0.01, - Percentile::P2 => 0.02, - Percentile::P5 => 0.05, - Percentile::P10 => 0.10, - Percentile::P25 => 0.25, - Percentile::P50 => 0.50, - Percentile::P75 => 0.75, - Percentile::P90 => 0.90, - Percentile::P95 => 0.95, - Percentile::P98 => 0.98, - Percentile::P99 => 0.99, - Percentile::P99_9 => 0.999, - Percentile::P99_99 => 0.9999, - Percentile::Max => 1.0, - } - } - - pub fn name(&self) -> &'static str { - match self { - Percentile::Min => "Min ", - Percentile::P1 => "1 ", - Percentile::P2 => "2 ", - Percentile::P5 => "5 ", - Percentile::P10 => "10 ", - Percentile::P25 => "25 ", - Percentile::P50 => "50 ", - Percentile::P75 => "75 ", - Percentile::P90 => "90 ", - Percentile::P95 => "95 ", - Percentile::P98 => "98 ", - Percentile::P99 => "99 ", - Percentile::P99_9 => "99.9 ", - Percentile::P99_99 => "99.99", - Percentile::Max => "Max ", - } - } -} - -pub type Percentiles = [Duration; Percentile::COUNT]; - -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct ReportArchive { - pub(crate) running_in_release: bool, - pub(crate) tags: Tags, - pub bench_started_at: OffsetDateTime, - pub(crate) operations_report: Option, - pub(crate) pubsub_report: Option, - pub metrics: Vec, - pub error_messages: Vec, - pub info_messages: Vec, -} - -#[derive(Clone, Debug, Serialize, Deserialize, Default)] -pub struct OperationsReport { - pub total: u64, - pub total_errors: u64, - pub requested_operations_per_second: Option, - pub total_operations_per_second: u32, - pub total_errors_per_second: u32, - pub mean_time: Duration, - pub time_percentiles: Percentiles, - pub total_each_second: Vec, -} - -#[derive(Clone, Debug, Serialize, Deserialize, Default)] -pub struct PubSubReport { - pub total_produce: u64, - pub total_produce_error: u64, - pub total_consume: u64, - pub total_consume_error: u64, - pub total_backlog: i64, - pub requested_produce_per_second: Option, - pub produce_per_second: u32, - pub produce_errors_per_second: u32, - pub consume_per_second: u32, - pub consume_errors_per_second: u32, - pub produce_mean_time: Duration, - pub produce_time_percentiles: Percentiles, - pub produce_each_second: Vec, - pub consume_each_second: Vec, - pub backlog_each_second: Vec, -} - -#[derive(Clone, Debug, Serialize, Deserialize)] -pub enum Metric { - Total { - name: String, - compare: f64, - value: String, - goal: Goal, - }, - EachSecond { - name: String, - values: Vec<(f64, String, Goal)>, - }, - LatencyPercentiles { - name: String, - values: Vec, - }, -} - -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct LatencyPercentile { - pub quantile: String, - pub value: f64, - pub value_display: String, -} - -impl LatencyPercentile { - pub(crate) fn to_measurement(&self) -> 
(f64, String, Goal) { - ( - self.value, - self.value_display.clone(), - Goal::SmallerIsBetter, - ) - } -} - -impl Metric { - pub fn name(&self) -> &str { - match self { - Metric::Total { name, .. } => name, - Metric::EachSecond { name, .. } => name, - Metric::LatencyPercentiles { name, .. } => name, - } - } - - pub(crate) fn identifier(&self) -> MetricIdentifier { - match self { - Metric::Total { name, .. } => MetricIdentifier::Total { - name: name.to_owned(), - }, - Metric::EachSecond { name, .. } => MetricIdentifier::EachSecond { - name: name.to_owned(), - }, - Metric::LatencyPercentiles { name, .. } => MetricIdentifier::LatencyPercentiles { - name: name.to_owned(), - }, - } - } - - #[allow(clippy::len_without_is_empty)] - pub(crate) fn len(&self) -> usize { - match self { - Metric::Total { .. } => 1, - Metric::EachSecond { values, .. } => values.len(), - Metric::LatencyPercentiles { values, .. } => values.len(), - } - } -} - -#[derive(PartialEq)] -pub enum MetricIdentifier { - Total { name: String }, - EachSecond { name: String }, - LatencyPercentiles { name: String }, -} - -fn error_message_insertion(messages: &mut Vec<String>, new_message: String) { - if !messages.contains(&new_message) { - if messages.len() <= 5 { - messages.push(new_message); - } else if messages.len() == 6 { - messages.push("more than 5 unique error messages encountered, most likely they are actually small variants of the same error. Only the first 5 error messages have been logged".to_owned()); - } - } -} - -impl ReportArchive { - fn path(&self) -> PathBuf { - Self::last_run_path().join(self.tags.get_name()) - } - - pub fn load(name: &str) -> Result<Self> { - match std::fs::read(Self::last_run_path().join(name)) { - Ok(bytes) => bincode::deserialize(&bytes).map_err(|e| - anyhow!(e).context("The bench archive from the previous run is not a valid archive, maybe the format changed since the last run") - ), - Err(err) if err.kind() == ErrorKind::NotFound => Err(anyhow!("The bench {name:?} does not exist or was not run in the previous run")), - Err(err) => Err(anyhow!("The bench {name:?} encountered a file read error {err:?}")) - } - } - - pub fn load_baseline(name: &str) -> Result<Option<Self>> { - match std::fs::read(Self::baseline_path().join(name)) { - Ok(bytes) => bincode::deserialize(&bytes) - .map_err(|e| - anyhow!(e).context("The bench archive from the baseline is not a valid archive, maybe the format changed since the baseline was set") - ) - .map(Some), - Err(err) if err.kind() == ErrorKind::NotFound => Ok(None), - Err(err) => Err(anyhow!("The bench {name:?} encountered a file read error {err:?}")) - } - } - - pub fn reports_in_last_run() -> Vec<String> { - let report_dir = Self::last_run_path(); - std::fs::create_dir_all(&report_dir).unwrap(); - - let mut reports: Vec<String> = std::fs::read_dir(report_dir) - .unwrap() - .map(|x| { - x.unwrap() - .path() - .file_name() - .unwrap() - .to_str() - .unwrap() - .to_owned() - }) - .collect(); - reports.sort(); - reports - } - - pub fn save(&self) { - let path = self.path(); - std::fs::create_dir_all(path.parent().unwrap()).unwrap(); - std::fs::write(&path, bincode::serialize(self).unwrap()) - .map_err(|e| panic!("Failed to write to {path:?} {e}")) - .unwrap() - } - - pub(crate) fn clear_last_run() { - let path = Self::last_run_path(); - if path.exists() { - // Just an extra sanity check that we truly are deleting a last_run directory - assert_eq!(path.file_name().unwrap(), "last_run"); - std::fs::remove_dir_all(path).unwrap(); - } - } - - pub fn set_baseline() { - Self::clear_baseline(); - - let
last_run_path = Self::last_run_path(); - let baseline_path = Self::baseline_path(); - if last_run_path.exists() { - copy_dir::copy_dir(last_run_path, baseline_path).unwrap(); - } - } - - pub fn clear_baseline() { - let path = Self::baseline_path(); - if path.exists() { - // Just an extra sanity check that we truly are deleting a baseline directory - assert_eq!(path.file_name().unwrap(), "baseline"); - std::fs::remove_dir_all(path).unwrap(); - } - } - - pub fn last_run_path() -> PathBuf { - let path = windsock_path().join("last_run"); - std::fs::create_dir_all(&path).unwrap(); - path - } - - pub fn baseline_path() -> PathBuf { - windsock_path().join("baseline") - } -} - -pub(crate) async fn report_builder( - tags: Tags, - mut rx: UnboundedReceiver, - requested_ops: Option, - running_in_release: bool, -) -> ReportArchive { - let mut external_report = None; - let mut finished_in = None; - let mut started = None; - let mut pubsub_report = None; - let mut operations_report = None; - let mut operation_times = vec![]; - let mut produce_times = vec![]; - let mut total_operation_time = Duration::from_secs(0); - let mut total_produce_time = Duration::from_secs(0); - let mut error_messages = vec![]; - let mut info_messages = vec![]; - - while let Some(report) = rx.recv().await { - match report { - Report::Start => { - started = Some(OffsetDateTime::now_utc()); - } - Report::AddInfoMessage(message) => info_messages.push(message), - Report::QueryCompletedIn(duration) => { - let report = operations_report.get_or_insert_with(OperationsReport::default); - if started.is_some() { - report.total += 1; - total_operation_time += duration; - operation_times.push(duration); - match report.total_each_second.last_mut() { - Some(last) => *last += 1, - None => report.total_each_second.push(0), - } - } - } - Report::QueryErrored { - completed_in, - message, - } => { - let report = operations_report.get_or_insert_with(OperationsReport::default); - if started.is_some() { - error_message_insertion(&mut error_messages, message); - report.total_errors += 1; - total_operation_time += completed_in; - } - } - Report::ProduceCompletedIn(duration) => { - let report = pubsub_report.get_or_insert_with(PubSubReport::default); - if started.is_some() { - report.total_backlog += 1; - report.total_produce += 1; - total_produce_time += duration; - produce_times.push(duration); - match report.produce_each_second.last_mut() { - Some(last) => *last += 1, - None => report.produce_each_second.push(0), - } - } - } - Report::ProduceErrored { - completed_in, - message, - } => { - let report = pubsub_report.get_or_insert_with(PubSubReport::default); - if started.is_some() { - error_message_insertion(&mut error_messages, message); - report.total_produce_error += 1; - total_produce_time += completed_in; - } - } - Report::ConsumeCompleted => { - let report = pubsub_report.get_or_insert_with(PubSubReport::default); - if started.is_some() { - report.total_backlog -= 1; - report.total_consume += 1; - match report.consume_each_second.last_mut() { - Some(last) => *last += 1, - None => report.consume_each_second.push(0), - } - } - } - Report::ConsumeErrored { message } => { - let report = pubsub_report.get_or_insert_with(PubSubReport::default); - if started.is_some() { - error_message_insertion(&mut error_messages, message); - report.total_consume_error += 1; - } - } - Report::SecondPassed(duration) => { - assert!( - duration >= Duration::from_secs(1) && duration < Duration::from_millis(1050), - "Expected duration to be within 50ms of a second but was 
{duration:?}" - ); - if let Some(report) = operations_report.as_mut() { - report.total_each_second.push(0); - } - if let Some(report) = pubsub_report.as_mut() { - report.produce_each_second.push(0); - report.consume_each_second.push(0); - report.backlog_each_second.push(report.total_backlog); - } - } - Report::FinishedIn(duration) => { - if started.is_none() { - panic!("The bench never returned Report::Start") - } - finished_in = Some(duration); - // immediately drop rx so the benchmarks tasks stop trying to bench, logic doesnt rely on this it just saves resources - std::mem::drop(rx); - break; - } - Report::ExternalBenchmark(report) => { - // immediately drop rx so the benchmarks tasks stop trying to bench, logic doesnt rely on this it just saves resources - std::mem::drop(rx); - - external_report = Some(report); - break; - } - } - } - - if let Some(external_report) = external_report { - started = Some(external_report.bench_started_at); - operations_report = external_report.operations_report; - pubsub_report = external_report.pubsub_report; - error_messages = external_report.error_messages; - } else { - let finished_in = match finished_in { - Some(x) => x, - None => panic!("The bench never returned Report::FinishedIn(..)"), - }; - - if let Some(report) = operations_report.as_mut() { - report.requested_operations_per_second = requested_ops; - report.mean_time = mean_time(&operation_times, total_operation_time); - report.total_operations_per_second = calculate_ops(report.total, finished_in); - report.total_errors_per_second = calculate_ops(report.total_errors, finished_in); - report.time_percentiles = calculate_percentiles(operation_times); - - // This is not a complete result so discard it. - report.total_each_second.pop(); - } - - if let Some(report) = pubsub_report.as_mut() { - report.requested_produce_per_second = requested_ops; - report.produce_mean_time = mean_time(&produce_times, total_produce_time); - report.produce_per_second = calculate_ops(report.total_produce, finished_in); - report.produce_errors_per_second = - calculate_ops(report.total_produce_error, finished_in); - report.consume_per_second = calculate_ops(report.total_consume, finished_in); - report.consume_errors_per_second = - calculate_ops(report.total_consume_error, finished_in); - report.produce_time_percentiles = calculate_percentiles(produce_times); - - // This is not a complete result so discard it. 
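Stepping back from the accumulation details for a moment, the protocol this loop consumes is simple: a bench task emits `Report::Start` once warmup ends, per-request completions and `SecondPassed` ticks after that, and a final `Report::FinishedIn`. A hypothetical sketch of the sender side (the `Report` enum is the one defined in this file; the timings are invented):

```rust
use std::time::{Duration, Instant};
use tokio::sync::mpsc::UnboundedSender;

// Sketch of the producer side of the protocol consumed by report_builder.
fn run_one_operation(tx: &UnboundedSender<Report>, bench_start: Instant) {
    tx.send(Report::Start).unwrap(); // everything received before this is discarded as warmup

    let request_start = Instant::now();
    // ... send one request to the service and wait for the response ...
    tx.send(Report::QueryCompletedIn(request_start.elapsed())).unwrap();

    // must be within 50ms over one second, per the assert above
    tx.send(Report::SecondPassed(Duration::from_secs(1))).unwrap();

    tx.send(Report::FinishedIn(bench_start.elapsed())).unwrap();
}
```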
- report.produce_each_second.pop(); - report.consume_each_second.pop(); - } - } - - let archive = ReportArchive { - bench_started_at: started.unwrap(), - running_in_release, - tags, - pubsub_report, - error_messages, - info_messages, - operations_report, - metrics: vec![], - }; - archive.save(); - archive -} - -fn mean_time(times: &[Duration], total_time: Duration) -> Duration { - if !times.is_empty() { - total_time / times.len() as u32 - } else { - Duration::from_secs(0) - } -} - -fn calculate_ops(total: u64, finished_in: Duration) -> u32 { - (total as u128 / (finished_in.as_nanos() / 1_000_000_000)) as u32 -} - -fn calculate_percentiles(mut times: Vec) -> Percentiles { - let mut percentiles = [Duration::ZERO; Percentile::COUNT]; - times.sort(); - if !times.is_empty() { - for (i, p) in Percentile::iter().enumerate() { - let percentile_index = (p.value() * times.len() as f64) as usize; - // Need to cap at last index, otherwise the MAX percentile will overflow by 1 - let index = percentile_index.min(times.len() - 1); - percentiles[i] = times[index]; - } - } - percentiles -} diff --git a/windsock/src/tables.rs b/windsock/src/tables.rs deleted file mode 100644 index 21faca8c7..000000000 --- a/windsock/src/tables.rs +++ /dev/null @@ -1,915 +0,0 @@ -use crate::{ - bench::Tags, - filter::Filter, - report::{MetricIdentifier, Percentile, ReportArchive}, - Metric, -}; -use anyhow::{Context, Result}; -use console::{pad_str, pad_str_with, style, Alignment}; -use serde::{Deserialize, Serialize}; -use std::{collections::HashSet, time::Duration}; -use strum::IntoEnumIterator; - -pub(crate) struct ReportColumn { - pub(crate) baseline: Option, - pub(crate) current: ReportArchive, -} - -impl ReportColumn { - pub fn load(name: &str) -> Result { - Ok(ReportColumn { - baseline: None, - current: ReportArchive::load(name)?, - }) - } - - pub fn load_with_baseline(name: &str) -> Result { - Ok(ReportColumn { - baseline: ReportArchive::load_baseline(name)?, - current: ReportArchive::load(name)?, - }) - } -} - -pub fn compare_by_name(names: &str) -> Result<()> { - let columns: Result> = - names.split_whitespace().map(ReportColumn::load).collect(); - let mut columns = columns?; - - let baseline = columns.first().map(|x| x.current.clone()); - for column in &mut columns.iter_mut().skip(1) { - column.baseline = baseline.clone(); - } - - display_compare_table(&columns); - Ok(()) -} - -pub fn compare_by_tags(arg: &str) -> Result<()> { - let mut split = arg.split_whitespace(); - let base_name = split.next().unwrap().to_owned(); - let base = ReportArchive::load(&base_name)?; - - let tag_args: Vec<_> = split.collect(); - let tag_args = tag_args.join(" "); - - let filter = Filter::from_query(&tag_args) - .with_context(|| format!("Failed to parse tag filter from {:?}", tag_args))?; - let archives: Result> = ReportArchive::reports_in_last_run() - .iter() - .filter(|name| **name != base_name && filter.matches(&Tags::from_name(name))) - .map(|x| { - Ok(ReportColumn { - baseline: Some(base.clone()), - current: ReportArchive::load(x)?, - }) - }) - .collect(); - let mut archives = archives?; - - archives.insert( - 0, - ReportColumn { - baseline: None, - current: base, - }, - ); - - display_compare_table(&archives); - - Ok(()) -} - -pub fn results(ignore_baseline: bool, filter: &str) -> Result<()> { - let filter = Filter::from_query(filter) - .with_context(|| format!("Failed to parse tag filter from {:?}", filter))?; - let archives: Result> = ReportArchive::reports_in_last_run() - .iter() - .filter(|name| 
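As a quick sanity check of the index arithmetic in `calculate_percentiles` above: with 200 sorted samples, P50 selects index 100, P99 selects index 198, and Max computes index 200, which the cap clamps to the final index 199. A standalone sketch of the same formula:

```rust
// Same index arithmetic as calculate_percentiles; assumes len > 0, as guarded above.
fn percentile_index(p: f64, len: usize) -> usize {
    ((p * len as f64) as usize).min(len - 1)
}

fn main() {
    assert_eq!(percentile_index(0.50, 200), 100);
    assert_eq!(percentile_index(0.99, 200), 198);
    assert_eq!(percentile_index(1.0, 200), 199); // Max would index out of bounds without the cap
}
```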
filter.matches(&Tags::from_name(name))) - .map(|x| { - if ignore_baseline { - ReportColumn::load(x) - } else { - ReportColumn::load_with_baseline(x) - } - }) - .collect(); - let archives = archives?; - if archives.iter().any(|x| x.baseline.is_some()) { - // If there are any baselines then compare against baselines - display_baseline_compare_table(&archives); - } else { - // Otherwise display just results without any comparison - display_results_table(&archives); - } - - Ok(()) -} - -pub(crate) fn display_baseline_compare_table(reports: &[ReportColumn]) { - if reports.is_empty() { - println!("Need at least one report to display baseline comparison"); - return; - } - - base(reports, "Comparison against baseline"); -} - -pub(crate) fn display_compare_table(reports: &[ReportColumn]) { - if reports.len() < 2 { - println!("Need at least two reports to display a comparison against first column"); - return; - } - - base(reports, "Comparison against first column"); -} - -pub(crate) fn display_results_table(reports: &[ReportColumn]) { - if reports.is_empty() { - println!("Need at least one report to display results"); - return; - } - - base(reports, "Results"); -} - -fn base(reports: &[ReportColumn], table_type: &str) { - // if the user has set CARGO_TERM_COLOR to force cargo to use colors then they probably want us to use colors too - if std::env::var("CARGO_TERM_COLOR") - .map(|x| x.to_lowercase() == "always") - .unwrap_or(false) - { - console::set_colors_enabled(true); - } - - let mut intersection = reports[0].current.tags.clone(); - for report in reports { - intersection = intersection.intersection(&report.current.tags); - } - - let mut rows = vec![]; - rows.push(Row::Heading(format!( - "{} for {}", - table_type, - intersection.get_name() - ))); - - let intersection_keys = intersection.keys(); - let mut nonintersecting_keys: Vec = reports - .iter() - .fold(HashSet::new(), |acc, x| { - acc.union( - &x.current - .tags - .keys() - .difference(&intersection_keys) - .cloned() - .collect(), - ) - .cloned() - .collect() - }) - .into_iter() - .collect(); - nonintersecting_keys.sort(); - if !nonintersecting_keys.is_empty() { - rows.push(Row::Heading("Unique Tags".to_owned())); - } - for key in nonintersecting_keys { - rows.push(Row::ColumnNames { - names: reports - .iter() - .map(|x| x.current.tags.0.get(&key).cloned().unwrap_or("".to_owned())) - .collect(), - legend: key, - }); - } - - if reports - .iter() - .any(|x| x.current.operations_report.is_some()) - { - rows.push(Row::Heading("Opns (Operations)".to_owned())); - rows.push(Row::measurements(reports, "Total Opns", |report| { - report.operations_report.as_ref().map(|report| { - ( - report.total as f64, - report.total.to_string(), - Goal::BiggerIsBetter, - ) - }) - })); - rows.push(Row::measurements(reports, "Total Errors", |report| { - report.operations_report.as_ref().map(|report| { - ( - report.total_errors as f64, - report.total_errors.to_string(), - Goal::SmallerIsBetter, - ) - }) - })); - rows.push(Row::measurements( - reports, - "Target Opns Per Sec", - |report| { - report.operations_report.as_ref().map(|report| { - ( - report - .requested_operations_per_second - .map(|x| x as f64) - .unwrap_or(f64::INFINITY), - report - .requested_operations_per_second - .map(|x| x.to_string()) - .unwrap_or("MAX".to_owned()), - Goal::BiggerIsBetter, - ) - }) - }, - )); - rows.push(Row::measurements(reports, "Opns Per Sec", |report| { - report.operations_report.as_ref().map(|report| { - ( - report.total_operations_per_second as f64, - format!("{:.0}", 
report.total_operations_per_second), - Goal::BiggerIsBetter, - ) - }) - })); - rows.push(Row::measurements(reports, "Errors Per Sec", |report| { - report.operations_report.as_ref().map(|report| { - ( - report.total_errors_per_second as f64, - format!("{:.0}", report.total_errors_per_second), - Goal::SmallerIsBetter, - ) - }) - })); - - rows.push(Row::measurements(reports, "Opn Time Mean", |report| { - report.operations_report.as_ref().map(|report| { - ( - report.mean_time.as_secs_f64(), - duration_ms(report.mean_time), - Goal::SmallerIsBetter, - ) - }) - })); - - rows.push(Row::Heading("Opn Time Percentiles".to_owned())); - for (i, p) in Percentile::iter().enumerate() { - rows.push(Row::measurements(reports, p.name(), |report| { - report.operations_report.as_ref().map(|report| { - ( - report.time_percentiles[i].as_secs_f64(), - duration_ms(report.time_percentiles[i]), - Goal::SmallerIsBetter, - ) - }) - })); - } - - rows.push(Row::Heading("Opns Each Second".to_owned())); - for i in 0..reports - .iter() - .map(|x| { - x.current - .operations_report - .as_ref() - .map(|report| report.total_each_second.len()) - .unwrap_or(0) - }) - .max() - .unwrap() - { - rows.push(Row::measurements(reports, &i.to_string(), |report| { - report.operations_report.as_ref().and_then(|report| { - report - .total_each_second - .get(i) - .map(|value| (*value as f64, value.to_string(), Goal::BiggerIsBetter)) - }) - })); - } - } - - if reports.iter().any(|x| x.current.pubsub_report.is_some()) { - rows.push(Row::Heading("Produce/Consume".to_owned())); - rows.push(Row::measurements(reports, "Total Produce", |report| { - report.pubsub_report.as_ref().map(|report| { - ( - report.total_produce as f64, - report.total_produce.to_string(), - Goal::BiggerIsBetter, - ) - }) - })); - rows.push(Row::measurements( - reports, - "Errors Total Produce", - |report| { - report.pubsub_report.as_ref().map(|report| { - ( - report.total_produce_error as f64, - report.total_produce_error.to_string(), - Goal::SmallerIsBetter, - ) - }) - }, - )); - rows.push(Row::measurements(reports, "Total Consume", |report| { - report.pubsub_report.as_ref().map(|report| { - ( - report.total_consume as f64, - report.total_consume.to_string(), - Goal::BiggerIsBetter, - ) - }) - })); - rows.push(Row::measurements( - reports, - "Errors Total Consume", - |report| { - report.pubsub_report.as_ref().map(|report| { - ( - report.total_consume_error as f64, - report.total_consume_error.to_string(), - Goal::SmallerIsBetter, - ) - }) - }, - )); - rows.push(Row::measurements(reports, "Total Backlog", |report| { - report.pubsub_report.as_ref().map(|report| { - ( - report.total_backlog as f64, - report.total_backlog.to_string(), - Goal::SmallerIsBetter, - ) - }) - })); - - rows.push(Row::measurements( - reports, - "Target Produce Per Sec", - |report| { - report.pubsub_report.as_ref().map(|report| { - ( - report - .requested_produce_per_second - .map(|x| x as f64) - .unwrap_or(f64::INFINITY), - report - .requested_produce_per_second - .map(|x| x.to_string()) - .unwrap_or("MAX".to_owned()), - Goal::BiggerIsBetter, - ) - }) - }, - )); - rows.push(Row::measurements(reports, "Produce Per Sec", |report| { - report.pubsub_report.as_ref().map(|report| { - ( - report.produce_per_second as f64, - format!("{:.0}", report.produce_per_second), - Goal::BiggerIsBetter, - ) - }) - })); - rows.push(Row::measurements( - reports, - "Errors Produce Per Sec", - |report| { - report.pubsub_report.as_ref().map(|report| { - ( - report.produce_errors_per_second as f64, - format!("{:.0}", 
report.produce_errors_per_second), - Goal::SmallerIsBetter, - ) - }) - }, - )); - rows.push(Row::measurements(reports, "Consume Per Sec", |report| { - report.pubsub_report.as_ref().map(|report| { - ( - report.consume_per_second as f64, - format!("{:.0}", report.consume_per_second), - Goal::BiggerIsBetter, - ) - }) - })); - rows.push(Row::measurements( - reports, - "Errors Consume Per Sec", - |report| { - report.pubsub_report.as_ref().map(|report| { - ( - report.consume_errors_per_second as f64, - format!("{:.0}", report.consume_errors_per_second), - Goal::SmallerIsBetter, - ) - }) - }, - )); - - rows.push(Row::measurements(reports, "Produce Time Mean", |report| { - report.pubsub_report.as_ref().map(|report| { - ( - report.produce_mean_time.as_secs_f64(), - duration_ms(report.produce_mean_time), - Goal::SmallerIsBetter, - ) - }) - })); - - rows.push(Row::Heading("Produce Time Percentiles".to_owned())); - for (i, p) in Percentile::iter().enumerate() { - rows.push(Row::measurements(reports, p.name(), |report| { - report.pubsub_report.as_ref().map(|report| { - ( - report.produce_time_percentiles[i].as_secs_f64(), - duration_ms(report.produce_time_percentiles[i]), - Goal::SmallerIsBetter, - ) - }) - })); - } - - rows.push(Row::Heading("Produce Each Second".to_owned())); - for i in 0..reports - .iter() - .map(|x| { - x.current - .pubsub_report - .as_ref() - .map(|report| report.produce_each_second.len()) - .unwrap_or(0) - }) - .max() - .unwrap() - { - rows.push(Row::measurements(reports, &i.to_string(), |report| { - report.pubsub_report.as_ref().and_then(|report| { - report - .produce_each_second - .get(i) - .map(|value| (*value as f64, value.to_string(), Goal::BiggerIsBetter)) - }) - })); - } - - rows.push(Row::Heading("Consume Each Second".to_owned())); - for i in 0..reports - .iter() - .map(|x| { - x.current - .pubsub_report - .as_ref() - .map(|report| report.consume_each_second.len()) - .unwrap_or(0) - }) - .max() - .unwrap() - { - rows.push(Row::measurements(reports, &i.to_string(), |report| { - report.pubsub_report.as_ref().and_then(|report| { - report - .consume_each_second - .get(i) - .map(|value| (*value as f64, value.to_string(), Goal::BiggerIsBetter)) - }) - })); - } - - rows.push(Row::Heading("Total Backlog Each Second".to_owned())); - for i in 0..reports - .iter() - .map(|x| { - x.current - .pubsub_report - .as_ref() - .map(|report| report.backlog_each_second.len()) - .unwrap_or(0) - }) - .max() - .unwrap() - { - rows.push(Row::measurements(reports, &i.to_string(), |report| { - report.pubsub_report.as_ref().and_then(|report| { - report - .backlog_each_second - .get(i) - .map(|value| (*value as f64, value.to_string(), Goal::SmallerIsBetter)) - }) - })); - } - } - - let mut metrics_to_display = vec![]; - for report in reports { - for metric in &report.current.metrics { - if !metrics_to_display.contains(&metric.identifier()) { - metrics_to_display.push(metric.identifier()) - } - } - } - for metric_identifier in metrics_to_display { - match &metric_identifier { - MetricIdentifier::Total { name } => { - rows.push(Row::measurements(reports, name, |report| { - report - .metrics - .iter() - .find(|metric| metric.identifier() == metric_identifier) - .map(|metric| match metric { - Metric::Total { - compare, - value, - goal, - .. 
- } => (*compare, value.to_owned(), *goal), - _ => unreachable!(), - }) - })); - } - MetricIdentifier::EachSecond { name } => { - rows.push(Row::Heading(format!("{name} Each Second"))); - for i in 0..reports - .iter() - .map(|x| { - x.current - .metrics - .iter() - .find(|x| x.identifier() == metric_identifier) - .map(|metric| metric.len()) - .unwrap_or(0) - }) - .max() - .unwrap() - { - rows.push(Row::measurements(reports, &i.to_string(), |report| { - report - .metrics - .iter() - .find(|x| x.identifier() == metric_identifier) - .and_then(|metric| match metric { - Metric::EachSecond { values, .. } => values.get(i).cloned(), - _ => unreachable!(), - }) - })); - } - } - MetricIdentifier::LatencyPercentiles { name } => { - rows.push(Row::Heading(format!("{name} Percentiles"))); - for (i, largest_col) in reports - .iter() - .map(|x| { - x.current - .metrics - .iter() - .find(|x| x.identifier() == metric_identifier) - .map(|metric| match metric { - Metric::LatencyPercentiles { values, .. } => values.clone(), - _ => unreachable!(), - }) - .unwrap_or(vec![]) - }) - .max_by_key(|x| x.len()) - .unwrap() - .into_iter() - .enumerate() - { - rows.push(Row::measurements( - reports, - &largest_col.quantile, - |report| { - report - .metrics - .iter() - .find(|x| x.identifier() == metric_identifier) - .and_then(|metric| match metric { - Metric::LatencyPercentiles { values, .. } => { - values.get(i).map(|x| x.to_measurement()) - } - _ => unreachable!(), - }) - }, - )); - } - } - } - } - - // the width of the legend column - let legend_width: usize = rows - .iter() - .skip(1) // skip the main heading because its big and its alignment doesnt matter - .map(|x| match x { - Row::Heading(heading) => heading.len(), - Row::ColumnNames { legend, .. } => legend.len(), - Row::Measurements { legend, .. } => legend.len(), - }) - .max() - .unwrap_or(10); - // the width of the comparison component of each column - let comparison_widths: Vec = reports - .iter() - .enumerate() - .map(|(i, _)| { - rows.iter() - .map(|x| match x { - Row::Heading(_) => 0, - Row::ColumnNames { .. } => 0, - Row::Measurements { measurements, .. } => measurements[i].comparison.len() + 1, // + 1 ensures we get separation from the previous column - }) - .max() - .unwrap() - }) - .collect(); - // the width of each entire column - let column_widths: Vec = reports - .iter() - .enumerate() - .map(|(i, _)| { - rows.iter() - .map(|x| match x { - Row::Heading(_) => 0, // Ignore these - Row::ColumnNames { names, .. } => names[i].len() + 1, // + 1 ensures we get separation from the previous column - Row::Measurements { measurements, .. 
} => { - measurements[i].value.len() + 1 // ensures we get seperation from the previous column - + comparison_widths[i] - } - }) - .max() - .unwrap() - }) - .collect(); - let total_width = legend_width + column_widths.iter().sum::(); - - for row in rows { - match row { - Row::Heading(heading) => { - println!( - "{}", - style(pad_str_with( - &format!("{} ", heading), - total_width, - Alignment::Left, - None, - '═' - )) - .yellow() - .bold() - ) - } - Row::ColumnNames { legend, names } => { - print!( - "{}", - style(pad_str(&legend, legend_width, Alignment::Right, None)) - .yellow() - .bold() - ); - for (i, name) in names.into_iter().enumerate() { - print!( - " {}", - style(pad_str_with( - &name, - column_widths[i] - 1, - Alignment::Center, - None, - '─', - )) - ) - } - println!() - } - Row::Measurements { - legend, - measurements, - } => { - print!( - "{}", - style(pad_str(&legend, legend_width, Alignment::Right, None)) - .yellow() - .bold() - ); - for (i, measurement) in measurements.into_iter().enumerate() { - let colorer = match measurement.color { - Color::Good => |x| style(x).green(), - Color::Bad => |x| style(x).red(), - Color::Neutral => |x| style(x).dim(), - }; - let contents = format!( - "{}{}", - measurement.value, - colorer(pad_str( - &measurement.comparison, - comparison_widths[i], - Alignment::Right, - None - )), - ); - print!( - "{}", - pad_str(&contents, column_widths[i], Alignment::Right, None), - ); - } - println!() - } - } - } - - for report in reports { - if !report.current.error_messages.is_empty() { - let error = format!( - "Bench encountered errors: {}", - report.current.tags.get_name() - ); - println!("{}", style(error).red().bold()); - for (i, message) in report.current.error_messages.iter().enumerate() { - println!(" {i}. {message}"); - } - } - - if let Some(baseline) = &report.baseline { - if !baseline.error_messages.is_empty() { - let error = format!( - "Bench baseline encountered errors: {}", - report.current.tags.get_name() - ); - println!("{}", style(error).red().bold()); - for (i, message) in report.current.error_messages.iter().enumerate() { - println!(" {i}. {message}"); - } - } - } - } - - let errors_found = reports.iter().any(|x| { - !x.current.error_messages.is_empty() - || x.baseline - .as_ref() - .map(|x| !x.error_messages.is_empty()) - .unwrap_or(false) - }); - let not_running_in_release_found = reports.iter().any(|x| { - !x.current.running_in_release - || x.baseline - .as_ref() - .map(|x| !x.running_in_release) - .unwrap_or(false) - }); - let info_found = reports.iter().any(|x| { - !x.current.info_messages.is_empty() - || x.baseline - .as_ref() - .map(|x| !x.info_messages.is_empty()) - .unwrap_or(false) - }); - - if errors_found && not_running_in_release_found { - // ensure these two sections are kept apart - println!(); - } - - for report in reports { - if !report.current.running_in_release { - let error = format!( - "Bench results invalid! Bench compiled with non-release profile: {}", - report.current.tags.get_name() - ); - println!("{}", style(error).red().bold()); - } - - if let Some(baseline) = &report.baseline { - if !baseline.running_in_release { - let error = format!( - "Baseline bench results invalid! 
Baseline bench compiled with non-release profile: {}", - baseline.tags.get_name() - ); - println!("{}", style(error).red().bold()); - } - } - } - - #[allow(clippy::nonminimal_bool)] - if info_found - && (not_running_in_release_found || (errors_found && !not_running_in_release_found)) - { - // ensure these two sections are kept apart - println!(); - } - - for report in reports { - if !report.current.info_messages.is_empty() { - let error = format!("notes for {}", report.current.tags.get_name()); - println!("{}", style(error).blue().bold()); - for (i, message) in report.current.info_messages.iter().enumerate() { - println!(" {i}. {message}"); - } - } - - if let Some(baseline) = &report.baseline { - if !baseline.info_messages.is_empty() { - let error = format!("notes for baseline {}", report.current.tags.get_name()); - println!("{}", style(error).blue().bold()); - for (i, message) in report.current.info_messages.iter().enumerate() { - println!(" {i}. {message}"); - } - } - } - } -} - -fn duration_ms(duration: Duration) -> String { - format!("{:.3}ms", duration.as_micros() as f32 / 1000.0) -} - -enum Row { - Heading(String), - ColumnNames { - legend: String, - names: Vec<String>, - }, - Measurements { - legend: String, - measurements: Vec<Measurement>, - }, -} - -struct Measurement { - value: String, - comparison: String, - color: Color, -} - -#[derive(Clone, Copy, Debug, Serialize, Deserialize)] -pub enum Goal { - BiggerIsBetter, - SmallerIsBetter, - None, -} - -enum Color { - Good, - Bad, - Neutral, -} - -impl Row { - fn measurements<F: Fn(&ReportArchive) -> Option<(f64, String, Goal)>>( - reports: &[ReportColumn], - legend: &str, - f: F, - ) -> Row { - let legend = legend.to_owned(); - let measurements = reports - .iter() - .map(|x| { - let (value, comparison, comparison_raw, goal) = - if let Some((compare, value, goal)) = f(&x.current) { - if let Some((base, _, _)) = x.baseline.as_ref().and_then(&f) { - let comparison_raw: f64 = (compare - base) / base * 100.0; - let comparison = if comparison_raw.is_nan() { - "-".into() - } else { - format!("{:+.1}%", comparison_raw) - }; - - (value, comparison, comparison_raw, goal) - } else { - (value, "".to_owned(), 0.0, Goal::BiggerIsBetter) - } - } else { - ("".to_owned(), "".to_owned(), 0.0, Goal::BiggerIsBetter) - }; - - let color = if comparison_raw > 5.0 { - if let Goal::BiggerIsBetter = goal { - Color::Good - } else { - Color::Bad - } - } else if comparison_raw < -5.0 { - if let Goal::SmallerIsBetter = goal { - Color::Good - } else { - Color::Bad - } - } else { - Color::Neutral - }; - Measurement { - value, - comparison, - color, - } - }) - .collect(); - Row::Measurements { - legend, - measurements, - } - } -}
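A quick worked example of the comparison column logic above: with a baseline of 1000 ops/s and a current run of 1100 ops/s, `comparison_raw` is (1100 - 1000) / 1000 * 100 = +10.0, rendered as `+10.0%`. Since that exceeds the 5.0 threshold it is colored green under `Goal::BiggerIsBetter`, while the same +10% on a `SmallerIsBetter` metric (a latency, say) would be colored red; differences within ±5% stay neutral and are dimmed.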
From 1d23452d88f290409e99a00990bd2eb95b7bc166 Mon Sep 17 00:00:00 2001 From: Conor Date: Thu, 14 Mar 2024 16:38:22 +1100 Subject: [PATCH 3/4] Kafka cluster sasl (#1513) --- docs/src/transforms.md | 5 +- .../benches/windsock/kafka/bench.rs | 1 + shotover-proxy/tests/kafka_int_tests/mod.rs | 62 ++++++++++++ .../kafka/cluster-sasl/docker-compose.yaml | 60 ++++++++++++ .../kafka/cluster-sasl/topology-single.yaml | 11 +++ .../kafka/cluster-sasl/topology1.yaml | 14 +++ .../kafka/cluster-sasl/topology2.yaml | 14 +++ .../kafka/cluster-sasl/topology3.yaml | 14 +++ .../src/transforms/kafka/sink_cluster/mod.rs | 94 ++++++++++++++++++- .../src/transforms/kafka/sink_cluster/node.rs | 43 ++++++++- 10 files changed, 315 insertions(+), 3 deletions(-) create mode 100644 shotover-proxy/tests/test-configs/kafka/cluster-sasl/docker-compose.yaml create mode 100644 shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology-single.yaml create mode 100644 shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology1.yaml create mode 100644 shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology2.yaml create mode 100644 shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology3.yaml diff --git a/docs/src/transforms.md b/docs/src/transforms.md index 33bad3387..808f113d1 100644 --- a/docs/src/transforms.md +++ b/docs/src/transforms.md @@ -284,7 +284,7 @@ Instead Shotover will pretend to be either a single Kafka node or part of a clus This is achieved by rewriting the FindCoordinator, Metadata and DescribeCluster messages to contain the nodes in the shotover cluster instead of the kafka cluster. ```yaml -- CassandraSinkCluster: +- KafkaSinkCluster: # Addresses of the initial kafka brokers to connect to. first_contact_points: ["172.16.1.2:9042", "172.16.1.3:9042"] @@ -309,6 +309,9 @@ This is achieved by rewriting the FindCoordinator, Metadata and DescribeCluster # When a timeout occurs the connection to the client is immediately closed. # read_timeout: 60 + # When enabled, this field allows the use of SASL authentication. If you intend to use SASL this field must be enabled; it defaults to false. + sasl_enabled: false + # When this field is provided TLS is used when connecting to the remote address. # Removing this field will disable TLS. #tls: diff --git a/shotover-proxy/benches/windsock/kafka/bench.rs b/shotover-proxy/benches/windsock/kafka/bench.rs index 951cbcb02..e6713fa64 100644 --- a/shotover-proxy/benches/windsock/kafka/bench.rs +++ b/shotover-proxy/benches/windsock/kafka/bench.rs @@ -94,6 +94,7 @@ impl KafkaBench { first_contact_points: vec![kafka_address], shotover_nodes: vec![host_address.clone()], tls: None, + sasl_enabled: Some(false), }), }); common::generate_topology(SourceConfig::Kafka(shotover::sources::kafka::KafkaConfig { diff --git a/shotover-proxy/tests/kafka_int_tests/mod.rs b/shotover-proxy/tests/kafka_int_tests/mod.rs index fd4418f60..dd2d416e7 100644 --- a/shotover-proxy/tests/kafka_int_tests/mod.rs +++ b/shotover-proxy/tests/kafka_int_tests/mod.rs @@ -253,3 +253,65 @@ async fn cluster_2_racks_multi_shotover(#[case] driver: KafkaDriver) { .expect("Shotover did not shutdown within 10s"); } } + +#[cfg(feature = "rdkafka-driver-tests")] +#[rstest] +#[cfg_attr(feature = "rdkafka-driver-tests", case::cpp(KafkaDriver::Cpp))] +// #[case::java(KafkaDriver::Java)] +#[tokio::test] +async fn cluster_sasl_single_shotover(#[case] driver: KafkaDriver) { + let _docker_compose = + docker_compose("tests/test-configs/kafka/cluster-sasl/docker-compose.yaml"); + + let shotover = shotover_process("tests/test-configs/kafka/cluster-sasl/topology-single.yaml") + .start() + .await; + + let connection_builder = + KafkaConnectionBuilder::new(driver, "127.0.0.1:9192").use_sasl("user", "password"); + test_cases::basic(connection_builder).await; + + tokio::time::timeout( + Duration::from_secs(10), + shotover.shutdown_and_then_consume_events(&[]), + ) + .await + .expect("Shotover did not shutdown within 10s"); +} + +#[cfg(feature = "rdkafka-driver-tests")] +#[rstest] +#[cfg_attr(feature = "rdkafka-driver-tests", case::cpp(KafkaDriver::Cpp))] +// #[case::java(KafkaDriver::Java)] +#[tokio::test] +async fn cluster_sasl_multi_shotover(#[case] driver: KafkaDriver) { + let _docker_compose = + docker_compose("tests/test-configs/kafka/cluster-sasl/docker-compose.yaml"); + let mut shotovers = vec![]; + for i in 1..4 { + shotovers.push(
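Before the multi-shotover test continues below, it is worth noting that the same configuration the new topology files express can also be built programmatically. A sketch mirroring topology-single.yaml (the field set matches the config struct as of this patch; the addresses are placeholders):

```rust
// Hypothetical programmatic equivalent of topology-single.yaml.
let config = KafkaSinkClusterConfig {
    first_contact_points: vec!["172.16.1.2:9092".to_owned()],
    shotover_nodes: vec!["127.0.0.1:9192".to_owned()],
    connect_timeout_ms: 3000,
    read_timeout: None,
    tls: None,
    sasl_enabled: Some(true), // omit or set to false when the cluster does not use SASL
};
```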
shotover_process(&format!( + "tests/test-configs/kafka/cluster-sasl/topology{i}.yaml" + )) + .with_config(&format!( + "tests/test-configs/shotover-config/config{i}.yaml" + )) + .with_log_name(&format!("shotover{i}")) + .start() + .await, + ); + } + + let connection_builder = + KafkaConnectionBuilder::new(driver, "127.0.0.1:9192").use_sasl("user", "password"); + test_cases::basic(connection_builder).await; + + for shotover in shotovers { + tokio::time::timeout( + Duration::from_secs(10), + shotover.shutdown_and_then_consume_events(&[]), + ) + .await + .expect("Shotover did not shutdown within 10s"); + } +} diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-sasl/docker-compose.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-sasl/docker-compose.yaml new file mode 100644 index 000000000..6abfe70a5 --- /dev/null +++ b/shotover-proxy/tests/test-configs/kafka/cluster-sasl/docker-compose.yaml @@ -0,0 +1,60 @@ +version: '3' + +networks: + cluster_subnet: + name: cluster_subnet + driver: bridge + ipam: + driver: default + config: + - subnet: 172.16.1.0/24 + gateway: 172.16.1.1 + +services: + kafka0: + image: &image 'bitnami/kafka:3.6.1-debian-11-r24' + networks: + cluster_subnet: + ipv4_address: 172.16.1.2 + environment: &environment + KAFKA_CFG_NODE_ID: 0 + KAFKA_CFG_PROCESS_ROLES: "controller,broker" + KAFKA_CFG_CONTROLLER_QUORUM_VOTERS: "0@kafka0:9093,1@kafka1:9093,2@kafka2:9093" + KAFKA_CFG_LISTENERS: "SASL_PLAINTEXT://:9092,CONTROLLER://:9093" + KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP: "CONTROLLER:SASL_PLAINTEXT,SASL_PLAINTEXT:SASL_PLAINTEXT" + KAFKA_CFG_ADVERTISED_LISTENERS: "SASL_PLAINTEXT://172.16.1.2:9092" + KAFKA_CLIENT_USERS: "user" + KAFKA_CLIENT_PASSWORDS: "password" + KAFKA_CFG_CONTROLLER_LISTENER_NAMES: "CONTROLLER" + KAFKA_CFG_SASL_MECHANISM_CONTROLLER_PROTOCOL: "PLAIN" + KAFKA_CONTROLLER_USER: "controller_user" + KAFKA_CONTROLLER_PASSWORD: "controller_password" + KAFKA_CFG_INTER_BROKER_LISTENER_NAME: "SASL_PLAINTEXT" + KAFKA_CFG_SASL_MECHANISM_INTER_BROKER_PROTOCOL: "PLAIN" + KAFKA_INTER_BROKER_USER: "controller_user" + KAFKA_INTER_BROKER_PASSWORD: "controller_password" + KAFKA_CERTIFICATE_PASSWORD: "123456" + KAFKA_KRAFT_CLUSTER_ID: "abcdefghijklmnopqrstuv" + volumes: &volumes + - type: tmpfs + target: /bitnami/kafka + kafka1: + image: *image + networks: + cluster_subnet: + ipv4_address: 172.16.1.3 + environment: + <<: *environment + KAFKA_CFG_ADVERTISED_LISTENERS: "SASL_PLAINTEXT://172.16.1.3:9092" + KAFKA_CFG_NODE_ID: 1 + volumes: *volumes + kafka2: + image: *image + networks: + cluster_subnet: + ipv4_address: 172.16.1.4 + environment: + <<: *environment + KAFKA_CFG_ADVERTISED_LISTENERS: "SASL_PLAINTEXT://172.16.1.4:9092" + KAFKA_CFG_NODE_ID: 2 + volumes: *volumes diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology-single.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology-single.yaml new file mode 100644 index 000000000..6daa7a417 --- /dev/null +++ b/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology-single.yaml @@ -0,0 +1,11 @@ +--- +sources: + - Kafka: + name: "kafka" + listen_addr: "127.0.0.1:9192" + chain: + - KafkaSinkCluster: + sasl_enabled: true + shotover_nodes: ["127.0.0.1:9192"] + first_contact_points: ["172.16.1.2:9092"] + connect_timeout_ms: 3000 diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology1.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology1.yaml new file mode 100644 index 000000000..4f141c8b6 --- /dev/null +++ 
b/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology1.yaml @@ -0,0 +1,14 @@ +--- +sources: + - Kafka: + name: "kafka" + listen_addr: "127.0.0.1:9191" + chain: + - KafkaSinkCluster: + sasl_enabled: true + shotover_nodes: + - "127.0.0.1:9191" + - "127.0.0.1:9192" + - "127.0.0.1:9193" + first_contact_points: ["172.16.1.2:9092"] + connect_timeout_ms: 3000 diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology2.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology2.yaml new file mode 100644 index 000000000..9cc781468 --- /dev/null +++ b/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology2.yaml @@ -0,0 +1,14 @@ +--- +sources: + - Kafka: + name: "kafka" + listen_addr: "127.0.0.1:9192" + chain: + - KafkaSinkCluster: + sasl_enabled: true + shotover_nodes: + - "127.0.0.1:9191" + - "127.0.0.1:9192" + - "127.0.0.1:9193" + first_contact_points: ["172.16.1.2:9092"] + connect_timeout_ms: 3000 diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology3.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology3.yaml new file mode 100644 index 000000000..95e2fd375 --- /dev/null +++ b/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology3.yaml @@ -0,0 +1,14 @@ +--- +sources: + - Kafka: + name: "kafka" + listen_addr: "127.0.0.1:9193" + chain: + - KafkaSinkCluster: + sasl_enabled: true + shotover_nodes: + - "127.0.0.1:9191" + - "127.0.0.1:9192" + - "127.0.0.1:9193" + first_contact_points: ["172.16.1.2:9092"] + connect_timeout_ms: 3000 diff --git a/shotover/src/transforms/kafka/sink_cluster/mod.rs b/shotover/src/transforms/kafka/sink_cluster/mod.rs index f09ee1a7f..6241287cc 100644 --- a/shotover/src/transforms/kafka/sink_cluster/mod.rs +++ b/shotover/src/transforms/kafka/sink_cluster/mod.rs @@ -41,6 +41,7 @@ pub struct KafkaSinkClusterConfig { pub connect_timeout_ms: u64, pub read_timeout: Option, pub tls: Option, + pub sasl_enabled: Option, } const NAME: &str = "KafkaSinkCluster"; @@ -59,6 +60,7 @@ impl TransformConfig for KafkaSinkClusterConfig { self.connect_timeout_ms, self.read_timeout, tls, + self.sasl_enabled.unwrap_or(false), ))) } } @@ -74,6 +76,7 @@ pub struct KafkaSinkClusterBuilder { topics: Arc>, nodes_shared: Arc>>, tls: Option, + sasl_enabled: bool, } impl KafkaSinkClusterBuilder { @@ -84,6 +87,7 @@ impl KafkaSinkClusterBuilder { connect_timeout_ms: u64, timeout: Option, tls: Option, + sasl_enabled: bool, ) -> KafkaSinkClusterBuilder { let receive_timeout = timeout.map(Duration::from_secs); @@ -108,6 +112,7 @@ impl KafkaSinkClusterBuilder { topics: Arc::new(DashMap::new()), nodes_shared: Arc::new(RwLock::new(vec![])), tls, + sasl_enabled, } } } @@ -125,6 +130,7 @@ impl TransformBuilder for KafkaSinkClusterBuilder { group_to_coordinator_broker: self.group_to_coordinator_broker.clone(), topics: self.topics.clone(), rng: SmallRng::from_rng(rand::thread_rng()).unwrap(), + sasl_status: SaslStatus::new(self.sasl_enabled), connection_factory: ConnectionFactory::new(self.tls.clone(), self.connect_timeout), }) } @@ -160,6 +166,33 @@ impl AtomicBrokerId { } } +#[derive(Debug)] +struct SaslStatus { + enabled: bool, + handshake_complete: bool, +} + +impl SaslStatus { + fn new(enabled: bool) -> Self { + Self { + enabled, + handshake_complete: false, + } + } + + fn set_handshake_complete(&mut self) { + self.handshake_complete = true; + } + + fn is_handshake_complete(&self) -> bool { + if self.enabled { + self.handshake_complete + } else { + true + } + } +} + pub struct KafkaSinkCluster { first_contact_points: Vec, 
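The gating behaviour of `SaslStatus` above is easiest to see spelled out: when SASL is disabled the handshake is treated as always complete, so nothing is ever deferred; when enabled, work is held back until the handshake finishes. A small sketch exercising the impl as written:

```rust
let disabled = SaslStatus::new(false);
assert!(disabled.is_handshake_complete()); // SASL off: metadata requests are never deferred

let mut enabled = SaslStatus::new(true);
assert!(!enabled.is_handshake_complete()); // metadata requests wait...
enabled.set_handshake_complete();
assert!(enabled.is_handshake_complete()); // ...until SaslAuthenticate has been routed
```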
shotover_nodes: Vec<KafkaAddress>, @@ -171,6 +204,7 @@ pub struct KafkaSinkCluster { group_to_coordinator_broker: Arc<DashMap<GroupId, BrokerId>>, topics: Arc<DashMap<TopicName, Topic>>, rng: SmallRng, + sasl_status: SaslStatus, connection_factory: ConnectionFactory, } @@ -302,7 +336,9 @@ impl KafkaSinkCluster { } // request and process metadata if we are missing topics or the controller broker id - if !topics.is_empty() || self.controller_broker.get().is_none() { + if (!topics.is_empty() || self.controller_broker.get().is_none()) + && self.sasl_status.is_handshake_complete() + { let mut metadata = self.get_metadata_of_topics(topics).await?; match metadata.frame() { Some(Frame::Kafka(KafkaFrame::Response { @@ -462,6 +498,40 @@ impl KafkaSinkCluster { .. })) => results.push(self.route_to_controller(message).await?), + Some(Frame::Kafka(KafkaFrame::Request { + body: RequestBody::ApiVersions(_), + .. + })) => { + let (tx, rx) = oneshot::channel(); + self.route_to_first_node(message.clone(), Some(tx)).await?; + + results.push(rx); + } + + Some(Frame::Kafka(KafkaFrame::Request { + body: RequestBody::SaslHandshake(_), + .. + })) => { + let (tx, rx) = oneshot::channel(); + self.route_to_first_node(message.clone(), Some(tx)).await?; + + self.connection_factory + .add_handshake_message(message.clone()); + + results.push(rx); + } + + Some(Frame::Kafka(KafkaFrame::Request { + body: RequestBody::SaslAuthenticate(_), + .. + })) => { + let (tx, rx) = oneshot::channel(); + self.route_to_first_node(message.clone(), Some(tx)).await?; + self.connection_factory.add_auth_message(message.clone()); + results.push(rx); + self.sasl_status.set_handshake_complete(); + } + // route to random node _ => { let connection = self @@ -484,6 +554,28 @@ impl KafkaSinkCluster { Ok(results) } + async fn route_to_first_node( + &mut self, + message: Message, + return_chan: Option<oneshot::Sender<Response>>, + ) -> Result<()> { + let connection = self + .nodes + .get_mut(0) + .unwrap() + .get_connection(&self.connection_factory) + .await?; + + connection + .send(Request { + message, + return_chan, + }) + .map_err(|_| anyhow!("Failed to send"))?; + + Ok(()) + } + async fn find_coordinator_of_group(&mut self, group: GroupId) -> Result<KafkaNode> { let request = Message::from_frame(Frame::Kafka(KafkaFrame::Request { header: RequestHeader::builder() diff --git a/shotover/src/transforms/kafka/sink_cluster/node.rs b/shotover/src/transforms/kafka/sink_cluster/node.rs index afe059525..5f6f4a9ce 100644 --- a/shotover/src/transforms/kafka/sink_cluster/node.rs +++ b/shotover/src/transforms/kafka/sink_cluster/node.rs @@ -1,16 +1,21 @@ use crate::codec::{kafka::KafkaCodecBuilder, CodecBuilder, Direction}; +use crate::message::Message; use crate::tcp; use crate::tls::TlsConnector; use crate::transforms::util::cluster_connection_pool::{spawn_read_write_tasks, Connection}; +use crate::transforms::util::Request; use anyhow::{anyhow, Result}; use kafka_protocol::messages::BrokerId; use kafka_protocol::protocol::StrBytes; use std::time::Duration; use tokio::io::split; +use tokio::sync::oneshot; pub struct ConnectionFactory { tls: Option<TlsConnector>, connect_timeout: Duration, + handshake_message: Option<Message>, + auth_message: Option<Message>, } impl ConnectionFactory { @@ -18,11 +23,22 @@ impl ConnectionFactory { ConnectionFactory { tls, connect_timeout, + handshake_message: None, + auth_message: None, } } + pub fn add_handshake_message(&mut self, message: Message) { + self.handshake_message = Some(message); + } + + pub fn add_auth_message(&mut self, message: Message) { + self.auth_message = Some(message); + } + pub async fn create_connection(&self, kafka_address: &KafkaAddress) -> Result<Connection> {
let codec = KafkaCodecBuilder::new(Direction::Sink, "KafkaSinkCluster".to_owned()); + let address = (kafka_address.host.to_string(), kafka_address.port as u16); if let Some(tls) = self.tls.as_ref() { let tls_stream = tls.connect(self.connect_timeout, address).await?; @@ -33,12 +49,37 @@ impl ConnectionFactory { let tcp_stream = tcp::tcp_stream(self.connect_timeout, address).await?; let (rx, tx) = tcp_stream.into_split(); let connection = spawn_read_write_tasks(&codec, rx, tx); + + if let Some(message) = self.auth_message.as_ref() { + let handshake_msg = self.handshake_message.as_ref().unwrap(); + + let (tx, rx) = oneshot::channel(); + connection + .send(Request { + message: handshake_msg.clone(), + return_chan: Some(tx), + }) + .map_err(|_| anyhow!("Failed to send"))?; + + let _response = rx.await.map_err(|_| anyhow!("Failed to receive"))?; + + let (tx, rx) = oneshot::channel(); + connection + .send(Request { + message: message.clone(), + return_chan: Some(tx), + }) + .map_err(|_| anyhow!("Failed to send"))?; + + let _response = rx.await.map_err(|_| anyhow!("Failed to receive"))?; + } + Ok(connection) } } } -#[derive(Clone, PartialEq)] +#[derive(Clone, PartialEq, Debug)] pub struct KafkaAddress { pub host: StrBytes, pub port: i32, From 5523020660a18942d9bc2e4b4e718579715b8eb1 Mon Sep 17 00:00:00 2001 From: Lucas Kent Date: Thu, 14 Mar 2024 20:46:11 +1100 Subject: [PATCH 4/4] Make KafkaSinkCluster rack aware (#1527) --- .../benches/windsock/kafka/bench.rs | 8 +- shotover-proxy/tests/kafka_int_tests/mod.rs | 24 - .../kafka/cluster-1-rack/topology-single.yaml | 5 +- .../kafka/cluster-1-rack/topology1.yaml | 14 +- .../kafka/cluster-1-rack/topology2.yaml | 14 +- .../kafka/cluster-1-rack/topology3.yaml | 14 +- .../kafka/cluster-2-racks/topology-rack1.yaml | 10 +- .../kafka/cluster-2-racks/topology-rack2.yaml | 10 +- .../cluster-2-racks/topology-single.yaml | 10 - .../kafka/cluster-sasl/topology-single.yaml | 5 +- .../kafka/cluster-sasl/topology1.yaml | 14 +- .../kafka/cluster-sasl/topology2.yaml | 14 +- .../kafka/cluster-sasl/topology3.yaml | 14 +- .../kafka/cluster-tls/topology.yaml | 5 +- .../src/transforms/kafka/sink_cluster/mod.rs | 410 +++++++++++------- .../src/transforms/kafka/sink_cluster/node.rs | 6 +- 16 files changed, 357 insertions(+), 220 deletions(-) delete mode 100644 shotover-proxy/tests/test-configs/kafka/cluster-2-racks/topology-single.yaml diff --git a/shotover-proxy/benches/windsock/kafka/bench.rs b/shotover-proxy/benches/windsock/kafka/bench.rs index e6713fa64..13f99c86b 100644 --- a/shotover-proxy/benches/windsock/kafka/bench.rs +++ b/shotover-proxy/benches/windsock/kafka/bench.rs @@ -13,7 +13,7 @@ use itertools::Itertools; use shotover::config::chain::TransformChainConfig; use shotover::sources::SourceConfig; use shotover::transforms::debug::force_parse::DebugForceEncodeConfig; -use shotover::transforms::kafka::sink_cluster::KafkaSinkClusterConfig; +use shotover::transforms::kafka::sink_cluster::{KafkaSinkClusterConfig, ShotoverNodeConfig}; use shotover::transforms::kafka::sink_single::KafkaSinkSingleConfig; use shotover::transforms::TransformConfig; use std::sync::Arc; @@ -92,7 +92,11 @@ impl KafkaBench { connect_timeout_ms: 3000, read_timeout: None, first_contact_points: vec![kafka_address], - shotover_nodes: vec![host_address.clone()], + shotover_nodes: vec![ShotoverNodeConfig { + address: host_address.parse().unwrap(), + rack: "rack1".into(), + broker_id: 0, + }], tls: None, sasl_enabled: Some(false), }), diff --git 
a/shotover-proxy/tests/kafka_int_tests/mod.rs b/shotover-proxy/tests/kafka_int_tests/mod.rs index dd2d416e7..74db991c3 100644 --- a/shotover-proxy/tests/kafka_int_tests/mod.rs +++ b/shotover-proxy/tests/kafka_int_tests/mod.rs @@ -192,30 +192,6 @@ async fn cluster_1_rack_multi_shotover(#[case] driver: KafkaDriver) { } } -#[cfg(feature = "rdkafka-driver-tests")] // temporarily needed to avoid a warning -#[rstest] -#[cfg_attr(feature = "rdkafka-driver-tests", case::cpp(KafkaDriver::Cpp))] -// #[case::java(KafkaDriver::Java)] -#[tokio::test(flavor = "multi_thread")] // multi_thread is needed since java driver will block when consuming, causing shotover logs to not appear -async fn cluster_2_racks_single_shotover(#[case] driver: KafkaDriver) { - let _docker_compose = - docker_compose("tests/test-configs/kafka/cluster-2-racks/docker-compose.yaml"); - let shotover = - shotover_process("tests/test-configs/kafka/cluster-2-racks/topology-single.yaml") - .start() - .await; - - let connection_builder = KafkaConnectionBuilder::new(driver, "127.0.0.1:9192"); - test_cases::basic(connection_builder).await; - - tokio::time::timeout( - Duration::from_secs(10), - shotover.shutdown_and_then_consume_events(&[]), - ) - .await - .expect("Shotover did not shutdown within 10s"); -} - #[cfg(feature = "rdkafka-driver-tests")] // temporarily needed to avoid a warning #[rstest] #[cfg_attr(feature = "rdkafka-driver-tests", case::cpp(KafkaDriver::Cpp))] diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/topology-single.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/topology-single.yaml index d38206e7f..f2d8f4368 100644 --- a/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/topology-single.yaml +++ b/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/topology-single.yaml @@ -5,6 +5,9 @@ sources: listen_addr: "127.0.0.1:9192" chain: - KafkaSinkCluster: - shotover_nodes: ["127.0.0.1:9192"] + shotover_nodes: + - address: "127.0.0.1:9192" + rack: "rack0" + broker_id: 0 first_contact_points: ["172.16.1.2:9092"] connect_timeout_ms: 3000 diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/topology1.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/topology1.yaml index 11edd3e78..15fe492e1 100644 --- a/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/topology1.yaml +++ b/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/topology1.yaml @@ -5,9 +5,15 @@ sources: listen_addr: "127.0.0.1:9191" chain: - KafkaSinkCluster: - shotover_nodes: - - "127.0.0.1:9191" - - "127.0.0.1:9192" - - "127.0.0.1:9193" + shotover_nodes: + - address: "127.0.0.1:9191" + rack: "rack0" + broker_id: 0 + - address: "127.0.0.1:9192" + rack: "rack0" + broker_id: 1 + - address: "127.0.0.1:9193" + rack: "rack0" + broker_id: 2 first_contact_points: ["172.16.1.2:9092"] connect_timeout_ms: 3000 diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/topology2.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/topology2.yaml index 2269f83dc..3844a8486 100644 --- a/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/topology2.yaml +++ b/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/topology2.yaml @@ -5,9 +5,15 @@ sources: listen_addr: "127.0.0.1:9192" chain: - KafkaSinkCluster: - shotover_nodes: - - "127.0.0.1:9191" - - "127.0.0.1:9192" - - "127.0.0.1:9193" + shotover_nodes: + - address: "127.0.0.1:9191" + rack: "rack0" + broker_id: 0 + - address: "127.0.0.1:9192" + rack: "rack0" + broker_id: 1 + - address: "127.0.0.1:9193" + rack: "rack0" + broker_id: 
2 first_contact_points: ["172.16.1.2:9092"] connect_timeout_ms: 3000 diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/topology3.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/topology3.yaml index 528030e24..0fa7eb760 100644 --- a/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/topology3.yaml +++ b/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/topology3.yaml @@ -5,9 +5,15 @@ sources: listen_addr: "127.0.0.1:9193" chain: - KafkaSinkCluster: - shotover_nodes: - - "127.0.0.1:9191" - - "127.0.0.1:9192" - - "127.0.0.1:9193" + shotover_nodes: + - address: "127.0.0.1:9191" + rack: "rack0" + broker_id: 0 + - address: "127.0.0.1:9192" + rack: "rack0" + broker_id: 1 + - address: "127.0.0.1:9193" + rack: "rack0" + broker_id: 2 first_contact_points: ["172.16.1.2:9092"] connect_timeout_ms: 3000 diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-2-racks/topology-rack1.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-2-racks/topology-rack1.yaml index 440ba2f13..9de259aa3 100644 --- a/shotover-proxy/tests/test-configs/kafka/cluster-2-racks/topology-rack1.yaml +++ b/shotover-proxy/tests/test-configs/kafka/cluster-2-racks/topology-rack1.yaml @@ -5,8 +5,12 @@ sources: listen_addr: "127.0.0.1:9191" chain: - KafkaSinkCluster: - shotover_nodes: - - "127.0.0.1:9191" - - "127.0.0.1:9192" + shotover_nodes: + - address: "127.0.0.1:9191" + rack: "rack1" + broker_id: 0 + - address: "127.0.0.1:9192" + rack: "rack2" + broker_id: 1 first_contact_points: ["172.16.1.2:9092"] connect_timeout_ms: 3000 diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-2-racks/topology-rack2.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-2-racks/topology-rack2.yaml index ef4234294..3d98610ca 100644 --- a/shotover-proxy/tests/test-configs/kafka/cluster-2-racks/topology-rack2.yaml +++ b/shotover-proxy/tests/test-configs/kafka/cluster-2-racks/topology-rack2.yaml @@ -5,8 +5,12 @@ sources: listen_addr: "127.0.0.1:9192" chain: - KafkaSinkCluster: - shotover_nodes: - - "127.0.0.1:9191" - - "127.0.0.1:9192" + shotover_nodes: + - address: "127.0.0.1:9191" + rack: "rack1" + broker_id: 0 + - address: "127.0.0.1:9192" + rack: "rack2" + broker_id: 1 first_contact_points: ["172.16.1.5:9092"] connect_timeout_ms: 3000 diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-2-racks/topology-single.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-2-racks/topology-single.yaml deleted file mode 100644 index d38206e7f..000000000 --- a/shotover-proxy/tests/test-configs/kafka/cluster-2-racks/topology-single.yaml +++ /dev/null @@ -1,10 +0,0 @@ ---- -sources: - - Kafka: - name: "kafka" - listen_addr: "127.0.0.1:9192" - chain: - - KafkaSinkCluster: - shotover_nodes: ["127.0.0.1:9192"] - first_contact_points: ["172.16.1.2:9092"] - connect_timeout_ms: 3000 diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology-single.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology-single.yaml index 6daa7a417..8a59741ac 100644 --- a/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology-single.yaml +++ b/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology-single.yaml @@ -5,7 +5,10 @@ sources: listen_addr: "127.0.0.1:9192" chain: - KafkaSinkCluster: + shotover_nodes: + - address: "127.0.0.1:9192" + rack: "rack0" + broker_id: 0 sasl_enabled: true - shotover_nodes: ["127.0.0.1:9192"] first_contact_points: ["172.16.1.2:9092"] connect_timeout_ms: 3000 diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology1.yaml 
b/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology1.yaml index 4f141c8b6..7a6990015 100644 --- a/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology1.yaml +++ b/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology1.yaml @@ -6,9 +6,15 @@ sources: chain: - KafkaSinkCluster: sasl_enabled: true - shotover_nodes: - - "127.0.0.1:9191" - - "127.0.0.1:9192" - - "127.0.0.1:9193" + shotover_nodes: + - address: "127.0.0.1:9191" + rack: "rack0" + broker_id: 0 + - address: "127.0.0.1:9192" + rack: "rack0" + broker_id: 1 + - address: "127.0.0.1:9193" + rack: "rack0" + broker_id: 2 first_contact_points: ["172.16.1.2:9092"] connect_timeout_ms: 3000 diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology2.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology2.yaml index 9cc781468..1fa948922 100644 --- a/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology2.yaml +++ b/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology2.yaml @@ -6,9 +6,15 @@ sources: chain: - KafkaSinkCluster: sasl_enabled: true - shotover_nodes: - - "127.0.0.1:9191" - - "127.0.0.1:9192" - - "127.0.0.1:9193" + shotover_nodes: + - address: "127.0.0.1:9191" + rack: "rack0" + broker_id: 0 + - address: "127.0.0.1:9192" + rack: "rack0" + broker_id: 1 + - address: "127.0.0.1:9193" + rack: "rack0" + broker_id: 2 first_contact_points: ["172.16.1.2:9092"] connect_timeout_ms: 3000 diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology3.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology3.yaml index 95e2fd375..1cf09d057 100644 --- a/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology3.yaml +++ b/shotover-proxy/tests/test-configs/kafka/cluster-sasl/topology3.yaml @@ -6,9 +6,15 @@ sources: chain: - KafkaSinkCluster: sasl_enabled: true - shotover_nodes: - - "127.0.0.1:9191" - - "127.0.0.1:9192" - - "127.0.0.1:9193" + shotover_nodes: + - address: "127.0.0.1:9191" + rack: "rack0" + broker_id: 0 + - address: "127.0.0.1:9192" + rack: "rack0" + broker_id: 1 + - address: "127.0.0.1:9193" + rack: "rack0" + broker_id: 2 first_contact_points: ["172.16.1.2:9092"] connect_timeout_ms: 3000 diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-tls/topology.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-tls/topology.yaml index 1d4a8b514..52829dd1b 100644 --- a/shotover-proxy/tests/test-configs/kafka/cluster-tls/topology.yaml +++ b/shotover-proxy/tests/test-configs/kafka/cluster-tls/topology.yaml @@ -5,7 +5,10 @@ sources: listen_addr: "127.0.0.1:9192" chain: - KafkaSinkCluster: - shotover_nodes: ["127.0.0.1:9192"] + shotover_nodes: + - address: "127.0.0.1:9192" + rack: "rack0" + broker_id: 0 first_contact_points: ["172.16.1.2:9092"] connect_timeout_ms: 3000 tls: diff --git a/shotover/src/transforms/kafka/sink_cluster/mod.rs b/shotover/src/transforms/kafka/sink_cluster/mod.rs index 6241287cc..f936c1134 100644 --- a/shotover/src/transforms/kafka/sink_cluster/mod.rs +++ b/shotover/src/transforms/kafka/sink_cluster/mod.rs @@ -11,10 +11,11 @@ use async_trait::async_trait; use dashmap::DashMap; use kafka_protocol::messages::find_coordinator_response::Coordinator; use kafka_protocol::messages::metadata_request::MetadataRequestTopic; +use kafka_protocol::messages::metadata_response::MetadataResponseBroker; use kafka_protocol::messages::{ - ApiKey, BrokerId, FindCoordinatorRequest, GroupId, HeartbeatRequest, JoinGroupRequest, - MetadataRequest, MetadataResponse, OffsetFetchRequest, RequestHeader, SyncGroupRequest, - TopicName, + 
ApiKey, BrokerId, FindCoordinatorRequest, FindCoordinatorResponse, GroupId, HeartbeatRequest, + JoinGroupRequest, MetadataRequest, MetadataResponse, OffsetFetchRequest, RequestHeader, + SyncGroupRequest, TopicName, }; use kafka_protocol::protocol::{Builder, StrBytes}; use node::{ConnectionFactory, KafkaAddress, KafkaNode}; @@ -22,7 +23,6 @@ use rand::rngs::SmallRng; use rand::seq::{IteratorRandom, SliceRandom}; use rand::SeedableRng; use serde::{Deserialize, Serialize}; -use std::collections::HashMap; use std::hash::Hasher; use std::net::SocketAddr; use std::sync::atomic::AtomicI64; @@ -30,6 +30,7 @@ use std::sync::Arc; use std::time::Duration; use tokio::sync::{mpsc, oneshot, RwLock}; use tokio::time::timeout; +use uuid::Uuid; mod node; @@ -37,13 +38,42 @@ mod node; #[serde(deny_unknown_fields)] pub struct KafkaSinkClusterConfig { pub first_contact_points: Vec<String>, - pub shotover_nodes: Vec<String>, + pub shotover_nodes: Vec<ShotoverNodeConfig>, pub connect_timeout_ms: u64, pub read_timeout: Option<u64>, pub tls: Option<TlsConnectorConfig>, pub sasl_enabled: Option<bool>, } +#[derive(Serialize, Deserialize, Debug, Clone)] +#[serde(deny_unknown_fields)] +pub struct ShotoverNodeConfig { + pub address: SocketAddr, + pub rack: String, + pub broker_id: i32, +} + +impl ShotoverNodeConfig { + fn build(self) -> ShotoverNode { + let address = KafkaAddress { + host: StrBytes::from_string(self.address.ip().to_string()), + port: self.address.port() as i32, + }; + ShotoverNode { + address, + rack: StrBytes::from_string(self.rack), + broker_id: BrokerId(self.broker_id), + } + } +} + +#[derive(Clone)] +pub struct ShotoverNode { + pub address: KafkaAddress, + pub rack: StrBytes, + pub broker_id: BrokerId, +} + const NAME: &str = "KafkaSinkCluster"; #[typetag::serde(name = "KafkaSinkCluster")] #[async_trait(?Send)] @@ -68,7 +98,7 @@ impl TransformConfig for KafkaSinkClusterConfig { pub struct KafkaSinkClusterBuilder { // contains address and port first_contact_points: Vec<String>, - shotover_nodes: Vec<KafkaAddress>, + shotover_nodes: Vec<ShotoverNode>, connect_timeout: Duration, read_timeout: Option<Duration>, controller_broker: Arc<AtomicBrokerId>, @@ -82,7 +112,7 @@ pub struct KafkaSinkClusterBuilder { impl KafkaSinkClusterBuilder { pub fn new( first_contact_points: Vec<String>, - shotover_nodes: Vec<String>, + shotover_nodes: Vec<ShotoverNodeConfig>, _chain_name: String, connect_timeout_ms: u64, timeout: Option<u64>, @@ -91,16 +121,11 @@ impl KafkaSinkClusterBuilder { ) -> KafkaSinkClusterBuilder { let receive_timeout = timeout.map(Duration::from_secs); - let shotover_nodes = shotover_nodes + let mut shotover_nodes: Vec<_> = shotover_nodes .into_iter() - .map(|node| { - let address: SocketAddr = node.parse().unwrap(); - KafkaAddress { - host: StrBytes::from_string(address.ip().to_string()), - port: address.port() as i32, - } - }) + .map(ShotoverNodeConfig::build) .collect(); + shotover_nodes.sort_by_key(|x| x.broker_id); KafkaSinkClusterBuilder { first_contact_points, @@ -195,7 +220,7 @@ impl SaslStatus { pub struct KafkaSinkCluster { first_contact_points: Vec<String>, - shotover_nodes: Vec<KafkaAddress>, + shotover_nodes: Vec<ShotoverNode>, pushed_messages_tx: Option<mpsc::Sender<Messages>>, read_timeout: Option<Duration>, nodes: Vec<KafkaNode>, @@ -227,6 +252,7 @@ impl Transform for KafkaSinkCluster { Ok(KafkaNode::new( BrokerId(-1), KafkaAddress::from_str(address)?, + None, )) }) .collect(); @@ -344,7 +370,7 @@ impl KafkaSinkCluster { Some(Frame::Kafka(KafkaFrame::Response { body: ResponseBody::Metadata(metadata), .. 
- })) => self.process_metadata(metadata).await, + })) => self.process_metadata_response(metadata).await, other => { return Err(anyhow!( "Unexpected message returned to metadata request {other:?}" @@ -614,6 +640,7 @@ impl KafkaSinkCluster { })) => Ok(KafkaNode::new( coordinator.node_id, KafkaAddress::new(coordinator.host.clone(), coordinator.port), + None, )), other => Err(anyhow!( "Unexpected message returned to findcoordinator request {other:?}" @@ -677,7 +704,7 @@ impl KafkaSinkCluster { // TODO: Handle errors like NOT_COORDINATOR by removing element from self.topics and self.coordinator_broker_id - // Rewrite responses to use shotovers port instead of kafkas port + // Rewrite responses to ensure clients only see the shotover cluster and hide the existence of the real kafka cluster for (i, response) in responses.iter_mut().enumerate() { match response.frame() { Some(Frame::Kafka(KafkaFrame::Response { @@ -690,59 +717,27 @@ impl KafkaSinkCluster { .find(|x| x.index == i) .ok_or_else(|| anyhow!("Received find_coordinator but not requested"))?; - if *version <= 3 { - if request.key_type == 0 { - self.group_to_coordinator_broker - .insert(GroupId(request.key.clone()), find_coordinator.node_id); - } - rewrite_address( - &self.shotover_nodes, - &mut find_coordinator.host, - &mut find_coordinator.port, - ) - } else { - for coordinator in &mut find_coordinator.coordinators { - if request.key_type == 0 { - self.group_to_coordinator_broker.insert( - GroupId(coordinator.key.clone()), - find_coordinator.node_id, - ); - } - rewrite_address( - &self.shotover_nodes, - &mut coordinator.host, - &mut coordinator.port, - ) - } - deduplicate_coordinators(&mut find_coordinator.coordinators); - } + self.process_find_coordinator_response(*version, request, find_coordinator); + self.rewrite_find_coordinator_response(*version, find_coordinator); response.invalidate_cache(); } Some(Frame::Kafka(KafkaFrame::Response { body: ResponseBody::Metadata(metadata), .. })) => { - self.process_metadata(metadata).await; - - for (_, broker) in &mut metadata.brokers { - rewrite_address(&self.shotover_nodes, &mut broker.host, &mut broker.port); - } - deduplicate_metadata_brokers(metadata); - + self.process_metadata_response(metadata).await; + self.rewrite_metadata_response(metadata)?; response.invalidate_cache(); } Some(Frame::Kafka(KafkaFrame::Response { - body: ResponseBody::DescribeCluster(describe_cluster), + body: ResponseBody::DescribeCluster(_), .. })) => { - for broker in &mut describe_cluster.brokers { - rewrite_address( - &self.shotover_nodes, - &mut broker.1.host, - &mut broker.1.port, - ) - } - response.invalidate_cache(); + // If clients were to send this we would need to rewrite the broker information. + // However I don't think clients actually send this, so just error to ensure we don't break invariants. 
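+ // If support is ever needed, the brokers map would need the same shotover-node rewrite that rewrite_metadata_response applies to metadata responses.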
+ return Err(anyhow!( + "I think this is a raft-specific message and never sent by clients" + )); } _ => {} } @@ -816,9 +811,13 @@ impl KafkaSinkCluster { Ok(rx) } - async fn process_metadata(&mut self, metadata: &MetadataResponse) { + async fn process_metadata_response(&mut self, metadata: &MetadataResponse) { for (id, broker) in &metadata.brokers { - let node = KafkaNode::new(*id, KafkaAddress::new(broker.host.clone(), broker.port)); + let node = KafkaNode::new( + *id, + KafkaAddress::new(broker.host.clone(), broker.port), + broker.rack.clone(), + ); self.add_node_if_new(node).await; } @@ -840,6 +839,202 @@ impl KafkaSinkCluster { } } + fn process_find_coordinator_response( + &mut self, + version: i16, + request: &FindCoordinator, + find_coordinator: &FindCoordinatorResponse, + ) { + if request.key_type == 0 { + if version <= 3 { + self.group_to_coordinator_broker + .insert(GroupId(request.key.clone()), find_coordinator.node_id); + } else { + for coordinator in &find_coordinator.coordinators { + self.group_to_coordinator_broker + .insert(GroupId(coordinator.key.clone()), find_coordinator.node_id); + } + } + } + } + + fn rewrite_find_coordinator_response( + &self, + version: i16, + find_coordinator: &mut FindCoordinatorResponse, + ) { + if version <= 3 { + // for version <= 3 we need to make do with only one coordinator. + // so we just pick the first shotover node in the rack of the coordinator. + + // skip rewriting on error + if find_coordinator.error_code == 0 { + let coordinator_rack = &self + .nodes + .iter() + .find(|x| x.broker_id == find_coordinator.node_id) + .unwrap() + .rack + .as_ref(); + let shotover_node = self + .shotover_nodes + .iter() + .find(|shotover_node| { + coordinator_rack + .map(|rack| rack == &shotover_node.rack) + .unwrap_or(true) + }) + .unwrap(); + find_coordinator.host = shotover_node.address.host.clone(); + find_coordinator.port = shotover_node.address.port; + find_coordinator.node_id = shotover_node.broker_id; + } + } else { + // for version > 3 we can include as many coordinators as we want. + // so we include all shotover nodes in the rack of the coordinator. + let mut shotover_coordinators: Vec<Coordinator> = vec![]; + for coordinator in find_coordinator.coordinators.drain(..) 
{ + if coordinator.error_code == 0 { + let coordinator_rack = &self + .nodes + .iter() + .find(|x| x.broker_id == coordinator.node_id) + .unwrap() + .rack + .as_ref(); + for shotover_node in self.shotover_nodes.iter().filter(|shotover_node| { + coordinator_rack + .map(|rack| rack == &shotover_node.rack) + .unwrap_or(true) + }) { + if !shotover_coordinators + .iter() + .any(|x| x.node_id == shotover_node.broker_id) + { + shotover_coordinators.push( + Coordinator::builder() + .node_id(shotover_node.broker_id) + .host(shotover_node.address.host.clone()) + .port(shotover_node.address.port) + .build() + .unwrap(), + ); + } + } + } else { + // pass errors through untouched + shotover_coordinators.push(coordinator) + } + } + find_coordinator.coordinators = shotover_coordinators; + } + } + + /// Rewrite metadata response to appear as if the shotover cluster is the real cluster and the real kafka brokers do not exist + fn rewrite_metadata_response(&self, metadata: &mut MetadataResponse) -> Result<()> { + // Overwrite list of brokers with the list of shotover nodes + metadata.brokers = self + .shotover_nodes + .iter() + .map(|shotover_node| { + ( + shotover_node.broker_id, + MetadataResponseBroker::builder() + .host(shotover_node.address.host.clone()) + .port(shotover_node.address.port) + .rack(Some(shotover_node.rack.clone())) + .build() + .unwrap(), + ) + }) + .collect(); + + // Overwrite the list of partitions to point at all shotover nodes within the same rack + for (_, topic) in &mut metadata.topics { + for partition in &mut topic.partitions { + // Deterministically choose a single shotover node in the rack as leader based on topic + partition id + let leader_rack = self + .nodes + .iter() + .find(|x| x.broker_id == *partition.leader_id) + .map(|x| x.rack.clone()) + .unwrap(); + let shotover_nodes_in_rack: Vec<_> = self + .shotover_nodes + .iter() + .filter(|shotover_node| { + leader_rack + .as_ref() + .map(|rack| rack == &shotover_node.rack) + .unwrap_or(true) + }) + .collect(); + let hash = hash_partition(topic.topic_id, partition.partition_index); + let shotover_node = &shotover_nodes_in_rack[hash % shotover_nodes_in_rack.len()]; + partition.leader_id = shotover_node.broker_id; + + // Every replica node has its entire corresponding shotover rack included. + // Since we can set as many replica nodes as we like, we take this all-out approach. + // This ensures that: + // * metadata is deterministic and therefore the same on all shotover nodes + // * clients evenly distribute their queries across shotover nodes + let mut shotover_replica_nodes = vec![]; + for replica_node in &partition.replica_nodes { + let rack = self + .nodes + .iter() + .find(|x| x.broker_id == *replica_node) + .map(|x| x.rack.clone()) + .unwrap(); + for shotover_node in &self.shotover_nodes { + // If broker has no rack - use all shotover nodes + // If broker has rack - use all shotover nodes with the same rack + if rack + .as_ref() + .map(|rack| rack == &shotover_node.rack) + .unwrap_or(true) + && !shotover_replica_nodes.contains(&shotover_node.broker_id) + { + shotover_replica_nodes.push(shotover_node.broker_id); + } + } + } + partition.replica_nodes = shotover_replica_nodes; + } + } + + if let Some(controller_node) = self + .nodes + .iter() + .find(|node| node.broker_id == metadata.controller_id) + { + // If broker has no rack - use the first shotover node + // If broker has rack - use the first shotover node with the same rack + // This is deterministic because the list of shotover nodes is sorted. 
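+ // e.g. in the cluster-2-racks test topology above (broker_id 0 in rack1, broker_id 1 in rack2, sorted by broker_id), a kafka controller in rack2 is always reported to clients as shotover broker_id 1.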
+ if let Some(shotover_node) = self.shotover_nodes.iter().find(|shotover_node| { + controller_node + .rack + .as_ref() + .map(|rack| rack == &shotover_node.rack) + .unwrap_or(true) + }) { + metadata.controller_id = shotover_node.broker_id; + } else { + tracing::warn!( + "No shotover node configured to handle kafka rack {:?}", + controller_node.rack + ); + } + } else { + return Err(anyhow!( + "Invalid metadata, controller points at unknown node {:?}", + metadata.controller_id + )); + } + + Ok(()) + } + async fn add_node_if_new(&mut self, new_node: KafkaNode) { let new = self .nodes_shared @@ -863,94 +1058,11 @@ async fn read_responses(responses: Vec<oneshot::Receiver<Response>>) -> Result<Messages> { -fn hash_address(host: &str, port: i32) -> u64 { +fn hash_partition(topic_id: Uuid, partition_index: i32) -> usize { let mut hasher = xxhash_rust::xxh3::Xxh3::new(); - hasher.write(host.as_bytes()); - hasher.write(&port.to_be_bytes()); - hasher.finish() -} - -fn rewrite_address(shotover_nodes: &[KafkaAddress], host: &mut StrBytes, port: &mut i32) { - // do not attempt to rewrite if the port is not provided (-1) - // this is known to occur in an error response - if *port >= 0 { - let shotover_node = - &shotover_nodes[hash_address(host, *port) as usize % shotover_nodes.len()]; - *host = shotover_node.host.clone(); - *port = shotover_node.port; - } -} - -/// The rdkafka driver has been observed to get stuck when there are multiple brokers with identical host and port. -/// This function deterministically rewrites metadata to avoid such duplication. -fn deduplicate_metadata_brokers(metadata: &mut MetadataResponse) { - struct SeenBroker { - pub id: BrokerId, - pub address: KafkaAddress, - } - let mut seen: Vec<SeenBroker> = vec![]; - let mut replacement_broker_id = HashMap::new(); - - // ensure deterministic results across shotover instances by first sorting the list of brokers by their broker id - metadata.brokers.sort_keys(); - - // populate replacement_broker_id. - // This is used both to determine which brokers to delete and which broker ids to use as a replacement for deleted brokers. - for (id, broker) in &mut metadata.brokers { - let address = KafkaAddress { - host: broker.host.clone(), - port: broker.port, - }; - broker.rack = None; - if let Some(replacement) = seen.iter().find(|x| x.address == address) { - replacement_broker_id.insert(*id, replacement.id); - } - seen.push(SeenBroker { address, id: *id }); - } - - // remove brokers with duplicate addresses - for (original, _replacement) in replacement_broker_id.iter() { - metadata.brokers.swap_remove(original); - } - - // In the previous step some broker id's were removed but we might be referring to those id's elsewhere in the message. - // If there are any such cases fix them by changing the id to refer to the equivalent undeleted broker. - for (_, topic) in &mut metadata.topics { - for partition in &mut topic.partitions { - if let Some(id) = replacement_broker_id.get(&partition.leader_id) { - partition.leader_id = *id; - } - for replica_node in &mut partition.replica_nodes { - if let Some(id) = replacement_broker_id.get(replica_node) { - *replica_node = *id - } - } - } - } - if let Some(id) = replacement_broker_id.get(&metadata.controller_id) { - metadata.controller_id = *id; - } -} - -/// We havent observed any failures due to duplicates in findcoordinator messages like we have in metadata messages. -/// But there might be similar issues lurking in other drivers so deduplicating seems reasonable. 
-fn deduplicate_coordinators(coordinators: &mut Vec<Coordinator>) { - let mut seen = vec![]; - let mut to_delete = vec![]; - for (i, coordinator) in coordinators.iter().enumerate() { - let address = KafkaAddress { - host: coordinator.host.clone(), - port: coordinator.port, - }; - if seen.contains(&address) { - to_delete.push(i) - } - seen.push(address); - } - - for to_delete in to_delete.iter().rev() { - coordinators.remove(*to_delete); - } + hasher.write(topic_id.as_bytes()); + hasher.write(&partition_index.to_be_bytes()); + hasher.finish() as usize } #[derive(Debug)] diff --git a/shotover/src/transforms/kafka/sink_cluster/node.rs b/shotover/src/transforms/kafka/sink_cluster/node.rs index 5f6f4a9ce..d3ad0b1b1 100644 --- a/shotover/src/transforms/kafka/sink_cluster/node.rs +++ b/shotover/src/transforms/kafka/sink_cluster/node.rs @@ -108,18 +108,20 @@ impl KafkaAddress { } } -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct KafkaNode { pub broker_id: BrokerId, + pub rack: Option<StrBytes>, pub kafka_address: KafkaAddress, connection: Option<Connection>, } impl KafkaNode { - pub fn new(broker_id: BrokerId, kafka_address: KafkaAddress) -> Self { + pub fn new(broker_id: BrokerId, kafka_address: KafkaAddress, rack: Option<StrBytes>) -> Self { KafkaNode { broker_id, kafka_address, + rack, connection: None, } }
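A minimal self-contained sketch (not part of the patch) of the deterministic hashing that the rack-aware metadata rewrite above relies on. It assumes only the xxhash-rust crate that hash_partition already uses; the 16-byte topic id and the node count are hypothetical stand-ins for the kafka_protocol Uuid and the per-rack shotover node list.

    use std::hash::Hasher;
    use xxhash_rust::xxh3::Xxh3;

    // Mirrors hash_partition: hash (topic id, partition index), then reduce
    // modulo the number of shotover nodes in the leader's rack.
    fn pick_shotover_index(topic_id: &[u8; 16], partition_index: i32, nodes_in_rack: usize) -> usize {
        let mut hasher = Xxh3::new();
        hasher.write(topic_id);
        hasher.write(&partition_index.to_be_bytes());
        hasher.finish() as usize % nodes_in_rack
    }

    fn main() {
        let topic_id = [7u8; 16]; // hypothetical topic id
        // The mapping is a pure function of (topic, partition), so every
        // shotover instance rewrites metadata identically and clients spread
        // their load across shotover nodes by partition rather than by chance.
        assert_eq!(
            pick_shotover_index(&topic_id, 0, 3),
            pick_shotover_index(&topic_id, 0, 3)
        );
        for partition in 0..4 {
            println!(
                "partition {partition} -> shotover node {}",
                pick_shotover_index(&topic_id, partition, 3)
            );
        }
    }

This is why rewrite_metadata_response can run independently on each shotover node and still present a single consistent view of the cluster.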