From 8fa90c85e44b73b09b0e41f16f95ea7bcd36eb52 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 19 Aug 2024 11:52:01 +0100 Subject: [PATCH 001/115] Bump zerocopy to 0.8 prerelease, begin the Great Work --- Cargo.lock | 141 +++++++++- Cargo.toml | 5 +- crates/opte-api/src/ip.rs | 16 +- lib/opte/Cargo.toml | 5 +- lib/opte/src/engine/arp.rs | 9 +- lib/opte/src/engine/ether.rs | 16 +- lib/opte/src/engine/geneve.rs | 13 +- lib/opte/src/engine/icmp/mod.rs | 13 +- lib/opte/src/engine/ingot_packet.rs | 311 +++++++++++++++++++++++ lib/opte/src/engine/ip4.rs | 9 +- lib/opte/src/engine/mod.rs | 2 + lib/opte/src/engine/packet.rs | 7 + lib/opte/src/engine/tcp.rs | 10 +- lib/opte/src/engine/udp.rs | 10 +- lib/oxide-vpc/tests/integration_tests.rs | 2 +- rust-toolchain.toml | 2 +- 16 files changed, 529 insertions(+), 42 deletions(-) create mode 100644 lib/opte/src/engine/ingot_packet.rs diff --git a/Cargo.lock b/Cargo.lock index bb319cce..e8b89baa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -598,7 +598,7 @@ dependencies = [ "serde", "serde_json", "thiserror", - "zerocopy", + "zerocopy 0.7.35", ] [[package]] @@ -795,6 +795,12 @@ version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + [[package]] name = "goblin" version = "0.8.2" @@ -879,6 +885,45 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "ingot" +version = "0.1.0" +source = "git+ssh://git@github.com/oxidecomputer/ingot.git?rev=37dbb101bbb8781739103e0d5cecaed53c383f05#37dbb101bbb8781739103e0d5cecaed53c383f05" +dependencies = [ + "bitflags 2.6.0", + "ingot-macros", + "ingot-types", + "macaddr", + "pnet_macros", + "pnet_macros_support", + "pnet_packet", + "zerocopy 0.8.0-alpha.16", +] + +[[package]] +name = "ingot-macros" 
+version = "0.1.0" +source = "git+ssh://git@github.com/oxidecomputer/ingot.git?rev=37dbb101bbb8781739103e0d5cecaed53c383f05#37dbb101bbb8781739103e0d5cecaed53c383f05" +dependencies = [ + "darling", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "syn 2.0.74", +] + +[[package]] +name = "ingot-types" +version = "0.1.0" +source = "git+ssh://git@github.com/oxidecomputer/ingot.git?rev=37dbb101bbb8781739103e0d5cecaed53c383f05#37dbb101bbb8781739103e0d5cecaed53c383f05" +dependencies = [ + "heapless", + "macaddr", + "pnet_macros_support", + "zerocopy 0.8.0-alpha.16", +] + [[package]] name = "ipnetwork" version = "0.20.0" @@ -1055,6 +1100,12 @@ version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +[[package]] +name = "macaddr" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baee0bbc17ce759db233beb01648088061bf678383130602a298e6998eedb2d8" + [[package]] name = "managed" version = "0.8.0" @@ -1116,6 +1167,12 @@ dependencies = [ "libc", ] +[[package]] +name = "no-std-net" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43794a0ace135be66a25d3ae77d41b91615fb68ae937f904090203e81f755b65" + [[package]] name = "nom" version = "7.1.3" @@ -1208,6 +1265,8 @@ dependencies = [ "dyn-clone", "heapless", "illumos-sys-hdrs", + "ingot", + "ingot-types", "itertools 0.12.1", "kstat-macro", "opte", @@ -1218,7 +1277,7 @@ dependencies = [ "tabwriter", "usdt", "version_check", - "zerocopy", + "zerocopy 0.8.0-alpha.16", ] [[package]] @@ -1319,7 +1378,7 @@ dependencies = [ "smoltcp", "tabwriter", "usdt", - "zerocopy", + "zerocopy 0.8.0-alpha.16", ] [[package]] @@ -1459,6 +1518,48 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "pnet_base" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ffc190d4067df16af3aba49b3b74c469e611cad6314676eaf1157f31aa0fb2f7" +dependencies = [ + "no-std-net", +] + +[[package]] +name = "pnet_macros" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13325ac86ee1a80a480b0bc8e3d30c25d133616112bb16e86f712dcf8a71c863" +dependencies = [ + "proc-macro2", + "quote", + "regex", + "syn 2.0.74", +] + +[[package]] +name = "pnet_macros_support" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eed67a952585d509dd0003049b1fc56b982ac665c8299b124b90ea2bdb3134ab" +dependencies = [ + "pnet_base", +] + +[[package]] +name = "pnet_packet" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c96ebadfab635fcc23036ba30a7d33a80c39e8461b8bd7dc7bb186acb96560f" +dependencies = [ + "glob", + "pnet_base", + "pnet_macros", + "pnet_macros_support", +] + [[package]] name = "poptrie" version = "0.1.0" @@ -1487,7 +1588,7 @@ version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" dependencies = [ - "zerocopy", + "zerocopy 0.7.35", ] [[package]] @@ -1502,6 +1603,16 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbc83ee4a840062f368f9096d80077a9841ec117e17e7f700df81958f1451254" +[[package]] +name = "prettyplease" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" +dependencies = [ + "proc-macro2", + "syn 2.0.74", +] + [[package]] name = "proc-macro-crate" version = "1.3.1" @@ -2675,7 +2786,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ "byteorder", - "zerocopy-derive", + "zerocopy-derive 0.7.35", +] + +[[package]] +name = "zerocopy" 
+version = "0.8.0-alpha.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a5fe242a39bc4f8b8d808be6314c0f0e5e499a902c44e704f3c86a89f7a7c64" +dependencies = [ + "zerocopy-derive 0.8.0-alpha.16", ] [[package]] @@ -2689,6 +2809,17 @@ dependencies = [ "syn 2.0.74", ] +[[package]] +name = "zerocopy-derive" +version = "0.8.0-alpha.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76fc519c421ad48c6c8ba02cee449398d54276c839887f9f3562d1862b43b91c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.74", +] + [[package]] name = "zone" version = "0.3.0" diff --git a/Cargo.toml b/Cargo.toml index 67f739d4..975dd2b4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,6 +50,8 @@ ctor = "0.2" darling = "0.20" dyn-clone = "1.0" heapless = "0.8" +ingot = { git = "ssh://git@github.com/oxidecomputer/ingot.git", rev = "37dbb101bbb8781739103e0d5cecaed53c383f05"} +ingot-types = { git = "ssh://git@github.com/oxidecomputer/ingot.git", rev = "37dbb101bbb8781739103e0d5cecaed53c383f05"} ipnetwork = { version = "0.20", default-features = false } itertools = { version = "0.12", default-features = false } libc = "0.2" @@ -74,10 +76,11 @@ thiserror = "1.0" toml = "0.8" usdt = "0.5" version_check = "0.9" -zerocopy = { version = "0.7", features = ["derive"] } +zerocopy = { version = "0.8.0-alpha.16", features = ["derive"] } zone = { git = "https://github.com/oxidecomputer/zone" } ztest = { git = "https://github.com/oxidecomputer/falcon", branch = "main" } poptrie = { git = "https://github.com/oxidecomputer/poptrie", branch = "multipath" } [profile.release] debug = 2 +lto = true diff --git a/crates/opte-api/src/ip.rs b/crates/opte-api/src/ip.rs index bfdcf689..4e533ac3 100644 --- a/crates/opte-api/src/ip.rs +++ b/crates/opte-api/src/ip.rs @@ -432,15 +432,13 @@ impl Ipv4Addr { } } -#[cfg(any(feature = "std", test))] -impl From for Ipv4Addr { - fn from(ip4: std::net::Ipv4Addr) -> Self { +impl From for Ipv4Addr { + fn from(ip4: 
core::net::Ipv4Addr) -> Self { Self { inner: ip4.octets() } } } -#[cfg(any(feature = "std", test))] -impl From for std::net::Ipv4Addr { +impl From for core::net::Ipv4Addr { fn from(ip4: Ipv4Addr) -> Self { Self::from(ip4.inner) } @@ -713,15 +711,13 @@ impl fmt::Display for Ipv6Addr { } } -#[cfg(any(feature = "std", test))] -impl From for Ipv6Addr { - fn from(ip6: std::net::Ipv6Addr) -> Self { +impl From for Ipv6Addr { + fn from(ip6: core::net::Ipv6Addr) -> Self { Self { inner: ip6.octets() } } } -#[cfg(any(feature = "std", test))] -impl From for std::net::Ipv6Addr { +impl From for core::net::Ipv6Addr { fn from(ip6: Ipv6Addr) -> Self { Self::from(ip6.inner) } diff --git a/lib/opte/Cargo.toml b/lib/opte/Cargo.toml index 0eb965b3..22a1ef15 100644 --- a/lib/opte/Cargo.toml +++ b/lib/opte/Cargo.toml @@ -7,7 +7,7 @@ license.workspace = true repository.workspace = true [features] -default = ["api", "std"] +default = ["api", "std", "engine"] api = [] engine = ["api", "dep:crc32fast", "dep:derror-macro", "dep:heapless", "dep:itertools", "dep:zerocopy"] kernel = ["illumos-sys-hdrs/kernel"] @@ -27,6 +27,9 @@ illumos-sys-hdrs.workspace = true kstat-macro.workspace = true opte-api.workspace = true +ingot.workspace = true +ingot-types.workspace = true + cfg-if.workspace = true crc32fast = { workspace = true, optional = true } dyn-clone.workspace = true diff --git a/lib/opte/src/engine/arp.rs b/lib/opte/src/engine/arp.rs index e9ba7956..aaaaa335 100644 --- a/lib/opte/src/engine/arp.rs +++ b/lib/opte/src/engine/arp.rs @@ -21,9 +21,10 @@ use opte_api::Ipv4Addr; use opte_api::MacAddr; use serde::Deserialize; use serde::Serialize; -use zerocopy::AsBytes; use zerocopy::FromBytes; -use zerocopy::FromZeroes; +use zerocopy::Immutable; +use zerocopy::IntoBytes; +use zerocopy::KnownLayout; use zerocopy::Ref; use zerocopy::Unaligned; @@ -224,7 +225,9 @@ impl From<&ArpEthIpv4> for ArpEthIpv4Raw { } #[repr(C)] -#[derive(AsBytes, Clone, Debug, FromBytes, FromZeroes, Unaligned)] +#[derive( + 
IntoBytes, Clone, Debug, FromBytes, Unaligned, Immutable, KnownLayout, +)] pub struct ArpEthIpv4Raw { pub htype: [u8; 2], pub ptype: [u8; 2], diff --git a/lib/opte/src/engine/ether.rs b/lib/opte/src/engine/ether.rs index 05927fec..9b7da4fd 100644 --- a/lib/opte/src/engine/ether.rs +++ b/lib/opte/src/engine/ether.rs @@ -22,9 +22,10 @@ use core::str::FromStr; use opte_api::MacAddr; use serde::Deserialize; use serde::Serialize; -use zerocopy::AsBytes; use zerocopy::FromBytes; -use zerocopy::FromZeroes; +use zerocopy::Immutable; +use zerocopy::IntoBytes; +use zerocopy::KnownLayout; use zerocopy::Ref; use zerocopy::Unaligned; @@ -347,7 +348,16 @@ impl From<&EtherMeta> for EtherHdrRaw { /// Note: For now we keep this unaligned to be safe. #[repr(C)] -#[derive(Clone, Debug, Default, FromBytes, AsBytes, FromZeroes, Unaligned)] +#[derive( + Clone, + Debug, + Default, + FromBytes, + IntoBytes, + Unaligned, + Immutable, + KnownLayout, +)] pub struct EtherHdrRaw { pub dst: [u8; 6], pub src: [u8; 6], diff --git a/lib/opte/src/engine/geneve.rs b/lib/opte/src/engine/geneve.rs index 28a88d13..baf57a2e 100644 --- a/lib/opte/src/engine/geneve.rs +++ b/lib/opte/src/engine/geneve.rs @@ -21,9 +21,10 @@ use core::mem; pub use opte_api::Vni; use serde::Deserialize; use serde::Serialize; -use zerocopy::AsBytes; use zerocopy::FromBytes; -use zerocopy::FromZeroes; +use zerocopy::Immutable; +use zerocopy::IntoBytes; +use zerocopy::KnownLayout; use zerocopy::Ref; use zerocopy::Unaligned; @@ -259,7 +260,9 @@ impl GeneveHdrError { /// Note: For now we keep this unaligned to be safe. #[repr(C)] -#[derive(Clone, Debug, FromBytes, AsBytes, FromZeroes, Unaligned)] +#[derive( + Clone, Debug, FromBytes, IntoBytes, Unaligned, Immutable, KnownLayout, +)] pub struct GeneveHdrRaw { ver_opt_len: u8, flags: u8, @@ -441,7 +444,9 @@ impl OxideOption { /// /// Note: Unaligned on the same rationale as [`GeneveHdrRaw`]. 
#[repr(C)] -#[derive(Clone, Debug, FromBytes, AsBytes, FromZeroes, Unaligned)] +#[derive( + Clone, Debug, FromBytes, IntoBytes, Unaligned, Immutable, KnownLayout, +)] pub struct GeneveOptHdrRaw { option_class: [u8; 2], crit_type: u8, diff --git a/lib/opte/src/engine/icmp/mod.rs b/lib/opte/src/engine/icmp/mod.rs index 1170ce74..b5424fab 100644 --- a/lib/opte/src/engine/icmp/mod.rs +++ b/lib/opte/src/engine/icmp/mod.rs @@ -42,9 +42,10 @@ use smoltcp::phy::Checksum; use smoltcp::phy::ChecksumCapabilities as Csum; pub use v4::Icmpv4Meta; pub use v6::Icmpv6Meta; -use zerocopy::AsBytes; use zerocopy::FromBytes; -use zerocopy::FromZeroes; +use zerocopy::Immutable; +use zerocopy::IntoBytes; +use zerocopy::KnownLayout; use zerocopy::Ref; use zerocopy::Unaligned; @@ -175,7 +176,9 @@ impl<'a> IcmpHdr<'a> { /// Note: For now we keep this unaligned to be safe. #[repr(C)] -#[derive(Clone, Debug, FromBytes, AsBytes, FromZeroes, Unaligned)] +#[derive( + Clone, Debug, FromBytes, IntoBytes, Unaligned, Immutable, KnownLayout, +)] pub struct IcmpHdrRaw { pub msg_type: u8, pub msg_code: u8, @@ -202,7 +205,9 @@ impl<'a> RawHeader<'a> for IcmpHdrRaw { /// Internal structure of an ICMP(v6) Echo(Reply)'s rest_of_header. 
#[repr(C)] -#[derive(Clone, Debug, FromBytes, AsBytes, FromZeroes, Unaligned)] +#[derive( + Clone, Debug, FromBytes, IntoBytes, Unaligned, Immutable, KnownLayout, +)] pub struct IcmpEchoRaw { pub id: [u8; 2], pub sequence: [u8; 2], diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs new file mode 100644 index 00000000..105f620d --- /dev/null +++ b/lib/opte/src/engine/ingot_packet.rs @@ -0,0 +1,311 @@ +use core::marker::PhantomData; +use core::ops::Deref; +use core::ops::DerefMut; +use core::ptr::NonNull; +use core::slice; + +use illumos_sys_hdrs::mblk_t; +use ingot::types::Chunk; +use ingot::types::HasView; +use ingot::types::ParseControl; +use ingot::types::ParseError as IngotParseErr; +use ingot::types::Parsed as IngotParsed; +use ingot::types::Read; +use ingot::EthernetPacket; +use ingot::EthernetRef; +use ingot::GenevePacket; +use ingot::IcmpV4Ref; +use ingot::IcmpV6Ref; +use ingot::Ipv4Ref; +use ingot::Ipv6Packet; +use ingot::Ipv6Ref; +use ingot::Parse; +use ingot::TcpRef; +use ingot::UdpPacket; +use ingot::UdpRef; +use ingot::Ulp; +use ingot::ValidEthernet; +use ingot::L3; +use ingot::L4; +use zerocopy::ByteSlice; +use zerocopy::NetworkEndian; + +// NOTE: these are not being handled correctly and need to be +// stealth-imported in ingot. +use ingot_types::HeaderParse; +use ingot_types::NextLayer; +use ingot_types::ParseChoice; +// (also, need to cleanup ::ingot_types vs. 
::ingot::types +// imports, somehow) + +use super::checksum::Checksum; +use super::packet::AddrPair; +use super::packet::InnerFlowId; +use super::packet::FLOW_ID_DEFAULT; + +#[derive(Parse)] +pub struct OpteIn { + pub outer_eth: EthernetPacket, + #[ingot(from = "L3")] + pub outer_v6: Ipv6Packet, + #[ingot(from = "L4")] + pub outer_udp: UdpPacket, + pub outer_encap: GenevePacket, + + #[ingot(control = exit_on_arp)] + pub inner_eth: EthernetPacket, + // pub inner_l3: L3, + pub inner_l3: Option>, + // pub inner_ulp: L4, + pub inner_ulp: Option>, +} + +#[inline] +fn exit_on_arp(eth: &ValidEthernet) -> ParseControl { + if eth.ethertype() == 0x0806 { + ParseControl::Accept + } else { + ParseControl::Continue + } +} + +#[derive(Parse)] +pub struct OpteOut { + pub inner_eth: EthernetPacket, + pub inner_l3: Option>, + pub inner_ulp: Option>, +} + +// --- REWRITE IN PROGRESS --- +pub struct MsgBlk { + inner: NonNull, +} + +pub struct MsgBlkNode(mblk_t); + +impl Deref for MsgBlkNode { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + unsafe { + let rptr = self.0.b_rptr; + let len = self.0.b_wptr.offset_from(rptr) as usize; + slice::from_raw_parts(rptr, len) + } + } +} + +impl DerefMut for MsgBlkNode { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { + let rptr = self.0.b_rptr; + let len = self.0.b_wptr.offset_from(rptr) as usize; + slice::from_raw_parts_mut(rptr, len) + } + } +} + +impl MsgBlk {} + +pub struct MsgBlkIter<'a> { + curr: Option>, + marker: PhantomData<&'a MsgBlk>, +} + +pub struct MsgBlkIterMut<'a> { + curr: Option>, + marker: PhantomData<&'a mut MsgBlk>, +} + +impl<'a> Iterator for MsgBlkIter<'a> { + type Item = &'a MsgBlkNode; + + fn next(&mut self) -> Option { + if let Some(ptr) = self.curr { + self.curr = NonNull::new(unsafe { (*ptr.as_ptr()).b_next }); + // SAFETY: MsgBlkNode is identical to mblk_t. 
+ unsafe { Some(&*(ptr.as_ptr() as *const MsgBlkNode)) } + } else { + None + } + } +} + +impl<'a> Read for MsgBlkIter<'a> { + type Chunk = &'a [u8]; + + fn next_chunk(&mut self) -> ingot::types::ParseResult { + self.next().ok_or(IngotParseErr::TooSmall).map(|v| v.as_ref()) + } +} + +impl<'a> Iterator for MsgBlkIterMut<'a> { + type Item = &'a mut MsgBlkNode; + + fn next(&mut self) -> Option { + if let Some(ptr) = self.curr { + self.curr = NonNull::new(unsafe { (*ptr.as_ptr()).b_next }); + // SAFETY: MsgBlkNode is identical to mblk_t. + unsafe { Some(&mut *(ptr.as_ptr() as *mut MsgBlkNode)) } + } else { + None + } + } +} + +impl<'a> Read for MsgBlkIterMut<'a> { + type Chunk = &'a mut [u8]; + + fn next_chunk(&mut self) -> ingot::types::ParseResult { + self.next().ok_or(IngotParseErr::TooSmall).map(|v| v.as_mut()) + } +} + +pub struct OpteUnified { + pub outer_eth: Option>, + pub outer_v6: Option>, + pub outer_udp: Option>, + pub outer_encap: Option>, + + pub inner_eth: EthernetPacket, + pub inner_l3: Option>, + pub inner_ulp: Option>, +} + +impl From> for OpteUnified { + fn from(value: OpteIn) -> Self { + Self { + outer_eth: Some(value.outer_eth), + outer_v6: Some(value.outer_v6), + outer_udp: Some(value.outer_udp), + outer_encap: Some(value.outer_encap), + inner_eth: value.inner_eth, + inner_l3: value.inner_l3, + inner_ulp: value.inner_ulp, + } + } +} + +impl From> for OpteUnified { + fn from(value: OpteOut) -> Self { + Self { + outer_eth: None, + outer_v6: None, + outer_udp: None, + outer_encap: None, + inner_eth: value.inner_eth, + inner_l3: value.inner_l3, + inner_ulp: value.inner_ulp, + } + } +} + +pub struct PacketMeta3( + IngotParsed, T>, +); + +impl PacketMeta3 { + pub fn inner_l3(&self) -> Option<&ingot::L3> { + self.0.headers().inner_l3.as_ref() + } + + pub fn inner_ulp(&self) -> Option<&ingot::Ulp> { + self.0.headers().inner_ulp.as_ref() + } +} + +pub enum PacketMeta2 { + In(IngotParsed, T>), + Out(IngotParsed, T>), +} + +impl PacketMeta2 { + pub fn 
inner_l3(&self) -> Option<&ingot::L3> { + match self { + PacketMeta2::In(v) => v.stack.0.inner_l3.as_ref(), + PacketMeta2::Out(v) => v.stack.0.inner_l3.as_ref(), + } + } + + pub fn inner_ulp(&self) -> Option<&ingot::Ulp> { + match self { + PacketMeta2::In(v) => v.stack.0.inner_ulp.as_ref(), + PacketMeta2::Out(v) => v.stack.0.inner_ulp.as_ref(), + } + } +} + +fn actual_src_port(chunk: &ingot::Ulp) -> Option { + match chunk { + Ulp::Tcp(pkt) => Some(pkt.source()), + Ulp::Udp(pkt) => Some(pkt.source()), + _ => None, + } +} + +fn actual_dst_port(chunk: &ingot::Ulp) -> Option { + match chunk { + Ulp::Tcp(pkt) => Some(pkt.destination()), + Ulp::Udp(pkt) => Some(pkt.destination()), + _ => None, + } +} + +fn pseudo_port(chunk: &ingot::Ulp) -> Option { + match chunk { + Ulp::IcmpV4(pkt) if pkt.ty() == 0 || pkt.ty() == 3 => { + Some(u16::from_be_bytes(pkt.rest_of_hdr()[..2].try_into().unwrap())) + } + Ulp::IcmpV6(pkt) if pkt.ty() == 128 || pkt.ty() == 129 => { + Some(u16::from_be_bytes(pkt.rest_of_hdr()[..2].try_into().unwrap())) + } + _ => None, + } +} + +impl From<&PacketMeta2> for InnerFlowId { + fn from(meta: &PacketMeta2) -> Self { + let (proto, addrs) = match meta.inner_l3() { + Some(L3::Ipv4(pkt)) => ( + pkt.protocol(), + AddrPair::V4 { + src: pkt.source().into(), + dst: pkt.destination().into(), + }, + ), + Some(L3::Ipv6(pkt)) => ( + pkt.next_header(), + AddrPair::V6 { + src: pkt.source().into(), + dst: pkt.destination().into(), + }, + ), + None => (255, FLOW_ID_DEFAULT.addrs), + }; + + let (src_port, dst_port) = meta + .inner_ulp() + .map(|ulp| { + ( + actual_dst_port(ulp) + .or_else(|| pseudo_port(ulp)) + .unwrap_or(0), + actual_src_port(ulp) + .or_else(|| pseudo_port(ulp)) + .unwrap_or(0), + ) + }) + .unwrap_or((0, 0)); + + InnerFlowId { proto: proto.into(), addrs, src_port, dst_port } + } +} + +pub struct Parsed2 { + len: usize, + meta: PacketMeta2, + flow: InnerFlowId, + body_csum: Option, + // body: BodyInfo, + body_modified: bool, +} diff --git 
a/lib/opte/src/engine/ip4.rs b/lib/opte/src/engine/ip4.rs index 889e9910..5f7413af 100644 --- a/lib/opte/src/engine/ip4.rs +++ b/lib/opte/src/engine/ip4.rs @@ -31,9 +31,10 @@ pub use opte_api::Ipv4PrefixLen; pub use opte_api::Protocol; use serde::Deserialize; use serde::Serialize; -use zerocopy::AsBytes; use zerocopy::FromBytes; -use zerocopy::FromZeroes; +use zerocopy::Immutable; +use zerocopy::IntoBytes; +use zerocopy::KnownLayout; use zerocopy::Ref; use zerocopy::Unaligned; @@ -500,7 +501,9 @@ impl Ipv4HdrError { /// Note: For now we keep this unaligned to be safe. #[repr(C)] -#[derive(Clone, Debug, FromBytes, AsBytes, FromZeroes, Unaligned)] +#[derive( + Clone, Debug, FromBytes, IntoBytes, Unaligned, Immutable, KnownLayout, +)] pub struct Ipv4HdrRaw { pub ver_hdr_len: u8, pub dscp_ecn: u8, diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index 93d7132a..c18d5dde 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -39,6 +39,8 @@ pub mod tcp_state; #[macro_use] pub mod udp; +pub mod ingot_packet; + use alloc::string::String; use core::fmt; use core::num::ParseIntError; diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index d9ac2107..0a1a6f98 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -51,6 +51,9 @@ use super::NetworkParser; use crate::d_error::DError; use core::fmt; use core::fmt::Display; +use core::marker::PhantomData; +use core::ops::Deref; +use core::ops::DerefMut; use core::ptr; use core::ptr::NonNull; use core::result; @@ -74,6 +77,8 @@ use alloc::vec::Vec; use illumos_sys_hdrs::dblk_t; use illumos_sys_hdrs::mblk_t; use illumos_sys_hdrs::uintptr_t; +use zerocopy::ByteOrder; +use zerocopy::NetworkEndian; cfg_if! { if #[cfg(all(not(feature = "std"), not(test)))] { @@ -86,6 +91,8 @@ cfg_if! 
{ pub static MBLK_MAX_SIZE: usize = u16::MAX as usize; +// --- REWRITE IN PROGRESS --- + pub static FLOW_ID_DEFAULT: InnerFlowId = InnerFlowId { proto: 255, addrs: AddrPair::V4 { src: Ipv4Addr::ANY_ADDR, dst: Ipv4Addr::ANY_ADDR }, diff --git a/lib/opte/src/engine/tcp.rs b/lib/opte/src/engine/tcp.rs index 618c773c..b86fbeef 100644 --- a/lib/opte/src/engine/tcp.rs +++ b/lib/opte/src/engine/tcp.rs @@ -22,9 +22,11 @@ use core::fmt::Display; use opte_api::DYNAMIC_PORT; use serde::Deserialize; use serde::Serialize; -use zerocopy::AsBytes; use zerocopy::FromBytes; -use zerocopy::FromZeroes; +use zerocopy::FromZeros; +use zerocopy::Immutable; +use zerocopy::IntoBytes; +use zerocopy::KnownLayout; use zerocopy::Ref; use zerocopy::Unaligned; @@ -401,7 +403,9 @@ impl From for TcpHdrError { /// Note: For now we keep this unaligned to be safe. #[repr(C)] -#[derive(Clone, Debug, FromBytes, AsBytes, FromZeroes, Unaligned)] +#[derive( + Clone, Debug, FromBytes, IntoBytes, Unaligned, Immutable, KnownLayout, +)] pub struct TcpHdrRaw { pub src_port: [u8; 2], pub dst_port: [u8; 2], diff --git a/lib/opte/src/engine/udp.rs b/lib/opte/src/engine/udp.rs index 6fb87ec2..5815ff0d 100644 --- a/lib/opte/src/engine/udp.rs +++ b/lib/opte/src/engine/udp.rs @@ -20,9 +20,11 @@ use core::mem; use opte_api::DYNAMIC_PORT; use serde::Deserialize; use serde::Serialize; -use zerocopy::AsBytes; use zerocopy::FromBytes; -use zerocopy::FromZeroes; +use zerocopy::FromZeros; +use zerocopy::Immutable; +use zerocopy::IntoBytes; +use zerocopy::KnownLayout; use zerocopy::Ref; use zerocopy::Unaligned; @@ -227,7 +229,9 @@ impl From for UdpHdrError { /// Note: For now we keep this unaligned to be safe. 
#[repr(C)] -#[derive(Clone, Debug, FromBytes, AsBytes, FromZeroes, Unaligned)] +#[derive( + Clone, Debug, FromBytes, IntoBytes, Unaligned, Immutable, KnownLayout, +)] pub struct UdpHdrRaw { pub src_port: [u8; 2], pub dst_port: [u8; 2], diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index e22f3d94..a55958f4 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -69,7 +69,7 @@ use smoltcp::wire::NdiscRouterFlags; use smoltcp::wire::RawHardwareAddress; use std::prelude::v1::*; use std::time::Duration; -use zerocopy::AsBytes; +use zerocopy::IntoBytes; const IP4_SZ: usize = EtherHdr::SIZE + Ipv4Hdr::BASE_SIZE; const IP6_SZ: usize = EtherHdr::SIZE + Ipv6Hdr::BASE_SIZE; diff --git a/rust-toolchain.toml b/rust-toolchain.toml index dfd5bfe0..7f466bd2 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,3 @@ [toolchain] -channel = "1.80.0" +channel = "1.80.1" profile = "default" From 3d9eb6f6e6aa0ced13af4b66781176b1847bfeec Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 19 Aug 2024 15:15:25 +0100 Subject: [PATCH 002/115] Iterating... 
--- .cargo/config.toml | 3 + Cargo.lock | 6 +- Cargo.toml | 4 +- lib/opte/Cargo.toml | 2 +- lib/opte/src/engine/ingot_packet.rs | 218 +++++++++++++++++++++++++++- lib/opte/src/engine/packet.rs | 32 ++++ 6 files changed, 252 insertions(+), 13 deletions(-) diff --git a/.cargo/config.toml b/.cargo/config.toml index f37bce65..7a9ef735 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -5,3 +5,6 @@ kbench = "bench --package opte-bench --bench xde --" [env] CARGO_WORKSPACE_DIR = { value = "", relative = true } + +[net] +git-fetch-with-cli = true diff --git a/Cargo.lock b/Cargo.lock index e8b89baa..23dbc56b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -888,7 +888,7 @@ dependencies = [ [[package]] name = "ingot" version = "0.1.0" -source = "git+ssh://git@github.com/oxidecomputer/ingot.git?rev=37dbb101bbb8781739103e0d5cecaed53c383f05#37dbb101bbb8781739103e0d5cecaed53c383f05" +source = "git+ssh://git@github.com/oxidecomputer/ingot.git?rev=48be4aba2320794291c172f146bb391b04f77bfb#48be4aba2320794291c172f146bb391b04f77bfb" dependencies = [ "bitflags 2.6.0", "ingot-macros", @@ -903,7 +903,7 @@ dependencies = [ [[package]] name = "ingot-macros" version = "0.1.0" -source = "git+ssh://git@github.com/oxidecomputer/ingot.git?rev=37dbb101bbb8781739103e0d5cecaed53c383f05#37dbb101bbb8781739103e0d5cecaed53c383f05" +source = "git+ssh://git@github.com/oxidecomputer/ingot.git?rev=48be4aba2320794291c172f146bb391b04f77bfb#48be4aba2320794291c172f146bb391b04f77bfb" dependencies = [ "darling", "prettyplease", @@ -916,7 +916,7 @@ dependencies = [ [[package]] name = "ingot-types" version = "0.1.0" -source = "git+ssh://git@github.com/oxidecomputer/ingot.git?rev=37dbb101bbb8781739103e0d5cecaed53c383f05#37dbb101bbb8781739103e0d5cecaed53c383f05" +source = "git+ssh://git@github.com/oxidecomputer/ingot.git?rev=48be4aba2320794291c172f146bb391b04f77bfb#48be4aba2320794291c172f146bb391b04f77bfb" dependencies = [ "heapless", "macaddr", diff --git a/Cargo.toml b/Cargo.toml index 975dd2b4..4be16325 
100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,8 +50,8 @@ ctor = "0.2" darling = "0.20" dyn-clone = "1.0" heapless = "0.8" -ingot = { git = "ssh://git@github.com/oxidecomputer/ingot.git", rev = "37dbb101bbb8781739103e0d5cecaed53c383f05"} -ingot-types = { git = "ssh://git@github.com/oxidecomputer/ingot.git", rev = "37dbb101bbb8781739103e0d5cecaed53c383f05"} +ingot = { git = "ssh://git@github.com/oxidecomputer/ingot.git", rev = "48be4aba2320794291c172f146bb391b04f77bfb"} +ingot-types = { git = "ssh://git@github.com/oxidecomputer/ingot.git", rev = "48be4aba2320794291c172f146bb391b04f77bfb"} ipnetwork = { version = "0.20", default-features = false } itertools = { version = "0.12", default-features = false } libc = "0.2" diff --git a/lib/opte/Cargo.toml b/lib/opte/Cargo.toml index 22a1ef15..6b32d602 100644 --- a/lib/opte/Cargo.toml +++ b/lib/opte/Cargo.toml @@ -7,7 +7,7 @@ license.workspace = true repository.workspace = true [features] -default = ["api", "std", "engine"] +default = ["api", "std"] api = [] engine = ["api", "dep:crc32fast", "dep:derror-macro", "dep:heapless", "dep:itertools", "dep:zerocopy"] kernel = ["illumos-sys-hdrs/kernel"] diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 105f620d..dc81c3ff 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -16,10 +16,12 @@ use ingot::EthernetRef; use ingot::GenevePacket; use ingot::IcmpV4Ref; use ingot::IcmpV6Ref; +use ingot::Ipv4; use ingot::Ipv4Ref; use ingot::Ipv6Packet; use ingot::Ipv6Ref; use ingot::Parse; +use ingot::TcpPacket; use ingot::TcpRef; use ingot::UdpPacket; use ingot::UdpRef; @@ -27,8 +29,18 @@ use ingot::Ulp; use ingot::ValidEthernet; use ingot::L3; use ingot::L4; +use ingot_types::Header; +use ingot_types::HeaderStack; +use ingot_types::ParseResult; +use opte_api::Direction; use zerocopy::ByteSlice; +use zerocopy::ByteSliceMut; use zerocopy::NetworkEndian; +use zerocopy::IntoBytes; +use super::checksum::Checksum 
as OpteCsum; +use super::checksum::HeaderChecksum; +use super::packet::Initialized; +use super::packet::Packet; // NOTE: these are not being handled correctly and need to be // stealth-imported in ingot. @@ -78,7 +90,7 @@ pub struct OpteOut { // --- REWRITE IN PROGRESS --- pub struct MsgBlk { - inner: NonNull, + pub inner: NonNull, } pub struct MsgBlkNode(mblk_t); @@ -105,7 +117,11 @@ impl DerefMut for MsgBlkNode { } } -impl MsgBlk {} +impl MsgBlk { + pub fn as_pkt(self) -> Packet { + unsafe { Packet::wrap_mblk(self.inner.as_ptr()).expect("already good.")} + } +} pub struct MsgBlkIter<'a> { curr: Option>, @@ -301,11 +317,199 @@ impl From<&PacketMeta2> for InnerFlowId { } } +impl From<&PacketMeta3> for InnerFlowId { + fn from(meta: &PacketMeta3) -> Self { + let (proto, addrs) = match meta.inner_l3() { + Some(L3::Ipv4(pkt)) => ( + pkt.protocol(), + AddrPair::V4 { + src: pkt.source().into(), + dst: pkt.destination().into(), + }, + ), + Some(L3::Ipv6(pkt)) => ( + pkt.next_header(), + AddrPair::V6 { + src: pkt.source().into(), + dst: pkt.destination().into(), + }, + ), + None => (255, FLOW_ID_DEFAULT.addrs), + }; + + let (src_port, dst_port) = meta + .inner_ulp() + .map(|ulp| { + ( + actual_dst_port(ulp) + .or_else(|| pseudo_port(ulp)) + .unwrap_or(0), + actual_src_port(ulp) + .or_else(|| pseudo_port(ulp)) + .unwrap_or(0), + ) + }) + .unwrap_or((0, 0)); + + InnerFlowId { proto: proto.into(), addrs, src_port, dst_port } + } +} + +fn transform_parse_stage1>(p: IngotParsed) -> IngotParsed { + IngotParsed { + stack: HeaderStack(S2::from(p.stack.0)), + data: p.data, + last_chunk: p.last_chunk, + } +} + +// GOAL: get to an absolute minimum point where we: +// - parse into an innerflowid +// - use existing transforms if a ULP entry exists. 
+ pub struct Parsed2 { - len: usize, - meta: PacketMeta2, - flow: InnerFlowId, - body_csum: Option, + // len: usize, + pub meta: PacketMeta3, + pub flow: InnerFlowId, + pub body_csum: Option, // body: BodyInfo, - body_modified: bool, + // body_modified: bool, +} + +fn csum_minus_hdr(ulp: &Ulp) -> Option { + match ulp { + Ulp::IcmpV4(icmp) => { + if icmp.checksum() == 0 { + return None; + } + + let mut csum = OpteCsum::from(HeaderChecksum::wrap(icmp.checksum().to_be_bytes())); + + csum.sub_bytes(&[icmp.code(), icmp.ty()]); + csum.sub_bytes(icmp.rest_of_hdr_ref()); + + Some(csum) + } + Ulp::IcmpV6(icmp) => { + if icmp.checksum() == 0 { + return None; + } + + let mut csum = OpteCsum::from(HeaderChecksum::wrap(icmp.checksum().to_be_bytes())); + + csum.sub_bytes(&[icmp.code(), icmp.ty()]); + csum.sub_bytes(icmp.rest_of_hdr_ref()); + + Some(csum) + }, + Ulp::Tcp(tcp) => { + if tcp.checksum() == 0 { + return None; + } + + let mut csum = OpteCsum::from(HeaderChecksum::wrap(tcp.checksum().to_be_bytes())); + + let TcpPacket::Raw(t) = tcp else { + panic!("hmm... maybe one day.") + }; + + let b = t.0.as_bytes(); + + csum.sub_bytes(&b[0..16]); + csum.sub_bytes(&b[18..]); + csum.sub_bytes(t.1.as_ref()); + + Some(csum) + }, + Ulp::Udp(udp) => { + if udp.checksum() == 0 { + return None; + } + + let mut csum = OpteCsum::from(HeaderChecksum::wrap(udp.checksum().to_be_bytes())); + + let UdpPacket::Raw(t) = udp else { + panic!("hmm... 
maybe one day.") + }; + + let b = t.0.as_bytes(); + csum.sub_bytes(&b[0..6]); + + Some(csum) + }, + } +} + +impl Parsed2 +// where T::Chunk: ByteSliceMut +{ + pub fn parse(pkt: T, dir: Direction) -> ParseResult { + let mut meta = PacketMeta3(match dir { + Direction::In => OpteIn::parse_read(pkt).map(transform_parse_stage1), + Direction::Out => OpteOut::parse_read(pkt).map(transform_parse_stage1), + }?); + + let flow = (&meta).into(); + + let use_pseudo = if let Some(v) = meta.inner_ulp() { + !matches!(v, Ulp::IcmpV4(_)) + } else { + false + }; + + let pseudo_csum = match meta.0.headers().inner_eth.ethertype() { + // ARP + 0x0806 => { + return Ok(Self { + meta, + body_csum: None, + flow, + }); + }, + // Ipv4 + 0x0800 => { + let h = meta.0.headers(); + let mut pseudo_hdr_bytes = [0u8; 12]; + let Some(L3::Ipv4(ref v4)) = h.inner_l3 else { + panic!() + }; + pseudo_hdr_bytes[0..4].copy_from_slice(&v4.source().octets()); + pseudo_hdr_bytes[4..8].copy_from_slice(&v4.destination().octets()); + pseudo_hdr_bytes[9] = v4.protocol(); + let ulp_len = v4.total_len() - 4 * (v4.ihl() as u16); + pseudo_hdr_bytes[10..].copy_from_slice(&ulp_len.to_be_bytes()); + + Checksum::compute(&pseudo_hdr_bytes) + } + // Ipv6 + 0x86dd => { + let h = meta.0.headers(); + let mut pseudo_hdr_bytes = [0u8; 40]; + let Some(L3::Ipv6(ref v6)) = h.inner_l3 else { + panic!() + }; + pseudo_hdr_bytes[0..16].copy_from_slice(&v6.source().octets()); + pseudo_hdr_bytes[16..32].copy_from_slice(&v6.destination().octets()); + pseudo_hdr_bytes[39] = v6.next_header(); + let ulp_len = v6.payload_len() as u32; + pseudo_hdr_bytes[32..36].copy_from_slice(&ulp_len.to_be_bytes()); + Checksum::compute(&pseudo_hdr_bytes) + } + _ => return Err(IngotParseErr::Unwanted), + }; + + let body_csum = meta.inner_ulp().and_then(csum_minus_hdr) + .map(|mut v| { + if use_pseudo { + v -= pseudo_csum; + } + v + }); + + Ok(Self { + meta, + flow, + body_csum, + }) + } } diff --git a/lib/opte/src/engine/packet.rs 
b/lib/opte/src/engine/packet.rs index 0a1a6f98..4a8d97e3 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -38,6 +38,7 @@ use super::icmp::IcmpHdrError; use super::icmp::IcmpMeta; use super::icmp::Icmpv4Meta; use super::icmp::Icmpv6Meta; +use super::ingot_packet::MsgBlk; use super::ip4::Ipv4Addr; use super::ip4::Ipv4Hdr; use super::ip4::Ipv4HdrError; @@ -507,6 +508,37 @@ impl PacketChain { } } + /// Removes the next packet from the top of the chain and returns + /// it, taking ownership. + pub fn pop_front2(&mut self) -> Option { + if let Some(ref mut list) = &mut self.inner { + unsafe { + let curr_b = list.head; + let curr = curr_b.as_ptr(); + let next = NonNull::new((*curr).b_next); + + // Break the forward link on the packet we have access to, + // and the backward link on the next element if possible. + if let Some(next) = next { + (*next.as_ptr()).b_prev = ptr::null_mut(); + } + (*curr).b_next = ptr::null_mut(); + + // Update the current head. If the next element is null, + // we're now empty. + if let Some(next) = next { + list.head = next; + } else { + self.inner = None; + } + + Some(MsgBlk{ inner: curr_b }) + } + } else { + None + } + } + /// Adds an owned `Packet` to the end of this chain. /// /// Internally, this unwraps the `Packet` back into an mblk_t, From cdea9f4e87697cc6ffd89a5bcc9036d83f617a93 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 21 Aug 2024 15:52:05 +0100 Subject: [PATCH 003/115] The most hacked-together 'fast-path' imaginable. 
--- Cargo.lock | 7 +- Cargo.toml | 4 +- lib/opte/src/engine/ingot_packet.rs | 276 +++++++++++++--------- lib/opte/src/engine/packet.rs | 2 +- lib/opte/src/engine/port.rs | 344 ++++++++++++++++++++++++++++ xde/Cargo.toml | 2 + xde/src/lib.rs | 1 + xde/src/mac.rs | 19 ++ xde/src/xde.rs | 283 ++++++++++++++++++++++- 9 files changed, 820 insertions(+), 118 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 23dbc56b..7b9139e1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -888,7 +888,7 @@ dependencies = [ [[package]] name = "ingot" version = "0.1.0" -source = "git+ssh://git@github.com/oxidecomputer/ingot.git?rev=48be4aba2320794291c172f146bb391b04f77bfb#48be4aba2320794291c172f146bb391b04f77bfb" +source = "git+ssh://git@github.com/oxidecomputer/ingot.git?rev=044d62931a912589cb99890f193f00720841c1b9#044d62931a912589cb99890f193f00720841c1b9" dependencies = [ "bitflags 2.6.0", "ingot-macros", @@ -903,7 +903,7 @@ dependencies = [ [[package]] name = "ingot-macros" version = "0.1.0" -source = "git+ssh://git@github.com/oxidecomputer/ingot.git?rev=48be4aba2320794291c172f146bb391b04f77bfb#48be4aba2320794291c172f146bb391b04f77bfb" +source = "git+ssh://git@github.com/oxidecomputer/ingot.git?rev=044d62931a912589cb99890f193f00720841c1b9#044d62931a912589cb99890f193f00720841c1b9" dependencies = [ "darling", "prettyplease", @@ -916,7 +916,7 @@ dependencies = [ [[package]] name = "ingot-types" version = "0.1.0" -source = "git+ssh://git@github.com/oxidecomputer/ingot.git?rev=48be4aba2320794291c172f146bb391b04f77bfb#48be4aba2320794291c172f146bb391b04f77bfb" +source = "git+ssh://git@github.com/oxidecomputer/ingot.git?rev=044d62931a912589cb99890f193f00720841c1b9#044d62931a912589cb99890f193f00720841c1b9" dependencies = [ "heapless", "macaddr", @@ -2743,6 +2743,7 @@ dependencies = [ "bitflags 2.6.0", "crc32fast", "illumos-sys-hdrs", + "ingot", "opte", "oxide-vpc", "postcard", diff --git a/Cargo.toml b/Cargo.toml index 4be16325..a6702514 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,8 +50,8 
@@ ctor = "0.2" darling = "0.20" dyn-clone = "1.0" heapless = "0.8" -ingot = { git = "ssh://git@github.com/oxidecomputer/ingot.git", rev = "48be4aba2320794291c172f146bb391b04f77bfb"} -ingot-types = { git = "ssh://git@github.com/oxidecomputer/ingot.git", rev = "48be4aba2320794291c172f146bb391b04f77bfb"} +ingot = { git = "ssh://git@github.com/oxidecomputer/ingot.git", rev = "044d62931a912589cb99890f193f00720841c1b9"} +ingot-types = { git = "ssh://git@github.com/oxidecomputer/ingot.git", rev = "044d62931a912589cb99890f193f00720841c1b9"} ipnetwork = { version = "0.20", default-features = false } itertools = { version = "0.12", default-features = false } libc = "0.2" diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index dc81c3ff..4fb81ebe 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -1,9 +1,12 @@ use core::marker::PhantomData; +use core::mem::ManuallyDrop; +use core::mem::MaybeUninit; use core::ops::Deref; use core::ops::DerefMut; use core::ptr::NonNull; use core::slice; +use illumos_sys_hdrs as ddi; use illumos_sys_hdrs::mblk_t; use ingot::types::Chunk; use ingot::types::HasView; @@ -35,12 +38,15 @@ use ingot_types::ParseResult; use opte_api::Direction; use zerocopy::ByteSlice; use zerocopy::ByteSliceMut; -use zerocopy::NetworkEndian; use zerocopy::IntoBytes; +use zerocopy::NetworkEndian; + use super::checksum::Checksum as OpteCsum; use super::checksum::HeaderChecksum; +use super::packet::allocb; use super::packet::Initialized; use super::packet::Packet; +use illumos_sys_hdrs::uintptr_t; // NOTE: these are not being handled correctly and need to be // stealth-imported in ingot. 
@@ -64,16 +70,15 @@ pub struct OpteIn { pub outer_udp: UdpPacket, pub outer_encap: GenevePacket, - #[ingot(control = exit_on_arp)] pub inner_eth: EthernetPacket, // pub inner_l3: L3, - pub inner_l3: Option>, + pub inner_l3: L3, // pub inner_ulp: L4, - pub inner_ulp: Option>, + pub inner_ulp: Ulp, } #[inline] -fn exit_on_arp(eth: &ValidEthernet) -> ParseControl { +fn exit_on_arp(eth: &EthernetPacket) -> ParseControl { if eth.ethertype() == 0x0806 { ParseControl::Accept } else { @@ -83,6 +88,7 @@ fn exit_on_arp(eth: &ValidEthernet) -> ParseControl { #[derive(Parse)] pub struct OpteOut { + #[ingot(control = exit_on_arp)] pub inner_eth: EthernetPacket, pub inner_l3: Option>, pub inner_ulp: Option>, @@ -117,9 +123,111 @@ impl DerefMut for MsgBlkNode { } } +impl MsgBlkNode { + pub fn drop_front_bytes(&mut self, n: usize) { + unsafe { + assert!(self.0.b_wptr.offset_from(self.0.b_rptr) >= n as isize); + self.0.b_rptr = self.0.b_rptr.add(n); + } + } +} + impl MsgBlk { + pub fn new(len: usize) -> Self { + let inner = unsafe { NonNull::new(allocb(len)) } + .expect("somehow failed to get an mblk..."); + + unsafe { Self { inner } } + } + + pub fn byte_len(&self) -> usize { + self.iter().map(|el| el.len()).sum() + } + + pub fn seg_len(&self) -> usize { + self.iter().count() + } + + pub fn new_with_headroom(head_len: usize, body_len: usize) -> Self { + let mut out = Self::new(head_len + body_len); + + // SAFETY: alloc is contiguous and always larger than head_len. 
+ let mut_out = unsafe { out.inner.as_mut() }; + mut_out.b_rptr = unsafe { mut_out.b_rptr.add(head_len) }; + mut_out.b_wptr = mut_out.b_rptr; + + out + } + + // pub fn write(&mut self, n_bytes: usize, f: impl FnOnce(&mut [MaybeUninit]) -> &mut [u8]) -> usize { + pub unsafe fn write( + &mut self, + n_bytes: usize, + f: impl FnOnce(&mut [MaybeUninit]), + ) { + let mut_out = unsafe { self.inner.as_mut() }; + let avail_bytes = + unsafe { (*mut_out.b_datap).db_lim.offset_from(mut_out.b_wptr) }; + assert!(avail_bytes >= 0); + assert!(avail_bytes as usize >= n_bytes); + + let in_slice = unsafe { + slice::from_raw_parts_mut( + mut_out.b_wptr as *mut MaybeUninit, + n_bytes, + ) + }; + // let out_slice = f(in_slice); + f(in_slice); + + // assert!(out_slice.as_ptr() == mut_out.b_wptr); + // assert!(out_slice.len() <= n_bytes); + + mut_out.b_wptr = unsafe { mut_out.b_wptr.add(n_bytes) }; + } + + // TODO: I really need to rethink this one in practice. + // hacked together for POC. + pub fn extend_if_one(&mut self, other: Self) { + let mut_self = unsafe { self.inner.as_mut() }; + if !mut_self.b_cont.is_null() { + panic!("oopsie daisy") + } + + mut_self.b_cont = other.unwrap_mblk(); + } + + pub fn iter(&self) -> MsgBlkIter { + MsgBlkIter { curr: Some(self.inner), marker: PhantomData } + } + + pub fn iter_mut(&mut self) -> MsgBlkIterMut { + MsgBlkIterMut { curr: Some(self.inner), marker: PhantomData } + } + pub fn as_pkt(self) -> Packet { - unsafe { Packet::wrap_mblk(self.inner.as_ptr()).expect("already good.")} + unsafe { Packet::wrap_mblk(self.unwrap_mblk()).expect("already good.") } + } + + /// Return the pointer address of the underlying mblk_t. + /// + /// NOTE: This is purely to allow passing the pointer value up to + /// DTrace so that the mblk can be inspected (read only) in probe + /// context. 
+ pub fn mblk_addr(&self) -> uintptr_t { + self.inner.as_ptr() as uintptr_t + } + + pub fn unwrap_mblk(mut self) -> *mut mblk_t { + let ptr_out = self.inner.as_ptr(); + _ = ManuallyDrop::new(self); + ptr_out + } + + pub unsafe fn wrap_mblk(ptr: *mut mblk_t) -> Option { + let inner = unsafe { NonNull::new(ptr)? }; + + Some(Self { inner }) } } @@ -138,7 +246,7 @@ impl<'a> Iterator for MsgBlkIter<'a> { fn next(&mut self) -> Option { if let Some(ptr) = self.curr { - self.curr = NonNull::new(unsafe { (*ptr.as_ptr()).b_next }); + self.curr = NonNull::new(unsafe { (*ptr.as_ptr()).b_cont }); // SAFETY: MsgBlkNode is identical to mblk_t. unsafe { Some(&*(ptr.as_ptr() as *const MsgBlkNode)) } } else { @@ -160,7 +268,7 @@ impl<'a> Iterator for MsgBlkIterMut<'a> { fn next(&mut self) -> Option { if let Some(ptr) = self.curr { - self.curr = NonNull::new(unsafe { (*ptr.as_ptr()).b_next }); + self.curr = NonNull::new(unsafe { (*ptr.as_ptr()).b_cont }); // SAFETY: MsgBlkNode is identical to mblk_t. unsafe { Some(&mut *(ptr.as_ptr() as *mut MsgBlkNode)) } } else { @@ -177,6 +285,18 @@ impl<'a> Read for MsgBlkIterMut<'a> { } } +impl Drop for MsgBlk { + fn drop(&mut self) { + cfg_if! 
{ + if #[cfg(all(not(feature = "std"), not(test)))] { + unsafe { ddi::freemsg(self.inner.as_ptr()) }; + } else { + // mock_freemsg(self.inner.as_ptr()); + } + } + } +} + pub struct OpteUnified { pub outer_eth: Option>, pub outer_v6: Option>, @@ -196,8 +316,8 @@ impl From> for OpteUnified { outer_udp: Some(value.outer_udp), outer_encap: Some(value.outer_encap), inner_eth: value.inner_eth, - inner_l3: value.inner_l3, - inner_ulp: value.inner_ulp, + inner_l3: Some(value.inner_l3), + inner_ulp: Some(value.inner_ulp), } } } @@ -217,7 +337,7 @@ impl From> for OpteUnified { } pub struct PacketMeta3( - IngotParsed, T>, + pub IngotParsed, T>, ); impl PacketMeta3 { @@ -230,27 +350,6 @@ impl PacketMeta3 { } } -pub enum PacketMeta2 { - In(IngotParsed, T>), - Out(IngotParsed, T>), -} - -impl PacketMeta2 { - pub fn inner_l3(&self) -> Option<&ingot::L3> { - match self { - PacketMeta2::In(v) => v.stack.0.inner_l3.as_ref(), - PacketMeta2::Out(v) => v.stack.0.inner_l3.as_ref(), - } - } - - pub fn inner_ulp(&self) -> Option<&ingot::Ulp> { - match self { - PacketMeta2::In(v) => v.stack.0.inner_ulp.as_ref(), - PacketMeta2::Out(v) => v.stack.0.inner_ulp.as_ref(), - } - } -} - fn actual_src_port(chunk: &ingot::Ulp) -> Option { match chunk { Ulp::Tcp(pkt) => Some(pkt.source()), @@ -279,44 +378,6 @@ fn pseudo_port(chunk: &ingot::Ulp) -> Option { } } -impl From<&PacketMeta2> for InnerFlowId { - fn from(meta: &PacketMeta2) -> Self { - let (proto, addrs) = match meta.inner_l3() { - Some(L3::Ipv4(pkt)) => ( - pkt.protocol(), - AddrPair::V4 { - src: pkt.source().into(), - dst: pkt.destination().into(), - }, - ), - Some(L3::Ipv6(pkt)) => ( - pkt.next_header(), - AddrPair::V6 { - src: pkt.source().into(), - dst: pkt.destination().into(), - }, - ), - None => (255, FLOW_ID_DEFAULT.addrs), - }; - - let (src_port, dst_port) = meta - .inner_ulp() - .map(|ulp| { - ( - actual_dst_port(ulp) - .or_else(|| pseudo_port(ulp)) - .unwrap_or(0), - actual_src_port(ulp) - .or_else(|| pseudo_port(ulp)) - 
.unwrap_or(0), - ) - }) - .unwrap_or((0, 0)); - - InnerFlowId { proto: proto.into(), addrs, src_port, dst_port } - } -} - impl From<&PacketMeta3> for InnerFlowId { fn from(meta: &PacketMeta3) -> Self { let (proto, addrs) = match meta.inner_l3() { @@ -341,10 +402,10 @@ impl From<&PacketMeta3> for InnerFlowId { .inner_ulp() .map(|ulp| { ( - actual_dst_port(ulp) + actual_src_port(ulp) .or_else(|| pseudo_port(ulp)) .unwrap_or(0), - actual_src_port(ulp) + actual_dst_port(ulp) .or_else(|| pseudo_port(ulp)) .unwrap_or(0), ) @@ -355,7 +416,9 @@ impl From<&PacketMeta3> for InnerFlowId { } } -fn transform_parse_stage1>(p: IngotParsed) -> IngotParsed { +fn transform_parse_stage1>( + p: IngotParsed, +) -> IngotParsed { IngotParsed { stack: HeaderStack(S2::from(p.stack.0)), data: p.data, @@ -383,7 +446,9 @@ fn csum_minus_hdr(ulp: &Ulp) -> Option { return None; } - let mut csum = OpteCsum::from(HeaderChecksum::wrap(icmp.checksum().to_be_bytes())); + let mut csum = OpteCsum::from(HeaderChecksum::wrap( + icmp.checksum().to_be_bytes(), + )); csum.sub_bytes(&[icmp.code(), icmp.ty()]); csum.sub_bytes(icmp.rest_of_hdr_ref()); @@ -395,19 +460,23 @@ fn csum_minus_hdr(ulp: &Ulp) -> Option { return None; } - let mut csum = OpteCsum::from(HeaderChecksum::wrap(icmp.checksum().to_be_bytes())); + let mut csum = OpteCsum::from(HeaderChecksum::wrap( + icmp.checksum().to_be_bytes(), + )); csum.sub_bytes(&[icmp.code(), icmp.ty()]); csum.sub_bytes(icmp.rest_of_hdr_ref()); Some(csum) - }, + } Ulp::Tcp(tcp) => { if tcp.checksum() == 0 { return None; } - let mut csum = OpteCsum::from(HeaderChecksum::wrap(tcp.checksum().to_be_bytes())); + let mut csum = OpteCsum::from(HeaderChecksum::wrap( + tcp.checksum().to_be_bytes(), + )); let TcpPacket::Raw(t) = tcp else { panic!("hmm... 
maybe one day.") @@ -420,13 +489,15 @@ fn csum_minus_hdr(ulp: &Ulp) -> Option { csum.sub_bytes(t.1.as_ref()); Some(csum) - }, + } Ulp::Udp(udp) => { if udp.checksum() == 0 { return None; } - let mut csum = OpteCsum::from(HeaderChecksum::wrap(udp.checksum().to_be_bytes())); + let mut csum = OpteCsum::from(HeaderChecksum::wrap( + udp.checksum().to_be_bytes(), + )); let UdpPacket::Raw(t) = udp else { panic!("hmm... maybe one day.") @@ -436,7 +507,7 @@ fn csum_minus_hdr(ulp: &Ulp) -> Option { csum.sub_bytes(&b[0..6]); Some(csum) - }, + } } } @@ -445,14 +516,18 @@ impl Parsed2 { pub fn parse(pkt: T, dir: Direction) -> ParseResult { let mut meta = PacketMeta3(match dir { - Direction::In => OpteIn::parse_read(pkt).map(transform_parse_stage1), - Direction::Out => OpteOut::parse_read(pkt).map(transform_parse_stage1), + Direction::In => { + OpteIn::parse_read(pkt).map(transform_parse_stage1) + } + Direction::Out => { + OpteOut::parse_read(pkt).map(transform_parse_stage1) + } }?); let flow = (&meta).into(); let use_pseudo = if let Some(v) = meta.inner_ulp() { - !matches!(v, Ulp::IcmpV4(_)) + !matches!(v, Ulp::IcmpV4(_)) } else { false }; @@ -460,21 +535,16 @@ impl Parsed2 let pseudo_csum = match meta.0.headers().inner_eth.ethertype() { // ARP 0x0806 => { - return Ok(Self { - meta, - body_csum: None, - flow, - }); - }, + return Ok(Self { meta, body_csum: None, flow }); + } // Ipv4 0x0800 => { let h = meta.0.headers(); let mut pseudo_hdr_bytes = [0u8; 12]; - let Some(L3::Ipv4(ref v4)) = h.inner_l3 else { - panic!() - }; + let Some(L3::Ipv4(ref v4)) = h.inner_l3 else { panic!() }; pseudo_hdr_bytes[0..4].copy_from_slice(&v4.source().octets()); - pseudo_hdr_bytes[4..8].copy_from_slice(&v4.destination().octets()); + pseudo_hdr_bytes[4..8] + .copy_from_slice(&v4.destination().octets()); pseudo_hdr_bytes[9] = v4.protocol(); let ulp_len = v4.total_len() - 4 * (v4.ihl() as u16); pseudo_hdr_bytes[10..].copy_from_slice(&ulp_len.to_be_bytes()); @@ -485,31 +555,27 @@ impl Parsed2 0x86dd => 
{ let h = meta.0.headers(); let mut pseudo_hdr_bytes = [0u8; 40]; - let Some(L3::Ipv6(ref v6)) = h.inner_l3 else { - panic!() - }; + let Some(L3::Ipv6(ref v6)) = h.inner_l3 else { panic!() }; pseudo_hdr_bytes[0..16].copy_from_slice(&v6.source().octets()); - pseudo_hdr_bytes[16..32].copy_from_slice(&v6.destination().octets()); + pseudo_hdr_bytes[16..32] + .copy_from_slice(&v6.destination().octets()); pseudo_hdr_bytes[39] = v6.next_header(); let ulp_len = v6.payload_len() as u32; - pseudo_hdr_bytes[32..36].copy_from_slice(&ulp_len.to_be_bytes()); + pseudo_hdr_bytes[32..36] + .copy_from_slice(&ulp_len.to_be_bytes()); Checksum::compute(&pseudo_hdr_bytes) } _ => return Err(IngotParseErr::Unwanted), }; - let body_csum = meta.inner_ulp().and_then(csum_minus_hdr) - .map(|mut v| { + let body_csum = + meta.inner_ulp().and_then(csum_minus_hdr).map(|mut v| { if use_pseudo { v -= pseudo_csum; } v }); - Ok(Self { - meta, - flow, - body_csum, - }) + Ok(Self { meta, flow, body_csum }) } } diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 4a8d97e3..c2f60899 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -532,7 +532,7 @@ impl PacketChain { self.inner = None; } - Some(MsgBlk{ inner: curr_b }) + Some(MsgBlk { inner: curr_b }) } } else { None diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index a5cdb4de..b1cd4804 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -7,10 +7,16 @@ //! A virtual switch port. 
use self::meta::ActionMeta; +use super::ether::EtherMeta; use super::flow_table::Dump; use super::flow_table::FlowEntry; use super::flow_table::FlowTable; use super::flow_table::Ttl; +use super::headers::EncapPush; +use super::headers::HeaderAction; +use super::headers::IpPush; +use super::headers::UlpHeaderAction; +use super::ingot_packet::Parsed2; use super::ioctl; use super::ioctl::TcpFlowEntryDump; use super::ioctl::TcpFlowStateDump; @@ -25,6 +31,7 @@ use super::packet::BodyTransform; use super::packet::BodyTransformError; use super::packet::Initialized; use super::packet::InnerFlowId; +use super::packet::OuterMeta; use super::packet::Packet; use super::packet::PacketMeta; use super::packet::Parsed; @@ -69,10 +76,23 @@ use core::sync::atomic::AtomicU64; use core::sync::atomic::Ordering::SeqCst; #[cfg(all(not(feature = "std"), not(test)))] use illumos_sys_hdrs::uintptr_t; +use ingot::EthernetMut; +use ingot::IcmpV4Mut; +use ingot::IcmpV4Ref; +use ingot::IcmpV6Mut; +use ingot::IcmpV6Ref; +use ingot::Ipv4Mut; +use ingot::Ipv6Mut; +use ingot::TcpFlags; +use ingot::TcpMut; +use ingot::UdpMut; +use ingot::Ulp; +use ingot_types::Read; use kstat_macro::KStatProvider; use opte_api::Direction; use opte_api::MacAddr; use opte_api::OpteError; +use zerocopy::ByteSliceMut; pub type Result = result::Result; @@ -1185,6 +1205,324 @@ impl Port { res } + // hope and pray we find a ULP, then use that? + pub fn thin_process( + &self, + dir: Direction, + pkt: &mut Parsed2, + ) -> result::Result + where + T::Chunk: ByteSliceMut, + { + let flow_before = pkt.flow; + // let flow_before = *pkt.flow(); + let epoch = self.epoch.load(SeqCst); + let mut data = self.data.lock(); + check_state!(data.state, [PortState::Running]) + .map_err(|_| ProcessError::BadState(data.state))?; + + let mut dirty_csum = false; + + // self.port_process_entry_probe(dir, &flow_before, epoch, pskt); + // TODO: what stats? 
lmao + match dir { + Direction::Out => { + // opte::engine::err!("looking up {:?} in outdir...", flow_before); + let a = data.uft_out.get(&flow_before); + let Some(a) = a else { + // eh. It will get recirc'd for free... + // opte::engine::err!("not found! Releasing!"); + drop(data); + return Err(ProcessError::FlowTableFull { + kind: "()", + limit: 0, + }); + }; + // opte::engine::err!("found!"); + + let mut hm = pkt.meta.0.headers_mut(); + + let mut new_eth = None; + let mut new_ip = None; + let mut new_encap = None; + // opte::engine::err!("xforms {:?}!", &a.state().xforms.hdr); + for xf in &a.state().xforms.hdr { + // opte::engine::err!("xf..."); + if let HeaderAction::Push(outer_eth, _) = &xf.outer_ether { + new_eth = Some(outer_eth.clone()); + } + if let HeaderAction::Push(outer_ip, _) = &xf.outer_ip { + new_ip = Some(outer_ip.clone()); + } + if let HeaderAction::Push(outer_ec, _) = &xf.outer_encap { + new_encap = Some(outer_ec.clone()); + } + if let HeaderAction::Modify(m, _) = &xf.inner_ether { + if let Some(src) = m.src { + hm.inner_eth.set_source(src.bytes().into()); + } + if let Some(dst) = m.dst { + hm.inner_eth.set_destination(dst.bytes().into()); + } + } + if let HeaderAction::Modify(m, _) = &xf.inner_ip { + match m { + super::headers::IpMod::Ip4(v4) => { + let Some(ingot::L3::Ipv4(ref mut v4_t)) = + hm.inner_l3 + else { + return Err(ProcessError::FlowTableFull { + kind: "()", + limit: 0, + }); + }; + if let Some(src) = v4.src { + dirty_csum = true; + v4_t.set_source(src.into()); + } + if let Some(dst) = v4.dst { + dirty_csum = true; + v4_t.set_destination(dst.into()); + } + } + super::headers::IpMod::Ip6(v6) => { + let Some(ingot::L3::Ipv6(ref mut v6_t)) = + hm.inner_l3 + else { + return Err(ProcessError::FlowTableFull { + kind: "()", + limit: 0, + }); + }; + if let Some(src) = v6.src { + dirty_csum = true; + v6_t.set_source(src.into()); + } + if let Some(dst) = v6.dst { + dirty_csum = true; + v6_t.set_destination(dst.into()); + } + } + } + } + if let 
UlpHeaderAction::Modify(m) = &xf.inner_ulp { + if let Some(src) = &m.generic.src_port { + match hm.inner_ulp { + Some(Ulp::Tcp(ref mut t)) => { + dirty_csum = true; + t.set_source(*src) + } + Some(Ulp::Udp(ref mut t)) => { + dirty_csum = true; + t.set_source(*src) + } + _ => {} + } + } + if let Some(dst) = &m.generic.dst_port { + match hm.inner_ulp { + Some(Ulp::Tcp(ref mut t)) => { + dirty_csum = true; + t.set_destination(*dst) + } + Some(Ulp::Udp(ref mut t)) => { + dirty_csum = true; + t.set_destination(*dst) + } + _ => {} + } + } + if let Some(flags) = &m.tcp_flags { + match hm.inner_ulp { + Some(Ulp::Tcp(ref mut t)) => { + dirty_csum = true; + t.set_flags(TcpFlags::from_bits_retain( + *flags, + )) + } + _ => {} + } + } + if let Some(new_id) = &m.icmp_id { + match hm.inner_ulp { + Some(Ulp::IcmpV4(ref mut pkt)) + if pkt.ty() == 0 || pkt.ty() == 3 => + { + dirty_csum = true; + pkt.rest_of_hdr_mut()[..2] + .copy_from_slice(&new_id.to_be_bytes()) + } + Some(Ulp::IcmpV6(ref mut pkt)) + if pkt.ty() == 128 || pkt.ty() == 129 => + { + dirty_csum = true; + pkt.rest_of_hdr_mut()[..2] + .copy_from_slice(&new_id.to_be_bytes()) + } + _ => {} + } + } + } + } + + match (new_eth, new_ip, new_encap) { + (Some(a), Some(b), Some(c)) => { + Ok(ThinProcRes::PushEncap(a, b, c)) + } + (None, None, None) => Ok(ThinProcRes::Na), + _ => Err(ProcessError::FlowTableFull { + kind: "()", + limit: 0, + }), + } + } + + Direction::In => { + let a = data.uft_in.get(&flow_before); + let Some(a) = a else { + // eh. 
+ return Err(ProcessError::FlowTableFull { + kind: "()", + limit: 0, + }); + }; + + let mut hm = pkt.meta.0.headers_mut(); + + let mut pop_eth = false; + let mut pop_ip = false; + let mut pop_encap = false; + for xf in &a.state().xforms.hdr { + // opte::engine::err!("xf..."); + if let HeaderAction::Pop = &xf.outer_ether { + pop_eth = true; + } + if let HeaderAction::Pop = &xf.outer_ip { + pop_ip = true; + } + if let HeaderAction::Pop = &xf.outer_encap { + pop_encap = true; + } + if let HeaderAction::Modify(m, _) = &xf.inner_ether { + if let Some(src) = m.src { + hm.inner_eth.set_source(src.bytes().into()); + } + if let Some(dst) = m.dst { + hm.inner_eth.set_destination(dst.bytes().into()); + } + } + if let HeaderAction::Modify(m, _) = &xf.inner_ip { + match m { + super::headers::IpMod::Ip4(v4) => { + let Some(ingot::L3::Ipv4(ref mut v4_t)) = + hm.inner_l3 + else { + return Err(ProcessError::FlowTableFull { + kind: "()", + limit: 0, + }); + }; + if let Some(src) = v4.src { + dirty_csum = true; + v4_t.set_source(src.into()); + } + if let Some(dst) = v4.dst { + dirty_csum = true; + v4_t.set_destination(dst.into()); + } + } + super::headers::IpMod::Ip6(v6) => { + let Some(ingot::L3::Ipv6(ref mut v6_t)) = + hm.inner_l3 + else { + return Err(ProcessError::FlowTableFull { + kind: "()", + limit: 0, + }); + }; + if let Some(src) = v6.src { + dirty_csum = true; + v6_t.set_source(src.into()); + } + if let Some(dst) = v6.dst { + dirty_csum = true; + v6_t.set_destination(dst.into()); + } + } + } + } + if let UlpHeaderAction::Modify(m) = &xf.inner_ulp { + if let Some(src) = &m.generic.src_port { + match hm.inner_ulp { + Some(Ulp::Tcp(ref mut t)) => { + dirty_csum = true; + t.set_source(*src) + } + Some(Ulp::Udp(ref mut t)) => { + dirty_csum = true; + t.set_source(*src) + } + _ => {} + } + } + if let Some(dst) = &m.generic.dst_port { + match hm.inner_ulp { + Some(Ulp::Tcp(ref mut t)) => { + dirty_csum = true; + t.set_destination(*dst) + } + Some(Ulp::Udp(ref mut t)) => { + 
dirty_csum = true; + t.set_destination(*dst) + } + _ => {} + } + } + if let Some(flags) = &m.tcp_flags { + match hm.inner_ulp { + Some(Ulp::Tcp(ref mut t)) => { + dirty_csum = true; + t.set_flags(TcpFlags::from_bits_retain( + *flags, + )) + } + _ => {} + } + } + if let Some(new_id) = &m.icmp_id { + match hm.inner_ulp { + Some(Ulp::IcmpV4(ref mut pkt)) + if pkt.ty() == 0 || pkt.ty() == 3 => + { + dirty_csum = true; + pkt.rest_of_hdr_mut()[..2] + .copy_from_slice(&new_id.to_be_bytes()) + } + Some(Ulp::IcmpV6(ref mut pkt)) + if pkt.ty() == 128 || pkt.ty() == 129 => + { + dirty_csum = true; + pkt.rest_of_hdr_mut()[..2] + .copy_from_slice(&new_id.to_be_bytes()) + } + _ => {} + } + } + } + } + + match (pop_eth, pop_ip, pop_encap) { + (true, true, true) => Ok(ThinProcRes::PopEncap), + (false, false, false) => Ok(ThinProcRes::Na), + _ => Err(ProcessError::FlowTableFull { + kind: "()", + limit: 0, + }), + } + } + } + } + /// Remove the rule identified by the `dir`, `layer_name`, `id` /// combination, if such a rule exists. /// @@ -1326,6 +1664,12 @@ impl From for TcpState { } } +pub enum ThinProcRes { + PushEncap(EtherMeta, IpPush, EncapPush), + PopEncap, + Na, +} + // This is a convenience wrapper for keeping the header and body // transformations under one structure, allowing them to be passes as // one argument. 
diff --git a/xde/Cargo.toml b/xde/Cargo.toml index 24ab5076..371a00df 100644 --- a/xde/Cargo.toml +++ b/xde/Cargo.toml @@ -11,6 +11,8 @@ illumos-sys-hdrs = { workspace = true, features = ["kernel"] } opte = { workspace = true, features = ["engine", "kernel"], default-features = false } oxide-vpc = { workspace = true, features = ["engine", "kernel"], default-features = false } +ingot.workspace = true + bitflags.workspace = true postcard.workspace = true serde.workspace = true diff --git a/xde/src/lib.rs b/xde/src/lib.rs index 726f1ef4..26f2f269 100644 --- a/xde/src/lib.rs +++ b/xde/src/lib.rs @@ -20,6 +20,7 @@ #![allow(non_snake_case)] // for bindgen code in ip.rs #![feature(alloc_error_handler)] #![feature(rustc_private)] +#![feature(maybe_uninit_slice)] #![deny(unused_must_use)] mod ioctl; diff --git a/xde/src/mac.rs b/xde/src/mac.rs index 22dbb7f8..b3602c7d 100644 --- a/xde/src/mac.rs +++ b/xde/src/mac.rs @@ -19,6 +19,7 @@ use core::fmt; use core::ptr; use illumos_sys_hdrs::*; use opte::engine::ether::EtherAddr; +use opte::engine::ingot_packet::MsgBlk; use opte::engine::packet::Initialized; use opte::engine::packet::Packet; use opte::engine::packet::PacketState; @@ -287,6 +288,24 @@ impl MacClientHandle { }; debug_assert_eq!(ret_mp, ptr::null_mut()); } + + pub fn tx_drop_on_no_desc2( + &self, + pkt: MsgBlk, + hint: uintptr_t, + flags: MacTxFlags, + ) { + // We must unwrap the raw `mblk_t` out of the `pkt` here, + // otherwise the mblk_t would be dropped at the end of this + // function along with `pkt`. 
+ let mut raw_flags = flags.bits(); + raw_flags |= MAC_DROP_ON_NO_DESC; + let mut ret_mp = ptr::null_mut(); + unsafe { + mac_tx(self.mch, pkt.unwrap_mblk(), hint, raw_flags, &mut ret_mp) + }; + debug_assert_eq!(ret_mp, ptr::null_mut()); + } } impl Drop for MacClientHandle { diff --git a/xde/src/xde.rs b/xde/src/xde.rs index e7859b22..74dbdc6c 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -36,12 +36,28 @@ use alloc::string::ToString; use alloc::sync::Arc; use alloc::vec::Vec; use core::ffi::CStr; +use core::hash::Hash; +use core::mem::MaybeUninit; use core::num::NonZeroU32; use core::ptr; use core::ptr::addr_of; use core::ptr::addr_of_mut; use core::time::Duration; +use crc32fast::Hasher; use illumos_sys_hdrs::*; +use ingot::types::Header; +use ingot::types::HeaderParse; +use ingot::EthernetMut; +use ingot::EthernetRef; +use ingot::GeneveFlags; +use ingot::GeneveMut; +use ingot::GeneveRef; +use ingot::Ipv6Mut; +use ingot::UdpMut; +use ingot::ValidEthernet; +use ingot::ValidGeneve; +use ingot::ValidIpv6; +use ingot::ValidUdp; use opte::api::ClearXdeUnderlayReq; use opte::api::CmdOk; use opte::api::Direction; @@ -61,7 +77,11 @@ use opte::ddi::time::Periodic; use opte::engine::ether::EtherAddr; use opte::engine::geneve::Vni; use opte::engine::headers::EncapMeta; +use opte::engine::headers::EncapPush; use opte::engine::headers::IpAddr; +use opte::engine::headers::IpPush; +use opte::engine::ingot_packet::MsgBlk; +use opte::engine::ingot_packet::Parsed2; use opte::engine::ioctl::{self as api}; use opte::engine::ip6::Ipv6Addr; use opte::engine::packet::Initialized; @@ -1543,7 +1563,7 @@ unsafe extern "C" fn xde_mc_tx( // by the mch they're being targeted to. E.g., either build a list // of chains (u1, u2, port0, port1, ...), or hold tx until another // packet breaks the run targeting the same dest. 
- while let Some(pkt) = chain.pop_front() { + while let Some(pkt) = chain.pop_front2() { xde_mc_tx_one(src_dev, pkt); } @@ -1551,10 +1571,169 @@ unsafe extern "C" fn xde_mc_tx( } #[inline] -unsafe fn xde_mc_tx_one( - src_dev: &XdeDev, - pkt: Packet, -) -> *mut mblk_t { +unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { + let mblk_addr = pkt.mblk_addr(); + let pkt_len_old = pkt.byte_len(); + match Parsed2::parse(pkt.iter_mut(), Direction::Out) { + Ok(mut p) => { + let mch = &src_dev.u1.mch; + let hint = 0; + let port = &src_dev.port; + let flow_id = p.flow; + + let mut hasher = Hasher::new(); + flow_id.hash(&mut hasher); + let f_hash = hasher.finalize(); + + // TODO: emit hdr, reuse cksum, actually send... + let mut ip6_src = Default::default(); + let mut ip6_dst = Default::default(); + if let Ok(decision) = port.thin_process(Direction::Out, &mut p) { + match decision { + opte::engine::port::ThinProcRes::PushEncap( + eth, + ip, + udp, + ) => { + // TODO: generate methods to fill a maybeuninit. + // total bytes: ETH 14, V6 40, UDP 8, GENEVE 8 + let new_hdrs = 14 + 40 + 8 + 8; + let mut new_blk = + MsgBlk::new_with_headroom(0, new_hdrs); + + new_blk.write(14, |uninit| { + let slice = unsafe { + MaybeUninit::slice_assume_init_mut(uninit) + }; + let (mut a, _) = + ValidEthernet::parse(slice).unwrap(); + a.set_source(eth.src.bytes().into()); + a.set_destination(eth.dst.bytes().into()); + a.set_ethertype(eth.ether_type.into()); + + // slice + }); + + // we know we'er only pushing v6. 
+ let IpPush::Ip6(v6) = ip else { panic!() }; + new_blk.write(40, |uninit| { + let slice = unsafe { + MaybeUninit::slice_assume_init_mut(uninit) + }; + let (mut a, _) = ValidIpv6::parse(slice).unwrap(); + a.set_version(6); + a.set_dscp(0); + a.set_ecn(ingot::Ecn::NotCapable); + a.set_payload_len((pkt_len_old + 16) as u16); + a.set_flow_label(0); + a.set_hop_limit(128); + a.set_next_header(v6.proto.into()); + a.set_source(v6.src.bytes().into()); + a.set_destination(v6.dst.bytes().into()); + + ip6_src = v6.src; + ip6_dst = v6.dst; + + // slice + }); + + new_blk.write(16, |uninit| { + let slice = unsafe { + MaybeUninit::slice_assume_init_mut(uninit) + }; + + let EncapPush::Geneve(gen) = udp else { panic!() }; + + let (mut a, rest) = ValidUdp::parse(slice).unwrap(); + // ideally write out w/o looking at contents, be safer. + rest[0] = 0; + let (mut b, rest) = + ValidGeneve::parse(rest).unwrap(); + + a.set_source(gen.entropy); + a.set_destination(6081); + a.set_checksum(0); + a.set_length((pkt_len_old + 16) as u16); + + b.set_flags(GeneveFlags::empty()); + b.set_reserved(0); + b.set_protocol_type(0x6558); + b.set_vni(gen.vni.into()); + + // slice + }); + + core::mem::swap(&mut new_blk, &mut pkt); + pkt.extend_if_one(new_blk); + } + // we're in Tx for a ULP'd pkt -- this should NEVER happen. + opte::engine::port::ThinProcRes::PopEncap => unreachable!(), + opte::engine::port::ThinProcRes::Na => unreachable!(), + } + + if ip6_dst == ip6_src { + // todo. broken just now ig + // return guest_loopback(src_dev, pkt, vni); + opte::engine::err!("eh?"); + return ptr::null_mut(); + } + + let my_key = RouteKey { dst: ip6_dst, l4_hash: Some(f_hash) }; + let Route { src, dst, underlay_dev } = + src_dev.routes.next_hop(my_key, src_dev); + + // Get a pointer to the beginning of the outer frame and + // fill in the dst/src addresses before sending out the + // device. 
+ let mblk = pkt.unwrap_mblk(); + let rptr = (*mblk).b_rptr; + ptr::copy(dst.as_ptr(), rptr, 6); + ptr::copy(src.as_ptr(), rptr.add(6), 6); + // Unwrap: We know the packet is good because we just + // unwrapped it above. + let new_pkt = MsgBlk::wrap_mblk(mblk).unwrap(); + + let new_bytes = + new_pkt.iter().map(|v| v.as_ref()).collect::>(); + + let szs = new_pkt + .iter() + .map(|v| v.as_ref().len()) + .collect::>(); + + // opte::engine::err!( + // "okay, we did pkt surgery: {:?} szs {:?}", + // new_bytes, szs + // ); + + underlay_dev.mch.tx_drop_on_no_desc2( + new_pkt, + hint, + MacTxFlags::empty(), + ); + + return ptr::null_mut(); + } + } + Err(e) => { + let mut bytes = vec![]; + pkt.iter_mut().for_each(|v| bytes.extend_from_slice(v)); + opte::engine::err!("NEW Rx bad packet: {:?} -> {:?}", e, bytes); + bad_packet_parse_probe( + Some(src_dev.port.name_cstr()), + Direction::Out, + mblk_addr, + &PacketError::Parse( + opte::engine::packet::ParseError::UnexpectedProtocol( + 99.into(), + ), + ), + ); + // return ptr::null_mut(); + } + }; + let pkt = pkt.as_pkt(); + let parser = src_dev.port.network().parser(); let mblk_addr = pkt.mblk_addr(); let mut pkt = match pkt.parse(Direction::Out, parser) { @@ -1845,7 +2024,7 @@ unsafe extern "C" fn xde_rx( // by the mch they're being targeted to. E.g., either build a list // of chains (port0, port1, ...), or hold tx until another // packet breaks the run targeting the same dest. 
- while let Some(pkt) = chain.pop_front() { + while let Some(pkt) = chain.pop_front2() { xde_rx_one(&mch, mrh, pkt); } } @@ -1854,8 +2033,98 @@ unsafe extern "C" fn xde_rx( unsafe fn xde_rx_one( mch: &MacClientHandle, mrh: *mut mac::mac_resource_handle, - pkt: Packet, + mut pkt: MsgBlk, ) { + let mblk_addr = pkt.mblk_addr(); + let pkt_len_old = pkt.byte_len(); + match Parsed2::parse(pkt.iter_mut(), Direction::In) { + Ok(mut p) => { + // opte::engine::err!("Successful parse."); + let devs = xde_devs.read(); + let h = p.meta.0.headers(); + let (vni, ether_dst) = match (&h.outer_encap, Some(&h.inner_eth)) { + (Some(ref geneve), Some(ref eth)) => { + (Vni::new(geneve.vni()).unwrap(), eth.destination()) + } + _ => { + opte::engine::err!("Wut"); + return; + } + }; + let Some(dev) = devs.iter().find(|x| { + x.vni == vni + && x.port.mac_addr().bytes() == ether_dst.as_bytes() + }) else { + // TODO add SDT probe + // TODO add stat + opte::engine::err!( + "[encap] no device found for vni: {} mac: {}", + vni, + ether_dst + ); + return; + }; + + let e_len = h.outer_eth.as_ref().map(|v| v.packet_length()); + let v_len = h.outer_v6.as_ref().map(|v| v.packet_length()); + let u_len = h.outer_udp.as_ref().map(|v| v.packet_length()); + let g_len = h.outer_encap.as_ref().map(|v| v.packet_length()); + + let pop_len: usize = [e_len, v_len, u_len, g_len] + .iter() + .map(|v| v.unwrap_or_default()) + .sum(); + + // opte::engine::err!("Want to pop: {}", pop_len); + + let port = &dev.port; + if let Ok(decision) = port.thin_process(Direction::In, &mut p) { + match decision { + opte::engine::port::ThinProcRes::PopEncap => { + let mut to_pop = pop_len; + for layer in pkt.iter_mut() { + let max_drop = layer.len(); + let will_drop = max_drop.min(to_pop); + layer.drop_front_bytes(will_drop); + to_pop -= will_drop; + + if to_pop == 0 { + break; + } + } + + // could theoretically have empty segments here. + // not an issue over NIC for now. 
+ mac::mac_rx(dev.mh, mrh, pkt.unwrap_mblk()); + } + // we know this to be true given how we cfg opte + opte::engine::port::ThinProcRes::PushEncap(_, _, _) => { + unreachable!() + } + opte::engine::port::ThinProcRes::Na => unreachable!(), + } + return; + } + } + Err(e) => { + let mut bytes = vec![]; + pkt.iter().for_each(|v| bytes.extend_from_slice(v)); + // opte::engine::err!("NEW Rx bad packet: {:?} -> {:?}", e, bytes); + bad_packet_parse_probe( + None, + Direction::In, + mblk_addr, + &PacketError::Parse( + opte::engine::packet::ParseError::UnexpectedProtocol( + 99.into(), + ), + ), + ); + } + } + // opte::engine::err!("bk to basics."); + let pkt = pkt.as_pkt(); + // We must first parse the packet in order to determine where it // is to be delivered. let parser = VpcParser {}; From 7ba068bd802fec2b59a5e7e70583d1991d38fa8f Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 21 Aug 2024 16:06:03 +0100 Subject: [PATCH 004/115] Merge conflict errors. --- xde/src/dls/mod.rs | 27 +++++++++++++++++++++++++++ xde/src/xde.rs | 10 +++------- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/xde/src/dls/mod.rs b/xde/src/dls/mod.rs index 6cab4783..46ad98ef 100644 --- a/xde/src/dls/mod.rs +++ b/xde/src/dls/mod.rs @@ -21,6 +21,7 @@ use illumos_sys_hdrs::c_int; use illumos_sys_hdrs::datalink_id_t; use illumos_sys_hdrs::uintptr_t; use illumos_sys_hdrs::ENOENT; +use opte::engine::ingot_packet::MsgBlk; use opte::engine::packet::Packet; use opte::engine::packet::PacketState; pub use sys::*; @@ -222,6 +223,32 @@ impl DlsStream { ) }; } + + pub fn tx_drop_on_no_desc2( + &self, + pkt: MsgBlk, + hint: uintptr_t, + flags: MacTxFlags, + ) { + let Some(inner) = self.inner.as_ref() else { + // XXX: probably handle or signal an error here. + return; + }; + // We must unwrap the raw `mblk_t` out of the `pkt` here, + // otherwise the mblk_t would be dropped at the end of this + // function along with `pkt`. 
+ let mut raw_flags = flags.bits(); + raw_flags |= MAC_DROP_ON_NO_DESC; + unsafe { + // mac_tx(self.mch, pkt.unwrap_mblk(), hint, raw_flags, &mut ret_mp) + str_mdata_fastpath_put( + inner.dld_str.as_ptr(), + pkt.unwrap_mblk(), + hint, + raw_flags, + ) + }; + } } impl MacClient for DlsStream { diff --git a/xde/src/xde.rs b/xde/src/xde.rs index d03132fc..88db2e48 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -1541,7 +1541,8 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { let pkt_len_old = pkt.byte_len(); match Parsed2::parse(pkt.iter_mut(), Direction::Out) { Ok(mut p) => { - let mch = &src_dev.u1.mch; + // let mch = &src_dev.u1.mch; + let stream = &src_dev.u1.stream; let hint = 0; let port = &src_dev.port; let flow_id = p.flow; @@ -1666,12 +1667,7 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { .map(|v| v.as_ref().len()) .collect::>(); - // opte::engine::err!( - // "okay, we did pkt surgery: {:?} szs {:?}", - // new_bytes, szs - // ); - - underlay_dev.mch.tx_drop_on_no_desc2( + underlay_dev.stream.tx_drop_on_no_desc2( new_pkt, hint, MacTxFlags::empty(), From 759b3372a99748955f704281e254128b87d900db Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 21 Aug 2024 16:55:50 +0100 Subject: [PATCH 005/115] Attempt to minimise fastpath lock contention. --- lib/opte/src/engine/headers.rs | 2 +- lib/opte/src/engine/port.rs | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/lib/opte/src/engine/headers.rs b/lib/opte/src/engine/headers.rs index 646c0217..fa844fd9 100644 --- a/lib/opte/src/engine/headers.rs +++ b/lib/opte/src/engine/headers.rs @@ -600,7 +600,7 @@ impl HeaderActionModify for UlpMeta { } /// The action to take for a particular header transposition. 
-#[derive(Clone, Debug, Default, Deserialize, Serialize)] +#[derive(Copy, Clone, Debug, Default, Deserialize, Serialize)] pub enum HeaderAction where P: PushAction + fmt::Debug, diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index b1cd4804..539fb1ab 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -499,7 +499,7 @@ pub struct UftEntry { pair: Option, /// The transformations to perform. - xforms: Transforms, + xforms: Arc, /// The port epoch upon which this entry was established. Used for /// invalidation when the rule set is updated. @@ -1232,13 +1232,14 @@ impl Port { let Some(a) = a else { // eh. It will get recirc'd for free... // opte::engine::err!("not found! Releasing!"); - drop(data); return Err(ProcessError::FlowTableFull { kind: "()", limit: 0, }); }; // opte::engine::err!("found!"); + let xforms = Arc::clone(&a.state().xforms); + drop(data); let mut hm = pkt.meta.0.headers_mut(); @@ -1246,7 +1247,7 @@ impl Port { let mut new_ip = None; let mut new_encap = None; // opte::engine::err!("xforms {:?}!", &a.state().xforms.hdr); - for xf in &a.state().xforms.hdr { + for xf in &xforms.hdr { // opte::engine::err!("xf..."); if let HeaderAction::Push(outer_eth, _) = &xf.outer_ether { new_eth = Some(outer_eth.clone()); @@ -1386,13 +1387,15 @@ impl Port { limit: 0, }); }; + let xforms = Arc::clone(&a.state().xforms); + drop(data); let mut hm = pkt.meta.0.headers_mut(); let mut pop_eth = false; let mut pop_ip = false; let mut pop_encap = false; - for xf in &a.state().xforms.hdr { + for xf in &xforms.hdr { // opte::engine::err!("xf..."); if let HeaderAction::Pop = &xf.outer_ether { pop_eth = true; @@ -2120,7 +2123,8 @@ impl Port { } let ufid_out = pkt.flow().mirror(); - let hte = UftEntry { pair: Some(ufid_out), xforms, epoch }; + let hte = + UftEntry { pair: Some(ufid_out), xforms: xforms.into(), epoch }; // Keep around the comment on the `None` arm #[allow(clippy::single_match)] @@ -2495,7 +2499,7 @@ impl Port { let 
mut xforms = Transforms::new(); let flow_before = *pkt.flow(); let res = self.layers_process(data, Out, pkt, &mut xforms, ameta); - let hte = UftEntry { pair: None, xforms, epoch }; + let hte = UftEntry { pair: None, xforms: xforms.into(), epoch }; match res { Ok(LayerResult::Allow) => { From 7b5100a70377baac997176523e6d5111595d92fc Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 21 Aug 2024 17:40:05 +0100 Subject: [PATCH 006/115] Nudge along CI a little bit --- .github/buildomat/jobs/bench.sh | 3 +++ .github/buildomat/jobs/opte-api.sh | 3 +++ .github/buildomat/jobs/opte-ioctl.sh | 3 +++ .github/buildomat/jobs/opte.sh | 3 +++ .github/buildomat/jobs/opteadm.sh | 3 +++ .github/buildomat/jobs/oxide-vpc.sh | 3 +++ .github/buildomat/jobs/p5p.sh | 3 +++ .github/buildomat/jobs/test.sh | 3 +++ .github/buildomat/jobs/xde.sh | 3 +++ 9 files changed, 27 insertions(+) diff --git a/.github/buildomat/jobs/bench.sh b/.github/buildomat/jobs/bench.sh index 458c8bd0..5456d9cd 100644 --- a/.github/buildomat/jobs/bench.sh +++ b/.github/buildomat/jobs/bench.sh @@ -7,6 +7,9 @@ #: output_rules = [ #: "=/work/bench-results.tgz", #: ] +#: access_repos = [ +#: "oxidecomputer/ingot", +#: ] #: #: [[publish]] #: series = "benchmark" diff --git a/.github/buildomat/jobs/opte-api.sh b/.github/buildomat/jobs/opte-api.sh index eb4d0a7b..c08c3ff5 100755 --- a/.github/buildomat/jobs/opte-api.sh +++ b/.github/buildomat/jobs/opte-api.sh @@ -5,6 +5,9 @@ #: target = "helios-2.0" #: rust_toolchain = "nightly-2024-05-12" #: output_rules = [] +#: access_repos = [ +#: "oxidecomputer/ingot", +#: ] #: set -o errexit diff --git a/.github/buildomat/jobs/opte-ioctl.sh b/.github/buildomat/jobs/opte-ioctl.sh index fdc61df0..5f2adf40 100755 --- a/.github/buildomat/jobs/opte-ioctl.sh +++ b/.github/buildomat/jobs/opte-ioctl.sh @@ -5,6 +5,9 @@ #: target = "helios-2.0" #: rust_toolchain = "nightly-2024-05-12" #: output_rules = [] +#: access_repos = [ +#: "oxidecomputer/ingot", +#: ] #: set -o errexit diff --git 
a/.github/buildomat/jobs/opte.sh b/.github/buildomat/jobs/opte.sh index a04d14a5..a4668e9f 100755 --- a/.github/buildomat/jobs/opte.sh +++ b/.github/buildomat/jobs/opte.sh @@ -5,6 +5,9 @@ #: target = "helios-2.0" #: rust_toolchain = "nightly-2024-05-12" #: output_rules = [] +#: access_repos = [ +#: "oxidecomputer/ingot", +#: ] #: set -o errexit diff --git a/.github/buildomat/jobs/opteadm.sh b/.github/buildomat/jobs/opteadm.sh index 56133dde..18193b98 100755 --- a/.github/buildomat/jobs/opteadm.sh +++ b/.github/buildomat/jobs/opteadm.sh @@ -10,6 +10,9 @@ #: "=/work/release/opteadm", #: "=/work/release/opteadm.release.sha256", #: ] +#: access_repos = [ +#: "oxidecomputer/ingot", +#: ] #: set -o errexit diff --git a/.github/buildomat/jobs/oxide-vpc.sh b/.github/buildomat/jobs/oxide-vpc.sh index 65e97ab9..da3cc073 100755 --- a/.github/buildomat/jobs/oxide-vpc.sh +++ b/.github/buildomat/jobs/oxide-vpc.sh @@ -5,6 +5,9 @@ #: target = "helios-2.0" #: rust_toolchain = "nightly-2024-05-12" #: output_rules = [] +#: access_repos = [ +#: "oxidecomputer/ingot", +#: ] #: set -o errexit diff --git a/.github/buildomat/jobs/p5p.sh b/.github/buildomat/jobs/p5p.sh index 1d51caff..20e5c65c 100755 --- a/.github/buildomat/jobs/p5p.sh +++ b/.github/buildomat/jobs/p5p.sh @@ -8,6 +8,9 @@ #: "=/out/opte.p5p", #: "=/out/opte.p5p.sha256", #: ] +#: access_repos = [ +#: "oxidecomputer/ingot", +#: ] #: #: [[publish]] #: series = "repo" diff --git a/.github/buildomat/jobs/test.sh b/.github/buildomat/jobs/test.sh index 00262f91..f678ca2f 100755 --- a/.github/buildomat/jobs/test.sh +++ b/.github/buildomat/jobs/test.sh @@ -7,6 +7,9 @@ #: output_rules = [ #: "/work/*.log", #: ] +#: access_repos = [ +#: "oxidecomputer/ingot", +#: ] #: #: [dependencies.xde] #: job = "opte-xde" diff --git a/.github/buildomat/jobs/xde.sh b/.github/buildomat/jobs/xde.sh index af1aa68b..83986624 100755 --- a/.github/buildomat/jobs/xde.sh +++ b/.github/buildomat/jobs/xde.sh @@ -16,6 +16,9 @@ #: "=/work/test/loopback", #: 
"=/work/xde.conf", #: ] +#: access_repos = [ +#: "oxidecomputer/ingot", +#: ] #: #: [[publish]] #: series = "module" From 17b5c022e84e8734d5a52f485be10eb4ca95426a Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 21 Aug 2024 17:53:07 +0100 Subject: [PATCH 007/115] Wow, that was wasteful. --- xde/src/xde.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 88db2e48..65f086f3 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -1659,14 +1659,6 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { // unwrapped it above. let new_pkt = MsgBlk::wrap_mblk(mblk).unwrap(); - let new_bytes = - new_pkt.iter().map(|v| v.as_ref()).collect::>(); - - let szs = new_pkt - .iter() - .map(|v| v.as_ref().len()) - .collect::>(); - underlay_dev.stream.tx_drop_on_no_desc2( new_pkt, hint, From b905c8ce5facd9202373fa77f77397be6f1cb9a7 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 21 Aug 2024 17:58:26 +0100 Subject: [PATCH 008/115] ...CI? 
--- Cargo.lock | 6 +++--- Cargo.toml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a26622f2..56c7d2a5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -888,7 +888,7 @@ dependencies = [ [[package]] name = "ingot" version = "0.1.0" -source = "git+ssh://git@github.com/oxidecomputer/ingot.git?rev=044d62931a912589cb99890f193f00720841c1b9#044d62931a912589cb99890f193f00720841c1b9" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=044d62931a912589cb99890f193f00720841c1b9#044d62931a912589cb99890f193f00720841c1b9" dependencies = [ "bitflags 2.6.0", "ingot-macros", @@ -903,7 +903,7 @@ dependencies = [ [[package]] name = "ingot-macros" version = "0.1.0" -source = "git+ssh://git@github.com/oxidecomputer/ingot.git?rev=044d62931a912589cb99890f193f00720841c1b9#044d62931a912589cb99890f193f00720841c1b9" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=044d62931a912589cb99890f193f00720841c1b9#044d62931a912589cb99890f193f00720841c1b9" dependencies = [ "darling", "prettyplease", @@ -916,7 +916,7 @@ dependencies = [ [[package]] name = "ingot-types" version = "0.1.0" -source = "git+ssh://git@github.com/oxidecomputer/ingot.git?rev=044d62931a912589cb99890f193f00720841c1b9#044d62931a912589cb99890f193f00720841c1b9" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=044d62931a912589cb99890f193f00720841c1b9#044d62931a912589cb99890f193f00720841c1b9" dependencies = [ "heapless", "macaddr", diff --git a/Cargo.toml b/Cargo.toml index 6a70c92e..88625f29 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,8 +50,8 @@ ctor = "0.2" darling = "0.20" dyn-clone = "1.0" heapless = "0.8" -ingot = { git = "ssh://git@github.com/oxidecomputer/ingot.git", rev = "044d62931a912589cb99890f193f00720841c1b9"} -ingot-types = { git = "ssh://git@github.com/oxidecomputer/ingot.git", rev = "044d62931a912589cb99890f193f00720841c1b9"} +ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = 
"044d62931a912589cb99890f193f00720841c1b9"} +ingot-types = { git = "https://github.com/oxidecomputer/ingot.git", rev = "044d62931a912589cb99890f193f00720841c1b9"} ipnetwork = { version = "0.20", default-features = false } itertools = { version = "0.13", default-features = false } libc = "0.2" From 470f2bc7b6168051c150df0c2cae61c30dc3e631 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 21 Aug 2024 18:08:30 +0100 Subject: [PATCH 009/115] ...CI?? --- crates/opte-api/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/opte-api/src/lib.rs b/crates/opte-api/src/lib.rs index 9298dbfe..5ead29a8 100644 --- a/crates/opte-api/src/lib.rs +++ b/crates/opte-api/src/lib.rs @@ -47,7 +47,7 @@ pub use ulp::*; /// /// We rely on CI and the check-api-version.sh script to verify that /// this number is incremented anytime the oxide-api code changes. -pub const API_VERSION: u64 = 33; +pub const API_VERSION: u64 = 34; /// Major version of the OPTE package. pub const MAJOR_VERSION: u64 = 0; From 6345ba6c3d85f831efd000df44c9942785835150 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 23 Aug 2024 18:57:54 +0100 Subject: [PATCH 010/115] Don't recompute flowhash for UFTs += 100 Mbps --- Cargo.toml | 1 + lib/opte/src/engine/ingot_packet.rs | 5 +++-- lib/opte/src/engine/packet.rs | 9 +++++++++ lib/opte/src/engine/port.rs | 30 ++++++++++++++++++++++++++--- lib/oxide-vpc/src/engine/overlay.rs | 2 +- xde/src/xde.rs | 9 ++++----- 6 files changed, 45 insertions(+), 11 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 88625f29..62322a72 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -84,3 +84,4 @@ poptrie = { git = "https://github.com/oxidecomputer/poptrie", branch = "multipat [profile.release] debug = 2 lto = true +# codegen-units = 1 diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 4fb81ebe..71c300be 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -435,6 +435,7 
@@ pub struct Parsed2 { pub meta: PacketMeta3, pub flow: InnerFlowId, pub body_csum: Option, + pub l4_hash: Option, // body: BodyInfo, // body_modified: bool, } @@ -535,7 +536,7 @@ impl Parsed2 let pseudo_csum = match meta.0.headers().inner_eth.ethertype() { // ARP 0x0806 => { - return Ok(Self { meta, body_csum: None, flow }); + return Ok(Self { meta, body_csum: None, flow, l4_hash: None }); } // Ipv4 0x0800 => { @@ -576,6 +577,6 @@ impl Parsed2 v }); - Ok(Self { meta, flow, body_csum }) + Ok(Self { meta, flow, body_csum, l4_hash: None }) } } diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index c2f60899..b20d1a63 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -52,6 +52,7 @@ use super::NetworkParser; use crate::d_error::DError; use core::fmt; use core::fmt::Display; +use core::hash::Hash; use core::marker::PhantomData; use core::ops::Deref; use core::ops::DerefMut; @@ -136,6 +137,14 @@ pub struct InnerFlowId { pub dst_port: u16, } +impl InnerFlowId { + pub fn crc32(&self) -> u32 { + let mut hasher = Hasher::new(); + self.hash(&mut hasher); + hasher.finalize() + } +} + impl Default for InnerFlowId { fn default() -> Self { FLOW_ID_DEFAULT diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 539fb1ab..45062fa7 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -69,6 +69,7 @@ use alloc::sync::Arc; use alloc::vec::Vec; use core::fmt; use core::fmt::Display; +use core::hash::Hash; use core::num::NonZeroU32; use core::result; use core::str::FromStr; @@ -501,6 +502,9 @@ pub struct UftEntry { /// The transformations to perform. xforms: Arc, + /// Cached flow hash to speed up route selection. + l4_hash: u32, + /// The port epoch upon which this entry was established. Used for /// invalidation when the rule set is updated. 
epoch: u64, @@ -1237,8 +1241,13 @@ impl Port { limit: 0, }); }; + pkt.l4_hash = Some(a.state().l4_hash); // opte::engine::err!("found!"); let xforms = Arc::clone(&a.state().xforms); + Self::update_stats_out( + &mut data.stats.vals, + &Ok(ProcessResult::Modified), + ); drop(data); let mut hm = pkt.meta.0.headers_mut(); @@ -1387,7 +1396,12 @@ impl Port { limit: 0, }); }; + pkt.l4_hash = Some(a.state().l4_hash); let xforms = Arc::clone(&a.state().xforms); + Self::update_stats_in( + &mut data.stats.vals, + &Ok(ProcessResult::Modified), + ); drop(data); let mut hm = pkt.meta.0.headers_mut(); @@ -2123,8 +2137,12 @@ impl Port { } let ufid_out = pkt.flow().mirror(); - let hte = - UftEntry { pair: Some(ufid_out), xforms: xforms.into(), epoch }; + let hte = UftEntry { + pair: Some(ufid_out), + xforms: xforms.into(), + epoch, + l4_hash: ufid_in.crc32(), + }; // Keep around the comment on the `None` arm #[allow(clippy::single_match)] @@ -2499,7 +2517,13 @@ impl Port { let mut xforms = Transforms::new(); let flow_before = *pkt.flow(); let res = self.layers_process(data, Out, pkt, &mut xforms, ameta); - let hte = UftEntry { pair: None, xforms: xforms.into(), epoch }; + // XXXX: may be hashing the wrong thing. 
+ let hte = UftEntry { + pair: None, + xforms: xforms.into(), + epoch, + l4_hash: flow_before.crc32(), + }; match res { Ok(LayerResult::Allow) => { diff --git a/lib/oxide-vpc/src/engine/overlay.rs b/lib/oxide-vpc/src/engine/overlay.rs index 730ac41e..b6ed4778 100644 --- a/lib/oxide-vpc/src/engine/overlay.rs +++ b/lib/oxide-vpc/src/engine/overlay.rs @@ -349,7 +349,7 @@ impl StaticAction for EncapAction { outer_encap: HeaderAction::Push( EncapPush::from(GenevePush { vni: phys_target.vni, - entropy: 7777, + entropy: flow_id.crc32() as u16, }), PhantomData, ), diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 65f086f3..086b0310 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -1547,13 +1547,10 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { let port = &src_dev.port; let flow_id = p.flow; - let mut hasher = Hasher::new(); - flow_id.hash(&mut hasher); - let f_hash = hasher.finalize(); - // TODO: emit hdr, reuse cksum, actually send... let mut ip6_src = Default::default(); let mut ip6_dst = Default::default(); + let mut f_hash = None; if let Ok(decision) = port.thin_process(Direction::Out, &mut p) { match decision { opte::engine::port::ThinProcRes::PushEncap( @@ -1561,6 +1558,8 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { ip, udp, ) => { + f_hash = p.l4_hash; + // TODO: generate methods to fill a maybeuninit. // total bytes: ETH 14, V6 40, UDP 8, GENEVE 8 let new_hdrs = 14 + 40 + 8 + 8; @@ -1644,7 +1643,7 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { return ptr::null_mut(); } - let my_key = RouteKey { dst: ip6_dst, l4_hash: Some(f_hash) }; + let my_key = RouteKey { dst: ip6_dst, l4_hash: f_hash }; let Route { src, dst, underlay_dev } = src_dev.routes.next_hop(my_key, src_dev); From fb4f9c1e522269f8372d6838151c499827ec6473 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 23 Aug 2024 19:33:36 +0100 Subject: [PATCH 011/115] Reintroduce headroom for ETH alignment. 
--- Cargo.toml | 1 - xde/src/xde.rs | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 62322a72..88625f29 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -84,4 +84,3 @@ poptrie = { git = "https://github.com/oxidecomputer/poptrie", branch = "multipat [profile.release] debug = 2 lto = true -# codegen-units = 1 diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 086b0310..dbac2bd6 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -1550,7 +1550,7 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { // TODO: emit hdr, reuse cksum, actually send... let mut ip6_src = Default::default(); let mut ip6_dst = Default::default(); - let mut f_hash = None; + let f_hash; if let Ok(decision) = port.thin_process(Direction::Out, &mut p) { match decision { opte::engine::port::ThinProcRes::PushEncap( @@ -1564,7 +1564,7 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { // total bytes: ETH 14, V6 40, UDP 8, GENEVE 8 let new_hdrs = 14 + 40 + 8 + 8; let mut new_blk = - MsgBlk::new_with_headroom(0, new_hdrs); + MsgBlk::new_with_headroom(2, new_hdrs); new_blk.write(14, |uninit| { let slice = unsafe { From 6200a294d47c917aab493b56ebe715576e8a3a1e Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 29 Aug 2024 12:29:39 +0100 Subject: [PATCH 012/115] Test fixups, post-V6EH world. We don't actually lose any real-terms perf, go us. 
--- Cargo.lock | 66 ++------------- Cargo.toml | 4 +- lib/opte/src/engine/ingot_packet.rs | 103 ++++++++++++----------- lib/opte/src/engine/port.rs | 42 ++++----- lib/oxide-vpc/tests/integration_tests.rs | 6 +- xde/src/xde.rs | 45 +++++----- 6 files changed, 113 insertions(+), 153 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 56c7d2a5..c283d822 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -795,12 +795,6 @@ version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" -[[package]] -name = "glob" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" - [[package]] name = "goblin" version = "0.8.2" @@ -888,24 +882,22 @@ dependencies = [ [[package]] name = "ingot" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=044d62931a912589cb99890f193f00720841c1b9#044d62931a912589cb99890f193f00720841c1b9" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=053c02f34c5e43d0132d6446caf00f4f8fb13be1#053c02f34c5e43d0132d6446caf00f4f8fb13be1" dependencies = [ "bitflags 2.6.0", "ingot-macros", "ingot-types", "macaddr", - "pnet_macros", - "pnet_macros_support", - "pnet_packet", "zerocopy 0.8.0-alpha.16", ] [[package]] name = "ingot-macros" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=044d62931a912589cb99890f193f00720841c1b9#044d62931a912589cb99890f193f00720841c1b9" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=053c02f34c5e43d0132d6446caf00f4f8fb13be1#053c02f34c5e43d0132d6446caf00f4f8fb13be1" dependencies = [ "darling", + "itertools 0.13.0", "prettyplease", "proc-macro2", "quote", @@ -916,11 +908,11 @@ dependencies = [ [[package]] name = "ingot-types" version = "0.1.0" -source = 
"git+https://github.com/oxidecomputer/ingot.git?rev=044d62931a912589cb99890f193f00720841c1b9#044d62931a912589cb99890f193f00720841c1b9" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=053c02f34c5e43d0132d6446caf00f4f8fb13be1#053c02f34c5e43d0132d6446caf00f4f8fb13be1" dependencies = [ "heapless", + "ingot-macros", "macaddr", - "pnet_macros_support", "zerocopy 0.8.0-alpha.16", ] @@ -1176,12 +1168,6 @@ dependencies = [ "libc", ] -[[package]] -name = "no-std-net" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43794a0ace135be66a25d3ae77d41b91615fb68ae937f904090203e81f755b65" - [[package]] name = "nom" version = "7.1.3" @@ -1527,48 +1513,6 @@ dependencies = [ "plotters-backend", ] -[[package]] -name = "pnet_base" -version = "0.35.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffc190d4067df16af3aba49b3b74c469e611cad6314676eaf1157f31aa0fb2f7" -dependencies = [ - "no-std-net", -] - -[[package]] -name = "pnet_macros" -version = "0.35.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13325ac86ee1a80a480b0bc8e3d30c25d133616112bb16e86f712dcf8a71c863" -dependencies = [ - "proc-macro2", - "quote", - "regex", - "syn 2.0.75", -] - -[[package]] -name = "pnet_macros_support" -version = "0.35.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eed67a952585d509dd0003049b1fc56b982ac665c8299b124b90ea2bdb3134ab" -dependencies = [ - "pnet_base", -] - -[[package]] -name = "pnet_packet" -version = "0.35.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c96ebadfab635fcc23036ba30a7d33a80c39e8461b8bd7dc7bb186acb96560f" -dependencies = [ - "glob", - "pnet_base", - "pnet_macros", - "pnet_macros_support", -] - [[package]] name = "poptrie" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 88625f29..6b7a6f2a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,8 +50,8 @@ ctor = "0.2" darling = "0.20" 
dyn-clone = "1.0" heapless = "0.8" -ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "044d62931a912589cb99890f193f00720841c1b9"} -ingot-types = { git = "https://github.com/oxidecomputer/ingot.git", rev = "044d62931a912589cb99890f193f00720841c1b9"} +ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "053c02f34c5e43d0132d6446caf00f4f8fb13be1"} +ingot-types = { git = "https://github.com/oxidecomputer/ingot.git", rev = "053c02f34c5e43d0132d6446caf00f4f8fb13be1"} ipnetwork = { version = "0.20", default-features = false } itertools = { version = "0.13", default-features = false } libc = "0.2" diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 71c300be..ad924d0c 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -8,33 +8,34 @@ use core::slice; use illumos_sys_hdrs as ddi; use illumos_sys_hdrs::mblk_t; -use ingot::types::Chunk; +use ingot::ethernet::EthernetPacket; +use ingot::ethernet::EthernetRef; +use ingot::ethernet::Ethertype; +use ingot::ethernet::ValidEthernet; +use ingot::example_chain::Ulp; +use ingot::example_chain::L3; +use ingot::example_chain::L4; +use ingot::geneve::GenevePacket; +use ingot::icmp::IcmpV4Ref; +use ingot::icmp::IcmpV6Ref; +use ingot::ip::IpProtocol; +use ingot::ip::Ipv4; +use ingot::ip::Ipv4Ref; +use ingot::ip::Ipv6Packet; +use ingot::ip::Ipv6Ref; +use ingot::tcp::TcpPacket; +use ingot::tcp::TcpRef; use ingot::types::HasView; +use ingot::types::Header; +use ingot::types::HeaderStack; use ingot::types::ParseControl; use ingot::types::ParseError as IngotParseErr; +use ingot::types::ParseResult; use ingot::types::Parsed as IngotParsed; use ingot::types::Read; -use ingot::EthernetPacket; -use ingot::EthernetRef; -use ingot::GenevePacket; -use ingot::IcmpV4Ref; -use ingot::IcmpV6Ref; -use ingot::Ipv4; -use ingot::Ipv4Ref; -use ingot::Ipv6Packet; -use ingot::Ipv6Ref; +use ingot::udp::UdpPacket; +use ingot::udp::UdpRef; use 
ingot::Parse; -use ingot::TcpPacket; -use ingot::TcpRef; -use ingot::UdpPacket; -use ingot::UdpRef; -use ingot::Ulp; -use ingot::ValidEthernet; -use ingot::L3; -use ingot::L4; -use ingot_types::Header; -use ingot_types::HeaderStack; -use ingot_types::ParseResult; use opte_api::Direction; use zerocopy::ByteSlice; use zerocopy::ByteSliceMut; @@ -48,21 +49,13 @@ use super::packet::Initialized; use super::packet::Packet; use illumos_sys_hdrs::uintptr_t; -// NOTE: these are not being handled correctly and need to be -// stealth-imported in ingot. -use ingot_types::HeaderParse; -use ingot_types::NextLayer; -use ingot_types::ParseChoice; -// (also, need to cleanup ::ingot_types vs. ::ingot::types -// imports, somehow) - use super::checksum::Checksum; use super::packet::AddrPair; use super::packet::InnerFlowId; use super::packet::FLOW_ID_DEFAULT; #[derive(Parse)] -pub struct OpteIn { +pub struct OpteIn { pub outer_eth: EthernetPacket, #[ingot(from = "L3")] pub outer_v6: Ipv6Packet, @@ -79,7 +72,7 @@ pub struct OpteIn { #[inline] fn exit_on_arp(eth: &EthernetPacket) -> ParseControl { - if eth.ethertype() == 0x0806 { + if eth.ethertype() == Ethertype::ARP { ParseControl::Accept } else { ParseControl::Continue @@ -87,7 +80,7 @@ fn exit_on_arp(eth: &EthernetPacket) -> ParseControl { } #[derive(Parse)] -pub struct OpteOut { +pub struct OpteOut { #[ingot(control = exit_on_arp)] pub inner_eth: EthernetPacket, pub inner_l3: Option>, @@ -297,7 +290,7 @@ impl Drop for MsgBlk { } } -pub struct OpteUnified { +pub struct OpteUnified { pub outer_eth: Option>, pub outer_v6: Option>, pub outer_udp: Option>, @@ -308,7 +301,7 @@ pub struct OpteUnified { pub inner_ulp: Option>, } -impl From> for OpteUnified { +impl From> for OpteUnified { fn from(value: OpteIn) -> Self { Self { outer_eth: Some(value.outer_eth), @@ -322,7 +315,7 @@ impl From> for OpteUnified { } } -impl From> for OpteUnified { +impl From> for OpteUnified { fn from(value: OpteOut) -> Self { Self { outer_eth: None, @@ -341,16 
+334,18 @@ pub struct PacketMeta3( ); impl PacketMeta3 { - pub fn inner_l3(&self) -> Option<&ingot::L3> { + pub fn inner_l3(&self) -> Option<&ingot::example_chain::L3> { self.0.headers().inner_l3.as_ref() } - pub fn inner_ulp(&self) -> Option<&ingot::Ulp> { + pub fn inner_ulp(&self) -> Option<&ingot::example_chain::Ulp> { self.0.headers().inner_ulp.as_ref() } } -fn actual_src_port(chunk: &ingot::Ulp) -> Option { +fn actual_src_port( + chunk: &ingot::example_chain::Ulp, +) -> Option { match chunk { Ulp::Tcp(pkt) => Some(pkt.source()), Ulp::Udp(pkt) => Some(pkt.source()), @@ -358,7 +353,9 @@ fn actual_src_port(chunk: &ingot::Ulp) -> Option { } } -fn actual_dst_port(chunk: &ingot::Ulp) -> Option { +fn actual_dst_port( + chunk: &ingot::example_chain::Ulp, +) -> Option { match chunk { Ulp::Tcp(pkt) => Some(pkt.destination()), Ulp::Udp(pkt) => Some(pkt.destination()), @@ -366,7 +363,9 @@ fn actual_dst_port(chunk: &ingot::Ulp) -> Option { } } -fn pseudo_port(chunk: &ingot::Ulp) -> Option { +fn pseudo_port( + chunk: &ingot::example_chain::Ulp, +) -> Option { match chunk { Ulp::IcmpV4(pkt) if pkt.ty() == 0 || pkt.ty() == 3 => { Some(u16::from_be_bytes(pkt.rest_of_hdr()[..2].try_into().unwrap())) @@ -382,14 +381,14 @@ impl From<&PacketMeta3> for InnerFlowId { fn from(meta: &PacketMeta3) -> Self { let (proto, addrs) = match meta.inner_l3() { Some(L3::Ipv4(pkt)) => ( - pkt.protocol(), + pkt.protocol().0, AddrPair::V4 { src: pkt.source().into(), dst: pkt.destination().into(), }, ), Some(L3::Ipv6(pkt)) => ( - pkt.next_header(), + pkt.next_header().0, AddrPair::V6 { src: pkt.source().into(), dst: pkt.destination().into(), @@ -440,7 +439,7 @@ pub struct Parsed2 { // body_modified: bool, } -fn csum_minus_hdr(ulp: &Ulp) -> Option { +fn csum_minus_hdr(ulp: &Ulp) -> Option { match ulp { Ulp::IcmpV4(icmp) => { if icmp.checksum() == 0 { @@ -487,7 +486,13 @@ fn csum_minus_hdr(ulp: &Ulp) -> Option { csum.sub_bytes(&b[0..16]); csum.sub_bytes(&b[18..]); - csum.sub_bytes(t.1.as_ref()); + + 
// TODO: bad bound? + // csum.sub_bytes(t.1.as_ref()); + csum.sub_bytes(match &t.1 { + ingot_types::Packet::Repr(v) => &v[..], + ingot_types::Packet::Raw(v) => &v[..], + }); Some(csum) } @@ -535,32 +540,32 @@ impl Parsed2 let pseudo_csum = match meta.0.headers().inner_eth.ethertype() { // ARP - 0x0806 => { + Ethertype::ARP => { return Ok(Self { meta, body_csum: None, flow, l4_hash: None }); } // Ipv4 - 0x0800 => { + Ethertype::IPV4 => { let h = meta.0.headers(); let mut pseudo_hdr_bytes = [0u8; 12]; let Some(L3::Ipv4(ref v4)) = h.inner_l3 else { panic!() }; pseudo_hdr_bytes[0..4].copy_from_slice(&v4.source().octets()); pseudo_hdr_bytes[4..8] .copy_from_slice(&v4.destination().octets()); - pseudo_hdr_bytes[9] = v4.protocol(); + pseudo_hdr_bytes[9] = v4.protocol().0; let ulp_len = v4.total_len() - 4 * (v4.ihl() as u16); pseudo_hdr_bytes[10..].copy_from_slice(&ulp_len.to_be_bytes()); Checksum::compute(&pseudo_hdr_bytes) } // Ipv6 - 0x86dd => { + Ethertype::IPV6 => { let h = meta.0.headers(); let mut pseudo_hdr_bytes = [0u8; 40]; let Some(L3::Ipv6(ref v6)) = h.inner_l3 else { panic!() }; pseudo_hdr_bytes[0..16].copy_from_slice(&v6.source().octets()); pseudo_hdr_bytes[16..32] .copy_from_slice(&v6.destination().octets()); - pseudo_hdr_bytes[39] = v6.next_header(); + pseudo_hdr_bytes[39] = v6.next_header().0; let ulp_len = v6.payload_len() as u32; pseudo_hdr_bytes[32..36] .copy_from_slice(&ulp_len.to_be_bytes()); diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 45062fa7..23c42f11 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -77,17 +77,17 @@ use core::sync::atomic::AtomicU64; use core::sync::atomic::Ordering::SeqCst; #[cfg(all(not(feature = "std"), not(test)))] use illumos_sys_hdrs::uintptr_t; -use ingot::EthernetMut; -use ingot::IcmpV4Mut; -use ingot::IcmpV4Ref; -use ingot::IcmpV6Mut; -use ingot::IcmpV6Ref; -use ingot::Ipv4Mut; -use ingot::Ipv6Mut; -use ingot::TcpFlags; -use ingot::TcpMut; -use ingot::UdpMut; 
-use ingot::Ulp; +use ingot::ethernet::EthernetMut; +use ingot::example_chain::Ulp; +use ingot::icmp::IcmpV4Mut; +use ingot::icmp::IcmpV4Ref; +use ingot::icmp::IcmpV6Mut; +use ingot::icmp::IcmpV6Ref; +use ingot::ip::Ipv4Mut; +use ingot::ip::Ipv6Mut; +use ingot::tcp::TcpFlags; +use ingot::tcp::TcpMut; +use ingot::udp::UdpMut; use ingot_types::Read; use kstat_macro::KStatProvider; use opte_api::Direction; @@ -1278,8 +1278,9 @@ impl Port { if let HeaderAction::Modify(m, _) = &xf.inner_ip { match m { super::headers::IpMod::Ip4(v4) => { - let Some(ingot::L3::Ipv4(ref mut v4_t)) = - hm.inner_l3 + let Some(ingot::example_chain::L3::Ipv4( + ref mut v4_t, + )) = hm.inner_l3 else { return Err(ProcessError::FlowTableFull { kind: "()", @@ -1296,8 +1297,9 @@ impl Port { } } super::headers::IpMod::Ip6(v6) => { - let Some(ingot::L3::Ipv6(ref mut v6_t)) = - hm.inner_l3 + let Some(ingot::example_chain::L3::Ipv6( + ref mut v6_t, + )) = hm.inner_l3 else { return Err(ProcessError::FlowTableFull { kind: "()", @@ -1431,8 +1433,9 @@ impl Port { if let HeaderAction::Modify(m, _) = &xf.inner_ip { match m { super::headers::IpMod::Ip4(v4) => { - let Some(ingot::L3::Ipv4(ref mut v4_t)) = - hm.inner_l3 + let Some(ingot::example_chain::L3::Ipv4( + ref mut v4_t, + )) = hm.inner_l3 else { return Err(ProcessError::FlowTableFull { kind: "()", @@ -1449,8 +1452,9 @@ impl Port { } } super::headers::IpMod::Ip6(v6) => { - let Some(ingot::L3::Ipv6(ref mut v6_t)) = - hm.inner_l3 + let Some(ingot::example_chain::L3::Ipv6( + ref mut v6_t, + )) = hm.inner_l3 else { return Err(ProcessError::FlowTableFull { kind: "()", diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index a55958f4..4fbd4393 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -559,7 +559,7 @@ fn guest_to_guest() { match meta.outer.encap.as_ref() { Some(EncapMeta::Geneve(geneve)) => { - assert_eq!(geneve.entropy, 7777); + 
assert_eq!(geneve.entropy, 12700); assert_eq!(geneve.vni, Vni::new(g1_cfg.vni).unwrap()); } @@ -776,7 +776,7 @@ fn guest_to_internet_ipv4() { match meta.outer.encap.as_ref() { Some(EncapMeta::Geneve(geneve)) => { - assert_eq!(geneve.entropy, 7777); + assert_eq!(geneve.entropy, 24329); } None => panic!("expected outer Geneve metadata"), @@ -901,7 +901,7 @@ fn guest_to_internet_ipv6() { match meta.outer.encap.as_ref() { Some(EncapMeta::Geneve(geneve)) => { - assert_eq!(geneve.entropy, 7777); + assert_eq!(geneve.entropy, 63246); } None => panic!("expected outer Geneve metadata"), diff --git a/xde/src/xde.rs b/xde/src/xde.rs index dbac2bd6..f6d01aef 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -44,19 +44,20 @@ use core::ptr::addr_of_mut; use core::time::Duration; use crc32fast::Hasher; use illumos_sys_hdrs::*; +use ingot::ethernet::EthernetMut; +use ingot::ethernet::EthernetRef; +use ingot::ethernet::ValidEthernet; +use ingot::geneve::GeneveFlags; +use ingot::geneve::GeneveMut; +use ingot::geneve::GeneveRef; +use ingot::geneve::ValidGeneve; +use ingot::ip::IpProtocol; +use ingot::ip::Ipv6Mut; +use ingot::ip::ValidIpv6; use ingot::types::Header; use ingot::types::HeaderParse; -use ingot::EthernetMut; -use ingot::EthernetRef; -use ingot::GeneveFlags; -use ingot::GeneveMut; -use ingot::GeneveRef; -use ingot::Ipv6Mut; -use ingot::UdpMut; -use ingot::ValidEthernet; -use ingot::ValidGeneve; -use ingot::ValidIpv6; -use ingot::ValidUdp; +use ingot::udp::UdpMut; +use ingot::udp::ValidUdp; use opte::api::ClearXdeUnderlayReq; use opte::api::CmdOk; use opte::api::Direction; @@ -1570,11 +1571,13 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { let slice = unsafe { MaybeUninit::slice_assume_init_mut(uninit) }; - let (mut a, _) = + let (mut a, ..) 
= ValidEthernet::parse(slice).unwrap(); a.set_source(eth.src.bytes().into()); a.set_destination(eth.dst.bytes().into()); - a.set_ethertype(eth.ether_type.into()); + a.set_ethertype(ingot::ethernet::Ethertype( + eth.ether_type.into(), + )); // slice }); @@ -1585,14 +1588,18 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { let slice = unsafe { MaybeUninit::slice_assume_init_mut(uninit) }; - let (mut a, _) = ValidIpv6::parse(slice).unwrap(); + use ingot::types::NetworkRepr; + slice[6] = IpProtocol::UDP.to_network(); + let (mut a, ..) = ValidIpv6::parse(slice).unwrap(); a.set_version(6); a.set_dscp(0); - a.set_ecn(ingot::Ecn::NotCapable); + a.set_ecn(ingot::ip::Ecn::NotCapable); a.set_payload_len((pkt_len_old + 16) as u16); a.set_flow_label(0); a.set_hop_limit(128); - a.set_next_header(v6.proto.into()); + a.set_next_header(ingot::ip::IpProtocol( + v6.proto.into(), + )); a.set_source(v6.src.bytes().into()); a.set_destination(v6.dst.bytes().into()); @@ -1609,11 +1616,11 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { let EncapPush::Geneve(gen) = udp else { panic!() }; - let (mut a, rest) = ValidUdp::parse(slice).unwrap(); + let (mut a, .., rest) = + ValidUdp::parse(slice).unwrap(); // ideally write out w/o looking at contents, be safer. rest[0] = 0; - let (mut b, rest) = - ValidGeneve::parse(rest).unwrap(); + let (mut b, ..) = ValidGeneve::parse(rest).unwrap(); a.set_source(gen.entropy); a.set_destination(6081); From f1346b69135f6ceb5c433fad07bb2ce27fba6a6a Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 30 Aug 2024 20:39:36 +0100 Subject: [PATCH 013/115] Try out emit_uninit. 
--- Cargo.lock | 25 +++--- Cargo.toml | 5 +- lib/opte/Cargo.toml | 1 - lib/opte/src/engine/ingot_packet.rs | 6 +- lib/opte/src/engine/port.rs | 2 +- lib/opte/src/lib.rs | 2 + lib/oxide-vpc/src/engine/overlay.rs | 9 +++ xde/src/xde.rs | 113 +++++++++++++++------------- 8 files changed, 88 insertions(+), 75 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c283d822..f652b8b2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -882,19 +882,19 @@ dependencies = [ [[package]] name = "ingot" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=053c02f34c5e43d0132d6446caf00f4f8fb13be1#053c02f34c5e43d0132d6446caf00f4f8fb13be1" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=9582711e0d27b82feeadfbfacb9e2f7da4f86788#9582711e0d27b82feeadfbfacb9e2f7da4f86788" dependencies = [ "bitflags 2.6.0", "ingot-macros", "ingot-types", "macaddr", - "zerocopy 0.8.0-alpha.16", + "zerocopy 0.8.0-alpha.17", ] [[package]] name = "ingot-macros" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=053c02f34c5e43d0132d6446caf00f4f8fb13be1#053c02f34c5e43d0132d6446caf00f4f8fb13be1" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=9582711e0d27b82feeadfbfacb9e2f7da4f86788#9582711e0d27b82feeadfbfacb9e2f7da4f86788" dependencies = [ "darling", "itertools 0.13.0", @@ -908,12 +908,12 @@ dependencies = [ [[package]] name = "ingot-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=053c02f34c5e43d0132d6446caf00f4f8fb13be1#053c02f34c5e43d0132d6446caf00f4f8fb13be1" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=9582711e0d27b82feeadfbfacb9e2f7da4f86788#9582711e0d27b82feeadfbfacb9e2f7da4f86788" dependencies = [ "heapless", "ingot-macros", "macaddr", - "zerocopy 0.8.0-alpha.16", + "zerocopy 0.8.0-alpha.17", ] [[package]] @@ -1261,7 +1261,6 @@ dependencies = [ "heapless", "illumos-sys-hdrs", "ingot", - "ingot-types", "itertools 0.13.0", "kstat-macro", "opte", @@ -1272,7 +1271,7 @@ 
dependencies = [ "tabwriter", "usdt", "version_check", - "zerocopy 0.8.0-alpha.16", + "zerocopy 0.8.0-alpha.17", ] [[package]] @@ -1373,7 +1372,7 @@ dependencies = [ "smoltcp", "tabwriter", "usdt", - "zerocopy 0.8.0-alpha.16", + "zerocopy 0.8.0-alpha.17", ] [[package]] @@ -2745,11 +2744,11 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.0-alpha.16" +version = "0.8.0-alpha.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a5fe242a39bc4f8b8d808be6314c0f0e5e499a902c44e704f3c86a89f7a7c64" +checksum = "da056c7307048e30bce8d625c6f0633366d31f1086b3c87ed9b1f18fa1081cb1" dependencies = [ - "zerocopy-derive 0.8.0-alpha.16", + "zerocopy-derive 0.8.0-alpha.17", ] [[package]] @@ -2765,9 +2764,9 @@ dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.8.0-alpha.16" +version = "0.8.0-alpha.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76fc519c421ad48c6c8ba02cee449398d54276c839887f9f3562d1862b43b91c" +checksum = "9eb22123403bf9c05af423e2ced336a5fc2853df9179b42bea8144d6bf497a57" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 6b7a6f2a..547499f7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,8 +50,7 @@ ctor = "0.2" darling = "0.20" dyn-clone = "1.0" heapless = "0.8" -ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "053c02f34c5e43d0132d6446caf00f4f8fb13be1"} -ingot-types = { git = "https://github.com/oxidecomputer/ingot.git", rev = "053c02f34c5e43d0132d6446caf00f4f8fb13be1"} +ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "9582711e0d27b82feeadfbfacb9e2f7da4f86788"} ipnetwork = { version = "0.20", default-features = false } itertools = { version = "0.13", default-features = false } libc = "0.2" @@ -76,7 +75,7 @@ thiserror = "1.0" toml = "0.8" usdt = "0.5" version_check = "0.9" -zerocopy = { version = "0.8.0-alpha.16", features = ["derive"] } +zerocopy = { version = "0.8.0-alpha.17", features = ["derive"] } 
zone = { git = "https://github.com/oxidecomputer/zone" } ztest = { git = "https://github.com/oxidecomputer/falcon", branch = "main" } poptrie = { git = "https://github.com/oxidecomputer/poptrie", branch = "multipath" } diff --git a/lib/opte/Cargo.toml b/lib/opte/Cargo.toml index 6b32d602..b0c8160e 100644 --- a/lib/opte/Cargo.toml +++ b/lib/opte/Cargo.toml @@ -28,7 +28,6 @@ kstat-macro.workspace = true opte-api.workspace = true ingot.workspace = true -ingot-types.workspace = true cfg-if.workspace = true crc32fast = { workspace = true, optional = true } diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index ad924d0c..1af11c42 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -415,7 +415,7 @@ impl From<&PacketMeta3> for InnerFlowId { } } -fn transform_parse_stage1>( +fn transform_parse_stage1>( p: IngotParsed, ) -> IngotParsed { IngotParsed { @@ -490,8 +490,8 @@ fn csum_minus_hdr(ulp: &Ulp) -> Option { // TODO: bad bound? 
// csum.sub_bytes(t.1.as_ref()); csum.sub_bytes(match &t.1 { - ingot_types::Packet::Repr(v) => &v[..], - ingot_types::Packet::Raw(v) => &v[..], + ingot::types::Packet::Repr(v) => &v[..], + ingot::types::Packet::Raw(v) => &v[..], }); Some(csum) diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 23c42f11..93ada181 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -87,8 +87,8 @@ use ingot::ip::Ipv4Mut; use ingot::ip::Ipv6Mut; use ingot::tcp::TcpFlags; use ingot::tcp::TcpMut; +use ingot::types::Read; use ingot::udp::UdpMut; -use ingot_types::Read; use kstat_macro::KStatProvider; use opte_api::Direction; use opte_api::MacAddr; diff --git a/lib/opte/src/lib.rs b/lib/opte/src/lib.rs index 970e02ee..e2f1a31c 100644 --- a/lib/opte/src/lib.rs +++ b/lib/opte/src/lib.rs @@ -31,6 +31,8 @@ use alloc::boxed::Box; use core::fmt; use core::fmt::Display; +pub use ingot; + #[cfg(any(feature = "api", test))] pub mod api { pub use opte_api::*; diff --git a/lib/oxide-vpc/src/engine/overlay.rs b/lib/oxide-vpc/src/engine/overlay.rs index b6ed4778..a7a344cd 100644 --- a/lib/oxide-vpc/src/engine/overlay.rs +++ b/lib/oxide-vpc/src/engine/overlay.rs @@ -318,6 +318,8 @@ impl StaticAction for EncapAction { } }; + let f_hash = flow_id.crc32(); + Ok(AllowOrDeny::Allow(HdrTransform { name: ENCAP_NAME.to_string(), // We leave the outer src/dst up to the driver. @@ -346,6 +348,13 @@ impl StaticAction for EncapAction { // network is always IPv6, perhaps we should just use // that? For now I defer the choice and leave this // hard-coded. + // + // (kyle) -- I think we should use both, mainly because + // we can expose the extra entropy to devices which can use it. + // We may want flow id to be symmetric, however... + // It's worth keeping in mind that Chelsio's RSS picks us a ring + // based on Toeplitz hash of the 5-tuple, so we need to write into + // there regardless. I don't believe it *looks* at v6 flowid. 
outer_encap: HeaderAction::Push( EncapPush::from(GenevePush { vni: phys_target.vni, diff --git a/xde/src/xde.rs b/xde/src/xde.rs index f6d01aef..0b36646d 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -1567,72 +1567,77 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { let mut new_blk = MsgBlk::new_with_headroom(2, new_hdrs); + use opte::ingot::types::EmitUninit as _; + + let w_encap_bytes = (pkt_len_old + 16) as u16; + new_blk.write(14, |uninit| { - let slice = unsafe { - MaybeUninit::slice_assume_init_mut(uninit) - }; - let (mut a, ..) = - ValidEthernet::parse(slice).unwrap(); - a.set_source(eth.src.bytes().into()); - a.set_destination(eth.dst.bytes().into()); - a.set_ethertype(ingot::ethernet::Ethertype( - eth.ether_type.into(), - )); - - // slice + let complete_eth = + opte::ingot::ethernet::Ethernet { + destination: eth.dst.bytes().into(), + source: eth.src.bytes().into(), + ethertype: ingot::ethernet::Ethertype( + eth.ether_type.into(), + ), + }; + + complete_eth + .emit_uninit(uninit) + .expect("must be enough room..."); }); // we know we'er only pushing v6. let IpPush::Ip6(v6) = ip else { panic!() }; + ip6_src = v6.src; + ip6_dst = v6.dst; + new_blk.write(40, |uninit| { - let slice = unsafe { - MaybeUninit::slice_assume_init_mut(uninit) + let complete_v6 = opte::ingot::ip::Ipv6 { + version: 6, + dscp: 0, + ecn: ingot::ip::Ecn::NotCapable, + flow_label: 12345678, + payload_len: w_encap_bytes, + next_header: ingot::ip::IpProtocol( + v6.proto.into(), + ), + hop_limit: 128, + source: v6.src.bytes().into(), + destination: v6.dst.bytes().into(), + v6ext: vec![].into(), }; - use ingot::types::NetworkRepr; - slice[6] = IpProtocol::UDP.to_network(); - let (mut a, ..) 
= ValidIpv6::parse(slice).unwrap(); - a.set_version(6); - a.set_dscp(0); - a.set_ecn(ingot::ip::Ecn::NotCapable); - a.set_payload_len((pkt_len_old + 16) as u16); - a.set_flow_label(0); - a.set_hop_limit(128); - a.set_next_header(ingot::ip::IpProtocol( - v6.proto.into(), - )); - a.set_source(v6.src.bytes().into()); - a.set_destination(v6.dst.bytes().into()); - - ip6_src = v6.src; - ip6_dst = v6.dst; - - // slice + + complete_v6 + .emit_uninit(uninit) + .expect("must be enough room..."); }); + let EncapPush::Geneve(gen) = udp else { panic!() }; new_blk.write(16, |uninit| { - let slice = unsafe { - MaybeUninit::slice_assume_init_mut(uninit) + let complete_udp = opte::ingot::udp::Udp { + source: gen.entropy, + destination: 6081, + length: w_encap_bytes, + checksum: 0, + }; + let complete_geneve = opte::ingot::geneve::Geneve { + version: 0, + opt_len: 0, + flags: opte::ingot::geneve::GeneveFlags::empty( + ), + protocol_type: + opte::ingot::ethernet::Ethertype::ETHERNET, + vni: gen.vni.into(), + reserved: 0, + options: Vec::new(), }; - let EncapPush::Geneve(gen) = udp else { panic!() }; - - let (mut a, .., rest) = - ValidUdp::parse(slice).unwrap(); - // ideally write out w/o looking at contents, be safer. - rest[0] = 0; - let (mut b, ..) = ValidGeneve::parse(rest).unwrap(); - - a.set_source(gen.entropy); - a.set_destination(6081); - a.set_checksum(0); - a.set_length((pkt_len_old + 16) as u16); - - b.set_flags(GeneveFlags::empty()); - b.set_reserved(0); - b.set_protocol_type(0x6558); - b.set_vni(gen.vni.into()); - - // slice + let len = complete_udp + .emit_uninit(uninit) + .expect("must be enough room..."); + complete_geneve + .emit_uninit(&mut uninit[len..]) + .expect("must be enough room..."); }); core::mem::swap(&mut new_blk, &mut pkt); From c4be1cd258d9aebdef46b4ef2d65fe17897f025c Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 4 Sep 2024 14:36:13 +0100 Subject: [PATCH 014/115] Long march to complete integration... in progress. 
--- Cargo.lock | 6 +- Cargo.toml | 2 +- lib/opte/src/engine/arp.rs | 2 +- lib/opte/src/engine/dhcp.rs | 28 +- lib/opte/src/engine/dhcpv6/protocol.rs | 21 +- lib/opte/src/engine/ether.rs | 2 +- lib/opte/src/engine/geneve.rs | 6 +- lib/opte/src/engine/icmp/mod.rs | 10 +- lib/opte/src/engine/icmp/v4.rs | 19 +- lib/opte/src/engine/icmp/v6.rs | 105 +- lib/opte/src/engine/ingot_packet.rs | 757 +++++++++++-- lib/opte/src/engine/ioctl.rs | 4 +- lib/opte/src/engine/ip4.rs | 2 +- lib/opte/src/engine/layer.rs | 91 +- lib/opte/src/engine/mod.rs | 117 +- lib/opte/src/engine/nat.rs | 6 +- lib/opte/src/engine/packet.rs | 414 +++---- lib/opte/src/engine/port.rs | 1412 +++++++++++++----------- lib/opte/src/engine/predicate.rs | 120 +- lib/opte/src/engine/rule.rs | 41 +- lib/opte/src/engine/snat.rs | 16 +- lib/opte/src/engine/tcp.rs | 5 +- lib/opte/src/engine/udp.rs | 3 +- 23 files changed, 1917 insertions(+), 1272 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f652b8b2..182e44cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -882,7 +882,7 @@ dependencies = [ [[package]] name = "ingot" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=9582711e0d27b82feeadfbfacb9e2f7da4f86788#9582711e0d27b82feeadfbfacb9e2f7da4f86788" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=f3f138bcefb4c625597c4add3a509921955d646c#f3f138bcefb4c625597c4add3a509921955d646c" dependencies = [ "bitflags 2.6.0", "ingot-macros", @@ -894,7 +894,7 @@ dependencies = [ [[package]] name = "ingot-macros" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=9582711e0d27b82feeadfbfacb9e2f7da4f86788#9582711e0d27b82feeadfbfacb9e2f7da4f86788" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=f3f138bcefb4c625597c4add3a509921955d646c#f3f138bcefb4c625597c4add3a509921955d646c" dependencies = [ "darling", "itertools 0.13.0", @@ -908,7 +908,7 @@ dependencies = [ [[package]] name = "ingot-types" version = "0.1.0" -source = 
"git+https://github.com/oxidecomputer/ingot.git?rev=9582711e0d27b82feeadfbfacb9e2f7da4f86788#9582711e0d27b82feeadfbfacb9e2f7da4f86788" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=f3f138bcefb4c625597c4add3a509921955d646c#f3f138bcefb4c625597c4add3a509921955d646c" dependencies = [ "heapless", "ingot-macros", diff --git a/Cargo.toml b/Cargo.toml index 547499f7..33bd605f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,7 +50,7 @@ ctor = "0.2" darling = "0.20" dyn-clone = "1.0" heapless = "0.8" -ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "9582711e0d27b82feeadfbfacb9e2f7da4f86788"} +ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "f3f138bcefb4c625597c4add3a509921955d646c"} ipnetwork = { version = "0.20", default-features = false } itertools = { version = "0.13", default-features = false } libc = "0.2" diff --git a/lib/opte/src/engine/arp.rs b/lib/opte/src/engine/arp.rs index aaaaa335..ae622447 100644 --- a/lib/opte/src/engine/arp.rs +++ b/lib/opte/src/engine/arp.rs @@ -244,7 +244,7 @@ impl<'a> RawHeader<'a> for ArpEthIpv4Raw { #[inline] fn new_mut(src: &mut [u8]) -> Result, ReadErr> { debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::new(src) { + let hdr = match Ref::from_bytes(src).ok() { Some(hdr) => hdr, None => return Err(ReadErr::BadLayout), }; diff --git a/lib/opte/src/engine/dhcp.rs b/lib/opte/src/engine/dhcp.rs index 68e7003d..9e488796 100644 --- a/lib/opte/src/engine/dhcp.rs +++ b/lib/opte/src/engine/dhcp.rs @@ -10,6 +10,9 @@ use super::checksum::HeaderChecksum; use super::ether::EtherHdr; use super::ether::EtherMeta; use super::ether::EtherType; +use super::ingot_packet::MsgBlk; +use super::ingot_packet::PacketHeaders; +use super::ingot_packet::PacketHeaders2; use super::ip4::Ipv4Addr; use super::ip4::Ipv4Hdr; use super::ip4::Ipv4Meta; @@ -35,6 +38,11 @@ use alloc::vec::Vec; use core::fmt; use core::fmt::Display; use heapless::Vec as HeaplessVec; +use ingot::ethernet::Ethernet; +use 
ingot::ethernet::Ethertype; +use ingot::ip::IpProtocol; +use ingot::ip::Ipv4; +use ingot::types::Emit; use opte_api::DhcpCfg; use opte_api::DhcpReplyType; use opte_api::DomainName; @@ -482,12 +490,9 @@ impl HairpinAction for DhcpAction { (hdr_preds, data_preds) } - fn gen_packet( - &self, - _meta: &PacketMeta, - rdr: &mut PacketReader, - ) -> GenPacketResult { - let body = rdr.copy_remaining(); + fn gen_packet(&self, meta: &PacketHeaders2) -> GenPacketResult { + // TODO: fold reader access into PacketHeaders2 + let body = meta.copy_remaining(); let client_pkt = DhcpPacket::new_checked(&body)?; let client_dhcp = DhcpRepr::parse(&client_pkt)?; let mt = MessageType::from(self.reply_type); @@ -617,8 +622,9 @@ impl HairpinAction for DhcpAction { // XXX: Would be preferable to write in here directly rather than // allocing tmp. - let total_len = - EtherHdr::SIZE + Ipv4Hdr::BASE_SIZE + UdpHdr::SIZE + tmp.len(); + let hdr_len = EtherHdr::SIZE + Ipv4Hdr::BASE_SIZE + UdpHdr::SIZE; + let total_len = hdr_len + tmp.len(); + let mut pkt = Packet::alloc_and_expand(total_len); let mut wtr = pkt.seg0_wtr(); eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); @@ -629,7 +635,11 @@ impl HairpinAction for DhcpAction { udp.csum = HeaderChecksum::from(csum).bytes(); udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); wtr.write(&tmp).unwrap(); - Ok(AllowOrDeny::Allow(pkt)) + + Ok(AllowOrDeny::Allow( + unsafe { MsgBlk::wrap_mblk(pkt.unwrap_mblk()) } + .expect("known valid"), + )) } } diff --git a/lib/opte/src/engine/dhcpv6/protocol.rs b/lib/opte/src/engine/dhcpv6/protocol.rs index 09dbf608..75d39278 100644 --- a/lib/opte/src/engine/dhcpv6/protocol.rs +++ b/lib/opte/src/engine/dhcpv6/protocol.rs @@ -25,6 +25,8 @@ use crate::engine::dhcpv6::SERVER_PORT; use crate::engine::ether::EtherHdr; use crate::engine::ether::EtherMeta; use crate::engine::ether::EtherType; +use crate::engine::ingot_packet::MsgBlk; +use crate::engine::ingot_packet::PacketHeaders2; use crate::engine::ip6::Ipv6Hdr; use 
crate::engine::ip6::Ipv6Meta; use crate::engine::ip6::UlpCsumOpt; @@ -47,6 +49,7 @@ use alloc::borrow::Cow; use alloc::vec::Vec; use core::fmt; use core::ops::Range; +use ingot::ip::Ipv6Ref; use opte_api::Ipv6Addr; use opte_api::Ipv6Cidr; use opte_api::MacAddr; @@ -590,7 +593,7 @@ fn process_confirm_message<'a>( // Process a DHCPv6 message from the a client. fn process_client_message<'a>( action: &'a Dhcpv6Action, - _meta: &'a PacketMeta, + _meta: &'a PacketHeaders2, client_msg: &'a Message<'a>, ) -> Option> { match client_msg.typ { @@ -612,7 +615,7 @@ fn process_client_message<'a>( // the request and the actual DHCPv6 message to send out. fn generate_packet<'a>( action: &Dhcpv6Action, - meta: &PacketMeta, + meta: &PacketHeaders2, msg: &'a Message<'a>, ) -> GenPacketResult { let eth = EtherMeta { @@ -625,7 +628,7 @@ fn generate_packet<'a>( src: Ipv6Addr::from_eui64(&action.server_mac), // Safety: We're only here if the predicates match, one of which is // IPv6. - dst: meta.inner_ip6().unwrap().src, + dst: meta.inner_ip6().unwrap().source().octets().into(), proto: Protocol::UDP, next_hdr: IpProtocol::Udp, pay_len: (UdpHdr::SIZE + msg.buffer_len()) as u16, @@ -661,7 +664,9 @@ fn generate_packet<'a>( udp.csum = HeaderChecksum::from(csum).bytes(); udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); wtr.write(&msg_buf).unwrap(); - Ok(AllowOrDeny::Allow(pkt)) + Ok(AllowOrDeny::Allow( + unsafe { MsgBlk::wrap_mblk(pkt.unwrap_mblk()) }.expect("known valid"), + )) } impl HairpinAction for Dhcpv6Action { @@ -683,12 +688,8 @@ impl HairpinAction for Dhcpv6Action { // Rather than put this logic into DataPredicates, we just parse the packet // here and reply accordingly. So the `Dhcpv6Action` is really a full // server, to the extent we emulate one. 
- fn gen_packet( - &self, - meta: &PacketMeta, - rdr: &mut PacketReader, - ) -> GenPacketResult { - let body = rdr.copy_remaining(); + fn gen_packet(&self, meta: &PacketHeaders2) -> GenPacketResult { + let body = meta.copy_remaining(); if let Some(client_msg) = Message::from_bytes(&body) { if let Some(reply) = process_client_message(self, meta, &client_msg) { diff --git a/lib/opte/src/engine/ether.rs b/lib/opte/src/engine/ether.rs index 9b7da4fd..f97c73f4 100644 --- a/lib/opte/src/engine/ether.rs +++ b/lib/opte/src/engine/ether.rs @@ -368,7 +368,7 @@ impl<'a> RawHeader<'a> for EtherHdrRaw { #[inline] fn new_mut(src: &mut [u8]) -> Result, ReadErr> { debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::new(src) { + let hdr = match Ref::from_bytes(src).ok() { Some(hdr) => hdr, None => return Err(ReadErr::BadLayout), }; diff --git a/lib/opte/src/engine/geneve.rs b/lib/opte/src/engine/geneve.rs index baf57a2e..8089adaa 100644 --- a/lib/opte/src/engine/geneve.rs +++ b/lib/opte/src/engine/geneve.rs @@ -291,7 +291,7 @@ impl<'a> RawHeader<'a> for GeneveHdrRaw { #[inline] fn new_mut(src: &mut [u8]) -> Result, ReadErr> { debug_assert_eq!(src.len(), mem::size_of::()); - let hdr = match Ref::new(src) { + let hdr = match Ref::from_bytes(src).ok() { Some(hdr) => hdr, None => return Err(ReadErr::BadLayout), }; @@ -480,7 +480,7 @@ impl<'a> RawHeader<'a> for GeneveOptHdrRaw { #[inline] fn new_mut(src: &mut [u8]) -> Result, ReadErr> { debug_assert_eq!(src.len(), mem::size_of::()); - let hdr = match Ref::new(src) { + let hdr = match Ref::from_bytes(src).ok() { Some(hdr) => hdr, None => return Err(ReadErr::BadLayout), }; @@ -490,7 +490,7 @@ impl<'a> RawHeader<'a> for GeneveOptHdrRaw { #[inline] fn new(src: &[u8]) -> Result, ReadErr> { debug_assert_eq!(src.len(), mem::size_of::()); - let hdr = match Ref::new(src) { + let hdr = match Ref::from_bytes(src).ok() { Some(hdr) => hdr, None => return Err(ReadErr::BadLayout), }; diff --git a/lib/opte/src/engine/icmp/mod.rs 
b/lib/opte/src/engine/icmp/mod.rs index b5424fab..42ce9a7e 100644 --- a/lib/opte/src/engine/icmp/mod.rs +++ b/lib/opte/src/engine/icmp/mod.rs @@ -35,13 +35,17 @@ use crate::engine::rule::HairpinAction; use alloc::vec::Vec; use core::fmt; use core::fmt::Display; +use ingot::icmp::IcmpV4Packet; +use ingot::icmp::IcmpV4Ref; pub use opte_api::ip::Protocol; use serde::Deserialize; use serde::Serialize; use smoltcp::phy::Checksum; use smoltcp::phy::ChecksumCapabilities as Csum; +use smoltcp::wire::Icmpv4Message; pub use v4::Icmpv4Meta; pub use v6::Icmpv6Meta; +use zerocopy::ByteSlice; use zerocopy::FromBytes; use zerocopy::Immutable; use zerocopy::IntoBytes; @@ -195,7 +199,7 @@ impl<'a> RawHeader<'a> for IcmpHdrRaw { #[inline] fn new_mut(src: &mut [u8]) -> Result, ReadErr> { debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::new(src) { + let hdr = match Ref::from_bytes(src).ok() { Some(hdr) => hdr, None => return Err(ReadErr::BadLayout), }; @@ -222,7 +226,7 @@ impl<'a> RawHeader<'a> for IcmpEchoRaw { #[inline] fn new_mut(src: &mut [u8]) -> Result, ReadErr> { debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::new(src) { + let hdr = match Ref::from_bytes(src).ok() { Some(hdr) => hdr, None => return Err(ReadErr::BadLayout), }; @@ -232,7 +236,7 @@ impl<'a> RawHeader<'a> for IcmpEchoRaw { #[inline] fn new(src: &[u8]) -> Result, ReadErr> { debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::new(src) { + let hdr = match Ref::from_bytes(src).ok() { Some(hdr) => hdr, None => return Err(ReadErr::BadLayout), }; diff --git a/lib/opte/src/engine/icmp/v4.rs b/lib/opte/src/engine/icmp/v4.rs index 273bfe11..a3e7b641 100644 --- a/lib/opte/src/engine/icmp/v4.rs +++ b/lib/opte/src/engine/icmp/v4.rs @@ -7,9 +7,12 @@ //! ICMPv4 headers and processing. 
use super::*; +use crate::engine::ingot_packet::MsgBlk; +use crate::engine::ingot_packet::PacketHeaders2; use crate::engine::ip4::Ipv4Hdr; use crate::engine::ip4::Ipv4Meta; use crate::engine::predicate::Ipv4AddrMatch; +use ingot::types::Emit; pub use opte_api::ip::IcmpEchoReply; use smoltcp::wire; use smoltcp::wire::Icmpv4Message; @@ -59,11 +62,7 @@ impl HairpinAction for IcmpEchoReply { (hdr_preds, data_preds) } - fn gen_packet( - &self, - meta: &PacketMeta, - rdr: &mut PacketReader, - ) -> GenPacketResult { + fn gen_packet(&self, meta: &PacketHeaders2) -> GenPacketResult { let Some(icmp) = meta.inner_icmp() else { // Getting here implies the predicate matched, but that the // extracted metadata indicates this isn't an ICMP packet. That @@ -77,8 +76,9 @@ impl HairpinAction for IcmpEchoReply { // `Icmpv4Packet` requires the ICMPv4 header and not just the message payload. // Given we successfully got the ICMPv4 metadata, rewinding here is fine. - rdr.seek_back(icmp.hdr_len())?; - let body = rdr.copy_remaining(); + let mut body = icmp.emit_vec(); + meta.append_remaining(&mut body); + let src_pkt = Icmpv4Packet::new_checked(&body)?; let src_icmp = Icmpv4Repr::parse(&src_pkt, &Csum::ignored())?; @@ -134,7 +134,10 @@ impl HairpinAction for IcmpEchoReply { eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); wtr.write(&tmp).unwrap(); - Ok(AllowOrDeny::Allow(pkt)) + Ok(AllowOrDeny::Allow( + unsafe { MsgBlk::wrap_mblk(pkt.unwrap_mblk()) } + .expect("known valid"), + )) } } diff --git a/lib/opte/src/engine/icmp/v6.rs b/lib/opte/src/engine/icmp/v6.rs index f3d48cda..05bb1e30 100644 --- a/lib/opte/src/engine/icmp/v6.rs +++ b/lib/opte/src/engine/icmp/v6.rs @@ -7,10 +7,15 @@ //! ICMPv6 headers and processing. 
use super::*; +use crate::engine::ingot_packet::MsgBlk; +use crate::engine::ingot_packet::PacketHeaders2; use crate::engine::ip6::Ipv6Hdr; use crate::engine::ip6::Ipv6Meta; use crate::engine::predicate::Ipv6AddrMatch; use alloc::string::String; +use ingot::ip::Ipv6Ref; +use ingot::types::Emit; +use ingot::types::Header; pub use opte_api::ip::Icmpv6EchoReply; pub use opte_api::ip::Ipv6Addr; pub use opte_api::ip::Ipv6Cidr; @@ -114,11 +119,8 @@ impl HairpinAction for Icmpv6EchoReply { (hdr_preds, data_preds) } - fn gen_packet( - &self, - meta: &PacketMeta, - rdr: &mut PacketReader, - ) -> GenPacketResult { + fn gen_packet(&self, meta: &PacketHeaders2) -> GenPacketResult { + // TODO: fold reader access into PacketHeaders2 let Some(icmp6) = meta.inner_icmp6() else { // Getting here implies the predicate matched, but that the // extracted metadata indicates this isn't an ICMPv6 packet. That @@ -134,8 +136,8 @@ impl HairpinAction for Icmpv6EchoReply { // resulting ICMPv6 echo reply. let (src_ip, dst_ip) = if let Some(metadata) = meta.inner_ip6() { ( - IpAddress::Ipv6(Ipv6Address(metadata.src.bytes())), - IpAddress::Ipv6(Ipv6Address(metadata.dst.bytes())), + IpAddress::Ipv6(Ipv6Address(metadata.source().octets())), + IpAddress::Ipv6(Ipv6Address(metadata.destination().octets())), ) } else { // We got the ICMPv6 metadata above but no IPv6 somehow? @@ -147,9 +149,9 @@ impl HairpinAction for Icmpv6EchoReply { // `Icmpv6Packet` requires the ICMPv6 header and not just the message payload. // Given we successfully got the ICMPv6 metadata, rewinding here is fine. 
- rdr.seek_back(icmp6.hdr_len())?; + let mut body = icmp6.emit_vec(); + meta.append_remaining(&mut body); - let body = rdr.copy_remaining(); let src_pkt = Icmpv6Packet::new_checked(&body)?; let src_icmp = Icmpv6Repr::parse(&src_ip, &dst_ip, &src_pkt, &Csum::ignored())?; @@ -208,7 +210,10 @@ impl HairpinAction for Icmpv6EchoReply { eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); wtr.write(&ulp_body).unwrap(); - Ok(AllowOrDeny::Allow(pkt)) + Ok(AllowOrDeny::Allow( + unsafe { MsgBlk::wrap_mblk(pkt.unwrap_mblk()) } + .expect("known valid"), + )) } } @@ -250,11 +255,8 @@ impl HairpinAction for RouterAdvertisement { (hdr_preds, data_preds) } - fn gen_packet( - &self, - meta: &PacketMeta, - rdr: &mut PacketReader, - ) -> GenPacketResult { + fn gen_packet(&self, meta: &PacketHeaders2) -> GenPacketResult { + // TODO: fold reader access into PacketHeaders2 use smoltcp::time::Duration; use smoltcp::wire::NdiscRouterFlags; @@ -278,14 +280,14 @@ impl HairpinAction for RouterAdvertisement { meta ))); }; - let src_ip = IpAddress::Ipv6(Ipv6Address(ip6.src.bytes())); - let dst_ip = IpAddress::Ipv6(Ipv6Address(ip6.dst.bytes())); + let src_ip = IpAddress::Ipv6(Ipv6Address(ip6.source().octets())); + let dst_ip = IpAddress::Ipv6(Ipv6Address(ip6.destination().octets())); // `Icmpv6Packet` requires the ICMPv6 header and not just the message payload. // Given we successfully got the ICMPv6 metadata, rewinding here is fine. - rdr.seek_back(icmp6.hdr_len())?; + let mut body = icmp6.emit_vec(); + meta.append_remaining(&mut body); - let body = rdr.copy_remaining(); let src_pkt = Icmpv6Packet::new_checked(&body)?; let mut csum = Csum::ignored(); csum.icmpv6 = Checksum::Rx; @@ -322,10 +324,10 @@ impl HairpinAction for RouterAdvertisement { // and thus _not_ UNSPEC, so we skip that checking here. // // This leaves the hop limit as the only validity check. 
- if ip6.hop_limit != 255 { + if ip6.hop_limit() != 255 { return Err(GenErr::Unexpected(format!( "Received RS with invalid hop limit ({}).", - ip6.hop_limit + ip6.hop_limit() ))); } @@ -372,7 +374,7 @@ impl HairpinAction for RouterAdvertisement { let ip = Ipv6Meta { src: *self.ip(), // Safety: We match on this being Some(_) above, so unwrap is safe. - dst: meta.inner_ip6().unwrap().src, + dst: meta.inner_ip6().unwrap().source().octets().into(), proto: Protocol::ICMPv6, next_hdr: IpProtocol::Icmpv6, // RFC 4861 6.1.2 requires that the hop limit be 255 in an RA. @@ -397,7 +399,10 @@ impl HairpinAction for RouterAdvertisement { eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); wtr.write(&ulp_body).unwrap(); - Ok(AllowOrDeny::Allow(pkt)) + Ok(AllowOrDeny::Allow( + unsafe { MsgBlk::wrap_mblk(pkt.unwrap_mblk()) } + .expect("known valid"), + )) } } @@ -407,15 +412,15 @@ impl HairpinAction for RouterAdvertisement { // the validations performed. // // Return the target address from the Neighbor Solicitation. -fn validate_neighbor_solicitation( - rdr: &mut PacketReader, - metadata: &Ipv6Meta, +fn validate_neighbor_solicitation( + rdr: &[u8], + metadata: &impl Ipv6Ref, ) -> Result { // First, check if this is in fact a NS message. 
- let smol_src = IpAddress::Ipv6(metadata.src.into()); - let smol_dst = IpAddress::Ipv6(metadata.dst.into()); - let body = rdr.copy_remaining(); - let src_pkt = Icmpv6Packet::new_checked(&body)?; + let smol_src = IpAddress::Ipv6(Ipv6Address(metadata.source().octets())); + let smol_dst = + IpAddress::Ipv6(Ipv6Address(metadata.destination().octets())); + let src_pkt = Icmpv6Packet::new_checked(rdr)?; let mut csum = Csum::ignored(); csum.icmpv6 = Checksum::Rx; let icmp = Icmpv6Repr::parse(&smol_src, &smol_dst, &src_pkt, &csum)?; @@ -426,10 +431,10 @@ fn validate_neighbor_solicitation( // - ICMP length is at least 24 octets // - Any included options have a non-zero length - if metadata.hop_limit != 255 { + if metadata.hop_limit() != 255 { return Err(GenErr::Unexpected(format!( "Received NS with invalid hop limit ({}).", - metadata.hop_limit + metadata.hop_limit() ))); } @@ -460,8 +465,8 @@ fn validate_neighbor_solicitation( // NS is only allowed from the unspecified address if the destination is a // solicited-node multicast address. - if metadata.src == Ipv6Addr::ANY_ADDR - && !metadata.dst.is_solicited_node_multicast() + if metadata.source().is_unspecified() + && !Ipv6Addr::from(metadata.destination()).is_solicited_node_multicast() { return Err(GenErr::Unexpected(String::from( "Received NS from UNSPEC, but destination is not the solicited \ @@ -470,7 +475,7 @@ fn validate_neighbor_solicitation( } // Cannot contain Link-Layer address option if from the unspecified address. 
- if metadata.src == Ipv6Addr::ANY_ADDR && has_ll_option { + if metadata.source().is_unspecified() && has_ll_option { return Err(GenErr::Unexpected(String::from( "Received NS from UNSPEC, but message contains the \ Link-Layer Address option.", @@ -586,11 +591,7 @@ impl HairpinAction for NeighborAdvertisement { (hdr_preds, data_preds) } - fn gen_packet( - &self, - meta: &PacketMeta, - rdr: &mut PacketReader, - ) -> GenPacketResult { + fn gen_packet(&self, meta: &PacketHeaders2) -> GenPacketResult { let Some(icmp6) = meta.inner_icmp6() else { // Getting here implies the predicate matched, but that the // extracted metadata indicates this isn't an ICMPv6 packet. That @@ -613,23 +614,22 @@ impl HairpinAction for NeighborAdvertisement { // `Icmpv6Packet` requires the ICMPv6 header and not just the message payload. // Given we successfully got the ICMPv6 metadata, rewinding here is fine. - rdr.seek_back(icmp6.hdr_len())?; + let mut body = icmp6.emit_vec(); + meta.append_remaining(&mut body); // Validate the ICMPv6 packet is actually a Neighbor Solicitation, and // that its data is appopriate. - let target_addr = validate_neighbor_solicitation(rdr, metadata)?; + let target_addr = validate_neighbor_solicitation(&body, metadata)?; // Build the NA, whose data depends on how we received the packet. If // `None` is returned, the NS is not destined for us, and will be // dropped. - let (dst_ip, advert) = match construct_neighbor_advert( - self, - &target_addr, - &metadata.src, - ) { - Some(data) => data, - None => return Ok(AllowOrDeny::Deny), - }; + let conv_ip = metadata.source().into(); + let (dst_ip, advert) = + match construct_neighbor_advert(self, &target_addr, &conv_ip) { + Some(data) => data, + None => return Ok(AllowOrDeny::Deny), + }; // Construct the actual bytes of the reply packet, and return it. 
let reply = Icmpv6Repr::Ndisc(advert); @@ -676,6 +676,9 @@ impl HairpinAction for NeighborAdvertisement { eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); wtr.write(&ulp_body).unwrap(); - Ok(AllowOrDeny::Allow(pkt)) + Ok(AllowOrDeny::Allow( + unsafe { MsgBlk::wrap_mblk(pkt.unwrap_mblk()) } + .expect("known valid"), + )) } } diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 1af11c42..1742f06e 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -1,3 +1,26 @@ +use super::checksum::Checksum as OpteCsum; +use super::checksum::Checksum; +use super::checksum::HeaderChecksum; +use super::headers::EncapPush; +use super::headers::IpPush; +use super::icmp::QueryEcho; +use super::packet::allocb; +use super::packet::AddrPair; +use super::packet::BodyTransform; +use super::packet::BodyTransformError; +use super::packet::Initialized; +use super::packet::InnerFlowId; +use super::packet::Packet; +use super::packet::PacketState; +use super::packet::ParseError; +use super::packet::FLOW_ID_DEFAULT; +use super::rule::HdrTransform; +use super::rule::HdrTransformError; +use super::NetworkParser; +use alloc::sync::Arc; +use core::cell::Cell; +use core::cell::RefCell; +use core::hash::Hash; use core::marker::PhantomData; use core::mem::ManuallyDrop; use core::mem::MaybeUninit; @@ -5,27 +28,31 @@ use core::ops::Deref; use core::ops::DerefMut; use core::ptr::NonNull; use core::slice; - -use illumos_sys_hdrs as ddi; +use core::sync::atomic::AtomicPtr; use illumos_sys_hdrs::mblk_t; +use illumos_sys_hdrs::uintptr_t; +use ingot::ethernet::Ethernet; use ingot::ethernet::EthernetPacket; use ingot::ethernet::EthernetRef; use ingot::ethernet::Ethertype; use ingot::ethernet::ValidEthernet; +use ingot::example_chain::L3Repr; use ingot::example_chain::Ulp; use ingot::example_chain::L3; use ingot::example_chain::L4; +use ingot::geneve::Geneve; use 
ingot::geneve::GenevePacket; +use ingot::geneve::ValidGeneve; +use ingot::icmp::IcmpV4Packet; use ingot::icmp::IcmpV4Ref; +use ingot::icmp::IcmpV6Packet; use ingot::icmp::IcmpV6Ref; -use ingot::ip::IpProtocol; -use ingot::ip::Ipv4; +use ingot::ip::Ipv4Packet; use ingot::ip::Ipv4Ref; use ingot::ip::Ipv6Packet; use ingot::ip::Ipv6Ref; use ingot::tcp::TcpPacket; use ingot::tcp::TcpRef; -use ingot::types::HasView; use ingot::types::Header; use ingot::types::HeaderStack; use ingot::types::ParseControl; @@ -33,26 +60,15 @@ use ingot::types::ParseError as IngotParseErr; use ingot::types::ParseResult; use ingot::types::Parsed as IngotParsed; use ingot::types::Read; +use ingot::udp::Udp; use ingot::udp::UdpPacket; use ingot::udp::UdpRef; +use ingot::udp::ValidUdp; use ingot::Parse; use opte_api::Direction; use zerocopy::ByteSlice; use zerocopy::ByteSliceMut; use zerocopy::IntoBytes; -use zerocopy::NetworkEndian; - -use super::checksum::Checksum as OpteCsum; -use super::checksum::HeaderChecksum; -use super::packet::allocb; -use super::packet::Initialized; -use super::packet::Packet; -use illumos_sys_hdrs::uintptr_t; - -use super::checksum::Checksum; -use super::packet::AddrPair; -use super::packet::InnerFlowId; -use super::packet::FLOW_ID_DEFAULT; #[derive(Parse)] pub struct OpteIn { @@ -88,10 +104,12 @@ pub struct OpteOut { } // --- REWRITE IN PROGRESS --- +#[derive(Debug)] pub struct MsgBlk { pub inner: NonNull, } +#[derive(Debug)] pub struct MsgBlkNode(mblk_t); impl Deref for MsgBlkNode { @@ -127,10 +145,10 @@ impl MsgBlkNode { impl MsgBlk { pub fn new(len: usize) -> Self { - let inner = unsafe { NonNull::new(allocb(len)) } + let inner = NonNull::new(allocb(len)) .expect("somehow failed to get an mblk..."); - unsafe { Self { inner } } + Self { inner } } pub fn byte_len(&self) -> usize { @@ -152,7 +170,6 @@ impl MsgBlk { out } - // pub fn write(&mut self, n_bytes: usize, f: impl FnOnce(&mut [MaybeUninit]) -> &mut [u8]) -> usize { pub unsafe fn write( &mut self, n_bytes: 
usize, @@ -170,11 +187,8 @@ impl MsgBlk { n_bytes, ) }; - // let out_slice = f(in_slice); - f(in_slice); - // assert!(out_slice.as_ptr() == mut_out.b_wptr); - // assert!(out_slice.len() <= n_bytes); + f(in_slice); mut_out.b_wptr = unsafe { mut_out.b_wptr.add(n_bytes) }; } @@ -211,29 +225,48 @@ impl MsgBlk { self.inner.as_ptr() as uintptr_t } - pub fn unwrap_mblk(mut self) -> *mut mblk_t { + pub fn unwrap_mblk(self) -> *mut mblk_t { let ptr_out = self.inner.as_ptr(); _ = ManuallyDrop::new(self); ptr_out } pub unsafe fn wrap_mblk(ptr: *mut mblk_t) -> Option { - let inner = unsafe { NonNull::new(ptr)? }; + let inner = NonNull::new(ptr)?; Some(Self { inner }) } } +#[derive(Debug)] pub struct MsgBlkIter<'a> { curr: Option>, marker: PhantomData<&'a MsgBlk>, } +#[derive(Debug)] pub struct MsgBlkIterMut<'a> { curr: Option>, marker: PhantomData<&'a mut MsgBlk>, } +impl<'a> MsgBlkIterMut<'a> { + /// + pub fn next_iter(&self) -> MsgBlkIter { + let curr = self + .curr + .and_then(|ptr| NonNull::new(unsafe { ptr.as_ref() }.b_cont)); + MsgBlkIter { curr, marker: PhantomData } + } + + pub fn next_iter_mut(&mut self) -> MsgBlkIterMut { + let curr = self + .curr + .and_then(|ptr| NonNull::new(unsafe { ptr.as_ref() }.b_cont)); + MsgBlkIterMut { curr, marker: PhantomData } + } +} + impl<'a> Iterator for MsgBlkIter<'a> { type Item = &'a MsgBlkNode; @@ -292,7 +325,7 @@ impl Drop for MsgBlk { pub struct OpteUnified { pub outer_eth: Option>, - pub outer_v6: Option>, + pub outer_v6: Option>, pub outer_udp: Option>, pub outer_encap: Option>, @@ -301,11 +334,61 @@ pub struct OpteUnified { pub inner_ulp: Option>, } +pub struct OpteUnifiedLengths { + pub outer_eth: usize, + pub outer_l3: usize, + pub outer_encap: usize, + + pub inner_eth: usize, + pub inner_l3: usize, + pub inner_ulp: usize, +} + +// TODO: Choices (L3, etc.) don't have Debug in all the right places yet. 
+impl core::fmt::Debug for OpteUnified { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.write_str("OpteUnified{ .. }") + } +} + +// THIS IS THE GOAL. + +// pub struct OpteUnified3 { +// pub outer_eth: Weird>, +// pub outer_v6: Weird>, +// pub outer_encap: Weird>, + +// pub inner_eth: EthernetPacket<&[u8]>, +// pub inner_l3: Option>, +// pub inner_ulp: Option>, +// } + +// IDEA: anything can take an encap push which is Into<..> its meta +// type. Modification is another trait. +pub enum Weird { + Absent, + LocalForm(Compact), + Packeted(T), +} + +impl From> for Weird { + fn from(value: Option) -> Self { + match value { + Some(val) => Self::Packeted(val), + None => Self::Absent, + } + } +} + +pub enum EncapMeta { + Geneve(UdpPacket, GenevePacket), +} + impl From> for OpteUnified { fn from(value: OpteIn) -> Self { Self { outer_eth: Some(value.outer_eth), - outer_v6: Some(value.outer_v6), + outer_v6: Some(L3::Ipv6(value.outer_v6)), outer_udp: Some(value.outer_udp), outer_encap: Some(value.outer_encap), inner_eth: value.inner_eth, @@ -329,17 +412,237 @@ impl From> for OpteUnified { } } -pub struct PacketMeta3( - pub IngotParsed, T>, -); +// This really needs a rethink, but also I just need to get this working... +struct PktBodyWalker { + base: Cell, T)>>, + slice: AtomicPtr>, +} + +impl Drop for PktBodyWalker { + fn drop(&mut self) { + let ptr = self.slice.load(core::sync::atomic::Ordering::Relaxed); + if !ptr.is_null() { + // Reacquire and drop. + unsafe { Box::from_raw(ptr) }; + } + } +} + +impl PktBodyWalker { + fn reify_body_segs(&self) + where + ::Chunk: ByteSlice, + { + if let Some((first, mut rest)) = self.base.take() { + // SAFETY: ByteSlice requires as part of its API + // that any implementors are stable, so we will always + // get the same view via deref. We are then consuming them + // into references which live exactly as long as their initial + // form. + // + // The next question is one of ownership. 
+ // We know that these chunks are at least &[u8]s, they + // *will* be exclusive if ByteSliceMut is met (because they are + // sourced from an exclusive borrow on something which ownas a [u8]). + // This allows us to cast to &mut later, but not here! + let mut to_hold = vec![]; + if let Some(chunk) = first { + let as_bytes = chunk.deref(); + to_hold.push(unsafe { core::mem::transmute(as_bytes) }); + } + while let Ok(chunk) = rest.next_chunk() { + to_hold.push(unsafe { core::mem::transmute(chunk.deref()) }); + } + + let to_store = Box::into_raw(Box::new(to_hold.into_boxed_slice())); + + self.slice + .compare_exchange( + core::ptr::null_mut(), + to_store, + core::sync::atomic::Ordering::Relaxed, + core::sync::atomic::Ordering::Relaxed, + ) + .expect("apparent concurrent access to body_seg memoiser"); + } + } + + fn body_segs(&self) -> &[&[u8]] + where + T::Chunk: ByteSlice, + { + self.reify_body_segs(); + + let slice_ptr = self.slice.load(core::sync::atomic::Ordering::Relaxed); + assert!(!slice_ptr.is_null()); + + // let use_ref: &[_] = &b; + unsafe { + let a = (&*(*slice_ptr)) as *const _; + core::mem::transmute(a) + } + } + + fn body_segs_mut(&mut self) -> &mut [&mut [u8]] + where + T::Chunk: ByteSliceMut, + { + self.reify_body_segs(); + + let slice_ptr = self.slice.load(core::sync::atomic::Ordering::Relaxed); + assert!(!slice_ptr.is_null()); + + // SAFETY: We have an exclusive reference, and the ByteSliceMut + // bound guarantees that this packet view was construced from + // an exclusive reference. In turn, we know that we are the only + // possible referent. 
+ unsafe { + let a = (&mut *(*slice_ptr)) as *mut _; + core::mem::transmute(a) + } + } +} + +pub struct PacketHeaders { + headers: OpteUnified, + initial_lens: OpteUnifiedLengths, + body: PktBodyWalker, +} + +impl From, T>> for PacketHeaders { + fn from(value: IngotParsed, T>) -> Self { + let IngotParsed { stack: HeaderStack(headers), data, last_chunk } = + value; + let initial_lens = OpteUnifiedLengths { + outer_eth: headers.outer_eth.packet_length(), + outer_l3: headers.outer_v6.packet_length(), + outer_encap: headers.outer_udp.packet_length() + + headers.outer_encap.packet_length(), + inner_eth: headers.inner_eth.packet_length(), + inner_l3: headers.inner_l3.packet_length(), + inner_ulp: headers.inner_ulp.packet_length(), + }; + let body = PktBodyWalker { + base: Some((last_chunk, data)).into(), + slice: Default::default(), + }; + Self { headers, initial_lens, body } + } +} + +impl core::fmt::Debug for PacketHeaders { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.write_str("PacketHeaders(..)") + } +} + +pub fn ulp_src_port(pkt: &Ulp) -> Option { + match pkt { + Ulp::Tcp(t) => Some(t.source()), + Ulp::Udp(t) => Some(t.source()), + _ => None, + } +} + +pub fn ulp_dst_port(pkt: &Ulp) -> Option { + match pkt { + Ulp::Tcp(t) => Some(t.destination()), + Ulp::Udp(t) => Some(t.destination()), + _ => None, + } +} + +impl PacketHeaders { + pub fn outer_ether(&self) -> Option<&EthernetPacket> { + self.headers.outer_eth.as_ref() + } + + pub fn inner_ether(&self) -> &EthernetPacket { + &self.headers.inner_eth + } -impl PacketMeta3 { pub fn inner_l3(&self) -> Option<&ingot::example_chain::L3> { - self.0.headers().inner_l3.as_ref() + self.headers.inner_l3.as_ref() } pub fn inner_ulp(&self) -> Option<&ingot::example_chain::Ulp> { - self.0.headers().inner_ulp.as_ref() + self.headers.inner_ulp.as_ref() + } + + pub fn inner_ip4(&self) -> Option<&Ipv4Packet> { + self.inner_l3().and_then(|v| match v { + L3::Ipv4(v) => Some(v), + _ => None, + }) + } + + 
pub fn inner_ip6(&self) -> Option<&Ipv6Packet> { + self.inner_l3().and_then(|v| match v { + L3::Ipv6(v) => Some(v), + _ => None, + }) + } + + pub fn inner_icmp(&self) -> Option<&IcmpV4Packet> { + self.inner_ulp().and_then(|v| match v { + Ulp::IcmpV4(v) => Some(v), + _ => None, + }) + } + + pub fn inner_icmp6(&self) -> Option<&IcmpV6Packet> { + self.inner_ulp().and_then(|v| match v { + Ulp::IcmpV6(v) => Some(v), + _ => None, + }) + } + + pub fn inner_tcp(&self) -> Option<&TcpPacket> { + self.inner_ulp().and_then(|v| match v { + Ulp::Tcp(v) => Some(v), + _ => None, + }) + } + + pub fn inner_udp(&self) -> Option<&UdpPacket> { + self.inner_ulp().and_then(|v| match v { + Ulp::Udp(v) => Some(v), + _ => None, + }) + } + + pub fn is_inner_tcp(&self) -> bool { + matches!(self.inner_ulp(), Some(Ulp::Tcp(_))) + } + + pub fn body_segs(&self) -> &[&[u8]] { + self.body.body_segs() + } + + pub fn copy_remaining(&self) -> Vec { + let base = self.body_segs(); + let len = base.iter().map(|v| v.len()).sum(); + let mut out = Vec::with_capacity(len); + for el in base { + out.extend_from_slice(el); + } + out + } + + pub fn append_remaining(&self, buf: &mut Vec) { + let base = self.body_segs(); + let len = base.iter().map(|v| v.len()).sum(); + buf.reserve_exact(len); + for el in base { + buf.extend_from_slice(el); + } + } + + pub fn body_segs_mut(&mut self) -> &mut [&mut [u8]] + where + T::Chunk: ByteSliceMut, + { + self.body.body_segs_mut() } } @@ -367,18 +670,22 @@ fn pseudo_port( chunk: &ingot::example_chain::Ulp, ) -> Option { match chunk { - Ulp::IcmpV4(pkt) if pkt.ty() == 0 || pkt.ty() == 3 => { + Ulp::IcmpV4(pkt) + if pkt.code() == 0 && (pkt.ty() == 0 || pkt.ty() == 8) => + { Some(u16::from_be_bytes(pkt.rest_of_hdr()[..2].try_into().unwrap())) } - Ulp::IcmpV6(pkt) if pkt.ty() == 128 || pkt.ty() == 129 => { + Ulp::IcmpV6(pkt) + if pkt.code() == 0 && (pkt.ty() == 128 || pkt.ty() == 129) => + { Some(u16::from_be_bytes(pkt.rest_of_hdr()[..2].try_into().unwrap())) } _ => None, } } 
-impl From<&PacketMeta3> for InnerFlowId { - fn from(meta: &PacketMeta3) -> Self { +impl From<&PacketHeaders> for InnerFlowId { + fn from(meta: &PacketHeaders) -> Self { let (proto, addrs) = match meta.inner_l3() { Some(L3::Ipv4(pkt)) => ( pkt.protocol().0, @@ -428,17 +735,237 @@ fn transform_parse_stage1>( // GOAL: get to an absolute minimum point where we: // - parse into an innerflowid // - use existing transforms if a ULP entry exists. +#[derive(Debug)] +pub struct Packet2 { + state: S, +} + +impl Packet2> { + pub fn new(pkt: T) -> Self + where + Initialized2: PacketState, + { + let len = pkt.len(); + Self { state: Initialized2 { len, inner: pkt } } + } +} + +impl Packet2> { + pub fn parse( + self, + dir: Direction, + net: impl NetworkParser, + ) -> Result>, ParseError> { + let Packet2 { state: Initialized2 { len, inner } } = self; + let mut meta = match dir { + Direction::Out => net.parse_outbound(inner)?, + Direction::In => net.parse_inbound(inner)?, + }; + + let flow = (&meta).into(); + + let body_csum = match (&meta.headers).inner_eth.ethertype() { + Ethertype::ARP => Memoised::Known(None), + Ethertype::IPV4 | Ethertype::IPV6 => Memoised::Uninit, + _ => return Err(IngotParseErr::Unwanted.into()), + }; + + let state = Parsed2 { + meta, + flow, + body_csum, + l4_hash: Memoised::Uninit, + body_modified: false, + len, + }; + + Ok(Packet2 { state }) + } +} + +impl Packet2> { + pub fn meta(&self) -> &PacketHeaders { + &self.state.meta + } + + pub fn meta_mut(&mut self) -> &mut PacketHeaders { + &mut self.state.meta + } + + pub fn emit_spec(&self) -> EmitSpec { + todo!() + } + pub fn len(&self) -> usize { + self.state.len + } + + pub fn flow(&self) -> &InnerFlowId { + &self.state.flow + } + + /// Run the [`HdrTransform`] against this packet. 
+ #[inline] + pub fn hdr_transform( + &mut self, + xform: &HdrTransform, + ) -> Result<(), HdrTransformError> { + xform.run(&mut self.state.meta)?; + // Given that n_transform layers is 1 or 2, probably won't + // save too much by trying to tie to a generation number. + // TODO: profile. + self.state.flow = InnerFlowId::from(self.meta()); + Ok(()) + } + + /// Run the [`BodyTransform`] against this packet. + pub fn body_transform( + &mut self, + dir: Direction, + xform: &dyn BodyTransform, + ) -> Result<(), BodyTransformError> { + // We set the flag now with the assumption that the transform + // could fail after modifying part of the body. In the future + // we could have something more sophisticated that only sets + // the flag if at least one byte was modified, but for now + // this does the job as nothing that needs top performance + // should make use of body transformations. + self.state.body_modified = true; + + match self.body_segs_mut() { + Some(mut body_segs) => Err(BodyTransformError::Todo("huh".into())), + // Some(mut body_segs) => xform.run(dir, &mut body_segs), + None => { + self.state.body_modified = false; + Err(BodyTransformError::NoPayload) + } + } + } + + #[inline] + pub fn body_segs(&self) -> Option<&[&[u8]]> { + // TODO. Not needed for today's d'plane. + None + } + + #[inline] + pub fn body_segs_mut(&mut self) -> Option<&mut [&mut [u8]]> { + // TODO. Not needed for today's d'plane. + None + } + + pub fn mblk_addr(&self) -> uintptr_t { + // TODO. + 0 + } + + pub fn body_csum(&mut self) -> Option { + *self.state.body_csum.get(|| { + let use_pseudo = if let Some(v) = self.state.meta.inner_ulp() { + !matches!(v, Ulp::IcmpV4(_)) + } else { + false + }; + + // XXX TODO: make these valid even AFTER all packet pushings occur. 
+ let pseudo_csum = match (&self.state.meta.headers) + .inner_eth + .ethertype() + { + // ARP + Ethertype::ARP => { + return None; + } + // Ipv4 + Ethertype::IPV4 => { + let h = &self.state.meta.headers; + let mut pseudo_hdr_bytes = [0u8; 12]; + let Some(L3::Ipv4(ref v4)) = h.inner_l3 else { panic!() }; + pseudo_hdr_bytes[0..4] + .copy_from_slice(&v4.source().octets()); + pseudo_hdr_bytes[4..8] + .copy_from_slice(&v4.destination().octets()); + pseudo_hdr_bytes[9] = v4.protocol().0; + let ulp_len = v4.total_len() - 4 * (v4.ihl() as u16); + pseudo_hdr_bytes[10..] + .copy_from_slice(&ulp_len.to_be_bytes()); + + Checksum::compute(&pseudo_hdr_bytes) + } + // Ipv6 + Ethertype::IPV6 => { + let h = &self.state.meta.headers; + let mut pseudo_hdr_bytes = [0u8; 40]; + let Some(L3::Ipv6(ref v6)) = h.inner_l3 else { panic!() }; + pseudo_hdr_bytes[0..16] + .copy_from_slice(&v6.source().octets()); + pseudo_hdr_bytes[16..32] + .copy_from_slice(&v6.destination().octets()); + pseudo_hdr_bytes[39] = v6.next_header().0; + let ulp_len = v6.payload_len() as u32; + pseudo_hdr_bytes[32..36] + .copy_from_slice(&ulp_len.to_be_bytes()); + Checksum::compute(&pseudo_hdr_bytes) + } + _ => unreachable!(), + }; + + self.state.meta.inner_ulp().and_then(csum_minus_hdr).map(|mut v| { + if use_pseudo { + v -= pseudo_csum; + } + v + }) + }) + } + + pub fn l4_hash(&mut self) -> u32 { + *self.state.l4_hash.get(|| { + let mut hasher = crc32fast::Hasher::new(); + self.state.flow.hash(&mut hasher); + hasher.finalize() + }) + } + + pub fn set_l4_hash(&mut self, hash: u32) { + self.state.l4_hash.set(hash); + } +} + +/// The type state of a packet that has been initialized and allocated, but +/// about which nothing else is known besides the length. +#[derive(Debug)] +pub struct Initialized2 { + // Total length of packet, in bytes. This is equal to the sum of + // the length of the _initialized_ window in all the segments + // (`b_wptr - b_rptr`). 
+ len: usize, + + inner: T, +} + +impl PacketState for Initialized2 {} +impl PacketState for Parsed2 {} + +/// Zerocopy view onto a parsed packet, acompanied by locally +/// computed state. pub struct Parsed2 { - // len: usize, - pub meta: PacketMeta3, - pub flow: InnerFlowId, - pub body_csum: Option, - pub l4_hash: Option, - // body: BodyInfo, - // body_modified: bool, + len: usize, + meta: PacketHeaders, + flow: InnerFlowId, + body_csum: Memoised>, + l4_hash: Memoised, + body_modified: bool, } +// Needed for now to account for not wanting to redesign ActionDescs +// to be generic over T (trait object safety rules, etc.). +pub type PacketMeta3<'a> = Parsed2>; +pub type PacketHeaders2<'a> = PacketHeaders>; + +pub type InitMblk<'a> = Initialized2>; +pub type ParsedMblk<'a> = Parsed2>; + fn csum_minus_hdr(ulp: &Ulp) -> Option { match ulp { Ulp::IcmpV4(icmp) => { @@ -517,71 +1044,87 @@ fn csum_minus_hdr(ulp: &Ulp) -> Option { } } -impl Parsed2 -// where T::Chunk: ByteSliceMut -{ - pub fn parse(pkt: T, dir: Direction) -> ParseResult { - let mut meta = PacketMeta3(match dir { - Direction::In => { - OpteIn::parse_read(pkt).map(transform_parse_stage1) - } - Direction::Out => { - OpteOut::parse_read(pkt).map(transform_parse_stage1) - } - }?); +trait QueryLen { + fn len(&self) -> usize; +} - let flow = (&meta).into(); +impl<'a> QueryLen for MsgBlkIterMut<'a> { + fn len(&self) -> usize { + let own_blk_len = self + .curr + .map(|v| unsafe { + let v = v.as_ref(); + v.b_wptr.offset_from(v.b_rptr) as usize + }) + .unwrap_or_default(); - let use_pseudo = if let Some(v) = meta.inner_ulp() { - !matches!(v, Ulp::IcmpV4(_)) - } else { - false - }; + own_blk_len + self.next_iter().map(|v| v.len()).sum::() + } +} - let pseudo_csum = match meta.0.headers().inner_eth.ethertype() { - // ARP - Ethertype::ARP => { - return Ok(Self { meta, body_csum: None, flow, l4_hash: None }); - } - // Ipv4 - Ethertype::IPV4 => { - let h = meta.0.headers(); - let mut pseudo_hdr_bytes = [0u8; 12]; - let 
Some(L3::Ipv4(ref v4)) = h.inner_l3 else { panic!() }; - pseudo_hdr_bytes[0..4].copy_from_slice(&v4.source().octets()); - pseudo_hdr_bytes[4..8] - .copy_from_slice(&v4.destination().octets()); - pseudo_hdr_bytes[9] = v4.protocol().0; - let ulp_len = v4.total_len() - 4 * (v4.ihl() as u16); - pseudo_hdr_bytes[10..].copy_from_slice(&ulp_len.to_be_bytes()); - - Checksum::compute(&pseudo_hdr_bytes) - } - // Ipv6 - Ethertype::IPV6 => { - let h = meta.0.headers(); - let mut pseudo_hdr_bytes = [0u8; 40]; - let Some(L3::Ipv6(ref v6)) = h.inner_l3 else { panic!() }; - pseudo_hdr_bytes[0..16].copy_from_slice(&v6.source().octets()); - pseudo_hdr_bytes[16..32] - .copy_from_slice(&v6.destination().octets()); - pseudo_hdr_bytes[39] = v6.next_header().0; - let ulp_len = v6.payload_len() as u32; - pseudo_hdr_bytes[32..36] - .copy_from_slice(&ulp_len.to_be_bytes()); - Checksum::compute(&pseudo_hdr_bytes) - } - _ => return Err(IngotParseErr::Unwanted), - }; +pub enum Emitter { + Repr(Box), + Cached(Arc<[u8]>), +} - let body_csum = - meta.inner_ulp().and_then(csum_minus_hdr).map(|mut v| { - if use_pseudo { - v -= pseudo_csum; - } - v - }); +// TODO: don't really care about pushing 'inner' reprs today. 
+pub struct OpteEmit { + outer_eth: Emitter, + outer_ip: Emitter, + outer_encap: Emitter<(Udp, Geneve)>, +} + +pub struct EmitSpec { + pub rewind: usize, + pub push_spec: OpteEmit, +} + +#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Ord, PartialOrd, Default)] +pub enum Memoised { + #[default] + Uninit, + Known(T), +} + +impl Memoised { + pub fn get(&mut self, or: impl FnOnce() -> T) -> &T { + if self.try_get().is_none() { + self.set(or()); + } + + self.try_get().unwrap() + } + + pub fn try_get(&self) -> Option<&T> { + match self { + Memoised::Uninit => None, + Memoised::Known(v) => Some(v), + } + } + + pub fn set(&mut self, val: T) { + *self = Self::Known(val); + } +} - Ok(Self { meta, flow, body_csum, l4_hash: None }) +impl QueryEcho for IcmpV4Packet { + fn echo_id(&self) -> Option { + match (self.code(), self.ty()) { + (0, 0) | (0, 8) => Some(u16::from_be_bytes( + self.rest_of_hdr()[..2].try_into().unwrap(), + )), + _ => None, + } + } +} + +impl QueryEcho for IcmpV6Packet { + fn echo_id(&self) -> Option { + match (self.code(), self.ty()) { + (0, 128) | (0, 129) => Some(u16::from_be_bytes( + self.rest_of_hdr()[..2].try_into().unwrap(), + )), + _ => None, + } } } diff --git a/lib/opte/src/engine/ioctl.rs b/lib/opte/src/engine/ioctl.rs index 0a731fbc..56f56c66 100644 --- a/lib/opte/src/engine/ioctl.rs +++ b/lib/opte/src/engine/ioctl.rs @@ -170,14 +170,14 @@ pub struct RuleDump { pub action: String, } -pub fn dump_layer( +pub fn dump_layer( port: &Port, req: &DumpLayerReq, ) -> Result { port.dump_layer(&req.name) } -pub fn dump_tcp_flows( +pub fn dump_tcp_flows( port: &Port, _req: &DumpTcpFlowsReq, ) -> Result { diff --git a/lib/opte/src/engine/ip4.rs b/lib/opte/src/engine/ip4.rs index 5f7413af..9611c00a 100644 --- a/lib/opte/src/engine/ip4.rs +++ b/lib/opte/src/engine/ip4.rs @@ -521,7 +521,7 @@ impl<'a> RawHeader<'a> for Ipv4HdrRaw { #[inline] fn new_mut(src: &mut [u8]) -> Result, ReadErr> { debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match 
Ref::new(src) { + let hdr = match Ref::from_bytes(src).ok() { Some(hdr) => hdr, None => return Err(ReadErr::BadLayout), }; diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index bfdb053c..d709b59f 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -11,6 +11,11 @@ use super::flow_table::FlowEntry; use super::flow_table::FlowTable; use super::flow_table::FlowTableDump; use super::flow_table::FLOW_DEF_EXPIRE_SECS; +use super::ingot_packet::MsgBlk; +use super::ingot_packet::Packet2; +use super::ingot_packet::PacketHeaders2; +use super::ingot_packet::Parsed2; +use super::ingot_packet::ParsedMblk; use super::ioctl; use super::ioctl::ActionDescEntryDump; use super::packet::BodyTransformError; @@ -54,6 +59,7 @@ use core::num::NonZeroU32; use core::result; use illumos_sys_hdrs::c_char; use illumos_sys_hdrs::uintptr_t; +use ingot::types::Read; use kstat_macro::KStatProvider; use opte_api::Direction; @@ -128,7 +134,7 @@ pub enum LayerResult { reason: DenyReason, }, #[leaf] - Hairpin(Packet), + Hairpin(MsgBlk), HandlePkt, } @@ -799,7 +805,7 @@ impl Layer { &mut self, ectx: &ExecCtx, dir: Direction, - pkt: &mut Packet, + pkt: &mut Packet2, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { @@ -817,7 +823,7 @@ impl Layer { fn process_in( &mut self, ectx: &ExecCtx, - pkt: &mut Packet, + pkt: &mut Packet2, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { @@ -887,17 +893,14 @@ impl Layer { fn process_in_rules( &mut self, ectx: &ExecCtx, - pkt: &mut Packet, + pkt: &mut Packet2, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { use Direction::In; self.stats.vals.in_lft_miss += 1; - let mut rdr = pkt.get_body_rdr(); - let rule = - self.rules_in.find_match(pkt.flow(), pkt.meta(), ameta, &mut rdr); - let _ = rdr.finish(); + let rule = self.rules_in.find_match(pkt.flow(), pkt.meta(), ameta); let action = if let Some(rule) = rule { self.stats.vals.in_rule_match += 1; @@ 
-1085,23 +1088,16 @@ impl Layer { } Action::Hairpin(action) => { - let mut rdr = pkt.get_body_rdr(); - match action.gen_packet(pkt.meta(), &mut rdr) { - Ok(aord) => match aord { - AllowOrDeny::Allow(pkt) => { - let _ = rdr.finish(); - Ok(LayerResult::Hairpin(pkt)) - } - - AllowOrDeny::Deny => Ok(LayerResult::Deny { - name: self.name, - reason: DenyReason::Action, - }), - }, - + match action.gen_packet(pkt.meta()) { + Ok(AllowOrDeny::Allow(pkt)) => { + Ok(LayerResult::Hairpin(pkt)) + } + Ok(AllowOrDeny::Deny) => Ok(LayerResult::Deny { + name: self.name, + reason: DenyReason::Action, + }), Err(e) => { // XXX SDT probe, error stat, log - let _ = rdr.finish(); Err(LayerError::GenPacket(e)) } } @@ -1114,7 +1110,7 @@ impl Layer { fn process_out( &mut self, ectx: &ExecCtx, - pkt: &mut Packet, + pkt: &mut Packet2, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { @@ -1184,17 +1180,14 @@ impl Layer { fn process_out_rules( &mut self, ectx: &ExecCtx, - pkt: &mut Packet, + pkt: &mut Packet2, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { use Direction::Out; self.stats.vals.out_lft_miss += 1; - let mut rdr = pkt.get_body_rdr(); - let rule = - self.rules_out.find_match(pkt.flow(), pkt.meta(), ameta, &mut rdr); - let _ = rdr.finish(); + let rule = self.rules_out.find_match(pkt.flow(), pkt.meta(), ameta); let action = if let Some(rule) = rule { self.stats.vals.out_rule_match += 1; @@ -1387,23 +1380,16 @@ impl Layer { } Action::Hairpin(action) => { - let mut rdr = pkt.get_body_rdr(); - match action.gen_packet(pkt.meta(), &mut rdr) { - Ok(aord) => match aord { - AllowOrDeny::Allow(pkt) => { - let _ = rdr.finish(); - Ok(LayerResult::Hairpin(pkt)) - } - - AllowOrDeny::Deny => Ok(LayerResult::Deny { - name: self.name, - reason: DenyReason::Action, - }), - }, - + match action.gen_packet(pkt.meta()) { + Ok(AllowOrDeny::Allow(pkt)) => { + Ok(LayerResult::Hairpin(pkt)) + } + Ok(AllowOrDeny::Deny) => Ok(LayerResult::Deny { + name: self.name, + 
reason: DenyReason::Action, + }), Err(e) => { // XXX SDT probe, error stat, log - let _ = rdr.finish(); Err(LayerError::GenPacket(e)) } } @@ -1620,18 +1606,14 @@ impl<'a> RuleTable { dump } - fn find_match<'b, R>( + fn find_match<'b>( &mut self, ifid: &InnerFlowId, - pmeta: &PacketMeta, + pmeta: &PacketHeaders2, ameta: &ActionMeta, - rdr: &'b mut R, - ) -> Option<&Rule> - where - R: PacketRead<'a>, - { + ) -> Option<&Rule> { for rte in self.rules.iter_mut() { - if rte.rule.is_match(pmeta, ameta, rdr) { + if rte.rule.is_match(pmeta, ameta) { rte.hits += 1; Self::rule_match_probe( self.port_c.as_c_str(), @@ -1912,12 +1894,9 @@ mod test { // The pkt/rdr aren't actually used in this case. let pkt = Packet::copy(&[0xA]); - let mut rdr = pkt.get_rdr(); let ameta = ActionMeta::new(); let ifid = InnerFlowId::from(&pmeta); - assert!(rule_table - .find_match(&ifid, &pmeta, &ameta, &mut rdr) - .is_some()); + assert!(rule_table.find_match(&ifid, &pmeta, &ameta).is_some()); } } // TODO Reinstate diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index c18d5dde..f461e8ad 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -44,6 +44,14 @@ pub mod ingot_packet; use alloc::string::String; use core::fmt; use core::num::ParseIntError; +use ingot::types::Parsed as IngotParsed; +use ingot::types::Read; +use ingot_packet::MsgBlk; +use ingot_packet::OpteOut; +use ingot_packet::Packet2; +use ingot_packet::PacketHeaders; +use ingot_packet::Parsed2; +use ingot_packet::ParsedMblk; use ip4::IpError; pub use opte_api::Direction; @@ -178,18 +186,11 @@ cfg_if! 
{ pub use dbg_macro as dbg; pub use err_macro as err; -use crate::engine::ether::EtherType; use crate::engine::flow_table::FlowTable; -use crate::engine::ip4::Protocol; -use crate::engine::packet::HeaderOffsets; use crate::engine::packet::Initialized; use crate::engine::packet::InnerFlowId; use crate::engine::packet::Packet; -use crate::engine::packet::PacketInfo; -use crate::engine::packet::PacketMeta; -use crate::engine::packet::PacketReaderMut; use crate::engine::packet::ParseError; -use crate::engine::packet::Parsed; use crate::engine::port::UftEntry; /// The action to take for a single packet, based on the processing of @@ -207,7 +208,7 @@ pub enum HdlPktAction { /// input packet. /// /// The input packet is dropped. - Hairpin(Packet), + Hairpin(MsgBlk), } /// Some type of problem occurred during [`NetworkImpl::handle_pkt()`] @@ -271,13 +272,15 @@ pub trait NetworkImpl { /// myriad of reasons. The error returned is for informational /// purposes, rather than having any obvious direct action to take /// in response. - fn handle_pkt( + fn handle_pkt( &self, dir: Direction, - pkt: &mut Packet, + pkt: &mut Packet2>, uft_in: &FlowTable>, uft_out: &FlowTable>, - ) -> Result; + ) -> Result + where + T: Read; /// Return the parser for this network implementation. fn parser(&self) -> Self::Parser; @@ -290,21 +293,21 @@ pub trait NetworkImpl { pub trait NetworkParser { /// Parse an outbound packet. /// - /// An outbound packet is one traveling from the [`port::Port`] + /// An outbound packet is one travelling from the [`port::Port`] /// client to the network. - fn parse_outbound( + fn parse_outbound( &self, - rdr: &mut PacketReaderMut, - ) -> Result; + rdr: T, + ) -> Result, ParseError>; /// Parse an inbound packet. /// /// An inbound packet is one traveling from the network to the /// [`port::Port`] client. 
- fn parse_inbound( + fn parse_inbound( &self, - rdr: &mut PacketReaderMut, - ) -> Result; + rdr: T, + ) -> Result, ParseError>; } /// A generic ULP parser, useful for testing inside of the opte crate @@ -314,80 +317,34 @@ pub struct GenericUlp {} impl GenericUlp { /// Parse a generic L2 + L3 + L4 packet, storing the headers in /// the inner position. - fn parse_ulp( + fn parse_ulp( &self, - rdr: &mut PacketReaderMut, - ) -> Result { - let mut meta = PacketMeta::default(); - let mut offsets = HeaderOffsets::default(); - - let (ether_hi, _ether_hdr) = Packet::parse_ether(rdr)?; - meta.inner.ether = ether_hi.meta; - offsets.inner.ether = ether_hi.offset; - let ether_type = ether_hi.meta.ether_type; - - let (ip_hi, pseudo_csum) = match ether_type { - EtherType::Arp => { - return Ok(PacketInfo { - meta, - offsets, - body_csum: None, - extra_hdr_space: None, - }); - } - - EtherType::Ipv4 => { - let (ip_hi, hdr) = Packet::parse_ip4(rdr)?; - (ip_hi, hdr.pseudo_csum()) - } - - EtherType::Ipv6 => { - let (ip_hi, hdr) = Packet::parse_ip6(rdr)?; - (ip_hi, hdr.pseudo_csum()) - } - - _ => return Err(ParseError::UnexpectedEtherType(ether_type)), + rdr: T, + ) -> Result, ParseError> { + let stage1 = OpteOut::parse_read(rdr)?; + + let meta = IngotParsed { + stack: ingot::types::HeaderStack(stage1.stack.0.into()), + data: stage1.data, + last_chunk: stage1.last_chunk, }; - meta.inner.ip = Some(ip_hi.meta); - offsets.inner.ip = Some(ip_hi.offset); - - let (ulp_hi, ulp_hdr) = match ip_hi.meta.proto() { - Protocol::ICMP => Packet::parse_icmp(rdr)?, - Protocol::ICMPv6 => Packet::parse_icmp6(rdr)?, - Protocol::TCP => Packet::parse_tcp(rdr)?, - Protocol::UDP => Packet::parse_udp(rdr)?, - proto => return Err(ParseError::UnexpectedProtocol(proto)), - }; - - let use_pseudo = ulp_hi.meta.is_pseudoheader_in_csum(); - meta.inner.ulp = Some(ulp_hi.meta); - offsets.inner.ulp = Some(ulp_hi.offset); - let body_csum = if let Some(mut csum) = ulp_hdr.csum_minus_hdr() { - if use_pseudo { - csum -= 
pseudo_csum; - } - Some(csum) - } else { - None - }; - - Ok(PacketInfo { meta, offsets, body_csum, extra_hdr_space: None }) + Ok(meta.into()) } } impl NetworkParser for GenericUlp { - fn parse_inbound( + fn parse_inbound( &self, - rdr: &mut PacketReaderMut, - ) -> Result { + rdr: T, + ) -> Result, ParseError> { self.parse_ulp(rdr) } - fn parse_outbound( + fn parse_outbound( &self, - rdr: &mut PacketReaderMut, - ) -> Result { + rdr: T, + ) -> Result, ParseError> { self.parse_ulp(rdr) } } diff --git a/lib/opte/src/engine/nat.rs b/lib/opte/src/engine/nat.rs index d51f3c22..5d933f55 100644 --- a/lib/opte/src/engine/nat.rs +++ b/lib/opte/src/engine/nat.rs @@ -8,6 +8,8 @@ use super::headers::HeaderAction; use super::headers::IpMod; +use super::ingot_packet::Packet2; +use super::ingot_packet::ParsedMblk; use super::packet::InnerFlowId; use super::packet::Packet; use super::packet::Parsed; @@ -86,7 +88,7 @@ impl StatefulAction for OutboundNat { fn gen_desc( &self, flow_id: &InnerFlowId, - _pkt: &Packet, + _pkt: &Packet2, _meta: &mut ActionMeta, ) -> rule::GenDescResult { // When we have several external IPs at our disposal, we are @@ -149,7 +151,7 @@ impl StatefulAction for InboundNat { fn gen_desc( &self, flow_id: &InnerFlowId, - _pkt: &Packet, + _pkt: &Packet2, _meta: &mut ActionMeta, ) -> rule::GenDescResult { // We rely on the attached predicates to filter out IPs which are *not* diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index b20d1a63..cbe7b440 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -53,9 +53,6 @@ use crate::d_error::DError; use core::fmt; use core::fmt::Display; use core::hash::Hash; -use core::marker::PhantomData; -use core::ops::Deref; -use core::ops::DerefMut; use core::ptr; use core::ptr::NonNull; use core::result; @@ -79,8 +76,6 @@ use alloc::vec::Vec; use illumos_sys_hdrs::dblk_t; use illumos_sys_hdrs::mblk_t; use illumos_sys_hdrs::uintptr_t; -use zerocopy::ByteOrder; -use 
zerocopy::NetworkEndian; cfg_if! { if #[cfg(all(not(feature = "std"), not(test)))] { @@ -1038,181 +1033,181 @@ impl Packet { Ok((HdrInfo { meta, offset }, geneve)) } - pub fn parse( - mut self, - dir: Direction, - net: impl NetworkParser, - ) -> Result, ParseError> { - let mut rdr = self.get_rdr_mut(); - - let mut info = match dir { - Direction::Out => net.parse_outbound(&mut rdr)?, - Direction::In => net.parse_inbound(&mut rdr)?, - }; - - let (pkt_offset, mut seg_index, mut seg_offset, end_of_seg) = - rdr.finish(); - - // If we finished on the end of a segment, and there are more - // segments to go, then bump the segment index and reset the - // segment offset to properly indicate the start of the body. - if end_of_seg && ((seg_index + 1) < self.segs.len()) { - seg_index += 1; - seg_offset = 0; - } - - assert!( - self.state.len >= pkt_offset, - "{} >= {}", - self.state.len, - pkt_offset, - ); - - let ulp_hdr_len = info.meta.inner.ulp.map(|u| u.hdr_len()).unwrap_or(0); - let body_len = match info.meta.inner.ip { - // If we have IP and ULP metadata, we can use those to compute - // the payload length. - // If there's no ULP, just return the L3 payload length. - Some(IpMeta::Ip4(ip4)) => { - // Total length here refers to the n_bytes in this packet, - // so we won't get bogus overly long values in case of - // fragmentation. - let expected = ip4.hdr_len() + ulp_hdr_len; - - usize::from(ip4.total_len).checked_sub(expected).ok_or( - ParseError::BadInnerIpLen { - expected, - actual: usize::from(ip4.total_len), - }, - )? - } - Some(IpMeta::Ip6(ip6)) => usize::from(ip6.pay_len) - .checked_sub(ulp_hdr_len) - .ok_or(ParseError::BadInnerIpLen { - expected: ulp_hdr_len, - actual: usize::from(ip6.pay_len), - })?, - - // If there's no IP metadata, we fallback to considering any - // remaining bytes in the packet buffer to be the body. 
- None => self.state.len - pkt_offset, - }; - let mut body = - BodyInfo { pkt_offset, seg_index, seg_offset, len: body_len }; - let flow = InnerFlowId::from(&info.meta); - - // Packet processing logic requires all headers to be in the leading - // segment. Detect if this is not the case and squash segments - // containing headers into one segment. This value represents the - // inclusive upper bound of the squash. - let squash_to = match (body.seg_index, body.seg_offset) { - // The body is in the first segment meaning all headers are also in - // the first segment. No squashing needed. - (0, _) => 0, - - // The body starts at a zero offset in segment n. This means we need - // to squash all segments prior to n. - (n, 0) => n - 1, - - // The body starts at a non-zero offset in segment n. This means we - // need to squash all segments up to and including n. - (n, _) => n, - }; - - // If the squash bound is zero, there is nothing left to do here, just - // return. - if squash_to == 0 { - return Ok(Packet { - avail: self.avail, - // The new packet is taking ownership of the segments. - segs: core::mem::take(&mut self.segs), - state: Parsed { - len: self.state.len, - hdr_offsets: info.offsets, - meta: info.meta, - flow, - body_csum: info.body_csum, - body, - body_modified: false, - }, - }); - } - - // Calculate the body offset within the new squashed segment - if body.seg_offset != 0 { - for s in &self.segs[..squash_to] { - body.seg_offset += s.len; - } - } - body.seg_index -= squash_to; - - // Determine how big the message block for the squashed segment needs to - // be. 
- let mut new_seg_size = 0; - for s in &self.segs[..squash_to + 1] { - new_seg_size += s.len; - } - - let extra_space = info.extra_hdr_space.unwrap_or(0); - let mp = allocb(new_seg_size + extra_space); - unsafe { - (*mp).b_wptr = (*mp).b_wptr.add(extra_space); - (*mp).b_rptr = (*mp).b_rptr.add(extra_space); - for s in &self.segs[..squash_to + 1] { - core::ptr::copy_nonoverlapping( - (*s.mp).b_rptr, - (*mp).b_wptr, - s.len, - ); - (*mp).b_wptr = (*mp).b_wptr.add(s.len); - } - } - - // Construct a new segment vector, tacking on any remaining segments - // after the header segments. - let orig_segs = core::mem::take(&mut self.segs); - let mut segs = vec![unsafe { PacketSeg::wrap_mblk(mp) }]; - if squash_to + 1 < orig_segs.len() { - segs[0].link(&orig_segs[squash_to + 1]); - segs.extend_from_slice(&orig_segs[squash_to + 1..]); - } - #[cfg(any(feature = "std", test))] - for s in &orig_segs[..squash_to + 1] { - mock_freeb(s.mp); - } - - let mut off = 0; - for header_offsets in [ - info.offsets.outer.ether.as_mut(), - info.offsets.outer.ip.as_mut(), - info.offsets.outer.encap.as_mut(), - Some(&mut info.offsets.inner.ether), - info.offsets.inner.ip.as_mut(), - info.offsets.inner.ulp.as_mut(), - ] - .into_iter() - .flatten() - { - header_offsets.pkt_pos = off; - header_offsets.seg_idx = 0; - header_offsets.seg_pos = off; - off += header_offsets.hdr_len; - } - - Ok(Packet { - avail: self.avail, - segs, - state: Parsed { - len: self.state.len, - hdr_offsets: info.offsets, - meta: info.meta, - flow, - body_csum: info.body_csum, - body, - body_modified: false, - }, - }) - } + // pub fn parse( + // mut self, + // dir: Direction, + // net: impl NetworkParser, + // ) -> Result, ParseError> { + // let mut rdr = self.get_rdr_mut(); + + // let mut info = match dir { + // Direction::Out => net.parse_outbound(&mut rdr)?, + // Direction::In => net.parse_inbound(&mut rdr)?, + // }; + + // let (pkt_offset, mut seg_index, mut seg_offset, end_of_seg) = + // rdr.finish(); + + // // If we 
finished on the end of a segment, and there are more + // // segments to go, then bump the segment index and reset the + // // segment offset to properly indicate the start of the body. + // if end_of_seg && ((seg_index + 1) < self.segs.len()) { + // seg_index += 1; + // seg_offset = 0; + // } + + // assert!( + // self.state.len >= pkt_offset, + // "{} >= {}", + // self.state.len, + // pkt_offset, + // ); + + // let ulp_hdr_len = info.meta.inner.ulp.map(|u| u.hdr_len()).unwrap_or(0); + // let body_len = match info.meta.inner.ip { + // // If we have IP and ULP metadata, we can use those to compute + // // the payload length. + // // If there's no ULP, just return the L3 payload length. + // Some(IpMeta::Ip4(ip4)) => { + // // Total length here refers to the n_bytes in this packet, + // // so we won't get bogus overly long values in case of + // // fragmentation. + // let expected = ip4.hdr_len() + ulp_hdr_len; + + // usize::from(ip4.total_len).checked_sub(expected).ok_or( + // ParseError::BadInnerIpLen { + // expected, + // actual: usize::from(ip4.total_len), + // }, + // )? + // } + // Some(IpMeta::Ip6(ip6)) => usize::from(ip6.pay_len) + // .checked_sub(ulp_hdr_len) + // .ok_or(ParseError::BadInnerIpLen { + // expected: ulp_hdr_len, + // actual: usize::from(ip6.pay_len), + // })?, + + // // If there's no IP metadata, we fallback to considering any + // // remaining bytes in the packet buffer to be the body. + // None => self.state.len - pkt_offset, + // }; + // let mut body = + // BodyInfo { pkt_offset, seg_index, seg_offset, len: body_len }; + // let flow = InnerFlowId::from(&info.meta); + + // // Packet processing logic requires all headers to be in the leading + // // segment. Detect if this is not the case and squash segments + // // containing headers into one segment. This value represents the + // // inclusive upper bound of the squash. 
+ // let squash_to = match (body.seg_index, body.seg_offset) { + // // The body is in the first segment meaning all headers are also in + // // the first segment. No squashing needed. + // (0, _) => 0, + + // // The body starts at a zero offset in segment n. This means we need + // // to squash all segments prior to n. + // (n, 0) => n - 1, + + // // The body starts at a non-zero offset in segment n. This means we + // // need to squash all segments up to and including n. + // (n, _) => n, + // }; + + // // If the squash bound is zero, there is nothing left to do here, just + // // return. + // if squash_to == 0 { + // return Ok(Packet { + // avail: self.avail, + // // The new packet is taking ownership of the segments. + // segs: core::mem::take(&mut self.segs), + // state: Parsed { + // len: self.state.len, + // hdr_offsets: info.offsets, + // meta: info.meta, + // flow, + // body_csum: info.body_csum, + // body, + // body_modified: false, + // }, + // }); + // } + + // // Calculate the body offset within the new squashed segment + // if body.seg_offset != 0 { + // for s in &self.segs[..squash_to] { + // body.seg_offset += s.len; + // } + // } + // body.seg_index -= squash_to; + + // // Determine how big the message block for the squashed segment needs to + // // be. + // let mut new_seg_size = 0; + // for s in &self.segs[..squash_to + 1] { + // new_seg_size += s.len; + // } + + // let extra_space = info.extra_hdr_space.unwrap_or(0); + // let mp = allocb(new_seg_size + extra_space); + // unsafe { + // (*mp).b_wptr = (*mp).b_wptr.add(extra_space); + // (*mp).b_rptr = (*mp).b_rptr.add(extra_space); + // for s in &self.segs[..squash_to + 1] { + // core::ptr::copy_nonoverlapping( + // (*s.mp).b_rptr, + // (*mp).b_wptr, + // s.len, + // ); + // (*mp).b_wptr = (*mp).b_wptr.add(s.len); + // } + // } + + // // Construct a new segment vector, tacking on any remaining segments + // // after the header segments. 
+ // let orig_segs = core::mem::take(&mut self.segs); + // let mut segs = vec![unsafe { PacketSeg::wrap_mblk(mp) }]; + // if squash_to + 1 < orig_segs.len() { + // segs[0].link(&orig_segs[squash_to + 1]); + // segs.extend_from_slice(&orig_segs[squash_to + 1..]); + // } + // #[cfg(any(feature = "std", test))] + // for s in &orig_segs[..squash_to + 1] { + // mock_freeb(s.mp); + // } + + // let mut off = 0; + // for header_offsets in [ + // info.offsets.outer.ether.as_mut(), + // info.offsets.outer.ip.as_mut(), + // info.offsets.outer.encap.as_mut(), + // Some(&mut info.offsets.inner.ether), + // info.offsets.inner.ip.as_mut(), + // info.offsets.inner.ulp.as_mut(), + // ] + // .into_iter() + // .flatten() + // { + // header_offsets.pkt_pos = off; + // header_offsets.seg_idx = 0; + // header_offsets.seg_pos = off; + // off += header_offsets.hdr_len; + // } + + // Ok(Packet { + // avail: self.avail, + // segs, + // state: Parsed { + // len: self.state.len, + // hdr_offsets: info.offsets, + // meta: info.meta, + // flow, + // body_csum: info.body_csum, + // body, + // body_modified: false, + // }, + // }) + // } pub fn seg0_wtr(&mut self) -> PacketSegWriter { self.segs[0].get_writer() @@ -1304,22 +1299,22 @@ impl Packet { Ok(Packet { avail, segs, state: Initialized { len } }) } - /// A combination of [`Self::wrap_mblk()`] followed by [`Self::parse()`]. - /// - /// This is a bit more convenient than dealing with the possible - /// error from each separately. - /// - /// # Safety - /// - /// See [`Self::wrap_mblk()`]. - pub unsafe fn wrap_mblk_and_parse( - mp: *mut mblk_t, - dir: Direction, - net: N, - ) -> Result, PacketError> { - let pkt = Self::wrap_mblk(mp)?; - pkt.parse(dir, net).map_err(PacketError::from) - } + // /// A combination of [`Self::wrap_mblk()`] followed by [`Self::parse()`]. + // /// + // /// This is a bit more convenient than dealing with the possible + // /// error from each separately. 
+ // /// + // /// # Safety + // /// + // /// See [`Self::wrap_mblk()`]. + // pub unsafe fn wrap_mblk_and_parse( + // mp: *mut mblk_t, + // dir: Direction, + // net: N, + // ) -> Result, PacketError> { + // let pkt = Self::wrap_mblk(mp)?; + // pkt.parse(dir, net).map_err(PacketError::from) + // } } /// A packet body transformation. @@ -1658,15 +1653,15 @@ impl Packet { } /// Run the [`HdrTransform`] against this packet. - #[inline] - pub fn hdr_transform( - &mut self, - xform: &HdrTransform, - ) -> Result<(), HdrTransformError> { - xform.run(&mut self.state.meta)?; - self.state.flow = InnerFlowId::from(&self.state.meta); - Ok(()) - } + // #[inline] + // pub fn hdr_transform( + // &mut self, + // xform: &HdrTransform, + // ) -> Result<(), HdrTransformError> { + // xform.run(&mut self.state.meta)?; + // self.state.flow = InnerFlowId::from(&self.state.meta); + // Ok(()) + // } /// Return a reference to the flow ID of this packet. #[inline] @@ -2552,6 +2547,9 @@ impl From for PacketError { #[derive(Clone, Debug, Eq, PartialEq, DError)] #[derror(leaf_data = ParseError::data)] pub enum ParseError { + // TODO: make this far richer... 
+ #[leaf] + IngotError(ingot::types::ParseError), BadHeader(HeaderReadErr), BadInnerIpLen { expected: usize, @@ -2611,6 +2609,12 @@ impl ParseError { } } +impl From for ParseError { + fn from(value: ingot::types::ParseError) -> Self { + Self::IngotError(value) + } +} + impl From for ParseError { fn from(err: ReadErr) -> Self { Self::BadRead(err) diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 93ada181..d8196a7b 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -16,7 +16,10 @@ use super::headers::EncapPush; use super::headers::HeaderAction; use super::headers::IpPush; use super::headers::UlpHeaderAction; +use super::ingot_packet::MsgBlk; +use super::ingot_packet::Packet2; use super::ingot_packet::Parsed2; +use super::ingot_packet::ParsedMblk; use super::ioctl; use super::ioctl::TcpFlowEntryDump; use super::ioctl::TcpFlowStateDump; @@ -31,7 +34,6 @@ use super::packet::BodyTransform; use super::packet::BodyTransformError; use super::packet::Initialized; use super::packet::InnerFlowId; -use super::packet::OuterMeta; use super::packet::Packet; use super::packet::PacketMeta; use super::packet::Parsed; @@ -69,7 +71,6 @@ use alloc::sync::Arc; use alloc::vec::Vec; use core::fmt; use core::fmt::Display; -use core::hash::Hash; use core::num::NonZeroU32; use core::result; use core::str::FromStr; @@ -93,6 +94,7 @@ use kstat_macro::KStatProvider; use opte_api::Direction; use opte_api::MacAddr; use opte_api::OpteError; +use std::process; use zerocopy::ByteSliceMut; pub type Result = result::Result; @@ -157,8 +159,10 @@ pub enum ProcessResult { reason: DropReason, }, Modified, + // TODO: it would be nice if this packet type could be user-specified, but might + // be tricky. 
#[leaf] - Hairpin(Packet), + Hairpin(MsgBlk), } impl From for ProcessResult { @@ -171,6 +175,43 @@ impl From for ProcessResult { } } +enum InternalProcessResult { + Bypass, + Drop { + reason: DropReason, + }, + Modified { + transform: Option>, + tcp_state: Option>>, + }, + Hairpin(MsgBlk), +} + +impl From for ProcessResult { + fn from(value: InternalProcessResult) -> Self { + match value { + InternalProcessResult::Bypass => Self::Bypass, + InternalProcessResult::Drop { reason } => Self::Drop { reason }, + InternalProcessResult::Hairpin(v) => Self::Hairpin(v), + InternalProcessResult::Modified { transform, tcp_state } => { + Self::Modified + } + } + } +} + +impl From for InternalProcessResult { + fn from(hpa: HdlPktAction) -> Self { + match hpa { + HdlPktAction::Allow => { + Self::Modified { transform: None, tcp_state: None } + } + HdlPktAction::Deny => Self::Drop { reason: DropReason::HandlePkt }, + HdlPktAction::Hairpin(pkt) => Self::Hairpin(pkt), + } + } +} + /// The reason for a packet being dropped. #[derive(Clone, Debug)] pub enum DropReason { @@ -1168,10 +1209,15 @@ impl Port { pub fn process( &self, dir: Direction, - pkt: &mut Packet, + pkt: &mut Packet2, mut ameta: ActionMeta, ) -> result::Result { let flow_before = *pkt.flow(); + // XXX: See remove_rule -- there is a 1-pkt wide TOCTOU here. + // This should probably be ordered: + // - remove - process + // * lock port * lock port + // * increment epoch(relaxed) * fetch epoch(relaxed) let epoch = self.epoch.load(SeqCst); let mut data = self.data.lock(); check_state!(data.state, [PortState::Running]) @@ -1181,6 +1227,9 @@ impl Port { let res = match dir { Direction::Out => { let res = self.process_out(&mut data, epoch, pkt, &mut ameta); + // XXX: Ideally the Kstat should be holding atmoic U64s, then we get + // out of the lock sooner. Note that we don't need to *apply* a given + // set of transforms in order to know which stats we'll modify. 
Self::update_stats_out(&mut data.stats.vals, &res); res } @@ -1199,350 +1248,393 @@ impl Port { }; drop(data); - // Emit the updated headers if the packet was modified as part - // of processing. - if let Ok(ProcessResult::Modified) = res { - pkt.emit_new_headers()?; + // Now, apply transforms and update TCP state. + // UFT misses will have done so already in the port lock. + match (dir, &res) { + ( + Direction::Out, + Ok(InternalProcessResult::Modified { + transform: Some(transform), + tcp_state, + }), + ) => { + // TCP, then transform? + // (I forget the order) + } + ( + Direction::In, + Ok(InternalProcessResult::Modified { + transform: Some(transform), + tcp_state, + }), + ) => { + // Transform, then TCP? + // (I forget the order) + } + _ => {} } - self.port_process_return_probe(dir, &flow_before, epoch, pkt, &res); - res + // Emit the updated headers if the packet was modified as part + // of processing. + // TODO: now contingent on caller to do this if they want it. + // Why? To prevent any copy-out for loopback packets. + // if let Ok(ProcessResult::Modified) = res { + // pkt.emit_new_headers()?; + // } + + let safe_res = res.map(Into::into); + self.port_process_return_probe( + dir, + &flow_before, + epoch, + pkt, + &safe_res, + ); + safe_res } // hope and pray we find a ULP, then use that? - pub fn thin_process( - &self, - dir: Direction, - pkt: &mut Parsed2, - ) -> result::Result - where - T::Chunk: ByteSliceMut, - { - let flow_before = pkt.flow; - // let flow_before = *pkt.flow(); - let epoch = self.epoch.load(SeqCst); - let mut data = self.data.lock(); - check_state!(data.state, [PortState::Running]) - .map_err(|_| ProcessError::BadState(data.state))?; - - let mut dirty_csum = false; - - // self.port_process_entry_probe(dir, &flow_before, epoch, pskt); - // TODO: what stats? 
lmao - match dir { - Direction::Out => { - // opte::engine::err!("looking up {:?} in outdir...", flow_before); - let a = data.uft_out.get(&flow_before); - let Some(a) = a else { - // eh. It will get recirc'd for free... - // opte::engine::err!("not found! Releasing!"); - return Err(ProcessError::FlowTableFull { - kind: "()", - limit: 0, - }); - }; - pkt.l4_hash = Some(a.state().l4_hash); - // opte::engine::err!("found!"); - let xforms = Arc::clone(&a.state().xforms); - Self::update_stats_out( - &mut data.stats.vals, - &Ok(ProcessResult::Modified), - ); - drop(data); - - let mut hm = pkt.meta.0.headers_mut(); - - let mut new_eth = None; - let mut new_ip = None; - let mut new_encap = None; - // opte::engine::err!("xforms {:?}!", &a.state().xforms.hdr); - for xf in &xforms.hdr { - // opte::engine::err!("xf..."); - if let HeaderAction::Push(outer_eth, _) = &xf.outer_ether { - new_eth = Some(outer_eth.clone()); - } - if let HeaderAction::Push(outer_ip, _) = &xf.outer_ip { - new_ip = Some(outer_ip.clone()); - } - if let HeaderAction::Push(outer_ec, _) = &xf.outer_encap { - new_encap = Some(outer_ec.clone()); - } - if let HeaderAction::Modify(m, _) = &xf.inner_ether { - if let Some(src) = m.src { - hm.inner_eth.set_source(src.bytes().into()); - } - if let Some(dst) = m.dst { - hm.inner_eth.set_destination(dst.bytes().into()); - } - } - if let HeaderAction::Modify(m, _) = &xf.inner_ip { - match m { - super::headers::IpMod::Ip4(v4) => { - let Some(ingot::example_chain::L3::Ipv4( - ref mut v4_t, - )) = hm.inner_l3 - else { - return Err(ProcessError::FlowTableFull { - kind: "()", - limit: 0, - }); - }; - if let Some(src) = v4.src { - dirty_csum = true; - v4_t.set_source(src.into()); - } - if let Some(dst) = v4.dst { - dirty_csum = true; - v4_t.set_destination(dst.into()); - } - } - super::headers::IpMod::Ip6(v6) => { - let Some(ingot::example_chain::L3::Ipv6( - ref mut v6_t, - )) = hm.inner_l3 - else { - return Err(ProcessError::FlowTableFull { - kind: "()", - limit: 0, - }); 
- }; - if let Some(src) = v6.src { - dirty_csum = true; - v6_t.set_source(src.into()); - } - if let Some(dst) = v6.dst { - dirty_csum = true; - v6_t.set_destination(dst.into()); - } - } - } - } - if let UlpHeaderAction::Modify(m) = &xf.inner_ulp { - if let Some(src) = &m.generic.src_port { - match hm.inner_ulp { - Some(Ulp::Tcp(ref mut t)) => { - dirty_csum = true; - t.set_source(*src) - } - Some(Ulp::Udp(ref mut t)) => { - dirty_csum = true; - t.set_source(*src) - } - _ => {} - } - } - if let Some(dst) = &m.generic.dst_port { - match hm.inner_ulp { - Some(Ulp::Tcp(ref mut t)) => { - dirty_csum = true; - t.set_destination(*dst) - } - Some(Ulp::Udp(ref mut t)) => { - dirty_csum = true; - t.set_destination(*dst) - } - _ => {} - } - } - if let Some(flags) = &m.tcp_flags { - match hm.inner_ulp { - Some(Ulp::Tcp(ref mut t)) => { - dirty_csum = true; - t.set_flags(TcpFlags::from_bits_retain( - *flags, - )) - } - _ => {} - } - } - if let Some(new_id) = &m.icmp_id { - match hm.inner_ulp { - Some(Ulp::IcmpV4(ref mut pkt)) - if pkt.ty() == 0 || pkt.ty() == 3 => - { - dirty_csum = true; - pkt.rest_of_hdr_mut()[..2] - .copy_from_slice(&new_id.to_be_bytes()) - } - Some(Ulp::IcmpV6(ref mut pkt)) - if pkt.ty() == 128 || pkt.ty() == 129 => - { - dirty_csum = true; - pkt.rest_of_hdr_mut()[..2] - .copy_from_slice(&new_id.to_be_bytes()) - } - _ => {} - } - } - } - } - - match (new_eth, new_ip, new_encap) { - (Some(a), Some(b), Some(c)) => { - Ok(ThinProcRes::PushEncap(a, b, c)) - } - (None, None, None) => Ok(ThinProcRes::Na), - _ => Err(ProcessError::FlowTableFull { - kind: "()", - limit: 0, - }), - } - } - - Direction::In => { - let a = data.uft_in.get(&flow_before); - let Some(a) = a else { - // eh. 
- return Err(ProcessError::FlowTableFull { - kind: "()", - limit: 0, - }); - }; - pkt.l4_hash = Some(a.state().l4_hash); - let xforms = Arc::clone(&a.state().xforms); - Self::update_stats_in( - &mut data.stats.vals, - &Ok(ProcessResult::Modified), - ); - drop(data); - - let mut hm = pkt.meta.0.headers_mut(); - - let mut pop_eth = false; - let mut pop_ip = false; - let mut pop_encap = false; - for xf in &xforms.hdr { - // opte::engine::err!("xf..."); - if let HeaderAction::Pop = &xf.outer_ether { - pop_eth = true; - } - if let HeaderAction::Pop = &xf.outer_ip { - pop_ip = true; - } - if let HeaderAction::Pop = &xf.outer_encap { - pop_encap = true; - } - if let HeaderAction::Modify(m, _) = &xf.inner_ether { - if let Some(src) = m.src { - hm.inner_eth.set_source(src.bytes().into()); - } - if let Some(dst) = m.dst { - hm.inner_eth.set_destination(dst.bytes().into()); - } - } - if let HeaderAction::Modify(m, _) = &xf.inner_ip { - match m { - super::headers::IpMod::Ip4(v4) => { - let Some(ingot::example_chain::L3::Ipv4( - ref mut v4_t, - )) = hm.inner_l3 - else { - return Err(ProcessError::FlowTableFull { - kind: "()", - limit: 0, - }); - }; - if let Some(src) = v4.src { - dirty_csum = true; - v4_t.set_source(src.into()); - } - if let Some(dst) = v4.dst { - dirty_csum = true; - v4_t.set_destination(dst.into()); - } - } - super::headers::IpMod::Ip6(v6) => { - let Some(ingot::example_chain::L3::Ipv6( - ref mut v6_t, - )) = hm.inner_l3 - else { - return Err(ProcessError::FlowTableFull { - kind: "()", - limit: 0, - }); - }; - if let Some(src) = v6.src { - dirty_csum = true; - v6_t.set_source(src.into()); - } - if let Some(dst) = v6.dst { - dirty_csum = true; - v6_t.set_destination(dst.into()); - } - } - } - } - if let UlpHeaderAction::Modify(m) = &xf.inner_ulp { - if let Some(src) = &m.generic.src_port { - match hm.inner_ulp { - Some(Ulp::Tcp(ref mut t)) => { - dirty_csum = true; - t.set_source(*src) - } - Some(Ulp::Udp(ref mut t)) => { - dirty_csum = true; - 
t.set_source(*src) - } - _ => {} - } - } - if let Some(dst) = &m.generic.dst_port { - match hm.inner_ulp { - Some(Ulp::Tcp(ref mut t)) => { - dirty_csum = true; - t.set_destination(*dst) - } - Some(Ulp::Udp(ref mut t)) => { - dirty_csum = true; - t.set_destination(*dst) - } - _ => {} - } - } - if let Some(flags) = &m.tcp_flags { - match hm.inner_ulp { - Some(Ulp::Tcp(ref mut t)) => { - dirty_csum = true; - t.set_flags(TcpFlags::from_bits_retain( - *flags, - )) - } - _ => {} - } - } - if let Some(new_id) = &m.icmp_id { - match hm.inner_ulp { - Some(Ulp::IcmpV4(ref mut pkt)) - if pkt.ty() == 0 || pkt.ty() == 3 => - { - dirty_csum = true; - pkt.rest_of_hdr_mut()[..2] - .copy_from_slice(&new_id.to_be_bytes()) - } - Some(Ulp::IcmpV6(ref mut pkt)) - if pkt.ty() == 128 || pkt.ty() == 129 => - { - dirty_csum = true; - pkt.rest_of_hdr_mut()[..2] - .copy_from_slice(&new_id.to_be_bytes()) - } - _ => {} - } - } - } - } - - match (pop_eth, pop_ip, pop_encap) { - (true, true, true) => Ok(ThinProcRes::PopEncap), - (false, false, false) => Ok(ThinProcRes::Na), - _ => Err(ProcessError::FlowTableFull { - kind: "()", - limit: 0, - }), - } - } - } - } + // pub fn thin_process( + // &self, + // dir: Direction, + // pkt: &mut Parsed2, + // ) -> result::Result + // where + // T::Chunk: ByteSliceMut, + // { + // let flow_before = pkt.flow(); + // // let flow_before = *pkt.flow(); + // let _epoch = self.epoch.load(SeqCst); + // let mut data = self.data.lock(); + // check_state!(data.state, [PortState::Running]) + // .map_err(|_| ProcessError::BadState(data.state))?; + + // let mut dirty_csum = false; + + // // self.port_process_entry_probe(dir, &flow_before, epoch, pskt); + // // TODO: what stats? lmao + // match dir { + // Direction::Out => { + // // opte::engine::err!("looking up {:?} in outdir...", flow_before); + // let a = data.uft_out.get(&flow_before); + // let Some(a) = a else { + // // eh. It will get recirc'd for free... + // // opte::engine::err!("not found! 
Releasing!"); + // return Err(ProcessError::FlowTableFull { + // kind: "()", + // limit: 0, + // }); + // }; + // pkt.set_l4_hash(a.state().l4_hash); + // // opte::engine::err!("found!"); + // let xforms = Arc::clone(&a.state().xforms); + // Self::update_stats_out( + // &mut data.stats.vals, + // &Ok(ProcessResult::Modified), + // ); + // drop(data); + + // let hm = pkt.meta.0.headers_mut(); + + // let mut new_eth = None; + // let mut new_ip = None; + // let mut new_encap = None; + // // opte::engine::err!("xforms {:?}!", &a.state().xforms.hdr); + // for xf in &xforms.hdr { + // // opte::engine::err!("xf..."); + // if let HeaderAction::Push(outer_eth, _) = &xf.outer_ether { + // new_eth = Some(outer_eth.clone()); + // } + // if let HeaderAction::Push(outer_ip, _) = &xf.outer_ip { + // new_ip = Some(outer_ip.clone()); + // } + // if let HeaderAction::Push(outer_ec, _) = &xf.outer_encap { + // new_encap = Some(outer_ec.clone()); + // } + // if let HeaderAction::Modify(m, _) = &xf.inner_ether { + // if let Some(src) = m.src { + // hm.inner_eth.set_source(src.bytes().into()); + // } + // if let Some(dst) = m.dst { + // hm.inner_eth.set_destination(dst.bytes().into()); + // } + // } + // if let HeaderAction::Modify(m, _) = &xf.inner_ip { + // match m { + // super::headers::IpMod::Ip4(v4) => { + // let Some(ingot::example_chain::L3::Ipv4( + // ref mut v4_t, + // )) = hm.inner_l3 + // else { + // return Err(ProcessError::FlowTableFull { + // kind: "()", + // limit: 0, + // }); + // }; + // if let Some(src) = v4.src { + // dirty_csum = true; + // v4_t.set_source(src.into()); + // } + // if let Some(dst) = v4.dst { + // dirty_csum = true; + // v4_t.set_destination(dst.into()); + // } + // } + // super::headers::IpMod::Ip6(v6) => { + // let Some(ingot::example_chain::L3::Ipv6( + // ref mut v6_t, + // )) = hm.inner_l3 + // else { + // return Err(ProcessError::FlowTableFull { + // kind: "()", + // limit: 0, + // }); + // }; + // if let Some(src) = v6.src { + // dirty_csum = 
true; + // v6_t.set_source(src.into()); + // } + // if let Some(dst) = v6.dst { + // dirty_csum = true; + // v6_t.set_destination(dst.into()); + // } + // } + // } + // } + // if let UlpHeaderAction::Modify(m) = &xf.inner_ulp { + // if let Some(src) = &m.generic.src_port { + // match hm.inner_ulp { + // Some(Ulp::Tcp(ref mut t)) => { + // dirty_csum = true; + // t.set_source(*src) + // } + // Some(Ulp::Udp(ref mut t)) => { + // dirty_csum = true; + // t.set_source(*src) + // } + // _ => {} + // } + // } + // if let Some(dst) = &m.generic.dst_port { + // match hm.inner_ulp { + // Some(Ulp::Tcp(ref mut t)) => { + // dirty_csum = true; + // t.set_destination(*dst) + // } + // Some(Ulp::Udp(ref mut t)) => { + // dirty_csum = true; + // t.set_destination(*dst) + // } + // _ => {} + // } + // } + // if let Some(flags) = &m.tcp_flags { + // match hm.inner_ulp { + // Some(Ulp::Tcp(ref mut t)) => { + // dirty_csum = true; + // t.set_flags(TcpFlags::from_bits_retain( + // *flags, + // )) + // } + // _ => {} + // } + // } + // if let Some(new_id) = &m.icmp_id { + // match hm.inner_ulp { + // Some(Ulp::IcmpV4(ref mut pkt)) + // if pkt.ty() == 0 || pkt.ty() == 3 => + // { + // dirty_csum = true; + // pkt.rest_of_hdr_mut()[..2] + // .copy_from_slice(&new_id.to_be_bytes()) + // } + // Some(Ulp::IcmpV6(ref mut pkt)) + // if pkt.ty() == 128 || pkt.ty() == 129 => + // { + // dirty_csum = true; + // pkt.rest_of_hdr_mut()[..2] + // .copy_from_slice(&new_id.to_be_bytes()) + // } + // _ => {} + // } + // } + // } + // } + + // if dirty_csum { + // // TODO: something. + // } + + // match (new_eth, new_ip, new_encap) { + // (Some(a), Some(b), Some(c)) => { + // Ok(ThinProcRes::PushEncap(a, b, c)) + // } + // (None, None, None) => Ok(ThinProcRes::Na), + // _ => Err(ProcessError::FlowTableFull { + // kind: "()", + // limit: 0, + // }), + // } + // } + + // Direction::In => { + // let a = data.uft_in.get(&flow_before); + // let Some(a) = a else { + // // eh. 
+ // return Err(ProcessError::FlowTableFull { + // kind: "()", + // limit: 0, + // }); + // }; + // pkt.set_l4_hash(a.state().l4_hash); + // let xforms = Arc::clone(&a.state().xforms); + // Self::update_stats_in( + // &mut data.stats.vals, + // &Ok(ProcessResult::Modified), + // ); + // drop(data); + + // let hm = pkt.meta.0.headers_mut(); + + // let mut pop_eth = false; + // let mut pop_ip = false; + // let mut pop_encap = false; + // for xf in &xforms.hdr { + // // opte::engine::err!("xf..."); + // if let HeaderAction::Pop = &xf.outer_ether { + // pop_eth = true; + // } + // if let HeaderAction::Pop = &xf.outer_ip { + // pop_ip = true; + // } + // if let HeaderAction::Pop = &xf.outer_encap { + // pop_encap = true; + // } + // if let HeaderAction::Modify(m, _) = &xf.inner_ether { + // if let Some(src) = m.src { + // hm.inner_eth.set_source(src.bytes().into()); + // } + // if let Some(dst) = m.dst { + // hm.inner_eth.set_destination(dst.bytes().into()); + // } + // } + // if let HeaderAction::Modify(m, _) = &xf.inner_ip { + // match m { + // super::headers::IpMod::Ip4(v4) => { + // let Some(ingot::example_chain::L3::Ipv4( + // ref mut v4_t, + // )) = hm.inner_l3 + // else { + // return Err(ProcessError::FlowTableFull { + // kind: "()", + // limit: 0, + // }); + // }; + // if let Some(src) = v4.src { + // dirty_csum = true; + // v4_t.set_source(src.into()); + // } + // if let Some(dst) = v4.dst { + // dirty_csum = true; + // v4_t.set_destination(dst.into()); + // } + // } + // super::headers::IpMod::Ip6(v6) => { + // let Some(ingot::example_chain::L3::Ipv6( + // ref mut v6_t, + // )) = hm.inner_l3 + // else { + // return Err(ProcessError::FlowTableFull { + // kind: "()", + // limit: 0, + // }); + // }; + // if let Some(src) = v6.src { + // dirty_csum = true; + // v6_t.set_source(src.into()); + // } + // if let Some(dst) = v6.dst { + // dirty_csum = true; + // v6_t.set_destination(dst.into()); + // } + // } + // } + // } + // if let UlpHeaderAction::Modify(m) = 
&xf.inner_ulp { + // if let Some(src) = &m.generic.src_port { + // match hm.inner_ulp { + // Some(Ulp::Tcp(ref mut t)) => { + // dirty_csum = true; + // t.set_source(*src) + // } + // Some(Ulp::Udp(ref mut t)) => { + // dirty_csum = true; + // t.set_source(*src) + // } + // _ => {} + // } + // } + // if let Some(dst) = &m.generic.dst_port { + // match hm.inner_ulp { + // Some(Ulp::Tcp(ref mut t)) => { + // dirty_csum = true; + // t.set_destination(*dst) + // } + // Some(Ulp::Udp(ref mut t)) => { + // dirty_csum = true; + // t.set_destination(*dst) + // } + // _ => {} + // } + // } + // if let Some(flags) = &m.tcp_flags { + // match hm.inner_ulp { + // Some(Ulp::Tcp(ref mut t)) => { + // dirty_csum = true; + // t.set_flags(TcpFlags::from_bits_retain( + // *flags, + // )) + // } + // _ => {} + // } + // } + // if let Some(new_id) = &m.icmp_id { + // match hm.inner_ulp { + // Some(Ulp::IcmpV4(ref mut pkt)) + // if pkt.ty() == 0 || pkt.ty() == 3 => + // { + // dirty_csum = true; + // pkt.rest_of_hdr_mut()[..2] + // .copy_from_slice(&new_id.to_be_bytes()) + // } + // Some(Ulp::IcmpV6(ref mut pkt)) + // if pkt.ty() == 128 || pkt.ty() == 129 => + // { + // dirty_csum = true; + // pkt.rest_of_hdr_mut()[..2] + // .copy_from_slice(&new_id.to_be_bytes()) + // } + // _ => {} + // } + // } + // } + // } + + // if dirty_csum { + // // TODO: do something. + // } + + // match (pop_eth, pop_ip, pop_encap) { + // (true, true, true) => Ok(ThinProcRes::PopEncap), + // (false, false, false) => Ok(ThinProcRes::Na), + // _ => Err(ProcessError::FlowTableFull { + // kind: "()", + // limit: 0, + // }), + // } + // } + // } + // } /// Remove the rule identified by the `dir`, `layer_name`, `id` /// combination, if such a rule exists. @@ -1582,6 +1674,8 @@ impl Port { // future we could eliminate this window by passing a // reference to the epoch to `Layer::remove_rule()` // and let it perform the increment. + // XXX(kyle) Above comment misunderstands TOCTOU -- + // THE TABLE IS LOCKED. 
self.epoch.fetch_add(1, SeqCst); return Ok(()); } @@ -1737,7 +1831,7 @@ impl Port { &self, data: &mut PortData, dir: Direction, - pkt: &mut Packet, + pkt: &mut Packet2, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { @@ -1781,7 +1875,7 @@ impl Port { dir: Direction, flow: &InnerFlowId, epoch: u64, - pkt: &Packet, + pkt: &Packet2, ) { cfg_if::cfg_if! { if #[cfg(all(not(feature = "std"), not(test)))] { @@ -1810,7 +1904,7 @@ impl Port { dir: Direction, flow_before: &InnerFlowId, epoch: u64, - pkt: &Packet, + pkt: &Packet2, res: &result::Result, ) { let flow_after = pkt.flow(); @@ -2100,10 +2194,10 @@ impl Port { &self, data: &mut PortData, epoch: u64, - pkt: &mut Packet, + pkt: &mut Packet2, ufid_in: &InnerFlowId, ameta: &mut ActionMeta, - ) -> result::Result { + ) -> result::Result { use Direction::In; data.stats.vals.in_uft_miss += 1; @@ -2114,22 +2208,25 @@ impl Port { // If there is no flow ID, then do not create a UFT // entry. if *ufid_in == FLOW_ID_DEFAULT { - return Ok(ProcessResult::Modified); + return Ok(InternalProcessResult::Modified { + transform: todo!(), + tcp_state: todo!(), + }); } } Ok(LayerResult::Deny { name, reason }) => { - return Ok(ProcessResult::Drop { + return Ok(InternalProcessResult::Drop { reason: DropReason::Layer { name, reason }, }) } Ok(LayerResult::Hairpin(hppkt)) => { - return Ok(ProcessResult::Hairpin(hppkt)) + return Ok(InternalProcessResult::Hairpin(hppkt)) } Ok(LayerResult::HandlePkt) => { - return Ok(ProcessResult::from(self.net.handle_pkt( + return Ok(InternalProcessResult::from(self.net.handle_pkt( In, pkt, &data.uft_in, @@ -2173,63 +2270,76 @@ impl Port { // For inbound traffic the TCP flow table must be // checked _after_ processing take place. - if pkt.meta().is_inner_tcp() { - match self.process_in_tcp( - data, - pkt.meta(), - ufid_in, - pkt.len() as u64, - ) { - Ok(TcpState::Closed) => Ok(ProcessResult::Modified), - - // Found existing TCP flow, or have just created a new one. 
- Ok(_) - | Err(ProcessError::TcpFlow(TcpFlowStateError::NewFlow { - .. - })) - | Err(ProcessError::MissingFlow(_)) => { - // We have a good TCP flow, create a new UFT entry. - match data.uft_in.add(*ufid_in, hte) { - Ok(_) => Ok(ProcessResult::Modified), - Err(OpteError::MaxCapacity(limit)) => { - Err(ProcessError::FlowTableFull { - kind: "UFT", - limit, - }) - } - Err(_) => unreachable!( - "Cannot return other errors from FlowTable::add" - ), - } - } - - // Unlike for existing flows, we don't allow through - // unexpected packets here for now -- the `TcpState` FSM - // already encodes a shortcut from `Closed` to `Established. - Err(ProcessError::TcpFlow(err)) => { - let e = format!("{err}"); - self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); - Ok(ProcessResult::Drop { reason: DropReason::TcpErr }) - } - Err(ProcessError::FlowTableFull { kind, limit }) => { - let e = format!("{kind} flow table full ({limit} entries)"); - self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); - Ok(ProcessResult::Drop { reason: DropReason::TcpErr }) - } - res => unreachable!( - "Cannot return other errors from \ - process_in_tcp, returned: {res:?}" - ), + // TODO: uncork + // if pkt.meta().is_inner_tcp() { + // match self.process_in_tcp( + // data, + // pkt.meta(), + // ufid_in, + // pkt.len() as u64, + // ) { + // Ok(TcpState::Closed) => Ok(InternalProcessResult::Modified { transform: todo!(), tcp_state: todo!() }), + + // // Found existing TCP flow, or have just created a new one. + // Ok(_) + // | Err(ProcessError::TcpFlow(TcpFlowStateError::NewFlow { + // .. + // })) + // | Err(ProcessError::MissingFlow(_)) => { + // // We have a good TCP flow, create a new UFT entry. 
+ // match data.uft_in.add(*ufid_in, hte) { + // Ok(_) => Ok(InternalProcessResult::Modified { transform: todo!(), tcp_state: todo!() }), + // Err(OpteError::MaxCapacity(limit)) => { + // Err(ProcessError::FlowTableFull { + // kind: "UFT", + // limit, + // }) + // } + // Err(_) => unreachable!( + // "Cannot return other errors from FlowTable::add" + // ), + // } + // } + + // // Unlike for existing flows, we don't allow through + // // unexpected packets here for now -- the `TcpState` FSM + // // already encodes a shortcut from `Closed` to `Established. + // Err(ProcessError::TcpFlow(err)) => { + // let e = format!("{err}"); + // self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); + // Ok(InternalProcessResult::Drop { reason: DropReason::TcpErr }) + // } + // Err(ProcessError::FlowTableFull { kind, limit }) => { + // let e = format!("{kind} flow table full ({limit} entries)"); + // self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); + // Ok(InternalProcessResult::Drop { reason: DropReason::TcpErr }) + // } + // res => unreachable!( + // "Cannot return other errors from \ + // process_in_tcp, returned: {res:?}" + // ), + // } + // } else { + // match data.uft_in.add(*ufid_in, hte) { + // Ok(_) => Ok(InternalProcessResult::Modified{ transform: todo!(), tcp_state: todo!() }), + // Err(OpteError::MaxCapacity(limit)) => { + // Err(ProcessError::FlowTableFull { kind: "UFT", limit }) + // } + // Err(_) => unreachable!( + // "Cannot return other errors from FlowTable::add" + // ), + // } + // } + match data.uft_in.add(*ufid_in, hte) { + Ok(_) => Ok(InternalProcessResult::Modified { + transform: None, + tcp_state: todo!(), + }), + Err(OpteError::MaxCapacity(limit)) => { + Err(ProcessError::FlowTableFull { kind: "UFT", limit }) } - } else { - match data.uft_in.add(*ufid_in, hte) { - Ok(_) => Ok(ProcessResult::Modified), - Err(OpteError::MaxCapacity(limit)) => { - Err(ProcessError::FlowTableFull { kind: "UFT", limit }) - } - Err(_) => unreachable!( - "Cannot return other 
errors from FlowTable::add" - ), + Err(_) => { + unreachable!("Cannot return other errors from FlowTable::add") } } } @@ -2269,10 +2379,10 @@ impl Port { &self, data: &mut PortData, epoch: u64, - pkt: &mut Packet, + pkt: &mut Packet2, ufid_in: &InnerFlowId, ameta: &mut ActionMeta, - ) -> result::Result { + ) -> result::Result { use Direction::In; // Use the compiled UFT entry if one exists. Otherwise @@ -2288,98 +2398,106 @@ impl Port { data.stats.vals.in_uft_hit += 1; self.uft_hit_probe(In, pkt.flow(), epoch, entry.last_hit()); - for ht in &entry.state().xforms.hdr { - pkt.hdr_transform(ht)?; - } + let transform = Some(Arc::clone(&entry.state().xforms)); - for bt in &entry.state().xforms.body { - pkt.body_transform(In, &**bt)?; - } + // for ht in &entry.state().xforms.hdr { + // pkt.hdr_transform(ht)?; + // } + + // for bt in &entry.state().xforms.body { + // pkt.body_transform(In, &**bt)?; + // } // For inbound traffic the TCP flow table must be // checked _after_ processing take place. - if pkt.meta().is_inner_tcp() { - match self.process_in_tcp( - data, - pkt.meta(), - ufid_in, - pkt.len() as u64, - ) { - Ok(_) => return Ok(ProcessResult::Modified), - Err(ProcessError::TcpFlow( - e @ TcpFlowStateError::NewFlow { .. }, - )) => { - self.tcp_err( - &data.tcp_flows, - In, - e.to_string(), - pkt, - ); - // We cant redo processing here like we can in `process_out`: - // we already modified the packet to check TCP state. - // However, we *have* deleted and replaced the TCP FSM and - // removed the UFT. The next packet on this flow (SYN-ACK) will - // create the UFT, reference the existing TCP flow, and increment - // all other layers' stats. - return Ok(ProcessResult::Modified); - } - Err(ProcessError::MissingFlow(flow_id)) => { - let e = format!("Missing TCP flow ID: {flow_id}"); - self.tcp_err( - &data.tcp_flows, - Direction::In, - e, - pkt, - ); - // If we have a UFT but no TCP flow ID, there is likely a bug - // and we are now out of sync. 
As above we can't reprocess, - // but we have regenerated the TCP entry to be less disruptive - // than a drop. Remove the UFT entry on the same proviso since the - // next packet to use it will regenerate it. - self.uft_invalidate( - data, - None, - Some(ufid_in), - epoch, - ); - return Ok(ProcessResult::Modified); - } - Err(ProcessError::TcpFlow( - e @ TcpFlowStateError::UnexpectedSegment { .. }, - )) => { - // Technically unreachable, as we filter these out in `update_tcp_entry`. - // Panicking here would probably be overly fragile, however. - self.tcp_err( - &data.tcp_flows, - Direction::In, - e.to_string(), - pkt, - ); - return Ok(ProcessResult::Drop { - reason: DropReason::TcpErr, - }); - } - Err(ProcessError::FlowTableFull { kind, limit }) => { - let e = format!( - "{kind} flow table full ({limit} entries)" - ); - self.tcp_err( - &data.tcp_flows, - Direction::In, - e, - pkt, - ); - return Ok(ProcessResult::Drop { - reason: DropReason::TcpErr, - }); - } - _ => unreachable!( - "Cannot return other errors from process_in_tcp" - ), - } - } else { - return Ok(ProcessResult::Modified); - } + // TODO: uncork + // if pkt.meta().is_inner_tcp() { + // match self.process_in_tcp( + // data, + // pkt.meta(), + // ufid_in, + // pkt.len() as u64, + // ) { + // Ok(_) => return Ok(ProcessResult::Modified), + // Err(ProcessError::TcpFlow( + // e @ TcpFlowStateError::NewFlow { .. }, + // )) => { + // self.tcp_err( + // &data.tcp_flows, + // In, + // e.to_string(), + // pkt, + // ); + // // We cant redo processing here like we can in `process_out`: + // // we already modified the packet to check TCP state. + // // However, we *have* deleted and replaced the TCP FSM and + // // removed the UFT. The next packet on this flow (SYN-ACK) will + // // create the UFT, reference the existing TCP flow, and increment + // // all other layers' stats. 
+ // return Ok(ProcessResult::Modified); + // } + // Err(ProcessError::MissingFlow(flow_id)) => { + // let e = format!("Missing TCP flow ID: {flow_id}"); + // self.tcp_err( + // &data.tcp_flows, + // Direction::In, + // e, + // pkt, + // ); + // // If we have a UFT but no TCP flow ID, there is likely a bug + // // and we are now out of sync. As above we can't reprocess, + // // but we have regenerated the TCP entry to be less disruptive + // // than a drop. Remove the UFT entry on the same proviso since the + // // next packet to use it will regenerate it. + // self.uft_invalidate( + // data, + // None, + // Some(ufid_in), + // epoch, + // ); + // return Ok(ProcessResult::Modified); + // } + // Err(ProcessError::TcpFlow( + // e @ TcpFlowStateError::UnexpectedSegment { .. }, + // )) => { + // // Technically unreachable, as we filter these out in `update_tcp_entry`. + // // Panicking here would probably be overly fragile, however. + // self.tcp_err( + // &data.tcp_flows, + // Direction::In, + // e.to_string(), + // pkt, + // ); + // return Ok(ProcessResult::Drop { + // reason: DropReason::TcpErr, + // }); + // } + // Err(ProcessError::FlowTableFull { kind, limit }) => { + // let e = format!( + // "{kind} flow table full ({limit} entries)" + // ); + // self.tcp_err( + // &data.tcp_flows, + // Direction::In, + // e, + // pkt, + // ); + // return Ok(ProcessResult::Drop { + // reason: DropReason::TcpErr, + // }); + // } + // _ => unreachable!( + // "Cannot return other errors from process_in_tcp" + // ), + // } + // } else { + // return Ok(ProcessResult::Modified); + // } + + return Ok(InternalProcessResult::Modified { + transform, + tcp_state: todo!(), + }); } // The entry is from a previous epoch; invalidate its UFT @@ -2459,9 +2577,9 @@ impl Port { &self, data: &mut PortData, epoch: u64, - pkt: &mut Packet, + pkt: &mut Packet2, ameta: &mut ActionMeta, - ) -> result::Result { + ) -> result::Result { use Direction::Out; data.stats.vals.out_uft_miss += 1; @@ -2469,54 
+2587,55 @@ impl Port { // For outbound traffic the TCP flow table must be checked // _before_ processing take place. - if pkt.meta().is_inner_tcp() { - match self.process_out_tcp_new( - data, - pkt.flow(), - pkt.meta(), - pkt.len() as u64, - ) { - Ok(TcpMaybeClosed::Closed { ufid_inbound }) => { - tcp_closed = true; - self.uft_tcp_closed( - data, - pkt.flow(), - ufid_inbound.as_ref(), - ); - } - - // Continue with processing. - Ok(_) => (), - - // Unlike for existing flows, we don't allow through - // unexpected packets here for now -- the `TcpState` FSM - // already encodes a shortcut from `Closed` to `Established. - Err(ProcessError::TcpFlow(err)) => { - let e = format!("{err}"); - self.tcp_err(&data.tcp_flows, Out, e, pkt); - return Ok(ProcessResult::Drop { - reason: DropReason::TcpErr, - }); - } - Err(ProcessError::MissingFlow(flow_id)) => { - let e = format!("Missing TCP flow ID: {flow_id}"); - self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); - return Ok(ProcessResult::Drop { - reason: DropReason::TcpErr, - }); - } - Err(ProcessError::FlowTableFull { kind, limit }) => { - let e = format!("{kind} flow table full ({limit} entries)"); - self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); - return Ok(ProcessResult::Drop { - reason: DropReason::TcpErr, - }); - } - res => unreachable!( - "Cannot return other errors from process_in_tcp_new, returned: {res:?}" - ), - } - } + // TODO: uncork + // if pkt.meta().is_inner_tcp() { + // match self.process_out_tcp_new( + // data, + // pkt.flow(), + // pkt.meta(), + // pkt.len() as u64, + // ) { + // Ok(TcpMaybeClosed::Closed { ufid_inbound }) => { + // tcp_closed = true; + // self.uft_tcp_closed( + // data, + // pkt.flow(), + // ufid_inbound.as_ref(), + // ); + // } + + // // Continue with processing. + // Ok(_) => (), + + // // Unlike for existing flows, we don't allow through + // // unexpected packets here for now -- the `TcpState` FSM + // // already encodes a shortcut from `Closed` to `Established. 
+ // Err(ProcessError::TcpFlow(err)) => { + // let e = format!("{err}"); + // self.tcp_err(&data.tcp_flows, Out, e, pkt); + // return Ok(InternalProcessResult::Drop { + // reason: DropReason::TcpErr, + // }); + // } + // Err(ProcessError::MissingFlow(flow_id)) => { + // let e = format!("Missing TCP flow ID: {flow_id}"); + // self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); + // return Ok(InternalProcessResult::Drop { + // reason: DropReason::TcpErr, + // }); + // } + // Err(ProcessError::FlowTableFull { kind, limit }) => { + // let e = format!("{kind} flow table full ({limit} entries)"); + // self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); + // return Ok(InternalProcessResult::Drop { + // reason: DropReason::TcpErr, + // }); + // } + // res => unreachable!( + // "Cannot return other errors from process_in_tcp_new, returned: {res:?}" + // ), + // } + // } let mut xforms = Transforms::new(); let flow_before = *pkt.flow(); @@ -2533,10 +2652,16 @@ impl Port { Ok(LayerResult::Allow) => { // If there is no Flow ID, then there is no UFT entry. 
if flow_before == FLOW_ID_DEFAULT || tcp_closed { - return Ok(ProcessResult::Modified); + return Ok(InternalProcessResult::Modified { + transform: None, + tcp_state: None, + }); } match data.uft_out.add(flow_before, hte) { - Ok(_) => Ok(ProcessResult::Modified), + Ok(_) => Ok(InternalProcessResult::Modified { + transform: None, + tcp_state: None, + }), Err(OpteError::MaxCapacity(limit)) => { Err(ProcessError::FlowTableFull { kind: "UFT", limit }) } @@ -2547,14 +2672,16 @@ impl Port { } Ok(LayerResult::Hairpin(hppkt)) => { - Ok(ProcessResult::Hairpin(hppkt)) + Ok(InternalProcessResult::Hairpin(hppkt)) } - Ok(LayerResult::Deny { name, reason }) => Ok(ProcessResult::Drop { - reason: DropReason::Layer { name, reason }, - }), + Ok(LayerResult::Deny { name, reason }) => { + Ok(InternalProcessResult::Drop { + reason: DropReason::Layer { name, reason }, + }) + } - Ok(LayerResult::HandlePkt) => Ok(ProcessResult::from( + Ok(LayerResult::HandlePkt) => Ok(InternalProcessResult::from( self.net.handle_pkt(Out, pkt, &data.uft_in, &data.uft_out)?, )), @@ -2566,16 +2693,16 @@ impl Port { &self, data: &mut PortData, epoch: u64, - pkt: &mut Packet, + pkt: &mut Packet2, ameta: &mut ActionMeta, - ) -> result::Result { + ) -> result::Result { use Direction::Out; let uft_out = &mut data.uft_out; // Use the compiled UFT entry if one exists. Otherwise // fallback to layer processing. - match uft_out.get_mut(pkt.flow()) { + match uft_out.get_mut(&pkt.flow()) { Some(entry) if entry.state().epoch == epoch => { entry.hit(); data.stats.vals.out_uft_hit += 1; @@ -2585,72 +2712,74 @@ impl Port { let mut reprocess = false; let mut ufid_in = None; + // TODO: find the best way to unbreak. + // For outbound traffic the TCP flow table must be // checked _before_ processing take place. - if pkt.meta().is_inner_tcp() { - match self.process_out_tcp_existing( - &mut data.tcp_flows, - pkt.flow(), - pkt.meta(), - pkt.len() as u64, - ) { - // Continue with processing. 
- Ok(TcpMaybeClosed::NewState(_)) => (), - - Ok(TcpMaybeClosed::Closed { ufid_inbound }) => { - invalidated = true; - ufid_in = ufid_inbound; - } - - Err(ProcessError::TcpFlow( - e @ TcpFlowStateError::NewFlow { .. }, - )) => { - invalidated = true; - reprocess = true; - self.tcp_err( - &data.tcp_flows, - Out, - e.to_string(), - pkt, - ); - } - - Err(ProcessError::MissingFlow(flow_id)) => { - // If we have a UFT but no TCP flow ID, there is likely a bug - // and we are now out of sync. A full reprocess will be - // slower for this packet but will sync up the tables again. - invalidated = true; - reprocess = true; - let e = format!("Missing TCP flow ID: {flow_id}"); - self.tcp_err( - &data.tcp_flows, - Direction::In, - e, - pkt, - ); - } - - Err(ProcessError::TcpFlow( - e @ TcpFlowStateError::UnexpectedSegment { .. }, - )) => { - // Technically unreachable, as we filter these out in `update_tcp_entry`. - // Panicking here would probably be overly fragile, however. - self.tcp_err( - &data.tcp_flows, - Direction::In, - e.to_string(), - pkt, - ); - return Ok(ProcessResult::Drop { - reason: DropReason::TcpErr, - }); - } - - _ => unreachable!( - "Cannot return other errors from process_in_tcp_new" - ), - } - } + // if pkt.meta().is_inner_tcp() { + // match self.process_out_tcp_existing( + // &mut data.tcp_flows, + // pkt.flow(), + // pkt.meta(), + // pkt.len() as u64, + // ) { + // // Continue with processing. + // Ok(TcpMaybeClosed::NewState(_)) => (), + + // Ok(TcpMaybeClosed::Closed { ufid_inbound }) => { + // invalidated = true; + // ufid_in = ufid_inbound; + // } + + // Err(ProcessError::TcpFlow( + // e @ TcpFlowStateError::NewFlow { .. }, + // )) => { + // invalidated = true; + // reprocess = true; + // self.tcp_err( + // &data.tcp_flows, + // Out, + // e.to_string(), + // pkt, + // ); + // } + + // Err(ProcessError::MissingFlow(flow_id)) => { + // // If we have a UFT but no TCP flow ID, there is likely a bug + // // and we are now out of sync. 
A full reprocess will be + // // slower for this packet but will sync up the tables again. + // invalidated = true; + // reprocess = true; + // let e = format!("Missing TCP flow ID: {flow_id}"); + // self.tcp_err( + // &data.tcp_flows, + // Direction::In, + // e, + // pkt, + // ); + // } + + // Err(ProcessError::TcpFlow( + // e @ TcpFlowStateError::UnexpectedSegment { .. }, + // )) => { + // // Technically unreachable, as we filter these out in `update_tcp_entry`. + // // Panicking here would probably be overly fragile, however. + // self.tcp_err( + // &data.tcp_flows, + // Direction::In, + // e.to_string(), + // pkt, + // ); + // return Ok(ProcessResult::Drop { + // reason: DropReason::TcpErr, + // }); + // } + + // _ => unreachable!( + // "Cannot return other errors from process_in_tcp_new" + // ), + // } + // } let flow_to_invalidate = invalidated.then(|| *pkt.flow()); @@ -2658,14 +2787,7 @@ impl Port { // existing transforms if we're going to behave as though we // have a UFT miss. if !reprocess { - for ht in &entry.state().xforms.hdr { - pkt.hdr_transform(ht)?; - } - - for bt in &entry.state().xforms.body { - pkt.body_transform(Out, &**bt)?; - } - + let transform = Some(Arc::clone(&entry.state().xforms)); // Due to borrowing constraints from order of operations, we have // to remove the UFT entry here rather than in `update_tcp_entry`. // The TCP entry itself is already removed. 
@@ -2677,7 +2799,11 @@ impl Port { ); } - return Ok(ProcessResult::Modified); + return Ok(InternalProcessResult::Modified { + transform, + // TODO + tcp_state: None, + }); } else if let Some(flow_before) = flow_to_invalidate { self.uft_tcp_closed(data, &flow_before, ufid_in.as_ref()); } @@ -2783,12 +2909,12 @@ impl Port { fn update_stats_in( stats: &mut PortStats, - res: &result::Result, + res: &result::Result, ) { match res { - Ok(ProcessResult::Bypass) => stats.in_bypass += 1, + Ok(InternalProcessResult::Bypass) => stats.in_bypass += 1, - Ok(ProcessResult::Drop { reason }) => { + Ok(InternalProcessResult::Drop { reason }) => { stats.in_drop += 1; match reason { @@ -2798,9 +2924,11 @@ impl Port { } } - Ok(ProcessResult::Modified) => stats.in_modified += 1, + Ok(InternalProcessResult::Modified { .. }) => { + stats.in_modified += 1 + } - Ok(ProcessResult::Hairpin(_)) => stats.in_hairpin += 1, + Ok(InternalProcessResult::Hairpin(_)) => stats.in_hairpin += 1, // XXX We should split the different error types out into // individual stats. However, I'm not sure exactly how I @@ -2816,12 +2944,12 @@ impl Port { fn update_stats_out( stats: &mut PortStats, - res: &result::Result, + res: &result::Result, ) { match res { - Ok(ProcessResult::Bypass) => stats.out_bypass += 1, + Ok(InternalProcessResult::Bypass) => stats.out_bypass += 1, - Ok(ProcessResult::Drop { reason }) => { + Ok(InternalProcessResult::Drop { reason }) => { stats.out_drop += 1; match reason { @@ -2831,9 +2959,11 @@ impl Port { } } - Ok(ProcessResult::Modified) => stats.out_modified += 1, + Ok(InternalProcessResult::Modified { .. }) => { + stats.out_modified += 1 + } - Ok(ProcessResult::Hairpin(_)) => stats.out_hairpin += 1, + Ok(InternalProcessResult::Hairpin(_)) => stats.out_hairpin += 1, // XXX We should split the different error types out into // individual stats. 
However, I'm not sure exactly how I diff --git a/lib/opte/src/engine/predicate.rs b/lib/opte/src/engine/predicate.rs index fd69ae51..d52e68ee 100644 --- a/lib/opte/src/engine/predicate.rs +++ b/lib/opte/src/engine/predicate.rs @@ -12,6 +12,10 @@ use super::ether::EtherType; use super::headers::IpMeta; use super::icmp::v4::MessageType as IcmpMessageType; use super::icmp::v6::MessageType as Icmpv6MessageType; +use super::ingot_packet::ulp_dst_port; +use super::ingot_packet::ulp_src_port; +use super::ingot_packet::PacketHeaders; +use super::ingot_packet::PacketHeaders2; use super::ip4::Ipv4Addr; use super::ip4::Ipv4Cidr; use super::ip4::Ipv4Meta; @@ -29,6 +33,11 @@ use alloc::vec::Vec; use core::fmt; use core::fmt::Display; use core::ops::RangeInclusive; +use ingot::ethernet::EthernetRef; +use ingot::example_chain::L3; +use ingot::icmp::IcmpV4Ref; +use ingot::ip::Ipv4Ref; +use ingot::ip::Ipv6Ref; use opte_api::MacAddr; use serde::Deserialize; use serde::Serialize; @@ -352,7 +361,7 @@ impl Display for Predicate { impl Predicate { pub(crate) fn is_match( &self, - meta: &PacketMeta, + meta: &PacketHeaders2, action_meta: &ActionMeta, ) -> bool { match self { @@ -368,7 +377,9 @@ impl Predicate { Self::InnerEtherType(list) => { for m in list { - if m.matches(meta.inner.ether.ether_type) { + if m.matches(EtherType::from( + meta.inner_ether().ethertype().0, + )) { return true; } } @@ -376,7 +387,9 @@ impl Predicate { Self::InnerEtherDst(list) => { for m in list { - if m.matches(meta.inner.ether.dst) { + if m.matches( + meta.inner_ether().destination().into_array().into(), + ) { return true; } } @@ -384,16 +397,20 @@ impl Predicate { Self::InnerEtherSrc(list) => { for m in list { - if m.matches(meta.inner.ether.src) { + if m.matches( + meta.inner_ether().source().into_array().into(), + ) { return true; } } } - Self::InnerIpProto(list) => match meta.inner.ip { + Self::InnerIpProto(list) => match meta.inner_l3() { None => return false, - Some(IpMeta::Ip4(Ipv4Meta { proto, .. 
})) => { + Some(L3::Ipv4(ipv4)) => { + let proto = Protocol::from(ipv4.protocol().0); + for m in list { if m.matches(proto) { return true; @@ -401,7 +418,10 @@ impl Predicate { } } - Some(IpMeta::Ip6(Ipv6Meta { proto, .. })) => { + Some(L3::Ipv6(ipv6)) => { + // NOTE: I know this is bugged on EHs. + let proto = Protocol::from(ipv6.next_header().0); + for m in list { if m.matches(proto) { return true; @@ -410,8 +430,9 @@ impl Predicate { } }, - Self::InnerSrcIp4(list) => match meta.inner.ip { - Some(IpMeta::Ip4(Ipv4Meta { src: ip, .. })) => { + Self::InnerSrcIp4(list) => match meta.inner_ip4() { + Some(v4) => { + let ip = v4.source().into(); for m in list { if m.matches(ip) { return true; @@ -424,8 +445,9 @@ impl Predicate { _ => return false, }, - Self::InnerDstIp4(list) => match meta.inner.ip { - Some(IpMeta::Ip4(Ipv4Meta { dst: ip, .. })) => { + Self::InnerDstIp4(list) => match meta.inner_ip4() { + Some(v4) => { + let ip = v4.destination().into(); for m in list { if m.matches(ip) { return true; @@ -438,8 +460,9 @@ impl Predicate { _ => return false, }, - Self::InnerSrcIp6(list) => match meta.inner.ip { - Some(IpMeta::Ip6(Ipv6Meta { src: ip, .. })) => { + Self::InnerSrcIp6(list) => match meta.inner_ip6() { + Some(v6) => { + let ip = v6.source().into(); for m in list { if m.matches(ip) { return true; @@ -449,8 +472,9 @@ impl Predicate { _ => return false, }, - Self::InnerDstIp6(list) => match meta.inner.ip { - Some(IpMeta::Ip6(Ipv6Meta { dst: ip, .. })) => { + Self::InnerDstIp6(list) => match meta.inner_ip6() { + Some(v6) => { + let ip = v6.destination().into(); for m in list { if m.matches(ip) { return true; @@ -461,11 +485,11 @@ impl Predicate { }, Self::InnerSrcPort(list) => { - match meta.inner.ulp.map(|m| m.src_port()) { + match meta.inner_ulp().map(ulp_src_port).flatten() { // No ULP metadata or no source port (e.g. ICMPv6). 
- None | Some(None) => return false, + None => return false, - Some(Some(port)) => { + Some(port) => { for m in list { if m.matches(port) { return true; @@ -476,11 +500,11 @@ impl Predicate { } Self::InnerDstPort(list) => { - match meta.inner.ulp.map(|m| m.dst_port()) { + match meta.inner_ulp().map(ulp_dst_port).flatten() { // No ULP metadata or no destination port (e.g. ICMPv6). - None | Some(None) => return false, + None => return false, - Some(Some(port)) => { + Some(port) => { for m in list { if m.matches(port) { return true; @@ -579,39 +603,21 @@ impl DataPredicate { // use `PacketMeta` to determine if there is a suitable payload to // be inspected. That is, if there is no metadata for a given // header, there is certainly no payload. - pub(crate) fn is_match<'a, 'b, R>( - &self, - meta: &PacketMeta, - rdr: &'b mut R, - ) -> bool - where - R: PacketRead<'a>, - { + pub(crate) fn is_match<'a>(&self, meta: &PacketHeaders2) -> bool { match self { - Self::Not(pred) => !pred.is_match(meta, rdr), + Self::Not(pred) => !pred.is_match(meta), Self::DhcpMsgType(mt) => { - let bytes = rdr.copy_remaining(); - let pkt = match DhcpPacket::new_checked(&bytes) { - Ok(v) => v, - Err(e) => { - super::err!( - "DhcpPacket::new_checked() failed: {:?}", - e - ); - return false; - } - }; - let dhcp = match DhcpRepr::parse(&pkt) { - Ok(v) => v, - Err(e) => { - super::err!("DhcpRepr::parse() failed: {:?}", e); - - return false; - } - }; - - mt.is_match(&DhcpMessageType::from(dhcp.message_type)) + // Formerly, this did a COMPLETE clone and parse. 
+ let body = meta.body_segs(); + if body.len() == 0 || body[0].len() == 0 { + super::err!( + "Failed to read DHCPv6 message type from packet" + ); + false + } else { + mt.is_match(&body[0][0].into()) + } } Self::IcmpMsgType(mt) => { @@ -620,7 +626,7 @@ impl DataPredicate { return false; }; - mt.is_match(&icmp.msg_type) + mt.is_match(&IcmpMessageType::from(icmp.ty())) } Self::Icmpv6MsgType(mt) => { @@ -629,18 +635,18 @@ impl DataPredicate { return false; }; - mt.is_match(&icmp6.msg_type) + mt.is_match(&Icmpv6MessageType::from(icmp.ty())) } Self::Dhcpv6MsgType(mt) => { - if let Ok(buf) = rdr.slice(1) { - rdr.seek_back(1).expect("Failed to seek back"); - mt.is_match(&buf[0].into()) - } else { + let body = meta.body_segs(); + if body.len() == 0 || body[0].len() == 0 { super::err!( "Failed to read DHCPv6 message type from packet" ); false + } else { + mt.is_match(&body[0][0].into()) } } } diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 881d7fde..d2100ac2 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -18,6 +18,11 @@ use super::headers::IpMeta; use super::headers::IpMod; use super::headers::IpPush; use super::headers::UlpHeaderAction; +use super::ingot_packet::MsgBlk; +use super::ingot_packet::Packet2; +use super::ingot_packet::PacketHeaders; +use super::ingot_packet::PacketHeaders2; +use super::ingot_packet::ParsedMblk; use super::packet::BodyTransform; use super::packet::Initialized; use super::packet::InnerFlowId; @@ -41,6 +46,7 @@ use core::fmt::Debug; use core::fmt::Display; use illumos_sys_hdrs::c_char; use illumos_sys_hdrs::uintptr_t; +use ingot::types::Read; use opte_api::Direction; use serde::Deserialize; use serde::Serialize; @@ -153,7 +159,7 @@ pub trait ActionDesc { fn gen_bt( &self, _dir: Direction, - _meta: &PacketMeta, + _meta: &PacketHeaders2, _payload_segs: &[&[u8]], ) -> Result>, GenBtError> { Ok(None) @@ -251,7 +257,7 @@ impl StaticAction for Identity { &self, _dir: Direction, _flow_id: 
&InnerFlowId, - _pkt_meta: &PacketMeta, + _pkt_meta: &PacketHeaders2, _action_meta: &mut ActionMeta, ) -> GenHtResult { Ok(AllowOrDeny::Allow(HdrTransform::identity(&self.name))) @@ -372,7 +378,10 @@ impl HdrTransform { /// If there is an [`HeaderAction::Modify`], but no metadata is /// present for that particular header, then a /// [`HdrTransformError::MissingHeader`] is returned. - pub fn run(&self, meta: &mut PacketMeta) -> Result<(), HdrTransformError> { + pub fn run( + &self, + meta: &mut PacketHeaders, + ) -> Result<(), HdrTransformError> { self.outer_ether .run(&mut meta.outer.ether) .map_err(Self::err_fn("outer ether"))?; @@ -442,7 +451,7 @@ pub trait StatefulAction: Display { fn gen_desc( &self, flow_id: &InnerFlowId, - pkt: &Packet, + pkt: &Packet2, meta: &mut ActionMeta, ) -> GenDescResult; @@ -462,7 +471,7 @@ pub trait StaticAction: Display { &self, dir: Direction, flow_id: &InnerFlowId, - packet_meta: &PacketMeta, + packet_meta: &PacketHeaders2, action_meta: &mut ActionMeta, ) -> GenHtResult; @@ -515,7 +524,7 @@ impl From for GenErr { } } -pub type GenPacketResult = ActionResult, GenErr>; +pub type GenPacketResult = ActionResult; /// An error while generating a [`BodyTransform`]. #[derive(Clone, Debug)] @@ -536,16 +545,12 @@ impl From for GenBtError { /// ARP request. pub trait HairpinAction: Display { /// Generate a [`Packet`] to hairpin back to the source. The - /// `meta` argument holds the packet metadata, inlucding any + /// `meta` argument holds the packet metadata, including any /// modifications made by previous layers up to this point. The /// `rdr` argument provides a [`PacketReader`] against /// [`Packet`], with its starting position set to the /// beginning of the packet's payload. - fn gen_packet( - &self, - meta: &PacketMeta, - rdr: &mut PacketReader, - ) -> GenPacketResult; + fn gen_packet(&self, meta: &PacketHeaders2) -> GenPacketResult; /// Return the predicates implicit to this action. 
/// @@ -822,15 +827,11 @@ impl Rule { } impl<'a> Rule { - pub fn is_match<'b, R>( + pub fn is_match<'b>( &self, - meta: &PacketMeta, + meta: &PacketHeaders2, action_meta: &ActionMeta, - rdr: &'b mut R, - ) -> bool - where - R: PacketRead<'a>, - { + ) -> bool { #[cfg(debug_assertions)] { if let Some(preds) = &self.state.preds { @@ -855,7 +856,7 @@ impl<'a> Rule { } for p in &preds.data_preds { - if !p.is_match(meta, rdr) { + if !p.is_match(meta) { return false; } } diff --git a/lib/opte/src/engine/snat.rs b/lib/opte/src/engine/snat.rs index 39b2ed85..e8bdf607 100644 --- a/lib/opte/src/engine/snat.rs +++ b/lib/opte/src/engine/snat.rs @@ -11,6 +11,8 @@ use super::headers::IpMod; use super::headers::UlpGenericModify; use super::headers::UlpHeaderAction; use super::headers::UlpMetaModify; +use super::ingot_packet::Packet2; +use super::ingot_packet::ParsedMblk; use super::packet::InnerFlowId; use super::packet::Packet; use super::packet::Parsed; @@ -39,6 +41,8 @@ use core::fmt; use core::fmt::Display; use core::marker::PhantomData; use core::ops::RangeInclusive; +use ingot::icmp::IcmpV4Ref; +use ingot::icmp::IcmpV6Ref; use opte_api::Direction; use opte_api::IpAddr; use opte_api::Ipv4Addr; @@ -240,7 +244,7 @@ impl SNat { fn gen_icmp_desc( &self, nat: SNatAlloc, - pkt: &Packet, + pkt: &Packet2, ) -> GenDescResult { let meta = pkt.meta(); @@ -249,8 +253,8 @@ impl SNat { let icmp = meta .inner_icmp() .ok_or(GenIcmpErr::::MetaNotFound)?; - if icmp.msg_type != Icmpv4Message::EchoRequest.into() { - Err(GenIcmpErr::NotRequest(icmp.msg_type).into()) + if icmp.ty() != u8::from(Icmpv4Message::EchoRequest) { + Err(GenIcmpErr::NotRequest(icmp.ty()).into()) } else { Ok(icmp.echo_id()) } @@ -259,8 +263,8 @@ impl SNat { let icmp6 = meta .inner_icmp6() .ok_or(GenIcmpErr::::MetaNotFound)?; - if icmp6.msg_type != Icmpv6Message::EchoRequest.into() { - Err(GenIcmpErr::NotRequest(icmp6.msg_type).into()) + if icmp6.ty() != u8::from(Icmpv6Message::EchoRequest) { + 
Err(GenIcmpErr::NotRequest(icmp6.ty()).into()) } else { Ok(icmp6.echo_id()) } @@ -302,7 +306,7 @@ where fn gen_desc( &self, flow_id: &InnerFlowId, - pkt: &Packet, + pkt: &Packet2, _meta: &mut ActionMeta, ) -> GenDescResult { let priv_port = flow_id.src_port; diff --git a/lib/opte/src/engine/tcp.rs b/lib/opte/src/engine/tcp.rs index b86fbeef..28e42e9d 100644 --- a/lib/opte/src/engine/tcp.rs +++ b/lib/opte/src/engine/tcp.rs @@ -23,7 +23,6 @@ use opte_api::DYNAMIC_PORT; use serde::Deserialize; use serde::Serialize; use zerocopy::FromBytes; -use zerocopy::FromZeros; use zerocopy::Immutable; use zerocopy::IntoBytes; use zerocopy::KnownLayout; @@ -113,7 +112,7 @@ pub struct TcpMeta { pub csum: [u8; 2], // Fow now we keep options as raw bytes, allowing up to 40 bytes // of options. - pub options_bytes: Option<[u8; TcpHdr::MAX_OPTION_SIZE]>, + pub options_bytes: Option<[u8; 40]>, pub options_len: usize, } @@ -428,7 +427,7 @@ impl<'a> RawHeader<'a> for TcpHdrRaw { #[inline] fn new_mut(src: &mut [u8]) -> Result, ReadErr> { debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::new(src) { + let hdr = match Ref::from_bytes(src).ok() { Some(hdr) => hdr, None => return Err(ReadErr::BadLayout), }; diff --git a/lib/opte/src/engine/udp.rs b/lib/opte/src/engine/udp.rs index 5815ff0d..84dbe5bb 100644 --- a/lib/opte/src/engine/udp.rs +++ b/lib/opte/src/engine/udp.rs @@ -21,7 +21,6 @@ use opte_api::DYNAMIC_PORT; use serde::Deserialize; use serde::Serialize; use zerocopy::FromBytes; -use zerocopy::FromZeros; use zerocopy::Immutable; use zerocopy::IntoBytes; use zerocopy::KnownLayout; @@ -247,7 +246,7 @@ impl<'a> RawHeader<'a> for UdpHdrRaw { #[inline] fn new_mut(src: &mut [u8]) -> Result, ReadErr> { debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::new(src) { + let hdr = match Ref::from_bytes(src).ok() { Some(hdr) => hdr, None => return Err(ReadErr::BadLayout), }; From 9a359cc8a04284a2c79b3f522deaf4847a72766f Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 
4 Sep 2024 19:24:31 +0100 Subject: [PATCH 015/115] Good spot to call it for the day. --- Cargo.lock | 6 ++--- Cargo.toml | 2 +- lib/opte/src/engine/dhcp.rs | 9 ------- lib/opte/src/engine/dhcpv6/protocol.rs | 3 --- lib/opte/src/engine/icmp/mod.rs | 6 ----- lib/opte/src/engine/icmp/v6.rs | 1 - lib/opte/src/engine/ingot_packet.rs | 35 ++++++++++++++++++-------- lib/opte/src/engine/layer.rs | 7 ------ lib/opte/src/engine/packet.rs | 2 -- lib/opte/src/engine/port.rs | 31 +++++++++-------------- lib/opte/src/engine/predicate.rs | 3 ++- lib/opte/src/engine/rule.rs | 1 - 12 files changed, 43 insertions(+), 63 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 182e44cb..c0046c9a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -882,7 +882,7 @@ dependencies = [ [[package]] name = "ingot" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=f3f138bcefb4c625597c4add3a509921955d646c#f3f138bcefb4c625597c4add3a509921955d646c" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=5e0d5b1117217a5d4e96ff6366b4325366ac4d8e#5e0d5b1117217a5d4e96ff6366b4325366ac4d8e" dependencies = [ "bitflags 2.6.0", "ingot-macros", @@ -894,7 +894,7 @@ dependencies = [ [[package]] name = "ingot-macros" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=f3f138bcefb4c625597c4add3a509921955d646c#f3f138bcefb4c625597c4add3a509921955d646c" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=5e0d5b1117217a5d4e96ff6366b4325366ac4d8e#5e0d5b1117217a5d4e96ff6366b4325366ac4d8e" dependencies = [ "darling", "itertools 0.13.0", @@ -908,7 +908,7 @@ dependencies = [ [[package]] name = "ingot-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=f3f138bcefb4c625597c4add3a509921955d646c#f3f138bcefb4c625597c4add3a509921955d646c" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=5e0d5b1117217a5d4e96ff6366b4325366ac4d8e#5e0d5b1117217a5d4e96ff6366b4325366ac4d8e" dependencies = [ "heapless", "ingot-macros", 
diff --git a/Cargo.toml b/Cargo.toml index 33bd605f..f7fe2a47 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,7 +50,7 @@ ctor = "0.2" darling = "0.20" dyn-clone = "1.0" heapless = "0.8" -ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "f3f138bcefb4c625597c4add3a509921955d646c"} +ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "5e0d5b1117217a5d4e96ff6366b4325366ac4d8e"} ipnetwork = { version = "0.20", default-features = false } itertools = { version = "0.13", default-features = false } libc = "0.2" diff --git a/lib/opte/src/engine/dhcp.rs b/lib/opte/src/engine/dhcp.rs index 9e488796..4ad5ff42 100644 --- a/lib/opte/src/engine/dhcp.rs +++ b/lib/opte/src/engine/dhcp.rs @@ -11,7 +11,6 @@ use super::ether::EtherHdr; use super::ether::EtherMeta; use super::ether::EtherType; use super::ingot_packet::MsgBlk; -use super::ingot_packet::PacketHeaders; use super::ingot_packet::PacketHeaders2; use super::ip4::Ipv4Addr; use super::ip4::Ipv4Hdr; @@ -19,9 +18,6 @@ use super::ip4::Ipv4Meta; use super::ip4::Protocol; use super::ip6::UlpCsumOpt; use super::packet::Packet; -use super::packet::PacketMeta; -use super::packet::PacketRead; -use super::packet::PacketReader; use super::predicate::DataPredicate; use super::predicate::EtherAddrMatch; use super::predicate::IpProtoMatch; @@ -38,11 +34,6 @@ use alloc::vec::Vec; use core::fmt; use core::fmt::Display; use heapless::Vec as HeaplessVec; -use ingot::ethernet::Ethernet; -use ingot::ethernet::Ethertype; -use ingot::ip::IpProtocol; -use ingot::ip::Ipv4; -use ingot::types::Emit; use opte_api::DhcpCfg; use opte_api::DhcpReplyType; use opte_api::DomainName; diff --git a/lib/opte/src/engine/dhcpv6/protocol.rs b/lib/opte/src/engine/dhcpv6/protocol.rs index 75d39278..deb6d9a1 100644 --- a/lib/opte/src/engine/dhcpv6/protocol.rs +++ b/lib/opte/src/engine/dhcpv6/protocol.rs @@ -31,9 +31,6 @@ use crate::engine::ip6::Ipv6Hdr; use crate::engine::ip6::Ipv6Meta; use crate::engine::ip6::UlpCsumOpt; use 
crate::engine::packet::Packet; -use crate::engine::packet::PacketMeta; -use crate::engine::packet::PacketRead; -use crate::engine::packet::PacketReader; use crate::engine::predicate::DataPredicate; use crate::engine::predicate::EtherAddrMatch; use crate::engine::predicate::IpProtoMatch; diff --git a/lib/opte/src/engine/icmp/mod.rs b/lib/opte/src/engine/icmp/mod.rs index 42ce9a7e..a70622af 100644 --- a/lib/opte/src/engine/icmp/mod.rs +++ b/lib/opte/src/engine/icmp/mod.rs @@ -21,9 +21,6 @@ use crate::engine::ether::EtherType; use crate::engine::headers::HeaderActionModify; use crate::engine::headers::UlpMetaModify; use crate::engine::packet::Packet; -use crate::engine::packet::PacketMeta; -use crate::engine::packet::PacketRead; -use crate::engine::packet::PacketReader; use crate::engine::predicate::DataPredicate; use crate::engine::predicate::EtherAddrMatch; use crate::engine::predicate::IpProtoMatch; @@ -35,14 +32,11 @@ use crate::engine::rule::HairpinAction; use alloc::vec::Vec; use core::fmt; use core::fmt::Display; -use ingot::icmp::IcmpV4Packet; -use ingot::icmp::IcmpV4Ref; pub use opte_api::ip::Protocol; use serde::Deserialize; use serde::Serialize; use smoltcp::phy::Checksum; use smoltcp::phy::ChecksumCapabilities as Csum; -use smoltcp::wire::Icmpv4Message; pub use v4::Icmpv4Meta; pub use v6::Icmpv6Meta; use zerocopy::ByteSlice; diff --git a/lib/opte/src/engine/icmp/v6.rs b/lib/opte/src/engine/icmp/v6.rs index 05bb1e30..63842611 100644 --- a/lib/opte/src/engine/icmp/v6.rs +++ b/lib/opte/src/engine/icmp/v6.rs @@ -15,7 +15,6 @@ use crate::engine::predicate::Ipv6AddrMatch; use alloc::string::String; use ingot::ip::Ipv6Ref; use ingot::types::Emit; -use ingot::types::Header; pub use opte_api::ip::Icmpv6EchoReply; pub use opte_api::ip::Ipv6Addr; pub use opte_api::ip::Ipv6Cidr; diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 1742f06e..1a9aed88 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ 
b/lib/opte/src/engine/ingot_packet.rs @@ -151,6 +151,10 @@ impl MsgBlk { Self { inner } } + pub fn new_ethernet(len: usize) -> Self { + Self::new_with_headroom(2, len) + } + pub fn byte_len(&self) -> usize { self.iter().map(|el| el.len()).sum() } @@ -446,7 +450,7 @@ impl PktBodyWalker { // sourced from an exclusive borrow on something which ownas a [u8]). // This allows us to cast to &mut later, but not here! let mut to_hold = vec![]; - if let Some(chunk) = first { + if let Some(ref chunk) = first { let as_bytes = chunk.deref(); to_hold.push(unsafe { core::mem::transmute(as_bytes) }); } @@ -463,7 +467,13 @@ impl PktBodyWalker { core::sync::atomic::Ordering::Relaxed, core::sync::atomic::Ordering::Relaxed, ) - .expect("apparent concurrent access to body_seg memoiser"); + .expect("unexpected concurrent access to body_seg memoiser"); + + // SAFETY: + // Replace contents to get correct drop behaviour on T. + // Currently the only ByteSlice impls are &[u8] and friends, + // but this may extend to e.g. Vec in future. 
+ self.base.set(Some((first, rest))); } } @@ -471,9 +481,12 @@ impl PktBodyWalker { where T::Chunk: ByteSlice, { - self.reify_body_segs(); - - let slice_ptr = self.slice.load(core::sync::atomic::Ordering::Relaxed); + let mut slice_ptr = + self.slice.load(core::sync::atomic::Ordering::Relaxed); + if slice_ptr.is_null() { + self.reify_body_segs(); + slice_ptr = self.slice.load(core::sync::atomic::Ordering::Relaxed); + } assert!(!slice_ptr.is_null()); // let use_ref: &[_] = &b; @@ -487,9 +500,12 @@ impl PktBodyWalker { where T::Chunk: ByteSliceMut, { - self.reify_body_segs(); - - let slice_ptr = self.slice.load(core::sync::atomic::Ordering::Relaxed); + let mut slice_ptr = + self.slice.load(core::sync::atomic::Ordering::Relaxed); + if slice_ptr.is_null() { + self.reify_body_segs(); + slice_ptr = self.slice.load(core::sync::atomic::Ordering::Relaxed); + } assert!(!slice_ptr.is_null()); // SAFETY: We have an exclusive reference, and the ByteSliceMut @@ -833,8 +849,7 @@ impl Packet2> { self.state.body_modified = true; match self.body_segs_mut() { - Some(mut body_segs) => Err(BodyTransformError::Todo("huh".into())), - // Some(mut body_segs) => xform.run(dir, &mut body_segs), + Some(mut body_segs) => xform.run(dir, &mut body_segs), None => { self.state.body_modified = false; Err(BodyTransformError::NoPayload) diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index d709b59f..6997580b 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -14,17 +14,11 @@ use super::flow_table::FLOW_DEF_EXPIRE_SECS; use super::ingot_packet::MsgBlk; use super::ingot_packet::Packet2; use super::ingot_packet::PacketHeaders2; -use super::ingot_packet::Parsed2; use super::ingot_packet::ParsedMblk; use super::ioctl; use super::ioctl::ActionDescEntryDump; use super::packet::BodyTransformError; -use super::packet::Initialized; use super::packet::InnerFlowId; -use super::packet::Packet; -use super::packet::PacketMeta; -use super::packet::PacketRead; -use 
super::packet::Parsed; use super::packet::FLOW_ID_DEFAULT; use super::port::meta::ActionMeta; use super::port::Transforms; @@ -59,7 +53,6 @@ use core::num::NonZeroU32; use core::result; use illumos_sys_hdrs::c_char; use illumos_sys_hdrs::uintptr_t; -use ingot::types::Read; use kstat_macro::KStatProvider; use opte_api::Direction; diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index cbe7b440..672e63ea 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -48,7 +48,6 @@ use super::ip6::Ipv6Addr; use super::ip6::Ipv6Hdr; use super::ip6::Ipv6HdrError; use super::ip6::Ipv6Meta; -use super::NetworkParser; use crate::d_error::DError; use core::fmt; use core::fmt::Display; @@ -63,7 +62,6 @@ use serde::Deserialize; use serde::Serialize; // TODO should probably move these two into this module now. use super::rule::HdrTransform; -use super::rule::HdrTransformError; use super::tcp::TcpHdr; use super::tcp::TcpHdrError; use super::tcp::TcpMeta; diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index d8196a7b..73044c20 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -78,24 +78,11 @@ use core::sync::atomic::AtomicU64; use core::sync::atomic::Ordering::SeqCst; #[cfg(all(not(feature = "std"), not(test)))] use illumos_sys_hdrs::uintptr_t; -use ingot::ethernet::EthernetMut; -use ingot::example_chain::Ulp; -use ingot::icmp::IcmpV4Mut; -use ingot::icmp::IcmpV4Ref; -use ingot::icmp::IcmpV6Mut; -use ingot::icmp::IcmpV6Ref; -use ingot::ip::Ipv4Mut; -use ingot::ip::Ipv6Mut; -use ingot::tcp::TcpFlags; -use ingot::tcp::TcpMut; -use ingot::types::Read; -use ingot::udp::UdpMut; use kstat_macro::KStatProvider; use opte_api::Direction; use opte_api::MacAddr; use opte_api::OpteError; use std::process; -use zerocopy::ByteSliceMut; pub type Result = result::Result; @@ -180,6 +167,12 @@ enum InternalProcessResult { Drop { reason: DropReason, }, + /// A set of transforms which have not yet been 
performed on a + /// packet. + /// + /// Slow-path packets are transformed as they traverse tables in the lock, + /// whereas fast-path packets have a complete set of transforms to be applied + /// without blocking the rest of the table. Modified { transform: Option>, tcp_state: Option>>, @@ -1259,7 +1252,7 @@ impl Port { }), ) => { // TCP, then transform? - // (I forget the order) + todo!() } ( Direction::In, @@ -1269,7 +1262,7 @@ impl Port { }), ) => { // Transform, then TCP? - // (I forget the order) + todo!() } _ => {} } @@ -2209,8 +2202,8 @@ impl Port { // entry. if *ufid_in == FLOW_ID_DEFAULT { return Ok(InternalProcessResult::Modified { - transform: todo!(), - tcp_state: todo!(), + transform: None, + tcp_state: None, }); } } @@ -2333,7 +2326,7 @@ impl Port { match data.uft_in.add(*ufid_in, hte) { Ok(_) => Ok(InternalProcessResult::Modified { transform: None, - tcp_state: todo!(), + tcp_state: None, }), Err(OpteError::MaxCapacity(limit)) => { Err(ProcessError::FlowTableFull { kind: "UFT", limit }) @@ -2496,7 +2489,7 @@ impl Port { return Ok(InternalProcessResult::Modified { transform, - tcp_state: todo!(), + tcp_state: None, }); } diff --git a/lib/opte/src/engine/predicate.rs b/lib/opte/src/engine/predicate.rs index d52e68ee..86fcb90a 100644 --- a/lib/opte/src/engine/predicate.rs +++ b/lib/opte/src/engine/predicate.rs @@ -36,6 +36,7 @@ use core::ops::RangeInclusive; use ingot::ethernet::EthernetRef; use ingot::example_chain::L3; use ingot::icmp::IcmpV4Ref; +use ingot::icmp::IcmpV6Ref; use ingot::ip::Ipv4Ref; use ingot::ip::Ipv6Ref; use opte_api::MacAddr; @@ -635,7 +636,7 @@ impl DataPredicate { return false; }; - mt.is_match(&Icmpv6MessageType::from(icmp.ty())) + mt.is_match(&Icmpv6MessageType::from(icmp6.ty())) } Self::Dhcpv6MsgType(mt) => { diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index d2100ac2..5014cf6e 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -28,7 +28,6 @@ use super::packet::Initialized; 
use super::packet::InnerFlowId; use super::packet::Packet; use super::packet::PacketMeta; -use super::packet::PacketRead; use super::packet::PacketReader; use super::packet::Parsed; use super::port::meta::ActionMeta; From d417e697a2ca709dfe8362aea291ba3a8094ebaa Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 5 Sep 2024 18:44:07 +0100 Subject: [PATCH 016/115] Iterating. --- Cargo.lock | 6 +- Cargo.toml | 2 +- lib/opte/src/engine/geneve.rs | 2 +- lib/opte/src/engine/headers.rs | 128 ++++++- lib/opte/src/engine/icmp/mod.rs | 12 +- lib/opte/src/engine/ingot_packet.rs | 562 ++++++++++++++++++++++++---- lib/opte/src/engine/mod.rs | 28 +- lib/opte/src/engine/nat.rs | 4 +- lib/opte/src/engine/rule.rs | 88 ++++- lib/opte/src/engine/snat.rs | 8 +- lib/opte/src/engine/tcp.rs | 8 +- lib/opte/src/engine/udp.rs | 9 +- 12 files changed, 726 insertions(+), 131 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c0046c9a..50a60b28 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -882,7 +882,7 @@ dependencies = [ [[package]] name = "ingot" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=5e0d5b1117217a5d4e96ff6366b4325366ac4d8e#5e0d5b1117217a5d4e96ff6366b4325366ac4d8e" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=7188e7adb3f8e404fcc431501dd2312bad47b628#7188e7adb3f8e404fcc431501dd2312bad47b628" dependencies = [ "bitflags 2.6.0", "ingot-macros", @@ -894,7 +894,7 @@ dependencies = [ [[package]] name = "ingot-macros" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=5e0d5b1117217a5d4e96ff6366b4325366ac4d8e#5e0d5b1117217a5d4e96ff6366b4325366ac4d8e" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=7188e7adb3f8e404fcc431501dd2312bad47b628#7188e7adb3f8e404fcc431501dd2312bad47b628" dependencies = [ "darling", "itertools 0.13.0", @@ -908,7 +908,7 @@ dependencies = [ [[package]] name = "ingot-types" version = "0.1.0" -source = 
"git+https://github.com/oxidecomputer/ingot.git?rev=5e0d5b1117217a5d4e96ff6366b4325366ac4d8e#5e0d5b1117217a5d4e96ff6366b4325366ac4d8e" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=7188e7adb3f8e404fcc431501dd2312bad47b628#7188e7adb3f8e404fcc431501dd2312bad47b628" dependencies = [ "heapless", "ingot-macros", diff --git a/Cargo.toml b/Cargo.toml index f7fe2a47..63a5b596 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,7 +50,7 @@ ctor = "0.2" darling = "0.20" dyn-clone = "1.0" heapless = "0.8" -ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "5e0d5b1117217a5d4e96ff6366b4325366ac4d8e"} +ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "7188e7adb3f8e404fcc431501dd2312bad47b628"} ipnetwork = { version = "0.20", default-features = false } itertools = { version = "0.13", default-features = false } libc = "0.2" diff --git a/lib/opte/src/engine/geneve.rs b/lib/opte/src/engine/geneve.rs index 8089adaa..86a41f83 100644 --- a/lib/opte/src/engine/geneve.rs +++ b/lib/opte/src/engine/geneve.rs @@ -77,7 +77,7 @@ impl PushAction for GenevePush { #[derive(Clone, Debug, Deserialize, Serialize)] pub struct GeneveMod { - vni: Option, + pub vni: Option, } impl ModifyAction for GeneveMod { diff --git a/lib/opte/src/engine/headers.rs b/lib/opte/src/engine/headers.rs index fa844fd9..fbf8704c 100644 --- a/lib/opte/src/engine/headers.rs +++ b/lib/opte/src/engine/headers.rs @@ -589,7 +589,10 @@ impl<'a> From<&UlpHdr<'a>> for UlpMeta { } impl HeaderActionModify for UlpMeta { - fn run_modify(&mut self, spec: &UlpMetaModify) { + fn run_modify( + &mut self, + spec: &UlpMetaModify, + ) -> Result<(), HeaderActionError> { match self { UlpMeta::Icmpv4(icmp_meta) => icmp_meta.run_modify(spec), UlpMeta::Icmpv6(icmp6_meta) => icmp6_meta.run_modify(spec), @@ -599,35 +602,101 @@ impl HeaderActionModify for UlpMeta { } } -/// The action to take for a particular header transposition. 
-#[derive(Copy, Clone, Debug, Default, Deserialize, Serialize)] -pub enum HeaderAction +pub trait HasInnerCksum { + const HAS_CKSUM: bool; +} + +/// Turn HeaderAction on its head a little bit: anyone can allow +/// themselves to take an action on certain params. +pub trait Transform: HasInnerCksum +where + P: PushAction + fmt::Debug, + M: ModifyAction + fmt::Debug, +{ + /// Returns whether we will need a checksum recompute on the target field. + fn act_on( + &mut self, + action: &HeaderAction, + ) -> Result; +} + +impl HasInnerCksum for Option { + const HAS_CKSUM: bool = T::HAS_CKSUM; +} + +// impl Transform for Option +// where +// P: PushAction + fmt::Debug, +// M: ModifyAction + fmt::Debug, +// X: Transform + From +// { +// fn act_on(&mut self, action: &HeaderAction) -> Result { +// match (action, self) { +// (HeaderAction::Ignore, _) => Ok(false), +// (HeaderAction::Push(p), a) => { +// *a = Some(p.push().into()); +// Ok(X::HAS_CKSUM) +// }, +// (HeaderAction::Pop, a) => { +// *a = None; +// Ok(X::HAS_CKSUM) +// } +// (a @ HeaderAction::Modify(..), Some(h)) => h.act_on(a), +// (_, None) => Err(HeaderActionError::MissingHeader), +// } +// } +// } + +impl Transform for X where P: PushAction + fmt::Debug, M: ModifyAction + fmt::Debug, + X: HeaderActionModify + From + HasInnerCksum, { - Push(P, core::marker::PhantomData), + fn act_on( + &mut self, + action: &HeaderAction, + ) -> Result { + match action { + HeaderAction::Ignore => Ok(false), + HeaderAction::Push(p) => { + *self = p.push().into(); + Ok(Self::HAS_CKSUM) + } + HeaderAction::Pop => Err(HeaderActionError::CantPop), + HeaderAction::Modify(m) => { + self.run_modify(m); + Ok(Self::HAS_CKSUM) + } + } + } +} + +/// The action to take for a particular header transposition. 
+#[derive(Copy, Clone, Debug, Default, Deserialize, Serialize)] +pub enum HeaderAction { + Push(P), Pop, - Modify(M, core::marker::PhantomData), + Modify(M), #[default] Ignore, } -impl HeaderAction -where - P: PushAction + fmt::Debug, - M: ModifyAction + fmt::Debug, -{ - pub fn run(&self, meta: &mut Option) -> Result<(), HeaderActionError> { +impl HeaderAction { + pub fn run(&self, meta: &mut Option) -> Result<(), HeaderActionError> + where + P: PushAction + fmt::Debug, + M: ModifyAction + fmt::Debug, + { match self { Self::Ignore => (), - Self::Modify(action, _) => match meta { + Self::Modify(action) => match meta { Some(meta) => action.modify(meta), None => return Err(HeaderActionError::MissingHeader), }, - Self::Push(action, _) => { + Self::Push(action) => { meta.replace(action.push()); } @@ -640,19 +709,44 @@ where Ok(()) } + + pub fn act_on_option( + &self, + target: &mut Option, + ) -> Result + where + P: PushAction + fmt::Debug, + M: ModifyAction + fmt::Debug, + X: Transform + From, + { + match (self, target) { + (HeaderAction::Ignore, _) => Ok(false), + (HeaderAction::Push(p), a) => { + *a = Some(p.push().into()); + Ok(X::HAS_CKSUM) + } + (HeaderAction::Pop, a) => { + *a = None; + Ok(X::HAS_CKSUM) + } + (a @ HeaderAction::Modify(..), Some(h)) => h.act_on(a), + (_, None) => Err(HeaderActionError::MissingHeader), + } + } } #[derive(Clone, Debug)] pub enum HeaderActionError { MissingHeader, + CantPop, } pub trait ModifyActionArg {} /// A header type that allows itself to be modified via a /// [`ModifyActionArg`] specification. 
-pub trait HeaderActionModify { - fn run_modify(&mut self, mod_spec: &M); +pub trait HeaderActionModify { + fn run_modify(&mut self, mod_spec: &M) -> Result<(), HeaderActionError>; } #[derive(Clone, Debug, Default, Deserialize, Serialize)] @@ -686,7 +780,7 @@ impl UlpHeaderAction { match self { Self::Ignore => (), Self::Modify(arg) => match meta { - Some(meta) => meta.run_modify(arg), + Some(meta) => meta.run_modify(arg)?, None => return Err(HeaderActionError::MissingHeader), }, } diff --git a/lib/opte/src/engine/icmp/mod.rs b/lib/opte/src/engine/icmp/mod.rs index a70622af..798832c8 100644 --- a/lib/opte/src/engine/icmp/mod.rs +++ b/lib/opte/src/engine/icmp/mod.rs @@ -11,6 +11,7 @@ pub mod v6; use super::checksum::Checksum as OpteCsum; use super::checksum::HeaderChecksum; +use super::headers::HeaderActionError; use super::headers::RawHeader; use super::packet::PacketReadMut; use super::packet::ReadErr; @@ -108,17 +109,22 @@ impl + Copy> HeaderActionModify for IcmpMeta where IcmpMeta: QueryEcho, { - fn run_modify(&mut self, spec: &UlpMetaModify) { + fn run_modify( + &mut self, + spec: &UlpMetaModify, + ) -> Result<(), HeaderActionError> { let Some(new_id) = spec.icmp_id else { - return; + return Ok(()); }; if self.echo_id().is_none() { - return; + return Ok(()); } let mut echo_data = self.body_echo_mut(); echo_data.id = new_id.to_be_bytes(); + + Ok(()) } } diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 1a9aed88..e190f28e 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -1,8 +1,17 @@ use super::checksum::Checksum as OpteCsum; use super::checksum::Checksum; use super::checksum::HeaderChecksum; +use super::ether::EtherMod; +use super::headers::EncapMeta; +use super::headers::EncapMod; use super::headers::EncapPush; +use super::headers::HasInnerCksum; +use super::headers::HeaderActionError; +use super::headers::HeaderActionModify; +use super::headers::IpMod; use 
super::headers::IpPush; +use super::headers::UlpMetaModify; +use super::headers::UlpMod; use super::icmp::QueryEcho; use super::packet::allocb; use super::packet::AddrPair; @@ -32,25 +41,39 @@ use core::sync::atomic::AtomicPtr; use illumos_sys_hdrs::mblk_t; use illumos_sys_hdrs::uintptr_t; use ingot::ethernet::Ethernet; +use ingot::ethernet::EthernetMut; use ingot::ethernet::EthernetPacket; use ingot::ethernet::EthernetRef; use ingot::ethernet::Ethertype; use ingot::ethernet::ValidEthernet; use ingot::example_chain::L3Repr; use ingot::example_chain::Ulp; +use ingot::example_chain::UlpRepr; +use ingot::example_chain::ValidL3; +use ingot::example_chain::ValidUlp; use ingot::example_chain::L3; use ingot::example_chain::L4; use ingot::geneve::Geneve; +use ingot::geneve::GeneveMut; use ingot::geneve::GenevePacket; use ingot::geneve::ValidGeneve; +use ingot::icmp::IcmpV4Mut; use ingot::icmp::IcmpV4Packet; use ingot::icmp::IcmpV4Ref; +use ingot::icmp::IcmpV6Mut; use ingot::icmp::IcmpV6Packet; use ingot::icmp::IcmpV6Ref; +use ingot::ip::IpProtocol; +use ingot::ip::Ipv4Mut; use ingot::ip::Ipv4Packet; use ingot::ip::Ipv4Ref; +use ingot::ip::Ipv6; +use ingot::ip::Ipv6Mut; use ingot::ip::Ipv6Packet; use ingot::ip::Ipv6Ref; +use ingot::ip::ValidIpv6; +use ingot::tcp::TcpFlags; +use ingot::tcp::TcpMut; use ingot::tcp::TcpPacket; use ingot::tcp::TcpRef; use ingot::types::Header; @@ -61,6 +84,7 @@ use ingot::types::ParseResult; use ingot::types::Parsed as IngotParsed; use ingot::types::Read; use ingot::udp::Udp; +use ingot::udp::UdpMut; use ingot::udp::UdpPacket; use ingot::udp::UdpRef; use ingot::udp::ValidUdp; @@ -71,7 +95,7 @@ use zerocopy::ByteSliceMut; use zerocopy::IntoBytes; #[derive(Parse)] -pub struct OpteIn { +pub struct GeneveOverV6 { pub outer_eth: EthernetPacket, #[ingot(from = "L3")] pub outer_v6: Ipv6Packet, @@ -96,7 +120,7 @@ fn exit_on_arp(eth: &EthernetPacket) -> ParseControl { } #[derive(Parse)] -pub struct OpteOut { +pub struct NoEncap { #[ingot(control = 
exit_on_arp)] pub inner_eth: EthernetPacket, pub inner_l3: Option>, @@ -357,39 +381,108 @@ impl core::fmt::Debug for OpteUnified { // THIS IS THE GOAL. -// pub struct OpteUnified3 { -// pub outer_eth: Weird>, -// pub outer_v6: Weird>, -// pub outer_encap: Weird>, +// IE +// pub struct OpteEmit { +// outer_eth: Option, +// outer_ip: Option, +// outer_encap: Option, -// pub inner_eth: EthernetPacket<&[u8]>, -// pub inner_l3: Option>, -// pub inner_ulp: Option>, +// // We can (but do not often) push/pop inner meta. +// // Splitting minimises struct size in the general case. +// inner: Option>, // } -// IDEA: anything can take an encap push which is Into<..> its meta -// type. Modification is another trait. -pub enum Weird { - Absent, - LocalForm(Compact), - Packeted(T), +// pub struct OpteInnerEmit { +// eth: Ethernet, +// l3: Option, +// ulp: Option, +// } + +pub enum ValidEncapMeta { + Geneve(ValidUdp, ValidGeneve), } -impl From> for Weird { - fn from(value: Option) -> Self { - match value { - Some(val) => Self::Packeted(val), - None => Self::Absent, +pub struct OpteMeta { + pub outer_eth: Option>>, + // pub outer_eth: Option>>, + pub outer_l3: Option>>, + // pub outer_v6: Option>>, + pub outer_encap: Option>>, + // pub outer_encap: Option>>, + pub inner_eth: EthernetPacket, + pub inner_l3: Option>, + pub inner_ulp: Option>, +} + +pub type OpteParsed = IngotParsed::Chunk>, T>; + +impl OpteMeta { + pub fn convert_ingot, Q: Read>( + value: IngotParsed, + ) -> OpteParsed { + let IngotParsed { stack: HeaderStack(headers), data, last_chunk } = + value; + + IngotParsed { stack: HeaderStack(headers.into()), data, last_chunk } + } +} + +// TODO: make sure both are in ingot, by user choice. 
+pub enum OwnedPacket { + Repr(O), + Raw(B), +} + +impl Header for OwnedPacket { + const MINIMUM_LENGTH: usize = O::MINIMUM_LENGTH; + + #[inline] + fn packet_length(&self) -> usize { + match self { + OwnedPacket::Repr(o) => o.packet_length(), + OwnedPacket::Raw(b) => b.packet_length(), } } } -pub enum EncapMeta { - Geneve(UdpPacket, GenevePacket), +impl Header for EncapMeta { + const MINIMUM_LENGTH: usize = Udp::MINIMUM_LENGTH + Geneve::MINIMUM_LENGTH; + + #[inline] + fn packet_length(&self) -> usize { + match self { + EncapMeta::Geneve(g) => { + Geneve::MINIMUM_LENGTH + + g.oxide_external_pkt.then_some(4).unwrap_or_default() + } + } + } } -impl From> for OpteUnified { - fn from(value: OpteIn) -> Self { +impl Header for ValidEncapMeta { + const MINIMUM_LENGTH: usize = Udp::MINIMUM_LENGTH + Geneve::MINIMUM_LENGTH; + + #[inline] + fn packet_length(&self) -> usize { + match self { + ValidEncapMeta::Geneve(u, g) => { + u.packet_length() + g.packet_length() + } + } + } +} + +impl From> for OwnedPacket { + fn from(value: ingot::types::Packet) -> Self { + match value { + ingot::types::Packet::Raw(b) => Self::Raw(b), + ingot::types::Packet::Repr(o) => Self::Repr(*o), + } + } +} + +impl From> for OpteUnified { + fn from(value: GeneveOverV6) -> Self { Self { outer_eth: Some(value.outer_eth), outer_v6: Some(L3::Ipv6(value.outer_v6)), @@ -402,8 +495,8 @@ impl From> for OpteUnified { } } -impl From> for OpteUnified { - fn from(value: OpteOut) -> Self { +impl From> for OpteUnified { + fn from(value: NoEncap) -> Self { Self { outer_eth: None, outer_v6: None, @@ -520,32 +613,76 @@ impl PktBodyWalker { } pub struct PacketHeaders { - headers: OpteUnified, + pub(crate) headers: OpteMeta, initial_lens: OpteUnifiedLengths, body: PktBodyWalker, } -impl From, T>> for PacketHeaders { - fn from(value: IngotParsed, T>) -> Self { - let IngotParsed { stack: HeaderStack(headers), data, last_chunk } = - value; - let initial_lens = OpteUnifiedLengths { - outer_eth: 
headers.outer_eth.packet_length(), - outer_l3: headers.outer_v6.packet_length(), - outer_encap: headers.outer_udp.packet_length() - + headers.outer_encap.packet_length(), - inner_eth: headers.inner_eth.packet_length(), - inner_l3: headers.inner_l3.packet_length(), - inner_ulp: headers.inner_ulp.packet_length(), +impl From> for OpteMeta { + fn from(value: NoEncap) -> Self { + OpteMeta { + outer_eth: None, + outer_l3: None, + outer_encap: None, + inner_eth: value.inner_eth, + inner_l3: value.inner_l3, + inner_ulp: value.inner_ulp, + } + } +} + +impl From> for OpteMeta { + fn from(value: GeneveOverV6) -> Self { + // These are practically all Valid, anyhow. + + let outer_encap = match (value.outer_udp, value.outer_encap) { + (ingot::types::Packet::Raw(u), ingot::types::Packet::Raw(g)) => { + Some(OwnedPacket::Raw(ValidEncapMeta::Geneve(u, g))) + } + _ => todo!(), }; - let body = PktBodyWalker { - base: Some((last_chunk, data)).into(), - slice: Default::default(), + + let outer_l3 = match value.outer_v6 { + ingot::types::Packet::Repr(v) => { + Some(OwnedPacket::Repr(L3Repr::Ipv6(*v))) + } + ingot::types::Packet::Raw(v) => { + Some(OwnedPacket::Raw(ValidL3::Ipv6(v))) + } }; - Self { headers, initial_lens, body } + + OpteMeta { + outer_eth: Some(value.outer_eth.into()), + outer_l3, + outer_encap, + inner_eth: value.inner_eth, + inner_l3: Some(value.inner_l3), + inner_ulp: Some(value.inner_ulp), + } } } +// impl From, T>> for PacketHeaders { +// fn from(value: IngotParsed, T>) -> Self { +// let IngotParsed { stack: HeaderStack(headers), data, last_chunk } = +// value; +// let initial_lens = OpteUnifiedLengths { +// outer_eth: headers.outer_eth.packet_length(), +// outer_l3: headers.outer_v6.packet_length(), +// outer_encap: headers.outer_udp.packet_length() +// + headers.outer_encap.packet_length(), +// inner_eth: headers.inner_eth.packet_length(), +// inner_l3: headers.inner_l3.packet_length(), +// inner_ulp: headers.inner_ulp.packet_length(), +// }; +// let body = 
PktBodyWalker { +// base: Some((last_chunk, data)).into(), +// slice: Default::default(), +// }; +// Self { headers, initial_lens, body } +// } +// } + impl core::fmt::Debug for PacketHeaders { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.write_str("PacketHeaders(..)") @@ -569,7 +706,9 @@ pub fn ulp_dst_port(pkt: &Ulp) -> Option { } impl PacketHeaders { - pub fn outer_ether(&self) -> Option<&EthernetPacket> { + pub fn outer_ether( + &self, + ) -> Option<&OwnedPacket>> { self.headers.outer_eth.as_ref() } @@ -738,16 +877,6 @@ impl From<&PacketHeaders> for InnerFlowId { } } -fn transform_parse_stage1>( - p: IngotParsed, -) -> IngotParsed { - IngotParsed { - stack: HeaderStack(S2::from(p.stack.0)), - data: p.data, - last_chunk: p.last_chunk, - } -} - // GOAL: get to an absolute minimum point where we: // - parse into an innerflowid // - use existing transforms if a ULP entry exists. @@ -773,11 +902,28 @@ impl Packet2> { net: impl NetworkParser, ) -> Result>, ParseError> { let Packet2 { state: Initialized2 { len, inner } } = self; - let mut meta = match dir { - Direction::Out => net.parse_outbound(inner)?, - Direction::In => net.parse_inbound(inner)?, + let IngotParsed { stack: HeaderStack(headers), data, last_chunk } = + match dir { + Direction::Out => net.parse_outbound(inner)?, + Direction::In => net.parse_inbound(inner)?, + }; + + let initial_lens = OpteUnifiedLengths { + outer_eth: headers.outer_eth.packet_length(), + outer_l3: headers.outer_l3.packet_length(), + outer_encap: headers.outer_encap.packet_length(), + inner_eth: headers.inner_eth.packet_length(), + inner_l3: headers.inner_l3.packet_length(), + inner_ulp: headers.inner_ulp.packet_length(), + }; + + let body = PktBodyWalker { + base: Some((last_chunk, data)).into(), + slice: Default::default(), }; + let meta = PacketHeaders { headers, initial_lens, body }; + let flow = (&meta).into(); let body_csum = match (&meta.headers).inner_eth.ethertype() { @@ -795,7 +941,13 @@ impl 
Packet2> { len, }; - Ok(Packet2 { state }) + let mut pkt = Packet2 { state }; + // TODO: we can probably not do this in some cases, but we + // don't have a way for headeractions to signal that they + // *may* change the fields we need in the slowpath. + let _ = pkt.body_csum(); + + Ok(pkt) } } @@ -808,7 +960,7 @@ impl Packet2> { &mut self.state.meta } - pub fn emit_spec(&self) -> EmitSpec { + pub fn emit_spec(self) -> EmitSpec { todo!() } @@ -825,7 +977,10 @@ impl Packet2> { pub fn hdr_transform( &mut self, xform: &HdrTransform, - ) -> Result<(), HdrTransformError> { + ) -> Result<(), HdrTransformError> + where + T::Chunk: ByteSliceMut, + { xform.run(&mut self.state.meta)?; // Given that n_transform layers is 1 or 2, probably won't // save too much by trying to tie to a generation number. @@ -1084,9 +1239,19 @@ pub enum Emitter { // TODO: don't really care about pushing 'inner' reprs today. pub struct OpteEmit { - outer_eth: Emitter, - outer_ip: Emitter, - outer_encap: Emitter<(Udp, Geneve)>, + outer_eth: Option, + outer_ip: Option, + outer_encap: Option, + + // We can (but do not often) push/pop inner meta. + // Splitting minimises struct size in the general case. + inner: Option>, +} + +pub struct OpteInnerEmit { + eth: Ethernet, + l3: Option, + ulp: Option, } pub struct EmitSpec { @@ -1143,3 +1308,274 @@ impl QueryEcho for IcmpV6Packet { } } } + +// TODO: generate ref/mut traits on OwnedPacket AND BoxPacket in ingot to halve the code here... 
+impl HeaderActionModify + for OwnedPacket> +{ + fn run_modify( + &mut self, + mod_spec: &EtherMod, + ) -> Result<(), HeaderActionError> { + match self { + OwnedPacket::Repr(a) => { + if let Some(src) = mod_spec.src { + a.set_source(src.bytes().into()); + } + if let Some(dst) = mod_spec.dst { + a.set_destination(dst.bytes().into()); + } + } + OwnedPacket::Raw(a) => { + if let Some(src) = mod_spec.src { + a.set_source(src.bytes().into()); + } + if let Some(dst) = mod_spec.dst { + a.set_destination(dst.bytes().into()); + } + } + } + + Ok(()) + } +} + +impl HeaderActionModify for EthernetPacket { + fn run_modify( + &mut self, + mod_spec: &EtherMod, + ) -> Result<(), HeaderActionError> { + if let Some(src) = mod_spec.src { + self.set_source(src.bytes().into()); + } + if let Some(dst) = mod_spec.dst { + self.set_destination(dst.bytes().into()); + } + + Ok(()) + } +} + +// TODO: generate ref/mut traits on OwnedPacket AND BoxPacket in ingot to halve the code here... +impl HeaderActionModify + for OwnedPacket> +{ + fn run_modify( + &mut self, + mod_spec: &IpMod, + ) -> Result<(), HeaderActionError> { + match mod_spec { + IpMod::Ip4(mods) => match self { + OwnedPacket::Repr(L3Repr::Ipv4(v4)) => { + if let Some(src) = mods.src { + >::set_source( + v4, + src.bytes().into(), + ); + } + if let Some(dst) = mods.dst { + >::set_destination( + v4, + dst.bytes().into(), + ); + } + if let Some(p) = mods.proto { + >::set_protocol( + v4, + IpProtocol(u8::from(p)), + ); + } + } + OwnedPacket::Raw(ValidL3::Ipv4(v4)) => { + if let Some(src) = mods.src { + v4.set_source(src.bytes().into()); + } + if let Some(dst) = mods.dst { + v4.set_destination(dst.bytes().into()); + } + if let Some(p) = mods.proto { + v4.set_protocol(IpProtocol(u8::from(p))); + } + } + // run_modify should be capable of returning error... 
+ _ => return Err(HeaderActionError::MissingHeader), + }, + IpMod::Ip6(mods) => match self { + OwnedPacket::Repr(L3Repr::Ipv6(v6)) => { + if let Some(src) = mods.src { + >::set_source( + v6, + src.bytes().into(), + ); + } + if let Some(dst) = mods.dst { + >::set_destination( + v6, + dst.bytes().into(), + ); + } + if let Some(p) = mods.proto { + // NOTE: I know this is broken for V6EHs + >::set_next_header( + v6, + IpProtocol(u8::from(p)), + ); + } + } + OwnedPacket::Raw(ValidL3::Ipv6(v6)) => { + if let Some(src) = mods.src { + v6.set_source(src.bytes().into()); + } + if let Some(dst) = mods.dst { + v6.set_destination(dst.bytes().into()); + } + if let Some(p) = mods.proto { + // NOTE: I know this is broken for V6EHs + v6.set_next_header(IpProtocol(u8::from(p))); + } + } + // run_modify should be capable of returning error... + _ => return Err(HeaderActionError::MissingHeader), + }, + } + + Ok(()) + } +} + +impl HeaderActionModify for L3 { + fn run_modify( + &mut self, + mod_spec: &IpMod, + ) -> Result<(), HeaderActionError> { + match (self, mod_spec) { + (L3::Ipv4(v4), IpMod::Ip4(mods)) => { + if let Some(src) = mods.src { + v4.set_source(src.bytes().into()); + } + if let Some(dst) = mods.dst { + v4.set_destination(dst.bytes().into()); + } + if let Some(p) = mods.proto { + v4.set_protocol(IpProtocol(u8::from(p))); + } + Ok(()) + } + (L3::Ipv6(v6), IpMod::Ip6(mods)) => { + if let Some(src) = mods.src { + v6.set_source(src.bytes().into()); + } + if let Some(dst) = mods.dst { + v6.set_destination(dst.bytes().into()); + } + if let Some(p) = mods.proto { + // NOTE: I know this is broken for V6EHs + v6.set_next_header(IpProtocol(u8::from(p))); + } + Ok(()) + } + _ => Err(HeaderActionError::MissingHeader), + } + } +} + +impl HeaderActionModify for Ulp { + fn run_modify( + &mut self, + mod_spec: &UlpMetaModify, + ) -> Result<(), HeaderActionError> { + match self { + Ulp::Tcp(t) => { + if let Some(src) = mod_spec.generic.src_port { + t.set_source(src); + } + if let Some(dst) 
= mod_spec.generic.dst_port { + t.set_destination(dst); + } + if let Some(flags) = mod_spec.tcp_flags { + t.set_flags(TcpFlags::from_bits_retain(flags)); + } + } + Ulp::Udp(u) => { + if let Some(src) = mod_spec.generic.src_port { + u.set_source(src); + } + if let Some(dst) = mod_spec.generic.dst_port { + u.set_destination(dst); + } + } + Ulp::IcmpV4(i4) => { + if let Some(id) = mod_spec.icmp_id { + if i4.echo_id().is_some() { + let roh = i4.rest_of_hdr_mut(); + roh[..2].copy_from_slice(&id.to_be_bytes()) + } + } + } + Ulp::IcmpV6(i6) => { + if let Some(id) = mod_spec.icmp_id { + if i6.echo_id().is_some() { + let roh = i6.rest_of_hdr_mut(); + roh[..2].copy_from_slice(&id.to_be_bytes()) + } + } + } + } + + Ok(()) + } +} + +impl HeaderActionModify + for OwnedPacket> +{ + fn run_modify( + &mut self, + mod_spec: &EncapMod, + ) -> Result<(), HeaderActionError> { + match (self, mod_spec) { + ( + OwnedPacket::Repr(EncapMeta::Geneve(g)), + EncapMod::Geneve(mod_spec), + ) => { + if let Some(vni) = mod_spec.vni { + g.vni = vni; + } + } + ( + OwnedPacket::Raw(ValidEncapMeta::Geneve(u, g)), + EncapMod::Geneve(mod_spec), + ) => { + if let Some(vni) = mod_spec.vni { + g.set_vni(vni.as_u32()); + } + } + } + + Ok(()) + } +} + +impl HasInnerCksum for OwnedPacket> { + const HAS_CKSUM: bool = false; +} + +impl HasInnerCksum for OwnedPacket> { + const HAS_CKSUM: bool = true; +} + +impl HasInnerCksum for OwnedPacket> { + const HAS_CKSUM: bool = false; +} + +impl HasInnerCksum for EthernetPacket { + const HAS_CKSUM: bool = false; +} + +impl HasInnerCksum for L3 { + const HAS_CKSUM: bool = true; +} + +impl HasInnerCksum for Ulp { + const HAS_CKSUM: bool = true; +} diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index f461e8ad..e5c0166a 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -47,7 +47,9 @@ use core::num::ParseIntError; use ingot::types::Parsed as IngotParsed; use ingot::types::Read; use ingot_packet::MsgBlk; -use 
ingot_packet::OpteOut; +use ingot_packet::NoEncap; +use ingot_packet::OpteMeta; +use ingot_packet::OpteParsed; use ingot_packet::Packet2; use ingot_packet::PacketHeaders; use ingot_packet::Parsed2; @@ -298,7 +300,7 @@ pub trait NetworkParser { fn parse_outbound( &self, rdr: T, - ) -> Result, ParseError>; + ) -> Result, ParseError>; /// Parse an inbound packet. /// @@ -307,7 +309,7 @@ pub trait NetworkParser { fn parse_inbound( &self, rdr: T, - ) -> Result, ParseError>; + ) -> Result, ParseError>; } /// A generic ULP parser, useful for testing inside of the opte crate @@ -317,19 +319,9 @@ pub struct GenericUlp {} impl GenericUlp { /// Parse a generic L2 + L3 + L4 packet, storing the headers in /// the inner position. - fn parse_ulp( - &self, - rdr: T, - ) -> Result, ParseError> { - let stage1 = OpteOut::parse_read(rdr)?; - - let meta = IngotParsed { - stack: ingot::types::HeaderStack(stage1.stack.0.into()), - data: stage1.data, - last_chunk: stage1.last_chunk, - }; - - Ok(meta.into()) + fn parse_ulp(&self, rdr: T) -> Result, ParseError> { + let v = NoEncap::parse_read(rdr)?; + Ok(OpteMeta::convert_ingot(v)) } } @@ -337,14 +329,14 @@ impl NetworkParser for GenericUlp { fn parse_inbound( &self, rdr: T, - ) -> Result, ParseError> { + ) -> Result, ParseError> { self.parse_ulp(rdr) } fn parse_outbound( &self, rdr: T, - ) -> Result, ParseError> { + ) -> Result, ParseError> { self.parse_ulp(rdr) } } diff --git a/lib/opte/src/engine/nat.rs b/lib/opte/src/engine/nat.rs index 5d933f55..e97b7b8c 100644 --- a/lib/opte/src/engine/nat.rs +++ b/lib/opte/src/engine/nat.rs @@ -189,7 +189,7 @@ impl ActionDesc for NatDesc { HdrTransform { name: NAT_NAME.to_string(), - inner_ip: HeaderAction::Modify(ip, PhantomData), + inner_ip: HeaderAction::Modify(ip), ..Default::default() } } @@ -199,7 +199,7 @@ impl ActionDesc for NatDesc { HdrTransform { name: NAT_NAME.to_string(), - inner_ip: HeaderAction::Modify(ip, PhantomData), + inner_ip: HeaderAction::Modify(ip), ..Default::default() } } 
diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 5014cf6e..c782640d 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -49,6 +49,7 @@ use ingot::types::Read; use opte_api::Direction; use serde::Deserialize; use serde::Serialize; +use zerocopy::ByteSliceMut; /// A marker trait indicating a type is an entry acuired from a [`Resource`]. pub trait ResourceEntry {} @@ -282,11 +283,11 @@ pub enum ModifyAction { #[derive(Clone, Debug, Default, Deserialize, Serialize)] pub struct HdrTransform { pub name: String, - pub outer_ether: HeaderAction, - pub outer_ip: HeaderAction, - pub outer_encap: HeaderAction, - pub inner_ether: HeaderAction, - pub inner_ip: HeaderAction, + pub outer_ether: HeaderAction, + pub outer_ip: HeaderAction, + pub outer_encap: HeaderAction, + pub inner_ether: HeaderAction, + pub inner_ip: HeaderAction, // We don't support push/pop for inner_ulp. pub inner_ulp: UlpHeaderAction, } @@ -372,6 +373,8 @@ impl HdrTransform { /// Run this header transformation against the passed in /// [`PacketMeta`], mutating it in place. /// + /// Returns whether the inner checksum needs recomputed. + /// /// # Errors /// /// If there is an [`HeaderAction::Modify`], but no metadata is @@ -380,27 +383,74 @@ impl HdrTransform { pub fn run( &self, meta: &mut PacketHeaders, - ) -> Result<(), HdrTransformError> { + ) -> Result + where + T::Chunk: ByteSliceMut, + { + // NOTE: we want to track cksum dirtying here, somehow. 
+ + // meta.headers.outer_eth + // .act_on(&self.outer_ether) + // .map_err(Self::err_fn("outer ether"))?; self.outer_ether - .run(&mut meta.outer.ether) + .act_on_option(&mut meta.headers.outer_eth) .map_err(Self::err_fn("outer ether"))?; + // self.outer_ether + // .run(&mut meta.outer.ether) + // .map_err(Self::err_fn("outer ether"))?; + // self.outer_ip + // .run(&mut meta.outer.ip) + // .map_err(Self::err_fn("outer IP"))?; + // meta.headers.outer_l3 + // .act_on(&self.outer_ip) + // .map_err(Self::err_fn("outer IP"))?; self.outer_ip - .run(&mut meta.outer.ip) + .act_on_option(&mut meta.headers.outer_l3) .map_err(Self::err_fn("outer IP"))?; + // self.outer_encap + // .run(&mut meta.outer.encap) + // .map_err(Self::err_fn("outer encap"))?; + // meta.headers.outer_encap + // .act_on(&self.outer_encap) + // .map_err(Self::err_fn("outer encap"))?; self.outer_encap - .run(&mut meta.outer.encap) + .act_on_option(&mut meta.headers.outer_encap) .map_err(Self::err_fn("outer encap"))?; // XXX A hack so that inner ethernet can meet the interface of // `HeaderAction::run().` - let mut tmp = Some(meta.inner.ether); - self.inner_ether.run(&mut tmp).map_err(Self::err_fn("inner ether"))?; - meta.inner.ether = tmp.unwrap(); - self.inner_ip - .run(&mut meta.inner.ip) + // let mut tmp = Some(meta.inner.ether); + // self.inner_ether.run(&mut tmp).map_err(Self::err_fn("inner ether"))?; + // meta.inner.ether = tmp.unwrap(); + + // If I set this up right, we can handle the above w/o panic on a + // dumb EtherDrop action... 
+ meta.headers + .inner_eth + .act_on(&self.inner_ether) + .map_err(Self::err_fn("inner eth"))?; + + // self.inner_ip + // .run(&mut meta.inner.ip) + // .map_err(Self::err_fn("inner IP"))?; + // let l3_dirty = meta.headers.inner_l3 + // .act_on(&self.inner_ip) + // .map_err(Self::err_fn("inner IP"))?; + let l3_dirty = self + .inner_ip + .act_on_option(&mut meta.headers.inner_l3) .map_err(Self::err_fn("inner IP"))?; - self.inner_ulp - .run(&mut meta.inner.ulp) - .map_err(Self::err_fn("inner ULP")) + + // self.inner_ulp + // .run(&mut meta.inner.ulp) + // .map_err(Self::err_fn("inner ULP")) + + // let ulp_dirty = meta.headers.inner_ulp + // .act_on(&self.inner_ulp) + // .map_err(Self::err_fn("inner ULP"))?; + + let ulp_dirty = todo!(); + + Ok(l3_dirty || ulp_dirty) } fn err_fn( @@ -411,6 +461,9 @@ impl HdrTransform { HeaderActionError::MissingHeader => { HdrTransformError::MissingHeader(header) } + HeaderActionError::CantPop => { + HdrTransformError::CantPop(header) + } } } } @@ -419,6 +472,7 @@ impl HdrTransform { #[derive(Clone, Copy, Debug)] pub enum HdrTransformError { MissingHeader(&'static str), + CantPop(&'static str), } #[derive(Debug)] diff --git a/lib/opte/src/engine/snat.rs b/lib/opte/src/engine/snat.rs index e8bdf607..fc107966 100644 --- a/lib/opte/src/engine/snat.rs +++ b/lib/opte/src/engine/snat.rs @@ -369,7 +369,7 @@ impl ActionDesc for SNatDesc { HdrTransform { name: SNAT_NAME.to_string(), - inner_ip: HeaderAction::Modify(ip, PhantomData), + inner_ip: HeaderAction::Modify(ip), inner_ulp: UlpHeaderAction::Modify(UlpMetaModify { generic: UlpGenericModify { src_port: Some(self.nat.entry.port), @@ -389,7 +389,7 @@ impl ActionDesc for SNatDesc { HdrTransform { name: SNAT_NAME.to_string(), - inner_ip: HeaderAction::Modify(ip, PhantomData), + inner_ip: HeaderAction::Modify(ip), inner_ulp: UlpHeaderAction::Modify(UlpMetaModify { generic: UlpGenericModify { dst_port: Some(self.priv_port), @@ -430,7 +430,7 @@ impl ActionDesc for SNatIcmpEchoDesc { HdrTransform 
{ name: SNAT_NAME.to_string(), - inner_ip: HeaderAction::Modify(ip, PhantomData), + inner_ip: HeaderAction::Modify(ip), inner_ulp: UlpHeaderAction::Modify(UlpMetaModify { icmp_id: Some(self.nat.entry.port), ..Default::default() @@ -447,7 +447,7 @@ impl ActionDesc for SNatIcmpEchoDesc { HdrTransform { name: SNAT_NAME.to_string(), - inner_ip: HeaderAction::Modify(ip, PhantomData), + inner_ip: HeaderAction::Modify(ip), inner_ulp: UlpHeaderAction::Modify(UlpMetaModify { icmp_id: Some(self.echo_ident), ..Default::default() diff --git a/lib/opte/src/engine/tcp.rs b/lib/opte/src/engine/tcp.rs index 28e42e9d..63fdf169 100644 --- a/lib/opte/src/engine/tcp.rs +++ b/lib/opte/src/engine/tcp.rs @@ -9,6 +9,7 @@ use super::checksum::Checksum; use super::checksum::HeaderChecksum; use super::flow_table::Ttl; +use super::headers::HeaderActionError; use super::headers::HeaderActionModify; use super::headers::ModifyAction; use super::headers::PushAction; @@ -209,7 +210,10 @@ impl ModifyAction for TcpMod { } impl HeaderActionModify for TcpMeta { - fn run_modify(&mut self, spec: &UlpMetaModify) { + fn run_modify( + &mut self, + spec: &UlpMetaModify, + ) -> Result<(), HeaderActionError> { if spec.generic.src_port.is_some() { self.src = spec.generic.src_port.unwrap() } @@ -221,6 +225,8 @@ impl HeaderActionModify for TcpMeta { if spec.tcp_flags.is_some() { self.flags = spec.tcp_flags.unwrap() } + + Ok(()) } } diff --git a/lib/opte/src/engine/udp.rs b/lib/opte/src/engine/udp.rs index 84dbe5bb..ff712f09 100644 --- a/lib/opte/src/engine/udp.rs +++ b/lib/opte/src/engine/udp.rs @@ -27,6 +27,8 @@ use zerocopy::KnownLayout; use zerocopy::Ref; use zerocopy::Unaligned; +use super::headers::HeaderActionError; + #[derive(Clone, Copy, Debug, Default, Eq, Ord, PartialEq, PartialOrd)] pub struct UdpMeta { pub src: u16, @@ -104,7 +106,10 @@ impl ModifyAction for UdpMod { } impl HeaderActionModify for UdpMeta { - fn run_modify(&mut self, spec: &UlpMetaModify) { + fn run_modify( + &mut self, + spec: 
&UlpMetaModify, + ) -> Result<(), HeaderActionError> { if spec.generic.src_port.is_some() { self.src = spec.generic.src_port.unwrap() } @@ -112,6 +117,8 @@ impl HeaderActionModify for UdpMeta { if spec.generic.dst_port.is_some() { self.dst = spec.generic.dst_port.unwrap() } + + Ok(()) } } From 4aabcb01afc5008d0628bfd94de4d7871da16b5e Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 5 Sep 2024 19:33:19 +0100 Subject: [PATCH 017/115] We're now past OPTE::engine, at least. --- lib/opte/src/engine/headers.rs | 23 ++++-- lib/opte/src/engine/ingot_packet.rs | 124 +++++++++++++++++++++++++++- lib/opte/src/engine/rule.rs | 12 +-- 3 files changed, 144 insertions(+), 15 deletions(-) diff --git a/lib/opte/src/engine/headers.rs b/lib/opte/src/engine/headers.rs index fbf8704c..0675b743 100644 --- a/lib/opte/src/engine/headers.rs +++ b/lib/opte/src/engine/headers.rs @@ -611,7 +611,7 @@ pub trait HasInnerCksum { pub trait Transform: HasInnerCksum where P: PushAction + fmt::Debug, - M: ModifyAction + fmt::Debug, + M: fmt::Debug, { /// Returns whether we will need a checksum recompute on the target field. fn act_on( @@ -650,7 +650,7 @@ impl HasInnerCksum for Option { impl Transform for X where P: PushAction + fmt::Debug, - M: ModifyAction + fmt::Debug, + M: fmt::Debug, X: HeaderActionModify + From + HasInnerCksum, { fn act_on( @@ -665,7 +665,7 @@ where } HeaderAction::Pop => Err(HeaderActionError::CantPop), HeaderAction::Modify(m) => { - self.run_modify(m); + self.run_modify(m)?; Ok(Self::HAS_CKSUM) } } @@ -716,8 +716,9 @@ impl HeaderAction { ) -> Result where P: PushAction + fmt::Debug, - M: ModifyAction + fmt::Debug, + M: fmt::Debug, X: Transform + From, + X: HeaderActionModify + HasInnerCksum, { match (self, target) { (HeaderAction::Ignore, _) => Ok(false), @@ -773,18 +774,22 @@ pub enum UlpHeaderAction { } impl UlpHeaderAction { - pub fn run

(&self, meta: &mut Option

) -> Result<(), HeaderActionError> + pub fn run

( + &self, + meta: &mut Option

, + ) -> Result where P: HeaderActionModify, { match self { - Self::Ignore => (), + Self::Ignore => Ok(false), Self::Modify(arg) => match meta { - Some(meta) => meta.run_modify(arg)?, + Some(meta) => { + meta.run_modify(arg)?; + Ok(true) + } None => return Err(HeaderActionError::MissingHeader), }, } - - Ok(()) } } diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index e190f28e..d38e4bd1 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -1,6 +1,7 @@ use super::checksum::Checksum as OpteCsum; use super::checksum::Checksum; use super::checksum::HeaderChecksum; +use super::ether::EtherMeta; use super::ether::EtherMod; use super::headers::EncapMeta; use super::headers::EncapMod; @@ -8,6 +9,7 @@ use super::headers::EncapPush; use super::headers::HasInnerCksum; use super::headers::HeaderActionError; use super::headers::HeaderActionModify; +use super::headers::IpMeta; use super::headers::IpMod; use super::headers::IpPush; use super::headers::UlpMetaModify; @@ -64,6 +66,8 @@ use ingot::icmp::IcmpV6Mut; use ingot::icmp::IcmpV6Packet; use ingot::icmp::IcmpV6Ref; use ingot::ip::IpProtocol; +use ingot::ip::Ipv4; +use ingot::ip::Ipv4Flags; use ingot::ip::Ipv4Mut; use ingot::ip::Ipv4Packet; use ingot::ip::Ipv4Ref; @@ -83,6 +87,7 @@ use ingot::types::ParseError as IngotParseErr; use ingot::types::ParseResult; use ingot::types::Parsed as IngotParsed; use ingot::types::Read; +use ingot::types::Repeated; use ingot::udp::Udp; use ingot::udp::UdpMut; use ingot::udp::UdpPacket; @@ -520,7 +525,9 @@ impl Drop for PktBodyWalker { let ptr = self.slice.load(core::sync::atomic::Ordering::Relaxed); if !ptr.is_null() { // Reacquire and drop. - unsafe { Box::from_raw(ptr) }; + unsafe { + let _ = Box::from_raw(ptr); + } } } } @@ -1579,3 +1586,118 @@ impl HasInnerCksum for L3 { impl HasInnerCksum for Ulp { const HAS_CKSUM: bool = true; } + +// papering over a lot here... 
+// need to briefly keep both around while I systematically rewrite the test suite. + +impl From + for ingot::types::Packet> +{ + fn from(value: EtherMeta) -> Self { + ingot::types::Packet::Repr( + Ethernet { + destination: value.dst.bytes().into(), + source: value.src.bytes().into(), + ethertype: Ethertype(u16::from(value.ether_type)), + } + .into(), + ) + } +} + +impl From + for OwnedPacket> +{ + fn from(value: EtherMeta) -> Self { + OwnedPacket::Repr( + Ethernet { + destination: value.dst.bytes().into(), + source: value.src.bytes().into(), + ethertype: Ethertype(u16::from(value.ether_type)), + } + .into(), + ) + } +} + +impl From + for ingot::types::Packet> +{ + fn from(value: EncapMeta) -> Self { + ingot::types::Packet::Repr(value.into()) + } +} + +impl From + for OwnedPacket> +{ + fn from(value: EncapMeta) -> Self { + OwnedPacket::Repr(value) + } +} + +impl From for OwnedPacket> { + fn from(value: IpMeta) -> Self { + match value { + IpMeta::Ip4(v4) => OwnedPacket::Repr( + Ipv4 { + ihl: (v4.hdr_len / 4) as u8, + total_len: v4.total_len, + identification: v4.ident, + protocol: IpProtocol(u8::from(v4.proto)), + checksum: u16::from_be_bytes(v4.csum), + source: v4.src.bytes().into(), + destination: v4.dst.bytes().into(), + flags: Ipv4Flags::DONT_FRAGMENT, + ..Default::default() + } + .into(), + ), + IpMeta::Ip6(v6) => OwnedPacket::Repr( + Ipv6 { + payload_len: v6.pay_len, + next_header: IpProtocol(u8::from(v6.next_hdr)), + hop_limit: v6.hop_limit, + source: v6.src.bytes().into(), + destination: v6.dst.bytes().into(), + v6ext: Repeated::default(), // TODO + ..Default::default() + } + .into(), + ), + } + } +} + +impl From for L3 { + fn from(value: IpMeta) -> Self { + match value { + IpMeta::Ip4(v4) => L3::Ipv4( + Ipv4 { + ihl: (v4.hdr_len / 4) as u8, + total_len: v4.total_len, + identification: v4.ident, + protocol: IpProtocol(u8::from(v4.proto)), + checksum: u16::from_be_bytes(v4.csum), + source: v4.src.bytes().into(), + destination: v4.dst.bytes().into(), + flags: 
Ipv4Flags::DONT_FRAGMENT, + ..Default::default() + } + .into(), + ), + IpMeta::Ip6(v6) => L3::Ipv6( + Ipv6 { + payload_len: v6.pay_len, + next_header: IpProtocol(u8::from(v6.next_hdr)), + hop_limit: v6.hop_limit, + source: v6.src.bytes().into(), + destination: v6.dst.bytes().into(), + v6ext: Repeated::default(), // TODO + ..Default::default() + } + .into(), + ), + } + } +} diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index c782640d..37e33a32 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -17,6 +17,7 @@ use super::headers::HeaderActionError; use super::headers::IpMeta; use super::headers::IpMod; use super::headers::IpPush; +use super::headers::Transform; use super::headers::UlpHeaderAction; use super::ingot_packet::MsgBlk; use super::ingot_packet::Packet2; @@ -440,15 +441,16 @@ impl HdrTransform { .act_on_option(&mut meta.headers.inner_l3) .map_err(Self::err_fn("inner IP"))?; - // self.inner_ulp - // .run(&mut meta.inner.ulp) - // .map_err(Self::err_fn("inner ULP")) + let ulp_dirty = self + .inner_ulp + .run(&mut meta.headers.inner_ulp) + .map_err(Self::err_fn("inner ULP"))?; // let ulp_dirty = meta.headers.inner_ulp - // .act_on(&self.inner_ulp) + // .run(&self.inner_ulp) // .map_err(Self::err_fn("inner ULP"))?; - let ulp_dirty = todo!(); + // let ulp_dirty = todo!(); Ok(l3_dirty || ulp_dirty) } From 750c8ac28b149f6083a82e4a69b4f31e54af52e6 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 5 Sep 2024 22:00:44 +0100 Subject: [PATCH 018/115] Lazy workarounds to get back to later. 
--- lib/opte/src/engine/arp.rs | 26 +++- lib/opte/src/engine/ingot_packet.rs | 46 +++++- lib/oxide-vpc/src/engine/gateway/mod.rs | 11 +- lib/oxide-vpc/src/engine/mod.rs | 197 +++++------------------- lib/oxide-vpc/src/engine/overlay.rs | 78 ++++------ 5 files changed, 135 insertions(+), 223 deletions(-) diff --git a/lib/opte/src/engine/arp.rs b/lib/opte/src/engine/arp.rs index ae622447..c06de010 100644 --- a/lib/opte/src/engine/arp.rs +++ b/lib/opte/src/engine/arp.rs @@ -157,17 +157,33 @@ impl ArpEthIpv4 { R: PacketReadMut<'a>, { let src = rdr.slice_mut(ArpEthIpv4Raw::SIZE)?; - Self::try_from(&ArpEthIpv4Raw::new_mut(src)?) + Self::try_from(&ArpEthIpv4Raw::new(src)?) + } + + pub fn parse_normally(rdr: &[&[u8]]) -> Result { + let space_in_front = rdr.get(0).map(|v| !v.is_empty()); + + let to_use = match space_in_front { + None => { + return Err(ArpHdrError::ReadError(ReadErr::NotEnoughBytes)) + } + Some(true) => rdr.get(0), + Some(false) => rdr.get(1), + }; + + if let Some(to_use) = to_use { + Self::try_from(&ArpEthIpv4Raw::new(to_use)?) + } else { + Err(ArpHdrError::ReadError(ReadErr::NotEnoughBytes)) + } } } -impl TryFrom<&Ref<&mut [u8], ArpEthIpv4Raw>> for ArpEthIpv4 { +impl TryFrom<&Ref<&[u8], ArpEthIpv4Raw>> for ArpEthIpv4 { type Error = ArpHdrError; // NOTE: This only accepts IPv4/Ethernet ARP. 
- fn try_from( - raw: &Ref<&mut [u8], ArpEthIpv4Raw>, - ) -> Result { + fn try_from(raw: &Ref<&[u8], ArpEthIpv4Raw>) -> Result { let htype = u16::from_be_bytes(raw.htype); if htype != ARP_HTYPE_ETHERNET { diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index d38e4bd1..2ca0ef68 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -3,6 +3,7 @@ use super::checksum::Checksum; use super::checksum::HeaderChecksum; use super::ether::EtherMeta; use super::ether::EtherMod; +use super::geneve::GeneveMeta; use super::headers::EncapMeta; use super::headers::EncapMod; use super::headers::EncapPush; @@ -58,6 +59,7 @@ use ingot::example_chain::L4; use ingot::geneve::Geneve; use ingot::geneve::GeneveMut; use ingot::geneve::GenevePacket; +use ingot::geneve::GeneveRef; use ingot::geneve::ValidGeneve; use ingot::icmp::IcmpV4Mut; use ingot::icmp::IcmpV4Packet; @@ -95,6 +97,7 @@ use ingot::udp::UdpRef; use ingot::udp::ValidUdp; use ingot::Parse; use opte_api::Direction; +use opte_api::Vni; use zerocopy::ByteSlice; use zerocopy::ByteSliceMut; use zerocopy::IntoBytes; @@ -719,6 +722,23 @@ impl PacketHeaders { self.headers.outer_eth.as_ref() } + // Need to expose this a lil cleaner... + /// Returns whether this packet is sourced from outside the rack, + /// in addition to its VNI. + pub fn outer_encap_geneve_vni_and_origin(&self) -> Option<(Vni, bool)> { + match &self.headers.outer_encap { + Some(OwnedPacket::Repr(EncapMeta::Geneve(g))) => { + Some((g.vni, g.oxide_external_pkt)) + } + Some(OwnedPacket::Raw(ValidEncapMeta::Geneve(_, g))) => { + // TODO: hack. 
+ let oxide_external = g.1.packet_length() != 0; + Some((Vni::new(g.vni()).unwrap(), oxide_external)) + } + None => None, + } + } + pub fn inner_ether(&self) -> &EthernetPacket { &self.headers.inner_eth } @@ -1001,7 +1021,10 @@ impl Packet2> { &mut self, dir: Direction, xform: &dyn BodyTransform, - ) -> Result<(), BodyTransformError> { + ) -> Result<(), BodyTransformError> + where + T::Chunk: ByteSliceMut, + { // We set the flag now with the assumption that the transform // could fail after modifying part of the body. In the future // we could have something more sophisticated that only sets @@ -1021,14 +1044,25 @@ impl Packet2> { #[inline] pub fn body_segs(&self) -> Option<&[&[u8]]> { - // TODO. Not needed for today's d'plane. - None + let out = self.state.meta.body_segs(); + if out.is_empty() { + None + } else { + Some(out) + } } #[inline] - pub fn body_segs_mut(&mut self) -> Option<&mut [&mut [u8]]> { - // TODO. Not needed for today's d'plane. - None + pub fn body_segs_mut(&mut self) -> Option<&mut [&mut [u8]]> + where + T::Chunk: ByteSliceMut, + { + let out = self.state.meta.body_segs_mut(); + if out.is_empty() { + None + } else { + Some(out) + } } pub fn mblk_addr(&self) -> uintptr_t { diff --git a/lib/oxide-vpc/src/engine/gateway/mod.rs b/lib/oxide-vpc/src/engine/gateway/mod.rs index a3b04065..ecc2c42a 100644 --- a/lib/oxide-vpc/src/engine/gateway/mod.rs +++ b/lib/oxide-vpc/src/engine/gateway/mod.rs @@ -57,6 +57,7 @@ use opte::api::Direction; use opte::api::OpteError; use opte::engine::ether::EtherMod; use opte::engine::headers::HeaderAction; +use opte::engine::ingot_packet::PacketHeaders2; use opte::engine::layer::DefaultAction; use opte::engine::layer::Layer; use opte::engine::layer::LayerActions; @@ -144,14 +145,14 @@ impl StaticAction for RewriteSrcMac { &self, _dir: Direction, _flow_id: &InnerFlowId, - _packet_meta: &PacketMeta, + _packet_meta: &PacketHeaders2, _action_meta: &mut ActionMeta, ) -> GenHtResult { Ok(AllowOrDeny::Allow(HdrTransform { - 
inner_ether: HeaderAction::Modify( - EtherMod { src: Some(self.gateway_mac), ..Default::default() }, - PhantomData, - ), + inner_ether: HeaderAction::Modify(EtherMod { + src: Some(self.gateway_mac), + ..Default::default() + }), ..Default::default() })) } diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index 516d54bd..fa0ea1a1 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -16,6 +16,13 @@ use crate::cfg::VpcCfg; use opte::engine::ether::EtherType; use opte::engine::flow_table::FlowTable; use opte::engine::headers::EncapMeta; +use opte::engine::ingot_packet::GeneveOverV6; +use opte::engine::ingot_packet::MsgBlk; +use opte::engine::ingot_packet::NoEncap; +use opte::engine::ingot_packet::OpteMeta; +use opte::engine::ingot_packet::OpteParsed; +use opte::engine::ingot_packet::Packet2; +use opte::engine::ingot_packet::Parsed2; use opte::engine::ip4::Protocol; use opte::engine::packet::HeaderOffsets; use opte::engine::packet::InnerFlowId; @@ -38,6 +45,10 @@ use opte::engine::arp::ArpEthIpv4; use opte::engine::arp::ArpOp; use opte::engine::ether::ETHER_TYPE_IPV4; use opte::engine::ip4::Ipv4Addr; +use opte::ingot::ethernet::EthernetRef; +use opte::ingot::ethernet::Ethertype; +use opte::ingot::types::Read; +use zerocopy::ByteSliceMut; #[derive(Clone, Copy, Debug, Default)] pub struct VpcParser {} @@ -67,14 +78,13 @@ fn is_arp_req_for_tpa(tpa: Ipv4Addr, arp: &ArpEthIpv4) -> bool { } impl VpcNetwork { - fn handle_arp_out( + fn handle_arp_out( &self, - pkt: &mut Packet, + pkt: &mut Packet2>, ) -> Result { - let arp_start = pkt.hdr_offsets().inner.ether.hdr_len; - let mut rdr = pkt.get_rdr_mut(); - rdr.seek(arp_start).unwrap(); - let arp = ArpEthIpv4::parse(&mut rdr) + let body = + pkt.body_segs().ok_or_else(|| HdlPktError("outbound ARP"))?; + let arp = ArpEthIpv4::parse_normally(body) .map_err(|_| HdlPktError("outbound ARP"))?; let gw_ip = self.cfg.ipv4_cfg().unwrap().gateway_ip; @@ -82,7 +92,11 @@ impl 
VpcNetwork { let gw_mac = self.cfg.gateway_mac; let hp = arp::gen_arp_reply(gw_mac, gw_ip, arp.sha, arp.spa); - return Ok(HdlPktAction::Hairpin(hp)); + // TODO: just emit into an mblk normally. + return Ok(HdlPktAction::Hairpin( + unsafe { MsgBlk::wrap_mblk(hp.unwrap_mblk()) } + .expect("known valid"), + )); } Ok(HdlPktAction::Deny) @@ -92,15 +106,17 @@ impl VpcNetwork { impl NetworkImpl for VpcNetwork { type Parser = VpcParser; - fn handle_pkt( + fn handle_pkt( &self, dir: Direction, - pkt: &mut Packet, + pkt: &mut Packet2>, _uft_in: &FlowTable>, _uft_out: &FlowTable>, - ) -> Result { - match (dir, pkt.meta().inner.ether.ether_type) { - (Direction::Out, EtherType::Arp) => self.handle_arp_out(pkt), + ) -> Result +// where T::Chunk: ByteSliceMut + { + match (dir, pkt.meta().inner_ether().ethertype()) { + (Direction::Out, Ethertype::ARP) => self.handle_arp_out(pkt), _ => Ok(HdlPktAction::Deny), } @@ -112,156 +128,19 @@ impl NetworkImpl for VpcNetwork { } impl NetworkParser for VpcParser { - fn parse_outbound( + fn parse_outbound( &self, - rdr: &mut PacketReaderMut, - ) -> Result { - let mut meta = PacketMeta::default(); - let mut offsets = HeaderOffsets::default(); - let (ether_hi, _hdr) = Packet::parse_ether(rdr)?; - meta.inner.ether = ether_hi.meta; - offsets.inner.ether = ether_hi.offset; - let ether_type = ether_hi.meta.ether_type; - - // Allocate a message block and copy in the squashed data. Provide - // enough extra space for geneve encapsulation to not require an extra - // allocation later on. 
128 is based on - // - 18 byte ethernet header (vlan space) - // - 40 byte ipv6 header - // - 8 byte udp header - // - 8 byte geneve header - // - space for geneve options - const EXTRA_SPACE: Option = Some(128); - - let (ip_hi, pseudo_csum) = match ether_type { - EtherType::Arp => { - return Ok(PacketInfo { - meta, - offsets, - body_csum: None, - extra_hdr_space: EXTRA_SPACE, - }); - } - - EtherType::Ipv4 => { - let (ip_hi, hdr) = Packet::parse_ip4(rdr)?; - (ip_hi, hdr.pseudo_csum()) - } - - EtherType::Ipv6 => { - let (ip_hi, hdr) = Packet::parse_ip6(rdr)?; - (ip_hi, hdr.pseudo_csum()) - } - - _ => return Err(ParseError::UnexpectedEtherType(ether_type)), - }; - - meta.inner.ip = Some(ip_hi.meta); - offsets.inner.ip = Some(ip_hi.offset); - - let (ulp_hi, ulp_hdr) = match ip_hi.meta.proto() { - Protocol::ICMP => Packet::parse_icmp(rdr)?, - Protocol::ICMPv6 => Packet::parse_icmp6(rdr)?, - Protocol::TCP => Packet::parse_tcp(rdr)?, - Protocol::UDP => Packet::parse_udp(rdr)?, - proto => return Err(ParseError::UnexpectedProtocol(proto)), - }; - - let use_pseudo = ulp_hi.meta.is_pseudoheader_in_csum(); - meta.inner.ulp = Some(ulp_hi.meta); - offsets.inner.ulp = Some(ulp_hi.offset); - - let body_csum = if let Some(mut csum) = ulp_hdr.csum_minus_hdr() { - if use_pseudo { - csum -= pseudo_csum; - } - Some(csum) - } else { - None - }; - - Ok(PacketInfo { - meta, - offsets, - body_csum, - extra_hdr_space: EXTRA_SPACE, - }) + rdr: T, + ) -> Result, ParseError> { + let v = GeneveOverV6::parse_read(rdr)?; + Ok(OpteMeta::convert_ingot(v)) } - fn parse_inbound( + fn parse_inbound( &self, - rdr: &mut PacketReaderMut, - ) -> Result { - let mut meta = PacketMeta::default(); - let mut offsets = HeaderOffsets::default(); - - let (outer_ether_hi, _hdr) = Packet::parse_ether(rdr)?; - meta.outer.ether = Some(outer_ether_hi.meta); - offsets.outer.ether = Some(outer_ether_hi.offset); - let outer_et = outer_ether_hi.meta.ether_type; - - // VPC traffic is delivered exclusively on an IPv6 + - 
// Geneve underlay. - let outer_ip_hi = match outer_et { - EtherType::Ipv6 => Packet::parse_ip6(rdr)?.0, - - _ => return Err(ParseError::UnexpectedEtherType(outer_et)), - }; - - meta.outer.ip = Some(outer_ip_hi.meta); - offsets.outer.ip = Some(outer_ip_hi.offset); - - let (geneve_hi, _geneve_hdr) = match outer_ip_hi.meta.proto() { - Protocol::UDP => Packet::parse_geneve(rdr)?, - proto => return Err(ParseError::UnexpectedProtocol(proto)), - }; - - meta.outer.encap = Some(EncapMeta::from(geneve_hi.meta)); - offsets.outer.encap = Some(geneve_hi.offset); - - let (inner_ether_hi, _) = Packet::parse_ether(rdr)?; - meta.inner.ether = inner_ether_hi.meta; - offsets.inner.ether = inner_ether_hi.offset; - let inner_et = inner_ether_hi.meta.ether_type; - - let (inner_ip_hi, pseudo_csum) = match inner_et { - EtherType::Ipv4 => { - let (ip_hi, hdr) = Packet::parse_ip4(rdr)?; - (ip_hi, hdr.pseudo_csum()) - } - - EtherType::Ipv6 => { - let (ip_hi, hdr) = Packet::parse_ip6(rdr)?; - (ip_hi, hdr.pseudo_csum()) - } - - _ => return Err(ParseError::UnexpectedEtherType(inner_et)), - }; - - meta.inner.ip = Some(inner_ip_hi.meta); - offsets.inner.ip = Some(inner_ip_hi.offset); - - let (inner_ulp_hi, inner_ulp_hdr) = match inner_ip_hi.meta.proto() { - Protocol::ICMP => Packet::parse_icmp(rdr)?, - Protocol::ICMPv6 => Packet::parse_icmp6(rdr)?, - Protocol::TCP => Packet::parse_tcp(rdr)?, - Protocol::UDP => Packet::parse_udp(rdr)?, - proto => return Err(ParseError::UnexpectedProtocol(proto)), - }; - - let use_pseudo = inner_ulp_hi.meta.is_pseudoheader_in_csum(); - meta.inner.ulp = Some(inner_ulp_hi.meta); - offsets.inner.ulp = Some(inner_ulp_hi.offset); - - let body_csum = if let Some(mut csum) = inner_ulp_hdr.csum_minus_hdr() { - if use_pseudo { - csum -= pseudo_csum; - } - Some(csum) - } else { - None - }; - - Ok(PacketInfo { meta, offsets, body_csum, extra_hdr_space: None }) + rdr: T, + ) -> Result, ParseError> { + let v = NoEncap::parse_read(rdr)?; + Ok(OpteMeta::convert_ingot(v)) } } 
diff --git a/lib/oxide-vpc/src/engine/overlay.rs b/lib/oxide-vpc/src/engine/overlay.rs index a7a344cd..598f63e2 100644 --- a/lib/oxide-vpc/src/engine/overlay.rs +++ b/lib/oxide-vpc/src/engine/overlay.rs @@ -44,6 +44,7 @@ use opte::engine::headers::HeaderAction; use opte::engine::headers::IpAddr; use opte::engine::headers::IpCidr; use opte::engine::headers::IpPush; +use opte::engine::ingot_packet::PacketHeaders2; use opte::engine::ip4::Protocol; use opte::engine::ip6::Ipv6Addr; use opte::engine::ip6::Ipv6Cidr; @@ -205,9 +206,12 @@ impl StaticAction for EncapAction { // The encap action is only used for outgoing. _dir: Direction, flow_id: &InnerFlowId, - pkt_meta: &PacketMeta, + pkt_meta: &PacketHeaders2, action_meta: &mut ActionMeta, ) -> GenHtResult { + // TODO: can't access the memoised form from here.... + let f_hash = flow_id.crc32(); + // The router layer determines a RouterTarget and stores it in // the meta map. We need to map this virtual target to a // physical one. @@ -243,16 +247,7 @@ impl StaticAction for EncapAction { // Hash the packet onto a route target. This is a very // rudimentary mechanism. Should level-up to an ECMP // algorithm with well known statistical properties. - let hash = match pkt_meta.l4_hash() { - Some(h) => h, - None => { - return Err(GenHtError::Unexpected { - msg: "could not compute l4 hash for packet" - .to_string(), - }); - } - }; - let hash = hash as usize; + let hash = f_hash as usize; let target = match phys.iter().nth(hash % phys.len()) { Some(target) => target, None => return Ok(AllowOrDeny::Deny), @@ -318,27 +313,19 @@ impl StaticAction for EncapAction { } }; - let f_hash = flow_id.crc32(); - Ok(AllowOrDeny::Allow(HdrTransform { name: ENCAP_NAME.to_string(), // We leave the outer src/dst up to the driver. 
- outer_ether: HeaderAction::Push( - EtherMeta { - src: MacAddr::ZERO, - dst: MacAddr::ZERO, - ether_type: EtherType::Ipv6, - }, - PhantomData, - ), - outer_ip: HeaderAction::Push( - IpPush::from(Ipv6Push { - src: self.phys_ip_src, - dst: phys_target.ip, - proto: Protocol::UDP, - }), - PhantomData, - ), + outer_ether: HeaderAction::Push(EtherMeta { + src: MacAddr::ZERO, + dst: MacAddr::ZERO, + ether_type: EtherType::Ipv6, + }), + outer_ip: HeaderAction::Push(IpPush::from(Ipv6Push { + src: self.phys_ip_src, + dst: phys_target.ip, + proto: Protocol::UDP, + })), // XXX Geneve uses the UDP source port as a flow label // value for the purposes of ECMP -- a hash of the // 5-tuple. However, when using Geneve in IPv6 one could @@ -355,17 +342,14 @@ impl StaticAction for EncapAction { // It's worth keeping in mind that Chelsio's RSS picks us a ring // based on Toeplitz hash of the 5-tuple, so we need to write into // there regardless. I don't believe it *looks* at v6 flowid. - outer_encap: HeaderAction::Push( - EncapPush::from(GenevePush { - vni: phys_target.vni, - entropy: flow_id.crc32() as u16, - }), - PhantomData, - ), - inner_ether: HeaderAction::Modify( - EtherMod { dst: Some(phys_target.ether), ..Default::default() }, - PhantomData, - ), + outer_encap: HeaderAction::Push(EncapPush::from(GenevePush { + vni: phys_target.vni, + entropy: flow_id.crc32() as u16, + })), + inner_ether: HeaderAction::Modify(EtherMod { + dst: Some(phys_target.ether), + ..Default::default() + }), ..Default::default() })) } @@ -400,22 +384,20 @@ impl StaticAction for DecapAction { // The decap action is only used for inbound. 
_dir: Direction, _flow_id: &InnerFlowId, - pkt_meta: &PacketMeta, + pkt_meta: &PacketHeaders2, action_meta: &mut ActionMeta, ) -> GenHtResult { - match &pkt_meta.outer.encap { - Some(EncapMeta::Geneve(geneve)) => { + match pkt_meta.outer_encap_geneve_vni_and_origin() { + Some((vni, oxide_external_pkt)) => { // We only conditionally add this metadata because the // `Address::VNI` filter uses it to select VPC-originated // traffic. // External packets carry an extra Geneve tag from the // switch during NAT -- if found, `oxide_external_packet` // is filled. - if !geneve.oxide_external_pkt { - action_meta.insert( - ACTION_META_VNI.to_string(), - geneve.vni.to_string(), - ); + if !oxide_external_pkt { + action_meta + .insert(ACTION_META_VNI.to_string(), vni.to_string()); } } From c1a16584d0bec99a076c131c904a5cbdb839fc69 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 6 Sep 2024 18:16:29 +0100 Subject: [PATCH 019/115] Against the odds, XDE compiles. --- lib/opte/src/engine/ingot_packet.rs | 569 +++++++++++++++++++++++++++- lib/opte/src/engine/ioctl.rs | 4 +- lib/opte/src/engine/port.rs | 38 +- lib/opte/src/engine/rule.rs | 39 +- xde/src/xde.rs | 409 ++++++-------------- 5 files changed, 711 insertions(+), 348 deletions(-) diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 2ca0ef68..f6090cef 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -29,7 +29,9 @@ use super::packet::FLOW_ID_DEFAULT; use super::rule::HdrTransform; use super::rule::HdrTransformError; use super::NetworkParser; +use alloc::boxed::Box; use alloc::sync::Arc; +use alloc::vec::Vec; use core::cell::Cell; use core::cell::RefCell; use core::hash::Hash; @@ -41,6 +43,8 @@ use core::ops::DerefMut; use core::ptr::NonNull; use core::slice; use core::sync::atomic::AtomicPtr; +#[cfg(all(not(feature = "std"), not(test)))] +use illumos_sys_hdrs as ddi; use illumos_sys_hdrs::mblk_t; use illumos_sys_hdrs::uintptr_t; use 
ingot::ethernet::Ethernet; @@ -82,8 +86,10 @@ use ingot::tcp::TcpFlags; use ingot::tcp::TcpMut; use ingot::tcp::TcpPacket; use ingot::tcp::TcpRef; +use ingot::types::Emit; use ingot::types::Header; use ingot::types::HeaderStack; +use ingot::types::Packet as IngotPacket; use ingot::types::ParseControl; use ingot::types::ParseError as IngotParseErr; use ingot::types::ParseResult; @@ -97,6 +103,7 @@ use ingot::udp::UdpRef; use ingot::udp::ValidUdp; use ingot::Parse; use opte_api::Direction; +use opte_api::Ipv6Addr; use opte_api::Vni; use zerocopy::ByteSlice; use zerocopy::ByteSliceMut; @@ -183,6 +190,14 @@ impl MsgBlk { Self { inner } } + pub fn headroom(&self) -> usize { + unsafe { + let inner = self.inner.as_ref(); + + inner.b_rptr.offset_from((*inner.b_datap).db_base) as usize + } + } + pub fn new_ethernet(len: usize) -> Self { Self::new_with_headroom(2, len) } @@ -229,6 +244,29 @@ impl MsgBlk { mut_out.b_wptr = unsafe { mut_out.b_wptr.add(n_bytes) }; } + pub unsafe fn write_front( + &mut self, + n_bytes: usize, + f: impl FnOnce(&mut [MaybeUninit]), + ) { + let mut_out = unsafe { self.inner.as_mut() }; + let avail_bytes = + unsafe { mut_out.b_rptr.offset_from((*mut_out.b_datap).db_base) }; + assert!(avail_bytes >= 0); + assert!(avail_bytes as usize >= n_bytes); + + let in_slice = unsafe { + slice::from_raw_parts_mut( + mut_out.b_wptr as *mut MaybeUninit, + n_bytes, + ) + }; + + f(in_slice); + + mut_out.b_wptr = unsafe { mut_out.b_wptr.add(n_bytes) }; + } + // TODO: I really need to rethink this one in practice. // hacked together for POC. pub fn extend_if_one(&mut self, other: Self) { @@ -380,6 +418,17 @@ pub struct OpteUnifiedLengths { pub inner_ulp: usize, } +impl OpteUnifiedLengths { + pub fn hdr_len(&self) -> usize { + self.outer_eth + + self.outer_l3 + + self.outer_encap + + self.inner_eth + + self.inner_l3 + + self.inner_ulp + } +} + // TODO: Choices (L3, etc.) don't have Debug in all the right places yet. 
impl core::fmt::Debug for OpteUnified { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { @@ -453,6 +502,94 @@ impl Header for OwnedPacket { } } +impl Emit for OwnedPacket { + fn emit_raw(&self, buf: V) -> usize { + match self { + OwnedPacket::Repr(o) => o.emit_raw(buf), + OwnedPacket::Raw(b) => b.emit_raw(buf), + } + } + + fn needs_emit(&self) -> bool { + match self { + OwnedPacket::Repr(o) => true, + OwnedPacket::Raw(b) => b.needs_emit(), + } + } +} + +struct SizeHoldingEncap<'a> { + encapped_len: u16, + meta: &'a EncapMeta, +} + +unsafe impl<'a> ingot::types::EmitDoesNotRelyOnBufContents + for SizeHoldingEncap<'a> +{ +} + +impl<'a> Header for SizeHoldingEncap<'a> { + const MINIMUM_LENGTH: usize = EncapMeta::MINIMUM_LENGTH; + + #[inline] + fn packet_length(&self) -> usize { + self.meta.packet_length() + } +} + +impl<'a> Emit for SizeHoldingEncap<'a> { + fn emit_raw(&self, buf: V) -> usize { + match self.meta { + EncapMeta::Geneve(g) => { + ( + Udp { + source: g.entropy, + destination: 6081, + // TODO: account for options. 
+ length: self.encapped_len + 16, + ..Default::default() + }, + Geneve { + protocol_type: Ethertype::ETHERNET, + vni: g.vni.as_u32(), + ..Default::default() + }, + ) + .emit_raw(buf) + } + } + } + + fn needs_emit(&self) -> bool { + true + } +} + +impl Emit for EncapMeta { + #[inline] + fn emit_raw(&self, buf: V) -> usize { + SizeHoldingEncap { encapped_len: 0, meta: self }.emit_raw(buf) + } + + fn needs_emit(&self) -> bool { + true + } +} + +impl Emit for ValidEncapMeta { + fn emit_raw(&self, buf: V) -> usize { + match self { + ValidEncapMeta::Geneve(u, g) => todo!(), + } + } + + fn needs_emit(&self) -> bool { + match self { + ValidEncapMeta::Geneve(u, g) => u.needs_emit() && g.needs_emit(), + } + } +} + impl Header for EncapMeta { const MINIMUM_LENGTH: usize = Udp::MINIMUM_LENGTH + Geneve::MINIMUM_LENGTH; @@ -739,6 +876,21 @@ impl PacketHeaders { } } + // Again: really need to make Owned/Direct choices better-served by ingot. + // this interface sucks. + pub fn outer_ip6_addrs(&self) -> Option<(Ipv6Addr, Ipv6Addr)> { + match &self.headers.outer_l3 { + Some(OwnedPacket::Repr(L3Repr::Ipv6(v6))) => Some(( + v6.source.octets().into(), + v6.destination.octets().into(), + )), + Some(OwnedPacket::Raw(ValidL3::Ipv6(v6))) => { + Some((v6.source().octets().into(), v6.source().octets().into())) + } + _ => None, + } + } + pub fn inner_ether(&self) -> &EthernetPacket { &self.headers.inner_eth } @@ -826,6 +978,30 @@ impl PacketHeaders { { self.body.body_segs_mut() } + + /// Return whether the IP layer has a checksum both structurally + /// and that it is non-zero (i.e., not offloaded). + pub fn has_ip_csum(&self) -> bool { + match &self.headers.inner_l3 { + Some(L3::Ipv4(v4)) => v4.checksum() != 0, + Some(L3::Ipv6(_)) => false, + None => false, + } + } + + /// Return whether the ULP layer has a checksum both structurally + /// and that it is non-zero (i.e., not offloaded). 
+ pub fn has_ulp_csum(&self) -> bool { + let csum = match &self.headers.inner_ulp { + Some(Ulp::Tcp(t)) => t.checksum(), + Some(Ulp::Udp(u)) => u.checksum(), + Some(Ulp::IcmpV4(i4)) => i4.checksum(), + Some(Ulp::IcmpV6(i6)) => i6.checksum(), + None => return false, + }; + + csum != 0 + } } fn actual_src_port( @@ -966,6 +1142,7 @@ impl Packet2> { l4_hash: Memoised::Uninit, body_modified: false, len, + inner_csum_dirty: false, }; let mut pkt = Packet2 { state }; @@ -987,8 +1164,195 @@ impl Packet2> { &mut self.state.meta } - pub fn emit_spec(self) -> EmitSpec { - todo!() + /// Convert a packet's metadata into a set of instructions + /// needed to serialize all its changes to the wire. + pub fn emit_spec(self) -> EmitSpec + where + T::Chunk: ByteSliceMut, + { + // Roughly how does this work: + // - Identify rightmost structural-changed field. + // - fill out owned versions into the push_spec of all + // extant fields we rewound past. + // - Rewind up to+including that point in original + // pkt space. + let state = self.state; + let init_lens = state.meta.initial_lens; + let headers = state.meta.headers; + let payload_len = state.len - init_lens.hdr_len(); + let mut encapped_len = payload_len; + + let mut push_spec = OpteEmit::default(); + let mut rewind = 0; + + // structural change if: + // hdr_len is different. + // needs_emit is true (i.e., now on an owned repr). + + // Part of the initial design idea of ingot was the desire to automatically + // do this sort of thing. We are so, so far from that... + let mut force_serialize = false; + + use ingot::types::ToOwnedPacket; + + match headers.inner_ulp { + Some(ulp) => { + let l = ulp.packet_length(); + encapped_len += l; + + if ulp.needs_emit() || l != init_lens.inner_ulp { + let inner = + push_spec.inner.get_or_insert_with(Default::default); + // TODO: impl ToOwnedPacket / From<&Ulp> for UlpRepr here? generally seems a bit anaemic. 
+ inner.ulp = Some(match ulp { + Ulp::Tcp(IngotPacket::Repr(t)) => UlpRepr::Tcp(*t), + Ulp::Tcp(IngotPacket::Raw(t)) => { + UlpRepr::Tcp((&t).into()) + } + Ulp::Udp(IngotPacket::Repr(t)) => UlpRepr::Udp(*t), + Ulp::Udp(IngotPacket::Raw(t)) => { + UlpRepr::Udp((&t).into()) + } + Ulp::IcmpV4(IngotPacket::Repr(t)) => { + UlpRepr::IcmpV4(*t) + } + Ulp::IcmpV4(IngotPacket::Raw(t)) => { + UlpRepr::IcmpV4((&t).into()) + } + Ulp::IcmpV6(IngotPacket::Repr(t)) => { + UlpRepr::IcmpV6(*t) + } + Ulp::IcmpV6(IngotPacket::Raw(t)) => { + UlpRepr::IcmpV6((&t).into()) + } + }); + // inner.ulp = Some((&ulp).into()); + force_serialize = true; + rewind += init_lens.inner_ulp; + } + } + None if init_lens.inner_ulp != 0 => { + force_serialize = true; + rewind += init_lens.inner_ulp; + } + _ => {} + } + + match headers.inner_l3 { + Some(l3) => { + let l = l3.packet_length(); + encapped_len += l; + + if force_serialize || l3.needs_emit() || l != init_lens.inner_l3 + { + let inner = + push_spec.inner.get_or_insert_with(Default::default); + + inner.l3 = Some(match l3 { + L3::Ipv4(IngotPacket::Repr(v4)) => L3Repr::Ipv4(*v4), + L3::Ipv4(IngotPacket::Raw(v4)) => { + L3Repr::Ipv4((&v4).into()) + } + L3::Ipv6(IngotPacket::Repr(v6)) => L3Repr::Ipv6(*v6), + + // This needs a fuller ToOwnedPacket due to EHs... + // We can't actually do structural mods here today using OPTE. 
+ L3::Ipv6(IngotPacket::Raw(v6)) => todo!(), // L3Repr::Ipv6((&v6).into()), + }); + force_serialize = true; + rewind += init_lens.inner_l3; + } + } + None if init_lens.inner_l3 != 0 => { + force_serialize = true; + rewind += init_lens.inner_l3; + } + _ => {} + } + + // inner eth + encapped_len += headers.inner_eth.packet_length(); + if force_serialize { + let inner = push_spec.inner.get_or_insert_with(Default::default); + inner.eth = match headers.inner_eth { + IngotPacket::Repr(p) => *p, + IngotPacket::Raw(p) => (&p).into(), + }; + rewind += init_lens.inner_eth; + } + + match headers.outer_encap { + Some(encap) + if force_serialize + || encap.needs_emit() + || encap.packet_length() != init_lens.outer_encap => + { + push_spec.outer_encap = Some(match encap { + OwnedPacket::Repr(o) => o, + // Needed in fullness of time, but not here. + OwnedPacket::Raw(_) => todo!(), + }); + + force_serialize = true; + rewind += init_lens.outer_encap; + } + None if init_lens.outer_encap != 0 => { + force_serialize = true; + rewind += init_lens.outer_encap; + } + _ => {} + } + + match headers.outer_l3 { + Some(l3) + if force_serialize + || l3.needs_emit() + || l3.packet_length() != init_lens.outer_l3 => + { + push_spec.outer_ip = Some(match l3 { + OwnedPacket::Repr(o) => o, + // Needed in fullness of time, but not here. + OwnedPacket::Raw(_) => todo!(), + }); + + force_serialize = true; + rewind += init_lens.outer_l3; + } + None if init_lens.outer_l3 != 0 => { + force_serialize = true; + rewind += init_lens.outer_l3; + } + _ => {} + } + + match headers.outer_eth { + Some(eth) + if force_serialize + || eth.needs_emit() + || eth.packet_length() != init_lens.outer_eth => + { + push_spec.outer_eth = Some(match eth { + OwnedPacket::Repr(o) => o, + // Needed in fullness of time, but not here. 
+ OwnedPacket::Raw(_) => todo!(), + }); + + force_serialize = true; + rewind += init_lens.outer_eth; + } + None if init_lens.outer_eth != 0 => { + force_serialize = true; + rewind += init_lens.outer_eth; + } + _ => {} + } + + EmitSpec { + rewind: rewind as u16, + payload_len: payload_len as u16, + encapped_len: encapped_len as u16, + push_spec, + } } pub fn len(&self) -> usize { @@ -1008,7 +1372,7 @@ impl Packet2> { where T::Chunk: ByteSliceMut, { - xform.run(&mut self.state.meta)?; + self.state.inner_csum_dirty |= xform.run(&mut self.state.meta)?; // Given that n_transform layers is 1 or 2, probably won't // save too much by trying to tie to a generation number. // TODO: profile. @@ -1141,6 +1505,87 @@ impl Packet2> { pub fn set_l4_hash(&mut self, hash: u32) { self.state.l4_hash.set(hash); } + + /// Perform an incremental checksum update for the ULP checksums + /// based on the stored body checksum. + /// + /// This avoids duplicating work already done by the client in the + /// case where checksums are **not** being offloaded to the hardware. + pub fn update_checksums(&mut self) { + if !self.state.inner_csum_dirty { + return; + } + let update_ip = self.state.meta.has_ip_csum(); + let update_ulp = self.state.meta.has_ulp_csum(); + + // TODO + + // // If a ULP exists, then compute and set its checksum. + // if let (true, Some(ulp_off)) = + // (update_ulp, self.state.hdr_offsets.inner.ulp) + // { + // // Start by reusing the known checksum of the body. + // let mut csum = self.state.body_csum.unwrap(); + // // Unwrap: Can't have a ULP without an IP. + // let ip = self.meta().inner.ip.unwrap(); + // // Add pseudo header checksum. + // let pseudo_csum = ip.pseudo_csum(); + // csum += pseudo_csum; + // // All headers must reside in the first segment. + // let all_hdr_bytes = self.segs[0].slice_mut(); + // // Determine ULP slice and add its bytes to the + // // checksum. 
+ // let ulp_start = ulp_off.seg_pos; + // let ulp_end = ulp_start + ulp_off.hdr_len; + // let ulp = &mut all_hdr_bytes[ulp_start..ulp_end]; + + // match self.state.meta.inner.ulp.as_mut().unwrap() { + // UlpMeta::Icmpv4(icmp) => { + // Self::update_icmp_csum( + // icmp, + // // ICMP4 requires the body_csum *without* + // // the pseudoheader added back in. + // self.state.body_csum.unwrap(), + // ulp, + // ); + // } + + // UlpMeta::Icmpv6(icmp) => { + // Self::update_icmp_csum(icmp, csum, ulp); + // } + + // UlpMeta::Tcp(tcp) => { + // Self::update_tcp_csum(tcp, csum, ulp); + // } + + // UlpMeta::Udp(udp) => { + // Self::update_udp_csum(udp, csum, ulp); + // } + // } + // } + + // // Compute and fill in the IPv4 header checksum. + // if let (true, Some(IpMeta::Ip4(ip))) = + // (update_ip, self.state.meta.inner.ip.as_mut()) + // { + // let ip_off = self.state.hdr_offsets.inner.ip.unwrap(); + // let all_hdr_bytes = self.segs[0].slice_mut(); + // let ip_start = ip_off.seg_pos; + // let ip_end = ip_start + ip_off.hdr_len; + // let ip_bytes = &mut all_hdr_bytes[ip_start..ip_end]; + // let csum_start = Ipv4Hdr::CSUM_BEGIN; + // let csum_end = Ipv4Hdr::CSUM_END; + // ip_bytes[csum_start..csum_end].copy_from_slice(&[0; 2]); + // let csum = + // HeaderChecksum::from(Checksum::compute(ip_bytes)).bytes(); + + // // Update the metadata. + // ip.csum = csum; + + // // Update the header bytes. + // ip_bytes[csum_start..csum_end].copy_from_slice(&csum[..]); + // } + } } /// The type state of a packet that has been initialized and allocated, but @@ -1167,6 +1612,7 @@ pub struct Parsed2 { body_csum: Memoised>, l4_hash: Memoised, body_modified: bool, + inner_csum_dirty: bool, } // Needed for now to account for not wanting to redesign ActionDescs @@ -1279,6 +1725,7 @@ pub enum Emitter { } // TODO: don't really care about pushing 'inner' reprs today. 
+#[derive(Default)] pub struct OpteEmit { outer_eth: Option, outer_ip: Option, @@ -1289,6 +1736,7 @@ pub struct OpteEmit { inner: Option>, } +#[derive(Default)] pub struct OpteInnerEmit { eth: Ethernet, l3: Option, @@ -1296,10 +1744,123 @@ pub struct OpteInnerEmit { } pub struct EmitSpec { - pub rewind: usize, + pub rewind: u16, + pub encapped_len: u16, + pub payload_len: u16, pub push_spec: OpteEmit, } +impl EmitSpec { + pub fn apply(&mut self, mut pkt: MsgBlk) -> MsgBlk { + // Rewind + { + let mut slots = heapless::Vec::<&mut MsgBlkNode, 6>::new(); + let mut to_rewind = self.rewind as usize; + + let mut reader = pkt.iter_mut(); + while to_rewind != 0 { + let this = reader.next(); + let Some(node) = this else { + to_rewind = 0; + break; + }; + + let has = node.len(); + let droppable = to_rewind.min(has); + node.drop_front_bytes(droppable); + to_rewind -= droppable; + + slots.push(node).unwrap(); + } + + // TODO: put available layers into said slots? + } + + // TODO: + // - remove all zero-length nodes. + // - actually push in to existing slots we rewound past if needed. + // - actually support pushing dirty segments apart from the encap. + + let needed_push = self.push_spec.outer_eth.packet_length() + + self.push_spec.outer_ip.packet_length() + + self.push_spec.outer_encap.packet_length(); + let needed_alloc = needed_push.saturating_sub(pkt.headroom()); + let mut space_in_front = needed_push - needed_alloc; + + let mut prepend = if needed_alloc > 0 { + Some(MsgBlk::new_ethernet(needed_alloc)) + } else { + None + }; + + // NOT NEEDED TODAY. 
+ if let Some(inner_new) = &self.push_spec.inner { + todo!() + } + + if let Some(outer_encap) = &self.push_spec.outer_encap { + let a = SizeHoldingEncap { + encapped_len: self.encapped_len, + meta: &outer_encap, + }; + + let l = a.packet_length(); + + let target = if space_in_front > l { + space_in_front -= l; + &mut pkt + } else { + prepend.as_mut().unwrap() + }; + + unsafe { + target.write_front(l, |v| { + a.emit_uninit(v).unwrap(); + }) + } + } + + if let Some(outer_ip) = &self.push_spec.outer_ip { + let l = outer_ip.packet_length(); + let target = if space_in_front > 0 { + space_in_front -= l; + &mut pkt + } else { + prepend.as_mut().unwrap() + }; + + unsafe { + target.write_front(l, |v| { + outer_ip.emit_uninit(v).unwrap(); + }) + } + } + + if let Some(outer_eth) = &self.push_spec.outer_eth { + let l = outer_eth.packet_length(); + let target = if space_in_front > 0 { + space_in_front -= l; + &mut pkt + } else { + prepend.as_mut().unwrap() + }; + + unsafe { + target.write_front(l, |v| { + outer_eth.emit_uninit(v).unwrap(); + }) + } + } + + if let Some(mut prepend) = prepend { + prepend.extend_if_one(pkt); + prepend + } else { + pkt + } + } +} + #[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Ord, PartialOrd, Default)] pub enum Memoised { #[default] diff --git a/lib/opte/src/engine/ioctl.rs b/lib/opte/src/engine/ioctl.rs index 56f56c66..0a731fbc 100644 --- a/lib/opte/src/engine/ioctl.rs +++ b/lib/opte/src/engine/ioctl.rs @@ -170,14 +170,14 @@ pub struct RuleDump { pub action: String, } -pub fn dump_layer( +pub fn dump_layer( port: &Port, req: &DumpLayerReq, ) -> Result { port.dump_layer(&req.name) } -pub fn dump_tcp_flows( +pub fn dump_tcp_flows( port: &Port, _req: &DumpTcpFlowsReq, ) -> Result { diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 73044c20..977be5ef 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -78,11 +78,12 @@ use core::sync::atomic::AtomicU64; use core::sync::atomic::Ordering::SeqCst; 
#[cfg(all(not(feature = "std"), not(test)))] use illumos_sys_hdrs::uintptr_t; +use ingot::types::Read; use kstat_macro::KStatProvider; use opte_api::Direction; use opte_api::MacAddr; use opte_api::OpteError; -use std::process; +use zerocopy::ByteSliceMut; pub type Result = result::Result; @@ -1252,7 +1253,10 @@ impl Port { }), ) => { // TCP, then transform? - todo!() + // TODO: tcp + + // todo!(); //TCP + transform.apply(pkt, dir)?; } ( Direction::In, @@ -1262,7 +1266,13 @@ impl Port { }), ) => { // Transform, then TCP? - todo!() + + transform.apply(pkt, dir)?; + // todo!(); //TCP + } + // Nothing left to do other than csums; we took the slowpath. + (_, Ok(InternalProcessResult::Modified { .. })) => { + pkt.update_checksums() } _ => {} } @@ -1791,6 +1801,28 @@ impl Transforms { fn new() -> Self { Self { hdr: Vec::with_capacity(8), body: Vec::with_capacity(2) } } + + fn apply( + &self, + pkt: &mut Packet2>, + dir: Direction, + ) -> result::Result<(), ProcessError> + where + T::Chunk: ByteSliceMut, + { + // TODO: prebake these into one transform? + for ht in &self.hdr { + pkt.hdr_transform(ht)?; + } + + for bt in &self.body { + pkt.body_transform(dir, &**bt)?; + } + + pkt.update_checksums(); + + Ok(()) + } } impl fmt::Debug for Transforms { diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 37e33a32..fef66e9e 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -388,40 +388,17 @@ impl HdrTransform { where T::Chunk: ByteSliceMut, { - // NOTE: we want to track cksum dirtying here, somehow. 
- - // meta.headers.outer_eth - // .act_on(&self.outer_ether) - // .map_err(Self::err_fn("outer ether"))?; self.outer_ether .act_on_option(&mut meta.headers.outer_eth) .map_err(Self::err_fn("outer ether"))?; - // self.outer_ether - // .run(&mut meta.outer.ether) - // .map_err(Self::err_fn("outer ether"))?; - // self.outer_ip - // .run(&mut meta.outer.ip) - // .map_err(Self::err_fn("outer IP"))?; - // meta.headers.outer_l3 - // .act_on(&self.outer_ip) - // .map_err(Self::err_fn("outer IP"))?; + self.outer_ip .act_on_option(&mut meta.headers.outer_l3) .map_err(Self::err_fn("outer IP"))?; - // self.outer_encap - // .run(&mut meta.outer.encap) - // .map_err(Self::err_fn("outer encap"))?; - // meta.headers.outer_encap - // .act_on(&self.outer_encap) - // .map_err(Self::err_fn("outer encap"))?; + self.outer_encap .act_on_option(&mut meta.headers.outer_encap) .map_err(Self::err_fn("outer encap"))?; - // XXX A hack so that inner ethernet can meet the interface of - // `HeaderAction::run().` - // let mut tmp = Some(meta.inner.ether); - // self.inner_ether.run(&mut tmp).map_err(Self::err_fn("inner ether"))?; - // meta.inner.ether = tmp.unwrap(); // If I set this up right, we can handle the above w/o panic on a // dumb EtherDrop action... 
@@ -430,12 +407,6 @@ impl HdrTransform { .act_on(&self.inner_ether) .map_err(Self::err_fn("inner eth"))?; - // self.inner_ip - // .run(&mut meta.inner.ip) - // .map_err(Self::err_fn("inner IP"))?; - // let l3_dirty = meta.headers.inner_l3 - // .act_on(&self.inner_ip) - // .map_err(Self::err_fn("inner IP"))?; let l3_dirty = self .inner_ip .act_on_option(&mut meta.headers.inner_l3) @@ -446,12 +417,6 @@ impl HdrTransform { .run(&mut meta.headers.inner_ulp) .map_err(Self::err_fn("inner ULP"))?; - // let ulp_dirty = meta.headers.inner_ulp - // .run(&self.inner_ulp) - // .map_err(Self::err_fn("inner ULP"))?; - - // let ulp_dirty = todo!(); - Ok(l3_dirty || ulp_dirty) } diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 0b36646d..cb40189e 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -61,6 +61,7 @@ use ingot::udp::ValidUdp; use opte::api::ClearXdeUnderlayReq; use opte::api::CmdOk; use opte::api::Direction; +use opte::api::MacAddr; use opte::api::NoResp; use opte::api::OpteCmd; use opte::api::OpteCmdIoctl; @@ -71,6 +72,7 @@ use opte::d_error::LabelBlock; use opte::ddi::sync::KMutex; use opte::ddi::sync::KMutexType; use opte::ddi::sync::KRwLock; +use opte::ddi::sync::KRwLockReadGuard; use opte::ddi::sync::KRwLockType; use opte::ddi::time::Interval; use opte::ddi::time::Periodic; @@ -80,7 +82,9 @@ use opte::engine::headers::EncapPush; use opte::engine::headers::IpAddr; use opte::engine::headers::IpPush; use opte::engine::ingot_packet::MsgBlk; +use opte::engine::ingot_packet::Packet2; use opte::engine::ingot_packet::Parsed2; +use opte::engine::ingot_packet::ParsedMblk; use opte::engine::ioctl::{self as api}; use opte::engine::ip6::Ipv6Addr; use opte::engine::packet::Initialized; @@ -1404,7 +1408,7 @@ unsafe extern "C" fn xde_mc_unicst( 0 } -fn guest_loopback_probe(pkt: &Packet, src: &XdeDev, dst: &XdeDev) { +fn guest_loopback_probe(pkt: &Packet2, src: &XdeDev, dst: &XdeDev) { unsafe { __dtrace_probe_guest__loopback( pkt.mblk_addr(), @@ -1416,14 +1420,16 @@ fn 
guest_loopback_probe(pkt: &Packet, src: &XdeDev, dst: &XdeDev) { } #[no_mangle] -fn guest_loopback( +fn guest_loopback<'a>( src_dev: &XdeDev, - mut pkt: Packet, + devs: &'a KRwLockReadGuard>>, + pkt: &mut Packet2, vni: Vni, -) -> *mut mblk_t { +) -> Option<&'a Box> { use Direction::*; - let ether_dst = pkt.meta().inner.ether.dst; - let devs = unsafe { xde_devs.read() }; + let ether_dst = + MacAddr::from(pkt.meta().inner_ether().destination().into_array()); + // let devs = unsafe { xde_devs.read() }; let maybe_dest_dev = devs.iter().find(|x| x.vni == vni && x.port.mac_addr() == ether_dst); @@ -1434,19 +1440,21 @@ fn guest_loopback( // We have found a matching Port on this host; "loop back" // the packet into the inbound processing path of the // destination Port. - match dest_dev.port.process(In, &mut pkt, ActionMeta::new()) { + match dest_dev.port.process(In, pkt, ActionMeta::new()) { Ok(ProcessResult::Modified) => { - unsafe { - mac::mac_rx( - dest_dev.mh, - ptr::null_mut(), - pkt.unwrap_mblk(), - ) - }; + // unsafe { + // mac::mac_rx( + // dest_dev.mh, + // ptr::null_mut(), + // pkt.unwrap_mblk(), + // ) + // }; + Some(dest_dev) } Ok(ProcessResult::Drop { reason }) => { opte::engine::dbg!("loopback rx drop: {:?}", reason); + None } Ok(ProcessResult::Hairpin(_hppkt)) => { @@ -1454,17 +1462,19 @@ fn guest_loopback( // inbound packet to generate a hairpin response // from the destination port. 
opte::engine::dbg!("unexpected loopback rx hairpin"); + None } Ok(ProcessResult::Bypass) => { opte::engine::dbg!("loopback rx bypass"); - unsafe { - mac::mac_rx( - dest_dev.mh, - ptr::null_mut(), - pkt.unwrap_mblk(), - ) - }; + // unsafe { + // mac::mac_rx( + // dest_dev.mh, + // ptr::null_mut(), + // pkt.unwrap_mblk(), + // ) + // }; + Some(dest_dev) } Err(e) => { @@ -1474,6 +1484,7 @@ fn guest_loopback( dest_dev.port.name(), e ); + None } } } @@ -1485,10 +1496,9 @@ fn guest_loopback( vni.as_u32(), ether_dst ); + None } } - - ptr::null_mut() } #[no_mangle] @@ -1540,167 +1550,11 @@ unsafe extern "C" fn xde_mc_tx( unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { let mblk_addr = pkt.mblk_addr(); let pkt_len_old = pkt.byte_len(); - match Parsed2::parse(pkt.iter_mut(), Direction::Out) { - Ok(mut p) => { - // let mch = &src_dev.u1.mch; - let stream = &src_dev.u1.stream; - let hint = 0; - let port = &src_dev.port; - let flow_id = p.flow; - - // TODO: emit hdr, reuse cksum, actually send... - let mut ip6_src = Default::default(); - let mut ip6_dst = Default::default(); - let f_hash; - if let Ok(decision) = port.thin_process(Direction::Out, &mut p) { - match decision { - opte::engine::port::ThinProcRes::PushEncap( - eth, - ip, - udp, - ) => { - f_hash = p.l4_hash; - - // TODO: generate methods to fill a maybeuninit. - // total bytes: ETH 14, V6 40, UDP 8, GENEVE 8 - let new_hdrs = 14 + 40 + 8 + 8; - let mut new_blk = - MsgBlk::new_with_headroom(2, new_hdrs); - - use opte::ingot::types::EmitUninit as _; - - let w_encap_bytes = (pkt_len_old + 16) as u16; - - new_blk.write(14, |uninit| { - let complete_eth = - opte::ingot::ethernet::Ethernet { - destination: eth.dst.bytes().into(), - source: eth.src.bytes().into(), - ethertype: ingot::ethernet::Ethertype( - eth.ether_type.into(), - ), - }; - - complete_eth - .emit_uninit(uninit) - .expect("must be enough room..."); - }); - - // we know we'er only pushing v6. 
- let IpPush::Ip6(v6) = ip else { panic!() }; - ip6_src = v6.src; - ip6_dst = v6.dst; - - new_blk.write(40, |uninit| { - let complete_v6 = opte::ingot::ip::Ipv6 { - version: 6, - dscp: 0, - ecn: ingot::ip::Ecn::NotCapable, - flow_label: 12345678, - payload_len: w_encap_bytes, - next_header: ingot::ip::IpProtocol( - v6.proto.into(), - ), - hop_limit: 128, - source: v6.src.bytes().into(), - destination: v6.dst.bytes().into(), - v6ext: vec![].into(), - }; - - complete_v6 - .emit_uninit(uninit) - .expect("must be enough room..."); - }); - - let EncapPush::Geneve(gen) = udp else { panic!() }; - new_blk.write(16, |uninit| { - let complete_udp = opte::ingot::udp::Udp { - source: gen.entropy, - destination: 6081, - length: w_encap_bytes, - checksum: 0, - }; - let complete_geneve = opte::ingot::geneve::Geneve { - version: 0, - opt_len: 0, - flags: opte::ingot::geneve::GeneveFlags::empty( - ), - protocol_type: - opte::ingot::ethernet::Ethertype::ETHERNET, - vni: gen.vni.into(), - reserved: 0, - options: Vec::new(), - }; - - let len = complete_udp - .emit_uninit(uninit) - .expect("must be enough room..."); - complete_geneve - .emit_uninit(&mut uninit[len..]) - .expect("must be enough room..."); - }); - - core::mem::swap(&mut new_blk, &mut pkt); - pkt.extend_if_one(new_blk); - } - // we're in Tx for a ULP'd pkt -- this should NEVER happen. - opte::engine::port::ThinProcRes::PopEncap => unreachable!(), - opte::engine::port::ThinProcRes::Na => unreachable!(), - } - - if ip6_dst == ip6_src { - // todo. broken just now ig - // return guest_loopback(src_dev, pkt, vni); - opte::engine::err!("eh?"); - return ptr::null_mut(); - } - - let my_key = RouteKey { dst: ip6_dst, l4_hash: f_hash }; - let Route { src, dst, underlay_dev } = - src_dev.routes.next_hop(my_key, src_dev); - - // Get a pointer to the beginning of the outer frame and - // fill in the dst/src addresses before sending out the - // device. 
- let mblk = pkt.unwrap_mblk(); - let rptr = (*mblk).b_rptr; - ptr::copy(dst.as_ptr(), rptr, 6); - ptr::copy(src.as_ptr(), rptr.add(6), 6); - // Unwrap: We know the packet is good because we just - // unwrapped it above. - let new_pkt = MsgBlk::wrap_mblk(mblk).unwrap(); - - underlay_dev.stream.tx_drop_on_no_desc2( - new_pkt, - hint, - MacTxFlags::empty(), - ); - - return ptr::null_mut(); - } - } - Err(e) => { - let mut bytes = vec![]; - pkt.iter_mut().for_each(|v| bytes.extend_from_slice(v)); - opte::engine::err!("NEW Rx bad packet: {:?} -> {:?}", e, bytes); - bad_packet_parse_probe( - Some(src_dev.port.name_cstr()), - Direction::Out, - mblk_addr, - &PacketError::Parse( - opte::engine::packet::ParseError::UnexpectedProtocol( - 99.into(), - ), - ), - ); - // return ptr::null_mut(); - } - }; - let pkt = pkt.as_pkt(); let parser = src_dev.port.network().parser(); let mblk_addr = pkt.mblk_addr(); - let mut pkt = match pkt.parse(Direction::Out, parser) { + let parsed_pkt = Packet2::new(pkt.iter_mut()); + let mut parsed_pkt = match parsed_pkt.parse(Direction::Out, parser) { Ok(pkt) => pkt, Err(e) => { // TODO Add bad packet stat. @@ -1733,7 +1587,8 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { // refresh my memory on all of this. // // TODO Is there way to set mac_tx to must use result? - stream.tx_drop_on_no_desc(pkt, hint, MacTxFlags::empty()); + drop(parsed_pkt); + stream.tx_drop_on_no_desc2(pkt, hint, MacTxFlags::empty()); return ptr::null_mut(); } @@ -1742,34 +1597,27 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { // The port processing code will fire a probe that describes what // action was taken -- there should be no need to add probes or // prints here. 
- let res = port.process(Direction::Out, &mut pkt, ActionMeta::new()); + let res = port.process(Direction::Out, &mut parsed_pkt, ActionMeta::new()); + match res { Ok(ProcessResult::Modified) => { - let meta = pkt.meta(); + let meta = parsed_pkt.meta(); // If the outer IPv6 destination is the same as the // source, then we need to loop the packet inbound to the // guest on this same host. - let ip = match meta.outer.ip { + let (ip6_src, ip6_dst) = match meta.outer_ip6_addrs() { Some(v) => v, None => { // XXX add SDT probe // XXX add stat - opte::engine::dbg!("no outer ip header, dropping"); - return ptr::null_mut(); - } - }; - - let ip6 = match ip.ip6() { - Some(v) => v, - None => { - opte::engine::dbg!("outer IP header is not v6, dropping"); + opte::engine::dbg!("no outer IPv6 header, dropping"); return ptr::null_mut(); } }; - let vni = match meta.outer.encap { - Some(EncapMeta::Geneve(geneve)) => geneve.vni, + let vni = match meta.outer_encap_geneve_vni_and_origin() { + Some((vni, _)) => vni, None => { // XXX add SDT probe // XXX add stat @@ -1778,9 +1626,37 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { } }; - if ip6.dst == ip6.src { - return guest_loopback(src_dev, pkt, vni); + // what we WANT to do is pass in the parsed pkt, handle the + // emitspec in the same place, then send elsewhere. + let devs = unsafe { xde_devs.read() }; + let local_port = if ip6_dst == ip6_src { + let Some(valid_local) = + guest_loopback(src_dev, &devs, &mut parsed_pkt, vni) + else { + return ptr::null_mut(); + }; + + Some(valid_local) + } else { + None + }; + + let l4_hash = parsed_pkt.l4_hash(); + let mut emit_spec = parsed_pkt.emit_spec(); + + let out_pkt = emit_spec.apply(pkt); + + if let Some(local_port) = local_port { + unsafe { + mac::mac_rx( + local_port.mh, + ptr::null_mut(), + out_pkt.unwrap_mblk(), + ) + }; + return ptr::null_mut(); } + drop(devs); // Currently the overlay layer leaves the outer frame // destination and source zero'd. 
Ask IRE for the route @@ -1793,21 +1669,22 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { // results for a given dst + entropy. These have a fairly tight // expiry so that we can actually react to new reachability/load // info from DDM. - let my_key = RouteKey { dst: ip6.dst, l4_hash: meta.l4_hash() }; + let my_key = RouteKey { dst: ip6_dst, l4_hash: Some(l4_hash) }; let Route { src, dst, underlay_dev } = src_dev.routes.next_hop(my_key, src_dev); // Get a pointer to the beginning of the outer frame and // fill in the dst/src addresses before sending out the // device. - let mblk = pkt.unwrap_mblk(); + let mblk = out_pkt.unwrap_mblk(); let rptr = (*mblk).b_rptr; ptr::copy(dst.as_ptr(), rptr, 6); ptr::copy(src.as_ptr(), rptr.add(6), 6); // Unwrap: We know the packet is good because we just // unwrapped it above. - let new_pkt = Packet::::wrap_mblk(mblk).unwrap(); - underlay_dev.stream.tx_drop_on_no_desc( + let new_pkt = MsgBlk::wrap_mblk(mblk).unwrap(); + + underlay_dev.stream.tx_drop_on_no_desc2( new_pkt, hint, MacTxFlags::empty(), @@ -1815,18 +1692,23 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { } Ok(ProcessResult::Drop { .. 
}) => { + drop(parsed_pkt); return ptr::null_mut(); } Ok(ProcessResult::Hairpin(hpkt)) => { + drop(parsed_pkt); mac::mac_rx(src_dev.mh, ptr::null_mut(), hpkt.unwrap_mblk()); } Ok(ProcessResult::Bypass) => { - stream.tx_drop_on_no_desc(pkt, hint, MacTxFlags::empty()); + drop(parsed_pkt); + stream.tx_drop_on_no_desc2(pkt, hint, MacTxFlags::empty()); } - Err(_) => {} + Err(_) => { + drop(parsed_pkt); + } } // On return the Packet is dropped and its underlying mblk @@ -2001,99 +1883,13 @@ unsafe fn xde_rx_one( ) { let mblk_addr = pkt.mblk_addr(); let pkt_len_old = pkt.byte_len(); - match Parsed2::parse(pkt.iter_mut(), Direction::In) { - Ok(mut p) => { - // opte::engine::err!("Successful parse."); - let devs = xde_devs.read(); - let h = p.meta.0.headers(); - let (vni, ether_dst) = match (&h.outer_encap, Some(&h.inner_eth)) { - (Some(ref geneve), Some(ref eth)) => { - (Vni::new(geneve.vni()).unwrap(), eth.destination()) - } - _ => { - opte::engine::err!("Wut"); - return; - } - }; - let Some(dev) = devs.iter().find(|x| { - x.vni == vni - && x.port.mac_addr().bytes() == ether_dst.as_bytes() - }) else { - // TODO add SDT probe - // TODO add stat - opte::engine::err!( - "[encap] no device found for vni: {} mac: {}", - vni, - ether_dst - ); - return; - }; - - let e_len = h.outer_eth.as_ref().map(|v| v.packet_length()); - let v_len = h.outer_v6.as_ref().map(|v| v.packet_length()); - let u_len = h.outer_udp.as_ref().map(|v| v.packet_length()); - let g_len = h.outer_encap.as_ref().map(|v| v.packet_length()); - - let pop_len: usize = [e_len, v_len, u_len, g_len] - .iter() - .map(|v| v.unwrap_or_default()) - .sum(); - - // opte::engine::err!("Want to pop: {}", pop_len); - - let port = &dev.port; - if let Ok(decision) = port.thin_process(Direction::In, &mut p) { - match decision { - opte::engine::port::ThinProcRes::PopEncap => { - let mut to_pop = pop_len; - for layer in pkt.iter_mut() { - let max_drop = layer.len(); - let will_drop = max_drop.min(to_pop); - 
layer.drop_front_bytes(will_drop); - to_pop -= will_drop; - - if to_pop == 0 { - break; - } - } - - // could theoretically have empty segments here. - // not an issue over NIC for now. - mac::mac_rx(dev.mh, mrh, pkt.unwrap_mblk()); - } - // we know this to be true given how we cfg opte - opte::engine::port::ThinProcRes::PushEncap(_, _, _) => { - unreachable!() - } - opte::engine::port::ThinProcRes::Na => unreachable!(), - } - return; - } - } - Err(e) => { - let mut bytes = vec![]; - pkt.iter().for_each(|v| bytes.extend_from_slice(v)); - // opte::engine::err!("NEW Rx bad packet: {:?} -> {:?}", e, bytes); - bad_packet_parse_probe( - None, - Direction::In, - mblk_addr, - &PacketError::Parse( - opte::engine::packet::ParseError::UnexpectedProtocol( - 99.into(), - ), - ), - ); - } - } - // opte::engine::err!("bk to basics."); - let pkt = pkt.as_pkt(); + let parsed_pkt = Packet2::new(pkt.iter_mut()); // We must first parse the packet in order to determine where it // is to be delivered. let parser = VpcParser {}; - let mblk_addr = pkt.mblk_addr(); - let mut pkt = match pkt.parse(Direction::In, parser) { + // let mblk_addr = parsed_pkt.mblk_addr(); + let mut parsed_pkt = match parsed_pkt.parse(Direction::In, parser) { Ok(pkt) => pkt, Err(e) => { // TODO Add bad packet stat. @@ -2110,24 +1906,28 @@ unsafe fn xde_rx_one( } }; - let meta = pkt.meta(); + let meta = parsed_pkt.meta(); let devs = xde_devs.read(); // Determine where to send packet based on Geneve VNI and // destination MAC address. - let geneve = match meta.outer.encap { - Some(EncapMeta::Geneve(geneve)) => geneve, + // Todo: this, but better. 
+ let vni = match meta.outer_encap_geneve_vni_and_origin() { + Some((vni, _)) => vni, None => { // TODO add stat let msg = c"no geneve header, dropping"; - bad_packet_probe(None, Direction::In, pkt.mblk_addr(), msg); + bad_packet_probe(None, Direction::In, mblk_addr, msg); opte::engine::dbg!("no geneve header, dropping"); return; } }; - let vni = geneve.vni; - let ether_dst = meta.inner.ether.dst; + let ether_dst = meta.inner_ether().destination(); + let ether_dst = ether_dst.into_array().into(); + + // let vni = geneve.vni; + // let ether_dst = meta.inner.ether.dst; let Some(dev) = devs.iter().find(|x| x.vni == vni && x.port.mac_addr() == ether_dst) else { @@ -2143,18 +1943,23 @@ unsafe fn xde_rx_one( // We are in passthrough mode, skip OPTE processing. if dev.passthrough { + drop(parsed_pkt); mac::mac_rx(dev.mh, mrh, pkt.unwrap_mblk()); return; } let port = &dev.port; - let res = port.process(Direction::In, &mut pkt, ActionMeta::new()); + let res = port.process(Direction::In, &mut parsed_pkt, ActionMeta::new()); + let mut emit_spec = parsed_pkt.emit_spec(); + match res { Ok(ProcessResult::Modified | ProcessResult::Bypass) => { - mac::mac_rx(dev.mh, mrh, pkt.unwrap_mblk()); + let npkt = emit_spec.apply(pkt); + + mac::mac_rx(dev.mh, mrh, npkt.unwrap_mblk()); } Ok(ProcessResult::Hairpin(hppkt)) => { - stream.tx_drop_on_no_desc(hppkt, 0, MacTxFlags::empty()); + stream.tx_drop_on_no_desc2(hppkt, 0, MacTxFlags::empty()); } _ => {} } From f73b53290a97ede18eac98dc8b6c44cae023d573 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Sat, 7 Sep 2024 00:22:04 +0100 Subject: [PATCH 020/115] Unbreak DHCPv4 responses. 
--- lib/opte/src/engine/predicate.rs | 39 ++++++++++++++++++++++++-------- lib/oxide-vpc/src/engine/mod.rs | 14 ++++++++---- 2 files changed, 40 insertions(+), 13 deletions(-) diff --git a/lib/opte/src/engine/predicate.rs b/lib/opte/src/engine/predicate.rs index 86fcb90a..88bb8a98 100644 --- a/lib/opte/src/engine/predicate.rs +++ b/lib/opte/src/engine/predicate.rs @@ -609,16 +609,37 @@ impl DataPredicate { Self::Not(pred) => !pred.is_match(meta), Self::DhcpMsgType(mt) => { - // Formerly, this did a COMPLETE clone and parse. - let body = meta.body_segs(); - if body.len() == 0 || body[0].len() == 0 { - super::err!( - "Failed to read DHCPv6 message type from packet" - ); - false + // Not sure that I like that this is a complete clone/parse... + let body; + + let bytes = if meta.body_segs().len() > 1 { + body = meta.copy_remaining(); + &body } else { - mt.is_match(&body[0][0].into()) - } + meta.body_segs()[0] + }; + + let pkt = match DhcpPacket::new_checked(&bytes) { + Ok(v) => v, + Err(e) => { + super::err!( + "DhcpPacket::new_checked() failed: {:?}", + e + ); + return false; + } + }; + + let dhcp = match DhcpRepr::parse(&pkt) { + Ok(v) => v, + Err(e) => { + super::err!("DhcpRepr::parse() failed: {:?}", e); + + return false; + } + }; + + mt.is_match(&DhcpMessageType::from(dhcp.message_type)) } Self::IcmpMsgType(mt) => { diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index fa0ea1a1..74df2f97 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -132,15 +132,21 @@ impl NetworkParser for VpcParser { &self, rdr: T, ) -> Result, ParseError> { - let v = GeneveOverV6::parse_read(rdr)?; - Ok(OpteMeta::convert_ingot(v)) + let v = NoEncap::parse_read(rdr); + if let Err(e) = v { + opte::engine::err!("PARSERR OUT [NoEncap] {:?}", e); + } + Ok(OpteMeta::convert_ingot(v?)) } fn parse_inbound( &self, rdr: T, ) -> Result, ParseError> { - let v = NoEncap::parse_read(rdr)?; - Ok(OpteMeta::convert_ingot(v)) + let v = 
GeneveOverV6::parse_read(rdr); + if let Err(e) = v { + opte::engine::err!("PARSERR IN [GeneveOverV6] {:?}", e); + } + Ok(OpteMeta::convert_ingot(v?)) } } From 640b962a4bd9d645e9149447f2763e0903ae5d87 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 9 Sep 2024 14:02:46 +0100 Subject: [PATCH 021/115] Works, but I am dropping perf on the floor somewhere now. Packet Rx is apparently 180% more costly now on `glasgow`. --- lib/opte/src/engine/arp.rs | 9 +++++++ lib/opte/src/engine/ingot_packet.rs | 40 +++++++++++++++++++++-------- lib/oxide-vpc/src/engine/mod.rs | 13 +++------- 3 files changed, 42 insertions(+), 20 deletions(-) diff --git a/lib/opte/src/engine/arp.rs b/lib/opte/src/engine/arp.rs index c06de010..d97d03ce 100644 --- a/lib/opte/src/engine/arp.rs +++ b/lib/opte/src/engine/arp.rs @@ -266,4 +266,13 @@ impl<'a> RawHeader<'a> for ArpEthIpv4Raw { }; Ok(hdr) } + + fn new(src: &[u8]) -> Result, ReadErr> { + debug_assert_eq!(src.len(), Self::SIZE); + let hdr = match Ref::from_bytes(src).ok() { + Some(hdr) => hdr, + None => return Err(ReadErr::BadLayout), + }; + Ok(hdr) + } } diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index f6090cef..0b40337a 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -252,19 +252,19 @@ impl MsgBlk { let mut_out = unsafe { self.inner.as_mut() }; let avail_bytes = unsafe { mut_out.b_rptr.offset_from((*mut_out.b_datap).db_base) }; + assert!(avail_bytes >= 0); assert!(avail_bytes as usize >= n_bytes); + let new_head = unsafe { mut_out.b_rptr.sub(n_bytes) }; + let in_slice = unsafe { - slice::from_raw_parts_mut( - mut_out.b_wptr as *mut MaybeUninit, - n_bytes, - ) + slice::from_raw_parts_mut(new_head as *mut MaybeUninit, n_bytes) }; f(in_slice); - mut_out.b_wptr = unsafe { mut_out.b_wptr.add(n_bytes) }; + mut_out.b_rptr = new_head; } // TODO: I really need to rethink this one in practice. 
@@ -278,6 +278,16 @@ impl MsgBlk { mut_self.b_cont = other.unwrap_mblk(); } + /// Drop all bytes and move the cursor to the very back of the dblk. + pub fn pop_all(&mut self) { + unsafe { + (*self.inner.as_ptr()).b_rptr = + (*(*self.inner.as_ptr()).b_datap).db_lim; + (*self.inner.as_ptr()).b_wptr = + (*(*self.inner.as_ptr()).b_datap).db_lim; + } + } + pub fn iter(&self) -> MsgBlkIter { MsgBlkIter { curr: Some(self.inner), marker: PhantomData } } @@ -597,7 +607,7 @@ impl Header for EncapMeta { fn packet_length(&self) -> usize { match self { EncapMeta::Geneve(g) => { - Geneve::MINIMUM_LENGTH + Self::MINIMUM_LENGTH + g.oxide_external_pkt.then_some(4).unwrap_or_default() } } @@ -694,8 +704,11 @@ impl PktBodyWalker { let as_bytes = chunk.deref(); to_hold.push(unsafe { core::mem::transmute(as_bytes) }); } + + // TODO(drop-safety): we need to give these chunks a longer life, too. while let Ok(chunk) = rest.next_chunk() { - to_hold.push(unsafe { core::mem::transmute(chunk.deref()) }); + let as_bytes = chunk.deref(); + to_hold.push(unsafe { core::mem::transmute(as_bytes) }); } let to_store = Box::into_raw(Box::new(to_hold.into_boxed_slice())); @@ -1788,7 +1801,9 @@ impl EmitSpec { let mut space_in_front = needed_push - needed_alloc; let mut prepend = if needed_alloc > 0 { - Some(MsgBlk::new_ethernet(needed_alloc)) + let mut new_mblk = MsgBlk::new_ethernet(needed_alloc); + new_mblk.pop_all(); + Some(new_mblk) } else { None }; @@ -1806,10 +1821,11 @@ impl EmitSpec { let l = a.packet_length(); - let target = if space_in_front > l { + let target = if space_in_front >= l { space_in_front -= l; &mut pkt } else { + space_in_front = 0; prepend.as_mut().unwrap() }; @@ -1822,10 +1838,11 @@ impl EmitSpec { if let Some(outer_ip) = &self.push_spec.outer_ip { let l = outer_ip.packet_length(); - let target = if space_in_front > 0 { + let target = if space_in_front >= l { space_in_front -= l; &mut pkt } else { + space_in_front = 0; prepend.as_mut().unwrap() }; @@ -1838,10 +1855,11 @@ impl 
EmitSpec { if let Some(outer_eth) = &self.push_spec.outer_eth { let l = outer_eth.packet_length(); - let target = if space_in_front > 0 { + let target = if space_in_front >= l { space_in_front -= l; &mut pkt } else { + space_in_front = 0; prepend.as_mut().unwrap() }; diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index 74df2f97..3a6771f9 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -82,10 +82,11 @@ impl VpcNetwork { &self, pkt: &mut Packet2>, ) -> Result { - let body = - pkt.body_segs().ok_or_else(|| HdlPktError("outbound ARP"))?; + let body = pkt + .body_segs() + .ok_or_else(|| HdlPktError("outbound ARP (no body)"))?; let arp = ArpEthIpv4::parse_normally(body) - .map_err(|_| HdlPktError("outbound ARP"))?; + .map_err(|_| HdlPktError("outbound ARP (parse)"))?; let gw_ip = self.cfg.ipv4_cfg().unwrap().gateway_ip; if is_arp_req_for_tpa(gw_ip, &arp) { @@ -133,9 +134,6 @@ impl NetworkParser for VpcParser { rdr: T, ) -> Result, ParseError> { let v = NoEncap::parse_read(rdr); - if let Err(e) = v { - opte::engine::err!("PARSERR OUT [NoEncap] {:?}", e); - } Ok(OpteMeta::convert_ingot(v?)) } @@ -144,9 +142,6 @@ impl NetworkParser for VpcParser { rdr: T, ) -> Result, ParseError> { let v = GeneveOverV6::parse_read(rdr); - if let Err(e) = v { - opte::engine::err!("PARSERR IN [GeneveOverV6] {:?}", e); - } Ok(OpteMeta::convert_ingot(v?)) } } From ea26bbd63d1551cd6c1a6f2dc03803d50292088d Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 9 Sep 2024 20:59:25 +0100 Subject: [PATCH 022/115] Actually remember to use cached l4 hash. TODO: find where the missing 250 Mbps has gone. 
--- lib/opte/src/engine/ingot_packet.rs | 83 ++++++++++++++++++++++------- lib/opte/src/engine/port.rs | 2 + lib/oxide-vpc/src/engine/mod.rs | 2 + 3 files changed, 69 insertions(+), 18 deletions(-) diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 0b40337a..bb0db9f3 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -13,6 +13,7 @@ use super::headers::HeaderActionModify; use super::headers::IpMeta; use super::headers::IpMod; use super::headers::IpPush; +use super::headers::PushAction; use super::headers::UlpMetaModify; use super::headers::UlpMod; use super::icmp::QueryEcho; @@ -429,6 +430,7 @@ pub struct OpteUnifiedLengths { } impl OpteUnifiedLengths { + #[inline] pub fn hdr_len(&self) -> usize { self.outer_eth + self.outer_l3 @@ -484,6 +486,7 @@ pub struct OpteMeta { pub type OpteParsed = IngotParsed::Chunk>, T>; impl OpteMeta { + #[inline] pub fn convert_ingot, Q: Read>( value: IngotParsed, ) -> OpteParsed { @@ -513,6 +516,7 @@ impl Header for OwnedPacket { } impl Emit for OwnedPacket { + #[inline] fn emit_raw(&self, buf: V) -> usize { match self { OwnedPacket::Repr(o) => o.emit_raw(buf), @@ -520,6 +524,7 @@ impl Emit for OwnedPacket { } } + #[inline] fn needs_emit(&self) -> bool { match self { OwnedPacket::Repr(o) => true, @@ -548,6 +553,7 @@ impl<'a> Header for SizeHoldingEncap<'a> { } impl<'a> Emit for SizeHoldingEncap<'a> { + #[inline] fn emit_raw(&self, buf: V) -> usize { match self.meta { EncapMeta::Geneve(g) => { @@ -570,6 +576,7 @@ impl<'a> Emit for SizeHoldingEncap<'a> { } } + #[inline] fn needs_emit(&self) -> bool { true } @@ -581,18 +588,21 @@ impl Emit for EncapMeta { SizeHoldingEncap { encapped_len: 0, meta: self }.emit_raw(buf) } + #[inline] fn needs_emit(&self) -> bool { true } } impl Emit for ValidEncapMeta { + #[inline] fn emit_raw(&self, buf: V) -> usize { match self { ValidEncapMeta::Geneve(u, g) => todo!(), } } + #[inline] fn needs_emit(&self) -> bool { match 
self { ValidEncapMeta::Geneve(u, g) => u.needs_emit() && g.needs_emit(), @@ -628,6 +638,7 @@ impl Header for ValidEncapMeta { } impl From> for OwnedPacket { + #[inline] fn from(value: ingot::types::Packet) -> Self { match value { ingot::types::Packet::Raw(b) => Self::Raw(b), @@ -637,6 +648,7 @@ impl From> for OwnedPacket { } impl From> for OpteUnified { + #[inline] fn from(value: GeneveOverV6) -> Self { Self { outer_eth: Some(value.outer_eth), @@ -651,6 +663,7 @@ impl From> for OpteUnified { } impl From> for OpteUnified { + #[inline] fn from(value: NoEncap) -> Self { Self { outer_eth: None, @@ -779,6 +792,7 @@ pub struct PacketHeaders { } impl From> for OpteMeta { + #[inline] fn from(value: NoEncap) -> Self { OpteMeta { outer_eth: None, @@ -792,6 +806,7 @@ impl From> for OpteMeta { } impl From> for OpteMeta { + #[inline] fn from(value: GeneveOverV6) -> Self { // These are practically all Valid, anyhow. @@ -1056,6 +1071,7 @@ fn pseudo_port( } impl From<&PacketHeaders> for InnerFlowId { + #[inline] fn from(meta: &PacketHeaders) -> Self { let (proto, addrs) = match meta.inner_l3() { Some(L3::Ipv4(pkt)) => ( @@ -1112,6 +1128,7 @@ impl Packet2> { } impl Packet2> { + #[inline] pub fn parse( self, dir: Direction, @@ -1177,6 +1194,7 @@ impl Packet2> { &mut self.state.meta } + #[inline] /// Convert a packet's metadata into a set of instructions /// needed to serialize all its changes to the wire. 
pub fn emit_spec(self) -> EmitSpec @@ -1372,6 +1390,7 @@ impl Packet2> { self.state.len } + #[inline] pub fn flow(&self) -> &InnerFlowId { &self.state.flow } @@ -1636,6 +1655,7 @@ pub type PacketHeaders2<'a> = PacketHeaders>; pub type InitMblk<'a> = Initialized2>; pub type ParsedMblk<'a> = Parsed2>; +#[inline] fn csum_minus_hdr(ulp: &Ulp) -> Option { match ulp { Ulp::IcmpV4(icmp) => { @@ -1719,6 +1739,7 @@ trait QueryLen { } impl<'a> QueryLen for MsgBlkIterMut<'a> { + #[inline] fn len(&self) -> usize { let own_blk_len = self .curr @@ -1764,26 +1785,29 @@ pub struct EmitSpec { } impl EmitSpec { + #[inline] pub fn apply(&mut self, mut pkt: MsgBlk) -> MsgBlk { // Rewind { let mut slots = heapless::Vec::<&mut MsgBlkNode, 6>::new(); let mut to_rewind = self.rewind as usize; - let mut reader = pkt.iter_mut(); - while to_rewind != 0 { - let this = reader.next(); - let Some(node) = this else { - to_rewind = 0; - break; - }; - - let has = node.len(); - let droppable = to_rewind.min(has); - node.drop_front_bytes(droppable); - to_rewind -= droppable; - - slots.push(node).unwrap(); + if to_rewind > 0 { + let mut reader = pkt.iter_mut(); + while to_rewind != 0 { + let this = reader.next(); + let Some(node) = this else { + to_rewind = 0; + break; + }; + + let has = node.len(); + let droppable = to_rewind.min(has); + node.drop_front_bytes(droppable); + to_rewind -= droppable; + + slots.push(node).unwrap(); + } } // TODO: put available layers into said slots? 
@@ -1797,7 +1821,7 @@ impl EmitSpec { let needed_push = self.push_spec.outer_eth.packet_length() + self.push_spec.outer_ip.packet_length() + self.push_spec.outer_encap.packet_length(); - let needed_alloc = needed_push.saturating_sub(pkt.headroom()); + let needed_alloc = needed_push; //.saturating_sub(pkt.headroom()); let mut space_in_front = needed_push - needed_alloc; let mut prepend = if needed_alloc > 0 { @@ -1821,7 +1845,7 @@ impl EmitSpec { let l = a.packet_length(); - let target = if space_in_front >= l { + let target = if prepend.is_none() { space_in_front -= l; &mut pkt } else { @@ -1838,7 +1862,7 @@ impl EmitSpec { if let Some(outer_ip) = &self.push_spec.outer_ip { let l = outer_ip.packet_length(); - let target = if space_in_front >= l { + let target = if prepend.is_none() { space_in_front -= l; &mut pkt } else { @@ -1855,7 +1879,7 @@ impl EmitSpec { if let Some(outer_eth) = &self.push_spec.outer_eth { let l = outer_eth.packet_length(); - let target = if space_in_front >= l { + let target = if prepend.is_none() { space_in_front -= l; &mut pkt } else { @@ -1887,6 +1911,7 @@ pub enum Memoised { } impl Memoised { + #[inline] pub fn get(&mut self, or: impl FnOnce() -> T) -> &T { if self.try_get().is_none() { self.set(or()); @@ -1895,6 +1920,7 @@ impl Memoised { self.try_get().unwrap() } + #[inline] pub fn try_get(&self) -> Option<&T> { match self { Memoised::Uninit => None, @@ -1902,12 +1928,14 @@ impl Memoised { } } + #[inline] pub fn set(&mut self, val: T) { *self = Self::Known(val); } } impl QueryEcho for IcmpV4Packet { + #[inline] fn echo_id(&self) -> Option { match (self.code(), self.ty()) { (0, 0) | (0, 8) => Some(u16::from_be_bytes( @@ -1919,6 +1947,7 @@ impl QueryEcho for IcmpV4Packet { } impl QueryEcho for IcmpV6Packet { + #[inline] fn echo_id(&self) -> Option { match (self.code(), self.ty()) { (0, 128) | (0, 129) => Some(u16::from_be_bytes( @@ -1933,6 +1962,7 @@ impl QueryEcho for IcmpV6Packet { impl HeaderActionModify for OwnedPacket> { + #[inline] 
fn run_modify( &mut self, mod_spec: &EtherMod, @@ -1961,6 +1991,7 @@ impl HeaderActionModify } impl HeaderActionModify for EthernetPacket { + #[inline] fn run_modify( &mut self, mod_spec: &EtherMod, @@ -1980,6 +2011,7 @@ impl HeaderActionModify for EthernetPacket { impl HeaderActionModify for OwnedPacket> { + #[inline] fn run_modify( &mut self, mod_spec: &IpMod, @@ -2064,6 +2096,7 @@ impl HeaderActionModify } impl HeaderActionModify for L3 { + #[inline] fn run_modify( &mut self, mod_spec: &IpMod, @@ -2100,6 +2133,7 @@ impl HeaderActionModify for L3 { } impl HeaderActionModify for Ulp { + #[inline] fn run_modify( &mut self, mod_spec: &UlpMetaModify, @@ -2149,6 +2183,7 @@ impl HeaderActionModify for Ulp { impl HeaderActionModify for OwnedPacket> { + #[inline] fn run_modify( &mut self, mod_spec: &EncapMod, @@ -2206,6 +2241,7 @@ impl HasInnerCksum for Ulp { impl From for ingot::types::Packet> { + #[inline] fn from(value: EtherMeta) -> Self { ingot::types::Packet::Repr( Ethernet { @@ -2221,6 +2257,7 @@ impl From impl From for OwnedPacket> { + #[inline] fn from(value: EtherMeta) -> Self { OwnedPacket::Repr( Ethernet { @@ -2236,6 +2273,7 @@ impl From impl From for ingot::types::Packet> { + #[inline] fn from(value: EncapMeta) -> Self { ingot::types::Packet::Repr(value.into()) } @@ -2244,12 +2282,14 @@ impl From impl From for OwnedPacket> { + #[inline] fn from(value: EncapMeta) -> Self { OwnedPacket::Repr(value) } } impl From for OwnedPacket> { + #[inline] fn from(value: IpMeta) -> Self { match value { IpMeta::Ip4(v4) => OwnedPacket::Repr( @@ -2283,6 +2323,7 @@ impl From for OwnedPacket> { } impl From for L3 { + #[inline] fn from(value: IpMeta) -> Self { match value { IpMeta::Ip4(v4) => L3::Ipv4( @@ -2314,3 +2355,9 @@ impl From for L3 { } } } + +impl PushAction for Ethernet { + fn push(&self) -> Ethernet { + *self + } +} diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 977be5ef..da899afe 100644 --- a/lib/opte/src/engine/port.rs +++ 
b/lib/opte/src/engine/port.rs @@ -2424,6 +2424,7 @@ impl Port { self.uft_hit_probe(In, pkt.flow(), epoch, entry.last_hit()); let transform = Some(Arc::clone(&entry.state().xforms)); + pkt.set_l4_hash(entry.state().l4_hash); // for ht in &entry.state().xforms.hdr { // pkt.hdr_transform(ht)?; @@ -2813,6 +2814,7 @@ impl Port { // have a UFT miss. if !reprocess { let transform = Some(Arc::clone(&entry.state().xforms)); + pkt.set_l4_hash(entry.state().l4_hash); // Due to borrowing constraints from order of operations, we have // to remove the UFT entry here rather than in `update_tcp_entry`. // The TCP entry itself is already removed. diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index 3a6771f9..f1cdf790 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -129,6 +129,7 @@ impl NetworkImpl for VpcNetwork { } impl NetworkParser for VpcParser { + #[inline] fn parse_outbound( &self, rdr: T, @@ -137,6 +138,7 @@ impl NetworkParser for VpcParser { Ok(OpteMeta::convert_ingot(v?)) } + #[inline] fn parse_inbound( &self, rdr: T, From 3e6794e51ab41f168d84e969b1688ad91d098093 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 10 Sep 2024 12:51:02 +0100 Subject: [PATCH 023/115] Temporarily re-add thin process. Notes from rough turning-off-and-on of the Old Way: * Thin process is slower than it was before. I suspect this is due to the larger amount of things which have been shoved into the full Packet type once again. We're at 2.8--2.9 rather than 2.9--3. * Thin process has a bigger performance impact on the Rx pathway than Tx: - Rx-only: 2.8--2.9 - Tx-only: 2.74 - None: 2.7 - Old: <=2.5 There might be value in first-classing an extra parse state for the cases that we know we don't need to do arbitrary full-on transforms. 
--- lib/opte/src/engine/ingot_packet.rs | 4 + lib/opte/src/engine/port.rs | 698 ++++++++++++++-------------- xde/src/xde.rs | 168 +++++++ 3 files changed, 529 insertions(+), 341 deletions(-) diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index bb0db9f3..b8be25a7 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -881,6 +881,10 @@ pub fn ulp_dst_port(pkt: &Ulp) -> Option { } impl PacketHeaders { + pub fn initial_lens(&self) -> &OpteUnifiedLengths { + &self.initial_lens + } + pub fn outer_ether( &self, ) -> Option<&OwnedPacket>> { diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index da899afe..99be49a0 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -1297,347 +1297,362 @@ impl Port { } // hope and pray we find a ULP, then use that? - // pub fn thin_process( - // &self, - // dir: Direction, - // pkt: &mut Parsed2, - // ) -> result::Result - // where - // T::Chunk: ByteSliceMut, - // { - // let flow_before = pkt.flow(); - // // let flow_before = *pkt.flow(); - // let _epoch = self.epoch.load(SeqCst); - // let mut data = self.data.lock(); - // check_state!(data.state, [PortState::Running]) - // .map_err(|_| ProcessError::BadState(data.state))?; - - // let mut dirty_csum = false; - - // // self.port_process_entry_probe(dir, &flow_before, epoch, pskt); - // // TODO: what stats? lmao - // match dir { - // Direction::Out => { - // // opte::engine::err!("looking up {:?} in outdir...", flow_before); - // let a = data.uft_out.get(&flow_before); - // let Some(a) = a else { - // // eh. It will get recirc'd for free... - // // opte::engine::err!("not found! 
Releasing!"); - // return Err(ProcessError::FlowTableFull { - // kind: "()", - // limit: 0, - // }); - // }; - // pkt.set_l4_hash(a.state().l4_hash); - // // opte::engine::err!("found!"); - // let xforms = Arc::clone(&a.state().xforms); - // Self::update_stats_out( - // &mut data.stats.vals, - // &Ok(ProcessResult::Modified), - // ); - // drop(data); - - // let hm = pkt.meta.0.headers_mut(); - - // let mut new_eth = None; - // let mut new_ip = None; - // let mut new_encap = None; - // // opte::engine::err!("xforms {:?}!", &a.state().xforms.hdr); - // for xf in &xforms.hdr { - // // opte::engine::err!("xf..."); - // if let HeaderAction::Push(outer_eth, _) = &xf.outer_ether { - // new_eth = Some(outer_eth.clone()); - // } - // if let HeaderAction::Push(outer_ip, _) = &xf.outer_ip { - // new_ip = Some(outer_ip.clone()); - // } - // if let HeaderAction::Push(outer_ec, _) = &xf.outer_encap { - // new_encap = Some(outer_ec.clone()); - // } - // if let HeaderAction::Modify(m, _) = &xf.inner_ether { - // if let Some(src) = m.src { - // hm.inner_eth.set_source(src.bytes().into()); - // } - // if let Some(dst) = m.dst { - // hm.inner_eth.set_destination(dst.bytes().into()); - // } - // } - // if let HeaderAction::Modify(m, _) = &xf.inner_ip { - // match m { - // super::headers::IpMod::Ip4(v4) => { - // let Some(ingot::example_chain::L3::Ipv4( - // ref mut v4_t, - // )) = hm.inner_l3 - // else { - // return Err(ProcessError::FlowTableFull { - // kind: "()", - // limit: 0, - // }); - // }; - // if let Some(src) = v4.src { - // dirty_csum = true; - // v4_t.set_source(src.into()); - // } - // if let Some(dst) = v4.dst { - // dirty_csum = true; - // v4_t.set_destination(dst.into()); - // } - // } - // super::headers::IpMod::Ip6(v6) => { - // let Some(ingot::example_chain::L3::Ipv6( - // ref mut v6_t, - // )) = hm.inner_l3 - // else { - // return Err(ProcessError::FlowTableFull { - // kind: "()", - // limit: 0, - // }); - // }; - // if let Some(src) = v6.src { - // dirty_csum = 
true; - // v6_t.set_source(src.into()); - // } - // if let Some(dst) = v6.dst { - // dirty_csum = true; - // v6_t.set_destination(dst.into()); - // } - // } - // } - // } - // if let UlpHeaderAction::Modify(m) = &xf.inner_ulp { - // if let Some(src) = &m.generic.src_port { - // match hm.inner_ulp { - // Some(Ulp::Tcp(ref mut t)) => { - // dirty_csum = true; - // t.set_source(*src) - // } - // Some(Ulp::Udp(ref mut t)) => { - // dirty_csum = true; - // t.set_source(*src) - // } - // _ => {} - // } - // } - // if let Some(dst) = &m.generic.dst_port { - // match hm.inner_ulp { - // Some(Ulp::Tcp(ref mut t)) => { - // dirty_csum = true; - // t.set_destination(*dst) - // } - // Some(Ulp::Udp(ref mut t)) => { - // dirty_csum = true; - // t.set_destination(*dst) - // } - // _ => {} - // } - // } - // if let Some(flags) = &m.tcp_flags { - // match hm.inner_ulp { - // Some(Ulp::Tcp(ref mut t)) => { - // dirty_csum = true; - // t.set_flags(TcpFlags::from_bits_retain( - // *flags, - // )) - // } - // _ => {} - // } - // } - // if let Some(new_id) = &m.icmp_id { - // match hm.inner_ulp { - // Some(Ulp::IcmpV4(ref mut pkt)) - // if pkt.ty() == 0 || pkt.ty() == 3 => - // { - // dirty_csum = true; - // pkt.rest_of_hdr_mut()[..2] - // .copy_from_slice(&new_id.to_be_bytes()) - // } - // Some(Ulp::IcmpV6(ref mut pkt)) - // if pkt.ty() == 128 || pkt.ty() == 129 => - // { - // dirty_csum = true; - // pkt.rest_of_hdr_mut()[..2] - // .copy_from_slice(&new_id.to_be_bytes()) - // } - // _ => {} - // } - // } - // } - // } - - // if dirty_csum { - // // TODO: something. - // } - - // match (new_eth, new_ip, new_encap) { - // (Some(a), Some(b), Some(c)) => { - // Ok(ThinProcRes::PushEncap(a, b, c)) - // } - // (None, None, None) => Ok(ThinProcRes::Na), - // _ => Err(ProcessError::FlowTableFull { - // kind: "()", - // limit: 0, - // }), - // } - // } - - // Direction::In => { - // let a = data.uft_in.get(&flow_before); - // let Some(a) = a else { - // // eh. 
- // return Err(ProcessError::FlowTableFull { - // kind: "()", - // limit: 0, - // }); - // }; - // pkt.set_l4_hash(a.state().l4_hash); - // let xforms = Arc::clone(&a.state().xforms); - // Self::update_stats_in( - // &mut data.stats.vals, - // &Ok(ProcessResult::Modified), - // ); - // drop(data); - - // let hm = pkt.meta.0.headers_mut(); - - // let mut pop_eth = false; - // let mut pop_ip = false; - // let mut pop_encap = false; - // for xf in &xforms.hdr { - // // opte::engine::err!("xf..."); - // if let HeaderAction::Pop = &xf.outer_ether { - // pop_eth = true; - // } - // if let HeaderAction::Pop = &xf.outer_ip { - // pop_ip = true; - // } - // if let HeaderAction::Pop = &xf.outer_encap { - // pop_encap = true; - // } - // if let HeaderAction::Modify(m, _) = &xf.inner_ether { - // if let Some(src) = m.src { - // hm.inner_eth.set_source(src.bytes().into()); - // } - // if let Some(dst) = m.dst { - // hm.inner_eth.set_destination(dst.bytes().into()); - // } - // } - // if let HeaderAction::Modify(m, _) = &xf.inner_ip { - // match m { - // super::headers::IpMod::Ip4(v4) => { - // let Some(ingot::example_chain::L3::Ipv4( - // ref mut v4_t, - // )) = hm.inner_l3 - // else { - // return Err(ProcessError::FlowTableFull { - // kind: "()", - // limit: 0, - // }); - // }; - // if let Some(src) = v4.src { - // dirty_csum = true; - // v4_t.set_source(src.into()); - // } - // if let Some(dst) = v4.dst { - // dirty_csum = true; - // v4_t.set_destination(dst.into()); - // } - // } - // super::headers::IpMod::Ip6(v6) => { - // let Some(ingot::example_chain::L3::Ipv6( - // ref mut v6_t, - // )) = hm.inner_l3 - // else { - // return Err(ProcessError::FlowTableFull { - // kind: "()", - // limit: 0, - // }); - // }; - // if let Some(src) = v6.src { - // dirty_csum = true; - // v6_t.set_source(src.into()); - // } - // if let Some(dst) = v6.dst { - // dirty_csum = true; - // v6_t.set_destination(dst.into()); - // } - // } - // } - // } - // if let UlpHeaderAction::Modify(m) = 
&xf.inner_ulp { - // if let Some(src) = &m.generic.src_port { - // match hm.inner_ulp { - // Some(Ulp::Tcp(ref mut t)) => { - // dirty_csum = true; - // t.set_source(*src) - // } - // Some(Ulp::Udp(ref mut t)) => { - // dirty_csum = true; - // t.set_source(*src) - // } - // _ => {} - // } - // } - // if let Some(dst) = &m.generic.dst_port { - // match hm.inner_ulp { - // Some(Ulp::Tcp(ref mut t)) => { - // dirty_csum = true; - // t.set_destination(*dst) - // } - // Some(Ulp::Udp(ref mut t)) => { - // dirty_csum = true; - // t.set_destination(*dst) - // } - // _ => {} - // } - // } - // if let Some(flags) = &m.tcp_flags { - // match hm.inner_ulp { - // Some(Ulp::Tcp(ref mut t)) => { - // dirty_csum = true; - // t.set_flags(TcpFlags::from_bits_retain( - // *flags, - // )) - // } - // _ => {} - // } - // } - // if let Some(new_id) = &m.icmp_id { - // match hm.inner_ulp { - // Some(Ulp::IcmpV4(ref mut pkt)) - // if pkt.ty() == 0 || pkt.ty() == 3 => - // { - // dirty_csum = true; - // pkt.rest_of_hdr_mut()[..2] - // .copy_from_slice(&new_id.to_be_bytes()) - // } - // Some(Ulp::IcmpV6(ref mut pkt)) - // if pkt.ty() == 128 || pkt.ty() == 129 => - // { - // dirty_csum = true; - // pkt.rest_of_hdr_mut()[..2] - // .copy_from_slice(&new_id.to_be_bytes()) - // } - // _ => {} - // } - // } - // } - // } - - // if dirty_csum { - // // TODO: do something. 
- // } - - // match (pop_eth, pop_ip, pop_encap) { - // (true, true, true) => Ok(ThinProcRes::PopEncap), - // (false, false, false) => Ok(ThinProcRes::Na), - // _ => Err(ProcessError::FlowTableFull { - // kind: "()", - // limit: 0, - // }), - // } - // } - // } - // } + pub fn thin_process( + &self, + dir: Direction, + pkt: &mut Packet2, + ) -> result::Result { + use ingot::ethernet::EthernetMut; + use ingot::example_chain::Ulp; + use ingot::icmp::IcmpV4Mut; + use ingot::icmp::IcmpV4Ref; + use ingot::icmp::IcmpV6Mut; + use ingot::icmp::IcmpV6Ref; + use ingot::ip::Ipv4Mut; + use ingot::ip::Ipv6Mut; + use ingot::tcp::TcpFlags; + use ingot::tcp::TcpMut; + use ingot::udp::UdpMut; + + let flow_before = pkt.flow(); + // let flow_before = *pkt.flow(); + let _epoch = self.epoch.load(SeqCst); + let mut data = self.data.lock(); + check_state!(data.state, [PortState::Running]) + .map_err(|_| ProcessError::BadState(data.state))?; + + let mut dirty_csum = false; + + // self.port_process_entry_probe(dir, &flow_before, epoch, pskt); + // TODO: what stats? lmao + match dir { + Direction::Out => { + // opte::engine::err!("looking up {:?} in outdir...", flow_before); + let a = data.uft_out.get(&flow_before); + let Some(a) = a else { + // eh. It will get recirc'd for free... + // opte::engine::err!("not found! 
Releasing!"); + return Err(ProcessError::FlowTableFull { + kind: "()", + limit: 0, + }); + }; + pkt.set_l4_hash(a.state().l4_hash); + // opte::engine::err!("found!"); + let xforms = Arc::clone(&a.state().xforms); + Self::update_stats_out( + &mut data.stats.vals, + &Ok(InternalProcessResult::Modified { + transform: None, + tcp_state: None, + }), + ); + drop(data); + + let hm = &mut pkt.meta_mut().headers; + + let mut new_eth = None; + let mut new_ip = None; + let mut new_encap = None; + // opte::engine::err!("xforms {:?}!", &a.state().xforms.hdr); + for xf in &xforms.hdr { + // opte::engine::err!("xf..."); + if let HeaderAction::Push(outer_eth) = &xf.outer_ether { + new_eth = Some(outer_eth.clone()); + } + if let HeaderAction::Push(outer_ip) = &xf.outer_ip { + new_ip = Some(outer_ip.clone()); + } + if let HeaderAction::Push(outer_ec) = &xf.outer_encap { + new_encap = Some(outer_ec.clone()); + } + if let HeaderAction::Modify(m) = &xf.inner_ether { + if let Some(src) = m.src { + hm.inner_eth.set_source(src.bytes().into()); + } + if let Some(dst) = m.dst { + hm.inner_eth.set_destination(dst.bytes().into()); + } + } + if let HeaderAction::Modify(m) = &xf.inner_ip { + match m { + super::headers::IpMod::Ip4(v4) => { + let Some(ingot::example_chain::L3::Ipv4( + ref mut v4_t, + )) = hm.inner_l3 + else { + return Err(ProcessError::FlowTableFull { + kind: "()", + limit: 0, + }); + }; + if let Some(src) = v4.src { + dirty_csum = true; + v4_t.set_source(src.into()); + } + if let Some(dst) = v4.dst { + dirty_csum = true; + v4_t.set_destination(dst.into()); + } + } + super::headers::IpMod::Ip6(v6) => { + let Some(ingot::example_chain::L3::Ipv6( + ref mut v6_t, + )) = hm.inner_l3 + else { + return Err(ProcessError::FlowTableFull { + kind: "()", + limit: 0, + }); + }; + if let Some(src) = v6.src { + dirty_csum = true; + v6_t.set_source(src.into()); + } + if let Some(dst) = v6.dst { + dirty_csum = true; + v6_t.set_destination(dst.into()); + } + } + } + } + if let 
UlpHeaderAction::Modify(m) = &xf.inner_ulp { + if let Some(src) = &m.generic.src_port { + match hm.inner_ulp { + Some(Ulp::Tcp(ref mut t)) => { + dirty_csum = true; + t.set_source(*src) + } + Some(Ulp::Udp(ref mut t)) => { + dirty_csum = true; + t.set_source(*src) + } + _ => {} + } + } + if let Some(dst) = &m.generic.dst_port { + match hm.inner_ulp { + Some(Ulp::Tcp(ref mut t)) => { + dirty_csum = true; + t.set_destination(*dst) + } + Some(Ulp::Udp(ref mut t)) => { + dirty_csum = true; + t.set_destination(*dst) + } + _ => {} + } + } + if let Some(flags) = &m.tcp_flags { + match hm.inner_ulp { + Some(Ulp::Tcp(ref mut t)) => { + dirty_csum = true; + t.set_flags(TcpFlags::from_bits_retain( + *flags, + )) + } + _ => {} + } + } + if let Some(new_id) = &m.icmp_id { + match hm.inner_ulp { + Some(Ulp::IcmpV4(ref mut pkt)) + if pkt.ty() == 0 || pkt.ty() == 3 => + { + dirty_csum = true; + pkt.rest_of_hdr_mut()[..2] + .copy_from_slice(&new_id.to_be_bytes()) + } + Some(Ulp::IcmpV6(ref mut pkt)) + if pkt.ty() == 128 || pkt.ty() == 129 => + { + dirty_csum = true; + pkt.rest_of_hdr_mut()[..2] + .copy_from_slice(&new_id.to_be_bytes()) + } + _ => {} + } + } + } + } + + if dirty_csum { + // TODO: something. + } + + match (new_eth, new_ip, new_encap) { + (Some(a), Some(b), Some(c)) => { + Ok(ThinProcRes::PushEncap(a, b, c)) + } + (None, None, None) => Ok(ThinProcRes::Na), + _ => Err(ProcessError::FlowTableFull { + kind: "()", + limit: 0, + }), + } + } + + Direction::In => { + let a = data.uft_in.get(&flow_before); + let Some(a) = a else { + // eh. 
+ return Err(ProcessError::FlowTableFull { + kind: "()", + limit: 0, + }); + }; + pkt.set_l4_hash(a.state().l4_hash); + let xforms = Arc::clone(&a.state().xforms); + Self::update_stats_in( + &mut data.stats.vals, + &Ok(InternalProcessResult::Modified { + transform: None, + tcp_state: None, + }), + ); + drop(data); + + let hm = &mut pkt.meta_mut().headers; + + let mut pop_eth = false; + let mut pop_ip = false; + let mut pop_encap = false; + for xf in &xforms.hdr { + // opte::engine::err!("xf..."); + if let HeaderAction::Pop = &xf.outer_ether { + pop_eth = true; + } + if let HeaderAction::Pop = &xf.outer_ip { + pop_ip = true; + } + if let HeaderAction::Pop = &xf.outer_encap { + pop_encap = true; + } + if let HeaderAction::Modify(m) = &xf.inner_ether { + if let Some(src) = m.src { + hm.inner_eth.set_source(src.bytes().into()); + } + if let Some(dst) = m.dst { + hm.inner_eth.set_destination(dst.bytes().into()); + } + } + if let HeaderAction::Modify(m) = &xf.inner_ip { + match m { + super::headers::IpMod::Ip4(v4) => { + let Some(ingot::example_chain::L3::Ipv4( + ref mut v4_t, + )) = hm.inner_l3 + else { + return Err(ProcessError::FlowTableFull { + kind: "()", + limit: 0, + }); + }; + if let Some(src) = v4.src { + dirty_csum = true; + v4_t.set_source(src.into()); + } + if let Some(dst) = v4.dst { + dirty_csum = true; + v4_t.set_destination(dst.into()); + } + } + super::headers::IpMod::Ip6(v6) => { + let Some(ingot::example_chain::L3::Ipv6( + ref mut v6_t, + )) = hm.inner_l3 + else { + return Err(ProcessError::FlowTableFull { + kind: "()", + limit: 0, + }); + }; + if let Some(src) = v6.src { + dirty_csum = true; + v6_t.set_source(src.into()); + } + if let Some(dst) = v6.dst { + dirty_csum = true; + v6_t.set_destination(dst.into()); + } + } + } + } + if let UlpHeaderAction::Modify(m) = &xf.inner_ulp { + if let Some(src) = &m.generic.src_port { + match hm.inner_ulp { + Some(Ulp::Tcp(ref mut t)) => { + dirty_csum = true; + t.set_source(*src) + } + Some(Ulp::Udp(ref mut t)) 
=> { + dirty_csum = true; + t.set_source(*src) + } + _ => {} + } + } + if let Some(dst) = &m.generic.dst_port { + match hm.inner_ulp { + Some(Ulp::Tcp(ref mut t)) => { + dirty_csum = true; + t.set_destination(*dst) + } + Some(Ulp::Udp(ref mut t)) => { + dirty_csum = true; + t.set_destination(*dst) + } + _ => {} + } + } + if let Some(flags) = &m.tcp_flags { + match hm.inner_ulp { + Some(Ulp::Tcp(ref mut t)) => { + dirty_csum = true; + t.set_flags(TcpFlags::from_bits_retain( + *flags, + )) + } + _ => {} + } + } + if let Some(new_id) = &m.icmp_id { + match hm.inner_ulp { + Some(Ulp::IcmpV4(ref mut pkt)) + if pkt.ty() == 0 || pkt.ty() == 3 => + { + dirty_csum = true; + pkt.rest_of_hdr_mut()[..2] + .copy_from_slice(&new_id.to_be_bytes()) + } + Some(Ulp::IcmpV6(ref mut pkt)) + if pkt.ty() == 128 || pkt.ty() == 129 => + { + dirty_csum = true; + pkt.rest_of_hdr_mut()[..2] + .copy_from_slice(&new_id.to_be_bytes()) + } + _ => {} + } + } + } + } + + if dirty_csum { + // TODO: do something. + } + + match (pop_eth, pop_ip, pop_encap) { + (true, true, true) => Ok(ThinProcRes::PopEncap), + (false, false, false) => Ok(ThinProcRes::Na), + _ => Err(ProcessError::FlowTableFull { + kind: "()", + limit: 0, + }), + } + } + } + } /// Remove the rule identified by the `dir`, `layer_name`, `id` /// combination, if such a rule exists. 
@@ -1802,6 +1817,7 @@ impl Transforms { Self { hdr: Vec::with_capacity(8), body: Vec::with_capacity(2) } } + #[inline] fn apply( &self, pkt: &mut Packet2>, diff --git a/xde/src/xde.rs b/xde/src/xde.rs index cb40189e..baeee682 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -54,6 +54,7 @@ use ingot::geneve::ValidGeneve; use ingot::ip::IpProtocol; use ingot::ip::Ipv6Mut; use ingot::ip::ValidIpv6; +use ingot::types::Emit; use ingot::types::Header; use ingot::types::HeaderParse; use ingot::udp::UdpMut; @@ -1594,6 +1595,137 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { let port = &src_dev.port; + // BEGIN THIN_PROCESS RE-EXPERIMENT + // let mut ip6_src = Default::default(); + // let mut ip6_dst = Default::default(); + // let f_hash; + // if let Ok(decision) = port.thin_process(Direction::Out, &mut parsed_pkt) { + // match decision { + // opte::engine::port::ThinProcRes::PushEncap( + // eth, + // ip, + // udp, + // ) => { + // f_hash = parsed_pkt.l4_hash(); + // drop(parsed_pkt); + + // // TODO: generate methods to fill a maybeuninit. + // // total bytes: ETH 14, V6 40, UDP 8, GENEVE 8 + // let new_hdrs = 14 + 40 + 8 + 8; + // let mut new_blk = + // MsgBlk::new_with_headroom(2, new_hdrs); + + // let w_encap_bytes = (pkt_len_old + 16) as u16; + + // new_blk.write(14, |uninit| { + // let complete_eth = + // opte::ingot::ethernet::Ethernet { + // destination: eth.dst.bytes().into(), + // source: eth.src.bytes().into(), + // ethertype: ingot::ethernet::Ethertype( + // eth.ether_type.into(), + // ), + // }; + + // complete_eth + // .emit_uninit(uninit) + // .expect("must be enough room..."); + // }); + + // // we know we'er only pushing v6. 
+ // let IpPush::Ip6(v6) = ip else { panic!() }; + // ip6_src = v6.src; + // ip6_dst = v6.dst; + + // new_blk.write(40, |uninit| { + // let complete_v6 = opte::ingot::ip::Ipv6 { + // version: 6, + // dscp: 0, + // ecn: ingot::ip::Ecn::NotCapable, + // flow_label: 12345678, + // payload_len: w_encap_bytes, + // next_header: ingot::ip::IpProtocol( + // v6.proto.into(), + // ), + // hop_limit: 128, + // source: v6.src.bytes().into(), + // destination: v6.dst.bytes().into(), + // v6ext: vec![].into(), + // }; + + // complete_v6 + // .emit_uninit(uninit) + // .expect("must be enough room..."); + // }); + + // let EncapPush::Geneve(gen) = udp else { panic!() }; + // new_blk.write(16, |uninit| { + // let complete_udp = opte::ingot::udp::Udp { + // source: gen.entropy, + // destination: 6081, + // length: w_encap_bytes, + // checksum: 0, + // }; + // let complete_geneve = opte::ingot::geneve::Geneve { + // version: 0, + // opt_len: 0, + // flags: opte::ingot::geneve::GeneveFlags::empty( + // ), + // protocol_type: + // opte::ingot::ethernet::Ethertype::ETHERNET, + // vni: gen.vni.into(), + // reserved: 0, + // options: Vec::new(), + // }; + + // let len = complete_udp + // .emit_uninit(uninit) + // .expect("must be enough room..."); + // complete_geneve + // .emit_uninit(&mut uninit[len..]) + // .expect("must be enough room..."); + // }); + + // core::mem::swap(&mut new_blk, &mut pkt); + // pkt.extend_if_one(new_blk); + // } + // // we're in Tx for a ULP'd pkt -- this should NEVER happen. + // opte::engine::port::ThinProcRes::PopEncap => unreachable!(), + // opte::engine::port::ThinProcRes::Na => unreachable!(), + // } + + // if ip6_dst == ip6_src { + // // todo. 
broken just now ig + // // return guest_loopback(src_dev, pkt, vni); + // opte::engine::err!("eh?"); + // return ptr::null_mut(); + // } + + // let my_key = RouteKey { dst: ip6_dst, l4_hash: Some(f_hash) }; + // let Route { src, dst, underlay_dev } = + // src_dev.routes.next_hop(my_key, src_dev); + + // // Get a pointer to the beginning of the outer frame and + // // fill in the dst/src addresses before sending out the + // // device. + // let mblk = pkt.unwrap_mblk(); + // let rptr = (*mblk).b_rptr; + // ptr::copy(dst.as_ptr(), rptr, 6); + // ptr::copy(src.as_ptr(), rptr.add(6), 6); + // // Unwrap: We know the packet is good because we just + // // unwrapped it above. + // let new_pkt = MsgBlk::wrap_mblk(mblk).unwrap(); + + // underlay_dev.stream.tx_drop_on_no_desc2( + // new_pkt, + // hint, + // MacTxFlags::empty(), + // ); + + // return ptr::null_mut(); + // } + // END THIN_PROCESS RE-EXPERIMENT + // The port processing code will fire a probe that describes what // action was taken -- there should be no need to add probes or // prints here. @@ -1949,6 +2081,42 @@ unsafe fn xde_rx_one( } let port = &dev.port; + + // BEGIN THIN_PROCESS EXPERIMENT + // let h = parsed_pkt.meta().initial_lens(); + + // let pop_len: usize = h.outer_eth + h.outer_l3 + h.outer_encap; + + // if let Ok(decision) = port.thin_process(Direction::In, &mut parsed_pkt) { + // match decision { + // opte::engine::port::ThinProcRes::PopEncap => { + // let mut to_pop = pop_len; + // drop(parsed_pkt); + // for layer in pkt.iter_mut() { + // let max_drop = layer.len(); + // let will_drop = max_drop.min(to_pop); + // layer.drop_front_bytes(will_drop); + // to_pop -= will_drop; + + // if to_pop == 0 { + // break; + // } + // } + + // // could theoretically have empty segments here. + // // not an issue over NIC for now. 
+ // mac::mac_rx(dev.mh, mrh, pkt.unwrap_mblk()); + // } + // // we know this to be true given how we cfg opte + // opte::engine::port::ThinProcRes::PushEncap(_, _, _) => { + // unreachable!() + // } + // opte::engine::port::ThinProcRes::Na => unreachable!(), + // } + // return; + // } + // END THIN_PROCESS EXPERIMENT + let res = port.process(Direction::In, &mut parsed_pkt, ActionMeta::new()); let mut emit_spec = parsed_pkt.emit_spec(); From f88fe1bcb68f454dc5828b1d27abb90eaff7508f Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 10 Sep 2024 15:56:06 +0100 Subject: [PATCH 024/115] Re-enable cksum update when needed. --- lib/opte/src/engine/headers.rs | 1 + lib/opte/src/engine/ingot_packet.rs | 285 +++++++++++++++++----------- lib/opte/src/engine/rule.rs | 1 + 3 files changed, 174 insertions(+), 113 deletions(-) diff --git a/lib/opte/src/engine/headers.rs b/lib/opte/src/engine/headers.rs index 0675b743..530a0f1c 100644 --- a/lib/opte/src/engine/headers.rs +++ b/lib/opte/src/engine/headers.rs @@ -653,6 +653,7 @@ where M: fmt::Debug, X: HeaderActionModify + From + HasInnerCksum, { + #[inline] fn act_on( &mut self, action: &HeaderAction, diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index b8be25a7..bff46d93 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -1479,46 +1479,24 @@ impl Packet2> { }; // XXX TODO: make these valid even AFTER all packet pushings occur. 
- let pseudo_csum = match (&self.state.meta.headers) - .inner_eth - .ethertype() - { - // ARP - Ethertype::ARP => { - return None; - } - // Ipv4 - Ethertype::IPV4 => { - let h = &self.state.meta.headers; - let mut pseudo_hdr_bytes = [0u8; 12]; - let Some(L3::Ipv4(ref v4)) = h.inner_l3 else { panic!() }; - pseudo_hdr_bytes[0..4] - .copy_from_slice(&v4.source().octets()); - pseudo_hdr_bytes[4..8] - .copy_from_slice(&v4.destination().octets()); - pseudo_hdr_bytes[9] = v4.protocol().0; - let ulp_len = v4.total_len() - 4 * (v4.ihl() as u16); - pseudo_hdr_bytes[10..] - .copy_from_slice(&ulp_len.to_be_bytes()); - - Checksum::compute(&pseudo_hdr_bytes) - } - // Ipv6 - Ethertype::IPV6 => { - let h = &self.state.meta.headers; - let mut pseudo_hdr_bytes = [0u8; 40]; - let Some(L3::Ipv6(ref v6)) = h.inner_l3 else { panic!() }; - pseudo_hdr_bytes[0..16] - .copy_from_slice(&v6.source().octets()); - pseudo_hdr_bytes[16..32] - .copy_from_slice(&v6.destination().octets()); - pseudo_hdr_bytes[39] = v6.next_header().0; - let ulp_len = v6.payload_len() as u32; - pseudo_hdr_bytes[32..36] - .copy_from_slice(&ulp_len.to_be_bytes()); - Checksum::compute(&pseudo_hdr_bytes) - } - _ => unreachable!(), + let pseudo_csum = + match (&self.state.meta.headers).inner_eth.ethertype() { + // ARP + Ethertype::ARP => { + return None; + } + Ethertype::IPV4 | Ethertype::IPV6 => self + .state + .meta + .headers + .inner_l3 + .as_ref() + .map(l3_pseudo_header), + _ => unreachable!(), + }; + + let Some(pseudo_csum) = pseudo_csum else { + return None; }; self.state.meta.inner_ulp().and_then(csum_minus_hdr).map(|mut v| { @@ -1547,80 +1525,141 @@ impl Packet2> { /// /// This avoids duplicating work already done by the client in the /// case where checksums are **not** being offloaded to the hardware. 
- pub fn update_checksums(&mut self) { + pub fn update_checksums(&mut self) + where + T::Chunk: ByteSliceMut, + { if !self.state.inner_csum_dirty { return; } let update_ip = self.state.meta.has_ip_csum(); let update_ulp = self.state.meta.has_ulp_csum(); - // TODO - - // // If a ULP exists, then compute and set its checksum. - // if let (true, Some(ulp_off)) = - // (update_ulp, self.state.hdr_offsets.inner.ulp) - // { - // // Start by reusing the known checksum of the body. - // let mut csum = self.state.body_csum.unwrap(); - // // Unwrap: Can't have a ULP without an IP. - // let ip = self.meta().inner.ip.unwrap(); - // // Add pseudo header checksum. - // let pseudo_csum = ip.pseudo_csum(); - // csum += pseudo_csum; - // // All headers must reside in the first segment. - // let all_hdr_bytes = self.segs[0].slice_mut(); - // // Determine ULP slice and add its bytes to the - // // checksum. - // let ulp_start = ulp_off.seg_pos; - // let ulp_end = ulp_start + ulp_off.hdr_len; - // let ulp = &mut all_hdr_bytes[ulp_start..ulp_end]; - - // match self.state.meta.inner.ulp.as_mut().unwrap() { - // UlpMeta::Icmpv4(icmp) => { - // Self::update_icmp_csum( - // icmp, - // // ICMP4 requires the body_csum *without* - // // the pseudoheader added back in. - // self.state.body_csum.unwrap(), - // ulp, - // ); - // } - - // UlpMeta::Icmpv6(icmp) => { - // Self::update_icmp_csum(icmp, csum, ulp); - // } - - // UlpMeta::Tcp(tcp) => { - // Self::update_tcp_csum(tcp, csum, ulp); - // } - - // UlpMeta::Udp(udp) => { - // Self::update_udp_csum(udp, csum, ulp); - // } - // } - // } - - // // Compute and fill in the IPv4 header checksum. 
- // if let (true, Some(IpMeta::Ip4(ip))) = - // (update_ip, self.state.meta.inner.ip.as_mut()) - // { - // let ip_off = self.state.hdr_offsets.inner.ip.unwrap(); - // let all_hdr_bytes = self.segs[0].slice_mut(); - // let ip_start = ip_off.seg_pos; - // let ip_end = ip_start + ip_off.hdr_len; - // let ip_bytes = &mut all_hdr_bytes[ip_start..ip_end]; - // let csum_start = Ipv4Hdr::CSUM_BEGIN; - // let csum_end = Ipv4Hdr::CSUM_END; - // ip_bytes[csum_start..csum_end].copy_from_slice(&[0; 2]); - // let csum = - // HeaderChecksum::from(Checksum::compute(ip_bytes)).bytes(); - - // // Update the metadata. - // ip.csum = csum; - - // // Update the header bytes. - // ip_bytes[csum_start..csum_end].copy_from_slice(&csum[..]); - // } + // Start by reusing the known checksum of the body. + let mut body_csum = self.body_csum().unwrap_or_default(); + + // If a ULP exists, then compute and set its checksum. + if let (true, Some(ulp)) = + (update_ulp, &mut self.state.meta.headers.inner_ulp) + { + let mut csum = body_csum; + // Unwrap: Can't have a ULP without an IP. + let ip = self.state.meta.headers.inner_l3.as_ref().unwrap(); + // Add pseudo header checksum. + let pseudo_csum = l3_pseudo_header(ip); + csum += pseudo_csum; + // Determine ULP slice and add its bytes to the + // checksum. + match ulp { + // ICMP4 requires the body_csum *without* + // the pseudoheader added back in. 
+ Ulp::IcmpV4(i4) => { + let mut bytes = [0u8; 8]; + i4.set_checksum(0); + i4.emit_raw(&mut bytes[..]); + body_csum.add_bytes(&bytes[..]); + i4.set_checksum(body_csum.finalize()); + } + Ulp::IcmpV6(i6) => { + let mut bytes = [0u8; 8]; + i6.set_checksum(0); + i6.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + i6.set_checksum(csum.finalize()); + } + Ulp::Tcp(tcp) => { + tcp.set_checksum(0); + match tcp { + IngotPacket::Repr(tcp) => { + let mut bytes = [0u8; 56]; + tcp.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + } + IngotPacket::Raw(tcp) => { + csum.add_bytes(tcp.0.bytes()); + match &tcp.1 { + IngotPacket::Repr(opts) => { + csum.add_bytes(&*opts); + } + IngotPacket::Raw(opts) => { + csum.add_bytes(&*opts); + } + } + } + } + tcp.set_checksum(csum.finalize()); + } + Ulp::Udp(udp) => { + udp.set_checksum(0); + match udp { + IngotPacket::Repr(udp) => { + let mut bytes = [0u8; 8]; + udp.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + } + IngotPacket::Raw(udp) => { + csum.add_bytes(udp.0.bytes()); + } + } + udp.set_checksum(csum.finalize()); + } + } + } + + // Compute and fill in the IPv4 header checksum. 
+ if let (true, Some(L3::Ipv4(ip))) = + (update_ip, &mut self.state.meta.headers.inner_l3) + { + ip.set_checksum(0); + + let mut csum = Checksum::default(); + + match ip { + IngotPacket::Repr(ip) => { + let mut bytes = [0u8; 56]; + ip.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + } + IngotPacket::Raw(ip) => { + csum.add_bytes(ip.0.bytes()); + match &ip.1 { + IngotPacket::Repr(opts) => { + csum.add_bytes(&*opts); + } + IngotPacket::Raw(opts) => { + csum.add_bytes(&*opts); + } + } + } + } + + ip.set_checksum(csum.finalize()); + } + } +} + +fn l3_pseudo_header(l3: &L3) -> Checksum { + match l3 { + L3::Ipv4(v4) => { + let mut pseudo_hdr_bytes = [0u8; 12]; + pseudo_hdr_bytes[0..4].copy_from_slice(&v4.source().octets()); + pseudo_hdr_bytes[4..8].copy_from_slice(&v4.destination().octets()); + pseudo_hdr_bytes[9] = v4.protocol().0; + let ulp_len = v4.total_len() - 4 * (v4.ihl() as u16); + pseudo_hdr_bytes[10..].copy_from_slice(&ulp_len.to_be_bytes()); + + Checksum::compute(&pseudo_hdr_bytes) + } + L3::Ipv6(v6) => { + let mut pseudo_hdr_bytes = [0u8; 40]; + pseudo_hdr_bytes[0..16].copy_from_slice(&v6.source().octets()); + pseudo_hdr_bytes[16..32] + .copy_from_slice(&v6.destination().octets()); + pseudo_hdr_bytes[39] = v6.next_header().0; + let ulp_len = v6.payload_len() as u32; + pseudo_hdr_bytes[32..36].copy_from_slice(&ulp_len.to_be_bytes()); + Checksum::compute(&pseudo_hdr_bytes) + } } } @@ -2360,8 +2399,28 @@ impl From for L3 { } } -impl PushAction for Ethernet { - fn push(&self) -> Ethernet { - *self - } -} +// impl PushAction for Ethernet { +// fn push(&self) -> Ethernet { +// *self +// } +// } + +// impl PushAction>> for IpPush { +// fn push(&self) -> OwnedPacket> { +// OwnedPacket::Repr(match self { +// IpPush::Ip4(v4) => L3Repr::Ipv4(Ipv4 { +// protocol: IpProtocol(u8::from(v4.proto)), +// source: v4.src.bytes().into(), +// destination: v4.dst.bytes().into(), +// flags: Ipv4Flags::DONT_FRAGMENT, +// ..Default::default() +// }), +// IpPush::Ip6(v6) => 
L3Repr::Ipv6(Ipv6 { +// next_header: IpProtocol(u8::from(v6.proto)), +// source: v6.src.bytes().into(), +// destination: v6.dst.bytes().into(), +// ..Default::default() +// }), +// }) +// } +// } diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index fef66e9e..08a08a92 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -46,6 +46,7 @@ use core::fmt::Debug; use core::fmt::Display; use illumos_sys_hdrs::c_char; use illumos_sys_hdrs::uintptr_t; +use ingot::ethernet::Ethernet; use ingot::types::Read; use opte_api::Direction; use serde::Deserialize; From ac79b200fddcfb47d0ae8222f8df9a1d378d2f44 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 12 Sep 2024 14:30:43 +0100 Subject: [PATCH 025/115] Start work on removing all the insane `into`s, everywhere. --- Cargo.lock | 27 +-- Cargo.toml | 4 +- crates/opte-api/Cargo.toml | 1 + crates/opte-api/src/encap.rs | 81 +------- crates/opte-api/src/ip.rs | 21 ++ crates/opte-api/src/mac.rs | 22 ++- lib/opte/Cargo.toml | 1 + lib/opte/src/engine/arp.rs | 6 +- lib/opte/src/engine/ether.rs | 8 +- lib/opte/src/engine/geneve.rs | 4 +- lib/opte/src/engine/icmp/mod.rs | 2 +- lib/opte/src/engine/ingot_base.rs | 126 ++++++++++++ lib/opte/src/engine/ingot_packet.rs | 290 ++++++++++++++++------------ lib/opte/src/engine/ip4.rs | 4 +- lib/opte/src/engine/mod.rs | 1 + lib/opte/src/engine/port.rs | 1 + lib/opte/src/engine/rule.rs | 24 ++- lib/opte/src/engine/tcp.rs | 11 +- lib/opte/src/engine/udp.rs | 4 +- lib/oxide-vpc/src/api.rs | 4 +- rust-toolchain.toml | 2 +- xde/rust-toolchain.toml | 2 +- xde/src/xde.rs | 4 +- xde/x86_64-unknown-unknown.json | 2 +- 24 files changed, 404 insertions(+), 248 deletions(-) create mode 100644 lib/opte/src/engine/ingot_base.rs diff --git a/Cargo.lock b/Cargo.lock index 50a60b28..6b17da5b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -882,19 +882,20 @@ dependencies = [ [[package]] name = "ingot" version = "0.1.0" -source = 
"git+https://github.com/oxidecomputer/ingot.git?rev=7188e7adb3f8e404fcc431501dd2312bad47b628#7188e7adb3f8e404fcc431501dd2312bad47b628" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=cc332a8d74438fa27a1d0a0205428b700f31220a#cc332a8d74438fa27a1d0a0205428b700f31220a" dependencies = [ "bitflags 2.6.0", "ingot-macros", "ingot-types", "macaddr", - "zerocopy 0.8.0-alpha.17", + "serde", + "zerocopy 0.8.0-alpha.21", ] [[package]] name = "ingot-macros" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=7188e7adb3f8e404fcc431501dd2312bad47b628#7188e7adb3f8e404fcc431501dd2312bad47b628" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=cc332a8d74438fa27a1d0a0205428b700f31220a#cc332a8d74438fa27a1d0a0205428b700f31220a" dependencies = [ "darling", "itertools 0.13.0", @@ -908,12 +909,12 @@ dependencies = [ [[package]] name = "ingot-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=7188e7adb3f8e404fcc431501dd2312bad47b628#7188e7adb3f8e404fcc431501dd2312bad47b628" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=cc332a8d74438fa27a1d0a0205428b700f31220a#cc332a8d74438fa27a1d0a0205428b700f31220a" dependencies = [ "heapless", "ingot-macros", "macaddr", - "zerocopy 0.8.0-alpha.17", + "zerocopy 0.8.0-alpha.21", ] [[package]] @@ -1254,6 +1255,7 @@ checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" name = "opte" version = "0.1.0" dependencies = [ + "bitflags 2.6.0", "cfg-if", "crc32fast", "derror-macro", @@ -1271,7 +1273,7 @@ dependencies = [ "tabwriter", "usdt", "version_check", - "zerocopy 0.8.0-alpha.17", + "zerocopy 0.8.0-alpha.21", ] [[package]] @@ -1279,6 +1281,7 @@ name = "opte-api" version = "0.1.0" dependencies = [ "illumos-sys-hdrs", + "ingot", "ipnetwork", "postcard", "serde", @@ -1372,7 +1375,7 @@ dependencies = [ "smoltcp", "tabwriter", "usdt", - "zerocopy 0.8.0-alpha.17", + "zerocopy 0.8.0-alpha.21", ] [[package]] @@ -2744,11 +2747,11 @@ 
dependencies = [ [[package]] name = "zerocopy" -version = "0.8.0-alpha.17" +version = "0.8.0-alpha.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da056c7307048e30bce8d625c6f0633366d31f1086b3c87ed9b1f18fa1081cb1" +checksum = "b945226be416f7fcacff01ad61474f192b3f789a3ceee54d48cb1e66d929e449" dependencies = [ - "zerocopy-derive 0.8.0-alpha.17", + "zerocopy-derive 0.8.0-alpha.21", ] [[package]] @@ -2764,9 +2767,9 @@ dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.8.0-alpha.17" +version = "0.8.0-alpha.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9eb22123403bf9c05af423e2ced336a5fc2853df9179b42bea8144d6bf497a57" +checksum = "5cd62f40c5831a236cc3750ce94e668c06d68af2579c1703b1d4e769eeb8e646" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 63a5b596..24c85a7a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,7 +50,7 @@ ctor = "0.2" darling = "0.20" dyn-clone = "1.0" heapless = "0.8" -ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "7188e7adb3f8e404fcc431501dd2312bad47b628"} +ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "cc332a8d74438fa27a1d0a0205428b700f31220a"} ipnetwork = { version = "0.20", default-features = false } itertools = { version = "0.13", default-features = false } libc = "0.2" @@ -75,7 +75,7 @@ thiserror = "1.0" toml = "0.8" usdt = "0.5" version_check = "0.9" -zerocopy = { version = "0.8.0-alpha.17", features = ["derive"] } +zerocopy = { version = "0.8.0-alpha.21", features = ["derive"] } zone = { git = "https://github.com/oxidecomputer/zone" } ztest = { git = "https://github.com/oxidecomputer/falcon", branch = "main" } poptrie = { git = "https://github.com/oxidecomputer/poptrie", branch = "multipath" } diff --git a/crates/opte-api/Cargo.toml b/crates/opte-api/Cargo.toml index daed612a..7c4d2e60 100644 --- a/crates/opte-api/Cargo.toml +++ b/crates/opte-api/Cargo.toml @@ -13,6 +13,7 @@ std = 
["ipnetwork"] [dependencies] illumos-sys-hdrs.workspace = true +ingot.workspace = true ipnetwork = { workspace = true, optional = true } postcard.workspace = true serde.workspace = true diff --git a/crates/opte-api/src/encap.rs b/crates/opte-api/src/encap.rs index d772b76e..142515d9 100644 --- a/crates/opte-api/src/encap.rs +++ b/crates/opte-api/src/encap.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company use alloc::string::String; use alloc::string::ToString; @@ -13,84 +13,7 @@ use core::str::FromStr; use serde::Deserialize; use serde::Serialize; -/// A Geneve Virtual Network Identifier (VNI). -#[derive( - Clone, Copy, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize, -)] -pub struct Vni { - // A VNI is 24-bit. By storing it this way we don't have to check - // the value on the opte-core side to know if it's a valid VNI, we - // just decode the bytes. - // - // The bytes are in network order. - inner: [u8; 3], -} - -impl Default for Vni { - fn default() -> Self { - Vni::new(0u32).unwrap() - } -} - -impl From for u32 { - fn from(vni: Vni) -> u32 { - let bytes = vni.inner; - u32::from_be_bytes([0, bytes[0], bytes[1], bytes[2]]) - } -} - -impl FromStr for Vni { - type Err = String; - - fn from_str(val: &str) -> Result { - let n = val.parse::().map_err(|e| e.to_string())?; - Self::new(n) - } -} - -impl Display for Vni { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", u32::from(*self)) - } -} - -// There's no reason to view the VNI as its raw array, so just present -// it in a human-friendly manner. 
-impl Debug for Vni { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Vni {{ inner: {} }}", self) - } -} - -const VNI_MAX: u32 = 0x00_FF_FF_FF; - -impl Vni { - pub fn as_u32(&self) -> u32 { - u32::from_be_bytes([0, self.inner[0], self.inner[1], self.inner[2]]) - } - - /// Return the bytes that represent this VNI. The bytes are in - /// network order. - pub fn bytes(&self) -> [u8; 3] { - self.inner - } - - /// Attempt to create a new VNI from any value which can be - /// converted to a `u32`. - /// - /// # Errors - /// - /// Returns an error when the value exceeds the 24-bit maximum. - pub fn new>(val: N) -> Result { - let val = val.into(); - if val > VNI_MAX { - return Err(format!("VNI value exceeds maximum: {}", val)); - } - - let be_bytes = val.to_be_bytes(); - Ok(Vni { inner: [be_bytes[1], be_bytes[2], be_bytes[3]] }) - } -} +pub use ingot::geneve::Vni; #[cfg(test)] mod test { diff --git a/crates/opte-api/src/ip.rs b/crates/opte-api/src/ip.rs index 4e533ac3..b23b05f9 100644 --- a/crates/opte-api/src/ip.rs +++ b/crates/opte-api/src/ip.rs @@ -15,6 +15,7 @@ use core::fmt::Display; use core::ops::Deref; use core::result; use core::str::FromStr; +use ingot::types::NetworkRepr; use serde::Deserialize; use serde::Serialize; @@ -1204,6 +1205,26 @@ impl From for ipnetwork::Ipv6Network { } } +impl NetworkRepr<[u8; 4]> for Ipv4Addr { + fn to_network(self) -> [u8; 4] { + self.inner + } + + fn from_network(val: [u8; 4]) -> Self { + Self { inner: val } + } +} + +impl NetworkRepr<[u8; 16]> for Ipv6Addr { + fn to_network(self) -> [u8; 16] { + self.inner + } + + fn from_network(val: [u8; 16]) -> Self { + Self { inner: val } + } +} + #[cfg(test)] mod test { use super::*; diff --git a/crates/opte-api/src/mac.rs b/crates/opte-api/src/mac.rs index 8a133f4b..36ec8c5e 100644 --- a/crates/opte-api/src/mac.rs +++ b/crates/opte-api/src/mac.rs @@ -11,17 +11,37 @@ use core::fmt; use core::fmt::Debug; use core::fmt::Display; use core::ops::Deref; +use 
ingot::types::NetworkRepr; use serde::Deserialize; use serde::Serialize; /// A MAC address. #[derive( - Clone, Copy, Default, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize, + Clone, + Copy, + Default, + Deserialize, + Eq, + Ord, + PartialEq, + PartialOrd, + Serialize, + Hash, )] pub struct MacAddr { inner: [u8; 6], } +impl NetworkRepr<[u8; 6]> for MacAddr { + fn to_network(self) -> [u8; 6] { + self.inner + } + + fn from_network(val: [u8; 6]) -> Self { + Self { inner: val } + } +} + impl MacAddr { pub const BROADCAST: Self = Self { inner: [0xFF; 6] }; pub const ZERO: Self = Self { inner: [0x00; 6] }; diff --git a/lib/opte/Cargo.toml b/lib/opte/Cargo.toml index b0c8160e..dcc77f47 100644 --- a/lib/opte/Cargo.toml +++ b/lib/opte/Cargo.toml @@ -29,6 +29,7 @@ opte-api.workspace = true ingot.workspace = true +bitflags.workspace = true cfg-if.workspace = true crc32fast = { workspace = true, optional = true } dyn-clone.workspace = true diff --git a/lib/opte/src/engine/arp.rs b/lib/opte/src/engine/arp.rs index d97d03ce..be783bdb 100644 --- a/lib/opte/src/engine/arp.rs +++ b/lib/opte/src/engine/arp.rs @@ -148,8 +148,10 @@ impl ArpEthIpv4 { pub fn emit(&self, dst: &mut [u8]) { debug_assert_eq!(dst.len(), ArpEthIpv4Raw::SIZE); - let mut raw = ArpEthIpv4Raw::new_mut(dst).unwrap(); - raw.write(ArpEthIpv4Raw::from(self)); + // let mut raw = ArpEthIpv4Raw::new_mut(dst).unwrap(); + // raw.write_to(); + + ArpEthIpv4Raw::from(self).write_to(dst).unwrap() } pub fn parse<'a, 'b, R>(rdr: &'b mut R) -> Result diff --git a/lib/opte/src/engine/ether.rs b/lib/opte/src/engine/ether.rs index f97c73f4..bce4842b 100644 --- a/lib/opte/src/engine/ether.rs +++ b/lib/opte/src/engine/ether.rs @@ -242,8 +242,10 @@ impl EtherMeta { #[inline] pub fn emit(&self, dst: &mut [u8]) { debug_assert_eq!(dst.len(), EtherHdrRaw::SIZE); - let mut raw = EtherHdrRaw::new_mut(dst).unwrap(); - raw.write(EtherHdrRaw::from(self)); + // let mut raw = EtherHdrRaw::new_mut(dst).unwrap(); + // raw. 
.write(EtherHdrRaw::from(self)); + + EtherHdrRaw::from(self).write_to(dst).unwrap() } #[inline] @@ -263,7 +265,7 @@ impl<'a> EtherHdr<'a> { pub const SIZE: usize = EtherHdrRaw::SIZE; pub fn as_bytes(&self) -> &[u8] { - self.bytes.bytes() + self.bytes.as_bytes() } pub fn ether_type(&self) -> EtherType { diff --git a/lib/opte/src/engine/geneve.rs b/lib/opte/src/engine/geneve.rs index 86a41f83..9afa3717 100644 --- a/lib/opte/src/engine/geneve.rs +++ b/lib/opte/src/engine/geneve.rs @@ -95,7 +95,9 @@ impl GeneveMeta { debug_assert_eq!(dst.len(), self.hdr_len_inner()); let (base, remainder) = dst.split_at_mut(GeneveHdrRaw::SIZE); let mut raw = GeneveHdrRaw::new_mut(base).unwrap(); - raw.write(GeneveHdrRaw::from(self)); + Ref::write(&mut raw, GeneveHdrRaw::from(self)); + + // GeneveHdrRaw::from(self).write_to(dst).unwrap(); raw.ver_opt_len = if self.oxide_external_pkt { GeneveOption::Oxide(OxideOption::External).emit(remainder) as u8 diff --git a/lib/opte/src/engine/icmp/mod.rs b/lib/opte/src/engine/icmp/mod.rs index 798832c8..45e41e63 100644 --- a/lib/opte/src/engine/icmp/mod.rs +++ b/lib/opte/src/engine/icmp/mod.rs @@ -156,7 +156,7 @@ impl<'a> IcmpHdr<'a> { pub fn csum_minus_hdr(&self) -> Option { if self.base.csum != [0; 2] { let mut csum = OpteCsum::from(HeaderChecksum::wrap(self.base.csum)); - let bytes = self.base.bytes(); + let bytes = self.base.as_bytes(); csum.sub_bytes(&bytes[..Self::CSUM_BEGIN_OFFSET]); csum.sub_bytes(&bytes[Self::CSUM_END_OFFSET..]); Some(csum) diff --git a/lib/opte/src/engine/ingot_base.rs b/lib/opte/src/engine/ingot_base.rs new file mode 100644 index 00000000..f8d50703 --- /dev/null +++ b/lib/opte/src/engine/ingot_base.rs @@ -0,0 +1,126 @@ +use bitflags::bitflags; +use ingot::choice; +use ingot::ethernet::Ethertype; +use ingot::icmp::IcmpV4; +use ingot::icmp::IcmpV6; +use ingot::icmp::ValidIcmpV4; +use ingot::icmp::ValidIcmpV6; +use ingot::ip::Ecn; +use ingot::ip::IpProtocol; +use ingot::ip::Ipv4Flags; +use ingot::ip::LowRentV6EhRepr; +use 
ingot::tcp::Tcp; +use ingot::tcp::ValidTcp; +use ingot::types::primitives::*; +use ingot::types::NetworkRepr; +use ingot::types::Packet; +use ingot::types::ParseError; +use ingot::types::Repeated; +use ingot::types::Vec; +use ingot::udp::Udp; +use ingot::udp::ValidUdp; +use ingot::Ingot; +use opte_api::Ipv4Addr; +use opte_api::Ipv6Addr; +use opte_api::MacAddr; +use zerocopy::ByteSlice; + +// Redefine Ethernet and v4/v6 because we have our own, internal, +// types already. + +#[choice(on = Ethertype)] +pub enum L3 { + Ipv4 = Ethertype::IPV4, + Ipv6 = Ethertype::IPV6, +} + +#[choice(on = IpProtocol)] +pub enum Ulp { + Tcp = IpProtocol::TCP, + Udp = IpProtocol::UDP, + IcmpV4 = IpProtocol::ICMP, + IcmpV6 = IpProtocol::ICMP_V6, +} + +#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq, Ingot)] +#[ingot(impl_default)] +pub struct Ethernet { + #[ingot(is = "[u8; 6]")] + pub destination: MacAddr, + #[ingot(is = "[u8; 6]")] + pub source: MacAddr, + #[ingot(is = "u16be", next_layer)] + pub ethertype: Ethertype, +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq, Ingot)] +#[ingot(impl_default)] +pub struct Ipv4 { + #[ingot(default = 4)] + pub version: u4, + #[ingot(default = 5)] + pub ihl: u4, + pub dscp: u6, + #[ingot(is = "u2")] + pub ecn: Ecn, + // #[ingot(payload_len() + packet_len())] + pub total_len: u16be, + + pub identification: u16be, + #[ingot(is = "u3")] + pub flags: Ipv4Flags, + pub fragment_offset: u13be, + + #[ingot(default = 128)] + pub hop_limit: u8, + #[ingot(is = "u8", next_layer)] + pub protocol: IpProtocol, + pub checksum: u16be, + + #[ingot(is = "[u8; 4]", default = Ipv4Addr::ANY_ADDR)] + pub source: Ipv4Addr, + #[ingot(is = "[u8; 4]", default = Ipv4Addr::ANY_ADDR)] + pub destination: Ipv4Addr, + + #[ingot(var_len = "(ihl * 4).saturating_sub(20)")] + pub options: Vec, +} + +#[derive(Debug, Clone, Ingot, Eq, PartialEq)] +#[ingot(impl_default)] +pub struct Ipv6 { + #[ingot(default = "6")] + pub version: u4, + pub dscp: u6, + #[ingot(is = "u2")] + pub ecn: Ecn, + 
pub flow_label: u20be, + + // #[ingot(payload_len)] + pub payload_len: u16be, + #[ingot(is = "u8", next_layer)] + pub next_header: IpProtocol, + // #[ingot(default = 128)] + pub hop_limit: u8, + + #[ingot(is = "[u8; 16]", default = Ipv6Addr::ANY_ADDR)] + pub source: Ipv6Addr, + #[ingot(is = "[u8; 16]", default = Ipv6Addr::ANY_ADDR)] + pub destination: Ipv6Addr, + + #[ingot(subparse(on_next_layer))] + pub v6ext: Repeated, +} + +// Why TF do I need to redefine these? Check... +impl From> for Packet> { + fn from(value: ValidIpv4) -> Self { + Packet::Raw(value) + } +} + +impl From> for Packet> { + fn from(value: ValidIpv6) -> Self { + Packet::Raw(value) + } +} diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index bff46d93..00e4aa00 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -82,14 +82,18 @@ use ingot::ip::Ipv6; use ingot::ip::Ipv6Mut; use ingot::ip::Ipv6Packet; use ingot::ip::Ipv6Ref; +use ingot::ip::LowRentV6EhRepr; use ingot::ip::ValidIpv6; use ingot::tcp::TcpFlags; use ingot::tcp::TcpMut; use ingot::tcp::TcpPacket; use ingot::tcp::TcpRef; +use ingot::types::primitives::*; +use ingot::types::DirectPacket; use ingot::types::Emit; use ingot::types::Header; use ingot::types::HeaderStack; +use ingot::types::IndirectPacket; use ingot::types::Packet as IngotPacket; use ingot::types::ParseControl; use ingot::types::ParseError as IngotParseErr; @@ -104,6 +108,7 @@ use ingot::udp::UdpRef; use ingot::udp::ValidUdp; use ingot::Parse; use opte_api::Direction; +use opte_api::Ipv4Addr; use opte_api::Ipv6Addr; use opte_api::Vni; use zerocopy::ByteSlice; @@ -472,17 +477,20 @@ pub enum ValidEncapMeta { } pub struct OpteMeta { - pub outer_eth: Option>>, + pub outer_eth: Option>>, // pub outer_eth: Option>>, - pub outer_l3: Option>>, + pub outer_l3: Option>, + // pub outer_l3: Option>>, // pub outer_v6: Option>>, - pub outer_encap: Option>>, + pub outer_encap: Option>>, // pub outer_encap: 
Option>>, pub inner_eth: EthernetPacket, pub inner_l3: Option>, pub inner_ulp: Option>, } +pub type Test = OpteMeta<&'static [u8]>; + pub type OpteParsed = IngotParsed::Chunk>, T>; impl OpteMeta { @@ -497,42 +505,6 @@ impl OpteMeta { } } -// TODO: make sure both are in ingot, by user choice. -pub enum OwnedPacket { - Repr(O), - Raw(B), -} - -impl Header for OwnedPacket { - const MINIMUM_LENGTH: usize = O::MINIMUM_LENGTH; - - #[inline] - fn packet_length(&self) -> usize { - match self { - OwnedPacket::Repr(o) => o.packet_length(), - OwnedPacket::Raw(b) => b.packet_length(), - } - } -} - -impl Emit for OwnedPacket { - #[inline] - fn emit_raw(&self, buf: V) -> usize { - match self { - OwnedPacket::Repr(o) => o.emit_raw(buf), - OwnedPacket::Raw(b) => b.emit_raw(buf), - } - } - - #[inline] - fn needs_emit(&self) -> bool { - match self { - OwnedPacket::Repr(o) => true, - OwnedPacket::Raw(b) => b.needs_emit(), - } - } -} - struct SizeHoldingEncap<'a> { encapped_len: u16, meta: &'a EncapMeta, @@ -567,7 +539,7 @@ impl<'a> Emit for SizeHoldingEncap<'a> { }, Geneve { protocol_type: Ethertype::ETHERNET, - vni: g.vni.as_u32(), + vni: g.vni, ..Default::default() }, ) @@ -637,16 +609,6 @@ impl Header for ValidEncapMeta { } } -impl From> for OwnedPacket { - #[inline] - fn from(value: ingot::types::Packet) -> Self { - match value { - ingot::types::Packet::Raw(b) => Self::Raw(b), - ingot::types::Packet::Repr(o) => Self::Repr(*o), - } - } -} - impl From> for OpteUnified { #[inline] fn from(value: GeneveOverV6) -> Self { @@ -787,7 +749,7 @@ impl PktBodyWalker { pub struct PacketHeaders { pub(crate) headers: OpteMeta, - initial_lens: OpteUnifiedLengths, + initial_lens: Option>, body: PktBodyWalker, } @@ -809,22 +771,23 @@ impl From> for OpteMeta { #[inline] fn from(value: GeneveOverV6) -> Self { // These are practically all Valid, anyhow. 
- let outer_encap = match (value.outer_udp, value.outer_encap) { (ingot::types::Packet::Raw(u), ingot::types::Packet::Raw(g)) => { - Some(OwnedPacket::Raw(ValidEncapMeta::Geneve(u, g))) + Some(DirectPacket::Raw(ValidEncapMeta::Geneve(u, g))) } _ => todo!(), }; - let outer_l3 = match value.outer_v6 { - ingot::types::Packet::Repr(v) => { - Some(OwnedPacket::Repr(L3Repr::Ipv6(*v))) - } - ingot::types::Packet::Raw(v) => { - Some(OwnedPacket::Raw(ValidL3::Ipv6(v))) - } - }; + // let outer_l3 = match value.outer_v6 { + // ingot::types::Packet::Repr(v) => { + // Some(DirectPacket::Repr(L3Repr::Ipv6(*v))) + // } + // ingot::types::Packet::Raw(v) => { + // Some(DirectPacket::Raw(ValidL3::Ipv6(v))) + // } + // }; + + let outer_l3 = Some(L3::Ipv6(value.outer_v6)); OpteMeta { outer_eth: Some(value.outer_eth.into()), @@ -881,13 +844,13 @@ pub fn ulp_dst_port(pkt: &Ulp) -> Option { } impl PacketHeaders { - pub fn initial_lens(&self) -> &OpteUnifiedLengths { - &self.initial_lens + pub fn initial_lens(&self) -> Option<&OpteUnifiedLengths> { + self.initial_lens.as_ref().map(|v| &**v) } pub fn outer_ether( &self, - ) -> Option<&OwnedPacket>> { + ) -> Option<&DirectPacket>> { self.headers.outer_eth.as_ref() } @@ -896,10 +859,10 @@ impl PacketHeaders { /// in addition to its VNI. pub fn outer_encap_geneve_vni_and_origin(&self) -> Option<(Vni, bool)> { match &self.headers.outer_encap { - Some(OwnedPacket::Repr(EncapMeta::Geneve(g))) => { + Some(DirectPacket::Repr(EncapMeta::Geneve(g))) => { Some((g.vni, g.oxide_external_pkt)) } - Some(OwnedPacket::Raw(ValidEncapMeta::Geneve(_, g))) => { + Some(DirectPacket::Raw(ValidEncapMeta::Geneve(_, g))) => { // TODO: hack. let oxide_external = g.1.packet_length() != 0; Some((Vni::new(g.vni()).unwrap(), oxide_external)) @@ -911,14 +874,21 @@ impl PacketHeaders { // Again: really need to make Owned/Direct choices better-served by ingot. // this interface sucks. 
pub fn outer_ip6_addrs(&self) -> Option<(Ipv6Addr, Ipv6Addr)> { + // match &self.headers.outer_l3 { + // Some(DirectPacket::Repr(L3Repr::Ipv6(v6))) => Some(( + // v6.source.octets().into(), + // v6.destination.octets().into(), + // )), + // Some(DirectPacket::Raw(ValidL3::Ipv6(v6))) => { + // Some((v6.source().octets().into(), v6.destination().octets().into())) + // } + // _ => None, + // } match &self.headers.outer_l3 { - Some(OwnedPacket::Repr(L3Repr::Ipv6(v6))) => Some(( - v6.source.octets().into(), - v6.destination.octets().into(), + Some(L3::Ipv6(v6)) => Some(( + v6.source().octets().into(), + v6.destination().octets().into(), )), - Some(OwnedPacket::Raw(ValidL3::Ipv6(v6))) => { - Some((v6.source().octets().into(), v6.source().octets().into())) - } _ => None, } } @@ -1145,23 +1115,16 @@ impl Packet2> { Direction::In => net.parse_inbound(inner)?, }; - let initial_lens = OpteUnifiedLengths { - outer_eth: headers.outer_eth.packet_length(), - outer_l3: headers.outer_l3.packet_length(), - outer_encap: headers.outer_encap.packet_length(), - inner_eth: headers.inner_eth.packet_length(), - inner_l3: headers.inner_l3.packet_length(), - inner_ulp: headers.inner_ulp.packet_length(), - }; + let initial_lens = None; let body = PktBodyWalker { base: Some((last_chunk, data)).into(), slice: Default::default(), }; - let meta = PacketHeaders { headers, initial_lens, body }; + let meta = Box::new(PacketHeaders { headers, initial_lens, body }); - let flow = (&meta).into(); + let flow = (&*meta).into(); let body_csum = match (&meta.headers).inner_eth.ethertype() { Ethertype::ARP => Memoised::Known(None), @@ -1198,6 +1161,21 @@ impl Packet2> { &mut self.state.meta } + pub fn store_lens_for_slopath(&mut self) { + let headers = &self.state.meta.headers; + self.state.meta.initial_lens = Some( + OpteUnifiedLengths { + outer_eth: headers.outer_eth.packet_length(), + outer_l3: headers.outer_l3.packet_length(), + outer_encap: headers.outer_encap.packet_length(), + inner_eth: 
headers.inner_eth.packet_length(), + inner_l3: headers.inner_l3.packet_length(), + inner_ulp: headers.inner_ulp.packet_length(), + } + .into(), + ); + } + #[inline] /// Convert a packet's metadata into a set of instructions /// needed to serialize all its changes to the wire. @@ -1212,7 +1190,7 @@ impl Packet2> { // - Rewind up to+including that point in original // pkt space. let state = self.state; - let init_lens = state.meta.initial_lens; + let init_lens = state.meta.initial_lens.unwrap(); let headers = state.meta.headers; let payload_len = state.len - init_lens.hdr_len(); let mut encapped_len = payload_len; @@ -1228,7 +1206,7 @@ impl Packet2> { // do this sort of thing. We are so, so far from that... let mut force_serialize = false; - use ingot::types::ToOwnedPacket; + use ingot::types::DirectPacket; match headers.inner_ulp { Some(ulp) => { @@ -1238,7 +1216,7 @@ impl Packet2> { if ulp.needs_emit() || l != init_lens.inner_ulp { let inner = push_spec.inner.get_or_insert_with(Default::default); - // TODO: impl ToOwnedPacket / From<&Ulp> for UlpRepr here? generally seems a bit anaemic. + // TODO: impl DirectPacket / From<&Ulp> for UlpRepr here? generally seems a bit anaemic. inner.ulp = Some(match ulp { Ulp::Tcp(IngotPacket::Repr(t)) => UlpRepr::Tcp(*t), Ulp::Tcp(IngotPacket::Raw(t)) => { @@ -1290,7 +1268,7 @@ impl Packet2> { } L3::Ipv6(IngotPacket::Repr(v6)) => L3Repr::Ipv6(*v6), - // This needs a fuller ToOwnedPacket due to EHs... + // This needs a fuller DirectPacket due to EHs... // We can't actually do structural mods here today using OPTE. L3::Ipv6(IngotPacket::Raw(v6)) => todo!(), // L3Repr::Ipv6((&v6).into()), }); @@ -1323,9 +1301,9 @@ impl Packet2> { || encap.packet_length() != init_lens.outer_encap => { push_spec.outer_encap = Some(match encap { - OwnedPacket::Repr(o) => o, + DirectPacket::Repr(o) => o, // Needed in fullness of time, but not here. 
- OwnedPacket::Raw(_) => todo!(), + DirectPacket::Raw(_) => todo!(), }); force_serialize = true; @@ -1344,10 +1322,15 @@ impl Packet2> { || l3.needs_emit() || l3.packet_length() != init_lens.outer_l3 => { + // push_spec.outer_ip = Some(match l3 { + // DirectPacket::Repr(o) => o, + // // Needed in fullness of time, but not here. + // DirectPacket::Raw(_) => todo!(), + // }); push_spec.outer_ip = Some(match l3 { - OwnedPacket::Repr(o) => o, - // Needed in fullness of time, but not here. - OwnedPacket::Raw(_) => todo!(), + L3::Ipv6(IndirectPacket::Repr(o)) => L3Repr::Ipv6(*o), + L3::Ipv4(IndirectPacket::Repr(o)) => L3Repr::Ipv4(*o), + _ => todo!(), }); force_serialize = true; @@ -1367,9 +1350,9 @@ impl Packet2> { || eth.packet_length() != init_lens.outer_eth => { push_spec.outer_eth = Some(match eth { - OwnedPacket::Repr(o) => o, + DirectPacket::Repr(o) => o, // Needed in fullness of time, but not here. - OwnedPacket::Raw(_) => todo!(), + DirectPacket::Raw(_) => todo!(), }); force_serialize = true; @@ -1412,7 +1395,7 @@ impl Packet2> { // Given that n_transform layers is 1 or 2, probably won't // save too much by trying to tie to a generation number. // TODO: profile. 
- self.state.flow = InnerFlowId::from(self.meta()); + // self.state.flow = InnerFlowId::from(self.meta()); Ok(()) } @@ -1576,7 +1559,7 @@ impl Packet2> { csum.add_bytes(&bytes[..]); } IngotPacket::Raw(tcp) => { - csum.add_bytes(tcp.0.bytes()); + csum.add_bytes(tcp.0.as_bytes()); match &tcp.1 { IngotPacket::Repr(opts) => { csum.add_bytes(&*opts); @@ -1598,7 +1581,7 @@ impl Packet2> { csum.add_bytes(&bytes[..]); } IngotPacket::Raw(udp) => { - csum.add_bytes(udp.0.bytes()); + csum.add_bytes(udp.0.as_bytes()); } } udp.set_checksum(csum.finalize()); @@ -1621,7 +1604,7 @@ impl Packet2> { csum.add_bytes(&bytes[..]); } IngotPacket::Raw(ip) => { - csum.add_bytes(ip.0.bytes()); + csum.add_bytes(ip.0.as_bytes()); match &ip.1 { IngotPacket::Repr(opts) => { csum.add_bytes(&*opts); @@ -1682,7 +1665,7 @@ impl PacketState for Parsed2 {} /// computed state. pub struct Parsed2 { len: usize, - meta: PacketHeaders, + meta: Box>, flow: InnerFlowId, body_csum: Memoised>, l4_hash: Memoised, @@ -1690,6 +1673,8 @@ pub struct Parsed2 { inner_csum_dirty: bool, } +type Quack = Parsed2>; + // Needed for now to account for not wanting to redesign ActionDescs // to be generic over T (trait object safety rules, etc.). pub type PacketMeta3<'a> = Parsed2>; @@ -2001,9 +1986,9 @@ impl QueryEcho for IcmpV6Packet { } } -// TODO: generate ref/mut traits on OwnedPacket AND BoxPacket in ingot to halve the code here... +// TODO: generate ref/mut traits on DirectPacket AND BoxPacket in ingot to halve the code here... 
impl HeaderActionModify - for OwnedPacket> + for DirectPacket> { #[inline] fn run_modify( @@ -2011,7 +1996,7 @@ impl HeaderActionModify mod_spec: &EtherMod, ) -> Result<(), HeaderActionError> { match self { - OwnedPacket::Repr(a) => { + DirectPacket::Repr(a) => { if let Some(src) = mod_spec.src { a.set_source(src.bytes().into()); } @@ -2019,7 +2004,7 @@ impl HeaderActionModify a.set_destination(dst.bytes().into()); } } - OwnedPacket::Raw(a) => { + DirectPacket::Raw(a) => { if let Some(src) = mod_spec.src { a.set_source(src.bytes().into()); } @@ -2050,9 +2035,9 @@ impl HeaderActionModify for EthernetPacket { } } -// TODO: generate ref/mut traits on OwnedPacket AND BoxPacket in ingot to halve the code here... +// TODO: generate ref/mut traits on DirectPacket AND BoxPacket in ingot to halve the code here... impl HeaderActionModify - for OwnedPacket> + for DirectPacket> { #[inline] fn run_modify( @@ -2061,7 +2046,7 @@ impl HeaderActionModify ) -> Result<(), HeaderActionError> { match mod_spec { IpMod::Ip4(mods) => match self { - OwnedPacket::Repr(L3Repr::Ipv4(v4)) => { + DirectPacket::Repr(L3Repr::Ipv4(v4)) => { if let Some(src) = mods.src { >::set_source( v4, @@ -2081,7 +2066,7 @@ impl HeaderActionModify ); } } - OwnedPacket::Raw(ValidL3::Ipv4(v4)) => { + DirectPacket::Raw(ValidL3::Ipv4(v4)) => { if let Some(src) = mods.src { v4.set_source(src.bytes().into()); } @@ -2096,7 +2081,7 @@ impl HeaderActionModify _ => return Err(HeaderActionError::MissingHeader), }, IpMod::Ip6(mods) => match self { - OwnedPacket::Repr(L3Repr::Ipv6(v6)) => { + DirectPacket::Repr(L3Repr::Ipv6(v6)) => { if let Some(src) = mods.src { >::set_source( v6, @@ -2117,7 +2102,7 @@ impl HeaderActionModify ); } } - OwnedPacket::Raw(ValidL3::Ipv6(v6)) => { + DirectPacket::Raw(ValidL3::Ipv6(v6)) => { if let Some(src) = mods.src { v6.set_source(src.bytes().into()); } @@ -2224,7 +2209,7 @@ impl HeaderActionModify for Ulp { } impl HeaderActionModify - for OwnedPacket> + for DirectPacket> { #[inline] fn 
run_modify( @@ -2233,7 +2218,7 @@ impl HeaderActionModify ) -> Result<(), HeaderActionError> { match (self, mod_spec) { ( - OwnedPacket::Repr(EncapMeta::Geneve(g)), + DirectPacket::Repr(EncapMeta::Geneve(g)), EncapMod::Geneve(mod_spec), ) => { if let Some(vni) = mod_spec.vni { @@ -2241,11 +2226,11 @@ impl HeaderActionModify } } ( - OwnedPacket::Raw(ValidEncapMeta::Geneve(u, g)), + DirectPacket::Raw(ValidEncapMeta::Geneve(u, g)), EncapMod::Geneve(mod_spec), ) => { if let Some(vni) = mod_spec.vni { - g.set_vni(vni.as_u32()); + g.set_vni(vni); } } } @@ -2254,15 +2239,17 @@ impl HeaderActionModify } } -impl HasInnerCksum for OwnedPacket> { +impl HasInnerCksum for DirectPacket> { const HAS_CKSUM: bool = false; } -impl HasInnerCksum for OwnedPacket> { +impl HasInnerCksum for DirectPacket> { const HAS_CKSUM: bool = true; } -impl HasInnerCksum for OwnedPacket> { +impl HasInnerCksum + for DirectPacket> +{ const HAS_CKSUM: bool = false; } @@ -2298,11 +2285,11 @@ impl From } impl From - for OwnedPacket> + for DirectPacket> { #[inline] fn from(value: EtherMeta) -> Self { - OwnedPacket::Repr( + DirectPacket::Repr( Ethernet { destination: value.dst.bytes().into(), source: value.src.bytes().into(), @@ -2323,19 +2310,19 @@ impl From } impl From - for OwnedPacket> + for DirectPacket> { #[inline] fn from(value: EncapMeta) -> Self { - OwnedPacket::Repr(value) + DirectPacket::Repr(value) } } -impl From for OwnedPacket> { +impl From for DirectPacket> { #[inline] fn from(value: IpMeta) -> Self { match value { - IpMeta::Ip4(v4) => OwnedPacket::Repr( + IpMeta::Ip4(v4) => DirectPacket::Repr( Ipv4 { ihl: (v4.hdr_len / 4) as u8, total_len: v4.total_len, @@ -2349,7 +2336,7 @@ impl From for OwnedPacket> { } .into(), ), - IpMeta::Ip6(v6) => OwnedPacket::Repr( + IpMeta::Ip6(v6) => DirectPacket::Repr( Ipv6 { payload_len: v6.pay_len, next_header: IpProtocol(u8::from(v6.next_hdr)), @@ -2405,9 +2392,36 @@ impl From for L3 { // } // } -// impl PushAction>> for IpPush { -// fn push(&self) -> 
OwnedPacket> { -// OwnedPacket::Repr(match self { +impl PushAction>> + for EtherMeta +{ + #[inline] + fn push(&self) -> DirectPacket> { + DirectPacket::Repr(Ethernet { + destination: self.dst.bytes().into(), + source: self.src.bytes().into(), + ethertype: Ethertype(u16::from(self.ether_type)), + }) + } +} + +impl PushAction> for EtherMeta { + #[inline] + fn push(&self) -> EthernetPacket { + ingot::types::Packet::Repr( + Ethernet { + destination: self.dst.bytes().into(), + source: self.src.bytes().into(), + ethertype: Ethertype(u16::from(self.ether_type)), + } + .into(), + ) + } +} + +// impl PushAction>> for IpPush { +// fn push(&self) -> DirectPacket> { +// DirectPacket::Repr(match self { // IpPush::Ip4(v4) => L3Repr::Ipv4(Ipv4 { // protocol: IpProtocol(u8::from(v4.proto)), // source: v4.src.bytes().into(), @@ -2424,3 +2438,29 @@ impl From for L3 { // }) // } // } + +impl PushAction> for IpPush { + fn push(&self) -> L3 { + match self { + IpPush::Ip4(v4) => L3::Ipv4( + Ipv4 { + protocol: IpProtocol(u8::from(v4.proto)), + source: v4.src.bytes().into(), + destination: v4.dst.bytes().into(), + flags: Ipv4Flags::DONT_FRAGMENT, + ..Default::default() + } + .into(), + ), + IpPush::Ip6(v6) => L3::Ipv6( + Ipv6 { + next_header: IpProtocol(u8::from(v6.proto)), + source: v6.src.bytes().into(), + destination: v6.dst.bytes().into(), + ..Default::default() + } + .into(), + ), + } + } +} diff --git a/lib/opte/src/engine/ip4.rs b/lib/opte/src/engine/ip4.rs index 9611c00a..a5ed1afd 100644 --- a/lib/opte/src/engine/ip4.rs +++ b/lib/opte/src/engine/ip4.rs @@ -230,7 +230,7 @@ impl Ipv4Meta { // The raw header relies on the slice being the exactly length. debug_assert_eq!(dst.len(), Ipv4Hdr::BASE_SIZE); let mut raw = Ipv4HdrRaw::new_mut(dst).unwrap(); - raw.write(Ipv4HdrRaw::from(self)); + Ref::write(&mut raw, Ipv4HdrRaw::from(self)); } /// Return the length of the header needed to emit the metadata. 
@@ -262,7 +262,7 @@ impl Ipv4Meta { impl<'a> From<&Ipv4Hdr<'a>> for Ipv4Meta { fn from(ip4: &Ipv4Hdr) -> Self { - let raw = ip4.bytes.read(); + let raw = &ip4.bytes; let hdr_len = u16::from((raw.ver_hdr_len & IPV4_HDR_LEN_MASK) * 4); diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index e5c0166a..9115b097 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -39,6 +39,7 @@ pub mod tcp_state; #[macro_use] pub mod udp; +pub mod ingot_base; pub mod ingot_packet; use alloc::string::String; diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 99be49a0..91515de6 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -1207,6 +1207,7 @@ impl Port { mut ameta: ActionMeta, ) -> result::Result { let flow_before = *pkt.flow(); + pkt.store_lens_for_slopath(); // XXX: See remove_rule -- there is a 1-pkt wide TOCTOU here. // This should probably be ordered: // - remove - process diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 08a08a92..906090f0 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -47,6 +47,10 @@ use core::fmt::Display; use illumos_sys_hdrs::c_char; use illumos_sys_hdrs::uintptr_t; use ingot::ethernet::Ethernet; +use ingot::ethernet::EthernetPacket; +use ingot::ethernet::ValidEthernet; +use ingot::example_chain::L3; +use ingot::types::DirectPacket; use ingot::types::Read; use opte_api::Direction; use serde::Deserialize; @@ -390,11 +394,13 @@ impl HdrTransform { T::Chunk: ByteSliceMut, { self.outer_ether - .act_on_option(&mut meta.headers.outer_eth) + .act_on_option::>, _>( + &mut meta.headers.outer_eth, + ) .map_err(Self::err_fn("outer ether"))?; self.outer_ip - .act_on_option(&mut meta.headers.outer_l3) + .act_on_option::, _>(&mut meta.headers.outer_l3) .map_err(Self::err_fn("outer IP"))?; self.outer_encap @@ -403,14 +409,18 @@ impl HdrTransform { // If I set this up right, we can handle the above w/o panic on a // dumb 
EtherDrop action... - meta.headers - .inner_eth - .act_on(&self.inner_ether) - .map_err(Self::err_fn("inner eth"))?; + as Transform, _, _>>::act_on( + &mut meta.headers.inner_eth, + &self.inner_ether, + ) + // meta.headers + // .inner_eth + // .act_on::(&self.inner_ether) + .map_err(Self::err_fn("inner eth"))?; let l3_dirty = self .inner_ip - .act_on_option(&mut meta.headers.inner_l3) + .act_on_option::, _>(&mut meta.headers.inner_l3) .map_err(Self::err_fn("inner IP"))?; let ulp_dirty = self diff --git a/lib/opte/src/engine/tcp.rs b/lib/opte/src/engine/tcp.rs index 63fdf169..c165b988 100644 --- a/lib/opte/src/engine/tcp.rs +++ b/lib/opte/src/engine/tcp.rs @@ -124,7 +124,8 @@ impl TcpMeta { debug_assert_eq!(dst.len(), self.hdr_len()); let base = &mut dst[0..TcpHdrRaw::SIZE]; let mut raw = TcpHdrRaw::new_mut(base).unwrap(); - raw.write(TcpHdrRaw::from(self)); + // raw.write_to(TcpHdrRaw::from(self)); + Ref::write(&mut raw, TcpHdrRaw::from(self)); if let Some(bytes) = self.options_bytes { dst[TcpHdr::BASE_SIZE..] .copy_from_slice(&bytes[0..self.options_len]); @@ -153,7 +154,7 @@ impl<'a> From<&TcpHdr<'a>> for TcpMeta { } }; - let raw = tcp.base.read(); + let raw = &tcp.base; Self { src: u16::from_be_bytes(raw.src_port), dst: u16::from_be_bytes(raw.dst_port), @@ -261,7 +262,7 @@ impl<'a> TcpHdr<'a> { } pub fn base_bytes(&self) -> &[u8] { - self.base.bytes() + self.base.as_bytes() } pub fn options_bytes(&self) -> Option<&[u8]> { @@ -284,8 +285,8 @@ impl<'a> TcpHdr<'a> { // bytes themselves as zero; therefore its imperative we do // not include the checksum field bytes when subtracting from // the checksum value. 
- csum.sub_bytes(&self.base.bytes()[0..Self::CSUM_BEGIN_OFFSET]); - csum.sub_bytes(&self.base.bytes()[Self::CSUM_END_OFFSET..]); + csum.sub_bytes(&self.base.as_bytes()[0..Self::CSUM_BEGIN_OFFSET]); + csum.sub_bytes(&self.base.as_bytes()[Self::CSUM_END_OFFSET..]); if let Some(options) = self.options.as_ref() { csum.sub_bytes(options); diff --git a/lib/opte/src/engine/udp.rs b/lib/opte/src/engine/udp.rs index ff712f09..fe062f51 100644 --- a/lib/opte/src/engine/udp.rs +++ b/lib/opte/src/engine/udp.rs @@ -133,7 +133,7 @@ impl<'a> UdpHdr<'a> { pub const CSUM_END_OFFSET: usize = 8; pub fn bytes(&self) -> &[u8] { - self.base.bytes() + self.base.as_bytes() } pub fn csum_bytes(&self) -> [u8; 2] { @@ -143,7 +143,7 @@ impl<'a> UdpHdr<'a> { pub fn csum_minus_hdr(&self) -> Option { if self.base.csum != [0; 2] { let mut csum = Checksum::from(HeaderChecksum::wrap(self.base.csum)); - csum.sub_bytes(&self.base.bytes()[0..Self::CSUM_BEGIN_OFFSET]); + csum.sub_bytes(&self.base.as_bytes()[0..Self::CSUM_BEGIN_OFFSET]); Some(csum) } else { None diff --git a/lib/oxide-vpc/src/api.rs b/lib/oxide-vpc/src/api.rs index 011908bf..e97b6783 100644 --- a/lib/oxide-vpc/src/api.rs +++ b/lib/oxide-vpc/src/api.rs @@ -837,7 +837,9 @@ impl FromStr for Address { )), Some(("ip", val)) => Ok(Address::Ip(val.parse()?)), Some(("subnet", val)) => Ok(Address::Subnet(val.parse()?)), - Some(("vni", val)) => Ok(Address::Vni(val.parse()?)), + Some(("vni", val)) => { + Ok(Address::Vni(val.parse().map_err(|e| format!("{e:?}"))?)) + } Some((key, _)) => Err(format!("invalid address type: {}", key)), }, } diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 7f466bd2..bbf217f2 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,3 @@ [toolchain] -channel = "1.80.1" +channel = "1.81.0" profile = "default" diff --git a/xde/rust-toolchain.toml b/xde/rust-toolchain.toml index fe1a3bfa..e2d73ef6 100644 --- a/xde/rust-toolchain.toml +++ b/xde/rust-toolchain.toml @@ -1,5 +1,5 @@ [toolchain] -channel 
= "nightly-2024-05-12" +channel = "nightly-2024-09-12" target = "x86_64-unknown-illumos" components = [ "clippy", "rustfmt", "rust-src" ] profile = "minimal" diff --git a/xde/src/xde.rs b/xde/src/xde.rs index baeee682..36991140 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -2083,9 +2083,9 @@ unsafe fn xde_rx_one( let port = &dev.port; // BEGIN THIN_PROCESS EXPERIMENT - // let h = parsed_pkt.meta().initial_lens(); + // let h = parsed_pkt.meta(); - // let pop_len: usize = h.outer_eth + h.outer_l3 + h.outer_encap; + // let pop_len: usize = 70;//h.outer_ether().packet_length() + h.outer_l3 + h.outer_encap; // if let Ok(decision) = port.thin_process(Direction::In, &mut parsed_pkt) { // match decision { diff --git a/xde/x86_64-unknown-unknown.json b/xde/x86_64-unknown-unknown.json index 4cafc73d..d8c5ead2 100644 --- a/xde/x86_64-unknown-unknown.json +++ b/xde/x86_64-unknown-unknown.json @@ -8,7 +8,7 @@ "eh-frame-header": false, "frame-pointer": "always", "executables": true, - "features": "-mmx,-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-3dnow,-3dnowa,-avx,-avx2,+soft-float", + "features": "-mmx,-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,+soft-float", "has-rpath": true, "is-builtin": false, "is-like-solaris": true, From 89d6845326f6aedeaff4b598a15ea50b028b6394 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 12 Sep 2024 16:47:05 +0100 Subject: [PATCH 026/115] Cleanup the last tranche of needless intos --- Cargo.lock | 6 +- Cargo.toml | 2 +- lib/opte/Cargo.toml | 5 +- lib/opte/src/engine/dhcpv6/protocol.rs | 4 +- lib/opte/src/engine/icmp/v6.rs | 21 +- lib/opte/src/engine/ingot_base.rs | 40 ++- lib/opte/src/engine/ingot_packet.rs | 189 ++++++--------- lib/opte/src/engine/port.rs | 33 +-- lib/opte/src/engine/predicate.rs | 16 +- lib/opte/src/engine/rule.rs | 8 +- lib/oxide-vpc/src/engine/mod.rs | 2 +- xde/src/xde.rs | 321 ++++++++++++------------- 12 files changed, 304 insertions(+), 343 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 
6b17da5b..b793be8f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -882,7 +882,7 @@ dependencies = [ [[package]] name = "ingot" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=cc332a8d74438fa27a1d0a0205428b700f31220a#cc332a8d74438fa27a1d0a0205428b700f31220a" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=b79a3039940edca4770c223c99463366c8fba188#b79a3039940edca4770c223c99463366c8fba188" dependencies = [ "bitflags 2.6.0", "ingot-macros", @@ -895,7 +895,7 @@ dependencies = [ [[package]] name = "ingot-macros" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=cc332a8d74438fa27a1d0a0205428b700f31220a#cc332a8d74438fa27a1d0a0205428b700f31220a" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=b79a3039940edca4770c223c99463366c8fba188#b79a3039940edca4770c223c99463366c8fba188" dependencies = [ "darling", "itertools 0.13.0", @@ -909,7 +909,7 @@ dependencies = [ [[package]] name = "ingot-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=cc332a8d74438fa27a1d0a0205428b700f31220a#cc332a8d74438fa27a1d0a0205428b700f31220a" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=b79a3039940edca4770c223c99463366c8fba188#b79a3039940edca4770c223c99463366c8fba188" dependencies = [ "heapless", "ingot-macros", diff --git a/Cargo.toml b/Cargo.toml index 24c85a7a..7c37b8c9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,7 +50,7 @@ ctor = "0.2" darling = "0.20" dyn-clone = "1.0" heapless = "0.8" -ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "cc332a8d74438fa27a1d0a0205428b700f31220a"} +ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "b79a3039940edca4770c223c99463366c8fba188"} ipnetwork = { version = "0.20", default-features = false } itertools = { version = "0.13", default-features = false } libc = "0.2" diff --git a/lib/opte/Cargo.toml b/lib/opte/Cargo.toml index dcc77f47..88c04cf2 100644 --- a/lib/opte/Cargo.toml +++ 
b/lib/opte/Cargo.toml @@ -7,7 +7,7 @@ license.workspace = true repository.workspace = true [features] -default = ["api", "std"] +default = ["api", "std", "alloc"] api = [] engine = ["api", "dep:crc32fast", "dep:derror-macro", "dep:heapless", "dep:itertools", "dep:zerocopy"] kernel = ["illumos-sys-hdrs/kernel"] @@ -21,6 +21,9 @@ std = ["dep:tabwriter", "opte-api/std"] test-help = [] usdt = ["std", "dep:usdt"] +# I have made a mistake in ingot. +alloc = [] + [dependencies] derror-macro = { workspace = true, optional = true } illumos-sys-hdrs.workspace = true diff --git a/lib/opte/src/engine/dhcpv6/protocol.rs b/lib/opte/src/engine/dhcpv6/protocol.rs index deb6d9a1..5b45f9c2 100644 --- a/lib/opte/src/engine/dhcpv6/protocol.rs +++ b/lib/opte/src/engine/dhcpv6/protocol.rs @@ -25,6 +25,7 @@ use crate::engine::dhcpv6::SERVER_PORT; use crate::engine::ether::EtherHdr; use crate::engine::ether::EtherMeta; use crate::engine::ether::EtherType; +use crate::engine::ingot_base::Ipv6Ref; use crate::engine::ingot_packet::MsgBlk; use crate::engine::ingot_packet::PacketHeaders2; use crate::engine::ip6::Ipv6Hdr; @@ -46,7 +47,6 @@ use alloc::borrow::Cow; use alloc::vec::Vec; use core::fmt; use core::ops::Range; -use ingot::ip::Ipv6Ref; use opte_api::Ipv6Addr; use opte_api::Ipv6Cidr; use opte_api::MacAddr; @@ -625,7 +625,7 @@ fn generate_packet<'a>( src: Ipv6Addr::from_eui64(&action.server_mac), // Safety: We're only here if the predicates match, one of which is // IPv6. - dst: meta.inner_ip6().unwrap().source().octets().into(), + dst: meta.inner_ip6().unwrap().source(), proto: Protocol::UDP, next_hdr: IpProtocol::Udp, pay_len: (UdpHdr::SIZE + msg.buffer_len()) as u16, diff --git a/lib/opte/src/engine/icmp/v6.rs b/lib/opte/src/engine/icmp/v6.rs index 63842611..eb0c9444 100644 --- a/lib/opte/src/engine/icmp/v6.rs +++ b/lib/opte/src/engine/icmp/v6.rs @@ -7,13 +7,13 @@ //! ICMPv6 headers and processing. 
use super::*; +use crate::engine::ingot_base::Ipv6Ref; use crate::engine::ingot_packet::MsgBlk; use crate::engine::ingot_packet::PacketHeaders2; use crate::engine::ip6::Ipv6Hdr; use crate::engine::ip6::Ipv6Meta; use crate::engine::predicate::Ipv6AddrMatch; use alloc::string::String; -use ingot::ip::Ipv6Ref; use ingot::types::Emit; pub use opte_api::ip::Icmpv6EchoReply; pub use opte_api::ip::Ipv6Addr; @@ -135,8 +135,8 @@ impl HairpinAction for Icmpv6EchoReply { // resulting ICMPv6 echo reply. let (src_ip, dst_ip) = if let Some(metadata) = meta.inner_ip6() { ( - IpAddress::Ipv6(Ipv6Address(metadata.source().octets())), - IpAddress::Ipv6(Ipv6Address(metadata.destination().octets())), + IpAddress::Ipv6(Ipv6Address(metadata.source().bytes())), + IpAddress::Ipv6(Ipv6Address(metadata.destination().bytes())), ) } else { // We got the ICMPv6 metadata above but no IPv6 somehow? @@ -279,8 +279,8 @@ impl HairpinAction for RouterAdvertisement { meta ))); }; - let src_ip = IpAddress::Ipv6(Ipv6Address(ip6.source().octets())); - let dst_ip = IpAddress::Ipv6(Ipv6Address(ip6.destination().octets())); + let src_ip = IpAddress::Ipv6(Ipv6Address(ip6.source().bytes())); + let dst_ip = IpAddress::Ipv6(Ipv6Address(ip6.destination().bytes())); // `Icmpv6Packet` requires the ICMPv6 header and not just the message payload. // Given we successfully got the ICMPv6 metadata, rewinding here is fine. @@ -373,7 +373,7 @@ impl HairpinAction for RouterAdvertisement { let ip = Ipv6Meta { src: *self.ip(), // Safety: We match on this being Some(_) above, so unwrap is safe. - dst: meta.inner_ip6().unwrap().source().octets().into(), + dst: meta.inner_ip6().unwrap().source(), proto: Protocol::ICMPv6, next_hdr: IpProtocol::Icmpv6, // RFC 4861 6.1.2 requires that the hop limit be 255 in an RA. @@ -416,9 +416,8 @@ fn validate_neighbor_solicitation( metadata: &impl Ipv6Ref, ) -> Result { // First, check if this is in fact a NS message. 
- let smol_src = IpAddress::Ipv6(Ipv6Address(metadata.source().octets())); - let smol_dst = - IpAddress::Ipv6(Ipv6Address(metadata.destination().octets())); + let smol_src = IpAddress::Ipv6(Ipv6Address(metadata.source().bytes())); + let smol_dst = IpAddress::Ipv6(Ipv6Address(metadata.destination().bytes())); let src_pkt = Icmpv6Packet::new_checked(rdr)?; let mut csum = Csum::ignored(); csum.icmpv6 = Checksum::Rx; @@ -464,7 +463,7 @@ fn validate_neighbor_solicitation( // NS is only allowed from the unspecified address if the destination is a // solicited-node multicast address. - if metadata.source().is_unspecified() + if metadata.source() == Ipv6Addr::ANY_ADDR && !Ipv6Addr::from(metadata.destination()).is_solicited_node_multicast() { return Err(GenErr::Unexpected(String::from( @@ -474,7 +473,7 @@ fn validate_neighbor_solicitation( } // Cannot contain Link-Layer address option if from the unspecified address. - if metadata.source().is_unspecified() && has_ll_option { + if metadata.source() == Ipv6Addr::ANY_ADDR && has_ll_option { return Err(GenErr::Unexpected(String::from( "Received NS from UNSPEC, but message contains the \ Link-Layer Address option.", diff --git a/lib/opte/src/engine/ingot_base.rs b/lib/opte/src/engine/ingot_base.rs index f8d50703..d70cb0ca 100644 --- a/lib/opte/src/engine/ingot_base.rs +++ b/lib/opte/src/engine/ingot_base.rs @@ -12,6 +12,7 @@ use ingot::ip::LowRentV6EhRepr; use ingot::tcp::Tcp; use ingot::tcp::ValidTcp; use ingot::types::primitives::*; +use ingot::types::ByteSlice; use ingot::types::NetworkRepr; use ingot::types::Packet; use ingot::types::ParseError; @@ -23,7 +24,6 @@ use ingot::Ingot; use opte_api::Ipv4Addr; use opte_api::Ipv6Addr; use opte_api::MacAddr; -use zerocopy::ByteSlice; // Redefine Ethernet and v4/v6 because we have our own, internal, // types already. 
@@ -34,6 +34,12 @@ pub enum L3 { Ipv6 = Ethertype::IPV6, } +#[choice(on = IpProtocol)] +pub enum L4 { + Tcp = IpProtocol::TCP, + Udp = IpProtocol::UDP, +} + #[choice(on = IpProtocol)] pub enum Ulp { Tcp = IpProtocol::TCP, @@ -113,14 +119,26 @@ pub struct Ipv6 { } // Why TF do I need to redefine these? Check... -impl From> for Packet> { - fn from(value: ValidIpv4) -> Self { - Packet::Raw(value) - } -} +// impl From for Packet { +// fn from(value: Ipv4) -> Self { +// Packet::Repr(value) +// } +// } -impl From> for Packet> { - fn from(value: ValidIpv6) -> Self { - Packet::Raw(value) - } -} +// impl From> for Packet> { +// fn from(value: ValidIpv4) -> Self { +// Packet::Raw(value) +// } +// } + +// impl From for Packet { +// fn from(value: Ipv6) -> Self { +// Packet::Repr(value) +// } +// } + +// impl From> for Packet> { +// fn from(value: ValidIpv6) -> Self { +// Packet::Raw(value) +// } +// } diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 00e4aa00..656fbe4e 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -17,6 +17,27 @@ use super::headers::PushAction; use super::headers::UlpMetaModify; use super::headers::UlpMod; use super::icmp::QueryEcho; +use super::ingot_base::Ethernet; +use super::ingot_base::EthernetMut; +use super::ingot_base::EthernetPacket; +use super::ingot_base::EthernetRef; +use super::ingot_base::Ipv4; +use super::ingot_base::Ipv4Mut; +use super::ingot_base::Ipv4Packet; +use super::ingot_base::Ipv4Ref; +use super::ingot_base::Ipv6; +use super::ingot_base::Ipv6Mut; +use super::ingot_base::Ipv6Packet; +use super::ingot_base::Ipv6Ref; +use super::ingot_base::L3Repr; +use super::ingot_base::Ulp; +use super::ingot_base::UlpRepr; +use super::ingot_base::ValidEthernet; +use super::ingot_base::ValidIpv6; +use super::ingot_base::ValidL3; +use super::ingot_base::ValidUlp; +use super::ingot_base::L3; +use super::ingot_base::L4; use super::packet::allocb; use 
super::packet::AddrPair; use super::packet::BodyTransform; @@ -48,19 +69,7 @@ use core::sync::atomic::AtomicPtr; use illumos_sys_hdrs as ddi; use illumos_sys_hdrs::mblk_t; use illumos_sys_hdrs::uintptr_t; -use ingot::ethernet::Ethernet; -use ingot::ethernet::EthernetMut; -use ingot::ethernet::EthernetPacket; -use ingot::ethernet::EthernetRef; use ingot::ethernet::Ethertype; -use ingot::ethernet::ValidEthernet; -use ingot::example_chain::L3Repr; -use ingot::example_chain::Ulp; -use ingot::example_chain::UlpRepr; -use ingot::example_chain::ValidL3; -use ingot::example_chain::ValidUlp; -use ingot::example_chain::L3; -use ingot::example_chain::L4; use ingot::geneve::Geneve; use ingot::geneve::GeneveMut; use ingot::geneve::GenevePacket; @@ -73,17 +82,8 @@ use ingot::icmp::IcmpV6Mut; use ingot::icmp::IcmpV6Packet; use ingot::icmp::IcmpV6Ref; use ingot::ip::IpProtocol; -use ingot::ip::Ipv4; use ingot::ip::Ipv4Flags; -use ingot::ip::Ipv4Mut; -use ingot::ip::Ipv4Packet; -use ingot::ip::Ipv4Ref; -use ingot::ip::Ipv6; -use ingot::ip::Ipv6Mut; -use ingot::ip::Ipv6Packet; -use ingot::ip::Ipv6Ref; use ingot::ip::LowRentV6EhRepr; -use ingot::ip::ValidIpv6; use ingot::tcp::TcpFlags; use ingot::tcp::TcpMut; use ingot::tcp::TcpPacket; @@ -865,7 +865,7 @@ impl PacketHeaders { Some(DirectPacket::Raw(ValidEncapMeta::Geneve(_, g))) => { // TODO: hack. let oxide_external = g.1.packet_length() != 0; - Some((Vni::new(g.vni()).unwrap(), oxide_external)) + Some((g.vni(), oxide_external)) } None => None, } @@ -874,21 +874,8 @@ impl PacketHeaders { // Again: really need to make Owned/Direct choices better-served by ingot. // this interface sucks. 
pub fn outer_ip6_addrs(&self) -> Option<(Ipv6Addr, Ipv6Addr)> { - // match &self.headers.outer_l3 { - // Some(DirectPacket::Repr(L3Repr::Ipv6(v6))) => Some(( - // v6.source.octets().into(), - // v6.destination.octets().into(), - // )), - // Some(DirectPacket::Raw(ValidL3::Ipv6(v6))) => { - // Some((v6.source().octets().into(), v6.destination().octets().into())) - // } - // _ => None, - // } match &self.headers.outer_l3 { - Some(L3::Ipv6(v6)) => Some(( - v6.source().octets().into(), - v6.destination().octets().into(), - )), + Some(L3::Ipv6(v6)) => Some((v6.source(), v6.destination())), _ => None, } } @@ -897,11 +884,11 @@ impl PacketHeaders { &self.headers.inner_eth } - pub fn inner_l3(&self) -> Option<&ingot::example_chain::L3> { + pub fn inner_l3(&self) -> Option<&L3> { self.headers.inner_l3.as_ref() } - pub fn inner_ulp(&self) -> Option<&ingot::example_chain::Ulp> { + pub fn inner_ulp(&self) -> Option<&Ulp> { self.headers.inner_ulp.as_ref() } @@ -1006,9 +993,7 @@ impl PacketHeaders { } } -fn actual_src_port( - chunk: &ingot::example_chain::Ulp, -) -> Option { +fn actual_src_port(chunk: &Ulp) -> Option { match chunk { Ulp::Tcp(pkt) => Some(pkt.source()), Ulp::Udp(pkt) => Some(pkt.source()), @@ -1016,9 +1001,7 @@ fn actual_src_port( } } -fn actual_dst_port( - chunk: &ingot::example_chain::Ulp, -) -> Option { +fn actual_dst_port(chunk: &Ulp) -> Option { match chunk { Ulp::Tcp(pkt) => Some(pkt.destination()), Ulp::Udp(pkt) => Some(pkt.destination()), @@ -1026,9 +1009,7 @@ fn actual_dst_port( } } -fn pseudo_port( - chunk: &ingot::example_chain::Ulp, -) -> Option { +fn pseudo_port(chunk: &Ulp) -> Option { match chunk { Ulp::IcmpV4(pkt) if pkt.code() == 0 && (pkt.ty() == 0 || pkt.ty() == 8) => @@ -1050,17 +1031,11 @@ impl From<&PacketHeaders> for InnerFlowId { let (proto, addrs) = match meta.inner_l3() { Some(L3::Ipv4(pkt)) => ( pkt.protocol().0, - AddrPair::V4 { - src: pkt.source().into(), - dst: pkt.destination().into(), - }, + AddrPair::V4 { src: pkt.source(), dst: 
pkt.destination() }, ), Some(L3::Ipv6(pkt)) => ( pkt.next_header().0, - AddrPair::V6 { - src: pkt.source().into(), - dst: pkt.destination().into(), - }, + AddrPair::V6 { src: pkt.source(), dst: pkt.destination() }, ), None => (255, FLOW_ID_DEFAULT.addrs), }; @@ -1625,8 +1600,8 @@ fn l3_pseudo_header(l3: &L3) -> Checksum { match l3 { L3::Ipv4(v4) => { let mut pseudo_hdr_bytes = [0u8; 12]; - pseudo_hdr_bytes[0..4].copy_from_slice(&v4.source().octets()); - pseudo_hdr_bytes[4..8].copy_from_slice(&v4.destination().octets()); + pseudo_hdr_bytes[0..4].copy_from_slice(v4.source().as_ref()); + pseudo_hdr_bytes[4..8].copy_from_slice(v4.destination().as_ref()); pseudo_hdr_bytes[9] = v4.protocol().0; let ulp_len = v4.total_len() - 4 * (v4.ihl() as u16); pseudo_hdr_bytes[10..].copy_from_slice(&ulp_len.to_be_bytes()); @@ -1635,9 +1610,9 @@ fn l3_pseudo_header(l3: &L3) -> Checksum { } L3::Ipv6(v6) => { let mut pseudo_hdr_bytes = [0u8; 40]; - pseudo_hdr_bytes[0..16].copy_from_slice(&v6.source().octets()); + pseudo_hdr_bytes[0..16].copy_from_slice(&v6.source().as_ref()); pseudo_hdr_bytes[16..32] - .copy_from_slice(&v6.destination().octets()); + .copy_from_slice(&v6.destination().as_ref()); pseudo_hdr_bytes[39] = v6.next_header().0; let ulp_len = v6.payload_len() as u32; pseudo_hdr_bytes[32..36].copy_from_slice(&ulp_len.to_be_bytes()); @@ -1998,18 +1973,18 @@ impl HeaderActionModify match self { DirectPacket::Repr(a) => { if let Some(src) = mod_spec.src { - a.set_source(src.bytes().into()); + a.set_source(src); } if let Some(dst) = mod_spec.dst { - a.set_destination(dst.bytes().into()); + a.set_destination(dst); } } DirectPacket::Raw(a) => { if let Some(src) = mod_spec.src { - a.set_source(src.bytes().into()); + a.set_source(src); } if let Some(dst) = mod_spec.dst { - a.set_destination(dst.bytes().into()); + a.set_destination(dst); } } } @@ -2025,10 +2000,10 @@ impl HeaderActionModify for EthernetPacket { mod_spec: &EtherMod, ) -> Result<(), HeaderActionError> { if let Some(src) = 
mod_spec.src { - self.set_source(src.bytes().into()); + self.set_source(src); } if let Some(dst) = mod_spec.dst { - self.set_destination(dst.bytes().into()); + self.set_destination(dst); } Ok(()) @@ -2048,19 +2023,13 @@ impl HeaderActionModify IpMod::Ip4(mods) => match self { DirectPacket::Repr(L3Repr::Ipv4(v4)) => { if let Some(src) = mods.src { - >::set_source( - v4, - src.bytes().into(), - ); + >::set_source(v4, src); } if let Some(dst) = mods.dst { - >::set_destination( - v4, - dst.bytes().into(), - ); + >::set_destination(v4, dst); } if let Some(p) = mods.proto { - >::set_protocol( + >::set_protocol( v4, IpProtocol(u8::from(p)), ); @@ -2068,10 +2037,10 @@ impl HeaderActionModify } DirectPacket::Raw(ValidL3::Ipv4(v4)) => { if let Some(src) = mods.src { - v4.set_source(src.bytes().into()); + v4.set_source(src); } if let Some(dst) = mods.dst { - v4.set_destination(dst.bytes().into()); + v4.set_destination(dst); } if let Some(p) = mods.proto { v4.set_protocol(IpProtocol(u8::from(p))); @@ -2083,20 +2052,14 @@ impl HeaderActionModify IpMod::Ip6(mods) => match self { DirectPacket::Repr(L3Repr::Ipv6(v6)) => { if let Some(src) = mods.src { - >::set_source( - v6, - src.bytes().into(), - ); + >::set_source(v6, src); } if let Some(dst) = mods.dst { - >::set_destination( - v6, - dst.bytes().into(), - ); + >::set_destination(v6, dst); } if let Some(p) = mods.proto { // NOTE: I know this is broken for V6EHs - >::set_next_header( + >::set_next_header( v6, IpProtocol(u8::from(p)), ); @@ -2104,10 +2067,10 @@ impl HeaderActionModify } DirectPacket::Raw(ValidL3::Ipv6(v6)) => { if let Some(src) = mods.src { - v6.set_source(src.bytes().into()); + v6.set_source(src); } if let Some(dst) = mods.dst { - v6.set_destination(dst.bytes().into()); + v6.set_destination(dst); } if let Some(p) = mods.proto { // NOTE: I know this is broken for V6EHs @@ -2132,10 +2095,10 @@ impl HeaderActionModify for L3 { match (self, mod_spec) { (L3::Ipv4(v4), IpMod::Ip4(mods)) => { if let Some(src) = mods.src 
{ - v4.set_source(src.bytes().into()); + v4.set_source(src); } if let Some(dst) = mods.dst { - v4.set_destination(dst.bytes().into()); + v4.set_destination(dst); } if let Some(p) = mods.proto { v4.set_protocol(IpProtocol(u8::from(p))); @@ -2144,10 +2107,10 @@ impl HeaderActionModify for L3 { } (L3::Ipv6(v6), IpMod::Ip6(mods)) => { if let Some(src) = mods.src { - v6.set_source(src.bytes().into()); + v6.set_source(src); } if let Some(dst) = mods.dst { - v6.set_destination(dst.bytes().into()); + v6.set_destination(dst); } if let Some(p) = mods.proto { // NOTE: I know this is broken for V6EHs @@ -2269,14 +2232,14 @@ impl HasInnerCksum for Ulp { // need to briefly keep both around while I systematically rewrite the test suite. impl From - for ingot::types::Packet> + for ingot::types::Packet> { #[inline] fn from(value: EtherMeta) -> Self { ingot::types::Packet::Repr( Ethernet { - destination: value.dst.bytes().into(), - source: value.src.bytes().into(), + destination: value.dst, + source: value.src, ethertype: Ethertype(u16::from(value.ether_type)), } .into(), @@ -2285,14 +2248,14 @@ impl From } impl From - for DirectPacket> + for DirectPacket> { #[inline] fn from(value: EtherMeta) -> Self { DirectPacket::Repr( Ethernet { - destination: value.dst.bytes().into(), - source: value.src.bytes().into(), + destination: value.dst, + source: value.src, ethertype: Ethertype(u16::from(value.ether_type)), } .into(), @@ -2329,8 +2292,8 @@ impl From for DirectPacket> { identification: v4.ident, protocol: IpProtocol(u8::from(v4.proto)), checksum: u16::from_be_bytes(v4.csum), - source: v4.src.bytes().into(), - destination: v4.dst.bytes().into(), + source: v4.src, + destination: v4.dst, flags: Ipv4Flags::DONT_FRAGMENT, ..Default::default() } @@ -2341,8 +2304,8 @@ impl From for DirectPacket> { payload_len: v6.pay_len, next_header: IpProtocol(u8::from(v6.next_hdr)), hop_limit: v6.hop_limit, - source: v6.src.bytes().into(), - destination: v6.dst.bytes().into(), + source: v6.src, + 
destination: v6.dst, v6ext: Repeated::default(), // TODO ..Default::default() } @@ -2363,8 +2326,8 @@ impl From for L3 { identification: v4.ident, protocol: IpProtocol(u8::from(v4.proto)), checksum: u16::from_be_bytes(v4.csum), - source: v4.src.bytes().into(), - destination: v4.dst.bytes().into(), + source: v4.src, + destination: v4.dst, flags: Ipv4Flags::DONT_FRAGMENT, ..Default::default() } @@ -2375,8 +2338,8 @@ impl From for L3 { payload_len: v6.pay_len, next_header: IpProtocol(u8::from(v6.next_hdr)), hop_limit: v6.hop_limit, - source: v6.src.bytes().into(), - destination: v6.dst.bytes().into(), + source: v6.src, + destination: v6.dst, v6ext: Repeated::default(), // TODO ..Default::default() } @@ -2398,8 +2361,8 @@ impl PushAction>> #[inline] fn push(&self) -> DirectPacket> { DirectPacket::Repr(Ethernet { - destination: self.dst.bytes().into(), - source: self.src.bytes().into(), + destination: self.dst, + source: self.src, ethertype: Ethertype(u16::from(self.ether_type)), }) } @@ -2410,8 +2373,8 @@ impl PushAction> for EtherMeta { fn push(&self) -> EthernetPacket { ingot::types::Packet::Repr( Ethernet { - destination: self.dst.bytes().into(), - source: self.src.bytes().into(), + destination: self.dst, + source: self.src, ethertype: Ethertype(u16::from(self.ether_type)), } .into(), @@ -2445,8 +2408,8 @@ impl PushAction> for IpPush { IpPush::Ip4(v4) => L3::Ipv4( Ipv4 { protocol: IpProtocol(u8::from(v4.proto)), - source: v4.src.bytes().into(), - destination: v4.dst.bytes().into(), + source: v4.src, + destination: v4.dst, flags: Ipv4Flags::DONT_FRAGMENT, ..Default::default() } @@ -2455,8 +2418,8 @@ impl PushAction> for IpPush { IpPush::Ip6(v6) => L3::Ipv6( Ipv6 { next_header: IpProtocol(u8::from(v6.proto)), - source: v6.src.bytes().into(), - destination: v6.dst.bytes().into(), + source: v6.src, + destination: v6.dst, ..Default::default() } .into(), diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 91515de6..fe82498d 100644 --- 
a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -1303,14 +1303,15 @@ impl Port { dir: Direction, pkt: &mut Packet2, ) -> result::Result { - use ingot::ethernet::EthernetMut; - use ingot::example_chain::Ulp; + use super::ingot_base::EthernetMut; + use super::ingot_base::Ipv4Mut; + use super::ingot_base::Ipv6Mut; + use super::ingot_base::Ulp; + use super::ingot_base::L3; use ingot::icmp::IcmpV4Mut; use ingot::icmp::IcmpV4Ref; use ingot::icmp::IcmpV6Mut; use ingot::icmp::IcmpV6Ref; - use ingot::ip::Ipv4Mut; - use ingot::ip::Ipv6Mut; use ingot::tcp::TcpFlags; use ingot::tcp::TcpMut; use ingot::udp::UdpMut; @@ -1369,18 +1370,16 @@ impl Port { } if let HeaderAction::Modify(m) = &xf.inner_ether { if let Some(src) = m.src { - hm.inner_eth.set_source(src.bytes().into()); + hm.inner_eth.set_source(src); } if let Some(dst) = m.dst { - hm.inner_eth.set_destination(dst.bytes().into()); + hm.inner_eth.set_destination(dst); } } if let HeaderAction::Modify(m) = &xf.inner_ip { match m { super::headers::IpMod::Ip4(v4) => { - let Some(ingot::example_chain::L3::Ipv4( - ref mut v4_t, - )) = hm.inner_l3 + let Some(L3::Ipv4(ref mut v4_t)) = hm.inner_l3 else { return Err(ProcessError::FlowTableFull { kind: "()", @@ -1397,9 +1396,7 @@ impl Port { } } super::headers::IpMod::Ip6(v6) => { - let Some(ingot::example_chain::L3::Ipv6( - ref mut v6_t, - )) = hm.inner_l3 + let Some(L3::Ipv6(ref mut v6_t)) = hm.inner_l3 else { return Err(ProcessError::FlowTableFull { kind: "()", @@ -1531,18 +1528,16 @@ impl Port { } if let HeaderAction::Modify(m) = &xf.inner_ether { if let Some(src) = m.src { - hm.inner_eth.set_source(src.bytes().into()); + hm.inner_eth.set_source(src); } if let Some(dst) = m.dst { - hm.inner_eth.set_destination(dst.bytes().into()); + hm.inner_eth.set_destination(dst); } } if let HeaderAction::Modify(m) = &xf.inner_ip { match m { super::headers::IpMod::Ip4(v4) => { - let Some(ingot::example_chain::L3::Ipv4( - ref mut v4_t, - )) = hm.inner_l3 + let 
Some(L3::Ipv4(ref mut v4_t)) = hm.inner_l3 else { return Err(ProcessError::FlowTableFull { kind: "()", @@ -1559,9 +1554,7 @@ impl Port { } } super::headers::IpMod::Ip6(v6) => { - let Some(ingot::example_chain::L3::Ipv6( - ref mut v6_t, - )) = hm.inner_l3 + let Some(L3::Ipv6(ref mut v6_t)) = hm.inner_l3 else { return Err(ProcessError::FlowTableFull { kind: "()", diff --git a/lib/opte/src/engine/predicate.rs b/lib/opte/src/engine/predicate.rs index 88bb8a98..e640c721 100644 --- a/lib/opte/src/engine/predicate.rs +++ b/lib/opte/src/engine/predicate.rs @@ -12,6 +12,10 @@ use super::ether::EtherType; use super::headers::IpMeta; use super::icmp::v4::MessageType as IcmpMessageType; use super::icmp::v6::MessageType as Icmpv6MessageType; +use super::ingot_base::EthernetRef; +use super::ingot_base::Ipv4Ref; +use super::ingot_base::Ipv6Ref; +use super::ingot_base::L3; use super::ingot_packet::ulp_dst_port; use super::ingot_packet::ulp_src_port; use super::ingot_packet::PacketHeaders; @@ -33,12 +37,8 @@ use alloc::vec::Vec; use core::fmt; use core::fmt::Display; use core::ops::RangeInclusive; -use ingot::ethernet::EthernetRef; -use ingot::example_chain::L3; use ingot::icmp::IcmpV4Ref; use ingot::icmp::IcmpV6Ref; -use ingot::ip::Ipv4Ref; -use ingot::ip::Ipv6Ref; use opte_api::MacAddr; use serde::Deserialize; use serde::Serialize; @@ -388,9 +388,7 @@ impl Predicate { Self::InnerEtherDst(list) => { for m in list { - if m.matches( - meta.inner_ether().destination().into_array().into(), - ) { + if m.matches(meta.inner_ether().destination()) { return true; } } @@ -398,9 +396,7 @@ impl Predicate { Self::InnerEtherSrc(list) => { for m in list { - if m.matches( - meta.inner_ether().source().into_array().into(), - ) { + if m.matches(meta.inner_ether().source()) { return true; } } diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 906090f0..aa1aee05 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -19,6 +19,10 @@ use 
super::headers::IpMod; use super::headers::IpPush; use super::headers::Transform; use super::headers::UlpHeaderAction; +use super::ingot_base::Ethernet; +use super::ingot_base::EthernetPacket; +use super::ingot_base::ValidEthernet; +use super::ingot_base::L3; use super::ingot_packet::MsgBlk; use super::ingot_packet::Packet2; use super::ingot_packet::PacketHeaders; @@ -46,10 +50,6 @@ use core::fmt::Debug; use core::fmt::Display; use illumos_sys_hdrs::c_char; use illumos_sys_hdrs::uintptr_t; -use ingot::ethernet::Ethernet; -use ingot::ethernet::EthernetPacket; -use ingot::ethernet::ValidEthernet; -use ingot::example_chain::L3; use ingot::types::DirectPacket; use ingot::types::Read; use opte_api::Direction; diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index f1cdf790..5e3d73c9 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -44,8 +44,8 @@ use opte::engine::arp; use opte::engine::arp::ArpEthIpv4; use opte::engine::arp::ArpOp; use opte::engine::ether::ETHER_TYPE_IPV4; +use opte::engine::ingot_base::EthernetRef; use opte::engine::ip4::Ipv4Addr; -use opte::ingot::ethernet::EthernetRef; use opte::ingot::ethernet::Ethertype; use opte::ingot::types::Read; use zerocopy::ByteSliceMut; diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 36991140..0495dd19 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -44,9 +44,6 @@ use core::ptr::addr_of_mut; use core::time::Duration; use crc32fast::Hasher; use illumos_sys_hdrs::*; -use ingot::ethernet::EthernetMut; -use ingot::ethernet::EthernetRef; -use ingot::ethernet::ValidEthernet; use ingot::geneve::GeneveFlags; use ingot::geneve::GeneveMut; use ingot::geneve::GeneveRef; @@ -82,6 +79,9 @@ use opte::engine::headers::EncapMeta; use opte::engine::headers::EncapPush; use opte::engine::headers::IpAddr; use opte::engine::headers::IpPush; +use opte::engine::ingot_base::EthernetMut; +use opte::engine::ingot_base::EthernetRef; +use opte::engine::ingot_base::ValidEthernet; 
use opte::engine::ingot_packet::MsgBlk; use opte::engine::ingot_packet::Packet2; use opte::engine::ingot_packet::Parsed2; @@ -1428,8 +1428,7 @@ fn guest_loopback<'a>( vni: Vni, ) -> Option<&'a Box> { use Direction::*; - let ether_dst = - MacAddr::from(pkt.meta().inner_ether().destination().into_array()); + let ether_dst = pkt.meta().inner_ether().destination(); // let devs = unsafe { xde_devs.read() }; let maybe_dest_dev = devs.iter().find(|x| x.vni == vni && x.port.mac_addr() == ether_dst); @@ -1596,134 +1595,125 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { let port = &src_dev.port; // BEGIN THIN_PROCESS RE-EXPERIMENT - // let mut ip6_src = Default::default(); - // let mut ip6_dst = Default::default(); - // let f_hash; - // if let Ok(decision) = port.thin_process(Direction::Out, &mut parsed_pkt) { - // match decision { - // opte::engine::port::ThinProcRes::PushEncap( - // eth, - // ip, - // udp, - // ) => { - // f_hash = parsed_pkt.l4_hash(); - // drop(parsed_pkt); - - // // TODO: generate methods to fill a maybeuninit. - // // total bytes: ETH 14, V6 40, UDP 8, GENEVE 8 - // let new_hdrs = 14 + 40 + 8 + 8; - // let mut new_blk = - // MsgBlk::new_with_headroom(2, new_hdrs); - - // let w_encap_bytes = (pkt_len_old + 16) as u16; - - // new_blk.write(14, |uninit| { - // let complete_eth = - // opte::ingot::ethernet::Ethernet { - // destination: eth.dst.bytes().into(), - // source: eth.src.bytes().into(), - // ethertype: ingot::ethernet::Ethertype( - // eth.ether_type.into(), - // ), - // }; - - // complete_eth - // .emit_uninit(uninit) - // .expect("must be enough room..."); - // }); - - // // we know we'er only pushing v6. 
- // let IpPush::Ip6(v6) = ip else { panic!() }; - // ip6_src = v6.src; - // ip6_dst = v6.dst; - - // new_blk.write(40, |uninit| { - // let complete_v6 = opte::ingot::ip::Ipv6 { - // version: 6, - // dscp: 0, - // ecn: ingot::ip::Ecn::NotCapable, - // flow_label: 12345678, - // payload_len: w_encap_bytes, - // next_header: ingot::ip::IpProtocol( - // v6.proto.into(), - // ), - // hop_limit: 128, - // source: v6.src.bytes().into(), - // destination: v6.dst.bytes().into(), - // v6ext: vec![].into(), - // }; - - // complete_v6 - // .emit_uninit(uninit) - // .expect("must be enough room..."); - // }); - - // let EncapPush::Geneve(gen) = udp else { panic!() }; - // new_blk.write(16, |uninit| { - // let complete_udp = opte::ingot::udp::Udp { - // source: gen.entropy, - // destination: 6081, - // length: w_encap_bytes, - // checksum: 0, - // }; - // let complete_geneve = opte::ingot::geneve::Geneve { - // version: 0, - // opt_len: 0, - // flags: opte::ingot::geneve::GeneveFlags::empty( - // ), - // protocol_type: - // opte::ingot::ethernet::Ethertype::ETHERNET, - // vni: gen.vni.into(), - // reserved: 0, - // options: Vec::new(), - // }; - - // let len = complete_udp - // .emit_uninit(uninit) - // .expect("must be enough room..."); - // complete_geneve - // .emit_uninit(&mut uninit[len..]) - // .expect("must be enough room..."); - // }); - - // core::mem::swap(&mut new_blk, &mut pkt); - // pkt.extend_if_one(new_blk); - // } - // // we're in Tx for a ULP'd pkt -- this should NEVER happen. - // opte::engine::port::ThinProcRes::PopEncap => unreachable!(), - // opte::engine::port::ThinProcRes::Na => unreachable!(), - // } - - // if ip6_dst == ip6_src { - // // todo. 
broken just now ig - // // return guest_loopback(src_dev, pkt, vni); - // opte::engine::err!("eh?"); - // return ptr::null_mut(); - // } - - // let my_key = RouteKey { dst: ip6_dst, l4_hash: Some(f_hash) }; - // let Route { src, dst, underlay_dev } = - // src_dev.routes.next_hop(my_key, src_dev); - - // // Get a pointer to the beginning of the outer frame and - // // fill in the dst/src addresses before sending out the - // // device. - // let mblk = pkt.unwrap_mblk(); - // let rptr = (*mblk).b_rptr; - // ptr::copy(dst.as_ptr(), rptr, 6); - // ptr::copy(src.as_ptr(), rptr.add(6), 6); - // // Unwrap: We know the packet is good because we just - // // unwrapped it above. - // let new_pkt = MsgBlk::wrap_mblk(mblk).unwrap(); - - // underlay_dev.stream.tx_drop_on_no_desc2( - // new_pkt, - // hint, - // MacTxFlags::empty(), - // ); - - // return ptr::null_mut(); - // } + let mut ip6_src = Default::default(); + let mut ip6_dst = Default::default(); + let f_hash; + if let Ok(decision) = port.thin_process(Direction::Out, &mut parsed_pkt) { + match decision { + opte::engine::port::ThinProcRes::PushEncap(eth, ip, udp) => { + f_hash = parsed_pkt.l4_hash(); + drop(parsed_pkt); + + // TODO: generate methods to fill a maybeuninit. + // total bytes: ETH 14, V6 40, UDP 8, GENEVE 8 + let new_hdrs = 14 + 40 + 8 + 8; + let mut new_blk = MsgBlk::new_with_headroom(2, new_hdrs); + + let w_encap_bytes = (pkt_len_old + 16) as u16; + + new_blk.write(14, |uninit| { + let complete_eth = opte::ingot::ethernet::Ethernet { + destination: eth.dst.bytes().into(), + source: eth.src.bytes().into(), + ethertype: ingot::ethernet::Ethertype( + eth.ether_type.into(), + ), + }; + + complete_eth + .emit_uninit(uninit) + .expect("must be enough room..."); + }); + + // we know we'er only pushing v6. 
+ let IpPush::Ip6(v6) = ip else { panic!() }; + ip6_src = v6.src; + ip6_dst = v6.dst; + + new_blk.write(40, |uninit| { + let complete_v6 = opte::ingot::ip::Ipv6 { + version: 6, + dscp: 0, + ecn: ingot::ip::Ecn::NotCapable, + flow_label: 12345678, + payload_len: w_encap_bytes, + next_header: ingot::ip::IpProtocol(v6.proto.into()), + hop_limit: 128, + source: v6.src.bytes().into(), + destination: v6.dst.bytes().into(), + v6ext: vec![].into(), + }; + + complete_v6 + .emit_uninit(uninit) + .expect("must be enough room..."); + }); + + let EncapPush::Geneve(gen) = udp else { panic!() }; + new_blk.write(16, |uninit| { + let complete_udp = opte::ingot::udp::Udp { + source: gen.entropy, + destination: 6081, + length: w_encap_bytes, + checksum: 0, + }; + let complete_geneve = opte::ingot::geneve::Geneve { + version: 0, + opt_len: 0, + flags: opte::ingot::geneve::GeneveFlags::empty(), + protocol_type: + opte::ingot::ethernet::Ethertype::ETHERNET, + vni: gen.vni.into(), + reserved: 0, + options: Vec::new(), + }; + + let len = complete_udp + .emit_uninit(uninit) + .expect("must be enough room..."); + complete_geneve + .emit_uninit(&mut uninit[len..]) + .expect("must be enough room..."); + }); + + core::mem::swap(&mut new_blk, &mut pkt); + pkt.extend_if_one(new_blk); + } + // we're in Tx for a ULP'd pkt -- this should NEVER happen. + opte::engine::port::ThinProcRes::PopEncap => unreachable!(), + opte::engine::port::ThinProcRes::Na => unreachable!(), + } + + if ip6_dst == ip6_src { + // todo. broken just now ig + // return guest_loopback(src_dev, pkt, vni); + opte::engine::err!("eh?"); + return ptr::null_mut(); + } + + let my_key = RouteKey { dst: ip6_dst, l4_hash: Some(f_hash) }; + let Route { src, dst, underlay_dev } = + src_dev.routes.next_hop(my_key, src_dev); + + // Get a pointer to the beginning of the outer frame and + // fill in the dst/src addresses before sending out the + // device. 
+ let mblk = pkt.unwrap_mblk(); + let rptr = (*mblk).b_rptr; + ptr::copy(dst.as_ptr(), rptr, 6); + ptr::copy(src.as_ptr(), rptr.add(6), 6); + // Unwrap: We know the packet is good because we just + // unwrapped it above. + let new_pkt = MsgBlk::wrap_mblk(mblk).unwrap(); + + underlay_dev.stream.tx_drop_on_no_desc2( + new_pkt, + hint, + MacTxFlags::empty(), + ); + + return ptr::null_mut(); + } // END THIN_PROCESS RE-EXPERIMENT // The port processing code will fire a probe that describes what @@ -2056,7 +2046,6 @@ unsafe fn xde_rx_one( }; let ether_dst = meta.inner_ether().destination(); - let ether_dst = ether_dst.into_array().into(); // let vni = geneve.vni; // let ether_dst = meta.inner.ether.dst; @@ -2083,38 +2072,38 @@ unsafe fn xde_rx_one( let port = &dev.port; // BEGIN THIN_PROCESS EXPERIMENT - // let h = parsed_pkt.meta(); - - // let pop_len: usize = 70;//h.outer_ether().packet_length() + h.outer_l3 + h.outer_encap; - - // if let Ok(decision) = port.thin_process(Direction::In, &mut parsed_pkt) { - // match decision { - // opte::engine::port::ThinProcRes::PopEncap => { - // let mut to_pop = pop_len; - // drop(parsed_pkt); - // for layer in pkt.iter_mut() { - // let max_drop = layer.len(); - // let will_drop = max_drop.min(to_pop); - // layer.drop_front_bytes(will_drop); - // to_pop -= will_drop; - - // if to_pop == 0 { - // break; - // } - // } - - // // could theoretically have empty segments here. - // // not an issue over NIC for now. 
- // mac::mac_rx(dev.mh, mrh, pkt.unwrap_mblk()); - // } - // // we know this to be true given how we cfg opte - // opte::engine::port::ThinProcRes::PushEncap(_, _, _) => { - // unreachable!() - // } - // opte::engine::port::ThinProcRes::Na => unreachable!(), - // } - // return; - // } + let h = parsed_pkt.meta(); + + let pop_len: usize = 70; //h.outer_ether().packet_length() + h.outer_l3 + h.outer_encap; + + if let Ok(decision) = port.thin_process(Direction::In, &mut parsed_pkt) { + match decision { + opte::engine::port::ThinProcRes::PopEncap => { + let mut to_pop = pop_len; + drop(parsed_pkt); + for layer in pkt.iter_mut() { + let max_drop = layer.len(); + let will_drop = max_drop.min(to_pop); + layer.drop_front_bytes(will_drop); + to_pop -= will_drop; + + if to_pop == 0 { + break; + } + } + + // could theoretically have empty segments here. + // not an issue over NIC for now. + mac::mac_rx(dev.mh, mrh, pkt.unwrap_mblk()); + } + // we know this to be true given how we cfg opte + opte::engine::port::ThinProcRes::PushEncap(_, _, _) => { + unreachable!() + } + opte::engine::port::ThinProcRes::Na => unreachable!(), + } + return; + } // END THIN_PROCESS EXPERIMENT let res = port.process(Direction::In, &mut parsed_pkt, ActionMeta::new()); From 942cdc1cd8ca3c959101d9640b939209dbf7143a Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 12 Sep 2024 22:46:42 +0100 Subject: [PATCH 027/115] Take v6 next layer fixes, attempt to chase down bad csum updates. 
--- Cargo.lock | 6 +++--- Cargo.toml | 2 +- lib/opte/src/engine/ingot_packet.rs | 24 ++++++++++++++++++++---- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b793be8f..56d8c75e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -882,7 +882,7 @@ dependencies = [ [[package]] name = "ingot" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=b79a3039940edca4770c223c99463366c8fba188#b79a3039940edca4770c223c99463366c8fba188" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=8b6b16d8e7602ff0be4909880f428b1c40f28323#8b6b16d8e7602ff0be4909880f428b1c40f28323" dependencies = [ "bitflags 2.6.0", "ingot-macros", @@ -895,7 +895,7 @@ dependencies = [ [[package]] name = "ingot-macros" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=b79a3039940edca4770c223c99463366c8fba188#b79a3039940edca4770c223c99463366c8fba188" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=8b6b16d8e7602ff0be4909880f428b1c40f28323#8b6b16d8e7602ff0be4909880f428b1c40f28323" dependencies = [ "darling", "itertools 0.13.0", @@ -909,7 +909,7 @@ dependencies = [ [[package]] name = "ingot-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=b79a3039940edca4770c223c99463366c8fba188#b79a3039940edca4770c223c99463366c8fba188" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=8b6b16d8e7602ff0be4909880f428b1c40f28323#8b6b16d8e7602ff0be4909880f428b1c40f28323" dependencies = [ "heapless", "ingot-macros", diff --git a/Cargo.toml b/Cargo.toml index 7c37b8c9..e85c57ff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,7 +50,7 @@ ctor = "0.2" darling = "0.20" dyn-clone = "1.0" heapless = "0.8" -ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "b79a3039940edca4770c223c99463366c8fba188"} +ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "8b6b16d8e7602ff0be4909880f428b1c40f28323"} ipnetwork = { version = "0.20", default-features = 
false } itertools = { version = "0.13", default-features = false } libc = "0.2" diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 656fbe4e..1a485353 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -94,6 +94,7 @@ use ingot::types::Emit; use ingot::types::Header; use ingot::types::HeaderStack; use ingot::types::IndirectPacket; +use ingot::types::NextLayer; use ingot::types::Packet as IngotPacket; use ingot::types::ParseControl; use ingot::types::ParseError as IngotParseErr; @@ -1034,7 +1035,7 @@ impl From<&PacketHeaders> for InnerFlowId { AddrPair::V4 { src: pkt.source(), dst: pkt.destination() }, ), Some(L3::Ipv6(pkt)) => ( - pkt.next_header().0, + pkt.next_layer().unwrap_or_default().0, AddrPair::V6 { src: pkt.source(), dst: pkt.destination() }, ), None => (255, FLOW_ID_DEFAULT.addrs), @@ -1429,7 +1430,7 @@ impl Packet2> { } pub fn body_csum(&mut self) -> Option { - *self.state.body_csum.get(|| { + let out = *self.state.body_csum.get(|| { let use_pseudo = if let Some(v) = self.state.meta.inner_ulp() { !matches!(v, Ulp::IcmpV4(_)) } else { @@ -1463,7 +1464,22 @@ impl Packet2> { } v }) - }) + }); + + let mut manual = Checksum::default(); + if let Some(segs) = self.body_segs() { + for seg in segs { + manual.add_bytes(*seg); + } + + opte::engine::err!( + "think my csum is {:?}, reality is {:?}", + out.map(|mut v| v.finalize()), + manual.finalize() + ); + } + + out } pub fn l4_hash(&mut self) -> u32 { @@ -1613,7 +1629,7 @@ fn l3_pseudo_header(l3: &L3) -> Checksum { pseudo_hdr_bytes[0..16].copy_from_slice(&v6.source().as_ref()); pseudo_hdr_bytes[16..32] .copy_from_slice(&v6.destination().as_ref()); - pseudo_hdr_bytes[39] = v6.next_header().0; + pseudo_hdr_bytes[39] = v6.next_layer().unwrap_or_default().0; let ulp_len = v6.payload_len() as u32; pseudo_hdr_bytes[32..36].copy_from_slice(&ulp_len.to_be_bytes()); Checksum::compute(&pseudo_hdr_bytes) From 
d1dd000c7cc64c89fd062e0318930c2ef375001e Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 13 Sep 2024 13:19:25 +0100 Subject: [PATCH 028/115] Some extra constraints, in case we can make Ptr storage work. --- lib/opte/src/engine/ingot_packet.rs | 53 +++-- lib/opte/src/engine/mod.rs | 38 +++- lib/oxide-vpc/src/engine/mod.rs | 22 +- xde/src/xde.rs | 302 ++++++++++++++-------------- 4 files changed, 227 insertions(+), 188 deletions(-) diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 1a485353..d5bdc0ad 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -661,9 +661,9 @@ impl Drop for PktBodyWalker { impl PktBodyWalker { fn reify_body_segs(&self) where - ::Chunk: ByteSlice, + ::Chunk: ByteSliceMut, { - if let Some((first, mut rest)) = self.base.take() { + if let Some((mut first, mut rest)) = self.base.take() { // SAFETY: ByteSlice requires as part of its API // that any implementors are stable, so we will always // get the same view via deref. We are then consuming them @@ -676,8 +676,8 @@ impl PktBodyWalker { // sourced from an exclusive borrow on something which ownas a [u8]). // This allows us to cast to &mut later, but not here! 
let mut to_hold = vec![]; - if let Some(ref chunk) = first { - let as_bytes = chunk.deref(); + if let Some(ref mut chunk) = first { + let as_bytes = chunk.deref_mut(); to_hold.push(unsafe { core::mem::transmute(as_bytes) }); } @@ -708,7 +708,7 @@ impl PktBodyWalker { fn body_segs(&self) -> &[&[u8]] where - T::Chunk: ByteSlice, + T::Chunk: ByteSliceMut, { let mut slice_ptr = self.slice.load(core::sync::atomic::Ordering::Relaxed); @@ -939,11 +939,17 @@ impl PacketHeaders { matches!(self.inner_ulp(), Some(Ulp::Tcp(_))) } - pub fn body_segs(&self) -> &[&[u8]] { + pub fn body_segs(&self) -> &[&[u8]] + where + T::Chunk: ByteSliceMut, + { self.body.body_segs() } - pub fn copy_remaining(&self) -> Vec { + pub fn copy_remaining(&self) -> Vec + where + T::Chunk: ByteSliceMut, + { let base = self.body_segs(); let len = base.iter().map(|v| v.len()).sum(); let mut out = Vec::with_capacity(len); @@ -953,7 +959,10 @@ impl PacketHeaders { out } - pub fn append_remaining(&self, buf: &mut Vec) { + pub fn append_remaining(&self, buf: &mut Vec) + where + T::Chunk: ByteSliceMut, + { let base = self.body_segs(); let len = base.iter().map(|v| v.len()).sum(); buf.reserve_exact(len); @@ -1077,7 +1086,10 @@ impl Packet2> { } } -impl Packet2> { +impl<'a, T: Read + 'a> Packet2> +where + T::Chunk: zerocopy::IntoByteSlice<'a>, +{ #[inline] pub fn parse( self, @@ -1402,7 +1414,10 @@ impl Packet2> { } #[inline] - pub fn body_segs(&self) -> Option<&[&[u8]]> { + pub fn body_segs(&self) -> Option<&[&[u8]]> + where + T::Chunk: ByteSliceMut, + { let out = self.state.meta.body_segs(); if out.is_empty() { None @@ -1466,18 +1481,14 @@ impl Packet2> { }) }); - let mut manual = Checksum::default(); - if let Some(segs) = self.body_segs() { - for seg in segs { - manual.add_bytes(*seg); - } + // let mut manual = Checksum::default(); + // if let Some(segs) = self.body_segs() { + // for seg in segs { + // manual.add_bytes(*seg); + // } - opte::engine::err!( - "think my csum is {:?}, reality is {:?}", - 
out.map(|mut v| v.finalize()), - manual.finalize() - ); - } + // opte::engine::err!("think my csum is {:?}, reality is {:?}", out.map(|mut v| v.finalize()), manual.finalize()); + // } out } diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index 9115b097..9b3ef2dd 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -57,6 +57,7 @@ use ingot_packet::Parsed2; use ingot_packet::ParsedMblk; use ip4::IpError; pub use opte_api::Direction; +use zerocopy::ByteSliceMut; // TODO Currently I'm using this for parsing many different things. It // might be wise to have different parse error types. E.g., one for @@ -283,7 +284,8 @@ pub trait NetworkImpl { uft_out: &FlowTable>, ) -> Result where - T: Read; + T: Read, + T::Chunk: ByteSliceMut; /// Return the parser for this network implementation. fn parser(&self) -> Self::Parser; @@ -298,19 +300,23 @@ pub trait NetworkParser { /// /// An outbound packet is one travelling from the [`port::Port`] /// client to the network. - fn parse_outbound( + fn parse_outbound<'a, T: Read + 'a>( &self, rdr: T, - ) -> Result, ParseError>; + ) -> Result, ParseError> + where + T::Chunk: zerocopy::IntoByteSlice<'a>; /// Parse an inbound packet. /// /// An inbound packet is one traveling from the network to the /// [`port::Port`] client. - fn parse_inbound( + fn parse_inbound<'a, T: Read + 'a>( &self, rdr: T, - ) -> Result, ParseError>; + ) -> Result, ParseError> + where + T::Chunk: zerocopy::IntoByteSlice<'a>; } /// A generic ULP parser, useful for testing inside of the opte crate @@ -320,24 +326,36 @@ pub struct GenericUlp {} impl GenericUlp { /// Parse a generic L2 + L3 + L4 packet, storing the headers in /// the inner position. 
- fn parse_ulp(&self, rdr: T) -> Result, ParseError> { + fn parse_ulp<'a, T: Read + 'a>( + &self, + rdr: T, + ) -> Result, ParseError> + where + T::Chunk: zerocopy::IntoByteSlice<'a>, + { let v = NoEncap::parse_read(rdr)?; Ok(OpteMeta::convert_ingot(v)) } } impl NetworkParser for GenericUlp { - fn parse_inbound( + fn parse_inbound<'a, T: Read + 'a>( &self, rdr: T, - ) -> Result, ParseError> { + ) -> Result, ParseError> + where + T::Chunk: zerocopy::IntoByteSlice<'a>, + { self.parse_ulp(rdr) } - fn parse_outbound( + fn parse_outbound<'a, T: Read + 'a>( &self, rdr: T, - ) -> Result, ParseError> { + ) -> Result, ParseError> + where + T::Chunk: zerocopy::IntoByteSlice<'a>, + { self.parse_ulp(rdr) } } diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index 5e3d73c9..65463252 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -81,7 +81,10 @@ impl VpcNetwork { fn handle_arp_out( &self, pkt: &mut Packet2>, - ) -> Result { + ) -> Result + where + T::Chunk: ByteSliceMut, + { let body = pkt .body_segs() .ok_or_else(|| HdlPktError("outbound ARP (no body)"))?; @@ -114,7 +117,8 @@ impl NetworkImpl for VpcNetwork { _uft_in: &FlowTable>, _uft_out: &FlowTable>, ) -> Result -// where T::Chunk: ByteSliceMut + where + T::Chunk: ByteSliceMut, { match (dir, pkt.meta().inner_ether().ethertype()) { (Direction::Out, Ethertype::ARP) => self.handle_arp_out(pkt), @@ -130,19 +134,25 @@ impl NetworkImpl for VpcNetwork { impl NetworkParser for VpcParser { #[inline] - fn parse_outbound( + fn parse_outbound<'a, T: Read + 'a>( &self, rdr: T, - ) -> Result, ParseError> { + ) -> Result, ParseError> + where + T::Chunk: zerocopy::IntoByteSlice<'a>, + { let v = NoEncap::parse_read(rdr); Ok(OpteMeta::convert_ingot(v?)) } #[inline] - fn parse_inbound( + fn parse_inbound<'a, T: Read + 'a>( &self, rdr: T, - ) -> Result, ParseError> { + ) -> Result, ParseError> + where + T::Chunk: zerocopy::IntoByteSlice<'a>, + { let v = 
GeneveOverV6::parse_read(rdr); Ok(OpteMeta::convert_ingot(v?)) } diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 0495dd19..125afce6 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -1595,125 +1595,125 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { let port = &src_dev.port; // BEGIN THIN_PROCESS RE-EXPERIMENT - let mut ip6_src = Default::default(); - let mut ip6_dst = Default::default(); - let f_hash; - if let Ok(decision) = port.thin_process(Direction::Out, &mut parsed_pkt) { - match decision { - opte::engine::port::ThinProcRes::PushEncap(eth, ip, udp) => { - f_hash = parsed_pkt.l4_hash(); - drop(parsed_pkt); - - // TODO: generate methods to fill a maybeuninit. - // total bytes: ETH 14, V6 40, UDP 8, GENEVE 8 - let new_hdrs = 14 + 40 + 8 + 8; - let mut new_blk = MsgBlk::new_with_headroom(2, new_hdrs); - - let w_encap_bytes = (pkt_len_old + 16) as u16; - - new_blk.write(14, |uninit| { - let complete_eth = opte::ingot::ethernet::Ethernet { - destination: eth.dst.bytes().into(), - source: eth.src.bytes().into(), - ethertype: ingot::ethernet::Ethertype( - eth.ether_type.into(), - ), - }; - - complete_eth - .emit_uninit(uninit) - .expect("must be enough room..."); - }); - - // we know we'er only pushing v6. 
- let IpPush::Ip6(v6) = ip else { panic!() }; - ip6_src = v6.src; - ip6_dst = v6.dst; - - new_blk.write(40, |uninit| { - let complete_v6 = opte::ingot::ip::Ipv6 { - version: 6, - dscp: 0, - ecn: ingot::ip::Ecn::NotCapable, - flow_label: 12345678, - payload_len: w_encap_bytes, - next_header: ingot::ip::IpProtocol(v6.proto.into()), - hop_limit: 128, - source: v6.src.bytes().into(), - destination: v6.dst.bytes().into(), - v6ext: vec![].into(), - }; - - complete_v6 - .emit_uninit(uninit) - .expect("must be enough room..."); - }); - - let EncapPush::Geneve(gen) = udp else { panic!() }; - new_blk.write(16, |uninit| { - let complete_udp = opte::ingot::udp::Udp { - source: gen.entropy, - destination: 6081, - length: w_encap_bytes, - checksum: 0, - }; - let complete_geneve = opte::ingot::geneve::Geneve { - version: 0, - opt_len: 0, - flags: opte::ingot::geneve::GeneveFlags::empty(), - protocol_type: - opte::ingot::ethernet::Ethertype::ETHERNET, - vni: gen.vni.into(), - reserved: 0, - options: Vec::new(), - }; - - let len = complete_udp - .emit_uninit(uninit) - .expect("must be enough room..."); - complete_geneve - .emit_uninit(&mut uninit[len..]) - .expect("must be enough room..."); - }); - - core::mem::swap(&mut new_blk, &mut pkt); - pkt.extend_if_one(new_blk); - } - // we're in Tx for a ULP'd pkt -- this should NEVER happen. - opte::engine::port::ThinProcRes::PopEncap => unreachable!(), - opte::engine::port::ThinProcRes::Na => unreachable!(), - } - - if ip6_dst == ip6_src { - // todo. broken just now ig - // return guest_loopback(src_dev, pkt, vni); - opte::engine::err!("eh?"); - return ptr::null_mut(); - } - - let my_key = RouteKey { dst: ip6_dst, l4_hash: Some(f_hash) }; - let Route { src, dst, underlay_dev } = - src_dev.routes.next_hop(my_key, src_dev); - - // Get a pointer to the beginning of the outer frame and - // fill in the dst/src addresses before sending out the - // device. 
- let mblk = pkt.unwrap_mblk(); - let rptr = (*mblk).b_rptr; - ptr::copy(dst.as_ptr(), rptr, 6); - ptr::copy(src.as_ptr(), rptr.add(6), 6); - // Unwrap: We know the packet is good because we just - // unwrapped it above. - let new_pkt = MsgBlk::wrap_mblk(mblk).unwrap(); - - underlay_dev.stream.tx_drop_on_no_desc2( - new_pkt, - hint, - MacTxFlags::empty(), - ); - - return ptr::null_mut(); - } + // let mut ip6_src = Default::default(); + // let mut ip6_dst = Default::default(); + // let f_hash; + // if let Ok(decision) = port.thin_process(Direction::Out, &mut parsed_pkt) { + // match decision { + // opte::engine::port::ThinProcRes::PushEncap(eth, ip, udp) => { + // f_hash = parsed_pkt.l4_hash(); + // drop(parsed_pkt); + + // // TODO: generate methods to fill a maybeuninit. + // // total bytes: ETH 14, V6 40, UDP 8, GENEVE 8 + // let new_hdrs = 14 + 40 + 8 + 8; + // let mut new_blk = MsgBlk::new_with_headroom(2, new_hdrs); + + // let w_encap_bytes = (pkt_len_old + 16) as u16; + + // new_blk.write(14, |uninit| { + // let complete_eth = opte::ingot::ethernet::Ethernet { + // destination: eth.dst.bytes().into(), + // source: eth.src.bytes().into(), + // ethertype: ingot::ethernet::Ethertype( + // eth.ether_type.into(), + // ), + // }; + + // complete_eth + // .emit_uninit(uninit) + // .expect("must be enough room..."); + // }); + + // // we know we'er only pushing v6. 
+ // let IpPush::Ip6(v6) = ip else { panic!() }; + // ip6_src = v6.src; + // ip6_dst = v6.dst; + + // new_blk.write(40, |uninit| { + // let complete_v6 = opte::ingot::ip::Ipv6 { + // version: 6, + // dscp: 0, + // ecn: ingot::ip::Ecn::NotCapable, + // flow_label: 12345678, + // payload_len: w_encap_bytes, + // next_header: ingot::ip::IpProtocol(v6.proto.into()), + // hop_limit: 128, + // source: v6.src.bytes().into(), + // destination: v6.dst.bytes().into(), + // v6ext: vec![].into(), + // }; + + // complete_v6 + // .emit_uninit(uninit) + // .expect("must be enough room..."); + // }); + + // let EncapPush::Geneve(gen) = udp else { panic!() }; + // new_blk.write(16, |uninit| { + // let complete_udp = opte::ingot::udp::Udp { + // source: gen.entropy, + // destination: 6081, + // length: w_encap_bytes, + // checksum: 0, + // }; + // let complete_geneve = opte::ingot::geneve::Geneve { + // version: 0, + // opt_len: 0, + // flags: opte::ingot::geneve::GeneveFlags::empty(), + // protocol_type: + // opte::ingot::ethernet::Ethertype::ETHERNET, + // vni: gen.vni.into(), + // reserved: 0, + // options: Vec::new(), + // }; + + // let len = complete_udp + // .emit_uninit(uninit) + // .expect("must be enough room..."); + // complete_geneve + // .emit_uninit(&mut uninit[len..]) + // .expect("must be enough room..."); + // }); + + // core::mem::swap(&mut new_blk, &mut pkt); + // pkt.extend_if_one(new_blk); + // } + // // we're in Tx for a ULP'd pkt -- this should NEVER happen. + // opte::engine::port::ThinProcRes::PopEncap => unreachable!(), + // opte::engine::port::ThinProcRes::Na => unreachable!(), + // } + + // if ip6_dst == ip6_src { + // // todo. 
broken just now ig + // // return guest_loopback(src_dev, pkt, vni); + // opte::engine::err!("eh?"); + // return ptr::null_mut(); + // } + + // let my_key = RouteKey { dst: ip6_dst, l4_hash: Some(f_hash) }; + // let Route { src, dst, underlay_dev } = + // src_dev.routes.next_hop(my_key, src_dev); + + // // Get a pointer to the beginning of the outer frame and + // // fill in the dst/src addresses before sending out the + // // device. + // let mblk = pkt.unwrap_mblk(); + // let rptr = (*mblk).b_rptr; + // ptr::copy(dst.as_ptr(), rptr, 6); + // ptr::copy(src.as_ptr(), rptr.add(6), 6); + // // Unwrap: We know the packet is good because we just + // // unwrapped it above. + // let new_pkt = MsgBlk::wrap_mblk(mblk).unwrap(); + + // underlay_dev.stream.tx_drop_on_no_desc2( + // new_pkt, + // hint, + // MacTxFlags::empty(), + // ); + + // return ptr::null_mut(); + // } // END THIN_PROCESS RE-EXPERIMENT // The port processing code will fire a probe that describes what @@ -2072,38 +2072,38 @@ unsafe fn xde_rx_one( let port = &dev.port; // BEGIN THIN_PROCESS EXPERIMENT - let h = parsed_pkt.meta(); - - let pop_len: usize = 70; //h.outer_ether().packet_length() + h.outer_l3 + h.outer_encap; - - if let Ok(decision) = port.thin_process(Direction::In, &mut parsed_pkt) { - match decision { - opte::engine::port::ThinProcRes::PopEncap => { - let mut to_pop = pop_len; - drop(parsed_pkt); - for layer in pkt.iter_mut() { - let max_drop = layer.len(); - let will_drop = max_drop.min(to_pop); - layer.drop_front_bytes(will_drop); - to_pop -= will_drop; - - if to_pop == 0 { - break; - } - } - - // could theoretically have empty segments here. - // not an issue over NIC for now. 
- mac::mac_rx(dev.mh, mrh, pkt.unwrap_mblk()); - } - // we know this to be true given how we cfg opte - opte::engine::port::ThinProcRes::PushEncap(_, _, _) => { - unreachable!() - } - opte::engine::port::ThinProcRes::Na => unreachable!(), - } - return; - } + // let h = parsed_pkt.meta(); + + // let pop_len: usize = 70; //h.outer_ether().packet_length() + h.outer_l3 + h.outer_encap; + + // if let Ok(decision) = port.thin_process(Direction::In, &mut parsed_pkt) { + // match decision { + // opte::engine::port::ThinProcRes::PopEncap => { + // let mut to_pop = pop_len; + // drop(parsed_pkt); + // for layer in pkt.iter_mut() { + // let max_drop = layer.len(); + // let will_drop = max_drop.min(to_pop); + // layer.drop_front_bytes(will_drop); + // to_pop -= will_drop; + + // if to_pop == 0 { + // break; + // } + // } + + // // could theoretically have empty segments here. + // // not an issue over NIC for now. + // mac::mac_rx(dev.mh, mrh, pkt.unwrap_mblk()); + // } + // // we know this to be true given how we cfg opte + // opte::engine::port::ThinProcRes::PushEncap(_, _, _) => { + // unreachable!() + // } + // opte::engine::port::ThinProcRes::Na => unreachable!(), + // } + // return; + // } // END THIN_PROCESS EXPERIMENT let res = port.process(Direction::In, &mut parsed_pkt, ActionMeta::new()); From c6ab7f268d99222fbc84735368df9f8071a63994 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 13 Sep 2024 22:52:24 +0100 Subject: [PATCH 029/115] Pull in latest ingot -- 2.8Gbps w/o faster path --- Cargo.lock | 6 +++--- Cargo.toml | 2 +- lib/opte/src/engine/ingot_packet.rs | 2 +- lib/opte/src/engine/mod.rs | 10 +++++----- lib/oxide-vpc/src/engine/mod.rs | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 56d8c75e..68f15fae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -882,7 +882,7 @@ dependencies = [ [[package]] name = "ingot" version = "0.1.0" -source = 
"git+https://github.com/oxidecomputer/ingot.git?rev=8b6b16d8e7602ff0be4909880f428b1c40f28323#8b6b16d8e7602ff0be4909880f428b1c40f28323" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=488363618f4b85f89475546db029f1c4e6b27ce7#488363618f4b85f89475546db029f1c4e6b27ce7" dependencies = [ "bitflags 2.6.0", "ingot-macros", @@ -895,7 +895,7 @@ dependencies = [ [[package]] name = "ingot-macros" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=8b6b16d8e7602ff0be4909880f428b1c40f28323#8b6b16d8e7602ff0be4909880f428b1c40f28323" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=488363618f4b85f89475546db029f1c4e6b27ce7#488363618f4b85f89475546db029f1c4e6b27ce7" dependencies = [ "darling", "itertools 0.13.0", @@ -909,7 +909,7 @@ dependencies = [ [[package]] name = "ingot-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=8b6b16d8e7602ff0be4909880f428b1c40f28323#8b6b16d8e7602ff0be4909880f428b1c40f28323" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=488363618f4b85f89475546db029f1c4e6b27ce7#488363618f4b85f89475546db029f1c4e6b27ce7" dependencies = [ "heapless", "ingot-macros", diff --git a/Cargo.toml b/Cargo.toml index e85c57ff..1860e968 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,7 +50,7 @@ ctor = "0.2" darling = "0.20" dyn-clone = "1.0" heapless = "0.8" -ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "8b6b16d8e7602ff0be4909880f428b1c40f28323"} +ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "488363618f4b85f89475546db029f1c4e6b27ce7"} ipnetwork = { version = "0.20", default-features = false } itertools = { version = "0.13", default-features = false } libc = "0.2" diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index d5bdc0ad..81b9af3b 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -1088,7 +1088,7 @@ impl Packet2> { impl<'a, T: Read + 'a> Packet2> 
where - T::Chunk: zerocopy::IntoByteSlice<'a>, + T::Chunk: ingot::types::IntoBufPointer<'a>, { #[inline] pub fn parse( diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index 9b3ef2dd..03b7694c 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -305,7 +305,7 @@ pub trait NetworkParser { rdr: T, ) -> Result, ParseError> where - T::Chunk: zerocopy::IntoByteSlice<'a>; + T::Chunk: ingot::types::IntoBufPointer<'a>; /// Parse an inbound packet. /// @@ -316,7 +316,7 @@ pub trait NetworkParser { rdr: T, ) -> Result, ParseError> where - T::Chunk: zerocopy::IntoByteSlice<'a>; + T::Chunk: ingot::types::IntoBufPointer<'a>; } /// A generic ULP parser, useful for testing inside of the opte crate @@ -331,7 +331,7 @@ impl GenericUlp { rdr: T, ) -> Result, ParseError> where - T::Chunk: zerocopy::IntoByteSlice<'a>, + T::Chunk: ingot::types::IntoBufPointer<'a>, { let v = NoEncap::parse_read(rdr)?; Ok(OpteMeta::convert_ingot(v)) @@ -344,7 +344,7 @@ impl NetworkParser for GenericUlp { rdr: T, ) -> Result, ParseError> where - T::Chunk: zerocopy::IntoByteSlice<'a>, + T::Chunk: ingot::types::IntoBufPointer<'a>, { self.parse_ulp(rdr) } @@ -354,7 +354,7 @@ impl NetworkParser for GenericUlp { rdr: T, ) -> Result, ParseError> where - T::Chunk: zerocopy::IntoByteSlice<'a>, + T::Chunk: ingot::types::IntoBufPointer<'a>, { self.parse_ulp(rdr) } diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index 65463252..94cbef9c 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -139,7 +139,7 @@ impl NetworkParser for VpcParser { rdr: T, ) -> Result, ParseError> where - T::Chunk: zerocopy::IntoByteSlice<'a>, + T::Chunk: opte::ingot::types::IntoBufPointer<'a>, { let v = NoEncap::parse_read(rdr); Ok(OpteMeta::convert_ingot(v?)) @@ -151,7 +151,7 @@ impl NetworkParser for VpcParser { rdr: T, ) -> Result, ParseError> where - T::Chunk: zerocopy::IntoByteSlice<'a>, + T::Chunk: 
opte::ingot::types::IntoBufPointer<'a>, { let v = GeneveOverV6::parse_read(rdr); Ok(OpteMeta::convert_ingot(v?)) From 7123158171e18fc3e5227714a9297cded8e36312 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 25 Sep 2024 12:11:57 +0100 Subject: [PATCH 030/115] Bump up to latest. --- Cargo.lock | 24 ++++++++++++------------ Cargo.toml | 4 ++-- lib/opte/src/engine/ingot_packet.rs | 2 +- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 68f15fae..da89f551 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -882,20 +882,20 @@ dependencies = [ [[package]] name = "ingot" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=488363618f4b85f89475546db029f1c4e6b27ce7#488363618f4b85f89475546db029f1c4e6b27ce7" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=facb123e8b5c0dc08fd74c1c8f4945b82be2c91c#facb123e8b5c0dc08fd74c1c8f4945b82be2c91c" dependencies = [ "bitflags 2.6.0", "ingot-macros", "ingot-types", "macaddr", "serde", - "zerocopy 0.8.0-alpha.21", + "zerocopy 0.8.0-alpha.25", ] [[package]] name = "ingot-macros" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=488363618f4b85f89475546db029f1c4e6b27ce7#488363618f4b85f89475546db029f1c4e6b27ce7" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=facb123e8b5c0dc08fd74c1c8f4945b82be2c91c#facb123e8b5c0dc08fd74c1c8f4945b82be2c91c" dependencies = [ "darling", "itertools 0.13.0", @@ -909,12 +909,12 @@ dependencies = [ [[package]] name = "ingot-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=488363618f4b85f89475546db029f1c4e6b27ce7#488363618f4b85f89475546db029f1c4e6b27ce7" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=facb123e8b5c0dc08fd74c1c8f4945b82be2c91c#facb123e8b5c0dc08fd74c1c8f4945b82be2c91c" dependencies = [ "heapless", "ingot-macros", "macaddr", - "zerocopy 0.8.0-alpha.21", + "zerocopy 0.8.0-alpha.25", ] [[package]] @@ -1273,7 +1273,7 @@ dependencies 
= [ "tabwriter", "usdt", "version_check", - "zerocopy 0.8.0-alpha.21", + "zerocopy 0.8.0-alpha.25", ] [[package]] @@ -1375,7 +1375,7 @@ dependencies = [ "smoltcp", "tabwriter", "usdt", - "zerocopy 0.8.0-alpha.21", + "zerocopy 0.8.0-alpha.25", ] [[package]] @@ -2747,11 +2747,11 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.0-alpha.21" +version = "0.8.0-alpha.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b945226be416f7fcacff01ad61474f192b3f789a3ceee54d48cb1e66d929e449" +checksum = "ade16fe7106200b0c121a3251c106c40ffcdecdab68122de5909643d22db075e" dependencies = [ - "zerocopy-derive 0.8.0-alpha.21", + "zerocopy-derive 0.8.0-alpha.25", ] [[package]] @@ -2767,9 +2767,9 @@ dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.8.0-alpha.21" +version = "0.8.0-alpha.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cd62f40c5831a236cc3750ce94e668c06d68af2579c1703b1d4e769eeb8e646" +checksum = "7873cce5410d6ff897beb4b4847366c1013fcda5ec96387a74fa4e0d2580025b" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 1860e968..2936ed05 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,7 +50,7 @@ ctor = "0.2" darling = "0.20" dyn-clone = "1.0" heapless = "0.8" -ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "488363618f4b85f89475546db029f1c4e6b27ce7"} +ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "facb123e8b5c0dc08fd74c1c8f4945b82be2c91c"} ipnetwork = { version = "0.20", default-features = false } itertools = { version = "0.13", default-features = false } libc = "0.2" @@ -75,7 +75,7 @@ thiserror = "1.0" toml = "0.8" usdt = "0.5" version_check = "0.9" -zerocopy = { version = "0.8.0-alpha.21", features = ["derive"] } +zerocopy = { version = "0.8.0-alpha.25", features = ["derive"] } zone = { git = "https://github.com/oxidecomputer/zone" } ztest = { git = "https://github.com/oxidecomputer/falcon", branch = "main" } 
poptrie = { git = "https://github.com/oxidecomputer/poptrie", branch = "multipath" } diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 81b9af3b..6826152b 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -133,7 +133,7 @@ pub struct GeneveOverV6 { } #[inline] -fn exit_on_arp(eth: &EthernetPacket) -> ParseControl { +fn exit_on_arp(eth: &ValidEthernet) -> ParseControl { if eth.ethertype() == Ethertype::ARP { ParseControl::Accept } else { From 42e160430927b83006c08acb9e683774eef0711e Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 25 Sep 2024 13:29:07 +0100 Subject: [PATCH 031/115] Early plumbing for stage1/light meta --- lib/opte/src/engine/ingot_packet.rs | 150 +++++++++++++++++++++++++++- lib/opte/src/engine/mod.rs | 21 ++++ lib/oxide-vpc/src/engine/mod.rs | 6 ++ 3 files changed, 175 insertions(+), 2 deletions(-) diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 6826152b..9350e046 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -50,6 +50,8 @@ use super::packet::ParseError; use super::packet::FLOW_ID_DEFAULT; use super::rule::HdrTransform; use super::rule::HdrTransformError; +use super::FlowKey; +use super::LightweightMeta; use super::NetworkParser; use alloc::boxed::Box; use alloc::sync::Arc; @@ -126,9 +128,7 @@ pub struct GeneveOverV6 { pub outer_encap: GenevePacket, pub inner_eth: EthernetPacket, - // pub inner_l3: L3, pub inner_l3: L3, - // pub inner_ulp: L4, pub inner_ulp: Ulp, } @@ -149,6 +149,118 @@ pub struct NoEncap { pub inner_ulp: Option>, } +// impl<'a, T: ByteSlice> From<&'a NoEncap> for InnerFlowId { +// #[inline] +// fn from(meta: &NoEncap) -> Self { +// let (proto, addrs) = match &meta.inner_l3 { +// Some(L3::Ipv4(pkt)) => ( +// pkt.protocol().0, +// AddrPair::V4 { src: pkt.source(), dst: pkt.destination() }, +// ), +// Some(L3::Ipv6(pkt)) => ( +// 
pkt.next_layer().unwrap_or_default().0, +// AddrPair::V6 { src: pkt.source(), dst: pkt.destination() }, +// ), +// None => (255, FLOW_ID_DEFAULT.addrs), +// }; + +// let (src_port, dst_port) = meta +// .inner_ulp +// .as_ref() +// .map(|ulp| { +// ( +// actual_src_port(ulp) +// .or_else(|| pseudo_port(ulp)) +// .unwrap_or(0), +// actual_dst_port(ulp) +// .or_else(|| pseudo_port(ulp)) +// .unwrap_or(0), +// ) +// }) +// .unwrap_or((0, 0)); + +// InnerFlowId { proto: proto.into(), addrs, src_port, dst_port } +// } +// } + +impl FlowKey for ValidNoEncap { + #[inline] + fn flow(&self) -> InnerFlowId { + let (proto, addrs) = match &self.inner_l3 { + Some(ValidL3::Ipv4(pkt)) => ( + pkt.protocol().0, + AddrPair::V4 { src: pkt.source(), dst: pkt.destination() }, + ), + Some(ValidL3::Ipv6(pkt)) => ( + pkt.next_layer().unwrap_or_default().0, + AddrPair::V6 { src: pkt.source(), dst: pkt.destination() }, + ), + None => (255, FLOW_ID_DEFAULT.addrs), + }; + + let (src_port, dst_port) = self + .inner_ulp + .as_ref() + .map(|ulp| { + ( + actual_src_port_v(ulp) + .or_else(|| pseudo_port_v(ulp)) + .unwrap_or(0), + actual_dst_port_v(ulp) + .or_else(|| pseudo_port_v(ulp)) + .unwrap_or(0), + ) + }) + .unwrap_or((0, 0)); + + InnerFlowId { proto: proto.into(), addrs, src_port, dst_port } + } +} + +impl From> for OpteMeta { + #[inline] + fn from(value: ValidNoEncap) -> Self { + NoEncap::from(value).into() + } +} + +impl LightweightMeta for ValidNoEncap {} + +impl FlowKey for ValidGeneveOverV6 { + #[inline] + fn flow(&self) -> InnerFlowId { + let (proto, addrs) = match &self.inner_l3 { + ValidL3::Ipv4(pkt) => ( + pkt.protocol().0, + AddrPair::V4 { src: pkt.source(), dst: pkt.destination() }, + ), + ValidL3::Ipv6(pkt) => ( + pkt.next_layer().unwrap_or_default().0, + AddrPair::V6 { src: pkt.source(), dst: pkt.destination() }, + ), + }; + + let src_port = actual_src_port_v(&self.inner_ulp) + .or_else(|| pseudo_port_v(&self.inner_ulp)) + .unwrap_or(0); + + let dst_port = 
actual_dst_port_v(&self.inner_ulp) + .or_else(|| pseudo_port_v(&self.inner_ulp)) + .unwrap_or(0); + + InnerFlowId { proto: proto.into(), addrs, src_port, dst_port } + } +} + +impl From> for OpteMeta { + #[inline] + fn from(value: ValidGeneveOverV6) -> Self { + GeneveOverV6::from(value).into() + } +} + +impl LightweightMeta for ValidGeneveOverV6 {} + // --- REWRITE IN PROGRESS --- #[derive(Debug)] pub struct MsgBlk { @@ -491,6 +603,8 @@ pub struct OpteMeta { } pub type Test = OpteMeta<&'static [u8]>; +pub type Test2 = ValidNoEncap<&'static [u8]>; +pub type Test3 = ValidGeneveOverV6<&'static [u8]>; pub type OpteParsed = IngotParsed::Chunk>, T>; @@ -1011,6 +1125,14 @@ fn actual_src_port(chunk: &Ulp) -> Option { } } +fn actual_src_port_v(chunk: &ValidUlp) -> Option { + match chunk { + ValidUlp::Tcp(pkt) => Some(pkt.source()), + ValidUlp::Udp(pkt) => Some(pkt.source()), + _ => None, + } +} + fn actual_dst_port(chunk: &Ulp) -> Option { match chunk { Ulp::Tcp(pkt) => Some(pkt.destination()), @@ -1019,6 +1141,14 @@ fn actual_dst_port(chunk: &Ulp) -> Option { } } +fn actual_dst_port_v(chunk: &ValidUlp) -> Option { + match chunk { + ValidUlp::Tcp(pkt) => Some(pkt.destination()), + ValidUlp::Udp(pkt) => Some(pkt.destination()), + _ => None, + } +} + fn pseudo_port(chunk: &Ulp) -> Option { match chunk { Ulp::IcmpV4(pkt) @@ -1035,6 +1165,22 @@ fn pseudo_port(chunk: &Ulp) -> Option { } } +fn pseudo_port_v(chunk: &ValidUlp) -> Option { + match chunk { + ValidUlp::IcmpV4(pkt) + if pkt.code() == 0 && (pkt.ty() == 0 || pkt.ty() == 8) => + { + Some(u16::from_be_bytes(pkt.rest_of_hdr()[..2].try_into().unwrap())) + } + ValidUlp::IcmpV6(pkt) + if pkt.code() == 0 && (pkt.ty() == 128 || pkt.ty() == 129) => + { + Some(u16::from_be_bytes(pkt.rest_of_hdr()[..2].try_into().unwrap())) + } + _ => None, + } +} + impl From<&PacketHeaders> for InnerFlowId { #[inline] fn from(meta: &PacketHeaders) -> Self { diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index 
03b7694c..3c519b0f 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -55,8 +55,10 @@ use ingot_packet::Packet2; use ingot_packet::PacketHeaders; use ingot_packet::Parsed2; use ingot_packet::ParsedMblk; +use ingot_packet::ValidNoEncap; use ip4::IpError; pub use opte_api::Direction; +use zerocopy::ByteSlice; use zerocopy::ByteSliceMut; // TODO Currently I'm using this for parsing many different things. It @@ -296,6 +298,9 @@ pub trait NetworkImpl { /// This provides parsing for inbound/outbound packets for a given /// [`NetworkImpl`]. pub trait NetworkParser { + type InMeta: LightweightMeta; + type OutMeta: LightweightMeta; + /// Parse an outbound packet. /// /// An outbound packet is one travelling from the [`port::Port`] @@ -319,6 +324,19 @@ pub trait NetworkParser { T::Chunk: ingot::types::IntoBufPointer<'a>; } +/// Header formats which allow a flow ID to be read out, and which can be converted +/// into the shared `OpteMeta` format. +pub trait LightweightMeta: Into> + FlowKey {} + +// This is a separate trait since `where for<'a> &'a Self: Into` +// had *awful* ergonomics around that bound's propagation. +/// Headers which allow a flow ID to be read out. +pub trait FlowKey { + /// Return the flow ID (5-tuple, or other composite key) which + /// identifies this packet's parent flow. + fn flow(&self) -> InnerFlowId; +} + /// A generic ULP parser, useful for testing inside of the opte crate /// itself. 
pub struct GenericUlp {} @@ -339,6 +357,9 @@ impl GenericUlp { } impl NetworkParser for GenericUlp { + type InMeta = ValidNoEncap; + type OutMeta = ValidNoEncap; + fn parse_inbound<'a, T: Read + 'a>( &self, rdr: T, diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index 94cbef9c..83594e9f 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -23,6 +23,8 @@ use opte::engine::ingot_packet::OpteMeta; use opte::engine::ingot_packet::OpteParsed; use opte::engine::ingot_packet::Packet2; use opte::engine::ingot_packet::Parsed2; +use opte::engine::ingot_packet::ValidGeneveOverV6; +use opte::engine::ingot_packet::ValidNoEncap; use opte::engine::ip4::Protocol; use opte::engine::packet::HeaderOffsets; use opte::engine::packet::InnerFlowId; @@ -48,6 +50,7 @@ use opte::engine::ingot_base::EthernetRef; use opte::engine::ip4::Ipv4Addr; use opte::ingot::ethernet::Ethertype; use opte::ingot::types::Read; +use zerocopy::ByteSlice; use zerocopy::ByteSliceMut; #[derive(Clone, Copy, Debug, Default)] @@ -133,6 +136,9 @@ impl NetworkImpl for VpcNetwork { } impl NetworkParser for VpcParser { + type InMeta = ValidGeneveOverV6; + type OutMeta = ValidNoEncap; + #[inline] fn parse_outbound<'a, T: Read + 'a>( &self, From 88f2377283c3b7f0faf52640609976cedfbb8c9f Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 26 Sep 2024 19:31:44 +0100 Subject: [PATCH 032/115] Sketching out the fasterpath. Not yet working. 
--- lib/opte/src/engine/ingot_packet.rs | 486 +++++++++++++++++++--------- lib/opte/src/engine/mod.rs | 36 ++- lib/opte/src/engine/port.rs | 330 ++++++++++++++----- lib/opte/src/engine/rule.rs | 25 +- 4 files changed, 616 insertions(+), 261 deletions(-) diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 9350e046..318f83ae 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -48,9 +48,9 @@ use super::packet::Packet; use super::packet::PacketState; use super::packet::ParseError; use super::packet::FLOW_ID_DEFAULT; +use super::rule::CompiledTransform; use super::rule::HdrTransform; use super::rule::HdrTransformError; -use super::FlowKey; use super::LightweightMeta; use super::NetworkParser; use alloc::boxed::Box; @@ -62,6 +62,7 @@ use core::hash::Hash; use core::marker::PhantomData; use core::mem::ManuallyDrop; use core::mem::MaybeUninit; +use core::num::NonZeroU32; use core::ops::Deref; use core::ops::DerefMut; use core::ptr::NonNull; @@ -149,41 +150,14 @@ pub struct NoEncap { pub inner_ulp: Option>, } -// impl<'a, T: ByteSlice> From<&'a NoEncap> for InnerFlowId { -// #[inline] -// fn from(meta: &NoEncap) -> Self { -// let (proto, addrs) = match &meta.inner_l3 { -// Some(L3::Ipv4(pkt)) => ( -// pkt.protocol().0, -// AddrPair::V4 { src: pkt.source(), dst: pkt.destination() }, -// ), -// Some(L3::Ipv6(pkt)) => ( -// pkt.next_layer().unwrap_or_default().0, -// AddrPair::V6 { src: pkt.source(), dst: pkt.destination() }, -// ), -// None => (255, FLOW_ID_DEFAULT.addrs), -// }; - -// let (src_port, dst_port) = meta -// .inner_ulp -// .as_ref() -// .map(|ulp| { -// ( -// actual_src_port(ulp) -// .or_else(|| pseudo_port(ulp)) -// .unwrap_or(0), -// actual_dst_port(ulp) -// .or_else(|| pseudo_port(ulp)) -// .unwrap_or(0), -// ) -// }) -// .unwrap_or((0, 0)); - -// InnerFlowId { proto: proto.into(), addrs, src_port, dst_port } -// } -// } +impl From> for OpteMeta { + #[inline] + fn from(value: 
ValidNoEncap) -> Self { + NoEncap::from(value).into() + } +} -impl FlowKey for ValidNoEncap { +impl LightweightMeta for ValidNoEncap { #[inline] fn flow(&self) -> InnerFlowId { let (proto, addrs) = match &self.inner_l3 { @@ -215,18 +189,56 @@ impl FlowKey for ValidNoEncap { InnerFlowId { proto: proto.into(), addrs, src_port, dst_port } } + + fn run_compiled_transform(&mut self, transform: &CompiledTransform) { + todo!() + } + + // FIXME: identical to + fn compute_body_csum(&self) -> Option { + let use_pseudo = if let Some(v) = &self.inner_ulp { + !matches!(v, ValidUlp::IcmpV4(_)) + } else { + false + }; + + let pseudo_csum = match self.inner_eth.ethertype() { + Ethertype::IPV4 | Ethertype::IPV6 => { + self.inner_l3.as_ref().map(l3_pseudo_header_v) + } + // Includes ARP. + _ => return None, + }; + + let Some(pseudo_csum) = pseudo_csum else { + return None; + }; + + self.inner_ulp.as_ref().and_then(csum_minus_hdr).map(|mut v| { + if use_pseudo { + v -= pseudo_csum; + } + v + }) + } + + fn encap_len(&self) -> u16 { + todo!() + } + + fn update_ulp_checksums(&mut self, body_csum: OpteCsum) { + todo!() + } } -impl From> for OpteMeta { +impl From> for OpteMeta { #[inline] - fn from(value: ValidNoEncap) -> Self { - NoEncap::from(value).into() + fn from(value: ValidGeneveOverV6) -> Self { + GeneveOverV6::from(value).into() } } -impl LightweightMeta for ValidNoEncap {} - -impl FlowKey for ValidGeneveOverV6 { +impl LightweightMeta for ValidGeneveOverV6 { #[inline] fn flow(&self) -> InnerFlowId { let (proto, addrs) = match &self.inner_l3 { @@ -250,16 +262,23 @@ impl FlowKey for ValidGeneveOverV6 { InnerFlowId { proto: proto.into(), addrs, src_port, dst_port } } -} -impl From> for OpteMeta { - #[inline] - fn from(value: ValidGeneveOverV6) -> Self { - GeneveOverV6::from(value).into() + fn run_compiled_transform(&mut self, transform: &CompiledTransform) { + todo!() } -} -impl LightweightMeta for ValidGeneveOverV6 {} + fn compute_body_csum(&self) -> Option { + todo!() + } + + fn 
encap_len(&self) -> u16 { + todo!() + } + + fn update_ulp_checksums(&mut self, body_csum: OpteCsum) { + todo!() + } +} // --- REWRITE IN PROGRESS --- #[derive(Debug)] @@ -607,6 +626,8 @@ pub type Test2 = ValidNoEncap<&'static [u8]>; pub type Test3 = ValidGeneveOverV6<&'static [u8]>; pub type OpteParsed = IngotParsed::Chunk>, T>; +pub type OpteParsed2::Chunk>> = + IngotParsed; impl OpteMeta { #[inline] @@ -1236,53 +1257,157 @@ impl<'a, T: Read + 'a> Packet2> where T::Chunk: ingot::types::IntoBufPointer<'a>, { + // #[inline] + // pub fn parse( + // self, + // dir: Direction, + // net: impl NetworkParser, + // ) -> Result>, ParseError> { + // let Packet2 { state: Initialized2 { len, inner } } = self; + // let IngotParsed { stack: HeaderStack(headers), data, last_chunk } = + // match dir { + // Direction::Out => net.parse_outbound(inner)?, + // Direction::In => net.parse_inbound(inner)?, + // }; + + // let initial_lens = Some(OpteUnifiedLengths { + // outer_eth: headers.outer_eth.packet_length(), + // outer_l3: headers.outer_l3.packet_length(), + // outer_encap: headers.outer_encap.packet_length(), + // inner_eth: headers.inner_eth.packet_length(), + // inner_l3: headers.inner_l3.packet_length(), + // inner_ulp: headers.inner_ulp.packet_length(), + // } + // .into()); + + // let body = PktBodyWalker { + // base: Some((last_chunk, data)).into(), + // slice: Default::default(), + // }; + + // let meta = Box::new(PacketHeaders { headers, initial_lens, body }); + + // let flow = (&*meta).into(); + + // let body_csum = match (&meta.headers).inner_eth.ethertype() { + // Ethertype::ARP => Memoised::Known(None), + // Ethertype::IPV4 | Ethertype::IPV6 => Memoised::Uninit, + // _ => return Err(IngotParseErr::Unwanted.into()), + // }; + + // let state = Parsed2 { + // meta, + // flow, + // body_csum, + // l4_hash: Memoised::Uninit, + // body_modified: false, + // len, + // inner_csum_dirty: false, + // }; + + // let mut pkt = Packet2 { state }; + // // TODO: we can probably not 
do this in some cases, but we + // // don't have a way for headeractions to signal that they + // // *may* change the fields we need in the slowpath. + // let _ = pkt.body_csum(); + + // Ok(pkt) + // } + + // TODO: cleanup type aliases. + #[inline] - pub fn parse( + pub fn parse_inbound( self, - dir: Direction, - net: impl NetworkParser, - ) -> Result>, ParseError> { + net: NP, + ) -> Result>>, ParseError> + { + let Packet2 { state: Initialized2 { len, inner } } = self; + + Ok(Packet2 { + state: ParsedStage1 { meta: net.parse_inbound(inner)?, len }, + }) + } + + #[inline] + pub fn parse_outbound( + self, + net: NP, + ) -> Result>>, ParseError> + { let Packet2 { state: Initialized2 { len, inner } } = self; + + Ok(Packet2 { + state: ParsedStage1 { meta: net.parse_outbound(inner)?, len }, + }) + } +} + +impl<'a, T: Read + 'a, M: LightweightMeta> Packet2> +where + T::Chunk: ingot::types::IntoBufPointer<'a>, +{ + #[inline] + pub fn to_full_meta(self) -> Packet2> { + let Packet2 { state: ParsedStage1 { len, meta } } = self; let IngotParsed { stack: HeaderStack(headers), data, last_chunk } = - match dir { - Direction::Out => net.parse_outbound(inner)?, - Direction::In => net.parse_inbound(inner)?, - }; + meta; - let initial_lens = None; + // TODO: we can probably not do this in some cases, but we + // don't have a way for headeractions to signal that they + // *may* change the fields we need in the slowpath. 
+ let body_csum = headers.compute_body_csum(); + let flow = headers.flow(); + let headers: OpteMeta<_> = headers.into(); + let initial_lens = Some( + OpteUnifiedLengths { + outer_eth: headers.outer_eth.packet_length(), + outer_l3: headers.outer_l3.packet_length(), + outer_encap: headers.outer_encap.packet_length(), + inner_eth: headers.inner_eth.packet_length(), + inner_l3: headers.inner_l3.packet_length(), + inner_ulp: headers.inner_ulp.packet_length(), + } + .into(), + ); let body = PktBodyWalker { base: Some((last_chunk, data)).into(), slice: Default::default(), }; - let meta = Box::new(PacketHeaders { headers, initial_lens, body }); - let flow = (&*meta).into(); + Packet2 { + state: Parsed2 { + meta, + flow, + body_csum, + l4_hash: Memoised::Uninit, + body_modified: false, + len, + inner_csum_dirty: false, + }, + } + } - let body_csum = match (&meta.headers).inner_eth.ethertype() { - Ethertype::ARP => Memoised::Known(None), - Ethertype::IPV4 | Ethertype::IPV6 => Memoised::Uninit, - _ => return Err(IngotParseErr::Unwanted.into()), - }; + #[inline] + pub fn meta(&self) -> &M { + &self.state.meta.stack.0 + } - let state = Parsed2 { - meta, - flow, - body_csum, - l4_hash: Memoised::Uninit, - body_modified: false, - len, - inner_csum_dirty: false, - }; + #[inline] + pub fn meta_mut(&mut self) -> &mut M { + &mut self.state.meta.stack.0 + } - let mut pkt = Packet2 { state }; - // TODO: we can probably not do this in some cases, but we - // don't have a way for headeractions to signal that they - // *may* change the fields we need in the slowpath. 
- let _ = pkt.body_csum(); + #[inline] + pub fn len(&self) -> usize { + self.state.len + } - Ok(pkt) + #[inline] + pub fn flow(&self) -> InnerFlowId { + self.meta().flow() } } @@ -1295,21 +1420,6 @@ impl Packet2> { &mut self.state.meta } - pub fn store_lens_for_slopath(&mut self) { - let headers = &self.state.meta.headers; - self.state.meta.initial_lens = Some( - OpteUnifiedLengths { - outer_eth: headers.outer_eth.packet_length(), - outer_l3: headers.outer_l3.packet_length(), - outer_encap: headers.outer_encap.packet_length(), - inner_eth: headers.inner_eth.packet_length(), - inner_l3: headers.inner_l3.packet_length(), - inner_ulp: headers.inner_ulp.packet_length(), - } - .into(), - ); - } - #[inline] /// Convert a packet's metadata into a set of instructions /// needed to serialize all its changes to the wire. @@ -1529,7 +1639,7 @@ impl Packet2> { // Given that n_transform layers is 1 or 2, probably won't // save too much by trying to tie to a generation number. // TODO: profile. - // self.state.flow = InnerFlowId::from(self.meta()); + self.state.flow = InnerFlowId::from(self.meta()); Ok(()) } @@ -1591,52 +1701,55 @@ impl Packet2> { } pub fn body_csum(&mut self) -> Option { - let out = *self.state.body_csum.get(|| { - let use_pseudo = if let Some(v) = self.state.meta.inner_ulp() { - !matches!(v, Ulp::IcmpV4(_)) - } else { - false - }; - - // XXX TODO: make these valid even AFTER all packet pushings occur. 
- let pseudo_csum = - match (&self.state.meta.headers).inner_eth.ethertype() { - // ARP - Ethertype::ARP => { - return None; - } - Ethertype::IPV4 | Ethertype::IPV6 => self - .state - .meta - .headers - .inner_l3 - .as_ref() - .map(l3_pseudo_header), - _ => unreachable!(), - }; - - let Some(pseudo_csum) = pseudo_csum else { - return None; - }; - - self.state.meta.inner_ulp().and_then(csum_minus_hdr).map(|mut v| { - if use_pseudo { - v -= pseudo_csum; - } - v - }) - }); - - // let mut manual = Checksum::default(); - // if let Some(segs) = self.body_segs() { - // for seg in segs { - // manual.add_bytes(*seg); - // } - - // opte::engine::err!("think my csum is {:?}, reality is {:?}", out.map(|mut v| v.finalize()), manual.finalize()); - // } - - out + self.state.body_csum + + // let out = *self.state.body_csum + // .get(|| { + // let use_pseudo = if let Some(v) = self.state.meta.inner_ulp() { + // !matches!(v, Ulp::IcmpV4(_)) + // } else { + // false + // }; + + // // XXX TODO: make these valid even AFTER all packet pushings occur. 
+ // let pseudo_csum = + // match (&self.state.meta.headers).inner_eth.ethertype() { + // // ARP + // Ethertype::ARP => { + // return None; + // } + // Ethertype::IPV4 | Ethertype::IPV6 => self + // .state + // .meta + // .headers + // .inner_l3 + // .as_ref() + // .map(l3_pseudo_header), + // _ => unreachable!(), + // }; + + // let Some(pseudo_csum) = pseudo_csum else { + // return None; + // }; + + // self.state.meta.inner_ulp().and_then(csum_minus_hdr).map(|mut v| { + // if use_pseudo { + // v -= pseudo_csum; + // } + // v + // }) + // }); + + // // let mut manual = Checksum::default(); + // // if let Some(segs) = self.body_segs() { + // // for seg in segs { + // // manual.add_bytes(*seg); + // // } + + // // opte::engine::err!("think my csum is {:?}, reality is {:?}", out.map(|mut v| v.finalize()), manual.finalize()); + // // } + + // out } pub fn l4_hash(&mut self) -> u32 { @@ -1794,6 +1907,33 @@ fn l3_pseudo_header(l3: &L3) -> Checksum { } } +fn l3_pseudo_header_v(l3: &ValidL3) -> Checksum { + match l3 { + ValidL3::Ipv4(v4) => { + let mut pseudo_hdr_bytes = [0u8; 12]; + pseudo_hdr_bytes[0..4].copy_from_slice(v4.source().as_ref()); + pseudo_hdr_bytes[4..8].copy_from_slice(v4.destination().as_ref()); + // pseudo_hdr_bytes[8] reserved + pseudo_hdr_bytes[9] = v4.protocol().0; + let ulp_len = v4.total_len() - 4 * (v4.ihl() as u16); + pseudo_hdr_bytes[10..].copy_from_slice(&ulp_len.to_be_bytes()); + + Checksum::compute(&pseudo_hdr_bytes) + } + ValidL3::Ipv6(v6) => { + let mut pseudo_hdr_bytes = [0u8; 40]; + pseudo_hdr_bytes[0..16].copy_from_slice(&v6.source().as_ref()); + pseudo_hdr_bytes[16..32] + .copy_from_slice(&v6.destination().as_ref()); + pseudo_hdr_bytes[39] = v6.next_layer().unwrap_or_default().0; + let ulp_len = v6.payload_len() as u32; + pseudo_hdr_bytes[32..36].copy_from_slice(&ulp_len.to_be_bytes()); + + Checksum::compute(&pseudo_hdr_bytes) + } + } +} + /// The type state of a packet that has been initialized and allocated, but /// about which nothing 
else is known besides the length. #[derive(Debug)] @@ -1815,12 +1955,28 @@ pub struct Parsed2 { len: usize, meta: Box>, flow: InnerFlowId, - body_csum: Memoised>, + body_csum: Option, l4_hash: Memoised, body_modified: bool, inner_csum_dirty: bool, } +pub struct ParsedStage1> { + len: usize, + // This type is... pretty fat. + // But we need to hang onto this to allow hairpins/body txms/ARP + // to function. + meta: IngotParsed, + // REMOVED: + // flow: InnerFlowId, // (can be computed out) + // body_csum: Option, // (can be computed based on header class) + // l4_hash: Option, // Should be stored in emitspec. +} + +impl> PacketState for ParsedStage1 {} + +impl> ParsedStage1 {} + type Quack = Parsed2>; // Needed for now to account for not wanting to redesign ActionDescs @@ -1830,11 +1986,12 @@ pub type PacketHeaders2<'a> = PacketHeaders>; pub type InitMblk<'a> = Initialized2>; pub type ParsedMblk<'a> = Parsed2>; +pub type LightParsedMblk<'a, M> = ParsedStage1, M>; #[inline] -fn csum_minus_hdr(ulp: &Ulp) -> Option { +fn csum_minus_hdr(ulp: &ValidUlp) -> Option { match ulp { - Ulp::IcmpV4(icmp) => { + ValidUlp::IcmpV4(icmp) => { if icmp.checksum() == 0 { return None; } @@ -1848,7 +2005,7 @@ fn csum_minus_hdr(ulp: &Ulp) -> Option { Some(csum) } - Ulp::IcmpV6(icmp) => { + ValidUlp::IcmpV6(icmp) => { if icmp.checksum() == 0 { return None; } @@ -1862,7 +2019,7 @@ fn csum_minus_hdr(ulp: &Ulp) -> Option { Some(csum) } - Ulp::Tcp(tcp) => { + ValidUlp::Tcp(tcp) => { if tcp.checksum() == 0 { return None; } @@ -1871,25 +2028,21 @@ fn csum_minus_hdr(ulp: &Ulp) -> Option { tcp.checksum().to_be_bytes(), )); - let TcpPacket::Raw(t) = tcp else { - panic!("hmm... maybe one day.") - }; - - let b = t.0.as_bytes(); + let b = tcp.0.as_bytes(); csum.sub_bytes(&b[0..16]); csum.sub_bytes(&b[18..]); // TODO: bad bound? 
- // csum.sub_bytes(t.1.as_ref()); - csum.sub_bytes(match &t.1 { + // csum.sub_bytes(tcp.1.as_ref()); + csum.sub_bytes(match &tcp.1 { ingot::types::Packet::Repr(v) => &v[..], ingot::types::Packet::Raw(v) => &v[..], }); Some(csum) } - Ulp::Udp(udp) => { + ValidUlp::Udp(udp) => { if udp.checksum() == 0 { return None; } @@ -1898,11 +2051,7 @@ fn csum_minus_hdr(ulp: &Ulp) -> Option { udp.checksum().to_be_bytes(), )); - let UdpPacket::Raw(t) = udp else { - panic!("hmm... maybe one day.") - }; - - let b = t.0.as_bytes(); + let b = udp.0.as_bytes(); csum.sub_bytes(&b[0..6]); Some(csum) @@ -1935,7 +2084,7 @@ pub enum Emitter { } // TODO: don't really care about pushing 'inner' reprs today. -#[derive(Default)] +#[derive(Clone, Debug, Default)] pub struct OpteEmit { outer_eth: Option, outer_ip: Option, @@ -1946,13 +2095,28 @@ pub struct OpteEmit { inner: Option>, } -#[derive(Default)] +#[derive(Clone, Debug, Default)] pub struct OpteInnerEmit { eth: Ethernet, l3: Option, ulp: Option, } +#[derive(Clone, Debug)] +pub struct EmittestSpec { + pub spec: EmitterSpec, + pub l4_hash: u32, + pub rewind: u16, + pub ulp_len: u32, +} + +#[derive(Clone, Debug)] +pub enum EmitterSpec { + Fastpath(Arc), + Slowpath(Box), +} + +#[derive(Clone, Debug)] pub struct EmitSpec { pub rewind: u16, pub encapped_len: u16, diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index 3c519b0f..752cec40 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -43,6 +43,7 @@ pub mod ingot_base; pub mod ingot_packet; use alloc::string::String; +use checksum::Checksum; use core::fmt; use core::num::ParseIntError; use ingot::types::Parsed as IngotParsed; @@ -51,6 +52,7 @@ use ingot_packet::MsgBlk; use ingot_packet::NoEncap; use ingot_packet::OpteMeta; use ingot_packet::OpteParsed; +use ingot_packet::OpteParsed2; use ingot_packet::Packet2; use ingot_packet::PacketHeaders; use ingot_packet::Parsed2; @@ -58,6 +60,7 @@ use ingot_packet::ParsedMblk; use ingot_packet::ValidNoEncap; 
use ip4::IpError; pub use opte_api::Direction; +use rule::CompiledTransform; use zerocopy::ByteSlice; use zerocopy::ByteSliceMut; @@ -308,7 +311,7 @@ pub trait NetworkParser { fn parse_outbound<'a, T: Read + 'a>( &self, rdr: T, - ) -> Result, ParseError> + ) -> Result>, ParseError> where T::Chunk: ingot::types::IntoBufPointer<'a>; @@ -319,22 +322,31 @@ pub trait NetworkParser { fn parse_inbound<'a, T: Read + 'a>( &self, rdr: T, - ) -> Result, ParseError> + ) -> Result>, ParseError> where T::Chunk: ingot::types::IntoBufPointer<'a>; } /// Header formats which allow a flow ID to be read out, and which can be converted /// into the shared `OpteMeta` format. -pub trait LightweightMeta: Into> + FlowKey {} +pub trait LightweightMeta: Into> { + /// Runs a compiled fastpath action against the target metadata. + fn run_compiled_transform(&mut self, transform: &CompiledTransform); -// This is a separate trait since `where for<'a> &'a Self: Into` -// had *awful* ergonomics around that bound's propagation. -/// Headers which allow a flow ID to be read out. -pub trait FlowKey { + /// Derive the checksum for the packet body from inner headers. + fn compute_body_csum(&self) -> Option; + + // This is a dedicated fn since `where for<'a> &'a Self: Into` + // had *awful* ergonomics around that bound's propagation. /// Return the flow ID (5-tuple, or other composite key) which /// identifies this packet's parent flow. fn flow(&self) -> InnerFlowId; + + /// Returns the number of bytes occupied by the packet's outer encapsulation. + fn encap_len(&self) -> u16; + + /// Recalculate checksums within ULP headers, derived from a pre-computed `body_csum`. 
+ fn update_ulp_checksums(&mut self, body_csum: Checksum); } /// A generic ULP parser, useful for testing inside of the opte crate @@ -363,20 +375,22 @@ impl NetworkParser for GenericUlp { fn parse_inbound<'a, T: Read + 'a>( &self, rdr: T, - ) -> Result, ParseError> + ) -> Result>, ParseError> where T::Chunk: ingot::types::IntoBufPointer<'a>, { - self.parse_ulp(rdr) + // self.parse_ulp(rdr) + Ok(ValidNoEncap::parse_read(rdr)?) } fn parse_outbound<'a, T: Read + 'a>( &self, rdr: T, - ) -> Result, ParseError> + ) -> Result>, ParseError> where T::Chunk: ingot::types::IntoBufPointer<'a>, { - self.parse_ulp(rdr) + // self.parse_ulp(rdr) + Ok(ValidNoEncap::parse_read(rdr)?) } } diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index fe82498d..2ddf409d 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -17,9 +17,11 @@ use super::headers::HeaderAction; use super::headers::IpPush; use super::headers::UlpHeaderAction; use super::ingot_packet::MsgBlk; +use super::ingot_packet::MsgBlkIterMut; use super::ingot_packet::Packet2; use super::ingot_packet::Parsed2; use super::ingot_packet::ParsedMblk; +use super::ingot_packet::ParsedStage1; use super::ioctl; use super::ioctl::TcpFlowEntryDump; use super::ioctl::TcpFlowStateDump; @@ -39,6 +41,7 @@ use super::packet::PacketMeta; use super::packet::Parsed; use super::packet::FLOW_ID_DEFAULT; use super::rule::Action; +use super::rule::CompiledTransform; use super::rule::Finalized; use super::rule::HdrTransform; use super::rule::HdrTransformError; @@ -49,6 +52,7 @@ use super::tcp::TIME_WAIT_EXPIRE_TTL; use super::tcp_state::TcpFlowState; use super::tcp_state::TcpFlowStateError; use super::HdlPktAction; +use super::LightweightMeta; use super::NetworkImpl; use crate::d_error::DError; #[cfg(all(not(feature = "std"), not(test)))] @@ -61,6 +65,9 @@ use crate::ddi::sync::KMutex; use crate::ddi::sync::KMutexType; use crate::ddi::time::Moment; use crate::engine::flow_table::ExpiryPolicy; +use 
crate::engine::ingot_packet::EmitterSpec; +use crate::engine::ingot_packet::EmittestSpec; +use crate::engine::rule::CompiledEncap; use crate::engine::tcp::TcpMeta; use crate::ExecCtx; use alloc::boxed::Box; @@ -130,7 +137,7 @@ impl From for ProcessError { /// all. XXX This is probably going away as its only use is for /// punting on traffic I didn't want to deal with yet. /// -/// * Drop: The packet has beend dropped, as determined by the rules +/// * Drop: The packet has been dropped, as determined by the rules /// or because of resource exhaustion. Included is the reason for the /// drop. /// @@ -146,7 +153,8 @@ pub enum ProcessResult { Drop { reason: DropReason, }, - Modified, + #[leaf] + Modified(EmittestSpec), // TODO: it would be nice if this packet type could be user-specified, but might // be tricky. #[leaf] @@ -156,7 +164,7 @@ pub enum ProcessResult { impl From for ProcessResult { fn from(hpa: HdlPktAction) -> Self { match hpa { - HdlPktAction::Allow => Self::Modified, + HdlPktAction::Allow => Self::Modified(todo!()), HdlPktAction::Deny => Self::Drop { reason: DropReason::HandlePkt }, HdlPktAction::Hairpin(pkt) => Self::Hairpin(pkt), } @@ -181,19 +189,6 @@ enum InternalProcessResult { Hairpin(MsgBlk), } -impl From for ProcessResult { - fn from(value: InternalProcessResult) -> Self { - match value { - InternalProcessResult::Bypass => Self::Bypass, - InternalProcessResult::Drop { reason } => Self::Drop { reason }, - InternalProcessResult::Hairpin(v) => Self::Hairpin(v), - InternalProcessResult::Modified { transform, tcp_state } => { - Self::Modified - } - } - } -} - impl From for InternalProcessResult { fn from(hpa: HdlPktAction) -> Self { match hpa { @@ -1200,101 +1195,250 @@ impl Port { /// # States /// /// This command is valid only for [`PortState::Running`]. 
- pub fn process( + pub fn process<'a, M>( &self, dir: Direction, - pkt: &mut Packet2, - mut ameta: ActionMeta, - ) -> result::Result { - let flow_before = *pkt.flow(); - pkt.store_lens_for_slopath(); - // XXX: See remove_rule -- there is a 1-pkt wide TOCTOU here. - // This should probably be ordered: - // - remove - process - // * lock port * lock port - // * increment epoch(relaxed) * fetch epoch(relaxed) - let epoch = self.epoch.load(SeqCst); + // TODO: might want to pass in a &mut to an enum + // which can advance to (and hold) light->full-fat metadata. + // Then we can have our cake and eat it too. + mut pkt: Packet2, M>>, + ) -> result::Result + where + M: LightweightMeta< as Read>::Chunk>, + { + let flow_before = pkt.flow(); + + // Packet processing is split into a few mechanisms based on + // expected speed, based on actions and the size of required metadata: + // + // 1. UFT exists. Pure push/pop with simple modifications to + // inner ULP fields. No body transform. + // 2. UFT exists. Flow transform could not be compiled as above. + // Convert to full metadata and apply saved transform list. + // 3. No UFT exists. Walk all tables, save and apply transforms + // piecemeal OR produce a non-`Modified` decision. + // + // Generally, 1 > 2 >>> 3 in terms of rate of pps. + // Both 1 and 2 are able to drop the port lock very quickly. + // + // This tiering exists because we can save space on metadata + // when we know that we won't have mixed owned/borrowed packet + // data, and when we don't need to keep space for absent layers. + // The size of metadata structs is a large bottleneck on packet + // parsing performance, so we expect that minimising it for the + // majority of packets pays off in the limit. + // + // In case 1, we can also cache and reuse the same EmitSpec for + // all hit packets. + + // (1) Check for UFT and precompiled. 
let mut data = self.data.lock(); + let epoch = self.epoch(); check_state!(data.state, [PortState::Running]) .map_err(|_| ProcessError::BadState(data.state))?; - self.port_process_entry_probe(dir, &flow_before, epoch, pkt); - let res = match dir { - Direction::Out => { - let res = self.process_out(&mut data, epoch, pkt, &mut ameta); - // XXX: Ideally the Kstat should be holding atmoic U64s, then we get + // TODO: fixup types here. + // self.port_process_entry_probe(dir, &flow_before, epoch, &pkt); + + let mut uft: Option<&mut FlowEntry>> = match dir { + Direction::Out => data.uft_out.get_mut(&flow_before), + Direction::In => data.uft_out.get_mut(&flow_before), + }; + + enum FastPathDecision { + CompiledUft { tx: Arc, l4_hash: u32 }, + Uft { tx: Arc, l4_hash: u32 }, + Slow, + } + + let decision = match uft { + // We have a valid UFT entry of some kind -- clone out the + // saved transforms so that we can drop the lock ASAP. + Some(entry) if entry.state().epoch == epoch => { + entry.hit(); + let now = *entry.last_hit(); + + // The Fast Path. + let xforms = &entry.state().xforms; + let out = if let Some(compiled) = xforms.compiled.as_ref() { + FastPathDecision::CompiledUft { + tx: Arc::clone(compiled), + l4_hash: entry.state().l4_hash, + } + } else { + FastPathDecision::Uft { + tx: Arc::clone(xforms), + l4_hash: entry.state().l4_hash, + } + }; + + match dir { + Direction::In => data.stats.vals.in_uft_hit += 1, + Direction::Out => data.stats.vals.out_uft_hit += 1, + } + self.uft_hit_probe(dir, &flow_before, epoch, &now); + + out + } + + // The entry is from a previous epoch; invalidate its UFT + // entries and proceed to rule processing. 
+ Some(entry) => { + let epoch = entry.state().epoch; + let owned_pair = entry.state().pair; + let (ufid_in, ufid_out) = match dir { + Direction::Out => (owned_pair.as_ref(), Some(&flow_before)), + Direction::In => (Some(&flow_before), owned_pair.as_ref()), + }; + self.uft_invalidate(&mut data, ufid_out, ufid_in, epoch); + + FastPathDecision::Slow + } + None => FastPathDecision::Slow, + }; + + // (1)/(2) UFT hit without invalidation -- We know the result for stats purposes. + match &decision { + FastPathDecision::CompiledUft { .. } + | FastPathDecision::Uft { .. } => { + // XXX: Ideally the Kstat should be holding AtomicU64s, then we get // out of the lock sooner. Note that we don't need to *apply* a given // set of transforms in order to know which stats we'll modify. - Self::update_stats_out(&mut data.stats.vals, &res); - res + // Also, not an elegant hack! + let dummy_res = Ok(InternalProcessResult::Modified { + transform: None, + tcp_state: None, + }); + match dir { + Direction::In => { + Self::update_stats_in(&mut data.stats.vals, &dummy_res) + } + Direction::Out => { + Self::update_stats_out(&mut data.stats.vals, &dummy_res) + } + } } + _ => {} + } - Direction::In => { - let res = self.process_in( + // (1) Execute precompiled, and exit. 
+ if let FastPathDecision::CompiledUft { tx, l4_hash } = decision { + drop(data); + + let len = pkt.len(); + let meta = pkt.meta_mut(); + let body_csum = if tx.checksums_dirty { + meta.compute_body_csum() + } else { + None + }; + meta.run_compiled_transform(&tx); + if let Some(csum) = body_csum { + meta.update_ulp_checksums(csum); + } + let encap_len = meta.encap_len(); + let ulp_len = (len - (encap_len as usize)) as u32; + let rewind = match tx.encap { + CompiledEncap::Pop => encap_len, + _ => 0, + }; + let out = EmittestSpec { + spec: EmitterSpec::Fastpath(tx), + l4_hash, + rewind, + ulp_len, + }; + + let flow_after = meta.flow(); + let res = Ok(ProcessResult::Modified(out)); + self.port_process_return_probe( + dir, + &flow_before, + &flow_after, + epoch, + // &pkt, + &res, + ); + return res; + } + + // (2)/(3) Full-fat metadata is required. + let mut pkt = pkt.to_full_meta(); + let mut ameta = ActionMeta::new(); + + // TODO: remove/convert to a slopath indicator? + self.port_process_entry_probe(dir, &flow_before, epoch, &pkt); + + let res = match (&decision, dir) { + // (2) Drop lock, then apply retrieved transform. + // Store cached l4 hash. + (FastPathDecision::Uft { tx, l4_hash }, _) => { + drop(data); + pkt.set_l4_hash(*l4_hash); + tx.apply(&mut pkt, dir)?; + Ok(InternalProcessResult::Modified { + transform: None, + tcp_state: None, + }) + } + + // (3) Full-table processing for the packet, then drop the lock. + // Cksum updates are the only thing left undone. 
+ (FastPathDecision::Slow, Direction::In) => { + let res = self.process_in_miss( &mut data, epoch, - pkt, + &mut pkt, &flow_before, &mut ameta, ); Self::update_stats_in(&mut data.stats.vals, &res); + drop(data); + pkt.update_checksums(); + res + } + (FastPathDecision::Slow, Direction::Out) => { + let res = self + .process_out_miss(&mut data, epoch, &mut pkt, &mut ameta); + Self::update_stats_out(&mut data.stats.vals, &res); + drop(data); + pkt.update_checksums(); res } + _ => unreachable!(), }; - drop(data); - - // Now, apply transforms and update TCP state. - // UFT misses will have done so already in the port lock. - match (dir, &res) { - ( - Direction::Out, - Ok(InternalProcessResult::Modified { - transform: Some(transform), - tcp_state, - }), - ) => { - // TCP, then transform? - // TODO: tcp - // todo!(); //TCP - transform.apply(pkt, dir)?; - } - ( - Direction::In, - Ok(InternalProcessResult::Modified { - transform: Some(transform), - tcp_state, - }), - ) => { - // Transform, then TCP? + let flow_after = *pkt.flow(); - transform.apply(pkt, dir)?; - // todo!(); //TCP + let res = res.map(|v| match v { + InternalProcessResult::Bypass => ProcessResult::Bypass, + InternalProcessResult::Drop { reason } => { + ProcessResult::Drop { reason } } - // Nothing left to do other than csums; we took the slowpath. - (_, Ok(InternalProcessResult::Modified { .. })) => { - pkt.update_checksums() + InternalProcessResult::Hairpin(v) => ProcessResult::Hairpin(v), + InternalProcessResult::Modified { transform, tcp_state } => { + let l4_hash = pkt.l4_hash(); + let emit_spec = pkt.emit_spec(); + + // TODO: remove EmitSpec and have above method just spit out the new + // variant. + ProcessResult::Modified(EmittestSpec { + spec: EmitterSpec::Slowpath(emit_spec.push_spec.into()), + l4_hash, + rewind: emit_spec.rewind, + ulp_len: emit_spec.encapped_len as u32, + }) } - _ => {} - } - - // Emit the updated headers if the packet was modified as part - // of processing. 
- // TODO: now contingent on caller to do this if they want it. - // Why? To prevent any copy-out for loopback packets. - // if let Ok(ProcessResult::Modified) = res { - // pkt.emit_new_headers()?; - // } - - let safe_res = res.map(Into::into); + }); self.port_process_return_probe( dir, &flow_before, + &flow_after, epoch, - pkt, - &safe_res, + // &pkt, + &res, ); - safe_res + res } // hope and pray we find a ULP, then use that? @@ -1804,11 +1948,16 @@ pub enum ThinProcRes { pub(crate) struct Transforms { pub(crate) hdr: Vec, pub(crate) body: Vec>, + pub(crate) compiled: Option>, } impl Transforms { fn new() -> Self { - Self { hdr: Vec::with_capacity(8), body: Vec::with_capacity(2) } + Self { + hdr: Vec::with_capacity(8), + body: Vec::with_capacity(2), + compiled: None, + } } #[inline] @@ -1842,6 +1991,7 @@ impl fmt::Debug for Transforms { f.debug_struct("Transforms") .field("hdr", &self.hdr) .field("body", &body_strs) + .field("compiled", &self.compiled) .finish() } } @@ -1938,11 +2088,13 @@ impl Port { &self, dir: Direction, flow_before: &InnerFlowId, + flow_after: &InnerFlowId, epoch: u64, - pkt: &Packet2, + // pkt: &Packet2, res: &result::Result, ) { - let flow_after = pkt.flow(); + // let flow_after = pkt.flow(); + let mblk_addr = 0; // TODO. cfg_if! { if #[cfg(all(not(feature = "std"), not(test)))] { @@ -1988,7 +2140,7 @@ impl Port { flow_before, flow_after, epoch as uintptr_t, - pkt.mblk_addr(), + mblk_addr, hp_pkt_ptr, eb.as_ptr(), ); @@ -2006,7 +2158,7 @@ impl Port { (dir, self.name.as_str()), (flow_b_s.as_ref(), flow_a_s.as_ref()), epoch, - pkt.mblk_addr(), + mblk_addr, res_str ) ); @@ -2410,6 +2562,7 @@ impl Port { } } + // TODO: remove. fn process_in( &self, data: &mut PortData, @@ -2725,6 +2878,7 @@ impl Port { } } + // TODO: remove. 
fn process_out( &self, data: &mut PortData, diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index aa1aee05..9799355e 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -19,6 +19,7 @@ use super::headers::IpMod; use super::headers::IpPush; use super::headers::Transform; use super::headers::UlpHeaderAction; +use super::headers::UlpMetaModify; use super::ingot_base::Ethernet; use super::ingot_base::EthernetPacket; use super::ingot_base::ValidEthernet; @@ -295,7 +296,7 @@ pub struct HdrTransform { pub inner_ether: HeaderAction, pub inner_ip: HeaderAction, // We don't support push/pop for inner_ulp. - pub inner_ulp: UlpHeaderAction, + pub inner_ulp: UlpHeaderAction, } impl StateSummary for Vec { @@ -310,6 +311,28 @@ impl Display for HdrTransform { } } +/// Header transformations matching a simple format, amenable +/// to fastpath compilation: +/// * Encap is either pushed or popped in its entirety, +/// * The inner packet is only modified, with no layers pushed or +/// popped. +/// * The packet action must be `Modified`. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct CompiledTransform { + pub encap: CompiledEncap, + pub inner_ether: Option, + pub inner_ip: Option, + pub inner_ulp: Option, + pub checksums_dirty: bool, +} + +#[derive(Clone, Debug, Deserialize, Serialize)] +pub enum CompiledEncap { + Pop, + // TODO: can we cache these in an Arc'd buffer? + Push(EtherMeta, IpPush, EncapPush), +} + #[cfg(all(not(feature = "std"), not(test)))] extern "C" { pub fn __dtrace_probe_ht__run(arg: uintptr_t); From afbc5876a8b5d06e0b80f56bf98a97dd6e80a95a Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 27 Sep 2024 12:29:02 +0100 Subject: [PATCH 033/115] Speed loss due to copy-in, let's rethink. 
--- lib/opte/src/engine/ingot_packet.rs | 190 +++++++++++++++++++++++++++- lib/opte/src/engine/mod.rs | 2 - lib/opte/src/engine/port.rs | 3 +- lib/oxide-vpc/src/engine/mod.rs | 11 +- xde/src/xde.rs | 149 ++++++++++------------ 5 files changed, 259 insertions(+), 96 deletions(-) diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 318f83ae..c01ee3b3 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -48,6 +48,7 @@ use super::packet::Packet; use super::packet::PacketState; use super::packet::ParseError; use super::packet::FLOW_ID_DEFAULT; +use super::rule::CompiledEncap; use super::rule::CompiledTransform; use super::rule::HdrTransform; use super::rule::HdrTransformError; @@ -194,7 +195,7 @@ impl LightweightMeta for ValidNoEncap { todo!() } - // FIXME: identical to + // FIXME: identical to Geneve. fn compute_body_csum(&self) -> Option { let use_pseudo = if let Some(v) = &self.inner_ulp { !matches!(v, ValidUlp::IcmpV4(_)) @@ -223,7 +224,7 @@ impl LightweightMeta for ValidNoEncap { } fn encap_len(&self) -> u16 { - todo!() + 0 } fn update_ulp_checksums(&mut self, body_csum: OpteCsum) { @@ -268,11 +269,33 @@ impl LightweightMeta for ValidGeneveOverV6 { } fn compute_body_csum(&self) -> Option { - todo!() + let use_pseudo = !matches!(self.inner_ulp, ValidUlp::IcmpV4(_)); + + let pseudo_csum = match self.inner_eth.ethertype() { + Ethertype::IPV4 | Ethertype::IPV6 => { + Some(l3_pseudo_header_v(&self.inner_l3)) + } + // Includes ARP. 
+ _ => return None, + }; + + let Some(pseudo_csum) = pseudo_csum else { + return None; + }; + + csum_minus_hdr(&self.inner_ulp).map(|mut v| { + if use_pseudo { + v -= pseudo_csum; + } + v + }) } fn encap_len(&self) -> u16 { - todo!() + (self.outer_eth.packet_length() + + self.outer_v6.packet_length() + + self.outer_udp.packet_length() + + self.outer_encap.packet_length()) as u16 } fn update_ulp_checksums(&mut self, body_csum: OpteCsum) { @@ -2110,6 +2133,165 @@ pub struct EmittestSpec { pub ulp_len: u32, } +impl EmittestSpec { + #[inline] + pub fn apply(&mut self, mut pkt: MsgBlk) -> MsgBlk { + // Rewind + { + let mut slots = heapless::Vec::<&mut MsgBlkNode, 6>::new(); + let mut to_rewind = self.rewind as usize; + + if to_rewind > 0 { + let mut reader = pkt.iter_mut(); + while to_rewind != 0 { + let this = reader.next(); + let Some(node) = this else { + to_rewind = 0; + break; + }; + + let has = node.len(); + let droppable = to_rewind.min(has); + node.drop_front_bytes(droppable); + to_rewind -= droppable; + + slots.push(node).unwrap(); + } + } + + // TODO: put available layers into said slots? + } + + match &mut self.spec { + EmitterSpec::Fastpath(push_spec) => match push_spec.encap { + CompiledEncap::Pop => pkt, + CompiledEncap::Push(eth, ip, encap) => { + todo!() + } + }, + EmitterSpec::Slowpath(push_spec) => { + // TODO: + // - remove all zero-length nodes. + // - actually push in to existing slots we rewound past if needed. + // - actually support pushing dirty segments apart from the encap. + + let needed_push = push_spec.outer_eth.packet_length() + + push_spec.outer_ip.packet_length() + + push_spec.outer_encap.packet_length(); + let needed_alloc = needed_push; //.saturating_sub(pkt.headroom()); + let mut space_in_front = needed_push - needed_alloc; + + let mut prepend = if needed_alloc > 0 { + let mut new_mblk = MsgBlk::new_ethernet(needed_alloc); + new_mblk.pop_all(); + Some(new_mblk) + } else { + None + }; + + // NOT NEEDED TODAY. 
+ if let Some(inner_new) = &push_spec.inner { + todo!() + } + + if let Some(outer_encap) = &push_spec.outer_encap { + let a = SizeHoldingEncap { + encapped_len: self.ulp_len as u16, + meta: &outer_encap, + }; + + let l = a.packet_length(); + + let target = if prepend.is_none() { + space_in_front -= l; + &mut pkt + } else { + space_in_front = 0; + prepend.as_mut().unwrap() + }; + + unsafe { + target.write_front(l, |v| { + a.emit_uninit(v).unwrap(); + }) + } + } + + if let Some(outer_ip) = &push_spec.outer_ip { + let l = outer_ip.packet_length(); + let target = if prepend.is_none() { + space_in_front -= l; + &mut pkt + } else { + space_in_front = 0; + prepend.as_mut().unwrap() + }; + + unsafe { + target.write_front(l, |v| { + outer_ip.emit_uninit(v).unwrap(); + }) + } + } + + if let Some(outer_eth) = &push_spec.outer_eth { + let l = outer_eth.packet_length(); + let target = if prepend.is_none() { + space_in_front -= l; + &mut pkt + } else { + space_in_front = 0; + prepend.as_mut().unwrap() + }; + + unsafe { + target.write_front(l, |v| { + outer_eth.emit_uninit(v).unwrap(); + }) + } + } + + if let Some(mut prepend) = prepend { + prepend.extend_if_one(pkt); + prepend + } else { + pkt + } + } + } + } + + #[inline] + pub fn outer_encap_vni(&self) -> Option { + match &self.spec { + EmitterSpec::Fastpath(c) => match &c.encap { + CompiledEncap::Push(_, _, EncapPush::Geneve(g)) => Some(g.vni), + _ => None, + }, + EmitterSpec::Slowpath(s) => match &s.outer_encap { + Some(EncapMeta::Geneve(g)) => Some(g.vni), + _ => None, + }, + } + } + + #[inline] + pub fn outer_ip6_addrs(&self) -> Option<(Ipv6Addr, Ipv6Addr)> { + match &self.spec { + EmitterSpec::Fastpath(c) => match &c.encap { + CompiledEncap::Push(_, IpPush::Ip6(v6), _) => { + Some((v6.src, v6.dst)) + } + _ => None, + }, + EmitterSpec::Slowpath(s) => match &s.outer_ip { + Some(L3Repr::Ipv6(v6)) => Some((v6.source, v6.destination)), + _ => None, + }, + } + } +} + #[derive(Clone, Debug)] pub enum EmitterSpec { Fastpath(Arc), 
diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index 752cec40..589b383c 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -379,7 +379,6 @@ impl NetworkParser for GenericUlp { where T::Chunk: ingot::types::IntoBufPointer<'a>, { - // self.parse_ulp(rdr) Ok(ValidNoEncap::parse_read(rdr)?) } @@ -390,7 +389,6 @@ impl NetworkParser for GenericUlp { where T::Chunk: ingot::types::IntoBufPointer<'a>, { - // self.parse_ulp(rdr) Ok(ValidNoEncap::parse_read(rdr)?) } } diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 2ddf409d..d2e9af85 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -1195,6 +1195,7 @@ impl Port { /// # States /// /// This command is valid only for [`PortState::Running`]. + #[inline] pub fn process<'a, M>( &self, dir: Direction, @@ -2163,7 +2164,7 @@ impl Port { ) ); } else { - let (..) = (dir, flow_before, flow_after, epoch, pkt, res); + let (..) = (dir, flow_before, flow_after, epoch, /*pkt,*/ res); } } } diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index 83594e9f..b1a4d2aa 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -21,6 +21,7 @@ use opte::engine::ingot_packet::MsgBlk; use opte::engine::ingot_packet::NoEncap; use opte::engine::ingot_packet::OpteMeta; use opte::engine::ingot_packet::OpteParsed; +use opte::engine::ingot_packet::OpteParsed2; use opte::engine::ingot_packet::Packet2; use opte::engine::ingot_packet::Parsed2; use opte::engine::ingot_packet::ValidGeneveOverV6; @@ -143,23 +144,21 @@ impl NetworkParser for VpcParser { fn parse_outbound<'a, T: Read + 'a>( &self, rdr: T, - ) -> Result, ParseError> + ) -> Result>, ParseError> where T::Chunk: opte::ingot::types::IntoBufPointer<'a>, { - let v = NoEncap::parse_read(rdr); - Ok(OpteMeta::convert_ingot(v?)) + Ok(ValidNoEncap::parse_read(rdr)?) 
} #[inline] fn parse_inbound<'a, T: Read + 'a>( &self, rdr: T, - ) -> Result, ParseError> + ) -> Result>, ParseError> where T::Chunk: opte::ingot::types::IntoBufPointer<'a>, { - let v = GeneveOverV6::parse_read(rdr); - Ok(OpteMeta::convert_ingot(v?)) + Ok(ValidGeneveOverV6::parse_read(rdr)?) } } diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 125afce6..44c8bd0d 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -1409,11 +1409,16 @@ unsafe extern "C" fn xde_mc_unicst( 0 } -fn guest_loopback_probe(pkt: &Packet2, src: &XdeDev, dst: &XdeDev) { +fn guest_loopback_probe( + mblk_addr: uintptr_t, + flow: &InnerFlowId, + src: &XdeDev, + dst: &XdeDev, +) { unsafe { __dtrace_probe_guest__loopback( - pkt.mblk_addr(), - pkt.flow(), + mblk_addr, + flow, src.port.name_cstr().as_ptr() as uintptr_t, dst.port.name_cstr().as_ptr() as uintptr_t, ) @@ -1424,37 +1429,53 @@ fn guest_loopback_probe(pkt: &Packet2, src: &XdeDev, dst: &XdeDev) { fn guest_loopback<'a>( src_dev: &XdeDev, devs: &'a KRwLockReadGuard>>, - pkt: &mut Packet2, + mut pkt: MsgBlk, vni: Vni, -) -> Option<&'a Box> { +) { use Direction::*; - let ether_dst = pkt.meta().inner_ether().destination(); - // let devs = unsafe { xde_devs.read() }; + + let mblk_addr = pkt.mblk_addr(); + let parsed_pkt = Packet2::new(pkt.iter_mut()); + + // TODO: Rework currently requires a reparse on loopback to account for UFT fastpath. 
+ + let mut parsed_pkt = match parsed_pkt.parse_inbound(VpcParser {}) { + Ok(pkt) => pkt, + Err(e) => { + opte::engine::dbg!("Loopback bad packet: {:?}", e); + bad_packet_parse_probe(None, Direction::In, mblk_addr, &e.into()); + + return; + } + }; + + let flow = parsed_pkt.flow(); + + let ether_dst = parsed_pkt.meta().inner_eth.destination(); let maybe_dest_dev = devs.iter().find(|x| x.vni == vni && x.port.mac_addr() == ether_dst); match maybe_dest_dev { Some(dest_dev) => { - guest_loopback_probe(&pkt, src_dev, dest_dev); + guest_loopback_probe(mblk_addr, &flow, src_dev, dest_dev); // We have found a matching Port on this host; "loop back" // the packet into the inbound processing path of the // destination Port. - match dest_dev.port.process(In, pkt, ActionMeta::new()) { - Ok(ProcessResult::Modified) => { - // unsafe { - // mac::mac_rx( - // dest_dev.mh, - // ptr::null_mut(), - // pkt.unwrap_mblk(), - // ) - // }; - Some(dest_dev) + match dest_dev.port.process(In, parsed_pkt) { + Ok(ProcessResult::Modified(mut emit_spec)) => { + let pkt = emit_spec.apply(pkt); + unsafe { + mac::mac_rx( + dest_dev.mh, + ptr::null_mut(), + pkt.unwrap_mblk(), + ) + }; } Ok(ProcessResult::Drop { reason }) => { opte::engine::dbg!("loopback rx drop: {:?}", reason); - None } Ok(ProcessResult::Hairpin(_hppkt)) => { @@ -1462,19 +1483,17 @@ fn guest_loopback<'a>( // inbound packet to generate a hairpin response // from the destination port. 
opte::engine::dbg!("unexpected loopback rx hairpin"); - None } Ok(ProcessResult::Bypass) => { opte::engine::dbg!("loopback rx bypass"); - // unsafe { - // mac::mac_rx( - // dest_dev.mh, - // ptr::null_mut(), - // pkt.unwrap_mblk(), - // ) - // }; - Some(dest_dev) + unsafe { + mac::mac_rx( + dest_dev.mh, + ptr::null_mut(), + pkt.unwrap_mblk(), + ) + }; } Err(e) => { @@ -1484,7 +1503,6 @@ fn guest_loopback<'a>( dest_dev.port.name(), e ); - None } } } @@ -1496,7 +1514,6 @@ fn guest_loopback<'a>( vni.as_u32(), ether_dst ); - None } } } @@ -1554,7 +1571,7 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { let parser = src_dev.port.network().parser(); let mblk_addr = pkt.mblk_addr(); let parsed_pkt = Packet2::new(pkt.iter_mut()); - let mut parsed_pkt = match parsed_pkt.parse(Direction::Out, parser) { + let mut parsed_pkt = match parsed_pkt.parse_outbound(parser) { Ok(pkt) => pkt, Err(e) => { // TODO Add bad packet stat. @@ -1719,16 +1736,14 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { // The port processing code will fire a probe that describes what // action was taken -- there should be no need to add probes or // prints here. - let res = port.process(Direction::Out, &mut parsed_pkt, ActionMeta::new()); + let res = port.process(Direction::Out, parsed_pkt); match res { - Ok(ProcessResult::Modified) => { - let meta = parsed_pkt.meta(); - + Ok(ProcessResult::Modified(mut emit_spec)) => { // If the outer IPv6 destination is the same as the // source, then we need to loop the packet inbound to the // guest on this same host. 
- let (ip6_src, ip6_dst) = match meta.outer_ip6_addrs() { + let (ip6_src, ip6_dst) = match emit_spec.outer_ip6_addrs() { Some(v) => v, None => { // XXX add SDT probe @@ -1738,8 +1753,8 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { } }; - let vni = match meta.outer_encap_geneve_vni_and_origin() { - Some((vni, _)) => vni, + let vni = match emit_spec.outer_encap_vni() { + Some(vni) => vni, None => { // XXX add SDT probe // XXX add stat @@ -1751,33 +1766,16 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { // what we WANT to do is pass in the parsed pkt, handle the // emitspec in the same place, then send elsewhere. let devs = unsafe { xde_devs.read() }; - let local_port = if ip6_dst == ip6_src { - let Some(valid_local) = - guest_loopback(src_dev, &devs, &mut parsed_pkt, vni) - else { - return ptr::null_mut(); - }; - Some(valid_local) - } else { - None - }; - - let l4_hash = parsed_pkt.l4_hash(); - let mut emit_spec = parsed_pkt.emit_spec(); + let l4_hash = emit_spec.l4_hash; let out_pkt = emit_spec.apply(pkt); - if let Some(local_port) = local_port { - unsafe { - mac::mac_rx( - local_port.mh, - ptr::null_mut(), - out_pkt.unwrap_mblk(), - ) - }; + if ip6_src == ip6_dst { + guest_loopback(src_dev, &devs, out_pkt, vni); return ptr::null_mut(); } + drop(devs); // Currently the overlay layer leaves the outer frame @@ -1814,23 +1812,18 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { } Ok(ProcessResult::Drop { .. }) => { - drop(parsed_pkt); return ptr::null_mut(); } Ok(ProcessResult::Hairpin(hpkt)) => { - drop(parsed_pkt); mac::mac_rx(src_dev.mh, ptr::null_mut(), hpkt.unwrap_mblk()); } Ok(ProcessResult::Bypass) => { - drop(parsed_pkt); stream.tx_drop_on_no_desc2(pkt, hint, MacTxFlags::empty()); } - Err(_) => { - drop(parsed_pkt); - } + Err(_) => {} } // On return the Packet is dropped and its underlying mblk @@ -2011,7 +2004,7 @@ unsafe fn xde_rx_one( // is to be delivered. 
let parser = VpcParser {}; // let mblk_addr = parsed_pkt.mblk_addr(); - let mut parsed_pkt = match parsed_pkt.parse(Direction::In, parser) { + let mut parsed_pkt = match parsed_pkt.parse_inbound(parser) { Ok(pkt) => pkt, Err(e) => { // TODO Add bad packet stat. @@ -2033,22 +2026,10 @@ unsafe fn xde_rx_one( // Determine where to send packet based on Geneve VNI and // destination MAC address. - // Todo: this, but better. - let vni = match meta.outer_encap_geneve_vni_and_origin() { - Some((vni, _)) => vni, - None => { - // TODO add stat - let msg = c"no geneve header, dropping"; - bad_packet_probe(None, Direction::In, mblk_addr, msg); - opte::engine::dbg!("no geneve header, dropping"); - return; - } - }; + let vni = meta.outer_encap.vni(); - let ether_dst = meta.inner_ether().destination(); + let ether_dst = meta.inner_eth.destination(); - // let vni = geneve.vni; - // let ether_dst = meta.inner.ether.dst; let Some(dev) = devs.iter().find(|x| x.vni == vni && x.port.mac_addr() == ether_dst) else { @@ -2106,11 +2087,13 @@ unsafe fn xde_rx_one( // } // END THIN_PROCESS EXPERIMENT - let res = port.process(Direction::In, &mut parsed_pkt, ActionMeta::new()); - let mut emit_spec = parsed_pkt.emit_spec(); + let res = port.process(Direction::In, parsed_pkt); match res { - Ok(ProcessResult::Modified | ProcessResult::Bypass) => { + Ok(ProcessResult::Bypass) => { + mac::mac_rx(dev.mh, mrh, pkt.unwrap_mblk()); + } + Ok(ProcessResult::Modified(mut emit_spec)) => { let npkt = emit_spec.apply(pkt); mac::mac_rx(dev.mh, mrh, npkt.unwrap_mblk()); From 7164eaa0347510e6d57b306e6e10da1b5799cb03 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 27 Sep 2024 13:05:06 +0100 Subject: [PATCH 034/115] Whoops. 
--- lib/opte/src/engine/port.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index d2e9af85..b5f7a528 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -1243,7 +1243,7 @@ impl Port { let mut uft: Option<&mut FlowEntry>> = match dir { Direction::Out => data.uft_out.get_mut(&flow_before), - Direction::In => data.uft_out.get_mut(&flow_before), + Direction::In => data.uft_in.get_mut(&flow_before), }; enum FastPathDecision { From 2ebc377c038e65c7794bcbba93026d839f030615 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 27 Sep 2024 17:04:37 +0100 Subject: [PATCH 035/115] 3Gbps intra-VPC, over wire. --- lib/opte/src/engine/ingot_packet.rs | 237 +++++++++++++++++++++++++++- lib/opte/src/engine/mod.rs | 4 +- lib/opte/src/engine/port.rs | 201 +++++++++++++++-------- 3 files changed, 369 insertions(+), 73 deletions(-) diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index c01ee3b3..a1256ca5 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -4,6 +4,7 @@ use super::checksum::HeaderChecksum; use super::ether::EtherMeta; use super::ether::EtherMod; use super::geneve::GeneveMeta; +use super::geneve::GENEVE_PORT; use super::headers::EncapMeta; use super::headers::EncapMod; use super::headers::EncapPush; @@ -191,11 +192,93 @@ impl LightweightMeta for ValidNoEncap { InnerFlowId { proto: proto.into(), addrs, src_port, dst_port } } - fn run_compiled_transform(&mut self, transform: &CompiledTransform) { - todo!() + #[inline] + fn run_compiled_transform(&mut self, transform: &CompiledTransform) + where + V: ByteSliceMut, + { + // TODO: break out commonalities for this and geneve. 
+ if let Some(ether_tx) = &transform.inner_ether { + if let Some(new_src) = ðer_tx.src { + self.inner_eth.set_source(*new_src); + } + if let Some(new_dst) = ðer_tx.dst { + self.inner_eth.set_destination(*new_dst); + } + } + match (&mut self.inner_l3, &transform.inner_ip) { + (Some(ValidL3::Ipv4(pkt)), Some(IpMod::Ip4(tx))) => { + if let Some(new_src) = &tx.src { + pkt.set_source(*new_src); + } + if let Some(new_dst) = &tx.dst { + pkt.set_destination(*new_dst); + } + if let Some(new_proto) = &tx.proto { + pkt.set_protocol(IpProtocol(u8::from(*new_proto))); + } + } + (Some(ValidL3::Ipv6(pkt)), Some(IpMod::Ip6(tx))) => { + if let Some(new_src) = &tx.src { + pkt.set_source(*new_src); + } + if let Some(new_dst) = &tx.dst { + pkt.set_destination(*new_dst); + } + if let Some(new_proto) = &tx.proto { + // TODO: wrong in the face of EHs... + // For now, we never use this on our dataplane. + pkt.set_next_header(IpProtocol(u8::from(*new_proto))); + } + } + _ => {} + } + + match (&mut self.inner_ulp, &transform.inner_ulp) { + (Some(ValidUlp::Tcp(pkt)), Some(tx)) => { + if let Some(flags) = tx.tcp_flags { + pkt.set_flags(TcpFlags::from_bits_retain(flags)); + } + + if let Some(new_src) = &tx.generic.src_port { + pkt.set_source(*new_src); + } + + if let Some(new_dst) = &tx.generic.dst_port { + pkt.set_destination(*new_dst); + } + } + (Some(ValidUlp::Udp(pkt)), Some(tx)) => { + if let Some(new_src) = &tx.generic.src_port { + pkt.set_source(*new_src); + } + + if let Some(new_dst) = &tx.generic.dst_port { + pkt.set_destination(*new_dst); + } + } + (Some(ValidUlp::IcmpV4(pkt)), Some(tx)) + if pkt.ty() == 0 || pkt.ty() == 3 => + { + if let Some(new_id) = tx.icmp_id { + pkt.rest_of_hdr_mut()[..2] + .copy_from_slice(&new_id.to_be_bytes()) + } + } + (Some(ValidUlp::IcmpV6(pkt)), Some(tx)) + if pkt.ty() == 128 || pkt.ty() == 129 => + { + if let Some(new_id) = tx.icmp_id { + pkt.rest_of_hdr_mut()[..2] + .copy_from_slice(&new_id.to_be_bytes()) + } + } + _ => {} + } } // FIXME: identical to 
Geneve. + #[inline] fn compute_body_csum(&self) -> Option { let use_pseudo = if let Some(v) = &self.inner_ulp { !matches!(v, ValidUlp::IcmpV4(_)) @@ -223,10 +306,12 @@ impl LightweightMeta for ValidNoEncap { }) } + #[inline] fn encap_len(&self) -> u16 { 0 } + #[inline] fn update_ulp_checksums(&mut self, body_csum: OpteCsum) { todo!() } @@ -264,10 +349,92 @@ impl LightweightMeta for ValidGeneveOverV6 { InnerFlowId { proto: proto.into(), addrs, src_port, dst_port } } - fn run_compiled_transform(&mut self, transform: &CompiledTransform) { - todo!() + #[inline] + fn run_compiled_transform(&mut self, transform: &CompiledTransform) + where + V: ByteSliceMut, + { + // TODO: break out commonalities for this and geneve. + if let Some(ether_tx) = &transform.inner_ether { + if let Some(new_src) = ðer_tx.src { + self.inner_eth.set_source(*new_src); + } + if let Some(new_dst) = ðer_tx.dst { + self.inner_eth.set_destination(*new_dst); + } + } + match (&mut self.inner_l3, &transform.inner_ip) { + (ValidL3::Ipv4(pkt), Some(IpMod::Ip4(tx))) => { + if let Some(new_src) = &tx.src { + pkt.set_source(*new_src); + } + if let Some(new_dst) = &tx.dst { + pkt.set_destination(*new_dst); + } + if let Some(new_proto) = &tx.proto { + pkt.set_protocol(IpProtocol(u8::from(*new_proto))); + } + } + (ValidL3::Ipv6(pkt), Some(IpMod::Ip6(tx))) => { + if let Some(new_src) = &tx.src { + pkt.set_source(*new_src); + } + if let Some(new_dst) = &tx.dst { + pkt.set_destination(*new_dst); + } + if let Some(new_proto) = &tx.proto { + // TODO: wrong in the face of EHs... + // For now, we never use this on our dataplane. 
+ pkt.set_next_header(IpProtocol(u8::from(*new_proto))); + } + } + _ => {} + } + + match (&mut self.inner_ulp, &transform.inner_ulp) { + (ValidUlp::Tcp(pkt), Some(tx)) => { + if let Some(flags) = tx.tcp_flags { + pkt.set_flags(TcpFlags::from_bits_retain(flags)); + } + + if let Some(new_src) = &tx.generic.src_port { + pkt.set_source(*new_src); + } + + if let Some(new_dst) = &tx.generic.dst_port { + pkt.set_destination(*new_dst); + } + } + (ValidUlp::Udp(pkt), Some(tx)) => { + if let Some(new_src) = &tx.generic.src_port { + pkt.set_source(*new_src); + } + + if let Some(new_dst) = &tx.generic.dst_port { + pkt.set_destination(*new_dst); + } + } + (ValidUlp::IcmpV4(pkt), Some(tx)) + if pkt.ty() == 0 || pkt.ty() == 3 => + { + if let Some(new_id) = tx.icmp_id { + pkt.rest_of_hdr_mut()[..2] + .copy_from_slice(&new_id.to_be_bytes()) + } + } + (ValidUlp::IcmpV6(pkt), Some(tx)) + if pkt.ty() == 128 || pkt.ty() == 129 => + { + if let Some(new_id) = tx.icmp_id { + pkt.rest_of_hdr_mut()[..2] + .copy_from_slice(&new_id.to_be_bytes()) + } + } + _ => {} + } } + #[inline] fn compute_body_csum(&self) -> Option { let use_pseudo = !matches!(self.inner_ulp, ValidUlp::IcmpV4(_)); @@ -291,6 +458,7 @@ impl LightweightMeta for ValidGeneveOverV6 { }) } + #[inline] fn encap_len(&self) -> u16 { (self.outer_eth.packet_length() + self.outer_v6.packet_length() @@ -298,6 +466,7 @@ impl LightweightMeta for ValidGeneveOverV6 { + self.outer_encap.packet_length()) as u16 } + #[inline] fn update_ulp_checksums(&mut self, body_csum: OpteCsum) { todo!() } @@ -1443,6 +1612,10 @@ impl Packet2> { &mut self.state.meta } + pub fn checksums_dirty(&self) -> bool { + self.state.inner_csum_dirty + } + #[inline] /// Convert a packet's metadata into a set of instructions /// needed to serialize all its changes to the wire. @@ -2166,7 +2339,61 @@ impl EmittestSpec { EmitterSpec::Fastpath(push_spec) => match push_spec.encap { CompiledEncap::Pop => pkt, CompiledEncap::Push(eth, ip, encap) => { - todo!() + // ... 
why am I not just pre-making these guys? + let mut encap = match encap { + EncapPush::Geneve(g) => ( + Udp { + source: g.entropy, + destination: GENEVE_PORT, + ..Default::default() + }, + Geneve { vni: g.vni, ..Default::default() }, + ), + }; + + let encap_len = encap.packet_length() as u16; + encap.0.length = (self.ulp_len as u16) + encap_len; + + let eth = Ethernet { + destination: eth.dst.bytes().into(), + source: eth.src.bytes().into(), + ethertype: ingot::ethernet::Ethertype( + eth.ether_type.into(), + ), + }; + let ip = match ip { + IpPush::Ip4(v4) => L3Repr::Ipv4(Ipv4 { + protocol: ingot::ip::IpProtocol(v4.proto.into()), + source: v4.src, + destination: v4.dst, + total_len: 20 + encap.0.length, + ..Default::default() + }), + IpPush::Ip6(v6) => L3Repr::Ipv6(Ipv6 { + next_header: ingot::ip::IpProtocol(v6.proto.into()), + source: v6.src, + destination: v6.dst, + payload_len: encap.0.length, + ..Default::default() + }), + }; + + // TODO: actually use space in the front pkt. + let needed_alloc = eth.packet_length() + + ip.packet_length() + + (encap_len as usize); + let mut new_mblk = MsgBlk::new_ethernet(needed_alloc); + unsafe { + new_mblk.write(needed_alloc, |v| { + (eth, ip, encap) + .emit_uninit(v) + .expect("just allocated the necessary n bytes"); + }) + } + + new_mblk.extend_if_one(pkt); + + new_mblk } }, EmitterSpec::Slowpath(push_spec) => { diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index 589b383c..db2e3161 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -331,7 +331,9 @@ pub trait NetworkParser { /// into the shared `OpteMeta` format. pub trait LightweightMeta: Into> { /// Runs a compiled fastpath action against the target metadata. - fn run_compiled_transform(&mut self, transform: &CompiledTransform); + fn run_compiled_transform(&mut self, transform: &CompiledTransform) + where + T: ByteSliceMut; /// Derive the checksum for the packet body from inner headers. 
fn compute_body_csum(&self) -> Option; diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index b5f7a528..575c8080 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -173,28 +173,15 @@ impl From for ProcessResult { enum InternalProcessResult { Bypass, - Drop { - reason: DropReason, - }, - /// A set of transforms which have not yet been performed on a - /// packet. - /// - /// Slow-path packets are transformed as they traverse tables in the lock, - /// whereas fast-path packets have a complete set of transforms to be applied - /// without blocking the rest of the table. - Modified { - transform: Option>, - tcp_state: Option>>, - }, + Drop { reason: DropReason }, + Modified, Hairpin(MsgBlk), } impl From for InternalProcessResult { fn from(hpa: HdlPktAction) -> Self { match hpa { - HdlPktAction::Allow => { - Self::Modified { transform: None, tcp_state: None } - } + HdlPktAction::Allow => Self::Modified, HdlPktAction::Deny => Self::Drop { reason: DropReason::HandlePkt }, HdlPktAction::Hairpin(pkt) => Self::Hairpin(pkt), } @@ -1195,7 +1182,7 @@ impl Port { /// # States /// /// This command is valid only for [`PortState::Running`]. - #[inline] + // #[inline] pub fn process<'a, M>( &self, dir: Direction, @@ -1306,10 +1293,7 @@ impl Port { // out of the lock sooner. Note that we don't need to *apply* a given // set of transforms in order to know which stats we'll modify. // Also, not an elegant hack! - let dummy_res = Ok(InternalProcessResult::Modified { - transform: None, - tcp_state: None, - }); + let dummy_res = Ok(InternalProcessResult::Modified); match dir { Direction::In => { Self::update_stats_in(&mut data.stats.vals, &dummy_res) @@ -1377,10 +1361,7 @@ impl Port { drop(data); pkt.set_l4_hash(*l4_hash); tx.apply(&mut pkt, dir)?; - Ok(InternalProcessResult::Modified { - transform: None, - tcp_state: None, - }) + Ok(InternalProcessResult::Modified) } // (3) Full-table processing for the packet, then drop the lock. 
@@ -1417,7 +1398,7 @@ impl Port { ProcessResult::Drop { reason } } InternalProcessResult::Hairpin(v) => ProcessResult::Hairpin(v), - InternalProcessResult::Modified { transform, tcp_state } => { + InternalProcessResult::Modified => { let l4_hash = pkt.l4_hash(); let emit_spec = pkt.emit_spec(); @@ -1489,10 +1470,7 @@ impl Port { let xforms = Arc::clone(&a.state().xforms); Self::update_stats_out( &mut data.stats.vals, - &Ok(InternalProcessResult::Modified { - transform: None, - tcp_state: None, - }), + &Ok(InternalProcessResult::Modified), ); drop(data); @@ -1648,10 +1626,7 @@ impl Port { let xforms = Arc::clone(&a.state().xforms); Self::update_stats_in( &mut data.stats.vals, - &Ok(InternalProcessResult::Modified { - transform: None, - tcp_state: None, - }), + &Ok(InternalProcessResult::Modified), ); drop(data); @@ -1983,6 +1958,121 @@ impl Transforms { Ok(()) } + + #[inline] + fn compile(mut self, checksums_dirty: bool) -> Arc { + // Compile to a fasterpath transform iff. no body transform. + if self.body.is_empty() { + let mut still_permissable = true; + + let mut outer_ether = None; + let mut outer_ip = None; + let mut outer_encap = None; + + let mut inner_ether = None; + let mut inner_ip = None; + let mut inner_ulp = None; + for transform in &self.hdr { + if !still_permissable { + continue; + } + + // TODO: refactor. + + // All outer layers must be pushed (or popped/ignored) at the same + // time for compilation. No modifications are permissable. 
+ match transform.outer_ether { + HeaderAction::Push(p) => outer_ether = Some(p), + HeaderAction::Pop => { + outer_ether = None; + } + HeaderAction::Modify(_) => { + still_permissable = false; + } + HeaderAction::Ignore => {} + } + + match transform.outer_ip { + HeaderAction::Push(p) => outer_ip = Some(p), + HeaderAction::Pop => { + outer_ip = None; + } + HeaderAction::Modify(_) => { + still_permissable = false; + } + HeaderAction::Ignore => {} + } + + match transform.outer_encap { + HeaderAction::Push(p) => outer_encap = Some(p), + HeaderAction::Pop => { + outer_encap = None; + } + HeaderAction::Modify(_) => { + still_permissable = false; + } + HeaderAction::Ignore => {} + } + + // Allow up to one action per ULP field, which must be modify. + // We can't yet combine sets of `Modify` actions, + // but the Oxide dataplane does not use this in practice. + match &transform.inner_ether { + HeaderAction::Push(_) | HeaderAction::Pop => { + still_permissable = false; + continue; + } + HeaderAction::Modify(m) => { + still_permissable &= !inner_ether.replace(m).is_some(); + } + HeaderAction::Ignore => {} + } + + match &transform.inner_ip { + HeaderAction::Push(_) | HeaderAction::Pop => { + still_permissable = false; + continue; + } + HeaderAction::Modify(m) => { + still_permissable &= !inner_ip.replace(m).is_some(); + } + HeaderAction::Ignore => {} + } + + match &transform.inner_ulp { + UlpHeaderAction::Modify(m) => { + still_permissable &= !inner_ulp.replace(m).is_some(); + } + UlpHeaderAction::Ignore => {} + } + } + + if still_permissable { + let encap = match (outer_ether, outer_ip, outer_encap) { + (Some(eth), Some(ip), Some(enc)) => { + Some(CompiledEncap::Push(eth, ip, enc)) + } + (None, None, None) => Some(CompiledEncap::Pop), + _ => None, + }; + + if let Some(encap) = encap { + self.compiled = Some( + CompiledTransform { + encap, + inner_ether: inner_ether.cloned(), + inner_ip: inner_ip.cloned(), + inner_ulp: inner_ulp.cloned(), + checksums_dirty, + } + .into(), + 
); + } + } + } + + Arc::new(self) + } } impl fmt::Debug for Transforms { @@ -2396,10 +2486,7 @@ impl Port { // If there is no flow ID, then do not create a UFT // entry. if *ufid_in == FLOW_ID_DEFAULT { - return Ok(InternalProcessResult::Modified { - transform: None, - tcp_state: None, - }); + return Ok(InternalProcessResult::Modified); } } @@ -2428,7 +2515,7 @@ impl Port { let ufid_out = pkt.flow().mirror(); let hte = UftEntry { pair: Some(ufid_out), - xforms: xforms.into(), + xforms: xforms.compile(pkt.checksums_dirty()), epoch, l4_hash: ufid_in.crc32(), }; @@ -2519,10 +2606,7 @@ impl Port { // } // } match data.uft_in.add(*ufid_in, hte) { - Ok(_) => Ok(InternalProcessResult::Modified { - transform: None, - tcp_state: None, - }), + Ok(_) => Ok(InternalProcessResult::Modified), Err(OpteError::MaxCapacity(limit)) => { Err(ProcessError::FlowTableFull { kind: "UFT", limit }) } @@ -2684,10 +2768,7 @@ impl Port { // return Ok(ProcessResult::Modified); // } - return Ok(InternalProcessResult::Modified { - transform, - tcp_state: None, - }); + return Ok(InternalProcessResult::Modified); } // The entry is from a previous epoch; invalidate its UFT @@ -2833,7 +2914,7 @@ impl Port { // XXXX: may be hashing the wrong thing. let hte = UftEntry { pair: None, - xforms: xforms.into(), + xforms: xforms.compile(pkt.checksums_dirty()), epoch, l4_hash: flow_before.crc32(), }; @@ -2842,16 +2923,10 @@ impl Port { Ok(LayerResult::Allow) => { // If there is no Flow ID, then there is no UFT entry. 
if flow_before == FLOW_ID_DEFAULT || tcp_closed { - return Ok(InternalProcessResult::Modified { - transform: None, - tcp_state: None, - }); + return Ok(InternalProcessResult::Modified); } match data.uft_out.add(flow_before, hte) { - Ok(_) => Ok(InternalProcessResult::Modified { - transform: None, - tcp_state: None, - }), + Ok(_) => Ok(InternalProcessResult::Modified), Err(OpteError::MaxCapacity(limit)) => { Err(ProcessError::FlowTableFull { kind: "UFT", limit }) } @@ -2991,11 +3066,7 @@ impl Port { ); } - return Ok(InternalProcessResult::Modified { - transform, - // TODO - tcp_state: None, - }); + return Ok(InternalProcessResult::Modified); } else if let Some(flow_before) = flow_to_invalidate { self.uft_tcp_closed(data, &flow_before, ufid_in.as_ref()); } @@ -3116,9 +3187,7 @@ impl Port { } } - Ok(InternalProcessResult::Modified { .. }) => { - stats.in_modified += 1 - } + Ok(InternalProcessResult::Modified) => stats.in_modified += 1, Ok(InternalProcessResult::Hairpin(_)) => stats.in_hairpin += 1, @@ -3151,9 +3220,7 @@ impl Port { } } - Ok(InternalProcessResult::Modified { .. }) => { - stats.out_modified += 1 - } + Ok(InternalProcessResult::Modified) => stats.out_modified += 1, Ok(InternalProcessResult::Hairpin(_)) => stats.out_hairpin += 1, From a6da8e76164c9030b54654490b9f2e415ecbf820 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 27 Sep 2024 19:04:33 +0100 Subject: [PATCH 036/115] Pre-save pushed headers in Compile -- 3.1Gbps. Finally. 
--- lib/opte/src/engine/ingot_packet.rs | 93 ++++++++++------------------- lib/opte/src/engine/port.rs | 79 +++++++++++++++++++++++- lib/opte/src/engine/rule.rs | 77 +++++++++++++++++++++++- 3 files changed, 184 insertions(+), 65 deletions(-) diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index a1256ca5..49ad6c89 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -478,6 +478,30 @@ pub struct MsgBlk { pub inner: NonNull, } +impl Deref for MsgBlk { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + unsafe { + let self_ref = self.inner.as_ref(); + let rptr = self_ref.b_rptr; + let len = self_ref.b_wptr.offset_from(rptr) as usize; + slice::from_raw_parts(rptr, len) + } + } +} + +impl DerefMut for MsgBlk { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { + let self_ref = self.inner.as_mut(); + let rptr = self_ref.b_rptr; + let len = self_ref.b_wptr.offset_from(rptr) as usize; + slice::from_raw_parts_mut(rptr, len) + } + } +} + #[derive(Debug)] pub struct MsgBlkNode(mblk_t); @@ -2336,66 +2360,9 @@ impl EmittestSpec { } match &mut self.spec { - EmitterSpec::Fastpath(push_spec) => match push_spec.encap { - CompiledEncap::Pop => pkt, - CompiledEncap::Push(eth, ip, encap) => { - // ... why am I not just pre-making these guys? 
- let mut encap = match encap { - EncapPush::Geneve(g) => ( - Udp { - source: g.entropy, - destination: GENEVE_PORT, - ..Default::default() - }, - Geneve { vni: g.vni, ..Default::default() }, - ), - }; - - let encap_len = encap.packet_length() as u16; - encap.0.length = (self.ulp_len as u16) + encap_len; - - let eth = Ethernet { - destination: eth.dst.bytes().into(), - source: eth.src.bytes().into(), - ethertype: ingot::ethernet::Ethertype( - eth.ether_type.into(), - ), - }; - let ip = match ip { - IpPush::Ip4(v4) => L3Repr::Ipv4(Ipv4 { - protocol: ingot::ip::IpProtocol(v4.proto.into()), - source: v4.src, - destination: v4.dst, - total_len: 20 + encap.0.length, - ..Default::default() - }), - IpPush::Ip6(v6) => L3Repr::Ipv6(Ipv6 { - next_header: ingot::ip::IpProtocol(v6.proto.into()), - source: v6.src, - destination: v6.dst, - payload_len: encap.0.length, - ..Default::default() - }), - }; - - // TODO: actually use space in the front pkt. - let needed_alloc = eth.packet_length() - + ip.packet_length() - + (encap_len as usize); - let mut new_mblk = MsgBlk::new_ethernet(needed_alloc); - unsafe { - new_mblk.write(needed_alloc, |v| { - (eth, ip, encap) - .emit_uninit(v) - .expect("just allocated the necessary n bytes"); - }) - } - - new_mblk.extend_if_one(pkt); - - new_mblk - } - }, + EmitterSpec::Fastpath(push_spec) => { + push_spec.encap.prepend(pkt, self.ulp_len as usize) + } EmitterSpec::Slowpath(push_spec) => { // TODO: // - remove all zero-length nodes. @@ -2492,7 +2459,9 @@ impl EmittestSpec { pub fn outer_encap_vni(&self) -> Option { match &self.spec { EmitterSpec::Fastpath(c) => match &c.encap { - CompiledEncap::Push(_, _, EncapPush::Geneve(g)) => Some(g.vni), + CompiledEncap::Push { encap: EncapPush::Geneve(g), .. 
} => { + Some(g.vni) + } _ => None, }, EmitterSpec::Slowpath(s) => match &s.outer_encap { @@ -2506,7 +2475,7 @@ impl EmittestSpec { pub fn outer_ip6_addrs(&self) -> Option<(Ipv6Addr, Ipv6Addr)> { match &self.spec { EmitterSpec::Fastpath(c) => match &c.encap { - CompiledEncap::Push(_, IpPush::Ip6(v6), _) => { + CompiledEncap::Push { ip: IpPush::Ip6(v6), .. } => { Some((v6.src, v6.dst)) } _ => None, diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 575c8080..0a06d6a0 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -12,10 +12,15 @@ use super::flow_table::Dump; use super::flow_table::FlowEntry; use super::flow_table::FlowTable; use super::flow_table::Ttl; +use super::geneve::GENEVE_PORT; use super::headers::EncapPush; use super::headers::HeaderAction; use super::headers::IpPush; use super::headers::UlpHeaderAction; +use super::ingot_base::Ethernet; +use super::ingot_base::Ipv4; +use super::ingot_base::Ipv6; +use super::ingot_base::L3Repr; use super::ingot_packet::MsgBlk; use super::ingot_packet::MsgBlkIterMut; use super::ingot_packet::Packet2; @@ -85,7 +90,11 @@ use core::sync::atomic::AtomicU64; use core::sync::atomic::Ordering::SeqCst; #[cfg(all(not(feature = "std"), not(test)))] use illumos_sys_hdrs::uintptr_t; +use ingot::geneve::Geneve; +use ingot::types::Emit; +use ingot::types::Header; use ingot::types::Read; +use ingot::udp::Udp; use kstat_macro::KStatProvider; use opte_api::Direction; use opte_api::MacAddr; @@ -2049,8 +2058,74 @@ impl Transforms { if still_permissable { let encap = match (outer_ether, outer_ip, outer_encap) { - (Some(eth), Some(ip), Some(enc)) => { - Some(CompiledEncap::Push(eth, ip, enc)) + (Some(eth), Some(ip), Some(encap)) => { + let mut encap_repr = match encap { + EncapPush::Geneve(g) => ( + Udp { + source: g.entropy, + destination: GENEVE_PORT, + ..Default::default() + }, + Geneve { vni: g.vni, ..Default::default() }, + ), + }; + + let eth_repr = Ethernet { + destination: 
eth.dst.bytes().into(), + source: eth.src.bytes().into(), + ethertype: ingot::ethernet::Ethertype( + eth.ether_type.into(), + ), + }; + let (ip_repr, l3_extra_bytes, ip_len_offset) = match ip + { + IpPush::Ip4(v4) => ( + L3Repr::Ipv4(Ipv4 { + protocol: ingot::ip::IpProtocol( + v4.proto.into(), + ), + source: v4.src, + destination: v4.dst, + total_len: 20, + ..Default::default() + }), + 20, + 2, + ), + IpPush::Ip6(v6) => ( + L3Repr::Ipv6(Ipv6 { + next_header: ingot::ip::IpProtocol( + v6.proto.into(), + ), + source: v6.src, + destination: v6.dst, + payload_len: 0, + ..Default::default() + }), + 0, + 4, + ), + }; + + let encap_sz = encap_repr.packet_length(); + let l3_len_offset = + eth_repr.packet_length() + ip_len_offset; + let l4_len_offset = eth_repr.packet_length() + + ip_repr.packet_length() + + 4; + + let bytes = (eth_repr, ip_repr, encap_repr).emit_vec(); + + Some(CompiledEncap::Push { + encap, + eth, + ip, + bytes, + l3_len_offset, + l3_extra_bytes, + l4_len_offset, + encap_sz, + }) } (None, None, None) => Some(CompiledEncap::Pop), _ => None, diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 9799355e..3198d276 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -49,6 +49,7 @@ use core::ffi::CStr; use core::fmt; use core::fmt::Debug; use core::fmt::Display; +use core::mem::MaybeUninit; use illumos_sys_hdrs::c_char; use illumos_sys_hdrs::uintptr_t; use ingot::types::DirectPacket; @@ -330,7 +331,81 @@ pub struct CompiledTransform { pub enum CompiledEncap { Pop, // TODO: can we cache these in an Arc'd buffer? 
- Push(EtherMeta, IpPush, EncapPush), + Push { + eth: EtherMeta, + ip: IpPush, + encap: EncapPush, + bytes: Vec, + l3_len_offset: usize, + l3_extra_bytes: usize, + l4_len_offset: usize, + encap_sz: usize, + }, +} + +impl CompiledEncap { + #[inline] + pub fn prepend(&self, mut pkt: MsgBlk, ulp_len: usize) -> MsgBlk { + let Self::Push { + ref bytes, + l3_len_offset, + l3_extra_bytes, + l4_len_offset, + encap_sz, + .. + } = self + else { + return pkt; + }; + + let mut prepend = if pkt.headroom() < bytes.len() { + let mut pkt = MsgBlk::new_ethernet(bytes.len()); + pkt.pop_all(); + Some(pkt) + } else { + None + }; + + let target = if let Some(prepend) = prepend.as_mut() { + prepend + } else { + &mut pkt + }; + + unsafe { + target.write_front(bytes.len(), |v| { + // feat(maybe_uninit_write_slice) -> copy_from_slice + // is unstable. + let uninit_src: &[MaybeUninit] = + core::mem::transmute(bytes.as_slice()); + v.copy_from_slice(uninit_src); + }); + } + + let l4_len = ulp_len + encap_sz; + let l3_len = l4_len + l3_extra_bytes; + + let l3_len_slot: &mut [u8; core::mem::size_of::()] = (&mut target + [*l3_len_offset..l3_len_offset + core::mem::size_of::()]) + .try_into() + .expect("exact no bytes"); + + *l3_len_slot = (l3_len as u16).to_be_bytes(); + + let l4_len_slot: &mut [u8; core::mem::size_of::()] = (&mut target + [*l4_len_offset..l4_len_offset + core::mem::size_of::()]) + .try_into() + .expect("exact no bytes"); + + *l4_len_slot = (l4_len as u16).to_be_bytes(); + + if let Some(mut prepend) = prepend { + prepend.extend_if_one(pkt); + prepend + } else { + pkt + } + } } #[cfg(all(not(feature = "std"), not(test)))] From 90354403dad0f5b21c9bdba79a7c54a68aa9ad73 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 27 Sep 2024 19:30:01 +0100 Subject: [PATCH 037/115] Remove thin process. 
--- lib/opte/src/engine/port.rs | 345 ------------------------------------ xde/src/xde.rs | 157 ---------------- 2 files changed, 502 deletions(-) diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 0a06d6a0..e5838069 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -1432,351 +1432,6 @@ impl Port { res } - // hope and pray we find a ULP, then use that? - pub fn thin_process( - &self, - dir: Direction, - pkt: &mut Packet2, - ) -> result::Result { - use super::ingot_base::EthernetMut; - use super::ingot_base::Ipv4Mut; - use super::ingot_base::Ipv6Mut; - use super::ingot_base::Ulp; - use super::ingot_base::L3; - use ingot::icmp::IcmpV4Mut; - use ingot::icmp::IcmpV4Ref; - use ingot::icmp::IcmpV6Mut; - use ingot::icmp::IcmpV6Ref; - use ingot::tcp::TcpFlags; - use ingot::tcp::TcpMut; - use ingot::udp::UdpMut; - - let flow_before = pkt.flow(); - // let flow_before = *pkt.flow(); - let _epoch = self.epoch.load(SeqCst); - let mut data = self.data.lock(); - check_state!(data.state, [PortState::Running]) - .map_err(|_| ProcessError::BadState(data.state))?; - - let mut dirty_csum = false; - - // self.port_process_entry_probe(dir, &flow_before, epoch, pskt); - // TODO: what stats? lmao - match dir { - Direction::Out => { - // opte::engine::err!("looking up {:?} in outdir...", flow_before); - let a = data.uft_out.get(&flow_before); - let Some(a) = a else { - // eh. It will get recirc'd for free... - // opte::engine::err!("not found! 
Releasing!"); - return Err(ProcessError::FlowTableFull { - kind: "()", - limit: 0, - }); - }; - pkt.set_l4_hash(a.state().l4_hash); - // opte::engine::err!("found!"); - let xforms = Arc::clone(&a.state().xforms); - Self::update_stats_out( - &mut data.stats.vals, - &Ok(InternalProcessResult::Modified), - ); - drop(data); - - let hm = &mut pkt.meta_mut().headers; - - let mut new_eth = None; - let mut new_ip = None; - let mut new_encap = None; - // opte::engine::err!("xforms {:?}!", &a.state().xforms.hdr); - for xf in &xforms.hdr { - // opte::engine::err!("xf..."); - if let HeaderAction::Push(outer_eth) = &xf.outer_ether { - new_eth = Some(outer_eth.clone()); - } - if let HeaderAction::Push(outer_ip) = &xf.outer_ip { - new_ip = Some(outer_ip.clone()); - } - if let HeaderAction::Push(outer_ec) = &xf.outer_encap { - new_encap = Some(outer_ec.clone()); - } - if let HeaderAction::Modify(m) = &xf.inner_ether { - if let Some(src) = m.src { - hm.inner_eth.set_source(src); - } - if let Some(dst) = m.dst { - hm.inner_eth.set_destination(dst); - } - } - if let HeaderAction::Modify(m) = &xf.inner_ip { - match m { - super::headers::IpMod::Ip4(v4) => { - let Some(L3::Ipv4(ref mut v4_t)) = hm.inner_l3 - else { - return Err(ProcessError::FlowTableFull { - kind: "()", - limit: 0, - }); - }; - if let Some(src) = v4.src { - dirty_csum = true; - v4_t.set_source(src.into()); - } - if let Some(dst) = v4.dst { - dirty_csum = true; - v4_t.set_destination(dst.into()); - } - } - super::headers::IpMod::Ip6(v6) => { - let Some(L3::Ipv6(ref mut v6_t)) = hm.inner_l3 - else { - return Err(ProcessError::FlowTableFull { - kind: "()", - limit: 0, - }); - }; - if let Some(src) = v6.src { - dirty_csum = true; - v6_t.set_source(src.into()); - } - if let Some(dst) = v6.dst { - dirty_csum = true; - v6_t.set_destination(dst.into()); - } - } - } - } - if let UlpHeaderAction::Modify(m) = &xf.inner_ulp { - if let Some(src) = &m.generic.src_port { - match hm.inner_ulp { - Some(Ulp::Tcp(ref mut t)) => { - 
dirty_csum = true; - t.set_source(*src) - } - Some(Ulp::Udp(ref mut t)) => { - dirty_csum = true; - t.set_source(*src) - } - _ => {} - } - } - if let Some(dst) = &m.generic.dst_port { - match hm.inner_ulp { - Some(Ulp::Tcp(ref mut t)) => { - dirty_csum = true; - t.set_destination(*dst) - } - Some(Ulp::Udp(ref mut t)) => { - dirty_csum = true; - t.set_destination(*dst) - } - _ => {} - } - } - if let Some(flags) = &m.tcp_flags { - match hm.inner_ulp { - Some(Ulp::Tcp(ref mut t)) => { - dirty_csum = true; - t.set_flags(TcpFlags::from_bits_retain( - *flags, - )) - } - _ => {} - } - } - if let Some(new_id) = &m.icmp_id { - match hm.inner_ulp { - Some(Ulp::IcmpV4(ref mut pkt)) - if pkt.ty() == 0 || pkt.ty() == 3 => - { - dirty_csum = true; - pkt.rest_of_hdr_mut()[..2] - .copy_from_slice(&new_id.to_be_bytes()) - } - Some(Ulp::IcmpV6(ref mut pkt)) - if pkt.ty() == 128 || pkt.ty() == 129 => - { - dirty_csum = true; - pkt.rest_of_hdr_mut()[..2] - .copy_from_slice(&new_id.to_be_bytes()) - } - _ => {} - } - } - } - } - - if dirty_csum { - // TODO: something. - } - - match (new_eth, new_ip, new_encap) { - (Some(a), Some(b), Some(c)) => { - Ok(ThinProcRes::PushEncap(a, b, c)) - } - (None, None, None) => Ok(ThinProcRes::Na), - _ => Err(ProcessError::FlowTableFull { - kind: "()", - limit: 0, - }), - } - } - - Direction::In => { - let a = data.uft_in.get(&flow_before); - let Some(a) = a else { - // eh. 
- return Err(ProcessError::FlowTableFull { - kind: "()", - limit: 0, - }); - }; - pkt.set_l4_hash(a.state().l4_hash); - let xforms = Arc::clone(&a.state().xforms); - Self::update_stats_in( - &mut data.stats.vals, - &Ok(InternalProcessResult::Modified), - ); - drop(data); - - let hm = &mut pkt.meta_mut().headers; - - let mut pop_eth = false; - let mut pop_ip = false; - let mut pop_encap = false; - for xf in &xforms.hdr { - // opte::engine::err!("xf..."); - if let HeaderAction::Pop = &xf.outer_ether { - pop_eth = true; - } - if let HeaderAction::Pop = &xf.outer_ip { - pop_ip = true; - } - if let HeaderAction::Pop = &xf.outer_encap { - pop_encap = true; - } - if let HeaderAction::Modify(m) = &xf.inner_ether { - if let Some(src) = m.src { - hm.inner_eth.set_source(src); - } - if let Some(dst) = m.dst { - hm.inner_eth.set_destination(dst); - } - } - if let HeaderAction::Modify(m) = &xf.inner_ip { - match m { - super::headers::IpMod::Ip4(v4) => { - let Some(L3::Ipv4(ref mut v4_t)) = hm.inner_l3 - else { - return Err(ProcessError::FlowTableFull { - kind: "()", - limit: 0, - }); - }; - if let Some(src) = v4.src { - dirty_csum = true; - v4_t.set_source(src.into()); - } - if let Some(dst) = v4.dst { - dirty_csum = true; - v4_t.set_destination(dst.into()); - } - } - super::headers::IpMod::Ip6(v6) => { - let Some(L3::Ipv6(ref mut v6_t)) = hm.inner_l3 - else { - return Err(ProcessError::FlowTableFull { - kind: "()", - limit: 0, - }); - }; - if let Some(src) = v6.src { - dirty_csum = true; - v6_t.set_source(src.into()); - } - if let Some(dst) = v6.dst { - dirty_csum = true; - v6_t.set_destination(dst.into()); - } - } - } - } - if let UlpHeaderAction::Modify(m) = &xf.inner_ulp { - if let Some(src) = &m.generic.src_port { - match hm.inner_ulp { - Some(Ulp::Tcp(ref mut t)) => { - dirty_csum = true; - t.set_source(*src) - } - Some(Ulp::Udp(ref mut t)) => { - dirty_csum = true; - t.set_source(*src) - } - _ => {} - } - } - if let Some(dst) = &m.generic.dst_port { - match hm.inner_ulp 
{ - Some(Ulp::Tcp(ref mut t)) => { - dirty_csum = true; - t.set_destination(*dst) - } - Some(Ulp::Udp(ref mut t)) => { - dirty_csum = true; - t.set_destination(*dst) - } - _ => {} - } - } - if let Some(flags) = &m.tcp_flags { - match hm.inner_ulp { - Some(Ulp::Tcp(ref mut t)) => { - dirty_csum = true; - t.set_flags(TcpFlags::from_bits_retain( - *flags, - )) - } - _ => {} - } - } - if let Some(new_id) = &m.icmp_id { - match hm.inner_ulp { - Some(Ulp::IcmpV4(ref mut pkt)) - if pkt.ty() == 0 || pkt.ty() == 3 => - { - dirty_csum = true; - pkt.rest_of_hdr_mut()[..2] - .copy_from_slice(&new_id.to_be_bytes()) - } - Some(Ulp::IcmpV6(ref mut pkt)) - if pkt.ty() == 128 || pkt.ty() == 129 => - { - dirty_csum = true; - pkt.rest_of_hdr_mut()[..2] - .copy_from_slice(&new_id.to_be_bytes()) - } - _ => {} - } - } - } - } - - if dirty_csum { - // TODO: do something. - } - - match (pop_eth, pop_ip, pop_encap) { - (true, true, true) => Ok(ThinProcRes::PopEncap), - (false, false, false) => Ok(ThinProcRes::Na), - _ => Err(ProcessError::FlowTableFull { - kind: "()", - limit: 0, - }), - } - } - } - } - /// Remove the rule identified by the `dir`, `layer_name`, `id` /// combination, if such a rule exists. /// diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 44c8bd0d..c283de68 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -1611,128 +1611,6 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { let port = &src_dev.port; - // BEGIN THIN_PROCESS RE-EXPERIMENT - // let mut ip6_src = Default::default(); - // let mut ip6_dst = Default::default(); - // let f_hash; - // if let Ok(decision) = port.thin_process(Direction::Out, &mut parsed_pkt) { - // match decision { - // opte::engine::port::ThinProcRes::PushEncap(eth, ip, udp) => { - // f_hash = parsed_pkt.l4_hash(); - // drop(parsed_pkt); - - // // TODO: generate methods to fill a maybeuninit. 
- // // total bytes: ETH 14, V6 40, UDP 8, GENEVE 8 - // let new_hdrs = 14 + 40 + 8 + 8; - // let mut new_blk = MsgBlk::new_with_headroom(2, new_hdrs); - - // let w_encap_bytes = (pkt_len_old + 16) as u16; - - // new_blk.write(14, |uninit| { - // let complete_eth = opte::ingot::ethernet::Ethernet { - // destination: eth.dst.bytes().into(), - // source: eth.src.bytes().into(), - // ethertype: ingot::ethernet::Ethertype( - // eth.ether_type.into(), - // ), - // }; - - // complete_eth - // .emit_uninit(uninit) - // .expect("must be enough room..."); - // }); - - // // we know we'er only pushing v6. - // let IpPush::Ip6(v6) = ip else { panic!() }; - // ip6_src = v6.src; - // ip6_dst = v6.dst; - - // new_blk.write(40, |uninit| { - // let complete_v6 = opte::ingot::ip::Ipv6 { - // version: 6, - // dscp: 0, - // ecn: ingot::ip::Ecn::NotCapable, - // flow_label: 12345678, - // payload_len: w_encap_bytes, - // next_header: ingot::ip::IpProtocol(v6.proto.into()), - // hop_limit: 128, - // source: v6.src.bytes().into(), - // destination: v6.dst.bytes().into(), - // v6ext: vec![].into(), - // }; - - // complete_v6 - // .emit_uninit(uninit) - // .expect("must be enough room..."); - // }); - - // let EncapPush::Geneve(gen) = udp else { panic!() }; - // new_blk.write(16, |uninit| { - // let complete_udp = opte::ingot::udp::Udp { - // source: gen.entropy, - // destination: 6081, - // length: w_encap_bytes, - // checksum: 0, - // }; - // let complete_geneve = opte::ingot::geneve::Geneve { - // version: 0, - // opt_len: 0, - // flags: opte::ingot::geneve::GeneveFlags::empty(), - // protocol_type: - // opte::ingot::ethernet::Ethertype::ETHERNET, - // vni: gen.vni.into(), - // reserved: 0, - // options: Vec::new(), - // }; - - // let len = complete_udp - // .emit_uninit(uninit) - // .expect("must be enough room..."); - // complete_geneve - // .emit_uninit(&mut uninit[len..]) - // .expect("must be enough room..."); - // }); - - // core::mem::swap(&mut new_blk, &mut pkt); - // 
pkt.extend_if_one(new_blk); - // } - // // we're in Tx for a ULP'd pkt -- this should NEVER happen. - // opte::engine::port::ThinProcRes::PopEncap => unreachable!(), - // opte::engine::port::ThinProcRes::Na => unreachable!(), - // } - - // if ip6_dst == ip6_src { - // // todo. broken just now ig - // // return guest_loopback(src_dev, pkt, vni); - // opte::engine::err!("eh?"); - // return ptr::null_mut(); - // } - - // let my_key = RouteKey { dst: ip6_dst, l4_hash: Some(f_hash) }; - // let Route { src, dst, underlay_dev } = - // src_dev.routes.next_hop(my_key, src_dev); - - // // Get a pointer to the beginning of the outer frame and - // // fill in the dst/src addresses before sending out the - // // device. - // let mblk = pkt.unwrap_mblk(); - // let rptr = (*mblk).b_rptr; - // ptr::copy(dst.as_ptr(), rptr, 6); - // ptr::copy(src.as_ptr(), rptr.add(6), 6); - // // Unwrap: We know the packet is good because we just - // // unwrapped it above. - // let new_pkt = MsgBlk::wrap_mblk(mblk).unwrap(); - - // underlay_dev.stream.tx_drop_on_no_desc2( - // new_pkt, - // hint, - // MacTxFlags::empty(), - // ); - - // return ptr::null_mut(); - // } - // END THIN_PROCESS RE-EXPERIMENT - // The port processing code will fire a probe that describes what // action was taken -- there should be no need to add probes or // prints here. 
@@ -2052,41 +1930,6 @@ unsafe fn xde_rx_one( let port = &dev.port; - // BEGIN THIN_PROCESS EXPERIMENT - // let h = parsed_pkt.meta(); - - // let pop_len: usize = 70; //h.outer_ether().packet_length() + h.outer_l3 + h.outer_encap; - - // if let Ok(decision) = port.thin_process(Direction::In, &mut parsed_pkt) { - // match decision { - // opte::engine::port::ThinProcRes::PopEncap => { - // let mut to_pop = pop_len; - // drop(parsed_pkt); - // for layer in pkt.iter_mut() { - // let max_drop = layer.len(); - // let will_drop = max_drop.min(to_pop); - // layer.drop_front_bytes(will_drop); - // to_pop -= will_drop; - - // if to_pop == 0 { - // break; - // } - // } - - // // could theoretically have empty segments here. - // // not an issue over NIC for now. - // mac::mac_rx(dev.mh, mrh, pkt.unwrap_mblk()); - // } - // // we know this to be true given how we cfg opte - // opte::engine::port::ThinProcRes::PushEncap(_, _, _) => { - // unreachable!() - // } - // opte::engine::port::ThinProcRes::Na => unreachable!(), - // } - // return; - // } - // END THIN_PROCESS EXPERIMENT - let res = port.process(Direction::In, parsed_pkt); match res { From 70dab2a1c0003a4e47a47d9669c50dcf58de1ffd Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 27 Sep 2024 20:01:07 +0100 Subject: [PATCH 038/115] Tricksy ~~hobbits~~ SDT probes. --- lib/opte/src/engine/port.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index e5838069..3d96bb9e 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -1191,13 +1191,13 @@ impl Port { /// # States /// /// This command is valid only for [`PortState::Running`]. - // #[inline] pub fn process<'a, M>( &self, dir: Direction, // TODO: might want to pass in a &mut to an enum // which can advance to (and hold) light->full-fat metadata. - // Then we can have our cake and eat it too. 
+ // My gutfeel is that there's a perf cost here -- this struct + // is pretty fat, but expressing the transform on a &mut also sucks. mut pkt: Packet2, M>>, ) -> result::Result where @@ -1905,6 +1905,7 @@ impl Port { } } + #[inline] fn port_process_return_probe( &self, dir: Direction, From 80d8a72025629406511263610791178c0581f667 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 8 Oct 2024 15:38:02 +0100 Subject: [PATCH 039/115] With new error handling! --- Cargo.lock | 35 ++++++++++------------------- Cargo.toml | 4 ++-- lib/opte/src/engine/ingot_base.rs | 27 +--------------------- lib/opte/src/engine/ingot_packet.rs | 15 +++++-------- lib/opte/src/engine/packet.rs | 29 +++++++++++++++++++----- 5 files changed, 45 insertions(+), 65 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index da89f551..e4270361 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -882,24 +882,23 @@ dependencies = [ [[package]] name = "ingot" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=facb123e8b5c0dc08fd74c1c8f4945b82be2c91c#facb123e8b5c0dc08fd74c1c8f4945b82be2c91c" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=22ab5b141d0e481bb5b95765160c74da68275e5a#22ab5b141d0e481bb5b95765160c74da68275e5a" dependencies = [ "bitflags 2.6.0", "ingot-macros", "ingot-types", "macaddr", "serde", - "zerocopy 0.8.0-alpha.25", + "zerocopy 0.8.3", ] [[package]] name = "ingot-macros" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=facb123e8b5c0dc08fd74c1c8f4945b82be2c91c#facb123e8b5c0dc08fd74c1c8f4945b82be2c91c" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=22ab5b141d0e481bb5b95765160c74da68275e5a#22ab5b141d0e481bb5b95765160c74da68275e5a" dependencies = [ "darling", "itertools 0.13.0", - "prettyplease", "proc-macro2", "quote", "regex", @@ -909,12 +908,12 @@ dependencies = [ [[package]] name = "ingot-types" version = "0.1.0" -source = 
"git+https://github.com/oxidecomputer/ingot.git?rev=facb123e8b5c0dc08fd74c1c8f4945b82be2c91c#facb123e8b5c0dc08fd74c1c8f4945b82be2c91c" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=22ab5b141d0e481bb5b95765160c74da68275e5a#22ab5b141d0e481bb5b95765160c74da68275e5a" dependencies = [ "heapless", "ingot-macros", "macaddr", - "zerocopy 0.8.0-alpha.25", + "zerocopy 0.8.3", ] [[package]] @@ -1273,7 +1272,7 @@ dependencies = [ "tabwriter", "usdt", "version_check", - "zerocopy 0.8.0-alpha.25", + "zerocopy 0.8.3", ] [[package]] @@ -1375,7 +1374,7 @@ dependencies = [ "smoltcp", "tabwriter", "usdt", - "zerocopy 0.8.0-alpha.25", + "zerocopy 0.8.3", ] [[package]] @@ -1558,16 +1557,6 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbc83ee4a840062f368f9096d80077a9841ec117e17e7f700df81958f1451254" -[[package]] -name = "prettyplease" -version = "0.2.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" -dependencies = [ - "proc-macro2", - "syn 2.0.75", -] - [[package]] name = "proc-macro-crate" version = "1.3.1" @@ -2747,11 +2736,11 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.0-alpha.25" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ade16fe7106200b0c121a3251c106c40ffcdecdab68122de5909643d22db075e" +checksum = "199837a02c176ffe66ac6e3f6195ff49ed0ae9c0fc9c905970f924909812aba6" dependencies = [ - "zerocopy-derive 0.8.0-alpha.25", + "zerocopy-derive 0.8.3", ] [[package]] @@ -2767,9 +2756,9 @@ dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.8.0-alpha.25" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7873cce5410d6ff897beb4b4847366c1013fcda5ec96387a74fa4e0d2580025b" +checksum = "8c76c8bc3d9d3594dabe11d4ffab6cd71cc2c3ce38526c6de5a0d81dd0039627" dependencies = [ "proc-macro2", "quote", diff 
--git a/Cargo.toml b/Cargo.toml index 2936ed05..4cafe031 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,7 +50,7 @@ ctor = "0.2" darling = "0.20" dyn-clone = "1.0" heapless = "0.8" -ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "facb123e8b5c0dc08fd74c1c8f4945b82be2c91c"} +ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "22ab5b141d0e481bb5b95765160c74da68275e5a"} ipnetwork = { version = "0.20", default-features = false } itertools = { version = "0.13", default-features = false } libc = "0.2" @@ -75,7 +75,7 @@ thiserror = "1.0" toml = "0.8" usdt = "0.5" version_check = "0.9" -zerocopy = { version = "0.8.0-alpha.25", features = ["derive"] } +zerocopy = { version = "0.8", features = ["derive"] } zone = { git = "https://github.com/oxidecomputer/zone" } ztest = { git = "https://github.com/oxidecomputer/falcon", branch = "main" } poptrie = { git = "https://github.com/oxidecomputer/poptrie", branch = "multipath" } diff --git a/lib/opte/src/engine/ingot_base.rs b/lib/opte/src/engine/ingot_base.rs index d70cb0ca..373db204 100644 --- a/lib/opte/src/engine/ingot_base.rs +++ b/lib/opte/src/engine/ingot_base.rs @@ -12,11 +12,11 @@ use ingot::ip::LowRentV6EhRepr; use ingot::tcp::Tcp; use ingot::tcp::ValidTcp; use ingot::types::primitives::*; +use ingot::types::util::Repeated; use ingot::types::ByteSlice; use ingot::types::NetworkRepr; use ingot::types::Packet; use ingot::types::ParseError; -use ingot::types::Repeated; use ingot::types::Vec; use ingot::udp::Udp; use ingot::udp::ValidUdp; @@ -117,28 +117,3 @@ pub struct Ipv6 { #[ingot(subparse(on_next_layer))] pub v6ext: Repeated, } - -// Why TF do I need to redefine these? Check... 
-// impl From for Packet { -// fn from(value: Ipv4) -> Self { -// Packet::Repr(value) -// } -// } - -// impl From> for Packet> { -// fn from(value: ValidIpv4) -> Self { -// Packet::Raw(value) -// } -// } - -// impl From for Packet { -// fn from(value: Ipv6) -> Self { -// Packet::Repr(value) -// } -// } - -// impl From> for Packet> { -// fn from(value: ValidIpv6) -> Self { -// Packet::Raw(value) -// } -// } diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 49ad6c89..4959bc45 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -94,10 +94,10 @@ use ingot::tcp::TcpMut; use ingot::tcp::TcpPacket; use ingot::tcp::TcpRef; use ingot::types::primitives::*; +use ingot::types::util::Repeated; use ingot::types::DirectPacket; use ingot::types::Emit; use ingot::types::Header; -use ingot::types::HeaderStack; use ingot::types::IndirectPacket; use ingot::types::NextLayer; use ingot::types::Packet as IngotPacket; @@ -106,7 +106,6 @@ use ingot::types::ParseError as IngotParseErr; use ingot::types::ParseResult; use ingot::types::Parsed as IngotParsed; use ingot::types::Read; -use ingot::types::Repeated; use ingot::udp::Udp; use ingot::udp::UdpMut; use ingot::udp::UdpPacket; @@ -850,10 +849,9 @@ impl OpteMeta { pub fn convert_ingot, Q: Read>( value: IngotParsed, ) -> OpteParsed { - let IngotParsed { stack: HeaderStack(headers), data, last_chunk } = - value; + let IngotParsed { stack: headers, data, last_chunk } = value; - IngotParsed { stack: HeaderStack(headers.into()), data, last_chunk } + IngotParsed { stack: headers.into(), data, last_chunk } } } @@ -1566,8 +1564,7 @@ where #[inline] pub fn to_full_meta(self) -> Packet2> { let Packet2 { state: ParsedStage1 { len, meta } } = self; - let IngotParsed { stack: HeaderStack(headers), data, last_chunk } = - meta; + let IngotParsed { stack: headers, data, last_chunk } = meta; // TODO: we can probably not do this in some cases, but we // don't have a way 
for headeractions to signal that they @@ -1608,12 +1605,12 @@ where #[inline] pub fn meta(&self) -> &M { - &self.state.meta.stack.0 + &self.state.meta.stack } #[inline] pub fn meta_mut(&mut self) -> &mut M { - &mut self.state.meta.stack.0 + &mut self.state.meta.stack } #[inline] diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 672e63ea..de40a192 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -2542,12 +2542,31 @@ impl From for PacketError { } } +impl DError for ingot::types::ParseError { + fn discriminant(&self) -> &'static core::ffi::CStr { + self.as_cstr() + } + + fn child(&self) -> Option<&dyn DError> { + None + } +} + +impl DError for ingot::types::PacketParseError { + fn discriminant(&self) -> &'static core::ffi::CStr { + self.header().as_cstr() + } + + fn child(&self) -> Option<&dyn DError> { + Some(self.error()) + } +} + #[derive(Clone, Debug, Eq, PartialEq, DError)] #[derror(leaf_data = ParseError::data)] pub enum ParseError { - // TODO: make this far richer... - #[leaf] - IngotError(ingot::types::ParseError), + // TODO: I think this may be the only err variant? + IngotError(ingot::types::PacketParseError), BadHeader(HeaderReadErr), BadInnerIpLen { expected: usize, @@ -2607,8 +2626,8 @@ impl ParseError { } } -impl From for ParseError { - fn from(value: ingot::types::ParseError) -> Self { +impl From for ParseError { + fn from(value: ingot::types::PacketParseError) -> Self { Self::IngotError(value) } } From 110d22ed943283073ec2df3890e2f37adf95bf75 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 9 Oct 2024 20:13:15 +0100 Subject: [PATCH 040/115] As I rewrite a test suite... could not compile `oxide-vpc` (test "integration_tests") due to 203 previous errors; 1 warning emitted It'll be a long few days. 
--- Cargo.lock | 6 +- Cargo.toml | 2 +- lib/opte-test-utils/src/dhcp.rs | 142 +++--- lib/opte-test-utils/src/icmp.rs | 352 ++++++------- lib/opte-test-utils/src/lib.rs | 732 +++++++++++++--------------- lib/opte/src/engine/dhcp.rs | 7 +- lib/opte/src/engine/ingot_base.rs | 16 + lib/opte/src/engine/ingot_packet.rs | 179 +++++-- lib/opte/src/engine/rule.rs | 11 +- 9 files changed, 709 insertions(+), 738 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e4270361..6e717e80 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -882,7 +882,7 @@ dependencies = [ [[package]] name = "ingot" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=22ab5b141d0e481bb5b95765160c74da68275e5a#22ab5b141d0e481bb5b95765160c74da68275e5a" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=0cfd03c481650bb859255d4971b5bdc2fe671ca1#0cfd03c481650bb859255d4971b5bdc2fe671ca1" dependencies = [ "bitflags 2.6.0", "ingot-macros", @@ -895,7 +895,7 @@ dependencies = [ [[package]] name = "ingot-macros" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=22ab5b141d0e481bb5b95765160c74da68275e5a#22ab5b141d0e481bb5b95765160c74da68275e5a" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=0cfd03c481650bb859255d4971b5bdc2fe671ca1#0cfd03c481650bb859255d4971b5bdc2fe671ca1" dependencies = [ "darling", "itertools 0.13.0", @@ -908,7 +908,7 @@ dependencies = [ [[package]] name = "ingot-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=22ab5b141d0e481bb5b95765160c74da68275e5a#22ab5b141d0e481bb5b95765160c74da68275e5a" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=0cfd03c481650bb859255d4971b5bdc2fe671ca1#0cfd03c481650bb859255d4971b5bdc2fe671ca1" dependencies = [ "heapless", "ingot-macros", diff --git a/Cargo.toml b/Cargo.toml index 4cafe031..342fc781 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,7 +50,7 @@ ctor = "0.2" darling = "0.20" dyn-clone = "1.0" heapless = "0.8" -ingot = { git = 
"https://github.com/oxidecomputer/ingot.git", rev = "22ab5b141d0e481bb5b95765160c74da68275e5a"} +ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "0cfd03c481650bb859255d4971b5bdc2fe671ca1"} ipnetwork = { version = "0.20", default-features = false } itertools = { version = "0.13", default-features = false } libc = "0.2" diff --git a/lib/opte-test-utils/src/dhcp.rs b/lib/opte-test-utils/src/dhcp.rs index 02a2fc02..520f70c5 100644 --- a/lib/opte-test-utils/src/dhcp.rs +++ b/lib/opte-test-utils/src/dhcp.rs @@ -8,121 +8,107 @@ use super::*; use dhcpv6::protocol::MessageType; +use opte::engine::dhcp::DHCP_CLIENT_PORT; +use opte::engine::dhcp::DHCP_SERVER_PORT; use opte::engine::dhcpv6; +use opte::engine::ingot_base::Ethernet; +use opte::engine::ingot_base::Ipv4; +use opte::engine::ingot_base::Ipv6; +use opte::engine::ingot_packet::MsgBlk; +use opte::ingot::ethernet::Ethertype; +use opte::ingot::ip::IpProtocol; +use opte::ingot::types::Header; +use opte::ingot::udp::Udp; pub use smoltcp::wire::DhcpMessageType; pub use smoltcp::wire::DhcpPacket; pub use smoltcp::wire::DhcpRepr; // Build a packet from a DHCPv4 message, from a client to server. 
-pub fn packet_from_client_dhcpv4_message_unparsed( +pub fn packet_from_client_dhcpv4_message( cfg: &VpcCfg, msg: &DhcpRepr, -) -> Packet { - let eth = EtherMeta { - dst: MacAddr::BROADCAST, - src: cfg.guest_mac, - ether_type: EtherType::Ipv4, +) -> MsgBlk { + let eth = Ethernet { + destination: MacAddr::BROADCAST, + source: cfg.guest_mac, + ethertype: Ethertype::IPV4, }; - let ip = Ipv4Meta { - src: Ipv4Addr::ANY_ADDR, - dst: Ipv4Addr::LOCAL_BCAST, - proto: Protocol::UDP, - total_len: (msg.buffer_len() + UdpHdr::SIZE + Ipv4Hdr::BASE_SIZE) - as u16, - + let ip = Ipv4 { + source: Ipv4Addr::ANY_ADDR, + destination: Ipv4Addr::LOCAL_BCAST, + protocol: IpProtocol::UDP, + total_len: (msg.buffer_len() + + Udp::MINIMUM_LENGTH + + Ipv4::MINIMUM_LENGTH) as u16, ..Default::default() }; - let udp = UdpMeta { - src: 68, - dst: 67, - len: (UdpHdr::SIZE + msg.buffer_len()) as u16, + let udp = Udp { + source: DHCP_CLIENT_PORT, + destination: DHCP_SERVER_PORT, + length: (Udp::MINIMUM_LENGTH + msg.buffer_len()) as u16, ..Default::default() }; - let reply_len = - msg.buffer_len() + UdpHdr::SIZE + Ipv6Hdr::BASE_SIZE + EtherHdr::SIZE; - let mut pkt = Packet::alloc_and_expand(reply_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); + let headers = (eth, ip, udp); + let total_len = msg.buffer_len() + headers.packet_length(); - let mut msg_buf = vec![0; msg.buffer_len()]; - let mut dhcp_pkt = DhcpPacket::new_checked(&mut msg_buf).unwrap(); + let mut pkt = MsgBlk::new_ethernet(total_len); + pkt.emit_back(&headers).unwrap(); + let dhcp_off = pkt.len(); + pkt.resize(total_len); + let mut dhcp_pkt = DhcpPacket::new_checked(&mut pkt[dhcp_off..]).unwrap(); msg.emit(&mut dhcp_pkt).unwrap(); - wtr.write(&msg_buf).unwrap(); + pkt } // Build a packet from a DHCPv6 message, from a client to server. 
-pub fn packet_from_client_dhcpv6_message_unparsed( +pub fn packet_from_client_dhcpv6_message( cfg: &VpcCfg, msg: &dhcpv6::protocol::Message<'_>, -) -> Packet { - let eth = EtherMeta { - dst: dhcpv6::ALL_RELAYS_AND_SERVERS.multicast_mac().unwrap(), - src: cfg.guest_mac, - ether_type: EtherType::Ipv6, +) -> MsgBlk { + let eth = Ethernet { + destination: dhcpv6::ALL_RELAYS_AND_SERVERS.multicast_mac().unwrap(), + source: cfg.guest_mac, + ethertype: Ethertype::IPV6, }; - let ip = Ipv6Meta { - src: Ipv6Addr::from_eui64(&cfg.guest_mac), - dst: dhcpv6::ALL_RELAYS_AND_SERVERS, - proto: Protocol::UDP, - next_hdr: IpProtocol::Udp, - pay_len: (msg.buffer_len() + UdpHdr::SIZE) as u16, + let ip = Ipv6 { + source: Ipv6Addr::from_eui64(&cfg.guest_mac), + destination: dhcpv6::ALL_RELAYS_AND_SERVERS, + next_header: IpProtocol::UDP, + payload_len: (msg.buffer_len() + Udp::MINIMUM_LENGTH) as u16, ..Default::default() }; - let udp = UdpMeta { - src: dhcpv6::CLIENT_PORT, - dst: dhcpv6::SERVER_PORT, - len: (UdpHdr::SIZE + msg.buffer_len()) as u16, + let udp = Udp { + source: dhcpv6::CLIENT_PORT, + destination: dhcpv6::SERVER_PORT, + length: (UdpHdr::SIZE + msg.buffer_len()) as u16, ..Default::default() }; - write_dhcpv6_packet_unparsed(eth, ip, udp, msg) + write_dhcpv6_packet(eth, ip, udp, msg) } -pub fn packet_from_client_dhcpv6_message( - cfg: &VpcCfg, +pub fn write_dhcpv6_packet( + eth: Ethernet, + ip: Ipv6, + udp: Udp, msg: &dhcpv6::protocol::Message<'_>, -) -> Packet { - packet_from_client_dhcpv6_message_unparsed(cfg, msg) - .parse(Out, GenericUlp {}) - .unwrap() -} +) -> MsgBlk { + let headers = (eth, ip, udp); + let total_len = msg.buffer_len() + headers.packet_length(); -pub fn write_dhcpv6_packet_unparsed( - eth: EtherMeta, - ip: Ipv6Meta, - udp: UdpMeta, - msg: &dhcpv6::protocol::Message<'_>, -) -> Packet { - let reply_len = - msg.buffer_len() + UdpHdr::SIZE + Ipv6Hdr::BASE_SIZE + EtherHdr::SIZE; - let mut pkt = Packet::alloc_and_expand(reply_len); - let mut wtr = 
pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); - let mut msg_buf = vec![0; msg.buffer_len()]; - msg.copy_into(&mut msg_buf).unwrap(); - wtr.write(&msg_buf).unwrap(); - pkt -} + let mut pkt = MsgBlk::new_ethernet(total_len); + pkt.emit_back(&headers).unwrap(); + let dhcp_off = pkt.len(); + pkt.resize(total_len); + msg.copy_into(&mut pkt[dhcp_off..]).unwrap(); -pub fn write_dhcpv6_packet( - eth: EtherMeta, - ip: Ipv6Meta, - udp: UdpMeta, - msg: &dhcpv6::protocol::Message<'_>, -) -> Packet { - write_dhcpv6_packet_unparsed(eth, ip, udp, msg) - .parse(Out, GenericUlp {}) - .unwrap() + pkt } pub fn dhcpv6_with_reasonable_defaults( diff --git a/lib/opte-test-utils/src/icmp.rs b/lib/opte-test-utils/src/icmp.rs index 2af1c6c1..e9ba46a7 100644 --- a/lib/opte-test-utils/src/icmp.rs +++ b/lib/opte-test-utils/src/icmp.rs @@ -8,10 +8,17 @@ use opte::api::*; use opte::engine::ether::*; +use opte::engine::ingot_base::Ethernet; +use opte::engine::ingot_base::Ipv4; +use opte::engine::ingot_base::Ipv6; +use opte::engine::ingot_packet::MsgBlk; use opte::engine::ip4::*; use opte::engine::ip6::*; use opte::engine::packet::*; use opte::engine::Direction::*; +use opte::ingot::ethernet::Ethertype; +use opte::ingot::ip::IpProtocol as IngotIpProto; +use opte::ingot::types::Header; use oxide_vpc::engine::VpcParser; use smoltcp::phy::ChecksumCapabilities as CsumCapab; use smoltcp::wire::Icmpv4Packet; @@ -40,7 +47,7 @@ pub fn gen_icmp_echo_req( seq_no: u16, data: &[u8], segments: usize, -) -> Packet { +) -> MsgBlk { match (ip_src, ip_dst) { (IpAddr::Ip4(src), IpAddr::Ip4(dst)) => gen_icmpv4_echo_req( eth_src, eth_dst, src, dst, ident, seq_no, data, segments, @@ -62,7 +69,7 @@ pub fn gen_icmpv4_echo_req( seq_no: u16, data: &[u8], segments: usize, -) -> Packet { +) -> MsgBlk { let etype = IcmpEchoType::Req; gen_icmp_echo( etype, eth_src, eth_dst, ip_src, ip_dst, ident, seq_no, 
data, segments, @@ -79,7 +86,7 @@ pub fn gen_icmp_echo_reply( seq_no: u16, data: &[u8], segments: usize, -) -> Packet { +) -> MsgBlk { match (ip_src, ip_dst) { (IpAddr::Ip4(src), IpAddr::Ip4(dst)) => gen_icmpv4_echo_reply( eth_src, eth_dst, src, dst, ident, seq_no, data, segments, @@ -101,7 +108,7 @@ pub fn gen_icmpv4_echo_reply( seq_no: u16, data: &[u8], segments: usize, -) -> Packet { +) -> MsgBlk { let etype = IcmpEchoType::Reply; gen_icmp_echo( etype, eth_src, eth_dst, ip_src, ip_dst, ident, seq_no, data, segments, @@ -118,8 +125,8 @@ pub fn gen_icmp_echo( ident: u16, seq_no: u16, data: &[u8], - segments: usize, -) -> Packet { + n_segments: usize, +) -> MsgBlk { let icmp = match etype { IcmpEchoType::Req => Icmpv4Repr::EchoRequest { ident, seq_no, data }, IcmpEchoType::Reply => Icmpv4Repr::EchoReply { ident, seq_no, data }, @@ -128,123 +135,72 @@ pub fn gen_icmp_echo( let mut icmp_pkt = Icmpv4Packet::new_unchecked(&mut icmp_bytes); icmp.emit(&mut icmp_pkt, &Default::default()); - let mut ip4 = Ipv4Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::ICMP, - total_len: (Ipv4Hdr::BASE_SIZE + icmp.buffer_len()) as u16, - ..Default::default() - }; - ip4.compute_hdr_csum(); - let eth = - &EtherMeta { dst: eth_dst, src: eth_src, ether_type: EtherType::Ipv4 }; - - let total_len = EtherHdr::SIZE + ip4.hdr_len() + icmp.buffer_len(); - - match segments { - 1 => { - let mut pkt = Packet::alloc_and_expand(total_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - wtr.write(&icmp_bytes).unwrap(); - pkt.parse(Out, VpcParser::new()).unwrap() - } - 2 => { - let mut pkt = Packet::alloc_and_expand(EtherHdr::SIZE); - let mut wtr = pkt.seg_wtr(0); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - let mut wtr = - pkt.add_seg(ip4.hdr_len() + icmp_bytes.len()).unwrap(); - ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - wtr.write(&icmp_bytes).unwrap(); - pkt.parse(Out, 
VpcParser::new()).unwrap() - } - 3 => { - let mut pkt = Packet::alloc_and_expand(EtherHdr::SIZE); - let mut wtr = pkt.seg_wtr(0); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - let mut wtr = pkt.add_seg(ip4.hdr_len()).unwrap(); - ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - let mut wtr = pkt.add_seg(icmp_bytes.len()).unwrap(); - wtr.write(&icmp_bytes).unwrap(); - pkt.parse(Out, VpcParser::new()).unwrap() - } - _ => { - panic!("only 1 2 or 3 segments allowed") - } - } -} - -#[allow(clippy::too_many_arguments)] -pub fn gen_icmp_echo_unparsed( - etype: IcmpEchoType, - eth_src: MacAddr, - eth_dst: MacAddr, - ip_src: Ipv4Addr, - ip_dst: Ipv4Addr, - ident: u16, - seq_no: u16, - data: &[u8], - segments: usize, -) -> Packet { - let icmp = match etype { - IcmpEchoType::Req => Icmpv4Repr::EchoRequest { ident, seq_no, data }, - IcmpEchoType::Reply => Icmpv4Repr::EchoReply { ident, seq_no, data }, + let eth = Ethernet { + destination: eth_dst, + source: eth_src, + ethertype: Ethertype::IPV4, }; - let mut icmp_bytes = vec![0u8; icmp.buffer_len()]; - let mut icmp_pkt = Icmpv4Packet::new_unchecked(&mut icmp_bytes); - icmp.emit(&mut icmp_pkt, &Default::default()); - let mut ip4 = Ipv4Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::ICMP, - total_len: (Ipv4Hdr::BASE_SIZE + icmp.buffer_len()) as u16, + let mut ip = Ipv4 { + source: ip_src, + destination: ip_dst, + protocol: IngotIpProto::ICMP, + total_len: (icmp.buffer_len() + Ipv4::MINIMUM_LENGTH) as u16, ..Default::default() }; - ip4.compute_hdr_csum(); - let eth = - &EtherMeta { dst: eth_dst, src: eth_src, ether_type: EtherType::Ipv4 }; + ip.fill_checksum(); - let total_len = EtherHdr::SIZE + ip4.hdr_len() + icmp.buffer_len(); + let total_len = + eth.packet_length() + ip.packet_length() + icmp.buffer_len(); + let mut segments = vec![]; - match segments { + match n_segments { 1 => { - let mut pkt = Packet::alloc_and_expand(total_len); - let mut wtr = pkt.seg0_wtr(); - 
eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - wtr.write(&icmp_bytes).unwrap(); + let mut pkt = MsgBlk::new_ethernet(total_len); + pkt.emit_back(&(eth, ip)); + pkt.resize(total_len); + pkt.write_bytes_back(&icmp_bytes).unwrap(); - pkt + return pkt; } 2 => { - let mut pkt = Packet::alloc_and_expand(EtherHdr::SIZE); - let mut wtr = pkt.seg_wtr(0); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - let mut wtr = - pkt.add_seg(ip4.hdr_len() + icmp_bytes.len()).unwrap(); - ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - wtr.write(&icmp_bytes).unwrap(); - - pkt + let mut pkt = MsgBlk::new_ethernet(eth.packet_length()); + pkt.emit_back(eth).unwrap(); + segments.push(pkt); + + let t_len = ip.packet_length() + icmp.buffer_len(); + let mut pkt = MsgBlk::new(t_len); + pkt.emit_back(ip).unwrap(); + pkt.resize(t_len).unwrap(); + pkt.write_bytes_back(&icmp_bytes).unwrap(); + segments.push(pkt); } 3 => { - let mut pkt = Packet::alloc_and_expand(EtherHdr::SIZE); - let mut wtr = pkt.seg_wtr(0); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - let mut wtr = pkt.add_seg(ip4.hdr_len()).unwrap(); - ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - let mut wtr = pkt.add_seg(icmp_bytes.len()).unwrap(); - wtr.write(&icmp_bytes).unwrap(); - - pkt + let mut pkt = MsgBlk::new_ethernet(eth.packet_length()); + pkt.emit_back(eth).unwrap(); + segments.push(pkt); + + let mut pkt = MsgBlk::new(ip.packet_length()); + pkt.emit_back(eth).unwrap(); + segments.push(pkt); + + let mut pkt = MsgBlk::new(icmp.buffer_len()); + pkt.write_bytes_back(&icmp_bytes).unwrap(); + segments.push(pkt); } _ => { panic!("only 1 2 or 3 segments allowed") } } + + while segments.len() > 1 { + let chain = segments.pop().unwrap(); + let mut new_el = segments.last_mut().unwrap(); + + new_el.extend_if_one(chain); + } + + segments.pop().unwrap() } #[allow(clippy::too_many_arguments)] @@ -257,7 +213,7 @@ pub fn gen_icmpv6_echo_req( seq_no: u16, data: &[u8], 
segments: usize, -) -> Packet { +) -> MsgBlk { let etype = IcmpEchoType::Req; gen_icmpv6_echo( etype, eth_src, eth_dst, ip_src, ip_dst, ident, seq_no, data, segments, @@ -274,7 +230,7 @@ pub fn gen_icmpv6_echo_reply( seq_no: u16, data: &[u8], segments: usize, -) -> Packet { +) -> MsgBlk { let etype = IcmpEchoType::Reply; gen_icmpv6_echo( etype, eth_src, eth_dst, ip_src, ip_dst, ident, seq_no, data, segments, @@ -291,27 +247,8 @@ pub fn gen_icmpv6_echo( ident: u16, seq_no: u16, data: &[u8], - segments: usize, -) -> Packet { - gen_icmpv6_echo_unparsed( - etype, eth_src, eth_dst, ip_src, ip_dst, ident, seq_no, data, segments, - ) - .parse(Out, VpcParser::new()) - .unwrap() -} - -#[allow(clippy::too_many_arguments)] -pub fn gen_icmpv6_echo_unparsed( - etype: IcmpEchoType, - eth_src: MacAddr, - eth_dst: MacAddr, - ip_src: Ipv6Addr, - ip_dst: Ipv6Addr, - ident: u16, - seq_no: u16, - data: &[u8], - segments: usize, -) -> Packet { + n_segments: usize, +) -> MsgBlk { let icmp = match etype { IcmpEchoType::Req => Icmpv6Repr::EchoRequest { ident, seq_no, data }, IcmpEchoType::Reply => Icmpv6Repr::EchoReply { ident, seq_no, data }, @@ -325,88 +262,108 @@ pub fn gen_icmpv6_echo_unparsed( &mut req_pkt, &Default::default(), ); - let ip6 = Ipv6Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::ICMPv6, - next_hdr: IpProtocol::Icmpv6, + + let eth = Ethernet { + destination: eth_dst, + source: eth_src, + ethertype: Ethertype::IPV4, + }; + + let ip = Ipv6 { + source: ip_src, + destination: ip_dst, + next_header: IngotIpProto::ICMP_V6, + payload_len: icmp.buffer_len() as u16, hop_limit: 64, - pay_len: icmp.buffer_len() as u16, ..Default::default() }; - let eth = - &EtherMeta { dst: eth_dst, src: eth_src, ether_type: EtherType::Ipv6 }; - let total_len = EtherHdr::SIZE + ip6.hdr_len() + icmp.buffer_len(); + let total_len = + eth.packet_length() + ip.packet_length() + icmp.buffer_len(); + let mut segments = vec![]; - match segments { + match n_segments { 1 => { - let mut pkt = 
Packet::alloc_and_expand(total_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip6.emit(wtr.slice_mut(ip6.hdr_len()).unwrap()); - wtr.write(&body_bytes).unwrap(); - pkt + let mut pkt = MsgBlk::new_ethernet(total_len); + pkt.emit_back(&(eth, ip)); + pkt.resize(total_len); + pkt.write_bytes_back(&body_bytes).unwrap(); + + return pkt; } 2 => { - let mut pkt = Packet::alloc_and_expand(EtherHdr::SIZE); - let mut wtr = pkt.seg_wtr(0); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - let mut wtr = - pkt.add_seg(ip6.hdr_len() + body_bytes.len()).unwrap(); - ip6.emit(wtr.slice_mut(ip6.hdr_len()).unwrap()); - wtr.write(&body_bytes).unwrap(); - pkt + let mut pkt = MsgBlk::new_ethernet(eth.packet_length()); + pkt.emit_back(eth).unwrap(); + segments.push(pkt); + + let t_len = ip.packet_length() + icmp.buffer_len(); + let mut pkt = MsgBlk::new(t_len); + pkt.emit_back(ip).unwrap(); + pkt.resize(t_len).unwrap(); + pkt.write_bytes_back(&body_bytes).unwrap(); + segments.push(pkt); } 3 => { - let mut pkt = Packet::alloc_and_expand(EtherHdr::SIZE); - let mut wtr = pkt.seg_wtr(0); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - let mut wtr = pkt.add_seg(ip6.hdr_len()).unwrap(); - ip6.emit(wtr.slice_mut(ip6.hdr_len()).unwrap()); - let mut wtr = pkt.add_seg(body_bytes.len()).unwrap(); - wtr.write(&body_bytes).unwrap(); - pkt + let mut pkt = MsgBlk::new_ethernet(eth.packet_length()); + pkt.emit_back(eth).unwrap(); + segments.push(pkt); + + let mut pkt = MsgBlk::new(ip.packet_length()); + pkt.emit_back(eth).unwrap(); + segments.push(pkt); + + let mut pkt = MsgBlk::new(icmp.buffer_len()); + pkt.write_bytes_back(&body_bytes).unwrap(); + segments.push(pkt); } _ => { panic!("only 1 2 or 3 segments allowed") } } -} -/// Generate an NDP packet given an inner `repr`. 
-pub fn generate_ndisc( - repr: NdiscRepr, - src_mac: MacAddr, - dst_mac: MacAddr, - src_ip: Ipv6Addr, - dst_ip: Ipv6Addr, - with_checksum: bool, -) -> Packet { - generate_ndisc_unparsed( - repr, - src_mac, - dst_mac, - src_ip, - dst_ip, - with_checksum, - ) - .parse(Out, VpcParser::new()) - .unwrap() + while segments.len() > 1 { + let chain = segments.pop().unwrap(); + let mut new_el = segments.last_mut().unwrap(); + + new_el.extend_if_one(chain); + } + + segments.pop().unwrap() } /// Generate an NDP packet given an inner `repr`. -pub fn generate_ndisc_unparsed( +pub fn generate_ndisc( repr: NdiscRepr, src_mac: MacAddr, dst_mac: MacAddr, src_ip: Ipv6Addr, dst_ip: Ipv6Addr, with_checksum: bool, -) -> Packet { +) -> MsgBlk { let req = Icmpv6Repr::Ndisc(repr); - let mut body = vec![0u8; req.buffer_len()]; - let mut req_pkt = Icmpv6Packet::new_unchecked(&mut body); + let eth = Ethernet { + destination: dst_mac, + source: src_mac, + ethertype: Ethertype::IPV6, + }; + + let ip = Ipv6 { + source: src_ip, + destination: dst_ip, + next_header: IngotIpProto::ICMP_V6, + payload_len: req.buffer_len() as u16, + hop_limit: 255, + ..Default::default() + }; + + let headers = (eth, ip); + let total_len = req.buffer_len() + headers.packet_length(); + let mut pkt = MsgBlk::new_ethernet(total_len); + pkt.emit_back(&headers).unwrap(); + let ndisc_off = pkt.len(); + pkt.resize(total_len); + + let mut req_pkt = Icmpv6Packet::new_unchecked(&mut pkt[ndisc_off..]); let mut csum = CsumCapab::ignored(); if with_checksum { csum.icmpv6 = smoltcp::phy::Checksum::Tx; @@ -417,24 +374,7 @@ pub fn generate_ndisc_unparsed( &mut req_pkt, &csum, ); - let ip6 = Ipv6Meta { - src: src_ip, - dst: dst_ip, - proto: Protocol::ICMPv6, - next_hdr: IpProtocol::Icmpv6, - hop_limit: 255, - pay_len: req.buffer_len() as u16, - ..Default::default() - }; - let eth = - EtherMeta { dst: dst_mac, src: src_mac, ether_type: EtherType::Ipv6 }; - - let total_len = EtherHdr::SIZE + ip6.hdr_len() + req.buffer_len(); - let mut 
pkt = Packet::alloc_and_expand(total_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip6.emit(wtr.slice_mut(ip6.hdr_len()).unwrap()); - wtr.write(&body).unwrap(); + pkt } @@ -443,7 +383,7 @@ pub fn generate_ndisc_unparsed( // The source MAC is used to generate the source IPv6 address, using the EUI-64 // transform. The resulting packet has a multicast MAC address, and the // All-Routers destination IPv6 address. -pub fn gen_router_solicitation(src_mac: &MacAddr) -> Packet { +pub fn gen_router_solicitation(src_mac: &MacAddr) -> MsgBlk { let solicit = NdiscRepr::RouterSolicit { lladdr: Some(RawHardwareAddress::from_bytes(src_mac)), }; @@ -466,7 +406,7 @@ pub fn gen_router_solicitation(src_mac: &MacAddr) -> Packet { pub fn generate_neighbor_solicitation( info: &SolicitInfo, with_checksum: bool, -) -> Packet { +) -> MsgBlk { let solicit = NdiscRepr::NeighborSolicit { target_addr: Ipv6Address::from(info.target_addr), lladdr: info.lladdr.map(|x| RawHardwareAddress::from_bytes(&x)), @@ -513,7 +453,7 @@ impl std::fmt::Display for SolicitInfo { pub fn generate_neighbor_advertisement( info: &AdvertInfo, with_checksum: bool, -) -> Packet { +) -> MsgBlk { let advert = NdiscRepr::NeighborAdvert { flags: info.flags, target_addr: info.target_addr.into(), diff --git a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index cabb4fc9..001b3592 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -26,10 +26,18 @@ pub use opte::engine::geneve::GeneveMeta; pub use opte::engine::geneve::GeneveOption; pub use opte::engine::geneve::OxideOption; pub use opte::engine::geneve::Vni; +use opte::engine::geneve::GENEVE_OPT_CLASS_OXIDE; +use opte::engine::geneve::GENEVE_PORT; pub use opte::engine::headers::IpAddr; pub use opte::engine::headers::IpCidr; pub use opte::engine::headers::IpMeta; pub use opte::engine::headers::UlpMeta; +use opte::engine::ingot_base::Ethernet; +use opte::engine::ingot_base::Ipv4; 
+use opte::engine::ingot_base::Ipv6; +use opte::engine::ingot_base::L3Repr; +use opte::engine::ingot_packet::MsgBlk; +use opte::engine::ingot_packet::Packet2; pub use opte::engine::ip4::Ipv4Addr; pub use opte::engine::ip4::Ipv4Hdr; pub use opte::engine::ip4::Ipv4Meta; @@ -55,6 +63,17 @@ pub use opte::engine::tcp::TcpMeta; pub use opte::engine::udp::UdpHdr; pub use opte::engine::udp::UdpMeta; pub use opte::engine::GenericUlp; +use opte::ingot::ethernet::Ethertype; +use opte::ingot::geneve::Geneve; +use opte::ingot::geneve::GeneveOpt; +use opte::ingot::geneve::GeneveOptionType; +use opte::ingot::ip::IpProtocol as IngotIpProto; +use opte::ingot::tcp::Tcp; +use opte::ingot::tcp::TcpFlags as IngotTcpFlags; +use opte::ingot::types::Emit; +use opte::ingot::types::EmitDoesNotRelyOnBufContents; +use opte::ingot::types::Header; +use opte::ingot::udp::Udp; pub use opte::ExecCtx; pub use oxide_vpc::api::AddFwRuleReq; pub use oxide_vpc::api::DhcpCfg; @@ -429,109 +448,63 @@ fn set_default_fw_rules(pav: &mut PortAndVps, cfg: &VpcCfg) { update!(pav, ["set:epoch=3", "set:firewall.rules.in=3"]); } -fn verify_ulp_pkt_offsets( - pkt: &Packet, - ip: IpMeta, - ulp: UlpMeta, - body_len: usize, -) { - let mut pos = 0; - let off = pkt.hdr_offsets(); - assert_eq!( - off.inner.ether, - HdrOffset { - pkt_pos: pos, - seg_idx: 0, - seg_pos: pos, - hdr_len: EtherHdr::SIZE - }, - ); - pos += EtherHdr::SIZE; - assert_eq!( - off.inner.ip.unwrap(), - HdrOffset { - pkt_pos: pos, - seg_idx: 0, - seg_pos: pos, - hdr_len: ip.hdr_len() - }, - ); - pos += ip.hdr_len(); - assert_eq!( - off.inner.ulp.unwrap(), - HdrOffset { - pkt_pos: pos, - seg_idx: 0, - seg_pos: pos, - hdr_len: ulp.hdr_len() - }, - ); - pos += ulp.hdr_len(); - assert_eq!( - pkt.body_info(), - BodyInfo { - pkt_offset: pos, - seg_index: 0, - seg_offset: pos, - len: body_len - }, - ); -} - -pub fn ulp_pkt, U: Into>( - eth: EtherMeta, +pub fn ulp_pkt< + I: Emit + EmitDoesNotRelyOnBufContents, + U: Emit + EmitDoesNotRelyOnBufContents, +>( + 
eth: Ethernet, ip: I, ulp: U, body: &[u8], -) -> Packet { - let ip = ip.into(); - let ulp = ulp.into(); - let total_len = EtherHdr::SIZE + ip.hdr_len() + ulp.hdr_len() + body.len(); - let mut pkt = Packet::alloc_and_expand(total_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - ulp.emit(wtr.slice_mut(ulp.hdr_len()).unwrap()); - wtr.write(body).unwrap(); - let mut pkt = pkt.parse(Out, GenericUlp {}).unwrap(); - pkt.compute_checksums(); - assert!(pkt.body_csum().is_some()); - verify_ulp_pkt_offsets(&pkt, ip, ulp, body.len()); +) -> MsgBlk { + let mut pkt = MsgBlk::new_ethernet_pkt((eth, ip, ulp, body)); + + let view = Packet2::new(pkt.iter_mut()); + let view = view.parse_outbound(GenericUlp {}).unwrap(); + let mut view = view.to_full_meta(); + view.compute_checksums(); + drop(view); + + // Note: we don't need to create and act on an EmitSpec here + // because we haven't meaningfully transformed the packet. + // (processed, introduced new layers, altered options/EHs) + pkt } // Generate a packet representing the start of a TCP handshake for a // telnet session from src to dst. 
-pub fn tcp_telnet_syn(src: &VpcCfg, dst: &VpcCfg) -> Packet { - let body = vec![]; - let tcp = TcpMeta { - src: 7865, - dst: 23, - flags: TcpFlags::SYN, - seq: 4224936861, - ack: 0, +pub fn tcp_telnet_syn(src: &VpcCfg, dst: &VpcCfg) -> MsgBlk { + let body: &[u8] = &[]; + let tcp = Tcp { + source: 7865, + destination: 23, + flags: IngotTcpFlags::SYN, + sequence: 4224936861, + acknowledgement: 0, ..Default::default() }; - let ip4 = Ipv4Meta { - src: src.ipv4_cfg().unwrap().private_ip, - dst: dst.ipv4_cfg().unwrap().private_ip, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let ip4 = Ipv4 { + source: src.ipv4_cfg().unwrap().private_ip, + destination: dst.ipv4_cfg().unwrap().private_ip, + protocol: IngotIpProto::TCP, + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length() + body.len()) + as u16, ..Default::default() }; - let eth = EtherMeta { - ether_type: EtherType::Ipv4, - src: src.guest_mac, - dst: src.gateway_mac, + let eth = Ethernet { + destination: src.gateway_mac, + source: src.guest_mac, + ethertype: Ethertype::IPV4, }; - ulp_pkt(eth, ip4, tcp, &body) + ulp_pkt(eth, ip4, tcp, &[]) } pub const HTTP_SYN_OPTS_LEN: usize = 20; // Generate a packet representing the start of a TCP handshake for an // HTTP request from src to dst. 
-pub fn http_syn(src: &VpcCfg, dst: &VpcCfg) -> Packet { +pub fn http_syn(src: &VpcCfg, dst: &VpcCfg) -> MsgBlk { http_syn2( src.guest_mac, src.ipv4_cfg().unwrap().private_ip, @@ -547,7 +520,7 @@ pub fn http_syn2( ip_src: impl Into, eth_dst: MacAddr, ip_dst: impl Into, -) -> Packet { +) -> MsgBlk { http_syn3(eth_src, ip_src, eth_dst, ip_dst, 44490, 80) } @@ -558,11 +531,10 @@ pub fn http_syn3( ip_dst: impl Into, sport: u16, dport: u16, -) -> Packet { +) -> MsgBlk { let body = vec![]; - let mut options = [0x00; TcpHdr::MAX_OPTION_SIZE]; #[rustfmt::skip] - let bytes = [ + let options = vec![ // MSS 0x02, 0x04, 0x05, 0xb4, // SACK @@ -574,57 +546,54 @@ pub fn http_syn3( // Window Scale 0x03, 0x03, 0x01, ]; - options[0..bytes.len()].copy_from_slice(&bytes); - let options_len = bytes.len(); - - let tcp = TcpMeta { - src: sport, - dst: dport, - flags: TcpFlags::SYN, - seq: 2382112979, - ack: 0, + + let tcp = Tcp { + source: sport, + destination: dport, + sequence: 2382112979, + acknowledgement: 0, + flags: IngotTcpFlags::SYN, window_size: 64240, - options_bytes: Some(options), - options_len, - csum: [0; 2], + options, + ..Default::default() }; - let (ether_type, ip): (_, IpMeta) = match (ip_src.into(), ip_dst.into()) { - (IpAddr::Ip4(src), IpAddr::Ip4(dst)) => ( - EtherType::Ipv4, - Ipv4Meta { - src, - dst, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) - as u16, - ttl: 64, - ident: 2662, + + let (ethertype, ip) = match (ip_src.into(), ip_dst.into()) { + (IpAddr::Ip4(source), IpAddr::Ip4(destination)) => ( + Ethertype::IPV4, + L3Repr::Ipv4(Ipv4 { + total_len: (Ipv4::MINIMUM_LENGTH + + tcp.packet_length() + + body.len()) as u16, + identification: 2662, + hop_limit: 64, + protocol: IngotIpProto::TCP, + source, + destination, ..Default::default() - } - .into(), + }), ), - (IpAddr::Ip6(src), IpAddr::Ip6(dst)) => ( - EtherType::Ipv6, - Ipv6Meta { - src, - dst, - proto: Protocol::TCP, - next_hdr: IpProtocol::Tcp, - pay_len: 
(tcp.hdr_len() + body.len()) as u16, + (IpAddr::Ip6(source), IpAddr::Ip6(destination)) => ( + Ethertype::IPV4, + L3Repr::Ipv6(Ipv6 { + payload_len: (tcp.packet_length() + body.len()) as u16, + next_header: IngotIpProto::TCP, + hop_limit: 64, + source, + destination, ..Default::default() - } - .into(), + }), ), _ => panic!("source and destination must be the same IP version"), }; // Any packet from the guest is always addressed to the gateway. - let eth = EtherMeta { ether_type, src: eth_src, dst: eth_dst }; + let eth = Ethernet { destination: eth_dst, source: eth_src, ethertype }; ulp_pkt(eth, ip, tcp, &body) } // Generate a packet representing the SYN+ACK reply to `http_tcp_syn()`, // from g1 to g2. -pub fn http_syn_ack(src: &VpcCfg, dst: &VpcCfg) -> Packet { +pub fn http_syn_ack(src: &VpcCfg, dst: &VpcCfg) -> MsgBlk { http_syn_ack2( src.guest_mac, src.ipv4().private_ip, @@ -642,46 +611,46 @@ pub fn http_syn_ack2( eth_dst: MacAddr, ip_dst: impl Into, dport: u16, -) -> Packet { +) -> MsgBlk { let body = vec![]; - let tcp = TcpMeta { - src: 80, - dst: dport, - flags: TcpFlags::SYN | TcpFlags::ACK, - seq: 44161351, - ack: 2382112980, + let tcp = Tcp { + source: 80, + destination: dport, + sequence: 44161351, + acknowledgement: 2382112980, + flags: IngotTcpFlags::SYN | IngotTcpFlags::ACK, ..Default::default() }; - let (ether_type, ip): (_, IpMeta) = match (ip_src.into(), ip_dst.into()) { - (IpAddr::Ip4(src), IpAddr::Ip4(dst)) => ( - EtherType::Ipv4, - Ipv4Meta { - src, - dst, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) - as u16, - ttl: 64, - ident: 2662, + let (ethertype, ip) = match (ip_src.into(), ip_dst.into()) { + (IpAddr::Ip4(source), IpAddr::Ip4(destination)) => ( + Ethertype::IPV4, + L3Repr::Ipv4(Ipv4 { + total_len: (Ipv4::MINIMUM_LENGTH + + tcp.packet_length() + + body.len()) as u16, + identification: 2662, + hop_limit: 64, + protocol: IngotIpProto::TCP, + source, + destination, ..Default::default() - } - .into(), + 
}), ), - (IpAddr::Ip6(src), IpAddr::Ip6(dst)) => ( - EtherType::Ipv6, - Ipv6Meta { - src, - dst, - proto: Protocol::TCP, - next_hdr: IpProtocol::Tcp, - pay_len: (tcp.hdr_len() + body.len()) as u16, + (IpAddr::Ip6(source), IpAddr::Ip6(destination)) => ( + Ethertype::IPV4, + L3Repr::Ipv6(Ipv6 { + payload_len: (tcp.packet_length() + body.len()) as u16, + next_header: IngotIpProto::TCP, + hop_limit: 64, + source, + destination, ..Default::default() - } - .into(), + }), ), _ => panic!("source and destination must be the same IP version"), }; - let eth = EtherMeta { ether_type, src: eth_src, dst: eth_dst }; + + let eth = Ethernet { destination: eth_dst, source: eth_src, ethertype }; ulp_pkt(eth, ip, tcp, &body) } @@ -690,25 +659,29 @@ pub fn http_ack2( ip_src: Ipv4Addr, eth_dst: MacAddr, ip_dst: Ipv4Addr, -) -> Packet { +) -> MsgBlk { let body = vec![]; - let tcp = TcpMeta { - src: 44490, - dst: 80, - flags: TcpFlags::ACK, - seq: 2382112980, - ack: 44161352, + let tcp = Tcp { + source: 44490, + destination: 80, + sequence: 2382112980, + acknowledgement: 44161352, + flags: IngotTcpFlags::ACK, ..Default::default() }; - let ip4 = Ipv4Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let ip4 = Ipv4 { + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length() + body.len()) + as u16, + protocol: IngotIpProto::TCP, + source: ip_src, + destination: ip_dst, ..Default::default() }; - let eth = - EtherMeta { ether_type: EtherType::Ipv4, src: eth_src, dst: eth_dst }; + let eth = Ethernet { + destination: eth_dst, + source: eth_src, + ethertype: Ethertype::IPV4, + }; ulp_pkt(eth, ip4, tcp, &body) } @@ -717,27 +690,31 @@ pub fn http_get2( ip_src: Ipv4Addr, eth_dst: MacAddr, ip_dst: Ipv4Addr, -) -> Packet { +) -> MsgBlk { // The details of the HTTP body are irrelevant to our testing. You // only need know it's 18 characters for the purposes of seq/ack. 
- let body = "GET / HTTP/1.1\r\n\r\n".as_bytes(); - let tcp = TcpMeta { - src: 44490, - dst: 80, - flags: TcpFlags::PSH | TcpFlags::ACK, - seq: 2382112980, - ack: 44161352, + let body = b"GET / HTTP/1.1\r\n\r\n"; + let tcp = Tcp { + source: 44490, + destination: 80, + sequence: 2382112980, + acknowledgement: 44161352, + flags: IngotTcpFlags::PSH | IngotTcpFlags::ACK, ..Default::default() }; - let ip4 = Ipv4Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let ip4 = Ipv4 { + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length() + body.len()) + as u16, + protocol: IngotIpProto::TCP, + source: ip_src, + destination: ip_dst, ..Default::default() }; - let eth = - EtherMeta { ether_type: EtherType::Ipv4, src: eth_src, dst: eth_dst }; + let eth = Ethernet { + destination: eth_dst, + source: eth_src, + ethertype: Ethertype::IPV4, + }; ulp_pkt(eth, ip4, tcp, body) } @@ -747,25 +724,29 @@ pub fn http_get_ack2( eth_dst: MacAddr, ip_dst: Ipv4Addr, dst_port: u16, -) -> Packet { +) -> MsgBlk { let body = vec![]; - let tcp = TcpMeta { - src: 80, - dst: dst_port, - flags: TcpFlags::ACK, - seq: 44161353, - ack: 2382112998, + let tcp = Tcp { + source: 80, + destination: dst_port, + sequence: 44161353, + acknowledgement: 2382112998, + flags: IngotTcpFlags::ACK, ..Default::default() }; - let ip4 = Ipv4Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let ip4 = Ipv4 { + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length() + body.len()) + as u16, + protocol: IngotIpProto::TCP, + source: ip_src, + destination: ip_dst, ..Default::default() }; - let eth = - EtherMeta { ether_type: EtherType::Ipv4, src: eth_src, dst: eth_dst }; + let eth = Ethernet { + destination: eth_dst, + source: eth_src, + ethertype: Ethertype::IPV4, + }; ulp_pkt(eth, ip4, tcp, &body) } @@ -775,27 +756,31 @@ pub fn http_301_reply2( eth_dst: MacAddr, 
ip_dst: Ipv4Addr, dst_port: u16, -) -> Packet { +) -> MsgBlk { // The details of the HTTP body are irrelevant to our testing. You // only need know it's 34 characters for the purposes of seq/ack. let body = "HTTP/1.1 301 Moved Permanently\r\n\r\n".as_bytes(); - let tcp = TcpMeta { - src: 80, - dst: dst_port, - flags: TcpFlags::PSH | TcpFlags::ACK, - seq: 44161353, - ack: 2382112998, + let tcp = Tcp { + source: 80, + destination: dst_port, + sequence: 44161353, + acknowledgement: 2382112998, + flags: IngotTcpFlags::PSH | IngotTcpFlags::ACK, ..Default::default() }; - let ip4 = Ipv4Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let ip4 = Ipv4 { + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length() + body.len()) + as u16, + protocol: IngotIpProto::TCP, + source: ip_src, + destination: ip_dst, ..Default::default() }; - let eth = - EtherMeta { ether_type: EtherType::Ipv4, src: eth_src, dst: eth_dst }; + let eth = Ethernet { + destination: eth_dst, + source: eth_src, + ethertype: Ethertype::IPV4, + }; ulp_pkt(eth, ip4, tcp, body) } @@ -804,25 +789,29 @@ pub fn http_301_ack2( ip_src: Ipv4Addr, eth_dst: MacAddr, ip_dst: Ipv4Addr, -) -> Packet { +) -> MsgBlk { let body = vec![]; - let tcp = TcpMeta { - src: 44490, - dst: 80, - flags: TcpFlags::ACK, - seq: 2382112998, - ack: 44161353 + 34, + let tcp = Tcp { + source: 44490, + destination: 80, + sequence: 2382112998, + acknowledgement: 44161353 + 34, + flags: IngotTcpFlags::ACK, ..Default::default() }; - let ip4 = Ipv4Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let ip4 = Ipv4 { + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length() + body.len()) + as u16, + protocol: IngotIpProto::TCP, + source: ip_src, + destination: ip_dst, ..Default::default() }; - let eth = - EtherMeta { ether_type: EtherType::Ipv4, src: eth_src, dst: eth_dst }; + let eth = 
Ethernet { + destination: eth_dst, + source: eth_src, + ethertype: Ethertype::IPV4, + }; ulp_pkt(eth, ip4, tcp, &body) } @@ -831,25 +820,29 @@ pub fn http_guest_fin2( ip_src: Ipv4Addr, eth_dst: MacAddr, ip_dst: Ipv4Addr, -) -> Packet { +) -> MsgBlk { let body = vec![]; - let tcp = TcpMeta { - src: 44490, - dst: 80, - flags: TcpFlags::ACK | TcpFlags::FIN, - seq: 2382112998, - ack: 44161353 + 34, + let tcp = Tcp { + source: 44490, + destination: 80, + sequence: 2382112998, + acknowledgement: 44161353 + 34, + flags: IngotTcpFlags::ACK | IngotTcpFlags::FIN, ..Default::default() }; - let ip4 = Ipv4Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let ip4 = Ipv4 { + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length() + body.len()) + as u16, + protocol: IngotIpProto::TCP, + source: ip_src, + destination: ip_dst, ..Default::default() }; - let eth = - EtherMeta { ether_type: EtherType::Ipv4, src: eth_src, dst: eth_dst }; + let eth = Ethernet { + destination: eth_dst, + source: eth_src, + ethertype: Ethertype::IPV4, + }; ulp_pkt(eth, ip4, tcp, &body) } @@ -859,26 +852,29 @@ pub fn http_server_ack_fin2( eth_dst: MacAddr, ip_dst: Ipv4Addr, dst_port: u16, -) -> Packet { +) -> MsgBlk { let body = vec![]; - let tcp = TcpMeta { - src: 80, - dst: dst_port, - flags: TcpFlags::ACK, - seq: 44161353 + 34, - // We are ACKing the FIN, which counts as 1 byte. 
- ack: 2382112998 + 1, + let tcp = Tcp { + source: 80, + destination: dst_port, + sequence: 44161353 + 34, + acknowledgement: 2382112998 + 1, + flags: IngotTcpFlags::ACK, ..Default::default() }; - let ip4 = Ipv4Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let ip4 = Ipv4 { + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length() + body.len()) + as u16, + protocol: IngotIpProto::TCP, + source: ip_src, + destination: ip_dst, ..Default::default() }; - let eth = - EtherMeta { ether_type: EtherType::Ipv4, src: eth_src, dst: eth_dst }; + let eth = Ethernet { + destination: eth_dst, + source: eth_src, + ethertype: Ethertype::IPV4, + }; ulp_pkt(eth, ip4, tcp, &body) } @@ -888,25 +884,29 @@ pub fn http_server_fin2( eth_dst: MacAddr, ip_dst: Ipv4Addr, dst_port: u16, -) -> Packet { +) -> MsgBlk { let body = vec![]; - let tcp = TcpMeta { - src: 80, - dst: dst_port, - flags: TcpFlags::ACK | TcpFlags::FIN, - seq: 44161353 + 34, - ack: 2382112998 + 1, + let tcp = Tcp { + source: 80, + destination: dst_port, + sequence: 2382112998 + 1, + acknowledgement: 44161353 + 34, + flags: IngotTcpFlags::ACK | IngotTcpFlags::FIN, ..Default::default() }; - let ip4 = Ipv4Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let ip4 = Ipv4 { + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length() + body.len()) + as u16, + protocol: IngotIpProto::TCP, + source: ip_src, + destination: ip_dst, ..Default::default() }; - let eth = - EtherMeta { ether_type: EtherType::Ipv4, src: eth_src, dst: eth_dst }; + let eth = Ethernet { + destination: eth_dst, + source: eth_src, + ethertype: Ethertype::IPV4, + }; ulp_pkt(eth, ip4, tcp, &body) } @@ -915,26 +915,29 @@ pub fn http_guest_ack_fin2( ip_src: Ipv4Addr, eth_dst: MacAddr, ip_dst: Ipv4Addr, -) -> Packet { +) -> MsgBlk { let body = vec![]; - let tcp = TcpMeta { - src: 44490, - dst: 80, - 
flags: TcpFlags::ACK, - seq: 2382112998, - // We are ACKing the FIN, which counts as 1 bytes. - ack: 44161353 + 34 + 1, + let tcp = Tcp { + source: 44490, + destination: 80, + sequence: 2382112998, + acknowledgement: 44161353 + 34 + 1, + flags: IngotTcpFlags::ACK, ..Default::default() }; - let ip4 = Ipv4Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let ip4 = Ipv4 { + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length() + body.len()) + as u16, + protocol: IngotIpProto::TCP, + source: ip_src, + destination: ip_dst, ..Default::default() }; - let eth = - EtherMeta { ether_type: EtherType::Ipv4, src: eth_src, dst: eth_dst }; + let eth = Ethernet { + destination: eth_dst, + source: eth_src, + ethertype: Ethertype::IPV4, + }; ulp_pkt(eth, ip4, tcp, &body) } @@ -951,144 +954,75 @@ pub struct TestIpPhys { /// the rack. #[must_use] pub fn encap_external( - inner_pkt: Packet, + inner_pkt: MsgBlk, src: TestIpPhys, dst: TestIpPhys, -) -> Packet { +) -> MsgBlk { _encap(inner_pkt, src, dst, true) } /// Encapsulate a guest packet. #[must_use] -pub fn encap( - inner_pkt: Packet, - src: TestIpPhys, - dst: TestIpPhys, -) -> Packet { +pub fn encap(inner_pkt: MsgBlk, src: TestIpPhys, dst: TestIpPhys) -> MsgBlk { _encap(inner_pkt, src, dst, false) } /// Encapsulate a guest packet. 
#[must_use] fn _encap( - inner_pkt: Packet, + mut inner_pkt: MsgBlk, src: TestIpPhys, dst: TestIpPhys, external_snat: bool, -) -> Packet { - let old_pkt = inner_pkt.all_bytes(); - - let inner_ip_len = inner_pkt.hdr_offsets().inner.ip.map(|off| off.hdr_len); - - let inner_ulp_len = - inner_pkt.hdr_offsets().inner.ulp.map(|off| off.hdr_len); - - let inner_len = inner_pkt.len(); - - let opt_len = if external_snat { - GeneveOption::Oxide(OxideOption::External).len() - } else { - 0 - }; +) -> MsgBlk { + let pkt = Packet2::new(inner_pkt.iter_mut()); + let base_len = pkt.len(); + drop(pkt); + + let mut outer_geneve = Geneve { vni: dst.vni, ..Default::default() }; + + if external_snat { + let external_tag = GeneveOpt { + class: GENEVE_OPT_CLASS_OXIDE, + option_type: GeneveOptionType(OxideOption::External.opt_type()), + ..Default::default() + }; + + outer_geneve.opt_len += (external_tag.packet_length() >> 2) as u8; + outer_geneve.options.push(external_tag); + } - let geneve = GeneveMeta { - entropy: 99, - vni: dst.vni, - oxide_external_pkt: external_snat, + let outer_udp = Udp { + source: 99, + destination: GENEVE_PORT, + length: (base_len + Udp::MINIMUM_LENGTH + outer_geneve.packet_length()) + as u16, + ..Default::default() }; - let pay_len: u16 = (inner_len + geneve.hdr_len()).try_into().unwrap(); - assert_eq!( - pay_len as usize, - inner_len + UdpHdr::SIZE + GeneveHdr::BASE_SIZE + opt_len - ); - - let ip = Ipv6Meta { - src: src.ip, - dst: dst.ip, - pay_len, - proto: Protocol::UDP, - next_hdr: IpProtocol::Udp, + let outer_ip = Ipv6 { + source: src.ip, + destination: dst.ip, + next_header: IngotIpProto::UDP, + payload_len: outer_udp.length, ..Default::default() }; - let eth = - EtherMeta { ether_type: EtherType::Ipv6, src: src.mac, dst: dst.mac }; - - let total_len = EtherHdr::SIZE + usize::from(ip.total_len()); - let mut pkt = Packet::alloc_and_expand(total_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - 
ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - geneve.emit(pay_len, wtr.slice_mut(geneve.hdr_len()).unwrap()); - wtr.write(&old_pkt).unwrap(); - let pkt = pkt.parse(In, VpcParser::new()).unwrap(); - let off = pkt.hdr_offsets(); - let mut pos = 0; - - assert_eq!( - off.outer.ether.unwrap(), - HdrOffset { - pkt_pos: pos, - seg_idx: 0, - seg_pos: pos, - hdr_len: eth.hdr_len() - }, - ); - pos += eth.hdr_len(); - - assert_eq!( - off.outer.ip.unwrap(), - HdrOffset { - pkt_pos: pos, - seg_idx: 0, - seg_pos: pos, - hdr_len: ip.hdr_len() - }, - ); - pos += ip.hdr_len(); - - assert_eq!( - off.outer.encap.unwrap(), - HdrOffset { - pkt_pos: pos, - seg_idx: 0, - seg_pos: pos, - hdr_len: geneve.hdr_len() - }, - ); - pos += geneve.hdr_len(); - - assert_eq!( - off.inner.ether, - HdrOffset { - pkt_pos: pos, - seg_idx: 0, - seg_pos: pos, - hdr_len: EtherHdr::SIZE - }, - ); - pos += EtherHdr::SIZE; - - if let Some(hdr_len) = inner_ip_len { - assert_eq!( - off.inner.ip.unwrap(), - HdrOffset { pkt_pos: pos, seg_idx: 0, seg_pos: pos, hdr_len }, - ); - pos += hdr_len; - } - - if let Some(hdr_len) = inner_ulp_len { - assert_eq!( - off.inner.ulp.unwrap(), - HdrOffset { pkt_pos: pos, seg_idx: 0, seg_pos: pos, hdr_len }, - ); - } + let outer_eth = Ethernet { + destination: dst.mac, + source: src.mac, + ethertype: Ethertype::IPV6, + }; - let new_pkt = pkt.all_bytes(); - assert_eq!(&new_pkt[new_pkt.len() - old_pkt.len()..], &old_pkt); + let mut encap_pkt = MsgBlk::new_ethernet_pkt(&( + outer_eth, + outer_ip, + outer_udp, + outer_geneve, + )); + encap_pkt.extend_if_one(inner_pkt); - pkt + encap_pkt } /// Like `assert!`, except you also pass in the `PortAndVps` so that diff --git a/lib/opte/src/engine/dhcp.rs b/lib/opte/src/engine/dhcp.rs index 4ad5ff42..ee66ca0e 100644 --- a/lib/opte/src/engine/dhcp.rs +++ b/lib/opte/src/engine/dhcp.rs @@ -52,6 +52,9 @@ use smoltcp::wire::DhcpRepr; use smoltcp::wire::Ipv4Address; use smoltcp::wire::DHCP_MAX_DNS_SERVER_COUNT; +pub const DHCP_SERVER_PORT: u16 = 
67; +pub const DHCP_CLIENT_PORT: u16 = 68; + /// The DHCP message type. /// /// Why define our own wrapper type when smoltcp already provides this @@ -460,8 +463,8 @@ impl HairpinAction for DhcpAction { Ipv4Addr::LOCAL_BCAST, )]), Predicate::InnerIpProto(vec![IpProtoMatch::Exact(Protocol::UDP)]), - Predicate::InnerDstPort(vec![PortMatch::Exact(67)]), - Predicate::InnerSrcPort(vec![PortMatch::Exact(68)]), + Predicate::InnerDstPort(vec![PortMatch::Exact(DHCP_SERVER_PORT)]), + Predicate::InnerSrcPort(vec![PortMatch::Exact(DHCP_CLIENT_PORT)]), ]; let data_preds = match self.reply_type { diff --git a/lib/opte/src/engine/ingot_base.rs b/lib/opte/src/engine/ingot_base.rs index 373db204..5735c455 100644 --- a/lib/opte/src/engine/ingot_base.rs +++ b/lib/opte/src/engine/ingot_base.rs @@ -14,6 +14,7 @@ use ingot::tcp::ValidTcp; use ingot::types::primitives::*; use ingot::types::util::Repeated; use ingot::types::ByteSlice; +use ingot::types::Emit; use ingot::types::NetworkRepr; use ingot::types::Packet; use ingot::types::ParseError; @@ -25,6 +26,8 @@ use opte_api::Ipv4Addr; use opte_api::Ipv6Addr; use opte_api::MacAddr; +use super::checksum::Checksum; + // Redefine Ethernet and v4/v6 because we have our own, internal, // types already. 
@@ -92,6 +95,19 @@ pub struct Ipv4 { pub options: Vec, } +impl Ipv4 { + pub fn fill_checksum(&mut self) { + let mut csum = Checksum::default(); + self.checksum = 0; + + let mut bytes = [0u8; 56]; + self.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + + self.checksum = csum.finalize(); + } +} + #[derive(Debug, Clone, Ingot, Eq, PartialEq)] #[ingot(impl_default)] pub struct Ipv6 { diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 4959bc45..2650d6ca 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -48,6 +48,7 @@ use super::packet::InnerFlowId; use super::packet::Packet; use super::packet::PacketState; use super::packet::ParseError; +use super::packet::WriteError; use super::packet::FLOW_ID_DEFAULT; use super::rule::CompiledEncap; use super::rule::CompiledTransform; @@ -97,6 +98,7 @@ use ingot::types::primitives::*; use ingot::types::util::Repeated; use ingot::types::DirectPacket; use ingot::types::Emit; +use ingot::types::EmitDoesNotRelyOnBufContents; use ingot::types::Header; use ingot::types::IndirectPacket; use ingot::types::NextLayer; @@ -543,6 +545,12 @@ impl MsgBlk { Self { inner } } + pub fn new_pkt(emit: impl Emit + EmitDoesNotRelyOnBufContents) -> Self { + let mut pkt = Self::new(emit.packet_length()); + pkt.emit_back(emit).unwrap(); + pkt + } + pub fn headroom(&self) -> usize { unsafe { let inner = self.inner.as_ref(); @@ -555,6 +563,14 @@ impl MsgBlk { Self::new_with_headroom(2, len) } + pub fn new_ethernet_pkt( + emit: impl Emit + EmitDoesNotRelyOnBufContents, + ) -> Self { + let mut pkt = Self::new_ethernet(emit.packet_length()); + pkt.emit_back(emit).unwrap(); + pkt + } + pub fn byte_len(&self) -> usize { self.iter().map(|el| el.len()).sum() } @@ -578,12 +594,17 @@ impl MsgBlk { &mut self, n_bytes: usize, f: impl FnOnce(&mut [MaybeUninit]), - ) { + ) -> Result<(), WriteError> { let mut_out = unsafe { self.inner.as_mut() }; let avail_bytes = unsafe { 
(*mut_out.b_datap).db_lim.offset_from(mut_out.b_wptr) }; - assert!(avail_bytes >= 0); - assert!(avail_bytes as usize >= n_bytes); + + if avail_bytes < 0 || (avail_bytes as usize) < n_bytes { + return Err(WriteError::NotEnoughBytes { + available: avail_bytes.max(0) as usize, + needed: n_bytes, + }); + } let in_slice = unsafe { slice::from_raw_parts_mut( @@ -595,19 +616,25 @@ impl MsgBlk { f(in_slice); mut_out.b_wptr = unsafe { mut_out.b_wptr.add(n_bytes) }; + + Ok(()) } pub unsafe fn write_front( &mut self, n_bytes: usize, f: impl FnOnce(&mut [MaybeUninit]), - ) { + ) -> Result<(), WriteError> { let mut_out = unsafe { self.inner.as_mut() }; let avail_bytes = unsafe { mut_out.b_rptr.offset_from((*mut_out.b_datap).db_base) }; - assert!(avail_bytes >= 0); - assert!(avail_bytes as usize >= n_bytes); + if avail_bytes < 0 || (avail_bytes as usize) < n_bytes { + return Err(WriteError::NotEnoughBytes { + available: avail_bytes.max(0) as usize, + needed: n_bytes, + }); + } let new_head = unsafe { mut_out.b_rptr.sub(n_bytes) }; @@ -618,6 +645,90 @@ impl MsgBlk { f(in_slice); mut_out.b_rptr = new_head; + + Ok(()) + } + + /// Adjusts the write pointer for this MsgBlk, initialising any extra bytes to 0. + pub fn resize(&mut self, new_len: usize) -> Result<(), WriteError> { + let len = self.len(); + if new_len < len { + unsafe { + let mut_inner = self.inner.as_mut(); + mut_inner.b_wptr = mut_inner.b_wptr.sub(len - new_len); + } + Ok(()) + } else if new_len > len { + unsafe { + self.write(new_len - len, |v| { + // MaybeUninit::fill is unstable. + let n = v.len(); + v.as_mut_ptr().write_bytes(0, n); + }) + } + } else { + Ok(()) + } + } + + /// Emits an `ingot` packet after any bytes present in this mblk. + pub fn emit_back( + &mut self, + pkt: impl Emit + EmitDoesNotRelyOnBufContents, + ) -> Result<(), WriteError> { + unsafe { + self.write(pkt.packet_length(), |v| { + // Unwrap safety: write will return an Error if + // unsuccessful. 
+ pkt.emit_uninit(v).unwrap(); + }) + } + } + + /// Emits an `ingot` packet before any bytes present in this mblk. + pub fn emit_front( + &mut self, + pkt: impl Emit + EmitDoesNotRelyOnBufContents, + ) -> Result<(), WriteError> { + unsafe { + self.write_front(pkt.packet_length(), |v| { + pkt.emit_uninit(v).unwrap(); + }) + } + } + + /// XXX + pub fn write_bytes_back( + &mut self, + bytes: impl AsRef<[u8]>, + ) -> Result<(), WriteError> { + let bytes = bytes.as_ref(); + unsafe { + self.write(bytes.len(), |v| { + // feat(maybe_uninit_write_slice) -> copy_from_slice + // is unstable. + let uninit_src: &[MaybeUninit] = + core::mem::transmute(bytes); + v.copy_from_slice(uninit_src); + }) + } + } + + /// XXX + pub fn write_bytes_front( + &mut self, + bytes: impl AsRef<[u8]>, + ) -> Result<(), WriteError> { + let bytes = bytes.as_ref(); + unsafe { + self.write_front(bytes.len(), |v| { + // feat(maybe_uninit_write_slice) -> copy_from_slice + // is unstable. + let uninit_src: &[MaybeUninit] = + core::mem::transmute(bytes); + v.copy_from_slice(uninit_src); + }) + } } // TODO: I really need to rethink this one in practice. @@ -1530,6 +1641,11 @@ where // TODO: cleanup type aliases. + #[inline] + pub fn len(&self) -> usize { + self.state.len + } + #[inline] pub fn parse_inbound( self, @@ -1917,6 +2033,13 @@ impl Packet2> { 0 } + /// Compute ULP and IP header checksum from scratch. + /// + /// This should really only be used for testing. 
+ pub fn compute_checksums(&mut self) { + todo!() + } + pub fn body_csum(&mut self) -> Option { self.state.body_csum @@ -2386,12 +2509,12 @@ impl EmittestSpec { } if let Some(outer_encap) = &push_spec.outer_encap { - let a = SizeHoldingEncap { + let encap = SizeHoldingEncap { encapped_len: self.ulp_len as u16, meta: &outer_encap, }; - let l = a.packet_length(); + let l = encap.packet_length(); let target = if prepend.is_none() { space_in_front -= l; @@ -2401,11 +2524,7 @@ impl EmittestSpec { prepend.as_mut().unwrap() }; - unsafe { - target.write_front(l, |v| { - a.emit_uninit(v).unwrap(); - }) - } + target.emit_front(&encap).unwrap(); } if let Some(outer_ip) = &push_spec.outer_ip { @@ -2418,11 +2537,7 @@ impl EmittestSpec { prepend.as_mut().unwrap() }; - unsafe { - target.write_front(l, |v| { - outer_ip.emit_uninit(v).unwrap(); - }) - } + target.emit_front(outer_ip).unwrap(); } if let Some(outer_eth) = &push_spec.outer_eth { @@ -2435,11 +2550,7 @@ impl EmittestSpec { prepend.as_mut().unwrap() }; - unsafe { - target.write_front(l, |v| { - outer_eth.emit_uninit(v).unwrap(); - }) - } + target.emit_front(outer_eth).unwrap(); } if let Some(mut prepend) = prepend { @@ -2553,12 +2664,12 @@ impl EmitSpec { } if let Some(outer_encap) = &self.push_spec.outer_encap { - let a = SizeHoldingEncap { + let encap = SizeHoldingEncap { encapped_len: self.encapped_len, meta: &outer_encap, }; - let l = a.packet_length(); + let l = encap.packet_length(); let target = if prepend.is_none() { space_in_front -= l; @@ -2568,11 +2679,7 @@ impl EmitSpec { prepend.as_mut().unwrap() }; - unsafe { - target.write_front(l, |v| { - a.emit_uninit(v).unwrap(); - }) - } + target.emit_front(&encap).unwrap(); } if let Some(outer_ip) = &self.push_spec.outer_ip { @@ -2585,11 +2692,7 @@ impl EmitSpec { prepend.as_mut().unwrap() }; - unsafe { - target.write_front(l, |v| { - outer_ip.emit_uninit(v).unwrap(); - }) - } + target.emit_front(outer_ip).unwrap(); } if let Some(outer_eth) = &self.push_spec.outer_eth 
{ @@ -2602,11 +2705,7 @@ impl EmitSpec { prepend.as_mut().unwrap() }; - unsafe { - target.write_front(l, |v| { - outer_eth.emit_uninit(v).unwrap(); - }) - } + target.emit_front(outer_eth).unwrap(); } if let Some(mut prepend) = prepend { diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 3198d276..6a45eb38 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -372,15 +372,8 @@ impl CompiledEncap { &mut pkt }; - unsafe { - target.write_front(bytes.len(), |v| { - // feat(maybe_uninit_write_slice) -> copy_from_slice - // is unstable. - let uninit_src: &[MaybeUninit] = - core::mem::transmute(bytes.as_slice()); - v.copy_from_slice(uninit_src); - }); - } + // Unwrap safety -- we either had enough bytes, or we just allocated them. + target.write_bytes_front(bytes).unwrap(); let l4_len = ulp_len + encap_sz; let l3_len = l4_len + l3_extra_bytes; From 0ea48dbf9146140e35ea7c923485d64a51de3456 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 10 Oct 2024 12:47:39 +0100 Subject: [PATCH 041/115] Post--review-feedback on ingot. 
--- Cargo.lock | 7 +- Cargo.toml | 2 +- lib/opte-test-utils/src/icmp.rs | 2 +- lib/opte-test-utils/src/lib.rs | 2 +- lib/opte/src/engine/ingot_base.rs | 2 +- lib/opte/src/engine/ingot_packet.rs | 178 ++++++++++++++-------------- lib/opte/src/engine/port.rs | 2 +- lib/opte/src/engine/rule.rs | 4 +- 8 files changed, 99 insertions(+), 100 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6e717e80..69da89c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -882,7 +882,7 @@ dependencies = [ [[package]] name = "ingot" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=0cfd03c481650bb859255d4971b5bdc2fe671ca1#0cfd03c481650bb859255d4971b5bdc2fe671ca1" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=8cdf5c25833f485d9574aa3dc5c3d15964d19400#8cdf5c25833f485d9574aa3dc5c3d15964d19400" dependencies = [ "bitflags 2.6.0", "ingot-macros", @@ -895,7 +895,7 @@ dependencies = [ [[package]] name = "ingot-macros" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=0cfd03c481650bb859255d4971b5bdc2fe671ca1#0cfd03c481650bb859255d4971b5bdc2fe671ca1" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=8cdf5c25833f485d9574aa3dc5c3d15964d19400#8cdf5c25833f485d9574aa3dc5c3d15964d19400" dependencies = [ "darling", "itertools 0.13.0", @@ -908,9 +908,8 @@ dependencies = [ [[package]] name = "ingot-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=0cfd03c481650bb859255d4971b5bdc2fe671ca1#0cfd03c481650bb859255d4971b5bdc2fe671ca1" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=8cdf5c25833f485d9574aa3dc5c3d15964d19400#8cdf5c25833f485d9574aa3dc5c3d15964d19400" dependencies = [ - "heapless", "ingot-macros", "macaddr", "zerocopy 0.8.3", diff --git a/Cargo.toml b/Cargo.toml index 342fc781..cbf3009d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,7 +50,7 @@ ctor = "0.2" darling = "0.20" dyn-clone = "1.0" heapless = "0.8" -ingot = { git = 
"https://github.com/oxidecomputer/ingot.git", rev = "0cfd03c481650bb859255d4971b5bdc2fe671ca1"} +ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "8cdf5c25833f485d9574aa3dc5c3d15964d19400"} ipnetwork = { version = "0.20", default-features = false } itertools = { version = "0.13", default-features = false } libc = "0.2" diff --git a/lib/opte-test-utils/src/icmp.rs b/lib/opte-test-utils/src/icmp.rs index e9ba46a7..5eadfc2b 100644 --- a/lib/opte-test-utils/src/icmp.rs +++ b/lib/opte-test-utils/src/icmp.rs @@ -18,7 +18,7 @@ use opte::engine::packet::*; use opte::engine::Direction::*; use opte::ingot::ethernet::Ethertype; use opte::ingot::ip::IpProtocol as IngotIpProto; -use opte::ingot::types::Header; +use opte::ingot::types::HeaderLen; use oxide_vpc::engine::VpcParser; use smoltcp::phy::ChecksumCapabilities as CsumCapab; use smoltcp::wire::Icmpv4Packet; diff --git a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index 001b3592..cf457a18 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -72,7 +72,7 @@ use opte::ingot::tcp::Tcp; use opte::ingot::tcp::TcpFlags as IngotTcpFlags; use opte::ingot::types::Emit; use opte::ingot::types::EmitDoesNotRelyOnBufContents; -use opte::ingot::types::Header; +use opte::ingot::types::HeaderLen; use opte::ingot::udp::Udp; pub use opte::ExecCtx; pub use oxide_vpc::api::AddFwRuleReq; diff --git a/lib/opte/src/engine/ingot_base.rs b/lib/opte/src/engine/ingot_base.rs index 5735c455..38ea1193 100644 --- a/lib/opte/src/engine/ingot_base.rs +++ b/lib/opte/src/engine/ingot_base.rs @@ -15,8 +15,8 @@ use ingot::types::primitives::*; use ingot::types::util::Repeated; use ingot::types::ByteSlice; use ingot::types::Emit; +use ingot::types::Header; use ingot::types::NetworkRepr; -use ingot::types::Packet; use ingot::types::ParseError; use ingot::types::Vec; use ingot::udp::Udp; diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 
2650d6ca..2d59dcd6 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -96,13 +96,13 @@ use ingot::tcp::TcpPacket; use ingot::tcp::TcpRef; use ingot::types::primitives::*; use ingot::types::util::Repeated; -use ingot::types::DirectPacket; +use ingot::types::BoxedHeader; use ingot::types::Emit; use ingot::types::EmitDoesNotRelyOnBufContents; -use ingot::types::Header; -use ingot::types::IndirectPacket; +use ingot::types::Header as IngotHeader; +use ingot::types::HeaderLen; +use ingot::types::InlineHeader; use ingot::types::NextLayer; -use ingot::types::Packet as IngotPacket; use ingot::types::ParseControl; use ingot::types::ParseError as IngotParseErr; use ingot::types::ParseResult; @@ -935,12 +935,12 @@ pub enum ValidEncapMeta { } pub struct OpteMeta { - pub outer_eth: Option>>, + pub outer_eth: Option>>, // pub outer_eth: Option>>, pub outer_l3: Option>, // pub outer_l3: Option>>, // pub outer_v6: Option>>, - pub outer_encap: Option>>, + pub outer_encap: Option>>, // pub outer_encap: Option>>, pub inner_eth: EthernetPacket, pub inner_l3: Option>, @@ -976,7 +976,7 @@ unsafe impl<'a> ingot::types::EmitDoesNotRelyOnBufContents { } -impl<'a> Header for SizeHoldingEncap<'a> { +impl<'a> HeaderLen for SizeHoldingEncap<'a> { const MINIMUM_LENGTH: usize = EncapMeta::MINIMUM_LENGTH; #[inline] @@ -1043,7 +1043,7 @@ impl Emit for ValidEncapMeta { } } -impl Header for EncapMeta { +impl HeaderLen for EncapMeta { const MINIMUM_LENGTH: usize = Udp::MINIMUM_LENGTH + Geneve::MINIMUM_LENGTH; #[inline] @@ -1057,7 +1057,7 @@ impl Header for EncapMeta { } } -impl Header for ValidEncapMeta { +impl HeaderLen for ValidEncapMeta { const MINIMUM_LENGTH: usize = Udp::MINIMUM_LENGTH + Geneve::MINIMUM_LENGTH; #[inline] @@ -1233,18 +1233,18 @@ impl From> for OpteMeta { fn from(value: GeneveOverV6) -> Self { // These are practically all Valid, anyhow. 
let outer_encap = match (value.outer_udp, value.outer_encap) { - (ingot::types::Packet::Raw(u), ingot::types::Packet::Raw(g)) => { - Some(DirectPacket::Raw(ValidEncapMeta::Geneve(u, g))) + (ingot::types::Header::Raw(u), ingot::types::Header::Raw(g)) => { + Some(InlineHeader::Raw(ValidEncapMeta::Geneve(u, g))) } _ => todo!(), }; // let outer_l3 = match value.outer_v6 { - // ingot::types::Packet::Repr(v) => { - // Some(DirectPacket::Repr(L3Repr::Ipv6(*v))) + // ingot::types::Header::Repr(v) => { + // Some(InlineHeader::Repr(L3Repr::Ipv6(*v))) // } - // ingot::types::Packet::Raw(v) => { - // Some(DirectPacket::Raw(ValidL3::Ipv6(v))) + // ingot::types::Header::Raw(v) => { + // Some(InlineHeader::Raw(ValidL3::Ipv6(v))) // } // }; @@ -1311,7 +1311,7 @@ impl PacketHeaders { pub fn outer_ether( &self, - ) -> Option<&DirectPacket>> { + ) -> Option<&InlineHeader>> { self.headers.outer_eth.as_ref() } @@ -1320,10 +1320,10 @@ impl PacketHeaders { /// in addition to its VNI. pub fn outer_encap_geneve_vni_and_origin(&self) -> Option<(Vni, bool)> { match &self.headers.outer_encap { - Some(DirectPacket::Repr(EncapMeta::Geneve(g))) => { + Some(InlineHeader::Repr(EncapMeta::Geneve(g))) => { Some((g.vni, g.oxide_external_pkt)) } - Some(DirectPacket::Raw(ValidEncapMeta::Geneve(_, g))) => { + Some(InlineHeader::Raw(ValidEncapMeta::Geneve(_, g))) => { // TODO: hack. let oxide_external = g.1.packet_length() != 0; Some((g.vni(), oxide_external)) @@ -1783,7 +1783,7 @@ impl Packet2> { // do this sort of thing. We are so, so far from that... let mut force_serialize = false; - use ingot::types::DirectPacket; + use ingot::types::InlineHeader; match headers.inner_ulp { Some(ulp) => { @@ -1793,26 +1793,26 @@ impl Packet2> { if ulp.needs_emit() || l != init_lens.inner_ulp { let inner = push_spec.inner.get_or_insert_with(Default::default); - // TODO: impl DirectPacket / From<&Ulp> for UlpRepr here? generally seems a bit anaemic. + // TODO: impl InlineHeader / From<&Ulp> for UlpRepr here? 
generally seems a bit anaemic. inner.ulp = Some(match ulp { - Ulp::Tcp(IngotPacket::Repr(t)) => UlpRepr::Tcp(*t), - Ulp::Tcp(IngotPacket::Raw(t)) => { + Ulp::Tcp(IngotHeader::Repr(t)) => UlpRepr::Tcp(*t), + Ulp::Tcp(IngotHeader::Raw(t)) => { UlpRepr::Tcp((&t).into()) } - Ulp::Udp(IngotPacket::Repr(t)) => UlpRepr::Udp(*t), - Ulp::Udp(IngotPacket::Raw(t)) => { + Ulp::Udp(IngotHeader::Repr(t)) => UlpRepr::Udp(*t), + Ulp::Udp(IngotHeader::Raw(t)) => { UlpRepr::Udp((&t).into()) } - Ulp::IcmpV4(IngotPacket::Repr(t)) => { + Ulp::IcmpV4(IngotHeader::Repr(t)) => { UlpRepr::IcmpV4(*t) } - Ulp::IcmpV4(IngotPacket::Raw(t)) => { + Ulp::IcmpV4(IngotHeader::Raw(t)) => { UlpRepr::IcmpV4((&t).into()) } - Ulp::IcmpV6(IngotPacket::Repr(t)) => { + Ulp::IcmpV6(IngotHeader::Repr(t)) => { UlpRepr::IcmpV6(*t) } - Ulp::IcmpV6(IngotPacket::Raw(t)) => { + Ulp::IcmpV6(IngotHeader::Raw(t)) => { UlpRepr::IcmpV6((&t).into()) } }); @@ -1839,15 +1839,15 @@ impl Packet2> { push_spec.inner.get_or_insert_with(Default::default); inner.l3 = Some(match l3 { - L3::Ipv4(IngotPacket::Repr(v4)) => L3Repr::Ipv4(*v4), - L3::Ipv4(IngotPacket::Raw(v4)) => { + L3::Ipv4(IngotHeader::Repr(v4)) => L3Repr::Ipv4(*v4), + L3::Ipv4(IngotHeader::Raw(v4)) => { L3Repr::Ipv4((&v4).into()) } - L3::Ipv6(IngotPacket::Repr(v6)) => L3Repr::Ipv6(*v6), + L3::Ipv6(IngotHeader::Repr(v6)) => L3Repr::Ipv6(*v6), - // This needs a fuller DirectPacket due to EHs... + // This needs a fuller InlineHeader due to EHs... // We can't actually do structural mods here today using OPTE. 
- L3::Ipv6(IngotPacket::Raw(v6)) => todo!(), // L3Repr::Ipv6((&v6).into()), + L3::Ipv6(IngotHeader::Raw(v6)) => todo!(), // L3Repr::Ipv6((&v6).into()), }); force_serialize = true; rewind += init_lens.inner_l3; @@ -1865,8 +1865,8 @@ impl Packet2> { if force_serialize { let inner = push_spec.inner.get_or_insert_with(Default::default); inner.eth = match headers.inner_eth { - IngotPacket::Repr(p) => *p, - IngotPacket::Raw(p) => (&p).into(), + IngotHeader::Repr(p) => *p, + IngotHeader::Raw(p) => (&p).into(), }; rewind += init_lens.inner_eth; } @@ -1878,9 +1878,9 @@ impl Packet2> { || encap.packet_length() != init_lens.outer_encap => { push_spec.outer_encap = Some(match encap { - DirectPacket::Repr(o) => o, + InlineHeader::Repr(o) => o, // Needed in fullness of time, but not here. - DirectPacket::Raw(_) => todo!(), + InlineHeader::Raw(_) => todo!(), }); force_serialize = true; @@ -1900,13 +1900,13 @@ impl Packet2> { || l3.packet_length() != init_lens.outer_l3 => { // push_spec.outer_ip = Some(match l3 { - // DirectPacket::Repr(o) => o, + // InlineHeader::Repr(o) => o, // // Needed in fullness of time, but not here. - // DirectPacket::Raw(_) => todo!(), + // InlineHeader::Raw(_) => todo!(), // }); push_spec.outer_ip = Some(match l3 { - L3::Ipv6(IndirectPacket::Repr(o)) => L3Repr::Ipv6(*o), - L3::Ipv4(IndirectPacket::Repr(o)) => L3Repr::Ipv4(*o), + L3::Ipv6(BoxedHeader::Repr(o)) => L3Repr::Ipv6(*o), + L3::Ipv4(BoxedHeader::Repr(o)) => L3Repr::Ipv4(*o), _ => todo!(), }); @@ -1927,9 +1927,9 @@ impl Packet2> { || eth.packet_length() != init_lens.outer_eth => { push_spec.outer_eth = Some(match eth { - DirectPacket::Repr(o) => o, + InlineHeader::Repr(o) => o, // Needed in fullness of time, but not here. 
- DirectPacket::Raw(_) => todo!(), + InlineHeader::Raw(_) => todo!(), }); force_serialize = true; @@ -2154,18 +2154,18 @@ impl Packet2> { Ulp::Tcp(tcp) => { tcp.set_checksum(0); match tcp { - IngotPacket::Repr(tcp) => { + IngotHeader::Repr(tcp) => { let mut bytes = [0u8; 56]; tcp.emit_raw(&mut bytes[..]); csum.add_bytes(&bytes[..]); } - IngotPacket::Raw(tcp) => { + IngotHeader::Raw(tcp) => { csum.add_bytes(tcp.0.as_bytes()); match &tcp.1 { - IngotPacket::Repr(opts) => { + IngotHeader::Repr(opts) => { csum.add_bytes(&*opts); } - IngotPacket::Raw(opts) => { + IngotHeader::Raw(opts) => { csum.add_bytes(&*opts); } } @@ -2176,12 +2176,12 @@ impl Packet2> { Ulp::Udp(udp) => { udp.set_checksum(0); match udp { - IngotPacket::Repr(udp) => { + IngotHeader::Repr(udp) => { let mut bytes = [0u8; 8]; udp.emit_raw(&mut bytes[..]); csum.add_bytes(&bytes[..]); } - IngotPacket::Raw(udp) => { + IngotHeader::Raw(udp) => { csum.add_bytes(udp.0.as_bytes()); } } @@ -2199,18 +2199,18 @@ impl Packet2> { let mut csum = Checksum::default(); match ip { - IngotPacket::Repr(ip) => { + IngotHeader::Repr(ip) => { let mut bytes = [0u8; 56]; ip.emit_raw(&mut bytes[..]); csum.add_bytes(&bytes[..]); } - IngotPacket::Raw(ip) => { + IngotHeader::Raw(ip) => { csum.add_bytes(ip.0.as_bytes()); match &ip.1 { - IngotPacket::Repr(opts) => { + IngotHeader::Repr(opts) => { csum.add_bytes(&*opts); } - IngotPacket::Raw(opts) => { + IngotHeader::Raw(opts) => { csum.add_bytes(&*opts); } } @@ -2376,8 +2376,8 @@ fn csum_minus_hdr(ulp: &ValidUlp) -> Option { // TODO: bad bound? // csum.sub_bytes(tcp.1.as_ref()); csum.sub_bytes(match &tcp.1 { - ingot::types::Packet::Repr(v) => &v[..], - ingot::types::Packet::Raw(v) => &v[..], + ingot::types::Header::Repr(v) => &v[..], + ingot::types::Header::Raw(v) => &v[..], }); Some(csum) @@ -2772,9 +2772,9 @@ impl QueryEcho for IcmpV6Packet { } } -// TODO: generate ref/mut traits on DirectPacket AND BoxPacket in ingot to halve the code here... 
+// TODO: generate ref/mut traits on InlineHeader AND BoxPacket in ingot to halve the code here... impl HeaderActionModify - for DirectPacket> + for InlineHeader> { #[inline] fn run_modify( @@ -2782,7 +2782,7 @@ impl HeaderActionModify mod_spec: &EtherMod, ) -> Result<(), HeaderActionError> { match self { - DirectPacket::Repr(a) => { + InlineHeader::Repr(a) => { if let Some(src) = mod_spec.src { a.set_source(src); } @@ -2790,7 +2790,7 @@ impl HeaderActionModify a.set_destination(dst); } } - DirectPacket::Raw(a) => { + InlineHeader::Raw(a) => { if let Some(src) = mod_spec.src { a.set_source(src); } @@ -2821,9 +2821,9 @@ impl HeaderActionModify for EthernetPacket { } } -// TODO: generate ref/mut traits on DirectPacket AND BoxPacket in ingot to halve the code here... +// TODO: generate ref/mut traits on InlineHeader AND BoxPacket in ingot to halve the code here... impl HeaderActionModify - for DirectPacket> + for InlineHeader> { #[inline] fn run_modify( @@ -2832,7 +2832,7 @@ impl HeaderActionModify ) -> Result<(), HeaderActionError> { match mod_spec { IpMod::Ip4(mods) => match self { - DirectPacket::Repr(L3Repr::Ipv4(v4)) => { + InlineHeader::Repr(L3Repr::Ipv4(v4)) => { if let Some(src) = mods.src { >::set_source(v4, src); } @@ -2846,7 +2846,7 @@ impl HeaderActionModify ); } } - DirectPacket::Raw(ValidL3::Ipv4(v4)) => { + InlineHeader::Raw(ValidL3::Ipv4(v4)) => { if let Some(src) = mods.src { v4.set_source(src); } @@ -2861,7 +2861,7 @@ impl HeaderActionModify _ => return Err(HeaderActionError::MissingHeader), }, IpMod::Ip6(mods) => match self { - DirectPacket::Repr(L3Repr::Ipv6(v6)) => { + InlineHeader::Repr(L3Repr::Ipv6(v6)) => { if let Some(src) = mods.src { >::set_source(v6, src); } @@ -2876,7 +2876,7 @@ impl HeaderActionModify ); } } - DirectPacket::Raw(ValidL3::Ipv6(v6)) => { + InlineHeader::Raw(ValidL3::Ipv6(v6)) => { if let Some(src) = mods.src { v6.set_source(src); } @@ -2983,7 +2983,7 @@ impl HeaderActionModify for Ulp { } impl HeaderActionModify - for 
DirectPacket> + for InlineHeader> { #[inline] fn run_modify( @@ -2992,7 +2992,7 @@ impl HeaderActionModify ) -> Result<(), HeaderActionError> { match (self, mod_spec) { ( - DirectPacket::Repr(EncapMeta::Geneve(g)), + InlineHeader::Repr(EncapMeta::Geneve(g)), EncapMod::Geneve(mod_spec), ) => { if let Some(vni) = mod_spec.vni { @@ -3000,7 +3000,7 @@ impl HeaderActionModify } } ( - DirectPacket::Raw(ValidEncapMeta::Geneve(u, g)), + InlineHeader::Raw(ValidEncapMeta::Geneve(u, g)), EncapMod::Geneve(mod_spec), ) => { if let Some(vni) = mod_spec.vni { @@ -3013,16 +3013,16 @@ impl HeaderActionModify } } -impl HasInnerCksum for DirectPacket> { +impl HasInnerCksum for InlineHeader> { const HAS_CKSUM: bool = false; } -impl HasInnerCksum for DirectPacket> { +impl HasInnerCksum for InlineHeader> { const HAS_CKSUM: bool = true; } impl HasInnerCksum - for DirectPacket> + for InlineHeader> { const HAS_CKSUM: bool = false; } @@ -3043,11 +3043,11 @@ impl HasInnerCksum for Ulp { // need to briefly keep both around while I systematically rewrite the test suite. 
impl From - for ingot::types::Packet> + for ingot::types::Header> { #[inline] fn from(value: EtherMeta) -> Self { - ingot::types::Packet::Repr( + ingot::types::Header::Repr( Ethernet { destination: value.dst, source: value.src, @@ -3059,11 +3059,11 @@ impl From } impl From - for DirectPacket> + for InlineHeader> { #[inline] fn from(value: EtherMeta) -> Self { - DirectPacket::Repr( + InlineHeader::Repr( Ethernet { destination: value.dst, source: value.src, @@ -3075,28 +3075,28 @@ impl From } impl From - for ingot::types::Packet> + for ingot::types::Header> { #[inline] fn from(value: EncapMeta) -> Self { - ingot::types::Packet::Repr(value.into()) + ingot::types::Header::Repr(value.into()) } } impl From - for DirectPacket> + for InlineHeader> { #[inline] fn from(value: EncapMeta) -> Self { - DirectPacket::Repr(value) + InlineHeader::Repr(value) } } -impl From for DirectPacket> { +impl From for InlineHeader> { #[inline] fn from(value: IpMeta) -> Self { match value { - IpMeta::Ip4(v4) => DirectPacket::Repr( + IpMeta::Ip4(v4) => InlineHeader::Repr( Ipv4 { ihl: (v4.hdr_len / 4) as u8, total_len: v4.total_len, @@ -3110,7 +3110,7 @@ impl From for DirectPacket> { } .into(), ), - IpMeta::Ip6(v6) => DirectPacket::Repr( + IpMeta::Ip6(v6) => InlineHeader::Repr( Ipv6 { payload_len: v6.pay_len, next_header: IpProtocol(u8::from(v6.next_hdr)), @@ -3166,12 +3166,12 @@ impl From for L3 { // } // } -impl PushAction>> +impl PushAction>> for EtherMeta { #[inline] - fn push(&self) -> DirectPacket> { - DirectPacket::Repr(Ethernet { + fn push(&self) -> InlineHeader> { + InlineHeader::Repr(Ethernet { destination: self.dst, source: self.src, ethertype: Ethertype(u16::from(self.ether_type)), @@ -3182,7 +3182,7 @@ impl PushAction>> impl PushAction> for EtherMeta { #[inline] fn push(&self) -> EthernetPacket { - ingot::types::Packet::Repr( + ingot::types::Header::Repr( Ethernet { destination: self.dst, source: self.src, @@ -3193,9 +3193,9 @@ impl PushAction> for EtherMeta { } } -// impl 
PushAction>> for IpPush { -// fn push(&self) -> DirectPacket> { -// DirectPacket::Repr(match self { +// impl PushAction>> for IpPush { +// fn push(&self) -> InlineHeader> { +// InlineHeader::Repr(match self { // IpPush::Ip4(v4) => L3Repr::Ipv4(Ipv4 { // protocol: IpProtocol(u8::from(v4.proto)), // source: v4.src.bytes().into(), diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 3d96bb9e..a14a4dac 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -92,7 +92,7 @@ use core::sync::atomic::Ordering::SeqCst; use illumos_sys_hdrs::uintptr_t; use ingot::geneve::Geneve; use ingot::types::Emit; -use ingot::types::Header; +use ingot::types::HeaderLen; use ingot::types::Read; use ingot::udp::Udp; use kstat_macro::KStatProvider; diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 6a45eb38..6ea0919f 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -52,7 +52,7 @@ use core::fmt::Display; use core::mem::MaybeUninit; use illumos_sys_hdrs::c_char; use illumos_sys_hdrs::uintptr_t; -use ingot::types::DirectPacket; +use ingot::types::InlineHeader; use ingot::types::Read; use opte_api::Direction; use serde::Deserialize; @@ -485,7 +485,7 @@ impl HdrTransform { T::Chunk: ByteSliceMut, { self.outer_ether - .act_on_option::>, _>( + .act_on_option::>, _>( &mut meta.headers.outer_eth, ) .map_err(Self::err_fn("outer ether"))?; From 3d34ae3c5931d01eaac00f49ff548a4cc6d14e1b Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 11 Oct 2024 12:56:28 +0100 Subject: [PATCH 042/115] Progress. 
--- lib/opte-test-utils/src/lib.rs | 4 +- lib/opte-test-utils/src/pcap.rs | 5 +- lib/opte/src/engine/ingot_base.rs | 28 +- lib/opte/src/engine/ingot_packet.rs | 37 +- lib/oxide-vpc/tests/integration_tests.rs | 734 ++++++++++++----------- 5 files changed, 449 insertions(+), 359 deletions(-) diff --git a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index cf457a18..7ea930a4 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -63,11 +63,11 @@ pub use opte::engine::tcp::TcpMeta; pub use opte::engine::udp::UdpHdr; pub use opte::engine::udp::UdpMeta; pub use opte::engine::GenericUlp; -use opte::ingot::ethernet::Ethertype; +pub use opte::ingot::ethernet::Ethertype; use opte::ingot::geneve::Geneve; use opte::ingot::geneve::GeneveOpt; use opte::ingot::geneve::GeneveOptionType; -use opte::ingot::ip::IpProtocol as IngotIpProto; +pub use opte::ingot::ip::IpProtocol as IngotIpProto; use opte::ingot::tcp::Tcp; use opte::ingot::tcp::TcpFlags as IngotTcpFlags; use opte::ingot::types::Emit; diff --git a/lib/opte-test-utils/src/pcap.rs b/lib/opte-test-utils/src/pcap.rs index b6267e77..3af986e9 100644 --- a/lib/opte-test-utils/src/pcap.rs +++ b/lib/opte-test-utils/src/pcap.rs @@ -6,6 +6,7 @@ //! Routines for building packet capture files. +use opte::engine::ingot_packet::MsgBlk; use opte::engine::packet::*; use pcap_parser::pcap; use pcap_parser::pcap::LegacyPcapBlock; @@ -62,8 +63,8 @@ impl PcapBuilder { } /// Add a packet to the capture. 
- pub fn add_pkt(&mut self, pkt: &Packet) { - let pkt_bytes = pkt.get_rdr().copy_remaining(); + pub fn add_pkt(&mut self, pkt: &MsgBlk) { + let pkt_bytes = pkt.copy_all(); let mut block = LegacyPcapBlock { ts_sec: 7777, ts_usec: 7777, diff --git a/lib/opte/src/engine/ingot_base.rs b/lib/opte/src/engine/ingot_base.rs index 38ea1193..521ec14e 100644 --- a/lib/opte/src/engine/ingot_base.rs +++ b/lib/opte/src/engine/ingot_base.rs @@ -2,7 +2,9 @@ use bitflags::bitflags; use ingot::choice; use ingot::ethernet::Ethertype; use ingot::icmp::IcmpV4; +use ingot::icmp::IcmpV4Ref; use ingot::icmp::IcmpV6; +use ingot::icmp::IcmpV6Ref; use ingot::icmp::ValidIcmpV4; use ingot::icmp::ValidIcmpV6; use ingot::ip::Ecn; @@ -10,6 +12,7 @@ use ingot::ip::IpProtocol; use ingot::ip::Ipv4Flags; use ingot::ip::LowRentV6EhRepr; use ingot::tcp::Tcp; +use ingot::tcp::TcpRef; use ingot::tcp::ValidTcp; use ingot::types::primitives::*; use ingot::types::util::Repeated; @@ -20,6 +23,7 @@ use ingot::types::NetworkRepr; use ingot::types::ParseError; use ingot::types::Vec; use ingot::udp::Udp; +use ingot::udp::UdpRef; use ingot::udp::ValidUdp; use ingot::Ingot; use opte_api::Ipv4Addr; @@ -29,7 +33,7 @@ use opte_api::MacAddr; use super::checksum::Checksum; // Redefine Ethernet and v4/v6 because we have our own, internal, -// types already. +// address types already. 
#[choice(on = Ethertype)] pub enum L3 { @@ -51,6 +55,28 @@ pub enum Ulp { IcmpV6 = IpProtocol::ICMP_V6, } +impl ValidUlp { + pub fn csum(&self) -> [u8; 2] { + match self { + ValidUlp::Tcp(t) => t.checksum(), + ValidUlp::Udp(u) => u.checksum(), + ValidUlp::IcmpV4(i4) => i4.checksum(), + ValidUlp::IcmpV6(i6) => i6.checksum(), + } + .to_be_bytes() + } +} + +impl ValidL3 { + pub fn csum(&self) -> [u8; 2] { + match self { + ValidL3::Ipv4(i4) => i4.checksum(), + ValidL3::Ipv6(i6) => 0, + } + .to_be_bytes() + } +} + #[derive(Copy, Clone, Debug, Eq, Hash, PartialEq, Ingot)] #[ingot(impl_default)] pub struct Ethernet { diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 2d59dcd6..a889eb75 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -36,6 +36,7 @@ use super::ingot_base::UlpRepr; use super::ingot_base::ValidEthernet; use super::ingot_base::ValidIpv6; use super::ingot_base::ValidL3; +use super::ingot_base::ValidL4; use super::ingot_base::ValidUlp; use super::ingot_base::L3; use super::ingot_base::L4; @@ -127,7 +128,7 @@ pub struct GeneveOverV6 { pub outer_eth: EthernetPacket, #[ingot(from = "L3")] pub outer_v6: Ipv6Packet, - #[ingot(from = "L4")] + #[ingot(from = "L4", control = geneve_dst_port)] pub outer_udp: UdpPacket, pub outer_encap: GenevePacket, @@ -136,6 +137,16 @@ pub struct GeneveOverV6 { pub inner_ulp: Ulp, } +#[inline] +fn geneve_dst_port(l4: &ValidL4) -> ParseControl { + match l4 { + ValidL4::Udp(u) if u.destination() == GENEVE_PORT => { + ParseControl::Continue + } + _ => ParseControl::Reject, + } +} + #[inline] fn exit_on_arp(eth: &ValidEthernet) -> ParseControl { if eth.ethertype() == Ethertype::ARP { @@ -784,6 +795,18 @@ impl MsgBlk { Some(Self { inner }) } + + /// Copy out all bytes within this mblk and its successors + /// to a single contiguous buffer. 
+ pub fn copy_all(&self) -> Vec { + let mut out = vec![]; + + for node in self.iter() { + out.extend_from_slice(node) + } + + out + } } #[derive(Debug)] @@ -1315,6 +1338,10 @@ impl PacketHeaders { self.headers.outer_eth.as_ref() } + pub fn outer_ip(&self) -> Option<&L3> { + self.headers.outer_l3.as_ref() + } + // Need to expose this a lil cleaner... /// Returns whether this packet is sourced from outside the rack, /// in addition to its VNI. @@ -1406,6 +1433,7 @@ impl PacketHeaders { self.body.body_segs() } + // right place for this to live? Or is `meta()` misnamed? pub fn copy_remaining(&self) -> Vec where T::Chunk: ByteSliceMut, @@ -2317,8 +2345,6 @@ impl> PacketState for ParsedStage1 { impl> ParsedStage1 {} -type Quack = Parsed2>; - // Needed for now to account for not wanting to redesign ActionDescs // to be generic over T (trait object safety rules, etc.). pub type PacketMeta3<'a> = Parsed2>; @@ -2452,7 +2478,8 @@ pub struct EmittestSpec { impl EmittestSpec { #[inline] - pub fn apply(&mut self, mut pkt: MsgBlk) -> MsgBlk { + #[must_use] + pub fn apply(&self, mut pkt: MsgBlk) -> MsgBlk { // Rewind { let mut slots = heapless::Vec::<&mut MsgBlkNode, 6>::new(); @@ -2479,7 +2506,7 @@ impl EmittestSpec { // TODO: put available layers into said slots? } - match &mut self.spec { + match &self.spec { EmitterSpec::Fastpath(push_spec) => { push_spec.encap.prepend(pkt, self.ulp_len as usize) } diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index 4fbd4393..889e5dd7 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -13,8 +13,6 @@ //! OPTE pipeline by single-stepping the packets in each capture and //! verifying that OPTE processing produces the expected bytes. 
-use opte_test_utils as common; - use common::icmp::*; use common::*; use opte::api::MacAddr; @@ -32,6 +30,17 @@ use opte::engine::headers::EncapMeta; use opte::engine::headers::IpMeta; use opte::engine::headers::UlpMeta; use opte::engine::icmp::IcmpHdr; +use opte::engine::ingot_base::EthernetRef; +use opte::engine::ingot_base::Ipv4Ref; +use opte::engine::ingot_base::Ipv6Ref; +use opte::engine::ingot_base::ValidL3; +use opte::engine::ingot_base::ValidUlp; +use opte::engine::ingot_base::L3; +use opte::engine::ingot_packet::LightParsedMblk; +use opte::engine::ingot_packet::MsgBlk; +use opte::engine::ingot_packet::Packet2; +use opte::engine::ingot_packet::Parsed2; +use opte::engine::ingot_packet::ParsedMblk; use opte::engine::ip4::Ipv4Addr; use opte::engine::ip4::Ipv4Hdr; use opte::engine::ip4::Ipv4HdrError; @@ -43,6 +52,7 @@ use opte::engine::packet::Initialized; use opte::engine::packet::InnerFlowId; use opte::engine::packet::Packet; use opte::engine::packet::PacketRead; +use opte::engine::packet::ParseError; use opte::engine::packet::Parsed; use opte::engine::port::ProcessError; use opte::engine::tcp::TcpState; @@ -50,6 +60,13 @@ use opte::engine::tcp::TIME_WAIT_EXPIRE_SECS; use opte::engine::udp::UdpHdr; use opte::engine::udp::UdpMeta; use opte::engine::Direction; +use opte::engine::NetworkParser; +use opte::ingot::geneve::GeneveRef; +use opte::ingot::tcp::TcpRef; +use opte::ingot::types::Emit; +use opte::ingot::types::HeaderLen; +use opte::ingot::udp::UdpRef; +use opte_test_utils as common; use oxide_vpc::api::ExternalIpCfg; use oxide_vpc::api::FirewallRule; use oxide_vpc::api::RouterClass; @@ -118,6 +135,35 @@ fn lab_cfg() -> VpcCfg { } } +fn parse_inbound( + pkt: &mut MsgBlk, + parser: NP, +) -> Result>>, ParseError> +{ + let pkt = Packet2::new(pkt.iter_mut()); + pkt.parse_inbound(parser) +} + +fn parse_outbound( + pkt: &mut MsgBlk, + parser: NP, +) -> Result>>, ParseError> +{ + let pkt = Packet2::new(pkt.iter_mut()); + pkt.parse_outbound(parser) +} + +/// 
Expects that a packet result is modified, and applies that modification. +macro_rules! expect_modified { + ($res:ident, $pkt:ident) => { + assert!(matches!($res, Ok(Modified(_)))); + #[allow(unused_assignments)] + if let Ok(Modified(spec)) = $res { + $pkt = spec.apply($pkt); + } + }; +} + // Verify that the list of layers is what we expect. #[test] fn check_layers() { @@ -139,14 +185,16 @@ fn port_transition_running() { // Try processing the packet while taking the port through a Ready // -> Running. // ================================================================ - let mut pkt1 = tcp_telnet_syn(&g1_cfg, &g2_cfg); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); + let mut pkt1_m = tcp_telnet_syn(&g1_cfg, &g2_cfg); + let pkt1 = parse_outbound(&mut pkt1_m, GenericUlp {}).unwrap(); + let res = g1.port.process(Out, pkt1); assert!(matches!(res, Err(ProcessError::BadState(_)))); assert_port!(g1); g1.port.start(); set!(g1, "port_state=running"); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt1 = parse_outbound(&mut pkt1_m, GenericUlp {}).unwrap(); + let res = g1.port.process(Out, pkt1); + assert!(matches!(res, Ok(Modified(_)))); incr!( g1, [ @@ -171,11 +219,12 @@ fn port_transition_reset() { // -> Running -> Ready transition. Verify that flows are cleared // but rules remain. 
// ================================================================ - let mut pkt1 = tcp_telnet_syn(&g1_cfg, &g2_cfg); + let mut pkt1_m = tcp_telnet_syn(&g1_cfg, &g2_cfg); + let pkt1 = parse_outbound(&mut pkt1_m, GenericUlp {}).unwrap(); g1.port.start(); set!(g1, "port_state=running"); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -186,7 +235,8 @@ fn port_transition_reset() { ); g1.port.reset(); update!(g1, ["set:port_state=ready", "zero_flows"]); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); + let pkt1 = parse_outbound(&mut pkt1_m, GenericUlp {}).unwrap(); + let res = g1.port.process(Out, pkt1); assert!(matches!(res, Err(ProcessError::BadState(_)))); assert_port!(g1); } @@ -221,9 +271,10 @@ fn port_transition_pause() { // ================================================================ // Send the HTTP SYN. // ================================================================ - let mut pkt1 = http_syn(&g2_cfg, &g1_cfg); - let res = g2.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let mut pkt1_m = http_syn(&g2_cfg, &g1_cfg); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g2.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g2, [ @@ -233,8 +284,9 @@ fn port_transition_pause() { ] ); - let res = g1.port.process(In, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -272,7 +324,9 @@ fn port_transition_pause() { ), Err(OpteError::BadState(_)) )); - let res = g2.port.process(Out, &mut pkt1, ActionMeta::new()); + + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g2.port.process(Out, pkt1); assert!(matches!(res, 
Err(ProcessError::BadState(_)))); let fw_rule: FirewallRule = "action=allow priority=10 dir=in protocol=tcp port=22".parse().unwrap(); @@ -301,13 +355,15 @@ fn port_transition_pause() { g2.port.start(); set!(g2, "port_state=running"); - let mut pkt2 = http_syn_ack(&g1_cfg, &g2_cfg); - let res = g1.port.process(Out, &mut pkt2, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let mut pkt2_m = http_syn_ack(&g1_cfg, &g2_cfg); + let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt2); + expect_modified!(res, pkt2_m); incr!(g1, ["uft.out", "stats.port.out_modified, stats.port.out_uft_miss"]); - let res = g2.port.process(In, &mut pkt2, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g2.port.process(In, pkt2); + expect_modified!(res, pkt2_m); incr!(g2, ["uft.in", "stats.port.in_modified, stats.port.in_uft_miss"]); } @@ -358,7 +414,7 @@ fn gateway_icmp4_ping() { // ================================================================ // Generate an ICMP Echo Request from G1 to Virtual GW // ================================================================ - let mut pkt1 = gen_icmp_echo_req( + let mut pkt1_m = gen_icmp_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, g1_cfg.ipv4_cfg().unwrap().private_ip.into(), @@ -368,15 +424,16 @@ fn gateway_icmp4_ping() { &data[..], 1, ); - pcap.add_pkt(&pkt1); + pcap.add_pkt(&pkt1_m); // ================================================================ // Run the Echo Request through g1's port in the outbound // direction and verify it results in an Echo Reply Hairpin packet // back to guest. 
// ================================================================ - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - let hp = match res { + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + let mut hp = match res { Ok(Hairpin(hp)) => hp, _ => panic!("expected Hairpin, got {:?}", res), }; @@ -384,32 +441,32 @@ fn gateway_icmp4_ping() { // In this case we are parsing a hairpin reply, so we can't use // the VpcParser since it would expect any inbound packet to be // encapsulated. - let reply = hp.parse(In, GenericUlp {}).unwrap(); - pcap.add_pkt(&reply); - assert_eq!(reply.body_offset(), IP4_SZ + IcmpHdr::SIZE); - assert_eq!(reply.body_seg(), 0); + pcap.add_pkt(&hp); + // let reply = hp.parse(In, GenericUlp {}).unwrap(); + let reply = parse_inbound(&mut hp, GenericUlp {}).unwrap().to_full_meta(); let meta = reply.meta(); - assert!(meta.outer.ether.is_none()); - assert!(meta.outer.ip.is_none()); - assert!(meta.outer.encap.is_none()); + assert!(meta.outer_ether().is_none()); + assert!(meta.outer_ip().is_none()); + assert!(meta.outer_encap_geneve_vni_and_origin().is_none()); - let eth = meta.inner.ether; - assert_eq!(eth.src, g1_cfg.gateway_mac); - assert_eq!(eth.dst, g1_cfg.guest_mac); + let eth = meta.inner_ether(); + assert_eq!(eth.source(), g1_cfg.gateway_mac); + assert_eq!(eth.destination(), g1_cfg.guest_mac); - match meta.inner.ip.as_ref().unwrap() { - IpMeta::Ip4(ip4) => { - assert_eq!(ip4.src, g1_cfg.ipv4_cfg().unwrap().gateway_ip); - assert_eq!(ip4.dst, g1_cfg.ipv4_cfg().unwrap().private_ip); - assert_eq!(ip4.proto, Protocol::ICMP); + match meta.inner_l3().as_ref().unwrap() { + L3::Ipv4(ip4) => { + assert_eq!(ip4.source(), g1_cfg.ipv4_cfg().unwrap().gateway_ip); + assert_eq!( + ip4.destination(), + g1_cfg.ipv4_cfg().unwrap().private_ip + ); + assert_eq!(ip4.protocol(), IngotIpProto::ICMP); } - ip6 => panic!("expected inner IPv4 metadata, got IPv6: {:?}", ip6), + L3::Ipv6(v6) => 
panic!("expected inner IPv4 metadata, got IPv6"), } - let mut rdr = reply.get_body_rdr(); - rdr.seek_back(IcmpHdr::SIZE).unwrap(); - let reply_body = rdr.copy_remaining(); + let reply_body = reply.meta().copy_remaining(); let reply_pkt = Icmpv4Packet::new_checked(&reply_body).unwrap(); let mut csum = CsumCapab::ignored(); csum.ipv4 = smoltcp::phy::Checksum::Rx; @@ -450,8 +507,9 @@ fn guest_to_guest_no_route() { ) .unwrap(); update!(g1, ["incr:epoch", "set:router.rules.out=0"]); - let mut pkt1 = http_syn(&g1_cfg, &g2_cfg); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); + let mut pkt1_m = http_syn(&g1_cfg, &g2_cfg); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); assert_drop!( res, DropReason::Layer { name: "router", reason: DenyReason::Default } @@ -510,18 +568,19 @@ fn guest_to_guest() { PcapBuilder::new("overlay_guest_to_guest-guest-2.pcap"); let mut pcap_phys2 = PcapBuilder::new("overlay_guest_to_guest-phys-2.pcap"); - let mut pkt1 = http_syn(&g1_cfg, &g2_cfg); - pcap_guest1.add_pkt(&pkt1); - let ulp_csum_b4 = pkt1.meta().inner.ulp.unwrap().csum(); - let ip_csum_b4 = pkt1.meta().inner.ip.unwrap().csum(); + let mut pkt1_m = http_syn(&g1_cfg, &g2_cfg); + pcap_guest1.add_pkt(&pkt1_m); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let ulp_csum_b4 = pkt1.meta().inner_ulp.as_ref().unwrap().csum(); + let ip_csum_b4 = pkt1.meta().inner_l3.as_ref().unwrap().csum(); // ================================================================ // Run the packet through g1's port in the outbound direction and // verify the resulting packet meets expectations. 
// ================================================================ - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - pcap_phys1.add_pkt(&pkt1); - assert!(matches!(res, Ok(Modified))); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -530,64 +589,54 @@ fn guest_to_guest() { "stats.port.out_modified, stats.port.out_uft_miss", ] ); + pcap_phys1.add_pkt(&pkt1_m); + + let nodes = pkt1_m.iter(); + assert_eq!(nodes.count(), 2); - assert_eq!(pkt1.body_offset(), VPC_ENCAP_SZ + TCP4_SZ + HTTP_SYN_OPTS_LEN); - assert_eq!(pkt1.body_seg(), 1); - let ulp_csum_after = pkt1.meta().inner.ulp.unwrap().csum(); - let ip_csum_after = pkt1.meta().inner.ip.unwrap().csum(); + let pkt2 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); + let ulp_csum_after = pkt2.meta().inner_ulp.csum(); + let ip_csum_after = pkt2.meta().inner_l3.csum(); assert_eq!(ulp_csum_after, ulp_csum_b4); assert_eq!(ip_csum_after, ip_csum_b4); - let meta = pkt1.meta(); - match meta.outer.ether.as_ref() { - Some(eth) => { - assert_eq!(eth.src, MacAddr::ZERO); - assert_eq!(eth.dst, MacAddr::ZERO); - } + let meta = pkt2.meta(); + assert_eq!(meta.outer_eth.source(), MacAddr::ZERO); + assert_eq!(meta.outer_eth.destination(), MacAddr::ZERO); - None => panic!("no outer ether header"), - } + assert_eq!(meta.outer_v6.source(), g1_cfg.phys_ip); + assert_eq!(meta.outer_v6.destination(), g2_cfg.phys_ip); - match meta.outer.ip.as_ref().unwrap() { - IpMeta::Ip6(ip6) => { - assert_eq!(ip6.src, g1_cfg.phys_ip); - assert_eq!(ip6.dst, g2_cfg.phys_ip); - } + // Geneve entropy. 
+ assert_eq!(meta.outer_udp.source(), 12700); + assert_eq!(meta.outer_encap.vni(), g1_cfg.vni); - val => panic!("expected outer IPv6, got: {:?}", val), - } + let eth = &meta.inner_eth; + assert_eq!(eth.source(), g1_cfg.guest_mac); + assert_eq!(eth.destination(), g2_cfg.guest_mac); + assert_eq!(eth.ethertype(), Ethertype::IPV4); - match meta.outer.encap.as_ref() { - Some(EncapMeta::Geneve(geneve)) => { - assert_eq!(geneve.entropy, 12700); - assert_eq!(geneve.vni, Vni::new(g1_cfg.vni).unwrap()); - } - - None => panic!("expected outer Geneve metadata"), - } - - let eth = meta.inner.ether; - assert_eq!(eth.src, g1_cfg.guest_mac); - assert_eq!(eth.dst, g2_cfg.guest_mac); - assert_eq!(eth.ether_type, EtherType::Ipv4); - - match meta.inner.ip.as_ref().unwrap() { - IpMeta::Ip4(ip4) => { - assert_eq!(ip4.src, g1_cfg.ipv4_cfg().unwrap().private_ip); - assert_eq!(ip4.dst, g2_cfg.ipv4_cfg().unwrap().private_ip); - assert_eq!(ip4.proto, Protocol::TCP); + match &meta.inner_l3 { + ValidL3::Ipv4(ip4) => { + assert_eq!(ip4.source(), g1_cfg.ipv4_cfg().unwrap().private_ip); + assert_eq!( + ip4.destination(), + g2_cfg.ipv4_cfg().unwrap().private_ip + ); + assert_eq!(ip4.protocol(), IngotIpProto::TCP); } - - ip6 => panic!("expected inner IPv4 metadata, got IPv6: {:?}", ip6), + _ => panic!("expected inner IPv4 metadata, got IPv6"), } - match meta.inner.ulp.as_ref().unwrap() { - UlpMeta::Tcp(tcp) => { - assert_eq!(tcp.src, 44490); - assert_eq!(tcp.dst, 80); + match &meta.inner_ulp { + ValidUlp::Tcp(tcp) => { + assert_eq!(tcp.source(), 44490); + assert_eq!(tcp.destination(), 80); } - ulp => panic!("expected inner TCP metadata, got: {:?}", ulp), + // todo: derive Debug on choice? 
+ // ulp => panic!("expected inner TCP metadata, got: {:?}", ulp), + _ => panic!("expected inner TCP metadata, got (other)"), } // ================================================================ @@ -596,15 +645,13 @@ fn guest_to_guest() { // of the real process we first dump the raw bytes of g1's // outgoing packet and then reparse it. // ================================================================ - let mblk = pkt1.unwrap_mblk(); - let mut pkt2 = unsafe { - Packet::wrap_mblk_and_parse(mblk, In, VpcParser::new()).unwrap() - }; - pcap_phys2.add_pkt(&pkt2); + let mut pkt2_m = pkt1_m; + pcap_phys2.add_pkt(&pkt2_m); + let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); - let res = g2.port.process(In, &mut pkt2, ActionMeta::new()); - pcap_guest2.add_pkt(&pkt2); - assert!(matches!(res, Ok(Modified))); + let res = g2.port.process(In, pkt2); + expect_modified!(res, pkt2_m); + pcap_guest2.add_pkt(&pkt2_m); incr!( g2, [ @@ -613,36 +660,41 @@ fn guest_to_guest() { "stats.port.in_modified, stats.port.in_uft_miss", ] ); - assert_eq!(pkt2.body_offset(), TCP4_SZ + HTTP_SYN_OPTS_LEN); - assert_eq!(pkt2.body_seg(), 0); + // assert_eq!(pkt2.body_offset(), TCP4_SZ + HTTP_SYN_OPTS_LEN); + // assert_eq!(pkt2.body_seg(), 0); + let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); let g2_meta = pkt2.meta(); - assert!(g2_meta.outer.ether.is_none()); - assert!(g2_meta.outer.ip.is_none()); - assert!(g2_meta.outer.encap.is_none()); - - let g2_eth = g2_meta.inner.ether; - assert_eq!(g2_eth.src, g1_cfg.gateway_mac); - assert_eq!(g2_eth.dst, g2_cfg.guest_mac); - assert_eq!(g2_eth.ether_type, EtherType::Ipv4); - - match g2_meta.inner.ip.as_ref().unwrap() { - IpMeta::Ip4(ip4) => { - assert_eq!(ip4.src, g1_cfg.ipv4_cfg().unwrap().private_ip); - assert_eq!(ip4.dst, g2_cfg.ipv4_cfg().unwrap().private_ip); - assert_eq!(ip4.proto, Protocol::TCP); - } - ip6 => panic!("expected inner IPv4 metadata, got IPv6: {:?}", ip6), + // TODO: can we have a convenience method that verifies 
that the + // emitspec was a rewind/drop from the head of the pkt? + + let g2_eth = &g2_meta.inner_eth; + assert_eq!(g2_eth.source(), g1_cfg.gateway_mac); + assert_eq!(g2_eth.destination(), g2_cfg.guest_mac); + assert_eq!(g2_eth.ethertype(), Ethertype::IPV4); + + match &g2_meta.inner_l3 { + ValidL3::Ipv4(ip4) => { + assert_eq!(ip4.source(), g1_cfg.ipv4_cfg().unwrap().private_ip); + assert_eq!( + ip4.destination(), + g2_cfg.ipv4_cfg().unwrap().private_ip + ); + assert_eq!(ip4.protocol(), IngotIpProto::TCP); + } + _ => panic!("expected inner IPv4 metadata, got IPv6"), } - match g2_meta.inner.ulp.as_ref().unwrap() { - UlpMeta::Tcp(tcp) => { - assert_eq!(tcp.src, 44490); - assert_eq!(tcp.dst, 80); + match &g2_meta.inner_ulp { + ValidUlp::Tcp(tcp) => { + assert_eq!(tcp.source(), 44490); + assert_eq!(tcp.destination(), 80); } - ulp => panic!("expected inner TCP metadata, got: {:?}", ulp), + // todo: derive Debug on choice? + // ulp => panic!("expected inner TCP metadata, got: {:?}", ulp), + _ => panic!("expected inner TCP metadata, got (other)"), } } @@ -682,7 +734,8 @@ fn guest_to_guest_diff_vpc_no_peer() { // verify the packet is dropped. 
// ================================================================ let mut g1_pkt = http_syn(&g1_cfg, &g2_cfg); - let res = g1.port.process(Out, &mut g1_pkt, ActionMeta::new()); + let pkt1 = parse_outbound(&mut g1_pkt, GenericUlp {}).unwrap(); + let res = g1.port.process(Out, pkt1); assert_drop!( res, DropReason::Layer { name: "overlay", reason: DenyReason::Action } @@ -719,19 +772,20 @@ fn guest_to_internet_ipv4() { // Generate a TCP SYN packet from g1 to zinascii.com // ================================================================ let dst_ip = "52.10.128.69".parse().unwrap(); - let mut pkt1 = http_syn2( + let mut pkt1_m = http_syn2( g1_cfg.guest_mac, g1_cfg.ipv4_cfg().unwrap().private_ip, GW_MAC_ADDR, dst_ip, ); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); // ================================================================ // Run the packet through g1's port in the outbound direction and // verify the resulting packet meets expectations. // ================================================================ - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified)), "bad result: {:?}", res); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -741,84 +795,68 @@ fn guest_to_internet_ipv4() { "stats.port.out_modified, stats.port.out_uft_miss", ] ); - assert_eq!(pkt1.body_offset(), VPC_ENCAP_SZ + TCP4_SZ + HTTP_SYN_OPTS_LEN); - assert_eq!(pkt1.body_seg(), 1); - let meta = pkt1.meta(); - match meta.outer.ether.as_ref() { - Some(eth) => { - assert_eq!(eth.src, MacAddr::ZERO); - assert_eq!(eth.dst, MacAddr::ZERO); - } - - None => panic!("no outer ether header"), - } - let inner_bytes = match meta.outer.ip.as_ref().unwrap() { - IpMeta::Ip6(ip6) => { - assert_eq!(ip6.src, g1_cfg.phys_ip); - - // Check that the encoded payload length in the outer header is - // correct, and matches the actual number of bytes in the rest of - // the packet. 
- let mut bytes = pkt1.get_rdr().copy_remaining(); - assert_eq!( - ip6.pay_len as usize, - bytes.len() - EtherHdr::SIZE - Ipv6Hdr::BASE_SIZE - ); + // Inbound parse asserts specifically that we have: + // - Ethernet + // - Ipv6 + // - Udp (dstport 6081) + // - Geneve + // - (Inner ULP headers) + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); + let meta = pkt1.meta(); - // Strip off the encapsulation headers - bytes.drain(..VPC_ENCAP_SZ); - bytes - } + assert_eq!(meta.outer_eth.source(), MacAddr::ZERO); + assert_eq!(meta.outer_eth.destination(), MacAddr::ZERO); - val => panic!("expected outer IPv6, got: {:?}", val), - }; + assert_eq!(meta.outer_v6.source(), g1_cfg.phys_ip); + // Check that the encoded payload length in the outer header is + // correct, and matches the actual number of bytes in the rest of + // the packet. + let len_post_v6 = + pkt1.len() - (&meta.outer_eth, &meta.outer_v6).packet_length(); + assert_eq!(meta.outer_v6.payload_len() as usize, len_post_v6); - match meta.outer.encap.as_ref() { - Some(EncapMeta::Geneve(geneve)) => { - assert_eq!(geneve.entropy, 24329); - } + assert_eq!(meta.outer_udp.source(), 24329); + assert_eq!(meta.outer_udp.length() as usize, len_post_v6); - None => panic!("expected outer Geneve metadata"), - } + assert_eq!(meta.inner_eth.source(), g1_cfg.guest_mac); + assert_eq!(meta.inner_eth.ethertype(), Ethertype::IPV4); - let eth = meta.inner.ether; - assert_eq!(eth.src, g1_cfg.guest_mac); - assert_eq!(eth.ether_type, EtherType::Ipv4); + match &meta.inner_l3 { + ValidL3::Ipv4(ip4) => { + assert_eq!(ip4.source(), g1_cfg.snat().external_ip); + assert_eq!(ip4.destination(), dst_ip); + assert_eq!(ip4.protocol(), IngotIpProto::TCP); - match meta.inner.ip.as_ref().unwrap() { - IpMeta::Ip4(ip4) => { - assert_eq!(ip4.src, g1_cfg.snat().external_ip); - assert_eq!(ip4.dst, dst_ip); - assert_eq!(ip4.proto, Protocol::TCP); + let inner_len = len_post_v6 + - (&meta.outer_udp, &meta.outer_encap, &meta.inner_eth) + 
.packet_length(); // Check that the encoded payload length in the inner header is // correct, and matches the actual number of bytes in the rest of // the packet. // IPv4 total length _DOES_ include the IPv4 header. - assert_eq!( - ip4.total_len as usize, - inner_bytes.len() - EtherHdr::SIZE, - ); + assert_eq!(ip4.total_len() as usize, inner_len,); } - - ip6 => panic!("expected inner IPv4 metadata, got IPv6: {:?}", ip6), + _ => panic!("expected inner IPv4 metadata, got IPv6"), } - match meta.inner.ulp.as_ref().unwrap() { - UlpMeta::Tcp(tcp) => { + match &meta.inner_ulp { + ValidUlp::Tcp(tcp) => { assert_eq!( - tcp.src, - g1_cfg.snat().ports.clone().next_back().unwrap(), + tcp.source(), + g1_cfg.snat().ports.clone().next_back().unwrap() ); - assert_eq!(tcp.dst, 80); + assert_eq!(tcp.destination(), 80); } - ulp => panic!("expected inner TCP metadata, got: {:?}", ulp), + // todo: derive Debug on choice? + // ulp => panic!("expected inner TCP metadata, got: {:?}", ulp), + _ => panic!("expected inner TCP metadata, got (other)"), } let mut pcap_guest = PcapBuilder::new("guest_to_internet_ipv4.pcap"); - pcap_guest.add_pkt(&pkt1); + pcap_guest.add_pkt(&pkt1_m); } // Verify that a guest can communicate with the internet over IPv6. @@ -843,7 +881,7 @@ fn guest_to_internet_ipv6() { // Generate a TCP SYN packet from g1 to example.com // ================================================================ let dst_ip = "2606:2800:220:1:248:1893:25c8:1946".parse().unwrap(); - let mut pkt1 = http_syn2( + let mut pkt1_m = http_syn2( g1_cfg.guest_mac, g1_cfg.ipv6_cfg().unwrap().private_ip, GW_MAC_ADDR, @@ -854,8 +892,9 @@ fn guest_to_internet_ipv6() { // Run the packet through g1's port in the outbound direction and // verify the resulting packet meets expectations. 
// ================================================================ - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified)), "bad result: {:?}", res); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -866,85 +905,66 @@ fn guest_to_internet_ipv6() { ] ); - assert_eq!(pkt1.body_offset(), VPC_ENCAP_SZ + TCP6_SZ + HTTP_SYN_OPTS_LEN); - assert_eq!(pkt1.body_seg(), 1); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); let meta = pkt1.meta(); - match meta.outer.ether.as_ref() { - Some(eth) => { - assert_eq!(eth.src, MacAddr::ZERO); - assert_eq!(eth.dst, MacAddr::ZERO); - } - - None => panic!("no outer ether header"), - } - - let inner_bytes = match meta.outer.ip.as_ref().unwrap() { - IpMeta::Ip6(ip6) => { - assert_eq!(ip6.src, g1_cfg.phys_ip); - // Check that the encoded payload length in the outer header is - // correct, and matches the actual number of bytes in the rest of - // the packet. 
- let mut bytes = pkt1.get_rdr().copy_remaining(); - assert_eq!( - ip6.pay_len as usize, - bytes.len() - EtherHdr::SIZE - Ipv6Hdr::BASE_SIZE - ); - - // Strip off the encapsulation headers - bytes.drain(..VPC_ENCAP_SZ); - bytes - } - - val => panic!("expected outer IPv6, got: {:?}", val), - }; - - match meta.outer.encap.as_ref() { - Some(EncapMeta::Geneve(geneve)) => { - assert_eq!(geneve.entropy, 63246); - } - - None => panic!("expected outer Geneve metadata"), - } - - let eth = meta.inner.ether; - assert_eq!(eth.src, g1_cfg.guest_mac); - assert_eq!(eth.ether_type, EtherType::Ipv6); - - match meta.inner.ip.as_ref().unwrap() { - IpMeta::Ip6(ip6) => { - assert_eq!(ip6.src, g1_cfg.snat6().external_ip); - assert_eq!(ip6.dst, dst_ip); - assert_eq!(ip6.proto, Protocol::TCP); - assert_eq!(ip6.next_hdr, IpProtocol::Tcp); + assert_eq!(meta.outer_eth.source(), MacAddr::ZERO); + assert_eq!(meta.outer_eth.destination(), MacAddr::ZERO); + + assert_eq!(meta.outer_v6.source(), g1_cfg.phys_ip); + // Check that the encoded payload length in the outer header is + // correct, and matches the actual number of bytes in the rest of + // the packet. 
+ let len_post_v6 = + pkt1.len() - (&meta.outer_eth, &meta.outer_v6).packet_length(); + assert_eq!(meta.outer_v6.payload_len() as usize, len_post_v6); + + assert_eq!(meta.outer_udp.source(), 24329); + assert_eq!(meta.outer_udp.length() as usize, len_post_v6); + + assert_eq!(meta.inner_eth.source(), g1_cfg.guest_mac); + assert_eq!(meta.inner_eth.ethertype(), Ethertype::IPV4); + + match &meta.inner_l3 { + ValidL3::Ipv6(ip6) => { + assert_eq!(ip6.source(), g1_cfg.snat6().external_ip); + assert_eq!(ip6.destination(), dst_ip); + assert_eq!(ip6.next_header(), IngotIpProto::TCP); + + let inner_len = len_post_v6 + - ( + &meta.outer_udp, + &meta.outer_encap, + &meta.inner_eth, + &meta.inner_l3, + ) + .packet_length(); // Check that the encoded payload length in the inner header is // correct, and matches the actual number of bytes in the rest of // the packet. // IPv6 payload length _DOES NOT_ include the IPv6 header. - assert_eq!( - ip6.pay_len as usize, - inner_bytes.len() - EtherHdr::SIZE - Ipv6Hdr::BASE_SIZE - ); + assert_eq!(ip6.payload_len() as usize, inner_len); } - - ip4 => panic!("expected inner IPv6 metadata, got IPv4: {:?}", ip4), + _ => panic!("expected inner IPv4 metadata, got IPv6"), } - match meta.inner.ulp.as_ref().unwrap() { - UlpMeta::Tcp(tcp) => { + match &meta.inner_ulp { + ValidUlp::Tcp(tcp) => { assert_eq!( - tcp.src, - g1_cfg.snat6().ports.clone().next_back().unwrap(), + tcp.source(), + g1_cfg.snat6().ports.clone().next_back().unwrap() ); - assert_eq!(tcp.dst, 80); + assert_eq!(tcp.destination(), 80); } - ulp => panic!("expected inner TCP metadata, got: {:?}", ulp), + // todo: derive Debug on choice? 
+ // ulp => panic!("expected inner TCP metadata, got: {:?}", ulp), + _ => panic!("expected inner TCP metadata, got (other)"), } let mut pcap_guest = PcapBuilder::new("guest_to_internet_ipv6.pcap"); - pcap_guest.add_pkt(&pkt1); + pcap_guest.add_pkt(&pkt1_m); } fn multi_external_setup( @@ -1104,9 +1124,10 @@ fn check_external_ip_inbound_behaviour( flow_port, 80, ); - let mut pkt1 = encap_external(pkt1, bsvc_phys, g1_phys); + let mut pkt1_m = encap_external(pkt1, bsvc_phys, g1_phys); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); - let res = port.port.process(In, &mut pkt1, ActionMeta::new()); + let res = port.port.process(In, pkt1); if old_ip_gone { // If we lose an external IP, the failure mode is obvious: // invalidate the action, do not rewrite dst IP to target the @@ -1125,10 +1146,7 @@ fn check_external_ip_inbound_behaviour( ] ); } else { - assert!( - matches!(res, Ok(Modified)), - "bad result for ip {ext_ip:?}: {res:?}" - ); + expect_modified!(res, pkt1_m); let rules = [ "firewall.flows.out, firewall.flows.in", "nat.flows.out, nat.flows.in", @@ -1142,8 +1160,11 @@ fn check_external_ip_inbound_behaviour( IpAddr::Ip4(_) => { let private_ip = cfg.ipv4().private_ip; if !old_ip_gone { + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}) + .unwrap() + .to_full_meta(); assert_eq!( - pkt1.meta().inner_ip4().unwrap().dst, + pkt1.meta().inner_ip4().unwrap().destination(), private_ip ); } @@ -1152,8 +1173,11 @@ fn check_external_ip_inbound_behaviour( IpAddr::Ip6(_) => { let private_ip = cfg.ipv6().private_ip; if !old_ip_gone { + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}) + .unwrap() + .to_full_meta(); assert_eq!( - pkt1.meta().inner_ip6().unwrap().dst, + pkt1.meta().inner_ip6().unwrap().destination(), private_ip ); } @@ -1173,20 +1197,23 @@ fn check_external_ip_inbound_behaviour( // IP (ephemeral) that the wrong src_ip will be selected (as it will // draw from a separate pool). 
// ================================================================ - let mut pkt2 = http_syn_ack2( + let mut pkt2_m = http_syn_ack2( cfg.guest_mac, private_ip, GW_MAC_ADDR, partner_ip, flow_port, ); - let res = port.port.process(Out, &mut pkt2, ActionMeta::new()); + let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = port.port.process(Out, pkt2); + expect_modified!(res, pkt2_m); + let pkt2 = + parse_inbound(&mut pkt2_m, VpcParser {}).unwrap().to_full_meta(); if old_ip_gone { // Failure mode here is different (assuming we have at least one // external IP). The packet must fail to send via the old IP, // invalidate the entry, and then choose the new external IP. - assert!(matches!(res, Ok(Modified)), "bad result: {:?}", res); update!( port, [ @@ -1199,18 +1226,17 @@ fn check_external_ip_inbound_behaviour( match ext_ip { IpAddr::Ip4(ip) => { - let chosen_ip = pkt2.meta().inner_ip4().unwrap().src; + let chosen_ip = pkt2.meta().inner_ip4().unwrap().source(); assert_ne!(chosen_ip, ip); assert_ne!(IpAddr::from(chosen_ip), private_ip); } IpAddr::Ip6(ip) => { - let chosen_ip = pkt2.meta().inner_ip6().unwrap().src; + let chosen_ip = pkt2.meta().inner_ip6().unwrap().source(); assert_ne!(chosen_ip, ip); assert_ne!(IpAddr::from(chosen_ip), private_ip); } }; } else { - assert!(matches!(res, Ok(Modified)), "bad result: {:?}", res); update!( port, [ @@ -1220,10 +1246,10 @@ fn check_external_ip_inbound_behaviour( ); match ext_ip { IpAddr::Ip4(ip) => { - assert_eq!(pkt2.meta().inner_ip4().unwrap().src, ip); + assert_eq!(pkt2.meta().inner_ip4().unwrap().source(), ip); } IpAddr::Ip6(ip) => { - assert_eq!(pkt2.meta().inner_ip6().unwrap().src, ip); + assert_eq!(pkt2.meta().inner_ip6().unwrap().source(), ip); } }; } @@ -1280,10 +1306,11 @@ fn external_ip_balanced_over_floating_ips() { flow_port, 80, ); - let mut pkt = encap_external(pkt, bsvc_phys, g1_phys); + let mut pkt_m = encap_external(pkt, bsvc_phys, g1_phys); + let pkt = parse_outbound(&mut pkt_m, VpcParser 
{}).unwrap(); - let res = g1.port.process(Out, &mut pkt, ActionMeta::new()); - assert!(matches!(res, Ok(Modified)), "bad result: {res:?}"); + let res = g1.port.process(Out, pkt); + expect_modified!(res, pkt_m); incr!( g1, [ @@ -1294,12 +1321,15 @@ fn external_ip_balanced_over_floating_ips() { ] ); + let pkt = + parse_inbound(&mut pkt_m, VpcParser {}).unwrap().to_full_meta(); + match partner_ip { IpAddr::Ip4(_) => { - seen_v4s.push(pkt.meta().inner_ip4().unwrap().src); + seen_v4s.push(pkt.meta().inner_ip4().unwrap().source()); } IpAddr::Ip6(_) => { - seen_v6s.push(pkt.meta().inner_ip6().unwrap().src); + seen_v6s.push(pkt.meta().inner_ip6().unwrap().source()); } } } @@ -1378,13 +1408,11 @@ fn external_ip_epoch_affinity_preserved() { }; let pkt1 = http_syn2(BS_MAC_ADDR, partner_ip, g1_cfg.guest_mac, ext_ip); - let mut pkt1 = encap_external(pkt1, bsvc_phys, g1_phys); + let mut pkt1_m = encap_external(pkt1, bsvc_phys, g1_phys); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); - let res = g1.port.process(In, &mut pkt1, ActionMeta::new()); - assert!( - matches!(res, Ok(Modified)), - "bad result for ip {ext_ip:?}: {res:?}" - ); + let res = g1.port.process(In, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -1407,15 +1435,16 @@ fn external_ip_epoch_affinity_preserved() { // The reply packet must still originate from the ephemeral port // after an epoch change. 
// ================================================================ - let mut pkt2 = http_syn_ack2( + let mut pkt2_m = http_syn_ack2( g1_cfg.guest_mac, private_ip, GW_MAC_ADDR, partner_ip, 44490, ); - let res = g1.port.process(Out, &mut pkt2, ActionMeta::new()); - assert!(matches!(res, Ok(Modified)), "bad result: {:?}", res); + let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt2); + expect_modified!(res, pkt2_m); update!( g1, [ @@ -1423,12 +1452,15 @@ fn external_ip_epoch_affinity_preserved() { "incr:stats.port.out_modified, stats.port.out_uft_miss", ] ); + + let pkt2 = + parse_inbound(&mut pkt2_m, VpcParser {}).unwrap().to_full_meta(); match ext_ip { IpAddr::Ip4(ip) => { - assert_eq!(pkt2.meta().inner_ip4().unwrap().src, ip); + assert_eq!(pkt2.meta().inner_ip4().unwrap().source(), ip); } IpAddr::Ip6(ip) => { - assert_eq!(pkt2.meta().inner_ip6().unwrap().src, ip); + assert_eq!(pkt2.meta().inner_ip6().unwrap().source(), ip); } }; } @@ -1508,14 +1540,18 @@ struct IcmpSnatParams { } fn unpack_and_verify_icmp( - pkt: &Packet, + pkt: &mut MsgBlk, cfg: &VpcCfg, params: &IcmpSnatParams, dir: Direction, seq_no: u16, body_seg: usize, ) { - let meta = pkt.meta(); + let parsed = match dir { + In => parse_inbound(pkt, VpcParser {}).unwrap().to_full_meta(), + Out => parse_outbound(pkt, VpcParser {}).unwrap().to_full_meta(), + }; + let meta = parsed.meta(); let (src_eth, dst_eth, src_ip, dst_ip, encapped, ident) = match dir { Direction::Out => ( @@ -1536,54 +1572,57 @@ fn unpack_and_verify_icmp( ), }; - let eth = meta.inner.ether; - assert_eq!(eth.src, src_eth); - assert_eq!(eth.dst, dst_eth); + let eth = meta.inner_ether(); + assert_eq!(eth.source(), src_eth); + assert_eq!(eth.destination(), dst_eth); - match (dst_ip, meta.inner.ip.as_ref().unwrap()) { - (IpAddr::Ip4(_), IpMeta::Ip4(meta)) => { - assert_eq!(eth.ether_type, EtherType::Ipv4); - assert_eq!(IpAddr::from(meta.src), src_ip); - assert_eq!(IpAddr::from(meta.dst), 
dst_ip); - assert_eq!(meta.proto, Protocol::ICMP); + match (dst_ip, meta.inner_l3().as_ref().unwrap()) { + (IpAddr::Ip4(_), L3::Ipv4(meta)) => { + assert_eq!(eth.ethertype(), Ethertype::IPV4); + assert_eq!(IpAddr::from(meta.source()), src_ip); + assert_eq!(IpAddr::from(meta.destination()), dst_ip); + assert_eq!(meta.protocol(), IngotIpProto::ICMP); - unpack_and_verify_icmp4(pkt, ident, seq_no, encapped, body_seg); + unpack_and_verify_icmp4(&parsed, ident, seq_no, encapped, body_seg); } - (IpAddr::Ip6(_), IpMeta::Ip6(meta)) => { - assert_eq!(eth.ether_type, EtherType::Ipv6); - assert_eq!(IpAddr::from(meta.src), src_ip); - assert_eq!(IpAddr::from(meta.dst), dst_ip); - assert_eq!(meta.proto, Protocol::ICMPv6); + (IpAddr::Ip6(_), L3::Ipv6(meta)) => { + assert_eq!(eth.ethertype(), Ethertype::IPV6); + assert_eq!(IpAddr::from(meta.source()), src_ip); + assert_eq!(IpAddr::from(meta.destination()), dst_ip); + assert_eq!(meta.next_header(), IngotIpProto::ICMP_V6); unpack_and_verify_icmp6( - pkt, ident, seq_no, encapped, body_seg, meta.src, meta.dst, + &parsed, + ident, + seq_no, + encapped, + body_seg, + meta.source(), + meta.destination(), ); } (IpAddr::Ip4(_), ip6) => { - panic!("expected inner IPv4 metadata, got IPv6: {:?}", ip6) + panic!("expected inner IPv4 metadata, got IPv6") } (IpAddr::Ip6(_), ip4) => { - panic!("expected inner IPv6 metadata, got IPv4: {:?}", ip4) + panic!("expected inner IPv6 metadata, got IPv4") } } } fn unpack_and_verify_icmp4( - pkt: &Packet, + pkt: &Packet2, expected_ident: u16, seq_no: u16, encapped: bool, body_seg: usize, ) { - let icmp_offset = pkt.body_offset() - IcmpHdr::SIZE; - let tgt_offset = IP4_SZ + if encapped { VPC_ENCAP_SZ } else { 0 }; - assert_eq!(icmp_offset, tgt_offset); - assert_eq!(pkt.body_seg(), body_seg); - // Because we treat ICMPv4 as a full-fledged ULP, we need to // unsplit the emitted header from the body. 
- let pkt_bytes = pkt.all_bytes(); - let icmp = Icmpv4Packet::new_checked(&pkt_bytes[icmp_offset..]).unwrap(); + let mut icmp = pkt.meta().inner_ulp().unwrap().emit_vec(); + icmp.extend(pkt.meta().copy_remaining().into_iter()); + + let icmp = Icmpv4Packet::new_checked(&icmp[..]).unwrap(); assert!(icmp.verify_checksum()); assert_eq!(icmp.echo_ident(), expected_ident); @@ -1591,7 +1630,7 @@ fn unpack_and_verify_icmp4( } fn unpack_and_verify_icmp6( - pkt: &Packet, + pkt: &Packet2, expected_ident: u16, seq_no: u16, encapped: bool, @@ -1599,23 +1638,14 @@ fn unpack_and_verify_icmp6( src_ip: Ipv6Addr, dst_ip: Ipv6Addr, ) { - // Length is factored into pseudo header calc. - // We know there are no ext headers. - let pay_len = pkt.meta().inner_ip6().unwrap().pay_len as usize; - let src_ip = smoltcp::wire::Ipv6Address::from(src_ip).into(); let dst_ip = smoltcp::wire::Ipv6Address::from(dst_ip).into(); - let icmp_offset = pkt.body_offset() - IcmpHdr::SIZE; - let tgt_offset = IP6_SZ + if encapped { VPC_ENCAP_SZ } else { 0 }; - assert_eq!(icmp_offset, tgt_offset); - assert_eq!(pkt.body_seg(), body_seg); - - // Because we treat ICMPv6 as a full-fledged ULP, we need to + // Because we treat ICMPv4 as a full-fledged ULP, we need to // unsplit the emitted header from the body. - let pkt_bytes = pkt.all_bytes(); - let icmp = Icmpv6Packet::new_checked(&pkt_bytes[icmp_offset..][..pay_len]) - .unwrap(); + let mut icmp = pkt.meta().inner_ulp().unwrap().emit_vec(); + icmp.extend(pkt.meta().copy_remaining().into_iter()); + let icmp = Icmpv6Packet::new_checked(&icmp[..]).unwrap(); assert!(icmp.verify_checksum(&src_ip, &dst_ip)); assert_eq!(icmp.echo_ident(), expected_ident); @@ -1688,7 +1718,7 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { // ================================================================ // Verify echo request rewrite. 
// ================================================================ - let mut pkt1 = gen_icmp_echo_req( + let mut pkt1_m = gen_icmp_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, private_ip, @@ -1698,9 +1728,10 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { &data[..], 2, ); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified)), "bad result: {:?}", res); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -1711,12 +1742,12 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { ] ); - unpack_and_verify_icmp(&pkt1, &g1_cfg, &params, Out, seq_no, 0); + unpack_and_verify_icmp(&mut pkt1_m, &g1_cfg, &params, Out, seq_no, 0); // ================================================================ // Verify echo reply rewrite. // ================================================================ - let mut pkt2 = gen_icmp_echo_reply( + let mut pkt2_m = gen_icmp_echo_reply( BS_MAC_ADDR, g1_cfg.guest_mac, dst_ip, @@ -1726,6 +1757,7 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { &data[..], 3, ); + let g1_phys = TestIpPhys { ip: g1_cfg.phys_ip, mac: g1_cfg.guest_mac, @@ -1736,13 +1768,14 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { mac: BS_MAC_ADDR, vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), }; - pkt2 = encap_external(pkt2, bsvc_phys, g1_phys); + pkt2_m = encap_external(pkt2_m, bsvc_phys, g1_phys); + let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); - let res = g1.port.process(In, &mut pkt2, ActionMeta::new()); - assert!(matches!(res, Ok(Modified)), "bad result: {:?}", res); + let res = g1.port.process(In, pkt2); + expect_modified!(res, pkt2_m); incr!(g1, ["uft.in", "stats.port.in_modified, stats.port.in_uft_miss"]); - unpack_and_verify_icmp(&pkt2, &g1_cfg, &params, In, seq_no, 0); + unpack_and_verify_icmp(&mut pkt2_m, &g1_cfg, &params, In, seq_no, 0); //
================================================================ // Send ICMP Echo Req a second time. We want to verify that a) the @@ -1750,7 +1783,7 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { // transformation. // ================================================================ seq_no += 1; - let mut pkt3 = gen_icmp_echo_req( + let mut pkt3_m = gen_icmp_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, private_ip, @@ -1760,21 +1793,22 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { &data[..], 1, ); + let pkt3 = parse_outbound(&mut pkt3_m, VpcParser {}).unwrap(); assert_eq!(g1.port.stats_snap().out_uft_hit, 0); - let res = g1.port.process(Out, &mut pkt3, ActionMeta::new()); - assert!(matches!(res, Ok(Modified)), "bad result: {:?}", res); + let res = g1.port.process(Out, pkt3); + expect_modified!(res, pkt3_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); assert_eq!(g1.port.stats_snap().out_uft_hit, 1); - unpack_and_verify_icmp(&pkt3, &g1_cfg, &params, Out, seq_no, 1); + unpack_and_verify_icmp(&mut pkt3_m, &g1_cfg, &params, Out, seq_no, 1); // ================================================================ // Process ICMP Echo Reply a second time. Once again, this time we // want to verify that the body transformation comes from the UFT // entry.
// ================================================================ - let mut pkt4 = gen_icmp_echo_reply( + let mut pkt4_m = gen_icmp_echo_reply( BS_MAC_ADDR, g1_cfg.guest_mac, dst_ip, @@ -1784,14 +1818,15 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { &data[..], 2, ); + let pkt4 = parse_inbound(&mut pkt4_m, VpcParser {}).unwrap(); assert_eq!(g1.port.stats_snap().in_uft_hit, 0); - let res = g1.port.process(In, &mut pkt4, ActionMeta::new()); - assert!(matches!(res, Ok(Modified)), "bad result: {:?}", res); + let res = g1.port.process(In, pkt4); + expect_modified!(res, pkt4_m); incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); assert_eq!(g1.port.stats_snap().in_uft_hit, 1); - unpack_and_verify_icmp(&pkt4, &g1_cfg, &params, In, seq_no, 0); + unpack_and_verify_icmp(&mut pkt4_m, &g1_cfg, &params, In, seq_no, 0); // ================================================================ // Insert a new packet along the same S/D pair: this should occupy @@ -1800,7 +1835,7 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { let new_params = IcmpSnatParams { icmp_id: 8, snat_port: mapped_port - 1, ..params }; - let mut pkt5 = gen_icmp_echo_req( + let mut pkt5_m = gen_icmp_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, private_ip, @@ -1810,9 +1845,10 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { &data[..], 2, ); + let pkt5 = parse_outbound(&mut pkt5_m, VpcParser {}).unwrap(); - let res = g1.port.process(Out, &mut pkt5, ActionMeta::new()); - assert!(matches!(res, Ok(Modified)), "bad result: {:?}", res); + let res = g1.port.process(Out, pkt5); + expect_modified!(res, pkt5_m); incr!( g1, [ @@ -1823,7 +1859,7 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { ] ); - unpack_and_verify_icmp(&pkt5, &g1_cfg, &new_params, Out, seq_no, 0); + unpack_and_verify_icmp(&mut pkt5_m, &g1_cfg, &new_params, Out, seq_no, 0); } #[test] From 572c1694f2dfb12a93bcbf22c394686a666e5dd6 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 11 Oct 2024 18:36:25 +0100 Subject: [PATCH
043/115] More progress... --- lib/opte-test-utils/src/icmp.rs | 4 +- lib/opte/src/engine/ingot_base.rs | 10 + lib/opte/src/engine/ingot_packet.rs | 7 + lib/oxide-vpc/tests/fuzz_regression.rs | 16 +- lib/oxide-vpc/tests/integration_tests.rs | 843 ++++++++++++----------- 5 files changed, 475 insertions(+), 405 deletions(-) diff --git a/lib/opte-test-utils/src/icmp.rs b/lib/opte-test-utils/src/icmp.rs index 5eadfc2b..280bb167 100644 --- a/lib/opte-test-utils/src/icmp.rs +++ b/lib/opte-test-utils/src/icmp.rs @@ -157,8 +157,8 @@ pub fn gen_icmp_echo( match n_segments { 1 => { let mut pkt = MsgBlk::new_ethernet(total_len); - pkt.emit_back(&(eth, ip)); - pkt.resize(total_len); + pkt.emit_back(&(eth, ip)).unwrap(); + pkt.resize(total_len).unwrap(); pkt.write_bytes_back(&icmp_bytes).unwrap(); return pkt; diff --git a/lib/opte/src/engine/ingot_base.rs b/lib/opte/src/engine/ingot_base.rs index 521ec14e..e6c7abd3 100644 --- a/lib/opte/src/engine/ingot_base.rs +++ b/lib/opte/src/engine/ingot_base.rs @@ -67,6 +67,16 @@ impl ValidUlp { } } +impl Ulp { + pub fn src_port(&self) -> Option { + match self { + Ulp::Tcp(t) => Some(t.source()), + Ulp::Udp(u) => Some(u.source()), + _ => None, + } + } +} + impl ValidL3 { pub fn csum(&self) -> [u8; 2] { match self { diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index a889eb75..1079f86a 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -556,6 +556,13 @@ impl MsgBlk { Self { inner } } + pub fn copy(buf: impl AsRef<[u8]>) -> Self { + let mut out = Self::new(buf.as_ref().len()); + // Unwarp safety -- just allocated length of input buffer. 
+ out.write_bytes_back(buf).unwrap(); + out + } + pub fn new_pkt(emit: impl Emit + EmitDoesNotRelyOnBufContents) -> Self { let mut pkt = Self::new(emit.packet_length()); pkt.emit_back(emit).unwrap(); diff --git a/lib/oxide-vpc/tests/fuzz_regression.rs b/lib/oxide-vpc/tests/fuzz_regression.rs index 0e159429..9e091f22 100644 --- a/lib/oxide-vpc/tests/fuzz_regression.rs +++ b/lib/oxide-vpc/tests/fuzz_regression.rs @@ -9,6 +9,8 @@ //! These tests capture past known-bad packets which have made some part //! of OPTE panic in the past, and ensure that it does not today. +use opte::engine::ingot_packet::MsgBlk; +use opte::engine::ingot_packet::Packet2; use opte::engine::packet::Packet; use opte::engine::Direction; use oxide_vpc::engine::VpcParser; @@ -110,19 +112,17 @@ fn run_tests( #[test] fn parse_in_regression() { run_tests("parse_in", |data| { - let mut pkt = Packet::alloc_and_expand(data.len()); - let mut wtr = pkt.seg0_wtr(); - wtr.write(data).unwrap(); - let _ = pkt.parse(Direction::In, VpcParser {}); + let mut msg = MsgBlk::copy(data); + let parsed = Packet2::new(msg.iter_mut()); + let _ = parsed.parse_inbound(VpcParser {}); }); } #[test] fn parse_out_regression() { run_tests("parse_out", |data| { - let mut pkt = Packet::alloc_and_expand(data.len()); - let mut wtr = pkt.seg0_wtr(); - wtr.write(data).unwrap(); - let _ = pkt.parse(Direction::Out, VpcParser {}); + let mut msg = MsgBlk::copy(data); + let parsed = Packet2::new(msg.iter_mut()); + let _ = parsed.parse_outbound(VpcParser {}); }); } diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index 889e5dd7..1f7a03fd 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -30,8 +30,10 @@ use opte::engine::headers::EncapMeta; use opte::engine::headers::IpMeta; use opte::engine::headers::UlpMeta; use opte::engine::icmp::IcmpHdr; +use opte::engine::ingot_base::Ethernet; use opte::engine::ingot_base::EthernetRef; use 
opte::engine::ingot_base::Ipv4Ref; +use opte::engine::ingot_base::Ipv6; use opte::engine::ingot_base::Ipv6Ref; use opte::engine::ingot_base::ValidL3; use opte::engine::ingot_base::ValidUlp; @@ -62,9 +64,11 @@ use opte::engine::udp::UdpMeta; use opte::engine::Direction; use opte::engine::NetworkParser; use opte::ingot::geneve::GeneveRef; +use opte::ingot::icmp::IcmpV6Ref; use opte::ingot::tcp::TcpRef; use opte::ingot::types::Emit; use opte::ingot::types::HeaderLen; +use opte::ingot::udp::Udp; use opte::ingot::udp::UdpRef; use opte_test_utils as common; use oxide_vpc::api::ExternalIpCfg; @@ -76,6 +80,7 @@ use pcap::*; use smoltcp::phy::ChecksumCapabilities as CsumCapab; use smoltcp::wire::Icmpv4Packet; use smoltcp::wire::Icmpv4Repr; +use smoltcp::wire::Icmpv6Message; use smoltcp::wire::Icmpv6Packet; use smoltcp::wire::Icmpv6Repr; use smoltcp::wire::IpAddress; @@ -86,6 +91,7 @@ use smoltcp::wire::NdiscRouterFlags; use smoltcp::wire::RawHardwareAddress; use std::prelude::v1::*; use std::time::Duration; +use zerocopy::FromBytes; use zerocopy::IntoBytes; const IP4_SZ: usize = EtherHdr::SIZE + Ipv4Hdr::BASE_SIZE; @@ -1864,40 +1870,43 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { #[test] fn bad_ip_len() { - let cfg = lab_cfg(); - - let eth = EtherMeta { - src: cfg.guest_mac, - dst: MacAddr::BROADCAST, - ether_type: EtherType::Ipv4, - }; - - let ip = Ipv4Meta { - src: "0.0.0.0".parse().unwrap(), - dst: Ipv4Addr::LOCAL_BCAST, - proto: Protocol::UDP, - ttl: 64, - ident: 1, - hdr_len: 20, - // We write a total legnth of 4 bytes, which is completely - // bogus for an IP header and should return an error during - // processing. 
- total_len: 4, - ..Default::default() - }; - - let udp = UdpMeta { src: 68, dst: 67, ..Default::default() }; - let total_len = EtherHdr::SIZE + usize::from(ip.hdr_len) + udp.hdr_len(); - let mut pkt = Packet::alloc_and_expand(total_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); - let res = pkt.parse(Out, VpcParser::new()); - assert_eq!( - res.err().unwrap(), - Ipv4HdrError::BadTotalLen { total_len: 4 }.into() - ); + // TODO(kyle) + panic!() + + // let cfg = lab_cfg(); + + // let eth = EtherMeta { + // src: cfg.guest_mac, + // dst: MacAddr::BROADCAST, + // ether_type: EtherType::Ipv4, + // }; + + // let ip = Ipv4Meta { + // src: "0.0.0.0".parse().unwrap(), + // dst: Ipv4Addr::LOCAL_BCAST, + // proto: Protocol::UDP, + // ttl: 64, + // ident: 1, + // hdr_len: 20, + // // We write a total legnth of 4 bytes, which is completely + // // bogus for an IP header and should return an error during + // // processing. 
+ // total_len: 4, + // ..Default::default() + // }; + + // let udp = UdpMeta { src: 68, dst: 67, ..Default::default() }; + // let total_len = EtherHdr::SIZE + usize::from(ip.hdr_len) + udp.hdr_len(); + // let mut pkt = Packet::alloc_and_expand(total_len); + // let mut wtr = pkt.seg0_wtr(); + // eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); + // ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); + // udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); + // let res = pkt.parse(Out, VpcParser::new()); + // assert_eq!( + // res.err().unwrap(), + // Ipv4HdrError::BadTotalLen { total_len: 4 }.into() + // ); } // Verify that OPTE generates a hairpin ARP reply when the guest @@ -1911,12 +1920,13 @@ fn arp_gateway() { g1.port.start(); set!(g1, "port_state=running"); - let eth_hdr = EtherHdrRaw { - dst: [0xff; 6], - src: cfg.guest_mac.bytes(), - ether_type: [0x08, 0x06], + let eth_hdr = Ethernet { + destination: MacAddr::BROADCAST, + source: cfg.guest_mac, + ethertype: Ethertype::ARP, }; + // TODO: ingot? let arp = ArpEthIpv4 { htype: 1, ptype: u16::from(EtherType::Ipv4), @@ -1929,34 +1939,40 @@ fn arp_gateway() { tpa: cfg.ipv4_cfg().unwrap().gateway_ip, }; - let mut bytes = vec![]; - bytes.extend_from_slice(eth_hdr.as_bytes()); + let mut bytes = eth_hdr.emit_vec(); bytes.extend_from_slice(ArpEthIpv4Raw::from(&arp).as_bytes()); - let mut pkt = Packet::copy(&bytes).parse(Out, VpcParser::new()).unwrap(); - let res = g1.port.process(Out, &mut pkt, ActionMeta::new()); + let mut pkt_m = MsgBlk::copy(bytes); + let pkt = parse_outbound(&mut pkt_m, VpcParser {}).unwrap(); + + let res = g1.port.process(Out, pkt); match res { - Ok(Hairpin(hppkt)) => { + Ok(Hairpin(mut hppkt)) => { // In this case we are parsing a hairpin reply, so we // can't use the VpcParser since it would expect any // inbound packet to be encapsulated. 
- let mut hppkt = hppkt.parse(In, GenericUlp {}).unwrap(); + let hppkt = parse_inbound(&mut hppkt, GenericUlp {}).unwrap(); let meta = hppkt.meta(); - let ethm = meta.inner.ether; - assert_eq!(ethm.dst, cfg.guest_mac); - assert_eq!(ethm.src, cfg.gateway_mac); - assert_eq!(ethm.ether_type, EtherType::Arp); - let eth_len = hppkt.hdr_offsets().inner.ether.hdr_len; - - let mut rdr = hppkt.get_rdr_mut(); - assert!(rdr.seek(eth_len).is_ok()); - let arp = ArpEthIpv4::parse(&mut rdr).unwrap(); - assert_eq!(arp.op, ArpOp::Reply); - assert_eq!(arp.ptype, u16::from(EtherType::Ipv4)); - assert_eq!(arp.sha, cfg.gateway_mac); - assert_eq!(arp.spa, cfg.ipv4_cfg().unwrap().gateway_ip); - assert_eq!(arp.tha, cfg.guest_mac); - assert_eq!(arp.tpa, cfg.ipv4_cfg().unwrap().private_ip); + let ethm = &meta.inner_eth; + assert_eq!(ethm.destination(), cfg.guest_mac); + assert_eq!(ethm.source(), cfg.gateway_mac); + assert_eq!(ethm.ethertype(), Ethertype::ARP); + + let body = hppkt.to_full_meta().meta().copy_remaining(); + + let (arp, _) = ArpEthIpv4Raw::ref_from_prefix(&body[..]).unwrap(); + assert_eq!(arp.op, ArpOp::Reply.to_be_bytes()); + assert_eq!(arp.ptype, Ethertype::IPV4.0.to_be_bytes()); + assert_eq!(MacAddr::from(arp.sha), cfg.gateway_mac); + assert_eq!( + Ipv4Addr::from(arp.spa), + cfg.ipv4_cfg().unwrap().gateway_ip + ); + assert_eq!(MacAddr::from(arp.tha), cfg.guest_mac); + assert_eq!( + Ipv4Addr::from(arp.tpa), + cfg.ipv4_cfg().unwrap().private_ip + ); } res => panic!("expected a Hairpin, got {:?}", res), @@ -1978,9 +1994,10 @@ fn flow_expiration() { // Run the packet through g1's port in the outbound direction and // verify the resulting packet meets expectations. 
// ================================================================ - let mut pkt1 = http_syn(&g1_cfg, &g2_cfg); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let mut pkt1_m = http_syn(&g1_cfg, &g2_cfg); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -2041,7 +2058,7 @@ fn test_guest_to_gateway_icmpv6_ping( // ================================================================ // Generate an ICMP Echo Request from G1 to Virtual GW // ================================================================ - let mut pkt1 = gen_icmp_echo_req( + let mut pkt1_m = gen_icmp_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, src_ip.into(), @@ -2051,15 +2068,16 @@ fn test_guest_to_gateway_icmpv6_ping( &data[..], 3, ); - pcap.add_pkt(&pkt1); + pcap.add_pkt(&pkt1_m); // ================================================================ // Run the Echo Request through g1's port in the outbound // direction and verify it results in an Echo Reply Hairpin packet // back to guest. // ================================================================ - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - let hp = match res { + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + let mut hp = match res { Ok(Hairpin(hp)) => hp, _ => panic!("expected Hairpin, got {:?}", res), }; @@ -2068,50 +2086,43 @@ fn test_guest_to_gateway_icmpv6_ping( // In this case we are parsing a hairpin reply, so we can't use // the VpcParser since it would expect any inbound packet to be // encapsulated. 
- let reply = hp.parse(In, GenericUlp {}).unwrap(); - pcap.add_pkt(&reply); - - // Ether + IPv6 + ICMPv6 - assert_eq!(reply.body_offset(), IP6_SZ + IcmpHdr::SIZE); - assert_eq!(reply.body_seg(), 0); + pcap.add_pkt(&hp); + let reply = parse_inbound(&mut hp, GenericUlp {}).unwrap(); let meta = reply.meta(); - assert!(meta.outer.ether.is_none()); - assert!(meta.outer.ip.is_none()); - assert!(meta.outer.encap.is_none()); - - let eth = meta.inner.ether; - assert_eq!(eth.src, g1_cfg.gateway_mac); - assert_eq!(eth.dst, g1_cfg.guest_mac); - - let (src, dst) = match meta.inner.ip.as_ref().unwrap() { - IpMeta::Ip6(ip6) => { - assert_eq!(ip6.src, dst_ip); - assert_eq!(ip6.dst, src_ip); - assert_eq!(ip6.proto, Protocol::ICMPv6); + + let eth = &meta.inner_eth; + assert_eq!(eth.source(), g1_cfg.gateway_mac); + assert_eq!(eth.destination(), g1_cfg.guest_mac); + + let (src, dst) = match meta.inner_l3.as_ref().unwrap() { + ValidL3::Ipv6(ip6) => { + assert_eq!(ip6.source(), dst_ip); + assert_eq!(ip6.destination(), src_ip); + assert_eq!(ip6.next_header(), IngotIpProto::ICMP_V6); ( - Ipv6Address::from_bytes(&ip6.src), - Ipv6Address::from_bytes(&ip6.dst), + Ipv6Address::from_bytes(&ip6.source()), + Ipv6Address::from_bytes(&ip6.destination()), ) } - ip4 => panic!("expected inner IPv6 metadata, got IPv4: {:?}", ip4), + _ => panic!("expected inner IPv6 metadata, got IPv4"), }; - let Some(icmp6) = meta.inner_icmp6() else { + let Some(ValidUlp::IcmpV6(icmp6)) = &meta.inner_ulp else { panic!("expected inner ICMPv6 metadata"); }; // `Icmpv6Packet` requires the ICMPv6 header and not just the message payload. - // Given we successfully got the ICMPv6 metadata, rewinding here is fine. 
- let mut rdr = reply.get_body_rdr(); - rdr.seek_back(icmp6.hdr_len()).unwrap(); + let mut reply_body = icmp6.emit_vec(); + let msg_type = Icmpv6Message::from(icmp6.ty()); + let msg_code = icmp6.code(); - let reply_body = rdr.copy_remaining(); + reply_body.extend(reply.to_full_meta().meta().copy_remaining().into_iter()); let reply_pkt = Icmpv6Packet::new_checked(&reply_body).unwrap(); // Verify the parsed metadata matches the packet - assert_eq!(icmp6.msg_code, reply_pkt.msg_code()); - assert_eq!(icmp6.msg_type, reply_pkt.msg_type().into()); + assert_eq!(msg_code, reply_pkt.msg_code()); + assert_eq!(msg_type, reply_pkt.msg_type()); let mut csum = CsumCapab::ignored(); csum.icmpv6 = smoltcp::phy::Checksum::Rx; @@ -2148,16 +2159,17 @@ fn gateway_router_advert_reply() { // ==================================================== // Generate a Router Solicitation from G1 to Virtual GW // ==================================================== - let mut pkt1 = gen_router_solicitation(&g1_cfg.guest_mac); - pcap.add_pkt(&pkt1); + let mut pkt1_m = gen_router_solicitation(&g1_cfg.guest_mac); + pcap.add_pkt(&pkt1_m); // ================================================================ // Run the Solicitation through g1's port in the outbound // direction and verify it results in an Router Advertisement // hairpin back to guest. // ================================================================ - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - let hp = match res { + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + let mut hp = match res { Ok(Hairpin(hp)) => hp, _ => panic!("expected Hairpin, got {:?}", res), }; @@ -2166,35 +2178,31 @@ fn gateway_router_advert_reply() { // In this case we are parsing a hairpin reply, so we can't use // the VpcParser since it would expect any inbound packet to be // encapsulated. 
- let reply = hp.parse(In, GenericUlp {}).unwrap(); - pcap.add_pkt(&reply); - - // Ether + IPv6 + ICMPv6 - assert_eq!(reply.body_offset(), IP6_SZ + IcmpHdr::SIZE); - assert_eq!(reply.body_seg(), 0); + pcap.add_pkt(&hp); + let reply = parse_inbound(&mut hp, GenericUlp {}).unwrap(); let meta = reply.meta(); - assert!(meta.outer.ether.is_none()); - assert!(meta.outer.ip.is_none()); - assert!(meta.outer.encap.is_none()); - let eth = meta.inner.ether; + let eth = &meta.inner_eth; assert_eq!( - eth.src, g1_cfg.gateway_mac, + eth.source(), + g1_cfg.gateway_mac, "Router advertisement should come from the gateway's MAC" ); assert_eq!( - eth.dst, g1_cfg.guest_mac, + eth.destination(), + g1_cfg.guest_mac, "Router advertisement should be destined for the guest's MAC" ); - let IpMeta::Ip6(ip6) = meta.inner.ip.as_ref().expect("No inner IP header") + let ValidL3::Ipv6(ip6) = + meta.inner_l3.as_ref().expect("No inner IP header") else { panic!("Inner IP header is not IPv6"); }; assert_eq!( - ip6.src, + ip6.source(), Ipv6Addr::from_eui64(&g1_cfg.gateway_mac), "Router advertisement should come from the \ gateway's link-local IPv6 address, generated \ @@ -2202,32 +2210,35 @@ fn gateway_router_advert_reply() { ); let expected_dst = Ipv6Addr::from_eui64(&g1_cfg.guest_mac); assert_eq!( - ip6.dst, expected_dst, + ip6.destination(), + expected_dst, "Router advertisement should be destined for \ the guest's Link-Local IPv6 address, generated from \ the EUI-64 transform of its MAC" ); - assert_eq!(ip6.proto, Protocol::ICMPv6); + assert_eq!(ip6.next_header(), IngotIpProto::ICMP_V6); // RFC 4861 6.1.2 requires that the hop limit be 255 in an RA. - assert_eq!(ip6.hop_limit, 255); + assert_eq!(ip6.hop_limit(), 255); - let Some(icmp6) = meta.inner_icmp6() else { + let Some(ValidUlp::IcmpV6(icmp6)) = &meta.inner_ulp else { panic!("expected inner ICMPv6 metadata"); }; // `Icmpv6Packet` requires the ICMPv6 header and not just the message payload. 
// Given we successfully got the ICMPv6 metadata, rewinding here is fine. - let mut rdr = reply.get_body_rdr(); - rdr.seek_back(icmp6.hdr_len()).unwrap(); + let mut reply_body = icmp6.emit_vec(); + let ip6_src = ip6.source(); + let ip6_dst = ip6.destination(); - let reply_body = rdr.copy_remaining(); + reply_body.extend(reply.to_full_meta().meta().copy_remaining().into_iter()); let reply_pkt = Icmpv6Packet::new_checked(&reply_body).unwrap(); + let mut csum = CsumCapab::ignored(); csum.icmpv6 = smoltcp::phy::Checksum::Rx; let reply_icmp = Icmpv6Repr::parse( - &IpAddress::Ipv6(ip6.src.into()), - &IpAddress::Ipv6(ip6.dst.into()), + &IpAddress::Ipv6(ip6_src.into()), + &IpAddress::Ipv6(ip6_dst.into()), &reply_pkt, &csum, ) @@ -2421,60 +2432,54 @@ fn generate_solicit_test_data(cfg: &VpcCfg) -> Vec { // `na`. fn validate_hairpin_advert( pcap: &mut PcapBuilder, - hp: Packet, + mut hp: MsgBlk, na: AdvertInfo, ) { // In this case we are parsing a hairpin reply, so we can't use // the VpcParser since it would expect any inbound packet to be // encapsulated. - let reply = hp.parse(In, GenericUlp {}).unwrap(); - pcap.add_pkt(&reply); + pcap.add_pkt(&hp); + let reply = parse_inbound(&mut hp, GenericUlp {}).unwrap(); - // Verify Ethernet and IPv6 header basics. - assert_eq!(reply.body_offset(), IP6_SZ + IcmpHdr::SIZE); - assert_eq!(reply.body_seg(), 0); let meta = reply.meta(); - assert!(meta.outer.ether.is_none()); - assert!(meta.outer.ip.is_none()); - assert!(meta.outer.encap.is_none()); // Check that the inner MACs are what we expect. - let eth = meta.inner.ether; - assert_eq!(eth.src, na.src_mac); - assert_eq!(eth.dst, na.dst_mac); + let eth = &meta.inner_eth; + assert_eq!(eth.source(), na.src_mac); + assert_eq!(eth.destination(), na.dst_mac); // Check that the inner IPs are what we expect. 
- let ip6 = if let IpMeta::Ip6(ip6) = - meta.inner.ip.as_ref().expect("No inner IP header") - { - ip6 - } else { + let ValidL3::Ipv6(ip6) = + meta.inner_l3.as_ref().expect("No inner IP header") + else { panic!("Inner IP header is not IPv6"); }; - assert_eq!(ip6.src, na.src_ip); - assert_eq!(ip6.dst, na.dst_ip); - assert_eq!(ip6.proto, Protocol::ICMPv6); + assert_eq!(ip6.source(), na.src_ip); + assert_eq!(ip6.destination(), na.dst_ip); + assert_eq!(ip6.next_header(), IngotIpProto::ICMP_V6); // RFC 4861 7.1.2 requires that the hop limit be 255 in an NA. - assert_eq!(ip6.hop_limit, 255); + assert_eq!(ip6.hop_limit(), 255); - let Some(icmp6) = meta.inner_icmp6() else { + let Some(ValidUlp::IcmpV6(icmp6)) = &meta.inner_ulp else { panic!("expected inner ICMPv6 metadata"); }; // `Icmpv6Packet` requires the ICMPv6 header and not just the message payload. // Given we successfully got the ICMPv6 metadata, rewinding here is fine. - let mut rdr = reply.get_body_rdr(); - rdr.seek_back(icmp6.hdr_len()).unwrap(); + let mut reply_body = icmp6.emit_vec(); + let ip6_src = ip6.source(); + let ip6_dst = ip6.destination(); - // Validate the details of the Neighbor Advertisement itself. - let reply_body = rdr.copy_remaining(); + reply_body.extend(reply.to_full_meta().meta().copy_remaining().into_iter()); let reply_pkt = Icmpv6Packet::new_checked(&reply_body).unwrap(); + + // Validate the details of the Neighbor Advertisement itself. 
let mut csum = CsumCapab::ignored(); csum.icmpv6 = smoltcp::phy::Checksum::Rx; let reply_icmp = Icmpv6Repr::parse( - &IpAddress::Ipv6(ip6.src.into()), - &IpAddress::Ipv6(ip6.dst.into()), + &IpAddress::Ipv6(ip6_src.into()), + &IpAddress::Ipv6(ip6_dst.into()), &reply_pkt, &csum, ) @@ -2596,9 +2601,10 @@ fn outbound_ndp_dropped() { flags: NdiscNeighborFlags::OVERRIDE, }; - let mut pkt = generate_neighbor_advertisement(&outbound_na, true); + let mut pkt_m = generate_neighbor_advertisement(&outbound_na, true); + let pkt = parse_outbound(&mut pkt_m, VpcParser {}).unwrap(); - let res = g1.port.process(Out, &mut pkt, ActionMeta::new()).unwrap(); + let res = g1.port.process(Out, pkt).unwrap(); match res { ProcessResult::Drop { .. } => { incr!( @@ -2655,8 +2661,9 @@ fn inbound_ndp_dropped_at_gateway() { }; let pkt = generate_neighbor_solicitation(&ns, true); - let mut pkt = encap(pkt, g2_phys, g1_phys); - let res = g1.port.process(In, &mut pkt, ActionMeta::new()).unwrap(); + let mut pkt_m = encap(pkt, g2_phys, g1_phys); + let pkt = parse_inbound(&mut pkt_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt).unwrap(); println!("{res:?}"); match res { ProcessResult::Drop { .. 
} => { @@ -2683,26 +2690,27 @@ fn inbound_ndp_dropped_at_gateway() { fn packet_from_client_dhcpv6_message( cfg: &VpcCfg, msg: &dhcpv6::protocol::Message<'_>, -) -> Packet { - let eth = EtherMeta { - dst: dhcpv6::ALL_RELAYS_AND_SERVERS.multicast_mac().unwrap(), - src: cfg.guest_mac, - ether_type: EtherType::Ipv6, +) -> MsgBlk { + let eth = Ethernet { + destination: dhcpv6::ALL_RELAYS_AND_SERVERS.multicast_mac().unwrap(), + source: cfg.guest_mac, + ethertype: Ethertype::IPV6, }; - let ip = Ipv6Meta { - src: Ipv6Addr::from_eui64(&cfg.guest_mac), - dst: dhcpv6::ALL_RELAYS_AND_SERVERS, - proto: Protocol::UDP, - next_hdr: IpProtocol::Udp, - pay_len: (msg.buffer_len() + UdpHdr::SIZE) as u16, + let payload_len = (msg.buffer_len() + Udp::MINIMUM_LENGTH) as u16; + + let ip = Ipv6 { + source: Ipv6Addr::from_eui64(&cfg.guest_mac), + destination: dhcpv6::ALL_RELAYS_AND_SERVERS, + next_header: IngotIpProto::UDP, + payload_len, ..Default::default() }; - let udp = UdpMeta { - src: dhcpv6::CLIENT_PORT, - dst: dhcpv6::SERVER_PORT, - len: (UdpHdr::SIZE + msg.buffer_len()) as u16, + let udp = Udp { + source: dhcpv6::CLIENT_PORT, + destination: dhcpv6::SERVER_PORT, + length: payload_len, ..Default::default() }; @@ -2710,22 +2718,20 @@ fn packet_from_client_dhcpv6_message( } fn write_dhcpv6_packet( - eth: EtherMeta, - ip: Ipv6Meta, - udp: UdpMeta, + eth: Ethernet, + ip: Ipv6, + udp: Udp, msg: &dhcpv6::protocol::Message<'_>, -) -> Packet { - let reply_len = - msg.buffer_len() + UdpHdr::SIZE + Ipv6Hdr::BASE_SIZE + EtherHdr::SIZE; - let mut pkt = Packet::alloc_and_expand(reply_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); - let mut msg_buf = vec![0; msg.buffer_len()]; - msg.copy_into(&mut msg_buf).unwrap(); - wtr.write(&msg_buf).unwrap(); - pkt.parse(Out, GenericUlp {}).unwrap() +) -> MsgBlk { + let total_len = msg.buffer_len() + (&eth, &ip,
&udp).packet_length(); + + let mut pkt = MsgBlk::new_ethernet(total_len); + pkt.emit_back((eth, ip, udp)); + let l = pkt.len(); + pkt.resize(total_len); + msg.copy_into(&mut pkt[l..]); + + pkt } // Assert the essential details of a DHCPv6 exchange. The client request is in @@ -2739,36 +2745,40 @@ fn write_dhcpv6_packet( // - The server must include its own Server ID option. fn verify_dhcpv6_essentials<'a>( cfg: &VpcCfg, - request_pkt: &Packet, + request_pkt: &mut MsgBlk, request: &dhcpv6::protocol::Message<'a>, - reply_pkt: &Packet, + reply_pkt: &mut MsgBlk, reply: &dhcpv6::protocol::Message<'a>, ) { + let request_pkt = + parse_outbound(request_pkt, GenericUlp {}).unwrap().to_full_meta(); + let reply_pkt = + parse_inbound(reply_pkt, GenericUlp {}).unwrap().to_full_meta(); let request_meta = request_pkt.meta(); let reply_meta = reply_pkt.meta(); let request_ether = request_meta.inner_ether(); let reply_ether = reply_meta.inner_ether(); assert_eq!( - request_ether.dst, + request_ether.destination(), dhcpv6::ALL_RELAYS_AND_SERVERS.multicast_mac().unwrap() ); - assert_eq!(request_ether.src, reply_ether.dst); + assert_eq!(request_ether.source(), reply_ether.destination()); let request_ip = request_meta.inner_ip6().unwrap(); let reply_ip = reply_meta.inner_ip6().unwrap(); - assert_eq!(request_ip.src, Ipv6Addr::from_eui64(&cfg.guest_mac)); - assert_eq!(request_ip.dst, dhcpv6::ALL_RELAYS_AND_SERVERS); - assert_eq!(request_ip.proto, Protocol::UDP); - assert_eq!(reply_ip.dst, request_ip.src); - assert_eq!(reply_ip.src, Ipv6Addr::from_eui64(&cfg.gateway_mac)); - assert_eq!(reply_ip.proto, Protocol::UDP); + assert_eq!(request_ip.source(), Ipv6Addr::from_eui64(&cfg.guest_mac)); + assert_eq!(request_ip.destination(), dhcpv6::ALL_RELAYS_AND_SERVERS); + assert_eq!(request_ip.next_header(), IngotIpProto::UDP); + assert_eq!(reply_ip.destination(), request_ip.source()); + assert_eq!(reply_ip.source(), Ipv6Addr::from_eui64(&cfg.gateway_mac)); + assert_eq!(reply_ip.next_header(), 
IngotIpProto::UDP); let request_udp = request_meta.inner_udp().unwrap(); let reply_udp = reply_meta.inner_udp().unwrap(); - assert_eq!(request_udp.src, dhcpv6::CLIENT_PORT); - assert_eq!(request_udp.dst, dhcpv6::SERVER_PORT); - assert_eq!(reply_udp.dst, dhcpv6::CLIENT_PORT); - assert_eq!(reply_udp.src, dhcpv6::SERVER_PORT); + assert_eq!(request_udp.source(), dhcpv6::CLIENT_PORT); + assert_eq!(request_udp.destination(), dhcpv6::SERVER_PORT); + assert_eq!(reply_udp.destination(), dhcpv6::CLIENT_PORT); + assert_eq!(reply_udp.source(), dhcpv6::SERVER_PORT); // Verify the details of the DHCPv6 exchange itself. assert_eq!(reply.xid, request.xid); @@ -2857,28 +2867,32 @@ fn test_reply_to_dhcpv6_solicit_or_request() { xid: dhcpv6::TransactionId::from(&[0u8, 1, 2]), options, }; - let mut request_pkt = + let mut request_pkt_m = packet_from_client_dhcpv6_message(&g1_cfg, &request); - pcap.add_pkt(&request_pkt); - let res = g1 - .port - .process(Out, &mut request_pkt, ActionMeta::new()) - .unwrap(); - if let Hairpin(hp) = res { + pcap.add_pkt(&request_pkt_m); + let request_pkt = + parse_outbound(&mut request_pkt_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, request_pkt).unwrap(); + + if let Hairpin(mut hp) = res { // In this case we are parsing a hairpin reply, so we // can't use the VpcParser since it would expect any // inbound packet to be encapsulated. 
- let reply_pkt = hp.parse(In, GenericUlp {}).unwrap(); - pcap.add_pkt(&reply_pkt); + pcap.add_pkt(&hp); + + let reply_pkt = parse_inbound(&mut hp, GenericUlp {}) + .unwrap() + .to_full_meta(); + let out_body = reply_pkt.meta().copy_remaining(); + drop(reply_pkt); - let body = reply_pkt.get_body_rdr().copy_remaining(); let reply = - dhcpv6::protocol::Message::from_bytes(&body).unwrap(); + dhcpv6::protocol::Message::from_bytes(&out_body).unwrap(); verify_dhcpv6_essentials( &g1_cfg, - &request_pkt, + &mut request_pkt_m, &request, - &reply_pkt, + &mut hp, &reply, ); @@ -2988,14 +3002,15 @@ fn establish_http_conn( // Run the SYN packet through g1's port in the outbound direction // and verify it is accepted. // ================================================================ - let mut pkt1 = http_syn2( + let mut pkt1_m = http_syn2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip, ); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -3005,7 +3020,9 @@ fn establish_http_conn( "stats.port.out_modified, stats.port.out_uft_miss", ] ); - let snat_port = pkt1.meta().inner.ulp.unwrap().src_port().unwrap(); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); + let snat_port = + pkt1.to_full_meta().meta().inner_ulp().unwrap().src_port().unwrap(); // ================================================================ // Step 2 @@ -3013,7 +3030,7 @@ fn establish_http_conn( // Run the SYN+ACK packet through g1's port in the inbound // direction and verify it is accepted. 
// ================================================================ - let mut pkt2 = http_syn_ack2( + let mut pkt2_m = http_syn_ack2( BS_MAC_ADDR, dst_ip, g1_cfg.guest_mac, @@ -3030,9 +3047,10 @@ fn establish_http_conn( mac: BS_MAC_ADDR, vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), }; - pkt2 = encap_external(pkt2, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt2, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + pkt2_m = encap_external(pkt2_m, bs_phys, g1_phys); + let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt2); + expect_modified!(res, pkt2_m); incr!(g1, ["uft.in", "stats.port.in_modified, stats.port.in_uft_miss"]); // ================================================================ @@ -3040,14 +3058,15 @@ fn establish_http_conn( // // Send ACK to establish connection. // ================================================================ - let mut pkt3 = http_ack2( + let mut pkt3_m = http_ack2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip, ); - let res = g1.port.process(Out, &mut pkt3, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt3 = parse_outbound(&mut pkt3_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt3); + expect_modified!(res, pkt3_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); snat_port } @@ -3116,13 +3135,14 @@ fn uft_lft_invalidation_out() { // ================================================================ // Step 4 // ================================================================ - let mut pkt4 = http_get2( + let mut pkt4_m = http_get2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip, ); - let res = g1.port.process(Out, &mut pkt4, ActionMeta::new()); + let pkt4 = parse_outbound(&mut pkt4_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt4); assert_drop!( res, DropReason::Layer { name: "firewall", reason: DenyReason::Rule } @@ -3184,17 +3204,18 @@ fn 
uft_lft_invalidation_in() { }; let snat_port = establish_http_conn(&g1_cfg, &mut g1, dst_ip); - let mut pkt1 = http_get2( + let mut pkt1_m = http_get2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip, ); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); - let mut pkt2 = http_get_ack2( + let mut pkt2_m = http_get_ack2( BS_MAC_ADDR, dst_ip, g1_cfg.guest_mac, @@ -3206,10 +3227,11 @@ fn uft_lft_invalidation_in() { mac: BS_MAC_ADDR, vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), }; - pkt2 = encap_external(pkt2, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt2, ActionMeta::new()); + pkt2_m = encap_external(pkt2_m, bs_phys, g1_phys); + let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt2); incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); - assert!(matches!(res, Ok(Modified))); + expect_modified!(res, pkt2_m); // ================================================================ // Step 3 @@ -3235,7 +3257,7 @@ fn uft_lft_invalidation_in() { // ================================================================ // Step 4 // ================================================================ - let mut pkt3 = http_301_reply2( + let mut pkt3_m = http_301_reply2( BS_MAC_ADDR, dst_ip, g1_cfg.guest_mac, @@ -3247,8 +3269,9 @@ fn uft_lft_invalidation_in() { mac: BS_MAC_ADDR, vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), }; - pkt3 = encap_external(pkt3, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt3, ActionMeta::new()); + pkt3_m = encap_external(pkt3_m, bs_phys, g1_phys); + let pkt3 = parse_inbound(&mut pkt3_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt3); assert_drop!( res, DropReason::Layer { name: "firewall", reason: 
DenyReason::Default } @@ -3270,20 +3293,26 @@ fn test_outbound_http(g1_cfg: &VpcCfg, g1: &mut PortAndVps) -> InnerFlowId { mac: g1_cfg.guest_mac, vni: g1_cfg.vni, }; + let bs_phys = TestIpPhys { + ip: BS_IP_ADDR, + mac: BS_MAC_ADDR, + vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), + }; // ================================================================ // SYN: Client -> Server // ================================================================ let dst_ip = "52.10.128.69".parse().unwrap(); - let mut pkt1 = http_syn2( + let mut pkt1_m = http_syn2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip, ); - let flow = *pkt1.flow(); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let flow = pkt1.flow(); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -3293,181 +3322,168 @@ fn test_outbound_http(g1_cfg: &VpcCfg, g1: &mut PortAndVps) -> InnerFlowId { "stats.port.out_modified, stats.port.out_uft_miss", ] ); - let snat_port = pkt1.meta().inner.ulp.unwrap().src_port().unwrap(); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); + let snat_port = + pkt1.to_full_meta().meta().inner_ulp().unwrap().src_port().unwrap(); assert_eq!(TcpState::SynSent, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // SYN+ACK: Server -> Client // ================================================================ - let mut pkt2 = http_syn_ack2( + let mut pkt2_m = http_syn_ack2( BS_MAC_ADDR, dst_ip, g1_cfg.guest_mac, g1_cfg.snat().external_ip, snat_port, ); - let bs_phys = TestIpPhys { - ip: BS_IP_ADDR, - mac: BS_MAC_ADDR, - vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), - }; - pkt2 = encap_external(pkt2, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt2, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + pkt2_m = encap_external(pkt2_m, 
bs_phys, g1_phys); + let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt2); + expect_modified!(res, pkt2_m); incr!(g1, ["uft.in", "stats.port.in_modified, stats.port.in_uft_miss"]); assert_eq!(TcpState::Established, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // ACK: Client -> Server // ================================================================ - let mut pkt3 = http_ack2( + let mut pkt3_m = http_ack2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip, ); - let res = g1.port.process(Out, &mut pkt3, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt3 = parse_outbound(&mut pkt3_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt3); + expect_modified!(res, pkt3_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); assert_eq!(TcpState::Established, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // HTTP GET: Client -> Server // ================================================================ - let mut pkt4 = http_get2( + let mut pkt4_m = http_get2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip, ); - let res = g1.port.process(Out, &mut pkt4, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt4 = parse_outbound(&mut pkt4_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt4); + expect_modified!(res, pkt4_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); assert_eq!(TcpState::Established, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // ACK HTTP GET: Server -> Client // ================================================================ - let mut pkt5 = http_get_ack2( + let mut pkt5_m = http_get_ack2( BS_MAC_ADDR, dst_ip, g1_cfg.guest_mac, g1_cfg.snat().external_ip, snat_port, ); - let bs_phys = TestIpPhys { - ip: BS_IP_ADDR, - mac: 
BS_MAC_ADDR, - vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), - }; - pkt5 = encap_external(pkt5, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt5, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + pkt5_m = encap_external(pkt5_m, bs_phys, g1_phys); + let pkt5 = parse_inbound(&mut pkt5_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt5); + expect_modified!(res, pkt5_m); incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); assert_eq!(TcpState::Established, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // HTTP 301 Reply: Server -> Client // ================================================================ - let mut pkt6 = http_301_reply2( + let mut pkt6_m = http_301_reply2( BS_MAC_ADDR, dst_ip, g1_cfg.guest_mac, g1_cfg.snat().external_ip, snat_port, ); - let bs_phys = TestIpPhys { - ip: BS_IP_ADDR, - mac: BS_MAC_ADDR, - vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), - }; - pkt6 = encap_external(pkt6, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt6, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + pkt6_m = encap_external(pkt6_m, bs_phys, g1_phys); + let pkt6 = parse_inbound(&mut pkt6_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt6); + expect_modified!(res, pkt6_m); incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); assert_eq!(TcpState::Established, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // ACK HTTP 301: Client -> Server // ================================================================ - let mut pkt7 = http_301_ack2( + let mut pkt7_m = http_301_ack2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip, ); - let res = g1.port.process(Out, &mut pkt7, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt7 = parse_outbound(&mut pkt7_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt7); + expect_modified!(res, 
pkt7_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); assert_eq!(TcpState::Established, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // FIN: Client -> Server // ================================================================ - let mut pkt8 = http_guest_fin2( + let mut pkt8_m = http_guest_fin2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip, ); - let res = g1.port.process(Out, &mut pkt8, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt8 = parse_outbound(&mut pkt8_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt8); + expect_modified!(res, pkt8_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); assert_eq!(TcpState::FinWait1, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // ACK FIN: Server -> Client // ================================================================ - let mut pkt9 = http_server_ack_fin2( + let mut pkt9_m = http_server_ack_fin2( BS_MAC_ADDR, dst_ip, g1_cfg.guest_mac, g1_cfg.snat().external_ip, snat_port, ); - let bs_phys = TestIpPhys { - ip: BS_IP_ADDR, - mac: BS_MAC_ADDR, - vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), - }; - pkt9 = encap_external(pkt9, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt9, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + pkt9_m = encap_external(pkt9_m, bs_phys, g1_phys); + let pkt9 = parse_inbound(&mut pkt9_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt9); + expect_modified!(res, pkt9_m); incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); assert_eq!(TcpState::FinWait2, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // FIN: Server -> Client // ================================================================ - let mut pkt10 = http_server_fin2( + let mut pkt10_m = http_server_fin2( BS_MAC_ADDR, dst_ip, 
g1_cfg.guest_mac, g1_cfg.snat().external_ip, snat_port, ); - let bs_phys = TestIpPhys { - ip: BS_IP_ADDR, - mac: BS_MAC_ADDR, - vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), - }; - pkt10 = encap_external(pkt10, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt10, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + pkt10_m = encap_external(pkt10_m, bs_phys, g1_phys); + let pkt10 = parse_inbound(&mut pkt10_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt10); + expect_modified!(res, pkt10_m); incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); assert_eq!(TcpState::TimeWait, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // ACK Server FIN: Client -> Server // ================================================================ - let mut pkt11 = http_guest_ack_fin2( + let mut pkt11_m = http_guest_ack_fin2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip, ); - let res = g1.port.process(Out, &mut pkt11, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt11 = parse_outbound(&mut pkt11_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt11); + expect_modified!(res, pkt11_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); assert_eq!(TcpState::TimeWait, g1.port.tcp_state(&flow).unwrap()); @@ -3567,14 +3583,15 @@ fn early_tcp_invalidation() { // Repeat the exact same flow. This SYN is not blocked, the old // entry is invalidated, and a new one is created. 
// ================================================================ - let mut pkt1 = http_syn2( + let mut pkt1_m = http_syn2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip, ); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -3584,7 +3601,9 @@ fn early_tcp_invalidation() { ] ); assert_eq!(TcpState::SynSent, g1.port.tcp_state(&flow).unwrap()); - let snat_port = pkt1.meta().inner.ulp.unwrap().src_port().unwrap(); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); + let snat_port = + pkt1.to_full_meta().meta().inner_ulp().unwrap().src_port().unwrap(); // ================================================================ // Drive to established, then validate the same applies to inbound @@ -3600,20 +3619,21 @@ fn early_tcp_invalidation() { mac: g1_cfg.guest_mac, vni: g1_cfg.vni, }; - let mut pkt2 = http_syn_ack2( + let mut pkt2_m = http_syn_ack2( BS_MAC_ADDR, dst_ip, g1_cfg.guest_mac, g1_cfg.snat().external_ip, snat_port, ); - pkt2 = encap_external(pkt2, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt2, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + pkt2_m = encap_external(pkt2_m, bs_phys, g1_phys); + let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt2); + expect_modified!(res, pkt2_m); incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); assert_eq!(TcpState::Established, g1.port.tcp_state(&flow).unwrap()); - let mut pkt1 = http_syn3( + let mut pkt1_m = http_syn3( BS_MAC_ADDR, dst_ip, g1_cfg.guest_mac, @@ -3621,9 +3641,10 @@ fn early_tcp_invalidation() { 80, snat_port, ); - pkt1 = encap_external(pkt1, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + pkt1_m = encap_external(pkt1_m, 
bs_phys, g1_phys); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt1); + expect_modified!(res, pkt1_m); update!( g1, [ @@ -3644,15 +3665,16 @@ fn early_tcp_invalidation() { // This case is just an ACK, but the same logic applies for // FIN+ACK. The FIN+ACK case could be special-cased CLOSED->CLOSED, // but we're not doing that for now. - let mut pkt11 = http_guest_ack_fin2( + let mut pkt11_m = http_guest_ack_fin2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip2, ); - let flow = *pkt11.flow(); - let res = g1.port.process(Out, &mut pkt11, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt11 = parse_outbound(&mut pkt11_m, VpcParser {}).unwrap(); + let flow = pkt11.flow(); + let res = g1.port.process(Out, pkt11); + expect_modified!(res, pkt11_m); incr!( g1, [ @@ -3667,15 +3689,16 @@ fn early_tcp_invalidation() { // ================================================================ // This entry will not block new flows on the same tuple. 
// ================================================================ - let mut pkt1 = http_syn2( + let mut pkt1_m = http_syn2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip2, ); - let flow = *pkt1.flow(); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let flow = pkt1.flow(); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -3752,16 +3775,16 @@ fn tcp_inbound() { // ================================================================ // SYN: Client -> Server // ================================================================ - let mut pkt1 = http_syn2(BS_MAC_ADDR, client_ip, serv_mac, serv_ext_ip); + let mut pkt1_m = http_syn2(BS_MAC_ADDR, client_ip, serv_mac, serv_ext_ip); let bs_phys = TestIpPhys { ip: BS_IP_ADDR, mac: BS_MAC_ADDR, vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), }; - pkt1 = encap(pkt1, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt1, ActionMeta::new()); - let flow = pkt1.flow().mirror(); - assert!(matches!(res, Ok(Modified))); + pkt1_m = encap(pkt1_m, bs_phys, g1_phys); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -3771,133 +3794,147 @@ fn tcp_inbound() { "stats.port.in_modified, stats.port.in_uft_miss", ] ); - let sport = pkt1.meta().inner.ulp.unwrap().src_port().unwrap(); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let flow = pkt1.flow().mirror(); + let sport = + pkt1.to_full_meta().meta().inner_ulp().unwrap().src_port().unwrap(); assert_eq!(TcpState::Listen, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // SYN+ACK: Server -> Client // ================================================================ - let mut pkt2 = http_syn_ack2( + let mut pkt2_m = http_syn_ack2( serv_mac, 
g1_cfg.ipv4().private_ip, GW_MAC_ADDR, client_ip, sport, ); - let res = g1.port.process(Out, &mut pkt2, ActionMeta::new()); - assert!(matches!(res, Ok(Modified)), "expected Modified, got {:?}", res); + let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt2); + expect_modified!(res, pkt2_m); incr!(g1, ["uft.out, stats.port.out_modified, stats.port.out_uft_miss"]); assert_eq!(TcpState::SynRcvd, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // ACK: Client -> Server // ================================================================ - let mut pkt3 = http_ack2(BS_MAC_ADDR, client_ip, serv_mac, serv_ext_ip); - pkt3 = encap(pkt3, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt3, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let mut pkt3_m = http_ack2(BS_MAC_ADDR, client_ip, serv_mac, serv_ext_ip); + pkt3_m = encap(pkt3_m, bs_phys, g1_phys); + let pkt3 = parse_inbound(&mut pkt3_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt3); + expect_modified!(res, pkt3_m); incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); assert_eq!(TcpState::Established, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // HTTP GET: Client -> Server // ================================================================ - let mut pkt4 = http_get2(BS_MAC_ADDR, client_ip, serv_mac, serv_ext_ip); - pkt4 = encap(pkt4, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt4, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let mut pkt4_m = http_get2(BS_MAC_ADDR, client_ip, serv_mac, serv_ext_ip); + pkt4_m = encap(pkt4_m, bs_phys, g1_phys); + let pkt4 = parse_inbound(&mut pkt4_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt4); + expect_modified!(res, pkt4_m); incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); assert_eq!(TcpState::Established, 
g1.port.tcp_state(&flow).unwrap()); // ================================================================ // ACK HTTP GET: Server -> Client // ================================================================ - let mut pkt5 = http_get_ack2( + let mut pkt5_m = http_get_ack2( serv_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, client_ip, sport, ); - let res = g1.port.process(Out, &mut pkt5, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt5 = parse_outbound(&mut pkt5_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt5); + expect_modified!(res, pkt5_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); assert_eq!(TcpState::Established, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // HTTP 301 Reply: Server -> Client // ================================================================ - let mut pkt6 = http_301_reply2( + let mut pkt6_m = http_301_reply2( serv_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, client_ip, sport, ); - let res = g1.port.process(Out, &mut pkt6, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt6 = parse_outbound(&mut pkt6_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt6); + expect_modified!(res, pkt6_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); assert_eq!(TcpState::Established, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // ACK HTTP 301: Client -> Server // ================================================================ - let mut pkt7 = http_301_ack2(BS_MAC_ADDR, client_ip, serv_mac, serv_ext_ip); - pkt7 = encap(pkt7, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt7, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let mut pkt7_m = + http_301_ack2(BS_MAC_ADDR, client_ip, serv_mac, serv_ext_ip); + pkt7_m = encap(pkt7_m, bs_phys, g1_phys); + let pkt7 = parse_inbound(&mut pkt7_m, VpcParser {}).unwrap(); + let res 
= g1.port.process(In, pkt7); + expect_modified!(res, pkt7_m); incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); assert_eq!(TcpState::Established, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // FIN: Client -> Server // ================================================================ - let mut pkt8 = + let mut pkt8_m = http_guest_fin2(BS_MAC_ADDR, client_ip, serv_mac, serv_ext_ip); - pkt8 = encap(pkt8, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt8, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + pkt8_m = encap(pkt8_m, bs_phys, g1_phys); + let pkt8 = parse_inbound(&mut pkt8_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt8); + expect_modified!(res, pkt8_m); incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); assert_eq!(TcpState::CloseWait, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // ACK Client FIN: Server -> Client // ================================================================ - let mut pkt9 = http_server_ack_fin2( + let mut pkt9_m = http_server_ack_fin2( serv_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, client_ip, sport, ); - let res = g1.port.process(Out, &mut pkt9, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt9 = parse_outbound(&mut pkt9_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt9); + expect_modified!(res, pkt9_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); assert_eq!(TcpState::CloseWait, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // FIN: Server -> Client // ================================================================ - let mut pkt10 = http_server_fin2( + let mut pkt10_m = http_server_fin2( serv_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, client_ip, sport, ); - let res = g1.port.process(Out, &mut pkt10, ActionMeta::new()); - assert!(matches!(res, 
Ok(Modified))); + let pkt10 = parse_outbound(&mut pkt10_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt10); + expect_modified!(res, pkt10_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); assert_eq!(TcpState::LastAck, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // ACK Server FIN: Client -> Server // ================================================================ - let mut pkt11 = + let mut pkt11_m = http_guest_ack_fin2(BS_MAC_ADDR, client_ip, serv_mac, serv_ext_ip); - pkt11 = encap(pkt11, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt11, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + pkt11_m = encap(pkt11_m, bs_phys, g1_phys); + let pkt11 = parse_inbound(&mut pkt11_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt11); + expect_modified!(res, pkt11_m); update!( g1, [ @@ -3925,13 +3962,14 @@ fn anti_spoof() { // ================================================================ // Try to send an outbound packet with a spoofed IP. // ================================================================ - let mut pkt1 = http_syn2( + let mut pkt1_m = http_syn2( g1_cfg.guest_mac, src_ip, GW_MAC_ADDR, g2_cfg.ipv4().private_ip, ); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); assert_drop!( res, DropReason::Layer { name: "gateway", reason: DenyReason::Default } @@ -3947,13 +3985,14 @@ fn anti_spoof() { // ================================================================ // Try to send an outbound packet with a spoofed MAC address. 
// ================================================================ - pkt1 = http_syn2( + pkt1_m = http_syn2( src_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, g2_cfg.ipv4().private_ip, ); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); assert_drop!( res, DropReason::Layer { name: "gateway", reason: DenyReason::Default } @@ -3969,8 +4008,9 @@ fn anti_spoof() { // ================================================================ // Try to send an outbound packet with a spoofed MAC address and IP. // ================================================================ - pkt1 = http_syn2(src_mac, src_ip, GW_MAC_ADDR, g2_cfg.ipv4().private_ip); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); + pkt1_m = http_syn2(src_mac, src_ip, GW_MAC_ADDR, g2_cfg.ipv4().private_ip); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); assert_drop!( res, DropReason::Layer { name: "gateway", reason: DenyReason::Default } @@ -4008,7 +4048,7 @@ fn no_panic_on_flow_table_full() { // Send one TCP packet to `zinascii.com`. let dst_ip: Ipv4Addr = "52.10.128.69".parse().unwrap(); - let mut pkt1 = http_syn2( + let mut pkt1_m = http_syn2( g1_cfg.guest_mac, g1_cfg.ipv4_cfg().unwrap().private_ip, GW_MAC_ADDR, @@ -4018,20 +4058,22 @@ fn no_panic_on_flow_table_full() { // Process the packet through our port. We don't actually care about the // contents here, we just want to make sure that the packet can be _sent at // all_. - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); assert!(res.is_ok()); // Send another one, which should exhaust the TCP flow table limit we // severely truncated above. Note we need to send to a different IP address. // Let's use google.com. 
let dst_ip: Ipv4Addr = "142.251.46.238".parse().unwrap(); - let mut pkt2 = http_syn2( + let mut pkt2_m = http_syn2( g1_cfg.guest_mac, g1_cfg.ipv4_cfg().unwrap().private_ip, GW_MAC_ADDR, dst_ip, ); - let res2 = g1.port.process(Out, &mut pkt2, ActionMeta::new()); + let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res2 = g1.port.process(Out, pkt2); assert_drop!(res2, DropReason::TcpErr); } @@ -4073,7 +4115,7 @@ fn intra_subnet_routes_with_custom() { let data = b"1234\0"; // Send one ICMP packet to that guest. - let mut pkt1 = gen_icmpv4_echo_req( + let mut pkt1_m = gen_icmpv4_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, g1_cfg.ipv4().private_ip, @@ -4087,8 +4129,9 @@ fn intra_subnet_routes_with_custom() { // Process the packet through our port. It should be allowed through: // we have a V2P mapping for the target guest, and a route for the other // subnet. - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(ProcessResult::Modified))); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -4107,7 +4150,7 @@ fn intra_subnet_routes_with_custom() { ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); - let mut pkt2 = gen_icmpv4_echo_req( + let mut pkt2_m = gen_icmpv4_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, g1_cfg.ipv4().private_ip, @@ -4117,7 +4160,8 @@ fn intra_subnet_routes_with_custom() { data, 1, ); - let res = g1.port.process(Out, &mut pkt2, ActionMeta::new()); + let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt2); assert!(matches!( res, Ok(ProcessResult::Drop { @@ -4142,7 +4186,7 @@ fn intra_subnet_routes_with_custom() { ) .unwrap(); update!(g1, ["incr:epoch", "decr:router.rules.out"]); - let mut pkt3 = gen_icmpv4_echo_req( + let mut pkt3_m = gen_icmpv4_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, g1_cfg.ipv4().private_ip, @@ -4152,8 +4196,9 @@ 
fn intra_subnet_routes_with_custom() { data, 1, ); - let res = g1.port.process(Out, &mut pkt3, ActionMeta::new()); - assert!(matches!(res, Ok(ProcessResult::Modified))); + let pkt3 = parse_outbound(&mut pkt3_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt3); + expect_modified!(res, pkt3_m); } #[test] @@ -4197,7 +4242,7 @@ fn port_as_router_target() { let data = b"1234\0"; // Send one ICMP packet to that range. - let mut pkt1 = gen_icmpv4_echo_req( + let mut pkt1_m = gen_icmpv4_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, g1_cfg.ipv4().private_ip, @@ -4210,8 +4255,9 @@ fn port_as_router_target() { // That packet should be allowed: the target IP resolves to a valid // V2P Mapping. - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(ProcessResult::Modified))); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -4220,18 +4266,23 @@ fn port_as_router_target() { ] ); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); + // Encap routes between sleds correctly, inner IPs are not modified, // and L2 dst matches the guest's NIC. 
- let v6_encap_meta = pkt1.meta().outer.ip.as_ref().unwrap().ip6().unwrap(); - assert_eq!(v6_encap_meta.src, g1_cfg.phys_ip); - assert_eq!(v6_encap_meta.dst, g2_cfg.phys_ip); - assert_eq!(pkt1.meta().inner_ether().dst, g2_cfg.guest_mac); - assert_eq!(pkt1.meta().inner_ether().src, g1_cfg.guest_mac); - assert_eq!(pkt1.meta().inner_ip4().unwrap().src, g1_cfg.ipv4().private_ip); - assert_eq!(pkt1.meta().inner_ip4().unwrap().dst, dst_ip); + let v6_encap_meta = &pkt1.meta().outer_v6; + assert_eq!(v6_encap_meta.source(), g1_cfg.phys_ip); + assert_eq!(v6_encap_meta.destination(), g2_cfg.phys_ip); + assert_eq!(pkt1.meta().inner_eth.destination(), g2_cfg.guest_mac); + assert_eq!(pkt1.meta().inner_eth.source(), g1_cfg.guest_mac); + let ValidL3::Ipv4(inner_ip4) = &pkt1.meta().inner_l3 else { + panic!("encapped v4 packet did not parse back as v4"); + }; + assert_eq!(inner_ip4.source(), g1_cfg.ipv4().private_ip); + assert_eq!(inner_ip4.destination(), dst_ip); // Now deliver the packet to node g2. - let res = g2.port.process(In, &mut pkt1, ActionMeta::new()); + let res = g2.port.process(In, pkt1); incr!( g2, [ @@ -4239,11 +4290,11 @@ fn port_as_router_target() { "stats.port.in_modified, stats.port.in_uft_miss, uft.in", ] ); - assert!(matches!(res, Ok(ProcessResult::Modified))); + expect_modified!(res, pkt1_m); // A reply from that address must be allowed out by g2, and accepted // by g1. 
- let mut pkt2 = gen_icmpv4_echo_reply( + let mut pkt2_m = gen_icmpv4_echo_reply( g2_cfg.guest_mac, g2_cfg.gateway_mac, dst_ip, @@ -4253,11 +4304,13 @@ fn port_as_router_target() { data, 1, ); + let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); - let res = g2.port.process(Out, &mut pkt2, ActionMeta::new()); + let res = g2.port.process(Out, pkt2); incr!(g2, ["stats.port.out_modified, stats.port.out_uft_miss, uft.out",]); - assert!(matches!(res, Ok(ProcessResult::Modified))); + expect_modified!(res, pkt2_m); - let res = g1.port.process(In, &mut pkt2, ActionMeta::new()); - assert!(matches!(res, Ok(ProcessResult::Modified))); + let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt2); + expect_modified!(res, pkt2_m); } From 3ee9f1826ef11895e5e558054306f45fec47d08d Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 11 Oct 2024 20:54:51 +0100 Subject: [PATCH 044/115] More progress on the 'it compiles, at least'. --- bench/benches/userland.rs | 97 +++++++++++---- bench/src/packet.rs | 152 +++++++++++------------ crates/opte-api/src/encap.rs | 4 +- lib/opte-test-utils/src/icmp.rs | 3 +- lib/opte-test-utils/src/lib.rs | 33 +++++ lib/oxide-vpc/tests/firewall_tests.rs | 65 +++++----- lib/oxide-vpc/tests/integration_tests.rs | 44 ++----- 7 files changed, 228 insertions(+), 170 deletions(-) diff --git a/bench/benches/userland.rs b/bench/benches/userland.rs index 4014291f..8a0c81b3 100644 --- a/bench/benches/userland.rs +++ b/bench/benches/userland.rs @@ -10,6 +10,7 @@ use criterion::criterion_group; use criterion::criterion_main; use criterion::BenchmarkId; use criterion::Criterion; +use opte::engine::ingot_packet::Packet2; use opte_bench::alloc::*; use opte_bench::packet::BenchPacket; use opte_bench::packet::BenchPacketInstance; @@ -80,12 +81,31 @@ pub fn test_parse( || inp.generate(), // match *outside* the closure to prevent its selection from being timed. 
match parser { - ParserKind::Generic => |(in_pkt, direction): TestCase| { - in_pkt.parse(direction, GenericUlp {}) - }, - ParserKind::OxideVpc => |(in_pkt, direction): TestCase| { - in_pkt.parse(direction, VpcParser {}) - }, + ParserKind::Generic => { + |(mut in_pkt, direction): TestCase| { + let pkt = + black_box(Packet2::new(in_pkt.iter_mut())); + black_box(match direction { + In => pkt.parse_inbound(GenericUlp {}), + Out => pkt.parse_outbound(GenericUlp {}), + }) + .unwrap(); + } + } + ParserKind::OxideVpc => { + |(mut in_pkt, direction): TestCase| { + let pkt = + black_box(Packet2::new(in_pkt.iter_mut())); + black_box(match direction { + In => { + pkt.parse_inbound(VpcParser {}).unwrap(); + } + Out => { + pkt.parse_outbound(VpcParser {}).unwrap(); + } + }); + } + } }, criterion::BatchSize::PerIteration, ) @@ -117,6 +137,7 @@ pub fn test_handle( M::label() )); + let parser = case.parse_with(); c.bench_with_input( BenchmarkId::from_parameter(case.instance_name()), &case, @@ -124,30 +145,52 @@ pub fn test_handle( b.iter_batched( || { let (init_pkt, dir) = case.generate(); - let parsed_pkt = match case.parse_with() { - ParserKind::Generic => { - init_pkt.parse(dir, GenericUlp {}).unwrap() - } - ParserKind::OxideVpc => { - init_pkt.parse(dir, VpcParser {}).unwrap() - } - }; - case.pre_handle(&port); - (parsed_pkt, dir) + (init_pkt, dir) }, - |(mut pkt, dir)| { - assert!(!matches!( - port.port - .process( - dir, - black_box(&mut pkt), - ActionMeta::new(), - ) - .unwrap(), - ProcessResult::Drop { .. } - )) + // Can't seem to match outside here -- must be missing something. + // Sadly, we can't elide parsing here as the + // packet is now a view over the generated pkt. 
+ |(mut pkt_m, dir): TestCase| match parser { + ParserKind::Generic => { + let pkt = Packet2::new(pkt_m.iter_mut()); + let res = match dir { + In => { + let pkt = + pkt.parse_inbound(GenericUlp {}).unwrap(); + port.port.process(dir, black_box(pkt)).unwrap() + } + Out => { + let pkt = + pkt.parse_outbound(GenericUlp {}).unwrap(); + port.port.process(dir, black_box(pkt)).unwrap() + } + }; + assert!(!matches!(res, ProcessResult::Drop { .. })); + if let Modified(spec) = res { + black_box(spec.apply(pkt_m)); + } + } + ParserKind::OxideVpc => { + let pkt = Packet2::new(pkt_m.iter_mut()); + let res = match dir { + In => { + let pkt = + pkt.parse_inbound(VpcParser {}).unwrap(); + port.port.process(dir, black_box(pkt)).unwrap() + } + Out => { + let pkt = + pkt.parse_outbound(VpcParser {}).unwrap(); + port.port.process(dir, black_box(pkt)).unwrap() + } + }; + assert!(!matches!(res, ProcessResult::Drop { .. })); + if let Modified(spec) = res { + black_box(spec.apply(pkt_m)); + } + } }, criterion::BatchSize::PerIteration, ) diff --git a/bench/src/packet.rs b/bench/src/packet.rs index 44df84e0..2cc3a158 100644 --- a/bench/src/packet.rs +++ b/bench/src/packet.rs @@ -5,22 +5,32 @@ // Copyright 2024 Oxide Computer Company use opte::engine::dhcpv6::MessageType; +use opte::engine::ingot_base::Ethernet; +use opte::engine::ingot_base::Ipv4; +use opte::engine::ingot_base::Ipv6; +use opte::engine::ingot_base::L3Repr; +use opte::engine::ingot_base::UlpRepr; +use opte::engine::ingot_packet::MsgBlk; use opte::engine::packet::Initialized; use opte::engine::packet::Packet; use opte::engine::Direction; +use opte::ingot::tcp::Tcp; +use opte::ingot::tcp::TcpFlags; +use opte::ingot::types::HeaderLen; +use opte::ingot::udp::Udp; use opte_test_utils::dhcp::dhcpv6_with_reasonable_defaults; -use opte_test_utils::dhcp::packet_from_client_dhcpv4_message_unparsed; -use opte_test_utils::dhcp::packet_from_client_dhcpv6_message_unparsed; +use opte_test_utils::dhcp::packet_from_client_dhcpv4_message; +use 
opte_test_utils::dhcp::packet_from_client_dhcpv6_message; use opte_test_utils::dhcp::DhcpRepr; -use opte_test_utils::icmp::gen_icmp_echo_unparsed; -use opte_test_utils::icmp::gen_icmpv6_echo_unparsed; -use opte_test_utils::icmp::generate_ndisc_unparsed; +use opte_test_utils::icmp::gen_icmp_echo; +use opte_test_utils::icmp::gen_icmpv6_echo; +use opte_test_utils::icmp::generate_ndisc; use opte_test_utils::icmp::NdiscRepr; use opte_test_utils::icmp::RawHardwareAddress; use opte_test_utils::overlay::BOUNDARY_SERVICES_VNI; use opte_test_utils::*; -pub type TestCase = (Packet, Direction); +pub type TestCase = (MsgBlk, Direction); pub enum ParserKind { Generic, @@ -42,7 +52,7 @@ pub trait BenchPacketInstance { fn instance_name(&self) -> String; /// Generate a single test packet. - fn generate(&self) -> (Packet, Direction); + fn generate(&self) -> (MsgBlk, Direction); /// Create a custom port for this benchmark instance. fn create_port(&self) -> Option { @@ -150,12 +160,12 @@ impl BenchPacketInstance for UlpProcessInstance { // flowkey. This will also set up our UFT entry. let self_but_out = Self { direction: Direction::Out, ..self.clone() }; - let (pkt, dir) = self_but_out.generate(); - let mut pkt = pkt.parse(dir, VpcParser {}).unwrap(); + let (mut pkt_m, dir) = self_but_out.generate(); + let pkt = parse_outbound(&mut pkt_m, VpcParser {}).unwrap(); if self.fast_path { if let ProcessResult::Drop { reason } = - port.port.process(dir, &mut pkt, ActionMeta::new()).unwrap() + port.port.process(dir, pkt).unwrap() { panic!("failed to pass in pkt: {reason:?}"); }; @@ -165,6 +175,9 @@ impl BenchPacketInstance for UlpProcessInstance { port.port.clear_lft(layer).unwrap(); } } + + // Note: don't need to finish processing the packet + // -- the op we care about is just establishing the UFT state. 
} fn instance_name(&self) -> String { @@ -174,8 +187,8 @@ impl BenchPacketInstance for UlpProcessInstance { ) } - fn generate(&self) -> (Packet, Direction) { - let (my_ip, my_guest_ip, partner_ip, ether_type): ( + fn generate(&self) -> (MsgBlk, Direction) { + let (my_ip, my_guest_ip, partner_ip, ethertype): ( IpAddr, IpAddr, IpAddr, @@ -185,13 +198,13 @@ impl BenchPacketInstance for UlpProcessInstance { self.cfg.ipv4().external_ips.ephemeral_ip.unwrap().into(), self.cfg.ipv4().private_ip.into(), "93.184.216.34".parse().unwrap(), - EtherType::Ipv4, + Ethertype::IPV4, ), IpVariant::V6 => ( self.cfg.ipv6().external_ips.ephemeral_ip.unwrap().into(), self.cfg.ipv6().private_ip.into(), "2606:2800:220:1:248:1893:25c8:1946".parse().unwrap(), - EtherType::Ipv6, + Ethertype::IPV6, ), }; let (src_mac, dst_mac) = match self.direction { @@ -202,57 +215,55 @@ impl BenchPacketInstance for UlpProcessInstance { Direction::Out => (my_guest_ip, partner_ip, 10010, 80), Direction::In => (partner_ip, my_ip, 80, 10010), }; - let eth = EtherMeta { dst: dst_mac, src: src_mac, ether_type }; + let eth = Ethernet { destination: dst_mac, source: src_mac, ethertype }; let body = vec![0u8; self.body_len]; - let (ulp, next_hdr): (UlpMeta, _) = match self.proto { + let (ulp, next_header) = match self.proto { ProtoVariant::Tcp => ( - TcpMeta { - src: src_port, - dst: dst_port, + UlpRepr::Tcp(Tcp { + source: src_port, + destination: dst_port, flags: TcpFlags::ACK, - seq: 1234, - ack: 3456, + sequence: 1234, + acknowledgement: 3456, window_size: 1, - csum: [0; 2], - options_bytes: None, - options_len: 0, - } - .into(), - IpProtocol::Tcp, + ..Default::default() + }), + IngotIpProto::TCP, ), ProtoVariant::Udp => ( - UdpMeta { - src: src_port, - dst: dst_port, - len: (UdpHdr::SIZE + body.len()) as u16, - csum: [0; 2], - } - .into(), - IpProtocol::Udp, + UlpRepr::Udp(Udp { + source: src_port, + destination: dst_port, + length: (Udp::MINIMUM_LENGTH + body.len()) as u16, + ..Default::default() + }), + 
IngotIpProto::UDP, ), }; - let proto = Protocol::from(next_hdr); - let ip: IpMeta = match (src_ip, dst_ip) { - (IpAddr::Ip4(src), IpAddr::Ip4(dst)) => Ipv4Meta { - src, - dst, - proto, - total_len: (Ipv4Hdr::BASE_SIZE + ulp.hdr_len() + body.len()) - as u16, - ..Ipv4Meta::default() + let protocol = next_header; + let ip = match (src_ip, dst_ip) { + (IpAddr::Ip4(source), IpAddr::Ip4(destination)) => { + L3Repr::Ipv4(Ipv4 { + source, + destination, + protocol, + total_len: (Ipv4::MINIMUM_LENGTH + + (&ulp, &body).packet_length()) + as u16, + ..Default::default() + }) } - .into(), - (IpAddr::Ip6(src), IpAddr::Ip6(dst)) => Ipv6Meta { - src, - dst, - next_hdr, - proto, - pay_len: (ulp.hdr_len() + body.len()) as u16, - ..Ipv6Meta::default() + (IpAddr::Ip6(source), IpAddr::Ip6(destination)) => { + L3Repr::Ipv6(Ipv6 { + source, + destination, + next_header, + payload_len: (&ulp, &body).packet_length() as u16, + ..Default::default() + }) } - .into(), _ => unreachable!(), }; @@ -276,14 +287,7 @@ impl BenchPacketInstance for UlpProcessInstance { } }; - let buf = out_pkt.all_bytes(); - - let len = buf.len(); - let mut pkt = Packet::alloc_and_expand(len); - let mut wtr = pkt.seg0_wtr(); - wtr.slice_mut(len).unwrap().copy_from_slice(&buf[..]); - - (pkt, self.direction) + (out_pkt, self.direction) } fn create_port(&self) -> Option { @@ -359,7 +363,7 @@ impl BenchPacketInstance for Dhcp4Instance { format!("{self:?}") } - fn generate(&self) -> (Packet, Direction) { + fn generate(&self) -> (MsgBlk, Direction) { let cfg = g1_cfg(); let message_type = match self { Dhcp4Instance::Discover => dhcp::DhcpMessageType::Discover, @@ -396,10 +400,7 @@ impl BenchPacketInstance for Dhcp4Instance { additional_options: &[], }; - ( - packet_from_client_dhcpv4_message_unparsed(&cfg, &repr), - Direction::Out, - ) + (packet_from_client_dhcpv4_message(&cfg, &repr), Direction::Out) } } @@ -429,7 +430,7 @@ impl BenchPacketInstance for Dhcp6Instance { format!("{self:?}") } - fn generate(&self) -> (Packet, 
Direction) { + fn generate(&self) -> (MsgBlk, Direction) { let cfg = g1_cfg(); let class = match self { Dhcp6Instance::Solicit => MessageType::Solicit, @@ -437,10 +438,7 @@ impl BenchPacketInstance for Dhcp6Instance { }; let repr = dhcpv6_with_reasonable_defaults(class, false, &cfg); - ( - packet_from_client_dhcpv6_message_unparsed(&cfg, &repr), - Direction::Out, - ) + (packet_from_client_dhcpv6_message(&cfg, &repr), Direction::Out) } } @@ -464,13 +462,13 @@ impl BenchPacketInstance for Icmp4 { "EchoRequest".into() } - fn generate(&self) -> (Packet, Direction) { + fn generate(&self) -> (MsgBlk, Direction) { let cfg = g1_cfg(); let ident = 7; let seq_no = 777; let data = b"reunion\0"; - let pkt = gen_icmp_echo_unparsed( + let pkt = gen_icmp_echo( icmp::IcmpEchoType::Req, cfg.guest_mac, cfg.gateway_mac, @@ -517,14 +515,14 @@ impl BenchPacketInstance for Icmp6Instance { format!("{self:?}") } - fn generate(&self) -> (Packet, Direction) { + fn generate(&self) -> (MsgBlk, Direction) { let cfg = g1_cfg(); let ident = 7; let seq_no = 777; let data = b"reunion\0"; let pkt = match self { - Icmp6Instance::EchoRequest => gen_icmpv6_echo_unparsed( + Icmp6Instance::EchoRequest => gen_icmpv6_echo( icmp::IcmpEchoType::Req, cfg.guest_mac, cfg.gateway_mac, @@ -542,7 +540,7 @@ impl BenchPacketInstance for Icmp6Instance { &cfg.guest_mac, )), }; - generate_ndisc_unparsed( + generate_ndisc( solicit, cfg.guest_mac, cfg.gateway_mac, @@ -558,7 +556,7 @@ impl BenchPacketInstance for Icmp6Instance { }; let dst_ip = Ipv6Addr::ALL_ROUTERS; - generate_ndisc_unparsed( + generate_ndisc( solicit, src_mac, // Must be destined for the All-Routers IPv6 address, and the corresponding diff --git a/crates/opte-api/src/encap.rs b/crates/opte-api/src/encap.rs index 142515d9..e036633f 100644 --- a/crates/opte-api/src/encap.rs +++ b/crates/opte-api/src/encap.rs @@ -23,7 +23,7 @@ mod test { fn good_vni() { assert!(Vni::new(0u32).is_ok()); assert!(Vni::new(11u8).is_ok()); - assert!(Vni::new(VNI_MAX).is_ok()); 
+ assert!(Vni::new((1u32 << 24) - 1).is_ok()); } #[test] @@ -35,7 +35,7 @@ mod test { #[test] fn vni_round_trip() { let vni = Vni::new(7777u32).unwrap(); - assert_eq!([0x00, 0x1E, 0x61], vni.inner); + assert_eq!([0x00, 0x1E, 0x61], vni.bytes()); assert_eq!(7777, u32::from(vni)); } } diff --git a/lib/opte-test-utils/src/icmp.rs b/lib/opte-test-utils/src/icmp.rs index 280bb167..6ec1a16a 100644 --- a/lib/opte-test-utils/src/icmp.rs +++ b/lib/opte-test-utils/src/icmp.rs @@ -150,8 +150,7 @@ pub fn gen_icmp_echo( }; ip.fill_checksum(); - let total_len = - eth.packet_length() + ip.packet_length() + icmp.buffer_len(); + let total_len = eth.packet_length() + ip.packet_length() + icmp_bytes.len(); let mut segments = vec![]; match n_segments { diff --git a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index 7ea930a4..4d79db17 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -36,6 +36,7 @@ use opte::engine::ingot_base::Ethernet; use opte::engine::ingot_base::Ipv4; use opte::engine::ingot_base::Ipv6; use opte::engine::ingot_base::L3Repr; +use opte::engine::ingot_packet::LightParsedMblk; use opte::engine::ingot_packet::MsgBlk; use opte::engine::ingot_packet::Packet2; pub use opte::engine::ip4::Ipv4Addr; @@ -50,6 +51,7 @@ pub use opte::engine::packet::BodyInfo; pub use opte::engine::packet::HdrOffset; pub use opte::engine::packet::Initialized; pub use opte::engine::packet::Packet; +use opte::engine::packet::ParseError; pub use opte::engine::packet::Parsed; pub use opte::engine::port::meta::ActionMeta; pub use opte::engine::port::DropReason; @@ -63,6 +65,7 @@ pub use opte::engine::tcp::TcpMeta; pub use opte::engine::udp::UdpHdr; pub use opte::engine::udp::UdpMeta; pub use opte::engine::GenericUlp; +use opte::engine::NetworkParser; pub use opte::ingot::ethernet::Ethertype; use opte::ingot::geneve::Geneve; use opte::ingot::geneve::GeneveOpt; @@ -107,6 +110,36 @@ pub use smoltcp::wire::IpProtocol; pub use std::num::NonZeroU32; pub 
use std::sync::Arc; +/// Expects that a packet result is modified, and applies that modification. +#[macro_export] +macro_rules! expect_modified { + ($res:ident, $pkt:ident) => { + assert!(matches!($res, Ok(Modified(_)))); + #[allow(unused_assignments)] + if let Ok(Modified(spec)) = $res { + $pkt = spec.apply($pkt); + } + }; +} + +pub fn parse_inbound( + pkt: &mut MsgBlk, + parser: NP, +) -> Result>>, ParseError> +{ + let pkt = Packet2::new(pkt.iter_mut()); + pkt.parse_inbound(parser) +} + +pub fn parse_outbound( + pkt: &mut MsgBlk, + parser: NP, +) -> Result>>, ParseError> +{ + let pkt = Packet2::new(pkt.iter_mut()); + pkt.parse_outbound(parser) +} + // It's imperative that this list stays in sync with the layers that // makeup the VPC implementation. We verify this in the `check_layers` // test. diff --git a/lib/oxide-vpc/tests/firewall_tests.rs b/lib/oxide-vpc/tests/firewall_tests.rs index 50effe53..4afc71e5 100644 --- a/lib/oxide-vpc/tests/firewall_tests.rs +++ b/lib/oxide-vpc/tests/firewall_tests.rs @@ -1,3 +1,4 @@ +use opte::engine::ingot_packet::MsgBlk; use opte_test_utils as common; use common::*; @@ -32,9 +33,10 @@ fn firewall_replace_rules() { // Run the SYN packet through g1's port in the outbound direction // and verify if passes the firewall. 
// ================================================================ - let mut pkt1 = http_syn(&g1_cfg, &g2_cfg); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let mut pkt1_m = http_syn(&g1_cfg, &g2_cfg); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -71,9 +73,10 @@ fn firewall_replace_rules() { ] ); - let mut pkt2 = http_syn(&g1_cfg, &g2_cfg); - let res = g1.port.process(Out, &mut pkt2, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let mut pkt2_m = http_syn(&g1_cfg, &g2_cfg); + let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt2); + expect_modified!(res, pkt2_m); incr!( g1, [ @@ -88,14 +91,16 @@ fn firewall_replace_rules() { // of the real process we first dump the raw bytes of g1's // outgoing packet and then reparse it. // ================================================================ - let mblk = pkt2.unwrap_mblk(); - let mut pkt3 = unsafe { - Packet::wrap_mblk_and_parse(mblk, In, VpcParser::new()).unwrap() - }; - let mut pkt3_copy = - Packet::copy(&pkt3.all_bytes()).parse(In, VpcParser::new()).unwrap(); - let res = g2.port.process(In, &mut pkt3, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + + let mut pkt3_m = pkt2_m; + let pkt3_bytes = pkt3_m.copy_all(); + let mut pkt3_copy_m = MsgBlk::copy(pkt3_bytes); + + let pkt3 = parse_inbound(&mut pkt3_m, VpcParser {}).unwrap(); + let pkt3_copy = parse_inbound(&mut pkt3_copy_m, VpcParser {}).unwrap(); + + let res = g2.port.process(In, pkt3); + expect_modified!(res, pkt3_m); incr!( g2, [ @@ -130,7 +135,7 @@ fn firewall_replace_rules() { // Verify the packet is dropped and that the firewall flow table // entry (along with its dual) was invalidated. 
- let res = g2.port.process(In, &mut pkt3_copy, ActionMeta::new()); + let res = g2.port.process(In, pkt3_copy); assert_drop!( res, DropReason::Layer { name: "firewall", reason: DenyReason::Rule } @@ -181,20 +186,21 @@ fn firewall_vni_inbound() { mac: g2_cfg.guest_mac, vni: g2_cfg.vni, }; - let mut pkt1 = http_syn2( + let mut pkt1_m = http_syn2( g2_cfg.guest_mac, g2_cfg.ipv4().private_ip, g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, ); - pkt1 = encap(pkt1, phys_src, phys_dst); + pkt1_m = encap(pkt1_m, phys_src, phys_dst); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); // ================================================================ // Verify that g1's firewall rejects this packet, as the default // VPC firewall rules dictate that only inbound traffic from the // same VPC should be allowed. // ================================================================ - let res = g1.port.process(In, &mut pkt1, ActionMeta::new()); + let res = g1.port.process(In, pkt1); assert_drop!( res, DropReason::Layer { name: "firewall", reason: DenyReason::Default } @@ -222,15 +228,16 @@ fn firewall_vni_inbound() { mac: g2_cfg.guest_mac, vni: g2_cfg.vni, }; - let mut pkt2 = http_syn2( + let mut pkt2_m = http_syn2( g2_cfg.guest_mac, g2_cfg.ipv4().private_ip, g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, ); - pkt2 = encap(pkt2, phys_src, phys_dst); - let res = g1.port.process(In, &mut pkt2, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + pkt2_m = encap(pkt2_m, phys_src, phys_dst); + let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt2); + expect_modified!(res, pkt2_m); incr!( g1, [ @@ -293,18 +300,19 @@ fn firewall_vni_outbound() { mac: g2_cfg.guest_mac, vni: g2_cfg.vni, }; - let mut pkt1 = http_syn2( + let mut pkt1_m = http_syn2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, g1_cfg.guest_mac, g2_cfg.ipv4().private_ip, ); - pkt1 = encap(pkt1, phys_src, phys_dst); + // pkt1 = encap(pkt1, phys_src, phys_dst); + let pkt1 
= parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); // ================================================================ // Try to send the packet and verify the firewall does not allow it. // ================================================================ - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); + let res = g1.port.process(Out, pkt1); assert_drop!( res, DropReason::Layer { name: "firewall", reason: DenyReason::Rule } @@ -356,20 +364,21 @@ fn firewall_external_inbound() { vni: g1_cfg.vni, }; - let mut pkt1 = http_syn2( + let mut pkt1_m = http_syn2( BS_MAC_ADDR, std::net::IpAddr::from([1, 1, 1, 1]), g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, ); - pkt1 = encap_external(pkt1, bsvc_phys, guest_phys); + pkt1_m = encap_external(pkt1_m, bsvc_phys, guest_phys); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); // ================================================================ // Verify that g1's firewall rejects this packet, as the default // VPC firewall rules dictate that only inbound traffic from the // same VPC should be allowed. 
// ================================================================ - let res = g1.port.process(In, &mut pkt1, ActionMeta::new()); + let res = g1.port.process(In, pkt1); assert_drop!( res, DropReason::Layer { name: "firewall", reason: DenyReason::Default } diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index 1f7a03fd..6e110ba5 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -141,35 +141,6 @@ fn lab_cfg() -> VpcCfg { } } -fn parse_inbound( - pkt: &mut MsgBlk, - parser: NP, -) -> Result>>, ParseError> -{ - let pkt = Packet2::new(pkt.iter_mut()); - pkt.parse_inbound(parser) -} - -fn parse_outbound( - pkt: &mut MsgBlk, - parser: NP, -) -> Result>>, ParseError> -{ - let pkt = Packet2::new(pkt.iter_mut()); - pkt.parse_outbound(parser) -} - -/// Expects that a packet result is modified, and applies that modification. -macro_rules! expect_modified { - ($res:ident, $pkt:ident) => { - assert!(matches!($res, Ok(Modified(_)))); - #[allow(unused_assignments)] - if let Ok(Modified(spec)) = $res { - $pkt = spec.apply($pkt); - } - }; -} - // Verify that the list of layers is what we expect. #[test] fn check_layers() { @@ -2517,15 +2488,20 @@ fn test_gateway_neighbor_advert_reply() { let mut with_checksum = false; let data = generate_solicit_test_data(&g1_cfg); for d in data { + // TODO(kyle) + let with_checksum = true; + let mut pkt = generate_neighbor_solicitation(&d.ns, with_checksum); // Alternate between using smoltcp or our `compute_checksums` method // to compute the checksums. 
- if !with_checksum { - pkt.compute_checksums(); - } - with_checksum = !with_checksum; + // TODO(kyle) + // if !with_checksum { + // pkt.compute_checksums(); + // } + // with_checksum = !with_checksum; pcap.add_pkt(&pkt); - let res = g1.port.process(Out, &mut pkt, ActionMeta::new()); + let pkt1 = parse_outbound(&mut pkt, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); match (res, d.na) { (Ok(ProcessResult::Drop { .. }), None) => { // Dropped the packet, as we expected From 2811546329e1752ecf2fa0cb9c07097ffc3ca36f Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Sun, 13 Oct 2024 10:45:37 +0100 Subject: [PATCH 045/115] Need a new nightly + some tweaks. --- .github/buildomat/jobs/opte-api.sh | 4 ++-- .github/buildomat/jobs/opte-ioctl.sh | 4 ++-- .github/buildomat/jobs/opte.sh | 8 ++++---- .github/buildomat/jobs/opteadm.sh | 4 ++-- .github/buildomat/jobs/oxide-vpc.sh | 8 ++++---- .github/buildomat/jobs/p5p.sh | 2 +- .github/buildomat/jobs/xde.sh | 6 +++--- lib/opte-test-utils/src/icmp.rs | 8 ++------ lib/opte-test-utils/src/lib.rs | 2 +- lib/opte/src/engine/ingot_packet.rs | 4 +++- lib/opte/src/engine/packet.rs | 2 +- xde/rust-toolchain.toml | 2 +- 12 files changed, 26 insertions(+), 28 deletions(-) diff --git a/.github/buildomat/jobs/opte-api.sh b/.github/buildomat/jobs/opte-api.sh index c08c3ff5..9628a2cc 100755 --- a/.github/buildomat/jobs/opte-api.sh +++ b/.github/buildomat/jobs/opte-api.sh @@ -3,7 +3,7 @@ #: name = "opte-api" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "nightly-2024-05-12" +#: rust_toolchain = "nightly-2024-10-12" #: output_rules = [] #: access_repos = [ #: "oxidecomputer/ingot", @@ -27,7 +27,7 @@ header "check API_VERSION" ./check-api-version.sh header "check style" -ptime -m cargo +nightly-2024-05-12 fmt -- --check +ptime -m cargo +nightly-2024-10-12 fmt -- --check header "analyze std" ptime -m cargo clippy --all-targets diff --git a/.github/buildomat/jobs/opte-ioctl.sh 
b/.github/buildomat/jobs/opte-ioctl.sh index 5f2adf40..2e6315a3 100755 --- a/.github/buildomat/jobs/opte-ioctl.sh +++ b/.github/buildomat/jobs/opte-ioctl.sh @@ -3,7 +3,7 @@ #: name = "opte-ioctl" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "nightly-2024-05-12" +#: rust_toolchain = "nightly-2024-10-12" #: output_rules = [] #: access_repos = [ #: "oxidecomputer/ingot", @@ -24,7 +24,7 @@ rustc --version cd lib/opte-ioctl header "check style" -ptime -m cargo +nightly-2024-05-12 fmt -- --check +ptime -m cargo +nightly-2024-10-12 fmt -- --check header "analyze" ptime -m cargo clippy --all-targets diff --git a/.github/buildomat/jobs/opte.sh b/.github/buildomat/jobs/opte.sh index a4668e9f..635802d2 100755 --- a/.github/buildomat/jobs/opte.sh +++ b/.github/buildomat/jobs/opte.sh @@ -3,7 +3,7 @@ #: name = "opte" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "nightly-2024-05-12" +#: rust_toolchain = "nightly-2024-10-12" #: output_rules = [] #: access_repos = [ #: "oxidecomputer/ingot", @@ -24,7 +24,7 @@ rustc --version cd lib/opte header "check style" -ptime -m cargo +nightly-2024-05-12 fmt -- --check +ptime -m cargo +nightly-2024-10-12 fmt -- --check header "check docs" # @@ -33,13 +33,13 @@ header "check docs" # # Use nightly which is needed for the `kernel` feature. 
RUSTDOCFLAGS="-D warnings" ptime -m \ - cargo +nightly-2024-05-12 doc --no-default-features --features=api,std,engine,kernel + cargo +nightly-2024-10-12 doc --no-default-features --features=api,std,engine,kernel header "analyze std + api" ptime -m cargo clippy --all-targets header "analyze no_std + engine + kernel" -ptime -m cargo +nightly-2024-05-12 clippy --no-default-features --features engine,kernel +ptime -m cargo +nightly-2024-10-12 clippy --no-default-features --features engine,kernel header "test" ptime -m cargo test diff --git a/.github/buildomat/jobs/opteadm.sh b/.github/buildomat/jobs/opteadm.sh index 18193b98..9d94cce8 100755 --- a/.github/buildomat/jobs/opteadm.sh +++ b/.github/buildomat/jobs/opteadm.sh @@ -3,7 +3,7 @@ #: name = "opteadm" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "nightly-2024-05-12" +#: rust_toolchain = "nightly-2024-10-12" #: output_rules = [ #: "=/work/debug/opteadm", #: "=/work/debug/opteadm.debug.sha256", @@ -29,7 +29,7 @@ rustc --version pushd bin/opteadm header "check style" -ptime -m cargo +nightly-2024-05-12 fmt -- --check +ptime -m cargo +nightly-2024-10-12 fmt -- --check header "analyze" ptime -m cargo clippy --all-targets diff --git a/.github/buildomat/jobs/oxide-vpc.sh b/.github/buildomat/jobs/oxide-vpc.sh index da3cc073..e919bd93 100755 --- a/.github/buildomat/jobs/oxide-vpc.sh +++ b/.github/buildomat/jobs/oxide-vpc.sh @@ -3,7 +3,7 @@ #: name = "oxide-vpc" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "nightly-2024-05-12" +#: rust_toolchain = "nightly-2024-10-12" #: output_rules = [] #: access_repos = [ #: "oxidecomputer/ingot", @@ -24,7 +24,7 @@ rustc --version cd lib/oxide-vpc header "check style" -ptime -m cargo +nightly-2024-05-12 fmt -- --check +ptime -m cargo +nightly-2024-10-12 fmt -- --check header "check docs" # @@ -33,13 +33,13 @@ header "check docs" # # Use nightly which is needed for the `kernel` feature. 
RUSTDOCFLAGS="-D warnings" ptime -m \ - cargo +nightly-2024-05-12 doc --no-default-features --features=api,std,engine,kernel + cargo +nightly-2024-10-12 doc --no-default-features --features=api,std,engine,kernel header "analyze std + api + usdt" ptime -m cargo clippy --features usdt --all-targets header "analyze no_std + engine + kernel" -ptime -m cargo +nightly-2024-05-12 clippy --no-default-features --features engine,kernel +ptime -m cargo +nightly-2024-10-12 clippy --no-default-features --features engine,kernel header "test" ptime -m cargo test diff --git a/.github/buildomat/jobs/p5p.sh b/.github/buildomat/jobs/p5p.sh index 20e5c65c..af2ec788 100755 --- a/.github/buildomat/jobs/p5p.sh +++ b/.github/buildomat/jobs/p5p.sh @@ -3,7 +3,7 @@ #: name = "opte-p5p" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "nightly-2024-05-12" +#: rust_toolchain = "nightly-2024-10-12" #: output_rules = [ #: "=/out/opte.p5p", #: "=/out/opte.p5p.sha256", diff --git a/.github/buildomat/jobs/xde.sh b/.github/buildomat/jobs/xde.sh index 83986624..cc785611 100755 --- a/.github/buildomat/jobs/xde.sh +++ b/.github/buildomat/jobs/xde.sh @@ -3,7 +3,7 @@ #: name = "opte-xde" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "nightly-2024-05-12" +#: rust_toolchain = "nightly-2024-10-12" #: output_rules = [ #: "=/work/debug/xde.dbg", #: "=/work/debug/xde.dbg.sha256", @@ -78,7 +78,7 @@ pushd xde cp xde.conf /work/xde.conf header "check style" -ptime -m cargo +nightly-2024-05-12 fmt -p xde -p xde-link -- --check +ptime -m cargo +nightly-2024-10-12 fmt -p xde -p xde-link -- --check header "analyze" ptime -m cargo clippy -- \ @@ -126,7 +126,7 @@ sha256sum $REL_TGT/xde_link.so > $REL_TGT/xde_link.so.sha256 header "build xde integration tests" pushd xde-tests -cargo +nightly-2024-05-12 fmt -- --check +cargo +nightly-2024-10-12 fmt -- --check cargo clippy --all-targets cargo build --test loopback loopback_test=$( diff --git a/lib/opte-test-utils/src/icmp.rs 
b/lib/opte-test-utils/src/icmp.rs index 6ec1a16a..19870fdf 100644 --- a/lib/opte-test-utils/src/icmp.rs +++ b/lib/opte-test-utils/src/icmp.rs @@ -157,7 +157,6 @@ pub fn gen_icmp_echo( 1 => { let mut pkt = MsgBlk::new_ethernet(total_len); pkt.emit_back(&(eth, ip)).unwrap(); - pkt.resize(total_len).unwrap(); pkt.write_bytes_back(&icmp_bytes).unwrap(); return pkt; @@ -170,7 +169,6 @@ pub fn gen_icmp_echo( let t_len = ip.packet_length() + icmp.buffer_len(); let mut pkt = MsgBlk::new(t_len); pkt.emit_back(ip).unwrap(); - pkt.resize(t_len).unwrap(); pkt.write_bytes_back(&icmp_bytes).unwrap(); segments.push(pkt); } @@ -265,7 +263,7 @@ pub fn gen_icmpv6_echo( let eth = Ethernet { destination: eth_dst, source: eth_src, - ethertype: Ethertype::IPV4, + ethertype: Ethertype::IPV6, }; let ip = Ipv6 { @@ -285,7 +283,6 @@ pub fn gen_icmpv6_echo( 1 => { let mut pkt = MsgBlk::new_ethernet(total_len); pkt.emit_back(&(eth, ip)); - pkt.resize(total_len); pkt.write_bytes_back(&body_bytes).unwrap(); return pkt; @@ -298,7 +295,6 @@ pub fn gen_icmpv6_echo( let t_len = ip.packet_length() + icmp.buffer_len(); let mut pkt = MsgBlk::new(t_len); pkt.emit_back(ip).unwrap(); - pkt.resize(t_len).unwrap(); pkt.write_bytes_back(&body_bytes).unwrap(); segments.push(pkt); } @@ -308,7 +304,7 @@ pub fn gen_icmpv6_echo( segments.push(pkt); let mut pkt = MsgBlk::new(ip.packet_length()); - pkt.emit_back(eth).unwrap(); + pkt.emit_back(ip).unwrap(); segments.push(pkt); let mut pkt = MsgBlk::new(icmp.buffer_len()); diff --git a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index 4d79db17..20374de4 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -495,7 +495,7 @@ pub fn ulp_pkt< let view = Packet2::new(pkt.iter_mut()); let view = view.parse_outbound(GenericUlp {}).unwrap(); let mut view = view.to_full_meta(); - view.compute_checksums(); + // view.compute_checksums(); drop(view); // Note: we don't need to create and act on an EmitSpec here diff --git 
a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 1079f86a..ed8413af 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -1,3 +1,5 @@ +use crate::engine::packet::mock_freemsg; + use super::checksum::Checksum as OpteCsum; use super::checksum::Checksum; use super::checksum::HeaderChecksum; @@ -895,7 +897,7 @@ impl Drop for MsgBlk { if #[cfg(all(not(feature = "std"), not(test)))] { unsafe { ddi::freemsg(self.inner.as_ptr()) }; } else { - // mock_freemsg(self.inner.as_ptr()); + mock_freemsg(self.inner.as_ptr()); } } } diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index de40a192..aee68f90 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -3264,7 +3264,7 @@ pub fn mock_desballoc(buf: Vec) -> *mut mblk_t { // The std equivalent to `freemsg(9F)`. #[cfg(any(feature = "std", test))] -fn mock_freemsg(mut mp: *mut mblk_t) { +pub(crate) fn mock_freemsg(mut mp: *mut mblk_t) { while !mp.is_null() { let cont = unsafe { (*mp).b_cont }; mock_freeb(mp); diff --git a/xde/rust-toolchain.toml b/xde/rust-toolchain.toml index e2d73ef6..5b5cdf89 100644 --- a/xde/rust-toolchain.toml +++ b/xde/rust-toolchain.toml @@ -1,5 +1,5 @@ [toolchain] -channel = "nightly-2024-09-12" +channel = "nightly-2024-10-12" target = "x86_64-unknown-illumos" components = [ "clippy", "rustfmt", "rust-src" ] profile = "minimal" From 05b2e5bb14c50255426dd9a00ca964ade497e298 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 14 Oct 2024 17:10:38 +0100 Subject: [PATCH 046/115] One or two checksum bugs still to unearth, then TCP state. 
--- bench/benches/userland.rs | 4 +- lib/opte-test-utils/src/icmp.rs | 8 +- lib/opte-test-utils/src/lib.rs | 12 +- lib/opte/src/engine/checksum.rs | 13 ++ lib/opte/src/engine/icmp/v4.rs | 44 ++-- lib/opte/src/engine/icmp/v6.rs | 123 +++++------ lib/opte/src/engine/ingot_base.rs | 186 +++++++++++++++-- lib/opte/src/engine/ingot_packet.rs | 252 ++++++++++++++--------- lib/opte/src/engine/mod.rs | 20 +- lib/opte/src/engine/port.rs | 2 +- lib/oxide-vpc/.gitignore | 1 + lib/oxide-vpc/src/engine/mod.rs | 8 +- lib/oxide-vpc/tests/integration_tests.rs | 60 ++++-- 13 files changed, 472 insertions(+), 261 deletions(-) diff --git a/bench/benches/userland.rs b/bench/benches/userland.rs index 8a0c81b3..e0e07db9 100644 --- a/bench/benches/userland.rs +++ b/bench/benches/userland.rs @@ -107,7 +107,7 @@ pub fn test_parse( } } }, - criterion::BatchSize::PerIteration, + criterion::BatchSize::LargeInput, ) }, ); @@ -192,7 +192,7 @@ pub fn test_handle( } } }, - criterion::BatchSize::PerIteration, + criterion::BatchSize::LargeInput, ) }, ); diff --git a/lib/opte-test-utils/src/icmp.rs b/lib/opte-test-utils/src/icmp.rs index 19870fdf..e97c0531 100644 --- a/lib/opte-test-utils/src/icmp.rs +++ b/lib/opte-test-utils/src/icmp.rs @@ -11,6 +11,7 @@ use opte::engine::ether::*; use opte::engine::ingot_base::Ethernet; use opte::engine::ingot_base::Ipv4; use opte::engine::ingot_base::Ipv6; +use opte::engine::ingot_base::L3; use opte::engine::ingot_packet::MsgBlk; use opte::engine::ip4::*; use opte::engine::ip6::*; @@ -141,14 +142,15 @@ pub fn gen_icmp_echo( ethertype: Ethertype::IPV4, }; - let mut ip = Ipv4 { + let mut ip: L3<&mut [u8]> = Ipv4 { source: ip_src, destination: ip_dst, protocol: IngotIpProto::ICMP, total_len: (icmp.buffer_len() + Ipv4::MINIMUM_LENGTH) as u16, ..Default::default() - }; - ip.fill_checksum(); + } + .into(); + ip.compute_checksum(); let total_len = eth.packet_length() + ip.packet_length() + icmp_bytes.len(); let mut segments = vec![]; diff --git 
a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index 20374de4..27d2c99d 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -114,7 +114,11 @@ pub use std::sync::Arc; #[macro_export] macro_rules! expect_modified { ($res:ident, $pkt:ident) => { - assert!(matches!($res, Ok(Modified(_)))); + assert!( + matches!($res, Ok(Modified(_))), + "expected Modified, got {:?}", + $res + ); #[allow(unused_assignments)] if let Ok(Modified(spec)) = $res { $pkt = spec.apply($pkt); @@ -495,7 +499,7 @@ pub fn ulp_pkt< let view = Packet2::new(pkt.iter_mut()); let view = view.parse_outbound(GenericUlp {}).unwrap(); let mut view = view.to_full_meta(); - // view.compute_checksums(); + view.compute_checksums(); drop(view); // Note: we don't need to create and act on an EmitSpec here @@ -607,7 +611,7 @@ pub fn http_syn3( }), ), (IpAddr::Ip6(source), IpAddr::Ip6(destination)) => ( - Ethertype::IPV4, + Ethertype::IPV6, L3Repr::Ipv6(Ipv6 { payload_len: (tcp.packet_length() + body.len()) as u16, next_header: IngotIpProto::TCP, @@ -670,7 +674,7 @@ pub fn http_syn_ack2( }), ), (IpAddr::Ip6(source), IpAddr::Ip6(destination)) => ( - Ethertype::IPV4, + Ethertype::IPV6, L3Repr::Ipv6(Ipv6 { payload_len: (tcp.packet_length() + body.len()) as u16, next_header: IngotIpProto::TCP, diff --git a/lib/opte/src/engine/checksum.rs b/lib/opte/src/engine/checksum.rs index 59154e54..45a1ff8f 100644 --- a/lib/opte/src/engine/checksum.rs +++ b/lib/opte/src/engine/checksum.rs @@ -121,6 +121,11 @@ pub struct Checksum { } impl Checksum { + /// Creates a new checksum counter. + pub fn new() -> Self { + Self::from(0) + } + /// Update the sum based by adding the contents of `bytes`. /// /// This is useful for incrementally updating an existing checksum @@ -152,6 +157,14 @@ impl Checksum { (self.inner & 0xFFFF) as u16 } + + /// Calls [`Self::finalize`], and returns the one's complement value + /// of the checksum for storage as a `u16be`. 
+ pub fn finalize_for_ingot(&mut self) -> u16 { + let out = self.finalize(); + + (!out).to_be() + } } impl From for Checksum { diff --git a/lib/opte/src/engine/icmp/v4.rs b/lib/opte/src/engine/icmp/v4.rs index a3e7b641..b38fb1f8 100644 --- a/lib/opte/src/engine/icmp/v4.rs +++ b/lib/opte/src/engine/icmp/v4.rs @@ -7,12 +7,18 @@ //! ICMPv4 headers and processing. use super::*; +use crate::engine::ingot_base::Ethernet; +use crate::engine::ingot_base::Ipv4; +use crate::engine::ingot_base::L3; use crate::engine::ingot_packet::MsgBlk; use crate::engine::ingot_packet::PacketHeaders2; use crate::engine::ip4::Ipv4Hdr; use crate::engine::ip4::Ipv4Meta; use crate::engine::predicate::Ipv4AddrMatch; +use ingot::ethernet::Ethertype; +use ingot::ip::IpProtocol; use ingot::types::Emit; +use ingot::types::HeaderLen; pub use opte_api::ip::IcmpEchoReply; use smoltcp::wire; use smoltcp::wire::Icmpv4Message; @@ -74,8 +80,7 @@ impl HairpinAction for IcmpEchoReply { ))); }; - // `Icmpv4Packet` requires the ICMPv4 header and not just the message payload. - // Given we successfully got the ICMPv4 metadata, rewinding here is fine. + // TODO: prealloc right size. 
let mut body = icmp.emit_vec(); meta.append_remaining(&mut body); @@ -113,31 +118,24 @@ impl HairpinAction for IcmpEchoReply { csum.icmpv4 = Checksum::Tx; reply.emit(&mut icmp_reply, &csum); - let mut ip4 = Ipv4Meta { - src: self.echo_dst_ip, - dst: self.echo_src_ip, - proto: Protocol::ICMP, - total_len: (Ipv4Hdr::BASE_SIZE + reply_len) as u16, + let mut ip4: L3<&mut [u8]> = Ipv4 { + source: self.echo_dst_ip, + destination: self.echo_src_ip, + protocol: IpProtocol::ICMP, + total_len: (Ipv4::MINIMUM_LENGTH + reply_len) as u16, ..Default::default() - }; - ip4.compute_hdr_csum(); + } + .into(); + + ip4.compute_checksum(); - let eth = EtherMeta { - dst: self.echo_src_mac, - src: self.echo_dst_mac, - ether_type: EtherType::Ipv4, + let eth = Ethernet { + destination: self.echo_src_mac, + source: self.echo_dst_mac, + ethertype: Ethertype::IPV4, }; - let total_len = EtherHdr::SIZE + Ipv4Hdr::BASE_SIZE + reply_len; - let mut pkt = Packet::alloc_and_expand(total_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - wtr.write(&tmp).unwrap(); - Ok(AllowOrDeny::Allow( - unsafe { MsgBlk::wrap_mblk(pkt.unwrap_mblk()) } - .expect("known valid"), - )) + Ok(AllowOrDeny::Allow(MsgBlk::new_ethernet_pkt((ð, &ip4, &tmp)))) } } diff --git a/lib/opte/src/engine/icmp/v6.rs b/lib/opte/src/engine/icmp/v6.rs index eb0c9444..2bffa62c 100644 --- a/lib/opte/src/engine/icmp/v6.rs +++ b/lib/opte/src/engine/icmp/v6.rs @@ -7,6 +7,8 @@ //! ICMPv6 headers and processing. 
use super::*; +use crate::engine::ingot_base::Ethernet; +use crate::engine::ingot_base::Ipv6; use crate::engine::ingot_base::Ipv6Ref; use crate::engine::ingot_packet::MsgBlk; use crate::engine::ingot_packet::PacketHeaders2; @@ -14,6 +16,8 @@ use crate::engine::ip6::Ipv6Hdr; use crate::engine::ip6::Ipv6Meta; use crate::engine::predicate::Ipv6AddrMatch; use alloc::string::String; +use ingot::ethernet::Ethertype; +use ingot::ip::IpProtocol as IngotIpProto; use ingot::types::Emit; pub use opte_api::ip::Icmpv6EchoReply; pub use opte_api::ip::Ipv6Addr; @@ -179,6 +183,8 @@ impl HairpinAction for Icmpv6EchoReply { data: src_data, }; + // TODO: less Vec + let reply_len = reply.buffer_len(); let mut ulp_body = vec![0u8; reply_len]; let mut icmp_reply = Icmpv6Packet::new_unchecked(&mut ulp_body); @@ -186,33 +192,23 @@ impl HairpinAction for Icmpv6EchoReply { csum.icmpv6 = Checksum::Tx; reply.emit(&dst_ip, &src_ip, &mut icmp_reply, &csum); - let ip = Ipv6Meta { - src: self.dst_ip, - dst: self.src_ip, - proto: Protocol::ICMPv6, - next_hdr: IpProtocol::Icmpv6, - // There are no extension headers. The ULP is the only - // content. 
- pay_len: reply_len as u16, + let mut ip6 = Ipv6 { + source: self.dst_ip, + destination: self.src_ip, + next_header: IngotIpProto::ICMP_V6, + payload_len: reply_len as u16, ..Default::default() }; - let eth = EtherMeta { - ether_type: EtherType::Ipv6, - dst: self.src_mac, - src: self.dst_mac, + let eth = Ethernet { + destination: self.src_mac, + source: self.dst_mac, + ethertype: Ethertype::IPV6, }; - let total_len = EtherHdr::SIZE + Ipv6Hdr::BASE_SIZE + reply_len; - let mut pkt = Packet::alloc_and_expand(total_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - wtr.write(&ulp_body).unwrap(); - Ok(AllowOrDeny::Allow( - unsafe { MsgBlk::wrap_mblk(pkt.unwrap_mblk()) } - .expect("known valid"), - )) + Ok(AllowOrDeny::Allow(MsgBlk::new_ethernet_pkt(( + ð, &ip6, &ulp_body, + )))) } } @@ -370,38 +366,26 @@ impl HairpinAction for RouterAdvertisement { &csum, ); - let ip = Ipv6Meta { - src: *self.ip(), - // Safety: We match on this being Some(_) above, so unwrap is safe. - dst: meta.inner_ip6().unwrap().source(), - proto: Protocol::ICMPv6, - next_hdr: IpProtocol::Icmpv6, + let mut ip6 = Ipv6 { + source: *self.ip(), + destination: meta.inner_ip6().unwrap().source(), + next_header: IngotIpProto::ICMP_V6, + payload_len: reply_len as u16, + // RFC 4861 6.1.2 requires that the hop limit be 255 in an RA. hop_limit: 255, - // There are no extension headers; the ULP is the only - // content. - pay_len: reply_len as u16, ..Default::default() }; - // The Ethernet frame should come from OPTE's virtual gateway MAC, and - // be destined for the client which sent us the packet. 
- let eth = EtherMeta { - ether_type: EtherType::Ipv6, - dst: self.src_mac, - src: self.mac, + let eth = Ethernet { + destination: self.src_mac, + source: self.mac, + ethertype: Ethertype::IPV6, }; - let total_len = EtherHdr::SIZE + Ipv6Hdr::BASE_SIZE + reply_len; - let mut pkt = Packet::alloc_and_expand(total_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - wtr.write(&ulp_body).unwrap(); - Ok(AllowOrDeny::Allow( - unsafe { MsgBlk::wrap_mblk(pkt.unwrap_mblk()) } - .expect("known valid"), - )) + Ok(AllowOrDeny::Allow(MsgBlk::new_ethernet_pkt(( + ð, &ip6, &ulp_body, + )))) } } @@ -643,40 +627,29 @@ impl HairpinAction for NeighborAdvertisement { &csum, ); - let ip = Ipv6Meta { - src: *self.ip(), - dst: dst_ip, - proto: Protocol::ICMPv6, - next_hdr: IpProtocol::Icmpv6, + // While the frame must always be sent from the gateway, who the frame + // is addressed to depends on whether we should multicast the packet. + let dst_mac = dst_ip.multicast_mac().unwrap_or(self.src_mac); + + let mut ip6 = Ipv6 { + source: *self.ip(), + destination: dst_ip, + next_header: IngotIpProto::ICMP_V6, + payload_len: reply_len as u16, + // RFC 4861 7.1.2 requires that the hop limit be 255 in an NA. hop_limit: 255, - // There are no extension headers; the ULP is the only - // content. - pay_len: reply_len as u16, ..Default::default() }; - // While the frame must always be sent from the gateway, who the frame - // is addressed to depends on whether we should multicast the packet. - let dst_mac = dst_ip.multicast_mac().unwrap_or(self.src_mac); - - // The Ethernet frame should come from OPTE's virtual gateway MAC, and - // be destined for the client which sent us the packet. 
- let eth = EtherMeta { - ether_type: EtherType::Ipv6, - dst: dst_mac, - src: self.mac, + let eth = Ethernet { + destination: dst_mac, + source: self.mac, + ethertype: Ethertype::IPV6, }; - let len = EtherHdr::SIZE + Ipv6Hdr::BASE_SIZE + reply_len; - let mut pkt = Packet::alloc_and_expand(len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - wtr.write(&ulp_body).unwrap(); - Ok(AllowOrDeny::Allow( - unsafe { MsgBlk::wrap_mblk(pkt.unwrap_mblk()) } - .expect("known valid"), - )) + Ok(AllowOrDeny::Allow(MsgBlk::new_ethernet_pkt(( + ð, &ip6, &ulp_body, + )))) } } diff --git a/lib/opte/src/engine/ingot_base.rs b/lib/opte/src/engine/ingot_base.rs index e6c7abd3..c90d78b4 100644 --- a/lib/opte/src/engine/ingot_base.rs +++ b/lib/opte/src/engine/ingot_base.rs @@ -1,9 +1,12 @@ +use super::checksum::Checksum; use bitflags::bitflags; use ingot::choice; use ingot::ethernet::Ethertype; use ingot::icmp::IcmpV4; +use ingot::icmp::IcmpV4Mut; use ingot::icmp::IcmpV4Ref; use ingot::icmp::IcmpV6; +use ingot::icmp::IcmpV6Mut; use ingot::icmp::IcmpV6Ref; use ingot::icmp::ValidIcmpV4; use ingot::icmp::ValidIcmpV6; @@ -12,6 +15,7 @@ use ingot::ip::IpProtocol; use ingot::ip::Ipv4Flags; use ingot::ip::LowRentV6EhRepr; use ingot::tcp::Tcp; +use ingot::tcp::TcpMut; use ingot::tcp::TcpRef; use ingot::tcp::ValidTcp; use ingot::types::primitives::*; @@ -19,18 +23,21 @@ use ingot::types::util::Repeated; use ingot::types::ByteSlice; use ingot::types::Emit; use ingot::types::Header; +use ingot::types::HeaderLen; use ingot::types::NetworkRepr; +use ingot::types::NextLayer; use ingot::types::ParseError; use ingot::types::Vec; use ingot::udp::Udp; +use ingot::udp::UdpMut; use ingot::udp::UdpRef; use ingot::udp::ValidUdp; use ingot::Ingot; use opte_api::Ipv4Addr; use opte_api::Ipv6Addr; use opte_api::MacAddr; - -use super::checksum::Checksum; +use zerocopy::ByteSliceMut; +use zerocopy::IntoBytes; // Redefine Ethernet 
and v4/v6 because we have our own, internal, // address types already. @@ -41,6 +48,121 @@ pub enum L3 { Ipv6 = Ethertype::IPV6, } +impl L3 { + pub fn pseudo_header(&self) -> Checksum { + match self { + L3::Ipv4(v4) => { + let mut pseudo_hdr_bytes = [0u8; 12]; + pseudo_hdr_bytes[0..4].copy_from_slice(v4.source().as_ref()); + pseudo_hdr_bytes[4..8] + .copy_from_slice(v4.destination().as_ref()); + pseudo_hdr_bytes[9] = v4.protocol().0; + let ulp_len = v4.total_len() - 4 * (v4.ihl() as u16); + pseudo_hdr_bytes[10..].copy_from_slice(&ulp_len.to_be_bytes()); + + Checksum::compute(&pseudo_hdr_bytes) + } + L3::Ipv6(v6) => { + let mut pseudo_hdr_bytes = [0u8; 40]; + pseudo_hdr_bytes[0..16].copy_from_slice(&v6.source().as_ref()); + pseudo_hdr_bytes[16..32] + .copy_from_slice(&v6.destination().as_ref()); + pseudo_hdr_bytes[39] = v6.next_layer().unwrap_or_default().0; + let ulp_len = v6.payload_len() as u32; + pseudo_hdr_bytes[32..36] + .copy_from_slice(&ulp_len.to_be_bytes()); + Checksum::compute(&pseudo_hdr_bytes) + } + } + } +} + +impl ValidL3 { + pub fn pseudo_header(&self) -> Checksum { + match self { + ValidL3::Ipv4(v4) => { + let mut pseudo_hdr_bytes = [0u8; 12]; + pseudo_hdr_bytes[0..4].copy_from_slice(v4.source().as_ref()); + pseudo_hdr_bytes[4..8] + .copy_from_slice(v4.destination().as_ref()); + // pseudo_hdr_bytes[8] reserved + pseudo_hdr_bytes[9] = v4.protocol().0; + let ulp_len = v4.total_len() - 4 * (v4.ihl() as u16); + pseudo_hdr_bytes[10..].copy_from_slice(&ulp_len.to_be_bytes()); + + Checksum::compute(&pseudo_hdr_bytes) + } + ValidL3::Ipv6(v6) => { + let mut pseudo_hdr_bytes = [0u8; 40]; + pseudo_hdr_bytes[0..16].copy_from_slice(&v6.source().as_ref()); + pseudo_hdr_bytes[16..32] + .copy_from_slice(&v6.destination().as_ref()); + pseudo_hdr_bytes[39] = v6.next_layer().unwrap_or_default().0; + let ulp_len = v6.payload_len() as u32; + pseudo_hdr_bytes[32..36] + .copy_from_slice(&ulp_len.to_be_bytes()); + + Checksum::compute(&pseudo_hdr_bytes) + } + } + } +} + 
+impl L3 { + #[inline] + pub fn compute_checksum(&mut self) { + if let L3::Ipv4(ip) = self { + ip.set_checksum(0); + + let mut csum = Checksum::new(); + + match ip { + Header::Repr(ip) => { + let mut bytes = [0u8; 56]; + ip.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + } + Header::Raw(ip) => { + csum.add_bytes(ip.0.as_bytes()); + + match &ip.1 { + Header::Repr(opts) => { + csum.add_bytes(&*opts); + } + Header::Raw(opts) => { + csum.add_bytes(&*opts); + } + } + } + } + + ip.set_checksum(csum.finalize_for_ingot()); + } + } +} + +impl ValidL3 { + #[inline] + pub fn compute_checksum(&mut self) { + if let ValidL3::Ipv4(ip) = self { + ip.set_checksum(0); + + let mut csum = Checksum::new(); + csum.add_bytes(ip.0.as_bytes()); + match &ip.1 { + Header::Repr(opts) => { + csum.add_bytes(&*opts); + } + Header::Raw(opts) => { + csum.add_bytes(&*opts); + } + } + + ip.set_checksum(csum.finalize_for_ingot()); + } + } +} + #[choice(on = IpProtocol)] pub enum L4 { Tcp = IpProtocol::TCP, @@ -67,6 +189,53 @@ impl ValidUlp { } } +impl ValidUlp { + pub fn compute_checksum( + &mut self, + mut body_csum: Checksum, + l3: &ValidL3, + ) { + match self { + // ICMP4 requires the body_csum *without* + // the pseudoheader added back in. 
+ ValidUlp::IcmpV4(i4) => { + i4.set_checksum(0); + body_csum.add_bytes(i4.0.as_bytes()); + i4.set_checksum(body_csum.finalize_for_ingot()); + } + ValidUlp::IcmpV6(i6) => { + body_csum += l3.pseudo_header(); + + i6.set_checksum(0); + body_csum.add_bytes(i6.0.as_bytes()); + i6.set_checksum(body_csum.finalize_for_ingot()); + } + ValidUlp::Tcp(tcp) => { + body_csum += l3.pseudo_header(); + + tcp.set_checksum(0); + body_csum.add_bytes(tcp.0.as_bytes()); + match &tcp.1 { + Header::Repr(opts) => { + body_csum.add_bytes(&*opts); + } + Header::Raw(opts) => { + body_csum.add_bytes(&*opts); + } + } + tcp.set_checksum(body_csum.finalize_for_ingot()); + } + ValidUlp::Udp(udp) => { + body_csum += l3.pseudo_header(); + + udp.set_checksum(0); + body_csum.add_bytes(udp.0.as_bytes()); + udp.set_checksum(body_csum.finalize_for_ingot()); + } + } + } +} + impl Ulp { pub fn src_port(&self) -> Option { match self { @@ -131,19 +300,6 @@ pub struct Ipv4 { pub options: Vec, } -impl Ipv4 { - pub fn fill_checksum(&mut self) { - let mut csum = Checksum::default(); - self.checksum = 0; - - let mut bytes = [0u8; 56]; - self.emit_raw(&mut bytes[..]); - csum.add_bytes(&bytes[..]); - - self.checksum = csum.finalize(); - } -} - #[derive(Debug, Clone, Ingot, Eq, PartialEq)] #[ingot(impl_default)] pub struct Ipv6 { diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index ed8413af..af5e0942 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -1,5 +1,3 @@ -use crate::engine::packet::mock_freemsg; - use super::checksum::Checksum as OpteCsum; use super::checksum::Checksum; use super::checksum::HeaderChecksum; @@ -59,6 +57,8 @@ use super::rule::HdrTransform; use super::rule::HdrTransformError; use super::LightweightMeta; use super::NetworkParser; +#[cfg(any(feature = "std", test))] +use crate::engine::packet::mock_freemsg; use alloc::boxed::Box; use alloc::sync::Arc; use alloc::vec::Vec; @@ -71,6 +71,7 @@ use 
core::mem::MaybeUninit; use core::num::NonZeroU32; use core::ops::Deref; use core::ops::DerefMut; +use core::ptr; use core::ptr::NonNull; use core::slice; use core::sync::atomic::AtomicPtr; @@ -173,7 +174,7 @@ impl From> for OpteMeta { } } -impl LightweightMeta for ValidNoEncap { +impl LightweightMeta for ValidNoEncap { #[inline] fn flow(&self) -> InnerFlowId { let (proto, addrs) = match &self.inner_l3 { @@ -302,7 +303,7 @@ impl LightweightMeta for ValidNoEncap { let pseudo_csum = match self.inner_eth.ethertype() { Ethertype::IPV4 | Ethertype::IPV6 => { - self.inner_l3.as_ref().map(l3_pseudo_header_v) + self.inner_l3.as_ref().map(|v| v.pseudo_header()) } // Includes ARP. _ => return None, @@ -326,8 +327,13 @@ impl LightweightMeta for ValidNoEncap { } #[inline] - fn update_ulp_checksums(&mut self, body_csum: OpteCsum) { - todo!() + fn update_inner_checksums(&mut self, body_csum: OpteCsum) { + if let Some(l3) = self.inner_l3.as_mut() { + if let Some(ulp) = self.inner_ulp.as_mut() { + ulp.compute_checksum(body_csum, l3); + } + l3.compute_checksum(); + } } } @@ -338,7 +344,7 @@ impl From> for OpteMeta { } } -impl LightweightMeta for ValidGeneveOverV6 { +impl LightweightMeta for ValidGeneveOverV6 { #[inline] fn flow(&self) -> InnerFlowId { let (proto, addrs) = match &self.inner_l3 { @@ -454,7 +460,7 @@ impl LightweightMeta for ValidGeneveOverV6 { let pseudo_csum = match self.inner_eth.ethertype() { Ethertype::IPV4 | Ethertype::IPV6 => { - Some(l3_pseudo_header_v(&self.inner_l3)) + Some(self.inner_l3.pseudo_header()) } // Includes ARP. 
_ => return None, @@ -481,8 +487,9 @@ impl LightweightMeta for ValidGeneveOverV6 { } #[inline] - fn update_ulp_checksums(&mut self, body_csum: OpteCsum) { - todo!() + fn update_inner_checksums(&mut self, body_csum: OpteCsum) { + self.inner_ulp.compute_checksum(body_csum, &self.inner_l3); + self.inner_l3.compute_checksum(); } } @@ -816,6 +823,30 @@ impl MsgBlk { out } + + /// Drops all empty mblks from the start of this chain where possible + /// (i.e., any empty mblk is followed by another mblk). + pub fn drop_empty_segments(&mut self) { + let mut head = self.inner; + let mut neighbour = unsafe { (*head.as_ptr()).b_cont }; + + while !neighbour.is_null() + && unsafe { (*head.as_ptr()).b_rptr == (*head.as_ptr()).b_wptr } + { + // Replace head with neighbour. + // Disconnect head from neighbour, and drop head. + unsafe { + (*head.as_ptr()).b_cont = ptr::null_mut(); + drop(MsgBlk::wrap_mblk(head.as_ptr())); + + // SAFETY: we know neighbour is non_null. + head = NonNull::new_unchecked(neighbour); + neighbour = (*head.as_ptr()).b_cont + } + } + + self.inner = head; + } } #[derive(Debug)] @@ -1617,7 +1648,7 @@ impl Packet2> { impl<'a, T: Read + 'a> Packet2> where - T::Chunk: ingot::types::IntoBufPointer<'a>, + T::Chunk: ingot::types::IntoBufPointer<'a> + ByteSliceMut, { // #[inline] // pub fn parse( @@ -1936,17 +1967,26 @@ impl Packet2> { || l3.needs_emit() || l3.packet_length() != init_lens.outer_l3 => { - // push_spec.outer_ip = Some(match l3 { - // InlineHeader::Repr(o) => o, - // // Needed in fullness of time, but not here. 
- // InlineHeader::Raw(_) => todo!(), - // }); + let encap_len = push_spec.outer_encap.packet_length(); + push_spec.outer_ip = Some(match l3 { L3::Ipv6(BoxedHeader::Repr(o)) => L3Repr::Ipv6(*o), L3::Ipv4(BoxedHeader::Repr(o)) => L3Repr::Ipv4(*o), _ => todo!(), }); + let inner_sz = (encapped_len + encap_len) as u16; + + match &mut push_spec.outer_ip { + Some(L3Repr::Ipv4(v4)) => { + v4.total_len = (v4.ihl as u16) * 4 + inner_sz; + } + Some(L3Repr::Ipv6(v6)) => { + v6.payload_len = inner_sz; + } + _ => {} + } + force_serialize = true; rewind += init_lens.outer_l3; } @@ -2073,8 +2113,86 @@ impl Packet2> { /// Compute ULP and IP header checksum from scratch. /// /// This should really only be used for testing. - pub fn compute_checksums(&mut self) { - todo!() + pub fn compute_checksums(&mut self) + where + T::Chunk: ByteSliceMut, + { + let mut body_csum = Checksum::new(); + for seg in self.body_segs_mut().unwrap_or_default() { + body_csum.add_bytes(seg); + } + self.state.body_csum = Some(body_csum); + + if let Some(ulp) = &mut self.state.meta.headers.inner_ulp { + let mut csum = body_csum; + + // Unwrap: Can't have a ULP without an IP. + let ip = self.state.meta.headers.inner_l3.as_ref().unwrap(); + // Add pseudo header checksum. + let pseudo_csum = ip.pseudo_header(); + csum += pseudo_csum; + // Determine ULP slice and add its bytes to the + // checksum. + match ulp { + // ICMP4 requires the body_csum *without* + // the pseudoheader added back in. 
+ Ulp::IcmpV4(i4) => { + let mut bytes = [0u8; 8]; + i4.set_checksum(0); + i4.emit_raw(&mut bytes[..]); + body_csum.add_bytes(&bytes[..]); + i4.set_checksum(body_csum.finalize_for_ingot()); + } + Ulp::IcmpV6(i6) => { + let mut bytes = [0u8; 8]; + i6.set_checksum(0); + i6.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + i6.set_checksum(csum.finalize_for_ingot()); + } + Ulp::Tcp(tcp) => { + tcp.set_checksum(0); + match tcp { + IngotHeader::Repr(tcp) => { + let mut bytes = [0u8; 56]; + tcp.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + } + IngotHeader::Raw(tcp) => { + csum.add_bytes(tcp.0.as_bytes()); + match &tcp.1 { + IngotHeader::Repr(opts) => { + csum.add_bytes(&*opts); + } + IngotHeader::Raw(opts) => { + csum.add_bytes(&*opts); + } + } + } + } + tcp.set_checksum(csum.finalize_for_ingot()); + } + Ulp::Udp(udp) => { + udp.set_checksum(0); + match udp { + IngotHeader::Repr(udp) => { + let mut bytes = [0u8; 8]; + udp.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + } + IngotHeader::Raw(udp) => { + csum.add_bytes(udp.0.as_bytes()); + } + } + udp.set_checksum(csum.finalize_for_ingot()); + } + } + } + + // Compute and fill in the IPv4 header checksum. + if let Some(l3) = self.state.meta.headers.inner_l3.as_mut() { + l3.compute_checksum(); + } } pub fn body_csum(&mut self) -> Option { @@ -2158,6 +2276,7 @@ impl Packet2> { // Start by reusing the known checksum of the body. let mut body_csum = self.body_csum().unwrap_or_default(); + eprintln!("{body_csum:?}"); // If a ULP exists, then compute and set its checksum. if let (true, Some(ulp)) = @@ -2167,7 +2286,7 @@ impl Packet2> { // Unwrap: Can't have a ULP without an IP. let ip = self.state.meta.headers.inner_l3.as_ref().unwrap(); // Add pseudo header checksum. - let pseudo_csum = l3_pseudo_header(ip); + let pseudo_csum = ip.pseudo_header(); csum += pseudo_csum; // Determine ULP slice and add its bytes to the // checksum. 
@@ -2179,14 +2298,14 @@ impl Packet2> { i4.set_checksum(0); i4.emit_raw(&mut bytes[..]); body_csum.add_bytes(&bytes[..]); - i4.set_checksum(body_csum.finalize()); + i4.set_checksum(body_csum.finalize_for_ingot()); } Ulp::IcmpV6(i6) => { let mut bytes = [0u8; 8]; i6.set_checksum(0); i6.emit_raw(&mut bytes[..]); csum.add_bytes(&bytes[..]); - i6.set_checksum(csum.finalize()); + i6.set_checksum(csum.finalize_for_ingot()); } Ulp::Tcp(tcp) => { tcp.set_checksum(0); @@ -2208,7 +2327,7 @@ impl Packet2> { } } } - tcp.set_checksum(csum.finalize()); + tcp.set_checksum(csum.finalize_for_ingot()); } Ulp::Udp(udp) => { udp.set_checksum(0); @@ -2222,91 +2341,16 @@ impl Packet2> { csum.add_bytes(udp.0.as_bytes()); } } - udp.set_checksum(csum.finalize()); + udp.set_checksum(csum.finalize_for_ingot()); } } } // Compute and fill in the IPv4 header checksum. - if let (true, Some(L3::Ipv4(ip))) = + if let (true, Some(l3)) = (update_ip, &mut self.state.meta.headers.inner_l3) { - ip.set_checksum(0); - - let mut csum = Checksum::default(); - - match ip { - IngotHeader::Repr(ip) => { - let mut bytes = [0u8; 56]; - ip.emit_raw(&mut bytes[..]); - csum.add_bytes(&bytes[..]); - } - IngotHeader::Raw(ip) => { - csum.add_bytes(ip.0.as_bytes()); - match &ip.1 { - IngotHeader::Repr(opts) => { - csum.add_bytes(&*opts); - } - IngotHeader::Raw(opts) => { - csum.add_bytes(&*opts); - } - } - } - } - - ip.set_checksum(csum.finalize()); - } - } -} - -fn l3_pseudo_header(l3: &L3) -> Checksum { - match l3 { - L3::Ipv4(v4) => { - let mut pseudo_hdr_bytes = [0u8; 12]; - pseudo_hdr_bytes[0..4].copy_from_slice(v4.source().as_ref()); - pseudo_hdr_bytes[4..8].copy_from_slice(v4.destination().as_ref()); - pseudo_hdr_bytes[9] = v4.protocol().0; - let ulp_len = v4.total_len() - 4 * (v4.ihl() as u16); - pseudo_hdr_bytes[10..].copy_from_slice(&ulp_len.to_be_bytes()); - - Checksum::compute(&pseudo_hdr_bytes) - } - L3::Ipv6(v6) => { - let mut pseudo_hdr_bytes = [0u8; 40]; - 
pseudo_hdr_bytes[0..16].copy_from_slice(&v6.source().as_ref()); - pseudo_hdr_bytes[16..32] - .copy_from_slice(&v6.destination().as_ref()); - pseudo_hdr_bytes[39] = v6.next_layer().unwrap_or_default().0; - let ulp_len = v6.payload_len() as u32; - pseudo_hdr_bytes[32..36].copy_from_slice(&ulp_len.to_be_bytes()); - Checksum::compute(&pseudo_hdr_bytes) - } - } -} - -fn l3_pseudo_header_v(l3: &ValidL3) -> Checksum { - match l3 { - ValidL3::Ipv4(v4) => { - let mut pseudo_hdr_bytes = [0u8; 12]; - pseudo_hdr_bytes[0..4].copy_from_slice(v4.source().as_ref()); - pseudo_hdr_bytes[4..8].copy_from_slice(v4.destination().as_ref()); - // pseudo_hdr_bytes[8] reserved - pseudo_hdr_bytes[9] = v4.protocol().0; - let ulp_len = v4.total_len() - 4 * (v4.ihl() as u16); - pseudo_hdr_bytes[10..].copy_from_slice(&ulp_len.to_be_bytes()); - - Checksum::compute(&pseudo_hdr_bytes) - } - ValidL3::Ipv6(v6) => { - let mut pseudo_hdr_bytes = [0u8; 40]; - pseudo_hdr_bytes[0..16].copy_from_slice(&v6.source().as_ref()); - pseudo_hdr_bytes[16..32] - .copy_from_slice(&v6.destination().as_ref()); - pseudo_hdr_bytes[39] = v6.next_layer().unwrap_or_default().0; - let ulp_len = v6.payload_len() as u32; - pseudo_hdr_bytes[32..36].copy_from_slice(&ulp_len.to_be_bytes()); - - Checksum::compute(&pseudo_hdr_bytes) + l3.compute_checksum(); } } } @@ -2515,7 +2559,7 @@ impl EmittestSpec { // TODO: put available layers into said slots? } - match &self.spec { + let mut out = match &self.spec { EmitterSpec::Fastpath(push_spec) => { push_spec.encap.prepend(pkt, self.ulp_len as usize) } @@ -2596,7 +2640,11 @@ impl EmittestSpec { pkt } } - } + }; + + out.drop_empty_segments(); + + out } #[inline] diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index db2e3161..1cb13a83 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -301,8 +301,8 @@ pub trait NetworkImpl { /// This provides parsing for inbound/outbound packets for a given /// [`NetworkImpl`]. 
pub trait NetworkParser { - type InMeta: LightweightMeta; - type OutMeta: LightweightMeta; + type InMeta: LightweightMeta; + type OutMeta: LightweightMeta; /// Parse an outbound packet. /// @@ -313,7 +313,7 @@ pub trait NetworkParser { rdr: T, ) -> Result>, ParseError> where - T::Chunk: ingot::types::IntoBufPointer<'a>; + T::Chunk: ingot::types::IntoBufPointer<'a> + ByteSliceMut; /// Parse an inbound packet. /// @@ -324,7 +324,7 @@ pub trait NetworkParser { rdr: T, ) -> Result>, ParseError> where - T::Chunk: ingot::types::IntoBufPointer<'a>; + T::Chunk: ingot::types::IntoBufPointer<'a> + ByteSliceMut; } /// Header formats which allow a flow ID to be read out, and which can be converted @@ -347,8 +347,8 @@ pub trait LightweightMeta: Into> { /// Returns the number of bytes occupied by the packet's outer encapsulation. fn encap_len(&self) -> u16; - /// Recalculate checksums within ULP headers, derived from a pre-computed `body_csum`. - fn update_ulp_checksums(&mut self, body_csum: Checksum); + /// Recalculate checksums within inner headers, derived from a pre-computed `body_csum`. + fn update_inner_checksums(&mut self, body_csum: Checksum); } /// A generic ULP parser, useful for testing inside of the opte crate @@ -371,15 +371,15 @@ impl GenericUlp { } impl NetworkParser for GenericUlp { - type InMeta = ValidNoEncap; - type OutMeta = ValidNoEncap; + type InMeta = ValidNoEncap; + type OutMeta = ValidNoEncap; fn parse_inbound<'a, T: Read + 'a>( &self, rdr: T, ) -> Result>, ParseError> where - T::Chunk: ingot::types::IntoBufPointer<'a>, + T::Chunk: ingot::types::IntoBufPointer<'a> + ByteSliceMut, { Ok(ValidNoEncap::parse_read(rdr)?) } @@ -389,7 +389,7 @@ impl NetworkParser for GenericUlp { rdr: T, ) -> Result>, ParseError> where - T::Chunk: ingot::types::IntoBufPointer<'a>, + T::Chunk: ingot::types::IntoBufPointer<'a> + ByteSliceMut, { Ok(ValidNoEncap::parse_read(rdr)?) 
} diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index a14a4dac..5034a690 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -1328,7 +1328,7 @@ impl Port { }; meta.run_compiled_transform(&tx); if let Some(csum) = body_csum { - meta.update_ulp_checksums(csum); + meta.update_inner_checksums(csum); } let encap_len = meta.encap_len(); let ulp_len = (len - (encap_len as usize)) as u32; diff --git a/lib/oxide-vpc/.gitignore b/lib/oxide-vpc/.gitignore index 5d43dab0..e2ef55f9 100644 --- a/lib/oxide-vpc/.gitignore +++ b/lib/oxide-vpc/.gitignore @@ -6,3 +6,4 @@ overlay_guest_to_guest-phys-1.pcap overlay_guest_to_guest-phys-2.pcap dhcpv6_solicit_reply.pcap guest_to_internet_ipv[46].pcap +snat-v[46]-echo-id.pcap diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index b1a4d2aa..89821a6b 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -137,8 +137,8 @@ impl NetworkImpl for VpcNetwork { } impl NetworkParser for VpcParser { - type InMeta = ValidGeneveOverV6; - type OutMeta = ValidNoEncap; + type InMeta = ValidGeneveOverV6; + type OutMeta = ValidNoEncap; #[inline] fn parse_outbound<'a, T: Read + 'a>( @@ -146,7 +146,7 @@ impl NetworkParser for VpcParser { rdr: T, ) -> Result>, ParseError> where - T::Chunk: opte::ingot::types::IntoBufPointer<'a>, + T::Chunk: opte::ingot::types::IntoBufPointer<'a> + ByteSliceMut, { Ok(ValidNoEncap::parse_read(rdr)?) } @@ -157,7 +157,7 @@ impl NetworkParser for VpcParser { rdr: T, ) -> Result>, ParseError> where - T::Chunk: opte::ingot::types::IntoBufPointer<'a>, + T::Chunk: opte::ingot::types::IntoBufPointer<'a> + ByteSliceMut, { Ok(ValidGeneveOverV6::parse_read(rdr)?) 
} diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index 6e110ba5..d26f2dd3 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -440,10 +440,11 @@ fn gateway_icmp4_ping() { assert_eq!(ip4.protocol(), IngotIpProto::ICMP); } - L3::Ipv6(v6) => panic!("expected inner IPv4 metadata, got IPv6"), + L3::Ipv6(_) => panic!("expected inner IPv4 metadata, got IPv6"), } - let reply_body = reply.meta().copy_remaining(); + let mut reply_body = meta.inner_ulp().expect("ICMPv4 is a ULP").emit_vec(); + reply.meta().append_remaining(&mut reply_body); let reply_pkt = Icmpv4Packet::new_checked(&reply_body).unwrap(); let mut csum = CsumCapab::ignored(); csum.ipv4 = smoltcp::phy::Checksum::Rx; @@ -640,7 +641,7 @@ fn guest_to_guest() { // assert_eq!(pkt2.body_offset(), TCP4_SZ + HTTP_SYN_OPTS_LEN); // assert_eq!(pkt2.body_seg(), 0); - let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); + let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); let g2_meta = pkt2.meta(); // TODO: can we have a convenience method that verifies that the @@ -652,7 +653,7 @@ fn guest_to_guest() { assert_eq!(g2_eth.ethertype(), Ethertype::IPV4); match &g2_meta.inner_l3 { - ValidL3::Ipv4(ip4) => { + Some(ValidL3::Ipv4(ip4)) => { assert_eq!(ip4.source(), g1_cfg.ipv4_cfg().unwrap().private_ip); assert_eq!( ip4.destination(), @@ -664,7 +665,7 @@ fn guest_to_guest() { } match &g2_meta.inner_ulp { - ValidUlp::Tcp(tcp) => { + Some(ValidUlp::Tcp(tcp)) => { assert_eq!(tcp.source(), 44490); assert_eq!(tcp.destination(), 80); } @@ -730,6 +731,7 @@ fn guest_to_guest_diff_vpc_no_peer() { // Verify that a guest can communicate with the internet over IPv4. 
#[test] fn guest_to_internet_ipv4() { + let mut pcap_guest = PcapBuilder::new("guest_to_internet_ipv4.pcap"); let g1_cfg = g1_cfg(); let mut g1 = oxide_net_setup("g1_port", &g1_cfg, None, None); g1.port.start(); @@ -755,6 +757,7 @@ fn guest_to_internet_ipv4() { GW_MAC_ADDR, dst_ip, ); + pcap_guest.add_pkt(&pkt1_m); let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); // ================================================================ @@ -832,13 +835,13 @@ fn guest_to_internet_ipv4() { _ => panic!("expected inner TCP metadata, got (other)"), } - let mut pcap_guest = PcapBuilder::new("guest_to_internet_ipv4.pcap"); pcap_guest.add_pkt(&pkt1_m); } // Verify that a guest can communicate with the internet over IPv6. #[test] fn guest_to_internet_ipv6() { + let mut pcap_guest = PcapBuilder::new("guest_to_internet_ipv6.pcap"); let g1_cfg = g1_cfg(); let mut g1 = oxide_net_setup("g1_port", &g1_cfg, None, None); g1.port.start(); @@ -864,6 +867,7 @@ fn guest_to_internet_ipv6() { GW_MAC_ADDR, dst_ip, ); + pcap_guest.add_pkt(&pkt1_m); // ================================================================ // Run the packet through g1's port in the outbound direction and @@ -896,11 +900,11 @@ fn guest_to_internet_ipv6() { pkt1.len() - (&meta.outer_eth, &meta.outer_v6).packet_length(); assert_eq!(meta.outer_v6.payload_len() as usize, len_post_v6); - assert_eq!(meta.outer_udp.source(), 24329); + assert_eq!(meta.outer_udp.source(), 63246); assert_eq!(meta.outer_udp.length() as usize, len_post_v6); assert_eq!(meta.inner_eth.source(), g1_cfg.guest_mac); - assert_eq!(meta.inner_eth.ethertype(), Ethertype::IPV4); + assert_eq!(meta.inner_eth.ethertype(), Ethertype::IPV6); match &meta.inner_l3 { ValidL3::Ipv6(ip6) => { @@ -939,8 +943,6 @@ fn guest_to_internet_ipv6() { // ulp => panic!("expected inner TCP metadata, got: {:?}", ulp), _ => panic!("expected inner TCP metadata, got (other)"), } - - let mut pcap_guest = PcapBuilder::new("guest_to_internet_ipv6.pcap"); 
pcap_guest.add_pkt(&pkt1_m); } @@ -1524,9 +1526,10 @@ fn unpack_and_verify_icmp( seq_no: u16, body_seg: usize, ) { + // Note the reversed direction -- parse the expected *output* format. let parsed = match dir { - In => parse_inbound(pkt, VpcParser {}).unwrap().to_full_meta(), - Out => parse_outbound(pkt, VpcParser {}).unwrap().to_full_meta(), + In => parse_outbound(pkt, VpcParser {}).unwrap().to_full_meta(), + Out => parse_inbound(pkt, VpcParser {}).unwrap().to_full_meta(), }; let meta = parsed.meta(); @@ -1597,7 +1600,7 @@ fn unpack_and_verify_icmp4( // Because we treat ICMPv4 as a full-fledged ULP, we need to // unsplit the emitted header from the body. let mut icmp = pkt.meta().inner_ulp().unwrap().emit_vec(); - icmp.extend(pkt.meta().copy_remaining().into_iter()); + pkt.meta().append_remaining(&mut icmp); let icmp = Icmpv4Packet::new_checked(&icmp[..]).unwrap(); @@ -1621,7 +1624,7 @@ fn unpack_and_verify_icmp6( // Because we treat ICMPv4 as a full-fledged ULP, we need to // unsplit the emitted header from the body. 
let mut icmp = pkt.meta().inner_ulp().unwrap().emit_vec(); - icmp.extend(pkt.meta().copy_remaining().into_iter()); + pkt.meta().append_remaining(&mut icmp); let icmp = Icmpv6Packet::new_checked(&icmp[..]).unwrap(); assert!(icmp.verify_checksum(&src_ip, &dst_ip)); @@ -1644,6 +1647,11 @@ fn snat_icmp6_echo_rewrite() { } fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { + let mut pcap = match &dst_ip { + IpAddr::Ip4(_) => PcapBuilder::new("snat-v4-echo-id.pcap"), + IpAddr::Ip6(_) => PcapBuilder::new("snat-v6-echo-id.pcap"), + }; + let g1_cfg = g1_cfg(); let mut g1 = oxide_net_setup("g1_port", &g1_cfg, None, None); g1.port.start(); @@ -1705,10 +1713,12 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { &data[..], 2, ); + pcap.add_pkt(&pkt1_m); let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); let res = g1.port.process(Out, pkt1); expect_modified!(res, pkt1_m); + pcap.add_pkt(&pkt1_m); incr!( g1, [ @@ -1746,10 +1756,12 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), }; pkt2_m = encap_external(pkt2_m, bsvc_phys, g1_phys); + pcap.add_pkt(&pkt2_m); let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); let res = g1.port.process(In, pkt2); expect_modified!(res, pkt2_m); + pcap.add_pkt(&pkt2_m); incr!(g1, ["uft.in", "stats.port.in_modified, stats.port.in_uft_miss"]); unpack_and_verify_icmp(&mut pkt2_m, &g1_cfg, ¶ms, In, seq_no, 0); @@ -1770,11 +1782,13 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { &data[..], 1, ); + pcap.add_pkt(&pkt3_m); let pkt3 = parse_outbound(&mut pkt3_m, VpcParser {}).unwrap(); assert_eq!(g1.port.stats_snap().out_uft_hit, 0); let res = g1.port.process(Out, pkt3); expect_modified!(res, pkt3_m); + pcap.add_pkt(&pkt3_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); assert_eq!(g1.port.stats_snap().out_uft_hit, 1); @@ -1795,11 +1809,13 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { &data[..], 2, ); + pcap.add_pkt(&pkt4_m); let pkt4 = 
parse_inbound(&mut pkt4_m, VpcParser {}).unwrap(); assert_eq!(g1.port.stats_snap().in_uft_hit, 0); let res = g1.port.process(In, pkt4); expect_modified!(res, pkt4_m); + pcap.add_pkt(&pkt4_m); incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); assert_eq!(g1.port.stats_snap().in_uft_hit, 1); @@ -1822,10 +1838,12 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { &data[..], 2, ); + pcap.add_pkt(&pkt5_m); let pkt5 = parse_outbound(&mut pkt5_m, VpcParser {}).unwrap(); let res = g1.port.process(Out, pkt5); expect_modified!(res, pkt5_m); + pcap.add_pkt(&pkt5_m); incr!( g1, [ @@ -2488,17 +2506,15 @@ fn test_gateway_neighbor_advert_reply() { let mut with_checksum = false; let data = generate_solicit_test_data(&g1_cfg); for d in data { - // TODO(kyle) - let with_checksum = true; - let mut pkt = generate_neighbor_solicitation(&d.ns, with_checksum); // Alternate between using smoltcp or our `compute_checksums` method // to compute the checksums. - // TODO(kyle) - // if !with_checksum { - // pkt.compute_checksums(); - // } - // with_checksum = !with_checksum; + if !with_checksum { + let mut parsed = + parse_outbound(&mut pkt, VpcParser {}).unwrap().to_full_meta(); + parsed.compute_checksums(); + } + with_checksum = !with_checksum; pcap.add_pkt(&pkt); let pkt1 = parse_outbound(&mut pkt, VpcParser {}).unwrap(); let res = g1.port.process(Out, pkt1); From 82bc8616d6b40a8d983796aade26c5e3e6e97be1 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 14 Oct 2024 20:27:07 +0100 Subject: [PATCH 047/115] One more. 
--- lib/opte-test-utils/src/icmp.rs | 2 +- lib/opte/src/engine/ingot_packet.rs | 57 ++---------------------- lib/oxide-vpc/tests/integration_tests.rs | 7 +++ 3 files changed, 12 insertions(+), 54 deletions(-) diff --git a/lib/opte-test-utils/src/icmp.rs b/lib/opte-test-utils/src/icmp.rs index e97c0531..6ffaea5f 100644 --- a/lib/opte-test-utils/src/icmp.rs +++ b/lib/opte-test-utils/src/icmp.rs @@ -180,7 +180,7 @@ pub fn gen_icmp_echo( segments.push(pkt); let mut pkt = MsgBlk::new(ip.packet_length()); - pkt.emit_back(eth).unwrap(); + pkt.emit_back(ip).unwrap(); segments.push(pkt); let mut pkt = MsgBlk::new(icmp.buffer_len()); diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index af5e0942..62243dd0 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -273,7 +273,7 @@ impl LightweightMeta for ValidNoEncap { } } (Some(ValidUlp::IcmpV4(pkt)), Some(tx)) - if pkt.ty() == 0 || pkt.ty() == 3 => + if pkt.ty() == 0 || pkt.ty() == 8 => { if let Some(new_id) = tx.icmp_id { pkt.rest_of_hdr_mut()[..2] @@ -435,7 +435,7 @@ impl LightweightMeta for ValidGeneveOverV6 { } } (ValidUlp::IcmpV4(pkt), Some(tx)) - if pkt.ty() == 0 || pkt.ty() == 3 => + if pkt.ty() == 0 || pkt.ty() == 8 => { if let Some(new_id) = tx.icmp_id { pkt.rest_of_hdr_mut()[..2] @@ -2197,54 +2197,6 @@ impl Packet2> { pub fn body_csum(&mut self) -> Option { self.state.body_csum - - // let out = *self.state.body_csum - // .get(|| { - // let use_pseudo = if let Some(v) = self.state.meta.inner_ulp() { - // !matches!(v, Ulp::IcmpV4(_)) - // } else { - // false - // }; - - // // XXX TODO: make these valid even AFTER all packet pushings occur. 
- // let pseudo_csum = - // match (&self.state.meta.headers).inner_eth.ethertype() { - // // ARP - // Ethertype::ARP => { - // return None; - // } - // Ethertype::IPV4 | Ethertype::IPV6 => self - // .state - // .meta - // .headers - // .inner_l3 - // .as_ref() - // .map(l3_pseudo_header), - // _ => unreachable!(), - // }; - - // let Some(pseudo_csum) = pseudo_csum else { - // return None; - // }; - - // self.state.meta.inner_ulp().and_then(csum_minus_hdr).map(|mut v| { - // if use_pseudo { - // v -= pseudo_csum; - // } - // v - // }) - // }); - - // // let mut manual = Checksum::default(); - // // if let Some(segs) = self.body_segs() { - // // for seg in segs { - // // manual.add_bytes(*seg); - // // } - - // // opte::engine::err!("think my csum is {:?}, reality is {:?}", out.map(|mut v| v.finalize()), manual.finalize()); - // // } - - // out } pub fn l4_hash(&mut self) -> u32 { @@ -2276,7 +2228,6 @@ impl Packet2> { // Start by reusing the known checksum of the body. let mut body_csum = self.body_csum().unwrap_or_default(); - eprintln!("{body_csum:?}"); // If a ULP exists, then compute and set its checksum. 
if let (true, Some(ulp)) = @@ -2419,7 +2370,7 @@ fn csum_minus_hdr(ulp: &ValidUlp) -> Option { icmp.checksum().to_be_bytes(), )); - csum.sub_bytes(&[icmp.code(), icmp.ty()]); + csum.sub_bytes(&[icmp.ty(), icmp.code()]); csum.sub_bytes(icmp.rest_of_hdr_ref()); Some(csum) @@ -2433,7 +2384,7 @@ fn csum_minus_hdr(ulp: &ValidUlp) -> Option { icmp.checksum().to_be_bytes(), )); - csum.sub_bytes(&[icmp.code(), icmp.ty()]); + csum.sub_bytes(&[icmp.ty(), icmp.code()]); csum.sub_bytes(icmp.rest_of_hdr_ref()); Some(csum) diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index d26f2dd3..c5983034 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -20,6 +20,7 @@ use opte::api::OpteError; use opte::ddi::time::Moment; use opte::engine::arp::ArpEthIpv4; use opte::engine::arp::ArpEthIpv4Raw; +use opte::engine::checksum::Checksum as OpteCsum; use opte::engine::dhcpv6; use opte::engine::ether::EtherHdr; use opte::engine::ether::EtherHdrRaw; @@ -77,6 +78,7 @@ use oxide_vpc::api::RouterClass; use oxide_vpc::api::VpcCfg; use oxide_vpc::engine::overlay::BOUNDARY_SERVICES_VNI; use pcap::*; +use smoltcp::phy::Checksum; use smoltcp::phy::ChecksumCapabilities as CsumCapab; use smoltcp::wire::Icmpv4Packet; use smoltcp::wire::Icmpv4Repr; @@ -163,6 +165,7 @@ fn port_transition_running() { // -> Running. 
// ================================================================ let mut pkt1_m = tcp_telnet_syn(&g1_cfg, &g2_cfg); + let pkt1 = parse_outbound(&mut pkt1_m, GenericUlp {}).unwrap(); let res = g1.port.process(Out, pkt1); assert!(matches!(res, Err(ProcessError::BadState(_)))); @@ -758,6 +761,7 @@ fn guest_to_internet_ipv4() { dst_ip, ); pcap_guest.add_pkt(&pkt1_m); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); // ================================================================ @@ -1714,6 +1718,7 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { 2, ); pcap.add_pkt(&pkt1_m); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); let res = g1.port.process(Out, pkt1); @@ -1757,6 +1762,7 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { }; pkt2_m = encap_external(pkt2_m, bsvc_phys, g1_phys); pcap.add_pkt(&pkt2_m); + let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); let res = g1.port.process(In, pkt2); @@ -1809,6 +1815,7 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { &data[..], 2, ); + pkt4_m = encap_external(pkt4_m, bsvc_phys, g1_phys); pcap.add_pkt(&pkt4_m); let pkt4 = parse_inbound(&mut pkt4_m, VpcParser {}).unwrap(); From e752e3c4ec39e15bc64e8610fb3c2ef0b8d0dae0 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 15 Oct 2024 10:57:29 +0100 Subject: [PATCH 048/115] Iterating. 
--- lib/opte/src/engine/dhcpv6/protocol.rs | 10 +- lib/opte/src/engine/ingot_base.rs | 62 +- lib/opte/src/engine/layer.rs | 60 +- lib/opte/src/engine/nat.rs | 151 ++-- lib/opte/src/engine/packet.rs | 914 ++++++++++++----------- lib/opte/src/engine/rule.rs | 88 +-- lib/opte/src/engine/snat.rs | 148 ++-- lib/oxide-vpc/tests/integration_tests.rs | 78 +- 8 files changed, 775 insertions(+), 736 deletions(-) diff --git a/lib/opte/src/engine/dhcpv6/protocol.rs b/lib/opte/src/engine/dhcpv6/protocol.rs index 5b45f9c2..8df11c34 100644 --- a/lib/opte/src/engine/dhcpv6/protocol.rs +++ b/lib/opte/src/engine/dhcpv6/protocol.rs @@ -710,6 +710,8 @@ mod test { use super::OptionCode; use super::Packet; use crate::engine::dhcpv6::test_data; + use crate::engine::ingot_packet::MsgBlk; + use crate::engine::ingot_packet::Packet2; use crate::engine::port::meta::ActionMeta; use crate::engine::GenericUlp; use opte_api::Direction::*; @@ -741,9 +743,11 @@ mod test { #[test] fn test_predicates_match_snooped_solicit_message() { - let pkt = Packet::copy(test_data::TEST_SOLICIT_PACKET) - .parse(Out, GenericUlp {}) - .unwrap(); + let mut pkt = MsgBlk::copy(test_data::TEST_SOLICIT_PACKET); + let pkt = Packet2::new(pkt.iter_mut()) + .parse_outbound(GenericUlp {}) + .unwrap() + .to_full_meta(); let pmeta = pkt.meta(); let ameta = ActionMeta::new(); let client_mac = diff --git a/lib/opte/src/engine/ingot_base.rs b/lib/opte/src/engine/ingot_base.rs index c90d78b4..f152e246 100644 --- a/lib/opte/src/engine/ingot_base.rs +++ b/lib/opte/src/engine/ingot_base.rs @@ -108,35 +108,51 @@ impl ValidL3 { } } -impl L3 { +impl Ipv4 { #[inline] pub fn compute_checksum(&mut self) { - if let L3::Ipv4(ip) = self { - ip.set_checksum(0); + self.checksum = 0; - let mut csum = Checksum::new(); + let mut csum = Checksum::new(); - match ip { - Header::Repr(ip) => { - let mut bytes = [0u8; 56]; - ip.emit_raw(&mut bytes[..]); - csum.add_bytes(&bytes[..]); - } - Header::Raw(ip) => { - csum.add_bytes(ip.0.as_bytes()); - - match 
&ip.1 { - Header::Repr(opts) => { - csum.add_bytes(&*opts); - } - Header::Raw(opts) => { - csum.add_bytes(&*opts); - } - } - } + let mut bytes = [0u8; 56]; + self.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + + self.checksum = csum.finalize_for_ingot(); + } +} + +impl ValidIpv4 { + #[inline] + pub fn compute_checksum(&mut self) { + self.set_checksum(0); + + let mut csum = Checksum::new(); + + csum.add_bytes(self.0.as_bytes()); + + match &self.1 { + Header::Repr(opts) => { + csum.add_bytes(&*opts); } + Header::Raw(opts) => { + csum.add_bytes(&*opts); + } + } - ip.set_checksum(csum.finalize_for_ingot()); + self.set_checksum(csum.finalize_for_ingot()); + } +} + +impl L3 { + #[inline] + pub fn compute_checksum(&mut self) { + if let L3::Ipv4(ip) = self { + match ip { + Header::Repr(ip) => ip.compute_checksum(), + Header::Raw(ip) => ip.compute_checksum(), + } } } } diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index 6997580b..c9199b3b 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -1828,6 +1828,14 @@ pub struct rule_no_match_sdt_arg { #[cfg(test)] mod test { + use ingot::ethernet::Ethernet; + use ingot::ethernet::Ethertype; + use ingot::tcp::Tcp; + use ingot::types::HeaderLen; + + use crate::engine::ingot_base::Ipv4; + use crate::engine::GenericUlp; + use super::*; #[test] @@ -1854,42 +1862,32 @@ mod test { rule_table.add(rule.finalize()); - let ip = IpMeta::from(Ipv4Meta { - src: "10.0.0.77".parse().unwrap(), - dst: "52.10.128.69".parse().unwrap(), - proto: Protocol::TCP, - ttl: 64, - ident: 1, - hdr_len: 20, - total_len: 40, - csum: [0; 2], - }); - let ulp = UlpMeta::from(TcpMeta { - src: 5555, - dst: 443, - flags: 0, - seq: 0, - ack: 0, - window_size: 64240, - options_bytes: None, - options_len: 0, - ..Default::default() - }); - - let pmeta = PacketMeta { - outer: Default::default(), - inner: InnerMeta { - ip: Some(ip), - ulp: Some(ulp), + let mut test_pkt = MsgBlk::new_ethernet_pkt(( + Ethernet { 
ethertype: Ethertype::IPV4, ..Default::default() }, + Ipv4 { + source: "10.0.0.77".parse().unwrap(), + destination: "52.10.128.69".parse().unwrap(), + protocol: ingot::ip::IpProtocol::TCP, + identification: 1, + total_len: (20 + Tcp::MINIMUM_LENGTH) as u16, ..Default::default() }, - }; + Tcp { + source: 5555, + destination: 443, + window_size: 64240, + ..Default::default() + }, + )); + + let pkt_view = Packet2::new(test_pkt.iter_mut()); + let pmeta = + pkt_view.parse_outbound(GenericUlp {}).unwrap().to_full_meta(); // The pkt/rdr aren't actually used in this case. - let pkt = Packet::copy(&[0xA]); let ameta = ActionMeta::new(); - let ifid = InnerFlowId::from(&pmeta); - assert!(rule_table.find_match(&ifid, &pmeta, &ameta).is_some()); + let ifid = *pmeta.flow(); + assert!(rule_table.find_match(&ifid, &pmeta.meta(), &ameta).is_some()); } } // TODO Reinstate diff --git a/lib/opte/src/engine/nat.rs b/lib/opte/src/engine/nat.rs index e97b7b8c..b32eb29f 100644 --- a/lib/opte/src/engine/nat.rs +++ b/lib/opte/src/engine/nat.rs @@ -219,7 +219,18 @@ impl ActionDesc for NatDesc { mod test { use super::*; use crate::engine::ether::EtherMeta; + use crate::engine::ingot_base::Ethernet; + use crate::engine::ingot_base::EthernetRef; + use crate::engine::ingot_base::Ipv4; + use crate::engine::ingot_base::Ipv4Ref; + use crate::engine::ingot_packet::MsgBlk; use crate::engine::GenericUlp; + use ingot::ethernet::Ethertype; + use ingot::ip::IpProtocol; + use ingot::tcp::Tcp; + use ingot::tcp::TcpFlags; + use ingot::tcp::TcpRef; + use ingot::types::HeaderLen; use opte_api::Direction::*; #[derive(Debug)] @@ -256,29 +267,33 @@ mod test { // ================================================================ // Build the packet metadata // ================================================================ - let body = vec![]; - let tcp = - TcpMeta { src: priv_port, dst: outside_port, ..Default::default() }; - let mut ip4 = Ipv4Meta { - src: priv_ip, - dst: outside_ip, - proto: Protocol::TCP, - 
total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let body: Vec = vec![]; + let tcp = Tcp { + source: priv_port, + destination: outside_port, ..Default::default() }; - ip4.compute_hdr_csum(); - let eth = EtherMeta { - ether_type: EtherType::Ipv4, - src: priv_mac, - dst: dest_mac, + let mut ip4 = Ipv4 { + source: priv_ip, + destination: outside_ip, + protocol: IpProtocol::TCP, + total_len: (Ipv4::MINIMUM_LENGTH + (&tcp, &body).packet_length()) + as u16, + ..Default::default() + }; + ip4.compute_checksum(); + + let eth = Ethernet { + destination: dest_mac, + source: priv_mac, + ethertype: Ethertype::IPV4, }; - let mut pkt = Packet::alloc_and_expand(128); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - tcp.emit(wtr.slice_mut(tcp.hdr_len()).unwrap()); - wtr.write(&body).unwrap(); - let mut pkt = pkt.parse(Out, GenericUlp {}).unwrap(); + + let mut pkt_m = MsgBlk::new_ethernet_pkt((ð, &ip4, &tcp, &body)); + let mut pkt = Packet2::new(pkt_m.iter_mut()) + .parse_outbound(GenericUlp {}) + .unwrap() + .to_full_meta(); // ================================================================ // Verify descriptor generation. 
@@ -296,79 +311,83 @@ mod test { let pmo = pkt.meta_mut(); out_ht.run(pmo).unwrap(); - let ether_meta = pmo.inner.ether; - assert_eq!(ether_meta.src, priv_mac); - assert_eq!(ether_meta.dst, dest_mac); + let ether_meta = pmo.inner_ether(); + assert_eq!(ether_meta.source(), priv_mac); + assert_eq!(ether_meta.destination(), dest_mac); - let ip4_meta = match pmo.inner.ip.as_ref().unwrap() { - IpMeta::Ip4(v) => v, + let ip4_meta = match pmo.inner_ip4() { + Some(v) => v, _ => panic!("expect Ipv4Meta"), }; - assert_eq!(ip4_meta.src, pub_ip); - assert_eq!(ip4_meta.dst, outside_ip); - assert_eq!(ip4_meta.proto, Protocol::TCP); + assert_eq!(ip4_meta.source(), pub_ip); + assert_eq!(ip4_meta.destination(), outside_ip); + assert_eq!(ip4_meta.protocol(), IpProtocol::TCP); - let tcp_meta = match pmo.inner.ulp.as_ref().unwrap() { - UlpMeta::Tcp(v) => v, + let tcp_meta = match pmo.inner_tcp() { + Some(v) => v, _ => panic!("expect TcpMeta"), }; - assert_eq!(tcp_meta.src, priv_port); - assert_eq!(tcp_meta.dst, outside_port); - assert_eq!(tcp_meta.flags, 0); + assert_eq!(tcp_meta.source(), priv_port); + assert_eq!(tcp_meta.destination(), outside_port); + assert_eq!(tcp_meta.flags(), TcpFlags::empty()); // ================================================================ // Verify inbound header transformation. 
// ================================================================ - let body = vec![]; - let tcp = - TcpMeta { src: outside_port, dst: priv_port, ..Default::default() }; - let mut ip4 = Ipv4Meta { - src: outside_ip, - dst: priv_ip, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let body: Vec = vec![]; + let tcp = Tcp { + source: outside_port, + destination: priv_port, ..Default::default() }; - ip4.compute_hdr_csum(); - let eth = EtherMeta { - dst: priv_mac, - src: dest_mac, - ether_type: EtherType::Ipv4, + let mut ip4 = Ipv4 { + source: outside_ip, + destination: pub_ip, + protocol: IpProtocol::TCP, + total_len: (Ipv4::MINIMUM_LENGTH + (&tcp, &body).packet_length()) + as u16, + ..Default::default() + }; + ip4.compute_checksum(); + + let eth = Ethernet { + destination: priv_mac, + source: dest_mac, + ethertype: Ethertype::IPV4, }; - let mut pkt = Packet::alloc_and_expand(128); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - tcp.emit(wtr.slice_mut(tcp.hdr_len()).unwrap()); - wtr.write(&body).unwrap(); - let mut pkt = pkt.parse(Out, GenericUlp {}).unwrap(); + + let mut pkt_m = MsgBlk::new_ethernet_pkt((ð, &ip4, &tcp, &body)); + let mut pkt = Packet2::new(pkt_m.iter_mut()) + .parse_inbound(GenericUlp {}) + .unwrap() + .to_full_meta(); let pmi = pkt.meta_mut(); let in_ht = desc.gen_ht(Direction::In); in_ht.run(pmi).unwrap(); - let ether_meta = pmi.inner.ether; - assert_eq!(ether_meta.src, dest_mac); - assert_eq!(ether_meta.dst, priv_mac); + let ether_meta = pmi.inner_ether(); + assert_eq!(ether_meta.source(), dest_mac); + assert_eq!(ether_meta.destination(), priv_mac); - let ip4_meta = match pmi.inner.ip.as_ref().unwrap() { - IpMeta::Ip4(v) => v, + let ip4_meta = match pmi.inner_ip4() { + Some(v) => v, _ => panic!("expect Ipv4Meta"), }; - assert_eq!(ip4_meta.src, outside_ip); - assert_eq!(ip4_meta.dst, priv_ip); - 
assert_eq!(ip4_meta.proto, Protocol::TCP); + assert_eq!(ip4_meta.source(), outside_ip); + assert_eq!(ip4_meta.destination(), priv_ip); + assert_eq!(ip4_meta.protocol(), IpProtocol::TCP); - let tcp_meta = match pmi.inner.ulp.as_ref().unwrap() { - UlpMeta::Tcp(v) => v, + let tcp_meta = match pmi.inner_tcp() { + Some(v) => v, _ => panic!("expect TcpMeta"), }; - assert_eq!(tcp_meta.src, outside_port); - assert_eq!(tcp_meta.dst, priv_port); - assert_eq!(tcp_meta.flags, 0); + assert_eq!(tcp_meta.source(), outside_port); + assert_eq!(tcp_meta.destination(), priv_port); + assert_eq!(tcp_meta.flags(), TcpFlags::empty()); } } diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index aee68f90..1d656d8f 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -3363,40 +3363,41 @@ mod test { pkt } - #[test] - fn zero_byte_packet() { - let pkt = Packet::alloc(0); - assert_eq!(pkt.len(), 0); - assert_eq!(pkt.num_segs(), 1); - assert_eq!(pkt.avail(), 16); - let res = pkt.parse(Out, GenericUlp {}); - match res { - Err(ParseError::BadHeader(msg)) => { - assert_eq!( - msg, - EtherHdrError::ReadError(ReadErr::EndOfPacket).into() - ); - } + // TODO(kyle): equivalent for MsgBlk + // #[test] + // fn zero_byte_packet() { + // let pkt = Packet::alloc(0); + // assert_eq!(pkt.len(), 0); + // assert_eq!(pkt.num_segs(), 1); + // assert_eq!(pkt.avail(), 16); + // let res = pkt.parse(Out, GenericUlp {}); + // match res { + // Err(ParseError::BadHeader(msg)) => { + // assert_eq!( + // msg, + // EtherHdrError::ReadError(ReadErr::EndOfPacket).into() + // ); + // } - _ => panic!("expected read error, got: {:?}", res), - } + // _ => panic!("expected read error, got: {:?}", res), + // } - let pkt2 = Packet::copy(&[]); - assert_eq!(pkt2.len(), 0); - assert_eq!(pkt2.num_segs(), 1); - assert_eq!(pkt2.avail(), 16); - let res = pkt2.parse(Out, GenericUlp {}); - match res { - Err(ParseError::BadHeader(msg)) => { - assert_eq!( - msg, - 
EtherHdrError::ReadError(ReadErr::EndOfPacket).into() - ); - } + // let pkt2 = Packet::copy(&[]); + // assert_eq!(pkt2.len(), 0); + // assert_eq!(pkt2.num_segs(), 1); + // assert_eq!(pkt2.avail(), 16); + // let res = pkt2.parse(Out, GenericUlp {}); + // match res { + // Err(ParseError::BadHeader(msg)) => { + // assert_eq!( + // msg, + // EtherHdrError::ReadError(ReadErr::EndOfPacket).into() + // ); + // } - _ => panic!("expected read error, got: {:?}", res), - } - } + // _ => panic!("expected read error, got: {:?}", res), + // } + // } // Verify uninitialized packet. #[test] @@ -3444,115 +3445,117 @@ mod test { assert_eq!(pkt.len(), 6); } - #[test] - fn read_single_segment() { - let parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); - assert_eq!(parsed.state.hdr_offsets.inner.ether.seg_idx, 0); - assert_eq!(parsed.state.hdr_offsets.inner.ether.seg_pos, 0); + // TODO(kyle): equivalents for MsgBlk? + // #[test] + // fn read_single_segment() { + // let parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); + // assert_eq!(parsed.state.hdr_offsets.inner.ether.seg_idx, 0); + // assert_eq!(parsed.state.hdr_offsets.inner.ether.seg_pos, 0); - let eth_meta = parsed.state.meta.inner.ether; - assert_eq!(eth_meta.ether_type, EtherType::Ipv4); - assert_eq!(eth_meta.dst, DST_MAC); - assert_eq!(eth_meta.src, SRC_MAC); + // let eth_meta = parsed.state.meta.inner.ether; + // assert_eq!(eth_meta.ether_type, EtherType::Ipv4); + // assert_eq!(eth_meta.dst, DST_MAC); + // assert_eq!(eth_meta.src, SRC_MAC); - let offsets = &parsed.state.hdr_offsets; + // let offsets = &parsed.state.hdr_offsets; - let ip4_meta = match parsed.state.meta.inner.ip.as_ref().unwrap() { - IpMeta::Ip4(v) => v, - _ => panic!("expected IPv4"), - }; - assert_eq!(ip4_meta.src, SRC_IP4); - assert_eq!(ip4_meta.dst, DST_IP4); - assert_eq!(ip4_meta.proto, Protocol::TCP); - assert_eq!(offsets.inner.ip.as_ref().unwrap().seg_idx, 0); - assert_eq!(offsets.inner.ip.as_ref().unwrap().seg_pos, 14); - - let tcp_meta 
= match parsed.state.meta.inner.ulp.as_ref().unwrap() { - UlpMeta::Tcp(v) => v, - _ => panic!("expected TCP"), - }; - assert_eq!(tcp_meta.src, 3839); - assert_eq!(tcp_meta.dst, 80); - assert_eq!(tcp_meta.flags, TcpFlags::SYN); - assert_eq!(tcp_meta.seq, 4224936861); - assert_eq!(tcp_meta.ack, 0); - assert_eq!(offsets.inner.ulp.as_ref().unwrap().seg_idx, 0); - assert_eq!(offsets.inner.ulp.as_ref().unwrap().seg_pos, 34); - } + // let ip4_meta = match parsed.state.meta.inner.ip.as_ref().unwrap() { + // IpMeta::Ip4(v) => v, + // _ => panic!("expected IPv4"), + // }; + // assert_eq!(ip4_meta.src, SRC_IP4); + // assert_eq!(ip4_meta.dst, DST_IP4); + // assert_eq!(ip4_meta.proto, Protocol::TCP); + // assert_eq!(offsets.inner.ip.as_ref().unwrap().seg_idx, 0); + // assert_eq!(offsets.inner.ip.as_ref().unwrap().seg_pos, 14); + + // let tcp_meta = match parsed.state.meta.inner.ulp.as_ref().unwrap() { + // UlpMeta::Tcp(v) => v, + // _ => panic!("expected TCP"), + // }; + // assert_eq!(tcp_meta.src, 3839); + // assert_eq!(tcp_meta.dst, 80); + // assert_eq!(tcp_meta.flags, TcpFlags::SYN); + // assert_eq!(tcp_meta.seq, 4224936861); + // assert_eq!(tcp_meta.ack, 0); + // assert_eq!(offsets.inner.ulp.as_ref().unwrap().seg_idx, 0); + // assert_eq!(offsets.inner.ulp.as_ref().unwrap().seg_pos, 34); + // } - #[test] - fn write_and_read_multi_segment() { - let mp1 = allocb(34); - let mp2 = allocb(20); + // TODO(kyle): equivalents for MsgBlk? 
+ // #[test] + // fn write_and_read_multi_segment() { + // let mp1 = allocb(34); + // let mp2 = allocb(20); - unsafe { - (*mp1).b_cont = mp2; - } + // unsafe { + // (*mp1).b_cont = mp2; + // } - let mut seg1 = unsafe { PacketSeg::wrap_mblk(mp1) }; - let mut seg2 = unsafe { PacketSeg::wrap_mblk(mp2) }; + // let mut seg1 = unsafe { PacketSeg::wrap_mblk(mp1) }; + // let mut seg2 = unsafe { PacketSeg::wrap_mblk(mp2) }; - let tcp = TcpMeta { - src: 3839, - dst: 80, - flags: TcpFlags::SYN, - seq: 4224936861, - ..Default::default() - }; - let ip4 = Ipv4Meta { - src: SRC_IP4, - dst: DST_IP4, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len()) as u16, - ..Default::default() - }; - let eth = EtherMeta { - ether_type: EtherType::Ipv4, - src: SRC_MAC, - dst: DST_MAC, - }; - seg1.expand_end(34).unwrap(); - let mut wtr1 = seg1.get_writer(); - eth.emit(wtr1.slice_mut(EtherHdr::SIZE).unwrap()); - ip4.emit(wtr1.slice_mut(ip4.hdr_len()).unwrap()); - - seg2.expand_end(20).unwrap(); - let mut wtr2 = seg2.get_writer(); - tcp.emit(wtr2.slice_mut(tcp.hdr_len()).unwrap()); - let pkt = Packet::new2(seg1, seg2); - let parsed = pkt.parse(Out, GenericUlp {}).unwrap(); - - let eth_parsed = parsed.state.meta.inner.ether; - assert_eq!(parsed.state.hdr_offsets.inner.ether.seg_idx, 0); - assert_eq!(parsed.state.hdr_offsets.inner.ether.seg_pos, 0); - assert_eq!(eth_parsed.ether_type, EtherType::Ipv4); - assert_eq!(eth_parsed.dst, DST_MAC); - assert_eq!(eth_parsed.src, SRC_MAC); - - let offsets = &parsed.state.hdr_offsets; - - let ip4_parsed = match parsed.state.meta.inner.ip.unwrap() { - IpMeta::Ip4(v) => v, - _ => panic!("expected IPv4"), - }; - assert_eq!(ip4_parsed.src, SRC_IP4); - assert_eq!(ip4_parsed.dst, DST_IP4); - assert_eq!(ip4_parsed.proto, Protocol::TCP); - assert_eq!(offsets.inner.ip.as_ref().unwrap().seg_idx, 0); - assert_eq!(offsets.inner.ip.as_ref().unwrap().seg_pos, 14); - - let tcp_parsed = match parsed.state.meta.inner.ulp.unwrap() { - UlpMeta::Tcp(v) => v, 
- _ => panic!("expected TCP"), - }; - assert_eq!(tcp_parsed.src, 3839); - assert_eq!(tcp_parsed.dst, 80); - assert_eq!(tcp_parsed.flags, TcpFlags::SYN); - assert_eq!(tcp_parsed.seq, 4224936861); - assert_eq!(tcp_parsed.ack, 0); - assert_eq!(offsets.inner.ulp.as_ref().unwrap().seg_idx, 0); - assert_eq!(offsets.inner.ulp.as_ref().unwrap().seg_pos, 34); - } + // let tcp = TcpMeta { + // src: 3839, + // dst: 80, + // flags: TcpFlags::SYN, + // seq: 4224936861, + // ..Default::default() + // }; + // let ip4 = Ipv4Meta { + // src: SRC_IP4, + // dst: DST_IP4, + // proto: Protocol::TCP, + // total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len()) as u16, + // ..Default::default() + // }; + // let eth = EtherMeta { + // ether_type: EtherType::Ipv4, + // src: SRC_MAC, + // dst: DST_MAC, + // }; + // seg1.expand_end(34).unwrap(); + // let mut wtr1 = seg1.get_writer(); + // eth.emit(wtr1.slice_mut(EtherHdr::SIZE).unwrap()); + // ip4.emit(wtr1.slice_mut(ip4.hdr_len()).unwrap()); + + // seg2.expand_end(20).unwrap(); + // let mut wtr2 = seg2.get_writer(); + // tcp.emit(wtr2.slice_mut(tcp.hdr_len()).unwrap()); + // let pkt = Packet::new2(seg1, seg2); + // let parsed = pkt.parse(Out, GenericUlp {}).unwrap(); + + // let eth_parsed = parsed.state.meta.inner.ether; + // assert_eq!(parsed.state.hdr_offsets.inner.ether.seg_idx, 0); + // assert_eq!(parsed.state.hdr_offsets.inner.ether.seg_pos, 0); + // assert_eq!(eth_parsed.ether_type, EtherType::Ipv4); + // assert_eq!(eth_parsed.dst, DST_MAC); + // assert_eq!(eth_parsed.src, SRC_MAC); + + // let offsets = &parsed.state.hdr_offsets; + + // let ip4_parsed = match parsed.state.meta.inner.ip.unwrap() { + // IpMeta::Ip4(v) => v, + // _ => panic!("expected IPv4"), + // }; + // assert_eq!(ip4_parsed.src, SRC_IP4); + // assert_eq!(ip4_parsed.dst, DST_IP4); + // assert_eq!(ip4_parsed.proto, Protocol::TCP); + // assert_eq!(offsets.inner.ip.as_ref().unwrap().seg_idx, 0); + // assert_eq!(offsets.inner.ip.as_ref().unwrap().seg_pos, 14); + + // let 
tcp_parsed = match parsed.state.meta.inner.ulp.unwrap() { + // UlpMeta::Tcp(v) => v, + // _ => panic!("expected TCP"), + // }; + // assert_eq!(tcp_parsed.src, 3839); + // assert_eq!(tcp_parsed.dst, 80); + // assert_eq!(tcp_parsed.flags, TcpFlags::SYN); + // assert_eq!(tcp_parsed.seq, 4224936861); + // assert_eq!(tcp_parsed.ack, 0); + // assert_eq!(offsets.inner.ulp.as_ref().unwrap().seg_idx, 0); + // assert_eq!(offsets.inner.ulp.as_ref().unwrap().seg_pos, 34); + // } // Verify that we catch when a read requires more bytes than are // available. @@ -3583,210 +3586,213 @@ mod test { )); } - #[test] - #[should_panic] - fn slice_unchecked_bad_offset() { - let parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); - // Offset past end of segment. - parsed.segs[0].slice_unchecked(99, None); - } + // TODO(kyle): equivalents for MsgBlk? + // #[test] + // #[should_panic] + // fn slice_unchecked_bad_offset() { + // let parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); + // // Offset past end of segment. + // parsed.segs[0].slice_unchecked(99, None); + // } - #[test] - #[should_panic] - fn slice_mut_unchecked_bad_offset() { - let mut parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); - // Offset past end of segment. - parsed.segs[0].slice_mut_unchecked(99, None); - } + // #[test] + // #[should_panic] + // fn slice_mut_unchecked_bad_offset() { + // let mut parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); + // // Offset past end of segment. + // parsed.segs[0].slice_mut_unchecked(99, None); + // } - #[test] - #[should_panic] - fn slice_unchecked_bad_len() { - let parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); - // Length past end of segment. - parsed.segs[0].slice_unchecked(0, Some(99)); - } + // #[test] + // #[should_panic] + // fn slice_unchecked_bad_len() { + // let parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); + // // Length past end of segment. 
+ // parsed.segs[0].slice_unchecked(0, Some(99)); + // } - #[test] - #[should_panic] - fn slice_mut_unchecked_bad_len() { - let mut parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); - // Length past end of segment. - parsed.segs[0].slice_mut_unchecked(0, Some(99)); - } + // #[test] + // #[should_panic] + // fn slice_mut_unchecked_bad_len() { + // let mut parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); + // // Length past end of segment. + // parsed.segs[0].slice_mut_unchecked(0, Some(99)); + // } - #[test] - fn slice_unchecked_zero() { - let parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); - // Set offset to end of packet and slice the "rest" by - // passing None. - assert_eq!(parsed.segs[0].slice_unchecked(54, None).len(), 0); - } + // #[test] + // fn slice_unchecked_zero() { + // let parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); + // // Set offset to end of packet and slice the "rest" by + // // passing None. + // assert_eq!(parsed.segs[0].slice_unchecked(54, None).len(), 0); + // } - #[test] - fn slice_mut_unchecked_zero() { - let mut parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); - // Set offset to end of packet and slice the "rest" by - // passing None. - assert_eq!(parsed.segs[0].slice_mut_unchecked(54, None).len(), 0); - } + // #[test] + // fn slice_mut_unchecked_zero() { + // let mut parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); + // // Set offset to end of packet and slice the "rest" by + // // passing None. + // assert_eq!(parsed.segs[0].slice_mut_unchecked(54, None).len(), 0); + // } + // TODO(kyle): equivalent for MsgBlk // Verify that if the TCP header straddles an mblk we return an // error. 
- #[test] - fn straddled_tcp() { - let mp1 = allocb(46); - let mp2 = allocb(8); + // #[test] + // fn straddled_tcp() { + // let mp1 = allocb(46); + // let mp2 = allocb(8); - unsafe { - (*mp1).b_cont = mp2; - } + // unsafe { + // (*mp1).b_cont = mp2; + // } - let mut seg1 = unsafe { PacketSeg::wrap_mblk(mp1) }; - let mut seg2 = unsafe { PacketSeg::wrap_mblk(mp2) }; + // let mut seg1 = unsafe { PacketSeg::wrap_mblk(mp1) }; + // let mut seg2 = unsafe { PacketSeg::wrap_mblk(mp2) }; - let tcp = TcpMeta { src: 3839, dst: 80, ..Default::default() }; - let ip4 = Ipv4Meta { - src: SRC_IP4, - dst: DST_IP4, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len()) as u16, - ..Default::default() - }; - let eth = EtherMeta { - ether_type: EtherType::Ipv4, - src: SRC_MAC, - dst: DST_MAC, - }; - seg1.expand_end(46).unwrap(); - let mut wtr1 = seg1.get_writer(); - eth.emit(wtr1.slice_mut(EtherHdr::SIZE).unwrap()); - ip4.emit(wtr1.slice_mut(ip4.hdr_len()).unwrap()); - let mut tcp_bytes = vec![0u8; tcp.hdr_len()]; - tcp.emit(&mut tcp_bytes); - wtr1.write(&tcp_bytes[0..12]).unwrap(); - - seg2.expand_end(8).unwrap(); - let mut wtr2 = seg2.get_writer(); - wtr2.write(&tcp_bytes[12..]).unwrap(); - let pkt = Packet::new2(seg1, seg2); - assert_eq!(pkt.num_segs(), 2); - assert_eq!( - pkt.len(), - EtherHdr::SIZE + Ipv4Hdr::BASE_SIZE + TcpHdr::BASE_SIZE - ); - assert!(matches!( - pkt.parse(Out, GenericUlp {}), - Err(ParseError::BadHeader(_)) - )); - } + // let tcp = TcpMeta { src: 3839, dst: 80, ..Default::default() }; + // let ip4 = Ipv4Meta { + // src: SRC_IP4, + // dst: DST_IP4, + // proto: Protocol::TCP, + // total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len()) as u16, + // ..Default::default() + // }; + // let eth = EtherMeta { + // ether_type: EtherType::Ipv4, + // src: SRC_MAC, + // dst: DST_MAC, + // }; + // seg1.expand_end(46).unwrap(); + // let mut wtr1 = seg1.get_writer(); + // eth.emit(wtr1.slice_mut(EtherHdr::SIZE).unwrap()); + // 
ip4.emit(wtr1.slice_mut(ip4.hdr_len()).unwrap()); + // let mut tcp_bytes = vec![0u8; tcp.hdr_len()]; + // tcp.emit(&mut tcp_bytes); + // wtr1.write(&tcp_bytes[0..12]).unwrap(); + + // seg2.expand_end(8).unwrap(); + // let mut wtr2 = seg2.get_writer(); + // wtr2.write(&tcp_bytes[12..]).unwrap(); + // let pkt = Packet::new2(seg1, seg2); + // assert_eq!(pkt.num_segs(), 2); + // assert_eq!( + // pkt.len(), + // EtherHdr::SIZE + Ipv4Hdr::BASE_SIZE + TcpHdr::BASE_SIZE + // ); + // assert!(matches!( + // pkt.parse(Out, GenericUlp {}), + // Err(ParseError::BadHeader(_)) + // )); + // } + // TODO(kyle): equivalent for MsgBlk // Verify that we correctly parse an IPv6 packet with extension headers - #[test] - fn parse_ipv6_extension_headers_ok() { - use crate::engine::ip6::test::generate_test_packet; - use crate::engine::ip6::test::SUPPORTED_EXTENSIONS; - use itertools::Itertools; - use smoltcp::wire::IpProtocol; - for n_extensions in 0..SUPPORTED_EXTENSIONS.len() { - for extensions in - SUPPORTED_EXTENSIONS.into_iter().permutations(n_extensions) - { - // Generate a full IPv6 test packet, but pull out the extension - // headers as a byte array. 
- let (buf, ipv6_header_size) = - generate_test_packet(extensions.as_slice()); - - let next_hdr = - *(extensions.first().unwrap_or(&IpProtocol::Tcp)); - let ext_hdrs = &buf[Ipv6Hdr::BASE_SIZE..ipv6_header_size]; - - // Append a TCP header - let tcp = TcpMeta { - src: 3839, - dst: 80, - seq: 4224936861, - ..Default::default() - }; - let mut ext_bytes = [0; 64]; - let ext_len = ext_hdrs.len(); - assert!(ext_len <= 64); - ext_bytes[0..ext_len].copy_from_slice(ext_hdrs); - - let pay_len = tcp.hdr_len() + ext_len; - let ip6 = Ipv6Meta { - src: SRC_IP6, - dst: DST_IP6, - proto: Protocol::TCP, - next_hdr, - hop_limit: 255, - pay_len: pay_len as u16, - ext: Some(ext_bytes), - ext_len, - }; - let eth = EtherMeta { - ether_type: EtherType::Ipv6, - src: SRC_MAC, - dst: DST_MAC, - }; - - let mut seg = PacketSeg::alloc(1024); - seg.expand_end(14 + ipv6_header_size + tcp.hdr_len()).unwrap(); - let mut wtr = seg.get_writer(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip6.emit(wtr.slice_mut(ip6.hdr_len()).unwrap()); - tcp.emit(wtr.slice_mut(tcp.hdr_len()).unwrap()); - let parsed = - Packet::new(seg).parse(Out, GenericUlp {}).unwrap(); - - // Assert that the computed offsets of the headers and payloads - // are accurate - let offsets = &parsed.state.hdr_offsets; - let ip = offsets - .inner - .ip - .as_ref() - .expect("Expected IP header offsets"); - assert_eq!( - ip.seg_idx, 0, - "Expected IP headers to be in segment 0" - ); - assert_eq!( - ip.seg_pos, - EtherHdr::SIZE, - "Expected the IP header to start immediately \ - after the Ethernet header" - ); - assert_eq!( - ip.pkt_pos, - EtherHdr::SIZE, - "Expected the IP header to start immediately \ - after the Ethernet header" - ); - let ulp = &offsets - .inner - .ulp - .as_ref() - .expect("Expected ULP header offsets"); - assert_eq!( - ulp.seg_idx, 0, - "Expected the ULP header to be in segment 0" - ); - assert_eq!( - ulp.seg_pos, - EtherHdr::SIZE + ipv6_header_size, - "Expected the ULP header to start immediately \ - 
after the IP header", - ); - assert_eq!( - ulp.pkt_pos, - EtherHdr::SIZE + ipv6_header_size, - "Expected the ULP header to start immediately \ - after the IP header", - ); - } - } - } + // #[test] + // fn parse_ipv6_extension_headers_ok() { + // use crate::engine::ip6::test::generate_test_packet; + // use crate::engine::ip6::test::SUPPORTED_EXTENSIONS; + // use itertools::Itertools; + // use smoltcp::wire::IpProtocol; + // for n_extensions in 0..SUPPORTED_EXTENSIONS.len() { + // for extensions in + // SUPPORTED_EXTENSIONS.into_iter().permutations(n_extensions) + // { + // // Generate a full IPv6 test packet, but pull out the extension + // // headers as a byte array. + // let (buf, ipv6_header_size) = + // generate_test_packet(extensions.as_slice()); + + // let next_hdr = + // *(extensions.first().unwrap_or(&IpProtocol::Tcp)); + // let ext_hdrs = &buf[Ipv6Hdr::BASE_SIZE..ipv6_header_size]; + + // // Append a TCP header + // let tcp = TcpMeta { + // src: 3839, + // dst: 80, + // seq: 4224936861, + // ..Default::default() + // }; + // let mut ext_bytes = [0; 64]; + // let ext_len = ext_hdrs.len(); + // assert!(ext_len <= 64); + // ext_bytes[0..ext_len].copy_from_slice(ext_hdrs); + + // let pay_len = tcp.hdr_len() + ext_len; + // let ip6 = Ipv6Meta { + // src: SRC_IP6, + // dst: DST_IP6, + // proto: Protocol::TCP, + // next_hdr, + // hop_limit: 255, + // pay_len: pay_len as u16, + // ext: Some(ext_bytes), + // ext_len, + // }; + // let eth = EtherMeta { + // ether_type: EtherType::Ipv6, + // src: SRC_MAC, + // dst: DST_MAC, + // }; + + // let mut seg = PacketSeg::alloc(1024); + // seg.expand_end(14 + ipv6_header_size + tcp.hdr_len()).unwrap(); + // let mut wtr = seg.get_writer(); + // eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); + // ip6.emit(wtr.slice_mut(ip6.hdr_len()).unwrap()); + // tcp.emit(wtr.slice_mut(tcp.hdr_len()).unwrap()); + // let parsed = + // Packet::new(seg).parse(Out, GenericUlp {}).unwrap(); + + // // Assert that the computed offsets of the 
headers and payloads + // // are accurate + // let offsets = &parsed.state.hdr_offsets; + // let ip = offsets + // .inner + // .ip + // .as_ref() + // .expect("Expected IP header offsets"); + // assert_eq!( + // ip.seg_idx, 0, + // "Expected IP headers to be in segment 0" + // ); + // assert_eq!( + // ip.seg_pos, + // EtherHdr::SIZE, + // "Expected the IP header to start immediately \ + // after the Ethernet header" + // ); + // assert_eq!( + // ip.pkt_pos, + // EtherHdr::SIZE, + // "Expected the IP header to start immediately \ + // after the Ethernet header" + // ); + // let ulp = &offsets + // .inner + // .ulp + // .as_ref() + // .expect("Expected ULP header offsets"); + // assert_eq!( + // ulp.seg_idx, 0, + // "Expected the ULP header to be in segment 0" + // ); + // assert_eq!( + // ulp.seg_pos, + // EtherHdr::SIZE + ipv6_header_size, + // "Expected the ULP header to start immediately \ + // after the IP header", + // ); + // assert_eq!( + // ulp.pkt_pos, + // EtherHdr::SIZE + ipv6_header_size, + // "Expected the ULP header to start immediately \ + // after the IP header", + // ); + // } + // } + // } #[test] fn seg_writer() { @@ -3875,142 +3881,144 @@ mod test { } } - #[test] - fn small_packet_with_padding() { - const MINIMUM_ETH_FRAME_SZ: usize = 64; - const FRAME_CHECK_SEQ_SZ: usize = 4; - - // Start with a test packet that's smaller than the minimum - // ethernet frame size (64). - let body = []; - let mut pkt = tcp_pkt(&body); - assert!(pkt.len() < MINIMUM_ETH_FRAME_SZ); - - // Many (most?) NICs will pad out any such frames so that - // the total size is 64. - let padding_len = MINIMUM_ETH_FRAME_SZ - - pkt.len() - // Discount the 4 bytes for the Frame Check Sequence (FCS) - // which is usually not visible to upstack software. - - FRAME_CHECK_SEQ_SZ; - - // Tack on a new segment filled with zero to pad the packet so that - // it meets the minimum frame size. 
- // Note that we do NOT update any of the packet headers themselves - // as this padding process should be transparent to the upper - // layers. - let mut padding_seg_wtr = pkt.add_seg(padding_len).unwrap(); - padding_seg_wtr.write(&vec![0; padding_len]).unwrap(); - assert_eq!(pkt.len(), MINIMUM_ETH_FRAME_SZ - FRAME_CHECK_SEQ_SZ); - - // Generate the metadata by parsing the packet - let mut pkt = pkt.parse(Direction::In, GenericUlp {}).unwrap(); - - // Grab parsed metadata - let ip4_meta = pkt.meta().inner_ip4().cloned().unwrap(); - let tcp_meta = pkt.meta().inner_tcp().cloned().unwrap(); - - // Length in packet headers shouldn't reflect include padding - assert_eq!( - usize::from(ip4_meta.total_len), - ip4_meta.hdr_len() + tcp_meta.hdr_len() + body.len(), - ); - - // The computed body length also shouldn't include the padding - assert_eq!(pkt.state.body.len, body.len()); - - // Pretend some processing happened... - // And now we need to update the packet headers based on the - // modified packet metadata. - pkt.emit_new_headers().unwrap(); - - // Grab the actual packet headers - let ip4_off = pkt.hdr_offsets().inner.ip.unwrap().pkt_pos; - let mut rdr = pkt.get_rdr_mut(); - rdr.seek(ip4_off).unwrap(); - let ip4_hdr = Ipv4Hdr::parse(&mut rdr).unwrap(); - let tcp_hdr = TcpHdr::parse(&mut rdr).unwrap(); - - // And make sure they don't include the padding bytes - assert_eq!( - usize::from(ip4_hdr.total_len()), - usize::from(ip4_hdr.hdr_len()) + tcp_hdr.hdr_len() + body.len() - ); - } + // TODO(kyle): equivalent for MsgBlk + // #[test] + // fn small_packet_with_padding() { + // const MINIMUM_ETH_FRAME_SZ: usize = 64; + // const FRAME_CHECK_SEQ_SZ: usize = 4; + + // // Start with a test packet that's smaller than the minimum + // // ethernet frame size (64). + // let body = []; + // let mut pkt = tcp_pkt(&body); + // assert!(pkt.len() < MINIMUM_ETH_FRAME_SZ); + + // // Many (most?) NICs will pad out any such frames so that + // // the total size is 64. 
+ // let padding_len = MINIMUM_ETH_FRAME_SZ + // - pkt.len() + // // Discount the 4 bytes for the Frame Check Sequence (FCS) + // // which is usually not visible to upstack software. + // - FRAME_CHECK_SEQ_SZ; + + // // Tack on a new segment filled with zero to pad the packet so that + // // it meets the minimum frame size. + // // Note that we do NOT update any of the packet headers themselves + // // as this padding process should be transparent to the upper + // // layers. + // let mut padding_seg_wtr = pkt.add_seg(padding_len).unwrap(); + // padding_seg_wtr.write(&vec![0; padding_len]).unwrap(); + // assert_eq!(pkt.len(), MINIMUM_ETH_FRAME_SZ - FRAME_CHECK_SEQ_SZ); + + // // Generate the metadata by parsing the packet + // let mut pkt = pkt.parse(Direction::In, GenericUlp {}).unwrap(); + + // // Grab parsed metadata + // let ip4_meta = pkt.meta().inner_ip4().cloned().unwrap(); + // let tcp_meta = pkt.meta().inner_tcp().cloned().unwrap(); + + // // Length in packet headers shouldn't reflect include padding + // assert_eq!( + // usize::from(ip4_meta.total_len), + // ip4_meta.hdr_len() + tcp_meta.hdr_len() + body.len(), + // ); - #[test] - fn udp6_packet_with_padding() { - let body = [1, 2, 3, 4]; - let udp = UdpMeta { - src: 124, - dst: 5673, - len: u16::try_from(UdpHdr::SIZE + body.len()).unwrap(), - ..Default::default() - }; - let ip6 = Ipv6Meta { - src: SRC_IP6, - dst: DST_IP6, - proto: Protocol::UDP, - next_hdr: smoltcp::wire::IpProtocol::Udp, - hop_limit: 255, - pay_len: udp.len, - ext: None, - ext_len: 0, - }; - let eth = EtherMeta { - ether_type: EtherType::Ipv6, - src: SRC_MAC, - dst: DST_MAC, - }; + // // The computed body length also shouldn't include the padding + // assert_eq!(pkt.state.body.len, body.len()); + + // // Pretend some processing happened... + // // And now we need to update the packet headers based on the + // // modified packet metadata. 
+ // pkt.emit_new_headers().unwrap(); + + // // Grab the actual packet headers + // let ip4_off = pkt.hdr_offsets().inner.ip.unwrap().pkt_pos; + // let mut rdr = pkt.get_rdr_mut(); + // rdr.seek(ip4_off).unwrap(); + // let ip4_hdr = Ipv4Hdr::parse(&mut rdr).unwrap(); + // let tcp_hdr = TcpHdr::parse(&mut rdr).unwrap(); + + // // And make sure they don't include the padding bytes + // assert_eq!( + // usize::from(ip4_hdr.total_len()), + // usize::from(ip4_hdr.hdr_len()) + tcp_hdr.hdr_len() + body.len() + // ); + // } - let pkt_sz = eth.hdr_len() + ip6.hdr_len() + usize::from(ip6.pay_len); - let mut pkt = Packet::alloc_and_expand(pkt_sz); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(eth.hdr_len()).unwrap()); - ip6.emit(wtr.slice_mut(ip6.hdr_len()).unwrap()); - udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); - wtr.write(&body).unwrap(); - assert_eq!(pkt.len(), pkt_sz); + // TODO(kyle): equivalent for MsgBlk + // #[test] + // fn udp6_packet_with_padding() { + // let body = [1, 2, 3, 4]; + // let udp = UdpMeta { + // src: 124, + // dst: 5673, + // len: u16::try_from(UdpHdr::SIZE + body.len()).unwrap(), + // ..Default::default() + // }; + // let ip6 = Ipv6Meta { + // src: SRC_IP6, + // dst: DST_IP6, + // proto: Protocol::UDP, + // next_hdr: smoltcp::wire::IpProtocol::Udp, + // hop_limit: 255, + // pay_len: udp.len, + // ext: None, + // ext_len: 0, + // }; + // let eth = EtherMeta { + // ether_type: EtherType::Ipv6, + // src: SRC_MAC, + // dst: DST_MAC, + // }; - // Tack on a new segment filled zero padding at - // the end that's not part of the payload as indicated - // by the packet headers. 
- let padding_len = 8; - let mut padding_seg_wtr = pkt.add_seg(padding_len).unwrap(); - padding_seg_wtr.write(&vec![0; padding_len]).unwrap(); - assert_eq!(pkt.len(), pkt_sz + padding_len); - - // Generate the metadata by parsing the packet - let mut pkt = pkt.parse(Direction::In, GenericUlp {}).unwrap(); - - // Grab parsed metadata - let ip6_meta = pkt.meta().inner_ip6().cloned().unwrap(); - let udp_meta = pkt.meta().inner_udp().cloned().unwrap(); - - // Length in packet headers shouldn't reflect include padding - assert_eq!( - usize::from(ip6_meta.pay_len), - udp_meta.hdr_len() + body.len(), - ); + // let pkt_sz = eth.hdr_len() + ip6.hdr_len() + usize::from(ip6.pay_len); + // let mut pkt = Packet::alloc_and_expand(pkt_sz); + // let mut wtr = pkt.seg0_wtr(); + // eth.emit(wtr.slice_mut(eth.hdr_len()).unwrap()); + // ip6.emit(wtr.slice_mut(ip6.hdr_len()).unwrap()); + // udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); + // wtr.write(&body).unwrap(); + // assert_eq!(pkt.len(), pkt_sz); + + // // Tack on a new segment filled zero padding at + // // the end that's not part of the payload as indicated + // // by the packet headers. 
+ // let padding_len = 8; + // let mut padding_seg_wtr = pkt.add_seg(padding_len).unwrap(); + // padding_seg_wtr.write(&vec![0; padding_len]).unwrap(); + // assert_eq!(pkt.len(), pkt_sz + padding_len); + + // // Generate the metadata by parsing the packet + // let mut pkt = pkt.parse(Direction::In, GenericUlp {}).unwrap(); + + // // Grab parsed metadata + // let ip6_meta = pkt.meta().inner_ip6().cloned().unwrap(); + // let udp_meta = pkt.meta().inner_udp().cloned().unwrap(); + + // // Length in packet headers shouldn't reflect include padding + // assert_eq!( + // usize::from(ip6_meta.pay_len), + // udp_meta.hdr_len() + body.len(), + // ); - // The computed body length also shouldn't include the padding - assert_eq!(pkt.state.body.len, body.len()); + // // The computed body length also shouldn't include the padding + // assert_eq!(pkt.state.body.len, body.len()); - // Pretend some processing happened... - // And now we need to update the packet headers based on the - // modified packet metadata. - pkt.emit_new_headers().unwrap(); + // // Pretend some processing happened... + // // And now we need to update the packet headers based on the + // // modified packet metadata. 
+ // pkt.emit_new_headers().unwrap(); - // Grab the actual packet headers - let ip6_off = pkt.hdr_offsets().inner.ip.unwrap().pkt_pos; - let mut rdr = pkt.get_rdr_mut(); - rdr.seek(ip6_off).unwrap(); - let ip6_hdr = Ipv6Hdr::parse(&mut rdr).unwrap(); - let udp_hdr = UdpHdr::parse(&mut rdr).unwrap(); + // // Grab the actual packet headers + // let ip6_off = pkt.hdr_offsets().inner.ip.unwrap().pkt_pos; + // let mut rdr = pkt.get_rdr_mut(); + // rdr.seek(ip6_off).unwrap(); + // let ip6_hdr = Ipv6Hdr::parse(&mut rdr).unwrap(); + // let udp_hdr = UdpHdr::parse(&mut rdr).unwrap(); - // And make sure they don't include the padding bytes - assert_eq!(ip6_hdr.pay_len(), udp_hdr.hdr_len() + body.len()); - } + // // And make sure they don't include the padding bytes + // assert_eq!(ip6_hdr.pay_len(), udp_hdr.hdr_len() + body.len()); + // } fn create_linked_mblks(n: usize) -> Vec<*mut mblk_t> { let mut els = vec![]; diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 6ea0919f..f8d5dd0c 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -6,6 +6,10 @@ //! Rules and actions. +use crate::engine::ingot_base::Ipv4; +use crate::engine::ingot_base::Ipv4Mut; +use crate::engine::GenericUlp; + use super::ether::EtherMeta; use super::ether::EtherMod; use super::flow_table::StateSummary; @@ -52,6 +56,10 @@ use core::fmt::Display; use core::mem::MaybeUninit; use illumos_sys_hdrs::c_char; use illumos_sys_hdrs::uintptr_t; +use ingot::ethernet::Ethertype; +use ingot::ip::IpProtocol; +use ingot::tcp::Tcp; +use ingot::types::HeaderLen; use ingot::types::InlineHeader; use ingot::types::Read; use opte_api::Direction; @@ -1031,35 +1039,29 @@ fn rule_matching() { let dst_port = "443".parse().unwrap(); // There is no DataPredicate usage in this test, so this pkt/rdr // can be bogus. 
- let pkt = Packet::copy(&[0xA]); - let mut rdr = pkt.get_rdr(); - - let ip = IpMeta::from(Ipv4Meta { - src: src_ip, - dst: dst_ip, - proto: Protocol::TCP, - ttl: 64, - ident: 1, - hdr_len: 20, - total_len: 40, - csum: [0; 2], - }); - let ulp = UlpMeta::from(TcpMeta { - src: src_port, - dst: dst_port, - flags: 0, - seq: 0, - ack: 0, - options_bytes: None, - options_len: 0, + let tcp = Tcp { + source: src_port, + destination: dst_port, window_size: 64240, ..Default::default() - }); - - let meta = PacketMeta { - outer: Default::default(), - inner: InnerMeta { ip: Some(ip), ulp: Some(ulp), ..Default::default() }, }; + let mut ip4 = Ipv4 { + source: src_ip, + destination: dst_ip, + protocol: IpProtocol::TCP, + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length()) as u16, + ..Default::default() + }; + + let eth = Ethernet { ethertype: Ethertype::IPV4, ..Default::default() }; + + let mut pkt_m = MsgBlk::new_ethernet_pkt((&eth, &ip4, &tcp)); + let mut pkt = Packet2::new(pkt_m.iter_mut()) + .parse_outbound(GenericUlp {}) + .unwrap() + .to_full_meta(); + pkt.compute_checksums(); + let meta = pkt.meta(); r1.add_predicate(Predicate::InnerSrcIp4(vec![Ipv4AddrMatch::Exact( src_ip, @@ -1067,36 +1069,14 @@ fn rule_matching() { let r1 = r1.finalize(); let ameta = ActionMeta::new(); - assert!(r1.is_match(&meta, &ameta, &mut rdr)); + assert!(r1.is_match(&meta, &ameta)); let new_src_ip = "10.11.11.99".parse().unwrap(); - let ip = IpMeta::from(Ipv4Meta { - src: new_src_ip, - dst: dst_ip, - proto: Protocol::TCP, - ttl: 64, - ident: 1, - hdr_len: 20, - total_len: 40, - csum: [0; 2], - }); - let ulp = UlpMeta::from(TcpMeta { - src: src_port, - dst: dst_port, - flags: 0, - seq: 0, - ack: 0, - options_bytes: None, - options_len: 0, - window_size: 64240, - ..Default::default() - }); - - let meta = PacketMeta { - outer: Default::default(), - inner: InnerMeta { ip: Some(ip), ulp: Some(ulp), ..Default::default() }, - }; + let meta = pkt.meta_mut(); + if let Some(L3::Ipv4(v4)) = &mut 
meta.headers.inner_l3 { + v4.set_source(new_src_ip); + } - assert!(!r1.is_match(&meta, &ameta, &mut rdr)); + assert!(!r1.is_match(&meta, &ameta)); } diff --git a/lib/opte/src/engine/snat.rs b/lib/opte/src/engine/snat.rs index fc107966..2d96bfd1 100644 --- a/lib/opte/src/engine/snat.rs +++ b/lib/opte/src/engine/snat.rs @@ -465,6 +465,19 @@ impl ActionDesc for SNatIcmpEchoDesc { #[cfg(test)] mod test { + use ingot::ethernet::Ethertype; + use ingot::ip::IpProtocol; + use ingot::tcp::Tcp; + use ingot::tcp::TcpFlags; + use ingot::tcp::TcpRef; + use ingot::types::HeaderLen; + + use crate::engine::ingot_base::Ethernet; + use crate::engine::ingot_base::EthernetRef; + use crate::engine::ingot_base::Ipv4; + use crate::engine::ingot_base::Ipv4Ref; + use crate::engine::ingot_packet::MsgBlk; + use super::*; #[test] @@ -519,29 +532,31 @@ mod test { // ================================================================ // Build the packet // ================================================================ - let body = vec![]; - let tcp = - TcpMeta { src: priv_port, dst: outside_port, ..Default::default() }; - let ip4 = Ipv4Meta { - src: priv_ip, - dst: outside_ip, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let body: Vec = vec![]; + let tcp = Tcp { + source: priv_port, + destination: outside_port, + ..Default::default() + }; + let mut ip4 = Ipv4 { + source: priv_ip, + destination: outside_ip, + protocol: IpProtocol::TCP, + total_len: (Ipv4::MINIMUM_LENGTH + (&tcp, &body).packet_length()) + as u16, ..Default::default() }; - let eth = EtherMeta { - ether_type: EtherType::Ipv4, - src: priv_mac, - dst: dest_mac, + let eth = Ethernet { + destination: dest_mac, + source: priv_mac, + ethertype: Ethertype::IPV4, }; - let pkt_len = EtherHdr::SIZE + usize::from(ip4.total_len); - let mut pkt = Packet::alloc_and_expand(pkt_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - 
ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - tcp.emit(wtr.slice_mut(tcp.hdr_len()).unwrap()); - wtr.write(&body).unwrap(); - let mut pkt = pkt.parse(Direction::Out, GenericUlp {}).unwrap(); + + let mut pkt_m = MsgBlk::new_ethernet_pkt((&eth, &ip4, &tcp, &body)); + let mut pkt = Packet2::new(pkt_m.iter_mut()) + .parse_outbound(GenericUlp {}) + .unwrap() + .to_full_meta(); pkt.compute_checksums(); // ================================================================ @@ -561,81 +576,82 @@ mod test { out_ht.run(pkt.meta_mut()).unwrap(); let pmo = pkt.meta(); - let ether_meta = pmo.inner.ether; - assert_eq!(ether_meta.src, priv_mac); - assert_eq!(ether_meta.dst, dest_mac); + let ether_meta = pmo.inner_ether(); + assert_eq!(ether_meta.source(), priv_mac); + assert_eq!(ether_meta.destination(), dest_mac); - let ip4_meta = match pmo.inner.ip.as_ref().unwrap() { - IpMeta::Ip4(v) => v, + let ip4_meta = match pmo.inner_ip4() { + Some(v) => v, _ => panic!("expect Ipv4Meta"), }; - assert_eq!(ip4_meta.src, pub_ip); - assert_eq!(ip4_meta.dst, outside_ip); - assert_eq!(ip4_meta.proto, Protocol::TCP); + assert_eq!(ip4_meta.source(), pub_ip); + assert_eq!(ip4_meta.destination(), outside_ip); + assert_eq!(ip4_meta.protocol(), IpProtocol::TCP); - let tcp_meta = match pmo.inner.ulp.as_ref().unwrap() { - UlpMeta::Tcp(v) => v, + let tcp_meta = match pmo.inner_tcp() { + Some(v) => v, _ => panic!("expect TcpMeta"), }; - assert_eq!(tcp_meta.src, pub_port); - assert_eq!(tcp_meta.dst, outside_port); - assert_eq!(tcp_meta.flags, 0); + assert_eq!(tcp_meta.source(), pub_port); + assert_eq!(tcp_meta.destination(), outside_port); + assert_eq!(tcp_meta.flags(), TcpFlags::empty()); // ================================================================ // Verify inbound header transformation. 
// ================================================================ - let body = vec![]; - let tcp = - TcpMeta { src: outside_port, dst: priv_port, ..Default::default() }; - let ip4 = Ipv4Meta { - src: outside_ip, - dst: priv_ip, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let tcp = Tcp { + source: outside_port, + destination: pub_port, ..Default::default() }; - let eth = EtherMeta { - ether_type: EtherType::Ipv4, - src: dest_mac, - dst: priv_mac, + let mut ip4 = Ipv4 { + source: outside_ip, + destination: pub_ip, + protocol: IpProtocol::TCP, + total_len: (Ipv4::MINIMUM_LENGTH + (&tcp, &body).packet_length()) + as u16, + ..Default::default() + }; + let eth = Ethernet { + destination: priv_mac, + source: dest_mac, + ethertype: Ethertype::IPV4, }; - let pkt_len = EtherHdr::SIZE + usize::from(ip4.total_len); - let mut pkt = Packet::alloc_and_expand(pkt_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - tcp.emit(wtr.slice_mut(tcp.hdr_len()).unwrap()); - wtr.write(&body).unwrap(); - let mut pkt = pkt.parse(Direction::In, GenericUlp {}).unwrap(); + + let mut pkt_m = MsgBlk::new_ethernet_pkt((&eth, &ip4, &tcp, &body)); + let mut pkt = Packet2::new(pkt_m.iter_mut()) + .parse_inbound(GenericUlp {}) + .unwrap() + .to_full_meta(); pkt.compute_checksums(); let in_ht = desc.gen_ht(Direction::In); in_ht.run(pkt.meta_mut()).unwrap(); let pmi = pkt.meta(); - let ether_meta = pmi.inner.ether; - assert_eq!(ether_meta.src, dest_mac); - assert_eq!(ether_meta.dst, priv_mac); + let ether_meta = pmi.inner_ether(); + assert_eq!(ether_meta.source(), dest_mac); + assert_eq!(ether_meta.destination(), priv_mac); - let ip4_meta = match pmi.inner.ip.as_ref().unwrap() { - IpMeta::Ip4(v) => v, + let ip4_meta = match pmi.inner_ip4() { + Some(v) => v, _ => panic!("expect Ipv4Meta"), }; - assert_eq!(ip4_meta.src, outside_ip); - assert_eq!(ip4_meta.dst, 
priv_ip); - assert_eq!(ip4_meta.proto, Protocol::TCP); + assert_eq!(ip4_meta.source(), outside_ip); + assert_eq!(ip4_meta.destination(), priv_ip); + assert_eq!(ip4_meta.protocol(), IpProtocol::TCP); - let tcp_meta = match pmi.inner.ulp.as_ref().unwrap() { - UlpMeta::Tcp(v) => v, + let tcp_meta = match pmi.inner_tcp() { + Some(v) => v, _ => panic!("expect TcpMeta"), }; - assert_eq!(tcp_meta.src, outside_port); - assert_eq!(tcp_meta.dst, priv_port); - assert_eq!(tcp_meta.flags, 0); + assert_eq!(tcp_meta.source(), outside_port); + assert_eq!(tcp_meta.destination(), priv_port); + assert_eq!(tcp_meta.flags(), TcpFlags::empty()); // ================================================================ // Verify other ULPs are unaffected. diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index c5983034..10becdb2 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -1864,46 +1864,44 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { unpack_and_verify_icmp(&mut pkt5_m, &g1_cfg, &new_params, Out, seq_no, 0); } -#[test] -fn bad_ip_len() { - // TODO(kyle) - panic!() - - // let cfg = lab_cfg(); - - // let eth = EtherMeta { - // src: cfg.guest_mac, - // dst: MacAddr::BROADCAST, - // ether_type: EtherType::Ipv4, - // }; - - // let ip = Ipv4Meta { - // src: "0.0.0.0".parse().unwrap(), - // dst: Ipv4Addr::LOCAL_BCAST, - // proto: Protocol::UDP, - // ttl: 64, - // ident: 1, - // hdr_len: 20, - // // We write a total legnth of 4 bytes, which is completely - // // bogus for an IP header and should return an error during - // // processing. 
- // total_len: 4, - // ..Default::default() - // }; - - // let udp = UdpMeta { src: 68, dst: 67, ..Default::default() }; - // let total_len = EtherHdr::SIZE + usize::from(ip.hdr_len) + udp.hdr_len(); - // let mut pkt = Packet::alloc_and_expand(total_len); - // let mut wtr = pkt.seg0_wtr(); - // eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - // ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - // udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); - // let res = pkt.parse(Out, VpcParser::new()); - // assert_eq!( - // res.err().unwrap(), - // Ipv4HdrError::BadTotalLen { total_len: 4 }.into() - // ); -} +// TODO(kyle) +// #[test] +// fn bad_ip_len() { +// let cfg = lab_cfg(); + +// let eth = EtherMeta { +// src: cfg.guest_mac, +// dst: MacAddr::BROADCAST, +// ether_type: EtherType::Ipv4, +// }; + +// let ip = Ipv4Meta { +// src: "0.0.0.0".parse().unwrap(), +// dst: Ipv4Addr::LOCAL_BCAST, +// proto: Protocol::UDP, +// ttl: 64, +// ident: 1, +// hdr_len: 20, +// // We write a total legnth of 4 bytes, which is completely +// // bogus for an IP header and should return an error during +// // processing. +// total_len: 4, +// ..Default::default() +// }; + +// let udp = UdpMeta { src: 68, dst: 67, ..Default::default() }; +// let total_len = EtherHdr::SIZE + usize::from(ip.hdr_len) + udp.hdr_len(); +// let mut pkt = Packet::alloc_and_expand(total_len); +// let mut wtr = pkt.seg0_wtr(); +// eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); +// ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); +// udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); +// let res = pkt.parse(Out, VpcParser::new()); +// assert_eq!( +// res.err().unwrap(), +// Ipv4HdrError::BadTotalLen { total_len: 4 }.into() +// ); +// } // Verify that OPTE generates a hairpin ARP reply when the guest // queries for the gateway. From cd7d1a500d9ce52a489042999c4f719b947cba20 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 16 Oct 2024 11:43:10 +0100 Subject: [PATCH 049/115] Arc<> up FlowEntries. 
Yet another architectural change in service of something small that we want to keep fast. This gives us the ability to keep dropping the table lock very quickly, even with TCP state handling. Previously we needed to treat in- and out-bound TCP state separately -- inbound flows would have packets transformed before being able to look up TCP state. This is still the case in the slowpath but for the fasterpaths this was super unwieldy. Arc<>ing the flow entries themselves (rather than just their state) allows us to prevent that lookup from needing to happen at all. We can `hit` flows outside of the table lock. Accordingly, we don't need to do anything insane like regrabbing the table lock after processing inbound packets -- we have everything we need after one `clone`. One or two things needed to hook up TCP flows in the fastpath, still. --- lib/opte/src/ddi/time.rs | 41 ++- lib/opte/src/engine/flow_table.rs | 128 +++++--- lib/opte/src/engine/layer.rs | 16 +- lib/opte/src/engine/port.rs | 484 +++++++++++++++++------------- lib/opte/src/engine/tcp_state.rs | 111 +++---- 5 files changed, 462 insertions(+), 318 deletions(-) diff --git a/lib/opte/src/ddi/time.rs b/lib/opte/src/ddi/time.rs index f552ba73..e5d11ddc 100644 --- a/lib/opte/src/ddi/time.rs +++ b/lib/opte/src/ddi/time.rs @@ -6,6 +6,7 @@ //! Moments, periodics, etc. use core::ops::Add; +use core::sync::atomic::AtomicU64; use core::time::Duration; cfg_if! { @@ -15,6 +16,7 @@ cfg_if! { use illumos_sys_hdrs as ddi; } else { use std::time::Instant; + use std::sync::OnceLock; } } @@ -25,14 +27,19 @@ pub const NANOS: u64 = 1_000_000_000; /// The conversion from nanoseconds to milliseconds. pub const NANOS_TO_MILLIS: u64 = 1_000_000; +#[cfg(any(feature = "std", test))] +static FIRST_TS: OnceLock = OnceLock::new(); + /// A moment in time. 
#[derive(Clone, Copy, Debug)] pub struct Moment { #[cfg(all(not(feature = "std"), not(test)))] inner: ddi::hrtime_t, + // This is a duration masquerading as an instant -- this + // allows us to and from raw ns counts when needed on std. #[cfg(any(feature = "std", test))] - inner: Instant, + inner: Duration, } impl Add for Moment { @@ -62,7 +69,7 @@ impl Moment { if #[cfg(all(not(feature = "std"), not(test)))] { (self.inner as u64).saturating_sub(earlier.inner as u64) / NANOS_TO_MILLIS } else { - let delta = self.inner.saturating_duration_since(earlier.inner); + let delta = self.inner.saturating_sub(earlier.inner); delta.as_secs() * MILLIS + delta.subsec_millis() as u64 } } @@ -73,20 +80,36 @@ impl Moment { if #[cfg(all(not(feature = "std"), not(test)))] { Self { inner: unsafe { ddi::gethrtime() } } } else { - Self { inner: Instant::now() } + let first_ts = *FIRST_TS.get_or_init(|| Instant::now()); + Self { inner: Instant::now().saturating_duration_since(first_ts) } } } } - /// Return the underlying timestamp for debugging purposes - /// if supported on the current platform. - #[allow(dead_code)] - pub(crate) fn raw_millis(&self) -> Option { + /// Return the underlying timestamp for atomic storage or debugging, converted + /// to milliseconds. + pub(crate) fn raw_millis(&self) -> u64 { + self.raw() / NANOS_TO_MILLIS + } + + /// Return the underlying timestamp for atomic storage or debugging. + pub(crate) fn raw(&self) -> u64 { + cfg_if! { + if #[cfg(all(not(feature = "std"), not(test)))] { + self.inner as u64 + } else { + // Conversion here is truncating. + self.inner.as_nanos() as u64 + } + } + } + + pub(crate) fn from_raw_nanos(raw: u64) -> Self { cfg_if! 
{ if #[cfg(all(not(feature = "std"), not(test)))] { - Some(self.inner as u64 / NANOS_TO_MILLIS) + Self { inner: raw as ddi::hrtime_t } } else { - None + Self { inner: Duration::from_nanos(raw) } } } } diff --git a/lib/opte/src/engine/flow_table.rs b/lib/opte/src/engine/flow_table.rs index 4bd53d27..758bd32c 100644 --- a/lib/opte/src/engine/flow_table.rs +++ b/lib/opte/src/engine/flow_table.rs @@ -12,13 +12,18 @@ use super::packet::InnerFlowId; use crate::ddi::time::Moment; use crate::ddi::time::MILLIS; +use crate::ddi::time::NANOS_TO_MILLIS; use alloc::boxed::Box; use alloc::collections::BTreeMap; use alloc::ffi::CString; use alloc::string::String; +use alloc::sync::Arc; use alloc::vec::Vec; use core::fmt; use core::num::NonZeroU32; +use core::sync::atomic::AtomicBool; +use core::sync::atomic::AtomicU64; +use core::sync::atomic::Ordering; #[cfg(all(not(feature = "std"), not(test)))] use illumos_sys_hdrs::uintptr_t; use opte_api::OpteError; @@ -80,12 +85,13 @@ pub struct FlowTable { name_c: CString, limit: NonZeroU32, policy: Box>, - map: BTreeMap>, + map: BTreeMap>>, } impl FlowTable where - S: Clone + fmt::Debug + Dump, + // S: Clone + fmt::Debug + Dump, + S: fmt::Debug + Dump, { /// Add a new entry to the flow table. /// @@ -101,16 +107,39 @@ where } let entry = FlowEntry::new(state); - self.map.insert(flow_id, entry); + self.map.insert(flow_id, entry.into()); Ok(()) } + /// Add a new entry to the flow table, returning a shared refrence to + /// the entry. + /// + /// # Errors + /// + /// If the table is at max capacity, an error is returned and no + /// modification is made to the table. + /// + /// If an entry already exists for this flow, it is overwritten. 
+ pub fn add_and_return( + &mut self, + flow_id: InnerFlowId, + state: S, + ) -> Result>> { + if self.map.len() == self.limit.get() as usize { + return Err(OpteError::MaxCapacity(self.limit.get() as u64)); + } + + let entry = Arc::new(FlowEntry::new(state)); + self.map.insert(flow_id, entry.clone()); + Ok(entry) + } + /// Add a new entry to the flow table while eliding the capacity check. /// /// This is meant for table implementations that enforce their own limit. pub fn add_unchecked(&mut self, flow_id: InnerFlowId, state: S) { let entry = FlowEntry::new(state); - self.map.insert(flow_id, entry); + self.map.insert(flow_id, entry.into()); } // Clear all entries from the flow table. @@ -145,8 +174,8 @@ where port_c, name_c, flowid, - Some(entry.last_hit), - Some(now), + Some(entry.last_hit.load(Ordering::Relaxed)), + Some(now.raw_millis()), ); expired.push(f(entry.state())); return false; @@ -165,18 +194,18 @@ where /// Get a reference to the flow entry for a given flow, if one /// exists. - pub fn get(&mut self, flow_id: &InnerFlowId) -> Option<&FlowEntry> { + pub fn get(&self, flow_id: &InnerFlowId) -> Option<&Arc>> { self.map.get(flow_id) } /// Get a mutable reference to the flow entry for a given flow, if /// one exists. - pub fn get_mut( - &mut self, - flow_id: &InnerFlowId, - ) -> Option<&mut FlowEntry> { - self.map.get_mut(flow_id) - } + // pub fn get_mut( + // &mut self, + // flow_id: &InnerFlowId, + // ) -> Option<&mut FlowEntry> { + // self.map.get_mut(flow_id) + // } /// Mark all flow table entries as requiring revalidation after a /// reset or removal of rules. @@ -185,8 +214,8 @@ where /// will occupy flowtable space until they are denied or expire. As such /// this method should be used only when the original state (`S`) *must* /// be preserved to ensure correctness. 
- pub fn mark_dirty(&mut self) { - self.map.values_mut().for_each(|v| v.dirty = true); + pub fn mark_dirty(&self) { + self.map.values().for_each(|v| v.set_dirty()); } pub fn new( @@ -211,7 +240,7 @@ where self.map.len() as u32 } - pub fn remove(&mut self, flow: &InnerFlowId) -> Option> { + pub fn remove(&mut self, flow: &InnerFlowId) -> Option>> { self.map.remove(flow) } } @@ -221,8 +250,8 @@ fn flow_expired_probe( port: &CString, name: &CString, flowid: &InnerFlowId, - last_hit: Option, - now: Option, + last_hit: Option, + now: Option, ) { cfg_if! { if #[cfg(all(not(feature = "std"), not(test)))] { @@ -231,8 +260,8 @@ fn flow_expired_probe( port.as_ptr() as uintptr_t, name.as_ptr() as uintptr_t, flowid, - last_hit.and_then(|m| m.raw_millis()).unwrap_or_default() as usize, - now.and_then(|m| m.raw_millis()).unwrap_or_default() as usize, + last_hit.unwrap_or_default() as usize, + now.unwrap_or_default() as usize, ); } } else if #[cfg(feature = "usdt")] { @@ -240,7 +269,7 @@ fn flow_expired_probe( let port_s = port.to_str().unwrap(); let name_s = name.to_str().unwrap(); crate::opte_provider::flow__expired!( - || (port_s, name_s, flowid.to_string(), 0, 0) + || (port_s, name_s, flowid.to_string(), last_hit.unwrap_or_default(), now.unwrap_or_default()) ); } else { let (_, _, _) = (port, name, flowid); @@ -257,24 +286,27 @@ pub trait Dump { } /// The FlowEntry holds any arbitrary state type `S`. -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct FlowEntry { state: S, /// Number of times this flow has been matched. - hits: u64, + hits: AtomicU64, /// This tracks the last time the flow was matched. - last_hit: Moment, + /// + /// These are raw u64s sourced from `Moment`, which track time + /// in nanoseconds. + last_hit: AtomicU64, /// Records whether this flow predates a rule change, and /// must rerun rule processing before `state` can be used. 
- dirty: bool, + dirty: AtomicBool, } impl FlowEntry { fn dump(&self) -> S::DumpVal { - self.state.dump(self.hits) + self.state.dump(self.hits.load(Ordering::Relaxed)) } pub fn state_mut(&mut self) -> &mut S { @@ -286,32 +318,54 @@ impl FlowEntry { } pub fn hits(&self) -> u64 { - self.hits + self.hits.load(Ordering::Relaxed) } - pub fn hit(&mut self) { - self.hits += 1; - self.last_hit = Moment::now(); + /// Increments this flow's hit counter and + pub fn hit(&self) { + self.hit_at(Moment::now()) + } + + /// Increments a flow's hit counter and sets th + /// + /// This is used to minimise calls to `gethrtime` in fastpath + /// operations. Callers *MUST* be certain that expiry logic for this flow + /// entry uses saturating comparisons, particularly if timestamps are + /// sourced before grabbing a lock / processing a packet / any other + /// long-running operation. **This is doubly true if you are not holding + /// the port lock.** + pub(crate) fn hit_at(&self, now: Moment) { + self.hits.fetch_add(1, Ordering::Relaxed); + self.last_hit.store(now.raw(), Ordering::Relaxed); } pub fn is_dirty(&self) -> bool { - self.dirty + self.dirty.load(Ordering::Relaxed) } - pub fn mark_clean(&mut self) { - self.dirty = false + pub fn set_dirty(&self) { + self.dirty.store(true, Ordering::Relaxed) } - pub fn last_hit(&self) -> &Moment { - &self.last_hit + pub fn mark_clean(&self) { + self.dirty.store(false, Ordering::Relaxed) + } + + pub fn last_hit(&self) -> Moment { + Moment::from_raw_nanos(self.last_hit.load(Ordering::Relaxed)) } fn is_expired(&self, now: Moment, ttl: Ttl) -> bool { - ttl.is_expired(self.last_hit, now) + ttl.is_expired(self.last_hit(), now) } fn new(state: S) -> Self { - FlowEntry { state, hits: 0, last_hit: Moment::now(), dirty: false } + FlowEntry { + state, + hits: 0.into(), + last_hit: Moment::now().raw().into(), + dirty: false.into(), + } } } diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index c9199b3b..9ee46e3b 100644 --- 
a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -243,8 +243,8 @@ impl LayerFlowTable { self.count = self.ft_out.num_flows(); } - fn get_in(&mut self, flow: &InnerFlowId) -> EntryState { - match self.ft_in.get_mut(flow) { + fn get_in(&self, flow: &InnerFlowId) -> EntryState { + match self.ft_in.get(flow) { Some(entry) => { entry.hit(); if entry.is_dirty() { @@ -258,8 +258,8 @@ impl LayerFlowTable { } } - fn get_out(&mut self, flow: &InnerFlowId) -> EntryState { - match self.ft_out.get_mut(flow) { + fn get_out(&self, flow: &InnerFlowId) -> EntryState { + match self.ft_out.get(flow) { Some(entry) => { entry.hit(); let action = entry.state().action_desc.clone(); @@ -277,27 +277,27 @@ impl LayerFlowTable { fn remove_in( &mut self, flow: &InnerFlowId, - ) -> Option> { + ) -> Option>> { self.ft_in.remove(flow) } fn remove_out( &mut self, flow: &InnerFlowId, - ) -> Option> { + ) -> Option>> { self.ft_out.remove(flow) } fn mark_clean(&mut self, dir: Direction, flow: &InnerFlowId) { match dir { Direction::In => { - let entry = self.ft_in.get_mut(flow); + let entry = self.ft_in.get(flow); if let Some(entry) = entry { entry.mark_clean(); } } Direction::Out => { - let entry = self.ft_out.get_mut(flow); + let entry = self.ft_out.get(flow); if let Some(entry) = entry { entry.mark_clean(); } diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 5034a690..573c989f 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -24,6 +24,7 @@ use super::ingot_base::L3Repr; use super::ingot_packet::MsgBlk; use super::ingot_packet::MsgBlkIterMut; use super::ingot_packet::Packet2; +use super::ingot_packet::PacketHeaders2; use super::ingot_packet::Parsed2; use super::ingot_packet::ParsedMblk; use super::ingot_packet::ParsedStage1; @@ -91,6 +92,7 @@ use core::sync::atomic::Ordering::SeqCst; #[cfg(all(not(feature = "std"), not(test)))] use illumos_sys_hdrs::uintptr_t; use ingot::geneve::Geneve; +use ingot::tcp::TcpRef; use 
ingot::types::Emit; use ingot::types::HeaderLen; use ingot::types::Read; @@ -99,6 +101,7 @@ use kstat_macro::KStatProvider; use opte_api::Direction; use opte_api::MacAddr; use opte_api::OpteError; +use zerocopy::ByteSlice; use zerocopy::ByteSliceMut; pub type Result = result::Result; @@ -534,6 +537,10 @@ pub struct UftEntry { /// The port epoch upon which this entry was established. Used for /// invalidation when the rule set is updated. epoch: u64, + + /// Cached reference to a flow's TCP state, if applicable. + /// This allows us to maintain up-to-date TCP flow table info + tcp_flow: Option>>, } impl Dump for UftEntry { @@ -1203,6 +1210,7 @@ impl Port { where M: LightweightMeta< as Read>::Chunk>, { + let process_start = Moment::now(); let flow_before = pkt.flow(); // Packet processing is split into a few mechanisms based on @@ -1237,9 +1245,10 @@ impl Port { // TODO: fixup types here. // self.port_process_entry_probe(dir, &flow_before, epoch, &pkt); - let mut uft: Option<&mut FlowEntry>> = match dir { - Direction::Out => data.uft_out.get_mut(&flow_before), - Direction::In => data.uft_in.get_mut(&flow_before), + let mut uft: Option<&Arc>>> = match dir + { + Direction::Out => data.uft_out.get(&flow_before), + Direction::In => data.uft_in.get(&flow_before), }; enum FastPathDecision { @@ -1248,23 +1257,31 @@ impl Port { Slow, } + // enum FastPathDecision { + // CompiledUft { tx: Arc>>, l4_hash: u32 }, + // Uft { tx: Arc>>, l4_hash: u32 }, + // Slow, + // } + let decision = match uft { // We have a valid UFT entry of some kind -- clone out the // saved transforms so that we can drop the lock ASAP. Some(entry) if entry.state().epoch == epoch => { entry.hit(); - let now = *entry.last_hit(); + let now = entry.last_hit(); // The Fast Path. 
let xforms = &entry.state().xforms; let out = if let Some(compiled) = xforms.compiled.as_ref() { FastPathDecision::CompiledUft { tx: Arc::clone(compiled), + // tx: Arc::clone(entry), l4_hash: entry.state().l4_hash, } } else { FastPathDecision::Uft { tx: Arc::clone(xforms), + // tx: Arc::clone(entry), l4_hash: entry.state().l4_hash, } }; @@ -1555,7 +1572,7 @@ impl Port { .lock() .tcp_flows .get(flow) - .map(|entry| entry.state().tcp_state.tcp_state()) + .map(|entry| entry.state().tcp_state()) } } @@ -1563,16 +1580,7 @@ impl Port { #[derive(Debug)] enum TcpMaybeClosed { Closed { ufid_inbound: Option }, - NewState(TcpState), -} - -impl From for TcpState { - fn from(value: TcpMaybeClosed) -> Self { - match value { - TcpMaybeClosed::Closed { .. } => TcpState::Closed, - TcpMaybeClosed::NewState(s) => s, - } - } + NewState(TcpState, Arc>), } pub enum ThinProcRes { @@ -1996,13 +2004,13 @@ impl Port { /// * `OpteError::MaxCapacity(_)` if the TCP flows table is full. /// * `ProcessError::TcpFlow(_)` if we do not have a valid transition from /// `Closed` based on the packet state. - fn create_new_tcp_entry( + fn create_new_tcp_entry( &self, tcp_flows: &mut FlowTable, - tcp: &TcpMeta, + tcp: &impl TcpRef, dir: &TcpDirection, pkt_len: u64, - ) -> result::Result { + ) -> result::Result { // Create a new entry and find its current state. In // this case it should always be `SynSent`, unless we're // recovering an `Established` flow. 
@@ -2033,21 +2041,23 @@ impl Port { (ufid_out, TcpFlowEntryState::new_outbound(tfs, pkt_len)) } }; - match tcp_flows.add(*ufid_out, tfes) { - Ok(_) => {} + match tcp_flows.add_and_return(*ufid_out, tfes) { + Ok(entry) => Ok(TcpMaybeClosed::NewState(tcp_state, entry)), Err(OpteError::MaxCapacity(limit)) => { - return Err(ProcessError::FlowTableFull { - kind: "TCP", - limit, - }); + Err(ProcessError::FlowTableFull { kind: "TCP", limit }) } Err(_) => unreachable!( "Cannot return other errors from FlowTable::add" ), - }; + } + } else { + Ok(TcpMaybeClosed::Closed { + ufid_inbound: match *dir { + TcpDirection::In { ufid_in, .. } => Some(*ufid_in), + TcpDirection::Out { .. } => None, + }, + }) } - - Ok(tcp_state) } /// Attempts to lookup and update TCP flowstate in response to a given @@ -2065,10 +2075,10 @@ impl Port { /// (e.g. `process_out_tcp_existing`) should respond to `NewFlow` by creating /// a new TCP flow table entry. Where possible, this should be done by treating /// a packet as a UFT miss (e.g., `process_out_miss`) and reprocessing the flow. - fn update_tcp_entry( + fn update_tcp_entry( &self, mut data: PortDataOrSubset, - tcp: &TcpMeta, + tcp: &impl TcpRef, dir: &TcpDirection, pkt_len: u64, ) -> result::Result { @@ -2078,12 +2088,16 @@ impl Port { TcpDirection::Out { ufid_out } => (ufid_out, None), }; - let Some(entry) = tcp_flows.get_mut(ufid_out) else { + let Some(entry) = tcp_flows.get(ufid_out) else { return Err(ProcessError::MissingFlow(*ufid_out)); }; + let entry = entry.clone(); + // TODO: need to hit this from a UFT entry. + // Work out atomics shortly... entry.hit(); - let tfes = entry.state_mut(); + let tfes_base = entry.state(); + let mut tfes = tfes_base.inner.lock(); match *dir { TcpDirection::In { .. } => { tfes.segs_in += 1; @@ -2110,6 +2124,8 @@ impl Port { tfes.inbound_ufid = Some(*ufid_in); } + drop(tfes); + let ufid_inbound = if matches!( next_state, Ok(TcpState::Closed) | Err(TcpFlowStateError::NewFlow { .. 
}) @@ -2117,7 +2133,8 @@ impl Port { // Due to order of operations, out_tcp_existing must // call uft_tcp_closed separately. let entry = tcp_flows.remove(ufid_out).unwrap(); - let state_ufid = entry.state().inbound_ufid; + let lock = entry.state().inner.lock(); + let state_ufid = lock.inbound_ufid; if let PortDataOrSubset::Port(data) = data { // The inbound side of the UFT is based on @@ -2146,7 +2163,7 @@ impl Port { Ok(match next_state { TcpState::Closed => TcpMaybeClosed::Closed { ufid_inbound }, - a => TcpMaybeClosed::NewState(a), + a => TcpMaybeClosed::NewState(a, entry), }) } @@ -2155,10 +2172,10 @@ impl Port { fn process_in_tcp( &self, data: &mut PortData, - pmeta: &PacketMeta, + pmeta: &PacketHeaders2, ufid_in: &InnerFlowId, pkt_len: u64, - ) -> result::Result { + ) -> result::Result { // All TCP flows are keyed with respect to the outbound Flow // ID, therefore we mirror the flow. This value must represent // the guest-side of the flow and thus come from the passed-in @@ -2185,15 +2202,12 @@ impl Port { e @ Err( ProcessError::TcpFlow(TcpFlowStateError::NewFlow { .. }) | ProcessError::MissingFlow(_), - ) => { - self.create_new_tcp_entry( - &mut data.tcp_flows, - tcp, - &dir, - pkt_len, - )?; - e.map(Into::into) - } + ) => self.create_new_tcp_entry( + &mut data.tcp_flows, + tcp, + &dir, + pkt_len, + ), Ok(v) => Ok(v.into()), Err(e) => Err(e), } @@ -2244,22 +2258,25 @@ impl Port { } let ufid_out = pkt.flow().mirror(); - let hte = UftEntry { + let mut hte = UftEntry { pair: Some(ufid_out), xforms: xforms.compile(pkt.checksums_dirty()), epoch, l4_hash: ufid_in.crc32(), + tcp_flow: None, }; // Keep around the comment on the `None` arm #[allow(clippy::single_match)] - match data.uft_out.get_mut(&ufid_out) { + match data.uft_out.get(&ufid_out) { // If an outbound packet has already created an outbound // UFT entry, make sure to pair it to this inbound entry. 
Some(out_entry) => { // Remember, the inbound UFID is the flow as seen by // the network, before any processing is done by OPTE. - out_entry.state_mut().pair = Some(*ufid_in); + + // TODO(kyle) + // out_entry.state().pair = Some(*ufid_in); } // Ideally we would simulate the outbound flow if no @@ -2276,73 +2293,68 @@ impl Port { // For inbound traffic the TCP flow table must be // checked _after_ processing take place. - // TODO: uncork - // if pkt.meta().is_inner_tcp() { - // match self.process_in_tcp( - // data, - // pkt.meta(), - // ufid_in, - // pkt.len() as u64, - // ) { - // Ok(TcpState::Closed) => Ok(InternalProcessResult::Modified { transform: todo!(), tcp_state: todo!() }), - - // // Found existing TCP flow, or have just created a new one. - // Ok(_) - // | Err(ProcessError::TcpFlow(TcpFlowStateError::NewFlow { - // .. - // })) - // | Err(ProcessError::MissingFlow(_)) => { - // // We have a good TCP flow, create a new UFT entry. - // match data.uft_in.add(*ufid_in, hte) { - // Ok(_) => Ok(InternalProcessResult::Modified { transform: todo!(), tcp_state: todo!() }), - // Err(OpteError::MaxCapacity(limit)) => { - // Err(ProcessError::FlowTableFull { - // kind: "UFT", - // limit, - // }) - // } - // Err(_) => unreachable!( - // "Cannot return other errors from FlowTable::add" - // ), - // } - // } - - // // Unlike for existing flows, we don't allow through - // // unexpected packets here for now -- the `TcpState` FSM - // // already encodes a shortcut from `Closed` to `Established. 
- // Err(ProcessError::TcpFlow(err)) => { - // let e = format!("{err}"); - // self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); - // Ok(InternalProcessResult::Drop { reason: DropReason::TcpErr }) - // } - // Err(ProcessError::FlowTableFull { kind, limit }) => { - // let e = format!("{kind} flow table full ({limit} entries)"); - // self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); - // Ok(InternalProcessResult::Drop { reason: DropReason::TcpErr }) - // } - // res => unreachable!( - // "Cannot return other errors from \ - // process_in_tcp, returned: {res:?}" - // ), - // } - // } else { - // match data.uft_in.add(*ufid_in, hte) { - // Ok(_) => Ok(InternalProcessResult::Modified{ transform: todo!(), tcp_state: todo!() }), - // Err(OpteError::MaxCapacity(limit)) => { - // Err(ProcessError::FlowTableFull { kind: "UFT", limit }) - // } - // Err(_) => unreachable!( - // "Cannot return other errors from FlowTable::add" - // ), - // } - // } - match data.uft_in.add(*ufid_in, hte) { - Ok(_) => Ok(InternalProcessResult::Modified), - Err(OpteError::MaxCapacity(limit)) => { - Err(ProcessError::FlowTableFull { kind: "UFT", limit }) + if pkt.meta().is_inner_tcp() { + match self.process_in_tcp( + data, + pkt.meta(), + ufid_in, + pkt.len() as u64, + ) { + Ok(TcpMaybeClosed::Closed { .. }) => { + Ok(InternalProcessResult::Modified) + } + + // Found existing TCP flow, or have just created a new one. + Ok(TcpMaybeClosed::NewState(_, flow)) => { + // We have a good TCP flow, create a new UFT entry. 
+ hte.tcp_flow = Some(flow); + match data.uft_in.add(*ufid_in, hte) { + Ok(_) => Ok(InternalProcessResult::Modified), + Err(OpteError::MaxCapacity(limit)) => { + Err(ProcessError::FlowTableFull { + kind: "UFT", + limit, + }) + } + Err(_) => unreachable!( + "Cannot return other errors from FlowTable::add" + ), + } + } + + // Unlike for existing flows, we don't allow through + // unexpected packets here for now -- the `TcpState` FSM + // already encodes a shortcut from `Closed` to `Established. + Err(ProcessError::TcpFlow(err)) => { + let e = format!("{err}"); + // TODO(kyle) + // self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); + Ok(InternalProcessResult::Drop { + reason: DropReason::TcpErr, + }) + } + Err(ProcessError::FlowTableFull { kind, limit }) => { + let e = format!("{kind} flow table full ({limit} entries)"); + // TODO(kyle) + // self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); + Ok(InternalProcessResult::Drop { + reason: DropReason::TcpErr, + }) + } + res => unreachable!( + "Cannot return other errors from \ + process_in_tcp, returned: {res:?}" + ), } - Err(_) => { - unreachable!("Cannot return other errors from FlowTable::add") + } else { + match data.uft_in.add(*ufid_in, hte) { + Ok(_) => Ok(InternalProcessResult::Modified), + Err(OpteError::MaxCapacity(limit)) => { + Err(ProcessError::FlowTableFull { kind: "UFT", limit }) + } + Err(_) => unreachable!( + "Cannot return other errors from FlowTable::add" + ), } } } @@ -2363,7 +2375,7 @@ impl Port { self.name_cstr.as_ptr() as uintptr_t, ufid, epoch as uintptr_t, - last_hit.raw_millis().unwrap_or_default() as usize + last_hit.raw_millis() as usize ); } } else if #[cfg(feature = "usdt")] { @@ -2391,7 +2403,7 @@ impl Port { // Use the compiled UFT entry if one exists. Otherwise // fallback to layer processing. 
- match data.uft_in.get_mut(ufid_in) { + match data.uft_in.get(ufid_in) { Some(entry) if entry.state().epoch == epoch => { // TODO At the moment I'm holding the UFT locks not // just for lookup, but for the entire duration of @@ -2400,7 +2412,7 @@ impl Port { // for lookup. entry.hit(); data.stats.vals.in_uft_hit += 1; - self.uft_hit_probe(In, pkt.flow(), epoch, entry.last_hit()); + self.uft_hit_probe(In, pkt.flow(), epoch, &entry.last_hit()); let transform = Some(Arc::clone(&entry.state().xforms)); pkt.set_l4_hash(entry.state().l4_hash); @@ -2524,7 +2536,7 @@ impl Port { &self, tcp_flows: &mut FlowTable, ufid_out: &InnerFlowId, - pmeta: &PacketMeta, + pmeta: &PacketHeaders2, pkt_len: u64, ) -> result::Result { let tcp = pmeta.inner_tcp().unwrap(); @@ -2542,7 +2554,7 @@ impl Port { &self, data: &mut PortData, ufid_out: &InnerFlowId, - pmeta: &PacketMeta, + pmeta: &PacketHeaders2, pkt_len: u64, ) -> result::Result { let tcp = pmeta.inner_tcp().unwrap(); @@ -2557,20 +2569,12 @@ impl Port { Err( ProcessError::TcpFlow(TcpFlowStateError::NewFlow { .. }) | ProcessError::MissingFlow(_), - ) => match self.create_new_tcp_entry( + ) => self.create_new_tcp_entry( &mut data.tcp_flows, tcp, &dir, pkt_len, - ) { - // Note: don't need to remove on this case, as create_new_tcp_entry - // will only insert to the map if state != Closed. - Ok(TcpState::Closed) => { - Ok(TcpMaybeClosed::Closed { ufid_inbound: None }) - } - Ok(a) => Ok(TcpMaybeClosed::NewState(a)), - Err(e) => Err(e), - }, + ), other => other, } } @@ -2590,54 +2594,60 @@ impl Port { // For outbound traffic the TCP flow table must be checked // _before_ processing take place. 
// TODO: uncork - // if pkt.meta().is_inner_tcp() { - // match self.process_out_tcp_new( - // data, - // pkt.flow(), - // pkt.meta(), - // pkt.len() as u64, - // ) { - // Ok(TcpMaybeClosed::Closed { ufid_inbound }) => { - // tcp_closed = true; - // self.uft_tcp_closed( - // data, - // pkt.flow(), - // ufid_inbound.as_ref(), - // ); - // } - - // // Continue with processing. - // Ok(_) => (), - - // // Unlike for existing flows, we don't allow through - // // unexpected packets here for now -- the `TcpState` FSM - // // already encodes a shortcut from `Closed` to `Established. - // Err(ProcessError::TcpFlow(err)) => { - // let e = format!("{err}"); - // self.tcp_err(&data.tcp_flows, Out, e, pkt); - // return Ok(InternalProcessResult::Drop { - // reason: DropReason::TcpErr, - // }); - // } - // Err(ProcessError::MissingFlow(flow_id)) => { - // let e = format!("Missing TCP flow ID: {flow_id}"); - // self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); - // return Ok(InternalProcessResult::Drop { - // reason: DropReason::TcpErr, - // }); - // } - // Err(ProcessError::FlowTableFull { kind, limit }) => { - // let e = format!("{kind} flow table full ({limit} entries)"); - // self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); - // return Ok(InternalProcessResult::Drop { - // reason: DropReason::TcpErr, - // }); - // } - // res => unreachable!( - // "Cannot return other errors from process_in_tcp_new, returned: {res:?}" - // ), - // } - // } + let tcp_flow = if pkt.meta().is_inner_tcp() { + match self.process_out_tcp_new( + data, + pkt.flow(), + pkt.meta(), + pkt.len() as u64, + ) { + Ok(TcpMaybeClosed::Closed { ufid_inbound }) => { + tcp_closed = true; + self.uft_tcp_closed( + data, + pkt.flow(), + ufid_inbound.as_ref(), + ); + None + } + + // Continue with processing. 
+ Ok(TcpMaybeClosed::NewState (_, flow)) => Some(flow), + + // Unlike for existing flows, we don't allow through + // unexpected packets here for now -- the `TcpState` FSM + // already encodes a shortcut from `Closed` to `Established. + Err(ProcessError::TcpFlow(err)) => { + let e = format!("{err}"); + // TODO(kyle) + // self.tcp_err(&data.tcp_flows, Out, e, pkt); + return Ok(InternalProcessResult::Drop { + reason: DropReason::TcpErr, + }); + } + Err(ProcessError::MissingFlow(flow_id)) => { + let e = format!("Missing TCP flow ID: {flow_id}"); + // TODO(kyle) + // self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); + return Ok(InternalProcessResult::Drop { + reason: DropReason::TcpErr, + }); + } + Err(ProcessError::FlowTableFull { kind, limit }) => { + let e = format!("{kind} flow table full ({limit} entries)"); + // TODO(kyle) + // self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); + return Ok(InternalProcessResult::Drop { + reason: DropReason::TcpErr, + }); + } + res => unreachable!( + "Cannot return other errors from process_in_tcp_new, returned: {res:?}" + ), + } + } else { + None + }; let mut xforms = Transforms::new(); let flow_before = *pkt.flow(); @@ -2648,6 +2658,7 @@ impl Port { xforms: xforms.compile(pkt.checksums_dirty()), epoch, l4_hash: flow_before.crc32(), + tcp_flow, }; match res { @@ -2699,11 +2710,11 @@ impl Port { // Use the compiled UFT entry if one exists. Otherwise // fallback to layer processing. 
- match uft_out.get_mut(&pkt.flow()) { + match uft_out.get(&pkt.flow()) { Some(entry) if entry.state().epoch == epoch => { entry.hit(); data.stats.vals.out_uft_hit += 1; - self.uft_hit_probe(Out, pkt.flow(), epoch, entry.last_hit()); + self.uft_hit_probe(Out, pkt.flow(), epoch, &entry.last_hit()); let mut invalidated = false; let mut reprocess = false; @@ -2733,12 +2744,13 @@ impl Port { // )) => { // invalidated = true; // reprocess = true; - // self.tcp_err( - // &data.tcp_flows, - // Out, - // e.to_string(), - // pkt, - // ); + // // TODO(kyle) + // // self.tcp_err( + // // &data.tcp_flows, + // // Out, + // // e.to_string(), + // // pkt, + // // ); // } // Err(ProcessError::MissingFlow(flow_id)) => { @@ -2748,12 +2760,13 @@ impl Port { // invalidated = true; // reprocess = true; // let e = format!("Missing TCP flow ID: {flow_id}"); - // self.tcp_err( - // &data.tcp_flows, - // Direction::In, - // e, - // pkt, - // ); + // // TODO(kyle) + // // self.tcp_err( + // // &data.tcp_flows, + // // Direction::In, + // // e, + // // pkt, + // // ); // } // Err(ProcessError::TcpFlow( @@ -2761,12 +2774,13 @@ impl Port { // )) => { // // Technically unreachable, as we filter these out in `update_tcp_entry`. // // Panicking here would probably be overly fragile, however. - // self.tcp_err( - // &data.tcp_flows, - // Direction::In, - // e.to_string(), - // pkt, - // ); + // // TODO(kyle) + // // self.tcp_err( + // // &data.tcp_flows, + // // Direction::In, + // // e.to_string(), + // // pkt, + // // ); // return Ok(ProcessResult::Drop { // reason: DropReason::TcpErr, // }); @@ -3069,7 +3083,7 @@ pub enum Pos { /// An entry in the TCP flow table. #[derive(Clone, Debug)] -pub struct TcpFlowEntryState { +pub struct TcpFlowEntryStateInner { // This must be the UFID of inbound traffic _as it arrives_ from // the network, not after it's processed. 
inbound_ufid: Option, @@ -3080,6 +3094,10 @@ pub struct TcpFlowEntryState { bytes_out: u64, } +pub struct TcpFlowEntryState { + inner: KMutex, +} + impl TcpFlowEntryState { fn new_inbound( inbound_ufid: InnerFlowId, @@ -3087,28 +3105,52 @@ impl TcpFlowEntryState { bytes_in: u64, ) -> Self { Self { - inbound_ufid: Some(inbound_ufid), - tcp_state, - segs_in: 1, - segs_out: 0, - bytes_in, - bytes_out: 0, + inner: KMutex::new( + TcpFlowEntryStateInner { + inbound_ufid: Some(inbound_ufid), + tcp_state, + segs_in: 1, + segs_out: 0, + bytes_in, + bytes_out: 0, + }, + KMutexType::Spin, + ) + .into(), } } fn new_outbound(tcp_state: TcpFlowState, bytes_out: u64) -> Self { Self { - inbound_ufid: None, - tcp_state, - segs_in: 0, - segs_out: 1, - bytes_in: 0, - bytes_out, + inner: KMutex::new( + TcpFlowEntryStateInner { + inbound_ufid: None, + tcp_state, + segs_in: 0, + segs_out: 1, + bytes_in: 0, + bytes_out, + }, + KMutexType::Spin, + ) + .into(), } } + + fn tcp_state(&self) -> TcpState { + let lock = self.inner.lock(); + lock.tcp_state.tcp_state() + } } -impl Display for TcpFlowEntryState { +impl core::fmt::Debug for TcpFlowEntryState { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let inner = self.inner.lock(); + core::fmt::Debug::fmt(&*inner, f) + } +} + +impl Display for TcpFlowEntryStateInner { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match &self.inbound_ufid { None => write!(f, "None {}", self.tcp_state), @@ -3117,7 +3159,14 @@ impl Display for TcpFlowEntryState { } } -impl Dump for TcpFlowEntryState { +impl Display for TcpFlowEntryState { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let inner = self.inner.lock(); + Display::fmt(&*inner, f) + } +} + +impl Dump for TcpFlowEntryStateInner { type DumpVal = TcpFlowEntryDump; fn dump(&self, hits: u64) -> TcpFlowEntryDump { @@ -3133,6 +3182,15 @@ impl Dump for TcpFlowEntryState { } } +impl Dump for TcpFlowEntryState { + type DumpVal = TcpFlowEntryDump; + + fn dump(&self, hits: u64) 
-> TcpFlowEntryDump { + let inner = self.inner.lock(); + inner.dump(hits) + } +} + /// Expiry behaviour for TCP flows dependent on the connection FSM. #[derive(Debug)] pub struct TcpExpiry { @@ -3155,11 +3213,11 @@ impl ExpiryPolicy for TcpExpiry { entry: &FlowEntry, now: Moment, ) -> bool { - let ttl = match entry.state().tcp_state.tcp_state() { + let ttl = match entry.state().tcp_state() { TcpState::TimeWait => self.time_wait_ttl, _ => self.keepalive_ttl, }; - ttl.is_expired(*entry.last_hit(), now) + ttl.is_expired(entry.last_hit(), now) } } diff --git a/lib/opte/src/engine/tcp_state.rs b/lib/opte/src/engine/tcp_state.rs index 62423648..eb3bc441 100644 --- a/lib/opte/src/engine/tcp_state.rs +++ b/lib/opte/src/engine/tcp_state.rs @@ -15,7 +15,10 @@ use core::fmt; use core::fmt::Display; #[cfg(all(not(feature = "std"), not(test)))] use illumos_sys_hdrs::uintptr_t; +use ingot::tcp::TcpFlags as IngotTcpFlags; +use ingot::tcp::TcpRef; use opte_api::Direction; +use zerocopy::ByteSlice; /// An error processing a TCP flow. #[derive(Clone, Copy, Debug, PartialEq)] @@ -135,10 +138,14 @@ impl TcpFlowState { /// `return None` and replace them with a single `None` value at /// the end of the function; but the author finds it useful to be /// explicit for each case. - fn flow_in(&mut self, tcp: &TcpMeta) -> Option { + fn flow_in( + &mut self, + flags: IngotTcpFlags, + tcp_ack: u32, + ) -> Option { use TcpState::*; - if tcp.has_flag(TcpFlags::RST) { + if flags.contains(IngotTcpFlags::RST) { return Some(Closed); } @@ -147,7 +154,7 @@ impl TcpFlowState { // We have a new inbound SYN. We assume for now the // guest is listening on the given port by moving to // the LISTEN state. - if tcp.has_flag(TcpFlags::SYN) { + if flags.contains(IngotTcpFlags::SYN) { return Some(Listen); } @@ -160,7 +167,7 @@ impl TcpFlowState { // respond with an ACK or RST. In the future we could // instead keep this in some type of probationary // state (or separate table). 
- if tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::ACK) { return Some(Established); } @@ -171,7 +178,7 @@ impl TcpFlowState { // If the guest doesn't respond to the first SYN, or // the sender never sees the guest's ACK, then the // sender may send more SYNs. - if tcp.has_flag(TcpFlags::SYN) { + if flags.contains(IngotTcpFlags::SYN) { return Some(Listen); } @@ -181,7 +188,7 @@ impl TcpFlowState { // The guest is in active open and waiting for the // remote's SYN+ACK. SynSent => { - if tcp.has_flag(TcpFlags::SYN) && tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::SYN | IngotTcpFlags::ACK) { Some(Established) } else { // Could be simultaneous open, but not worrying @@ -193,14 +200,14 @@ impl TcpFlowState { // The guest is in passive open and waiting for the // remote's ACK. SynRcvd => { - if tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::ACK) { return Some(Established); } // In this case the client is retransmitting its SYN; // probably because the guest's SYN+ACK reply got lost // or stuck in a buffer somewhere. - if tcp.has_flag(TcpFlags::SYN) { + if flags.contains(IngotTcpFlags::SYN) { return Some(SynRcvd); } @@ -210,13 +217,13 @@ impl TcpFlowState { } Established => { - if tcp.has_flag(TcpFlags::FIN) { + if flags.contains(IngotTcpFlags::FIN) { // In this case remote end has initiated the close // and the guest is entering passive close. return Some(CloseWait); } - if tcp.has_flag(TcpFlags::SYN) { + if flags.contains(IngotTcpFlags::SYN) { // We may have gotten stuck in `Established` due to // a delayed FIN+ACK/ACK at connection close, or // unexpected OS reset/panic. @@ -236,7 +243,7 @@ impl TcpFlowState { // // We could also see an ACK for previous data sent // from the guest. - if tcp.has_flag(TcpFlags::FIN) || tcp.has_flag(TcpFlags::ACK) { + if flags.intersects(IngotTcpFlags::FIN | IngotTcpFlags::ACK) { return Some(CloseWait); } @@ -255,8 +262,8 @@ impl TcpFlowState { // 2. 
We are seeing an ACK from the remote for a // previous data segment. Pass it up to the guest // so it can log the duplicate ACK. - if tcp.has_flag(TcpFlags::ACK) { - if tcp.ack == self.guest_seq.unwrap() + 1 { + if flags.contains(IngotTcpFlags::ACK) { + if tcp_ack == self.guest_seq.unwrap() + 1 { return Some(Closed); } @@ -273,22 +280,22 @@ impl TcpFlowState { // at this point. // // TODO Verify ack number. - if tcp.has_flag(TcpFlags::FIN) && tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::FIN | IngotTcpFlags::ACK) { return Some(TimeWait); } // The remote sent its ACK for out active FIN. We now // need to wait for the remote to passive close and // send its FIN. - if tcp.has_flag(TcpFlags::ACK) - && tcp.ack == self.guest_seq.unwrap() + 1 + if flags.contains(IngotTcpFlags::ACK) + && tcp_ack == self.guest_seq.unwrap() + 1 { return Some(FinWait2); } // Presumably an ACK for some previous data. Let the // guest decide. - if tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::ACK) { return Some(FinWait1); } @@ -298,8 +305,8 @@ impl TcpFlowState { // The guest is in active close. FinWait2 => { - if tcp.has_flag(TcpFlags::FIN) - && tcp.ack == self.guest_seq.unwrap() + 1 + if flags.contains(IngotTcpFlags::FIN) + && tcp_ack == self.guest_seq.unwrap() + 1 { // In this case the guest was the active closer, // has sent its FIN, and has seen an ACK for that @@ -309,7 +316,7 @@ impl TcpFlowState { return Some(TimeWait); } - if tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::ACK) { return Some(FinWait2); } @@ -320,7 +327,7 @@ impl TcpFlowState { TimeWait => { // The guest is receiving additional copies of FIN for // remote's passive close. - if tcp.has_flag(TcpFlags::FIN) { + if flags.contains(IngotTcpFlags::FIN) { return Some(TimeWait); } @@ -328,7 +335,7 @@ impl TcpFlowState { // so I'm not sure why we would get an ACK in the // TIME_WAIT state. But for now I allow it to make // progress. 
- if tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::ACK) { return Some(TimeWait); } @@ -341,10 +348,10 @@ impl TcpFlowState { /// `return None` and replace them with a single `None` value at /// the end of the function; but the author finds it useful to be /// explicit for each case. - fn flow_out(&mut self, tcp: &TcpMeta) -> Option { + fn flow_out(&mut self, flags: IngotTcpFlags) -> Option { use TcpState::*; - if tcp.has_flag(TcpFlags::RST) { + if flags.contains(IngotTcpFlags::RST) { return Some(Closed); } @@ -352,13 +359,13 @@ impl TcpFlowState { Closed => { // The guest is trying to create a new outbound // connection. - if tcp.has_flag(TcpFlags::SYN) { + if flags.contains(IngotTcpFlags::SYN) { return Some(SynSent); } // The guest is responding to a data segment, // immediately move to established. - if tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::ACK) { return Some(Established); } @@ -369,7 +376,7 @@ impl TcpFlowState { // In this case the guest process is responding to the // remote client with SYN+ACK. Listen => { - if tcp.has_flag(TcpFlags::SYN) && tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::SYN | IngotTcpFlags::ACK) { return Some(SynRcvd); } @@ -378,7 +385,7 @@ impl TcpFlowState { SynSent => { // In this case we are retransmitting the SYN packet. - if tcp.has_flag(TcpFlags::SYN) { + if flags.contains(IngotTcpFlags::SYN) { return Some(SynSent); } @@ -388,7 +395,7 @@ impl TcpFlowState { SynRcvd => { // In this case the guest is retransmitting the // SYN+ACK from its SYN_RCVD state. 
- if tcp.has_flag(TcpFlags::SYN) && tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::SYN | IngotTcpFlags::ACK) { return Some(SynRcvd); } @@ -397,11 +404,11 @@ impl TcpFlowState { // TODO passive close Established => { - if tcp.has_flag(TcpFlags::FIN) { + if flags.contains(IngotTcpFlags::FIN) { return Some(FinWait1); } - if tcp.has_flag(TcpFlags::SYN) { + if flags.contains(IngotTcpFlags::SYN) { return None; } @@ -412,11 +419,11 @@ impl TcpFlowState { FinWait1 => { // The guest is resending its FIN to the remote to // indicate its active close. - if tcp.has_flag(TcpFlags::FIN) { + if flags.contains(IngotTcpFlags::FIN) { return Some(FinWait1); } - if tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::ACK) { return Some(FinWait1); } @@ -428,7 +435,7 @@ impl TcpFlowState { // The guest has closed its side but the remote might // still be sending data, make sure to allow ACKs get // out. - if tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::ACK) { return Some(FinWait2); } @@ -443,7 +450,7 @@ impl TcpFlowState { // passive FIN. Eventually this connection will time // out on the guest and in that case an RST reply is // sent. Or this flow will expire. - if tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::ACK) { return Some(TimeWait); } @@ -454,7 +461,7 @@ impl TcpFlowState { CloseWait => { // The guest is performing its half of the passive // close now. - if tcp.has_flag(TcpFlags::FIN) { + if flags.contains(IngotTcpFlags::FIN) { return Some(LastAck); } @@ -469,7 +476,7 @@ impl TcpFlowState { LastAck => { // The guest is either reacknowledging the remote's // FIN or resending its own FIN to the remote. 
- if tcp.has_flag(TcpFlags::FIN) || tcp.has_flag(TcpFlags::ACK) { + if flags.intersects(IngotTcpFlags::FIN | IngotTcpFlags::ACK) { return Some(LastAck); } @@ -488,14 +495,16 @@ impl TcpFlowState { } } - pub fn process( + pub fn process( &mut self, port: &CStr, dir: Direction, flow_id: &InnerFlowId, - tcp: &TcpMeta, + tcp: &impl TcpRef, ) -> Result { let curr_state = self.tcp_state; + let flags = tcp.flags(); + let ack = tcp.acknowledgement(); // Run the segment through the corresponding side of the TCP // state machine. A successful transition should return @@ -504,19 +513,19 @@ impl TcpFlowState { // unexpected transition. let res = match dir { Direction::In => { - let res = self.flow_in(tcp); - self.remote_seq = Some(tcp.seq); - if tcp.has_flag(TcpFlags::ACK) { - self.remote_ack = Some(tcp.ack); + let res = self.flow_in(flags, ack); + self.remote_seq = Some(tcp.sequence()); + if flags.contains(IngotTcpFlags::ACK) { + self.remote_ack = Some(ack); } res } Direction::Out => { - let res = self.flow_out(tcp); - self.guest_seq = Some(tcp.seq); - if tcp.has_flag(TcpFlags::ACK) { - self.guest_ack = Some(tcp.ack); + let res = self.flow_out(flags); + self.guest_seq = Some(tcp.sequence()); + if flags.contains(IngotTcpFlags::ACK) { + self.guest_ack = Some(ack); } res } @@ -541,22 +550,22 @@ impl TcpFlowState { // close (active or simul) will leave a flow in TIME-WAIT, which // is the most common case. If the guest is not yet ready, we expect // it will send its own RST in response. 
- None if tcp.has_flag(TcpFlags::SYN) => { + None if flags.contains(IngotTcpFlags::SYN) => { return Err(TcpFlowStateError::NewFlow { direction: dir, flow_id: *flow_id, state: curr_state, - flags: tcp.flags, + flags: flags.bits(), }); } None => { - self.tcp_flow_drop_probe(port, flow_id, dir, tcp.flags); + self.tcp_flow_drop_probe(port, flow_id, dir, flags.bits()); return Err(TcpFlowStateError::UnexpectedSegment { direction: dir, flow_id: *flow_id, state: curr_state, - flags: tcp.flags, + flags: flags.bits(), }); } }; From 1a9212a1560686953d2fe1d02b82c1fdb11f7370 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 16 Oct 2024 16:42:00 +0100 Subject: [PATCH 050/115] Reworked TCP flow tracking is GO. --- lib/opte/src/engine/ingot_packet.rs | 16 ++ lib/opte/src/engine/mod.rs | 4 + lib/opte/src/engine/port.rs | 291 ++++++++++++++++++++-------- 3 files changed, 234 insertions(+), 77 deletions(-) diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 62243dd0..43ae281b 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -335,6 +335,14 @@ impl LightweightMeta for ValidNoEncap { l3.compute_checksum(); } } + + #[inline] + fn inner_tcp(&self) -> Option<&impl TcpRef> { + match self.inner_ulp.as_ref() { + Some(ValidUlp::Tcp(t)) => Some(t), + _ => None, + } + } } impl From> for OpteMeta { @@ -491,6 +499,14 @@ impl LightweightMeta for ValidGeneveOverV6 { self.inner_ulp.compute_checksum(body_csum, &self.inner_l3); self.inner_l3.compute_checksum(); } + + #[inline] + fn inner_tcp(&self) -> Option<&impl TcpRef> { + match &self.inner_ulp { + ValidUlp::Tcp(t) => Some(t), + _ => None, + } + } } // --- REWRITE IN PROGRESS --- diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index 1cb13a83..5f5e56d6 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -46,6 +46,7 @@ use alloc::string::String; use checksum::Checksum; use core::fmt; use core::num::ParseIntError; +use 
ingot::tcp::TcpRef; use ingot::types::Parsed as IngotParsed; use ingot::types::Read; use ingot_packet::MsgBlk; @@ -349,6 +350,9 @@ pub trait LightweightMeta: Into> { /// Recalculate checksums within inner headers, derived from a pre-computed `body_csum`. fn update_inner_checksums(&mut self, body_csum: Checksum); + + /// Provide a view of internal TCP state. + fn inner_tcp(&self) -> Option<&impl TcpRef>; } /// A generic ULP parser, useful for testing inside of the opte crate diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 573c989f..c5aae488 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -82,6 +82,7 @@ use alloc::string::String; use alloc::string::ToString; use alloc::sync::Arc; use alloc::vec::Vec; +use core::ffi::CStr; use core::fmt; use core::fmt::Display; use core::num::NonZeroU32; @@ -1236,8 +1237,14 @@ impl Port { // In case 1, we can also cache and reuse the same EmitSpec for // all hit packets. + // The lock needs to be optional here because there is one + // case wherein we need to reacquire the lock -- invalidation + // by TCP state. + let mut lock = Some(self.data.lock()); + let mut data = + lock.as_mut().expect("lock should be held on this codepath"); + // (1) Check for UFT and precompiled. 
- let mut data = self.data.lock(); let epoch = self.epoch(); check_state!(data.state, [PortState::Running]) .map_err(|_| ProcessError::BadState(data.state))?; @@ -1251,46 +1258,47 @@ impl Port { Direction::In => data.uft_in.get(&flow_before), }; - enum FastPathDecision { - CompiledUft { tx: Arc, l4_hash: u32 }, - Uft { tx: Arc, l4_hash: u32 }, - Slow, - } - // enum FastPathDecision { - // CompiledUft { tx: Arc>>, l4_hash: u32 }, - // Uft { tx: Arc>>, l4_hash: u32 }, + // CompiledUft { tx: Arc, l4_hash: u32 }, + // Uft { tx: Arc, l4_hash: u32 }, // Slow, // } + enum FastPathDecision { + CompiledUft(Arc>>), + Uft(Arc>>), + Slow, + } + let decision = match uft { // We have a valid UFT entry of some kind -- clone out the // saved transforms so that we can drop the lock ASAP. Some(entry) if entry.state().epoch == epoch => { - entry.hit(); - let now = entry.last_hit(); + // entry.hit(); + // let now = entry.last_hit(); // The Fast Path. let xforms = &entry.state().xforms; let out = if let Some(compiled) = xforms.compiled.as_ref() { - FastPathDecision::CompiledUft { - tx: Arc::clone(compiled), - // tx: Arc::clone(entry), - l4_hash: entry.state().l4_hash, - } + FastPathDecision::CompiledUft(Arc::clone(entry)) + // FastPathDecision::CompiledUft { + // tx: Arc::clone(compiled), + // // tx: Arc::clone(entry), + // l4_hash: entry.state().l4_hash, + // } } else { - FastPathDecision::Uft { - tx: Arc::clone(xforms), - // tx: Arc::clone(entry), - l4_hash: entry.state().l4_hash, - } + FastPathDecision::Uft(Arc::clone(entry)) + // FastPathDecision::Uft { + // tx: Arc::clone(xforms), + // // tx: Arc::clone(entry), + // l4_hash: entry.state().l4_hash, + // } }; match dir { Direction::In => data.stats.vals.in_uft_hit += 1, Direction::Out => data.stats.vals.out_uft_hit += 1, } - self.uft_hit_probe(dir, &flow_before, epoch, &now); out } @@ -1311,10 +1319,18 @@ impl Port { None => FastPathDecision::Slow, }; - // (1)/(2) UFT hit without invalidation -- We know the result for stats 
purposes. + // (1)/(2) UFT hit. Update stats, drop locks, validate TCP state. + // We *almost always* know the result is modified. + // This will produce an incorrect stat in the event that TCP invalidation + // forces a reprocess, but I believe this is a necessary evil to keep work + // out of the portlock today. The correct fix is to AtomicU64 those stats, + // which we'll need for later metrics too. + let mut invalidated_tcp = None; + let mut reprocess = false; + match &decision { - FastPathDecision::CompiledUft { .. } - | FastPathDecision::Uft { .. } => { + FastPathDecision::CompiledUft(entry) + | FastPathDecision::Uft(entry) => { // XXX: Ideally the Kstat should be holding AtomicU64s, then we get // out of the lock sooner. Note that we don't need to *apply* a given // set of transforms in order to know which stats we'll modify. @@ -1322,55 +1338,119 @@ impl Port { let dummy_res = Ok(InternalProcessResult::Modified); match dir { Direction::In => { - Self::update_stats_in(&mut data.stats.vals, &dummy_res) + Self::update_stats_in(&mut data.stats.vals, &dummy_res); } Direction::Out => { - Self::update_stats_out(&mut data.stats.vals, &dummy_res) + Self::update_stats_out( + &mut data.stats.vals, + &dummy_res, + ); + } + } + + drop(data); + drop(lock.take()); + + // + entry.hit_at(process_start); + self.uft_hit_probe(dir, &flow_before, epoch, &process_start); + + let tcp = entry.state().tcp_flow.as_ref(); + if let Some(tcp_flow) = tcp { + tcp_flow.hit_at(process_start); + + let tcp = pkt + .meta() + .inner_tcp() + .expect("failed to find TCP state on known TCP flow"); + + let ufid_in = match dir { + Direction::In => Some(&flow_before), + Direction::Out => None, + }; + + match tcp_flow.state().update( + self.name_cstr.as_c_str(), + tcp, + dir, + pkt.len() as u64, + ufid_in, + ) { + Ok(TcpState::Closed) => { + invalidated_tcp = Some(Arc::clone(tcp_flow)); + } + Err(TcpFlowStateError::NewFlow { .. 
}) => { + invalidated_tcp = Some(Arc::clone(tcp_flow)); + reprocess = true; + } + _ => {} } } } - _ => {} + _ => { + drop(data); + } } - // (1) Execute precompiled, and exit. - if let FastPathDecision::CompiledUft { tx, l4_hash } = decision { - drop(data); + // If we're in here, we took a faster-path. We know the lock is dropped. + // Reacquire the lock to remove the flow. + if let Some(entry) = invalidated_tcp { + let mut lock = self.data.lock(); - let len = pkt.len(); - let meta = pkt.meta_mut(); - let body_csum = if tx.checksums_dirty { - meta.compute_body_csum() - } else { - None - }; - meta.run_compiled_transform(&tx); - if let Some(csum) = body_csum { - meta.update_inner_checksums(csum); - } - let encap_len = meta.encap_len(); - let ulp_len = (len - (encap_len as usize)) as u32; - let rewind = match tx.encap { - CompiledEncap::Pop => encap_len, - _ => 0, - }; - let out = EmittestSpec { - spec: EmitterSpec::Fastpath(tx), - l4_hash, - rewind, - ulp_len, - }; + let flow_lock = entry.state().inner.lock(); + let ufid_out = &flow_lock.outbound_ufid; + + let ufid_in = flow_lock.inbound_ufid.as_ref(); + self.uft_tcp_closed(&mut lock, ufid_out, ufid_in); - let flow_after = meta.flow(); - let res = Ok(ProcessResult::Modified(out)); - self.port_process_return_probe( - dir, - &flow_before, - &flow_after, - epoch, - // &pkt, - &res, - ); - return res; + let _ = lock.tcp_flows.remove(ufid_out).unwrap(); + } + + if !reprocess { + // (1) Execute precompiled, and exit. 
+ if let FastPathDecision::CompiledUft(entry) = decision { + let l4_hash = entry.state().l4_hash; + let tx = + entry.state().xforms.compiled.as_ref().cloned().unwrap(); + + let len = pkt.len(); + let meta = pkt.meta_mut(); + let body_csum = if tx.checksums_dirty { + meta.compute_body_csum() + } else { + None + }; + meta.run_compiled_transform(&tx); + if let Some(csum) = body_csum { + meta.update_inner_checksums(csum); + } + let encap_len = meta.encap_len(); + let ulp_len = (len - (encap_len as usize)) as u32; + let rewind = match tx.encap { + CompiledEncap::Pop => encap_len, + _ => 0, + }; + let out = EmittestSpec { + spec: EmitterSpec::Fastpath(tx), + l4_hash, + rewind, + ulp_len, + }; + + let flow_after = meta.flow(); + let res = Ok(ProcessResult::Modified(out)); + self.port_process_return_probe( + dir, + &flow_before, + &flow_after, + epoch, + // &pkt, + &res, + ); + return res; + } + } else { + lock = Some(self.data.lock()); } // (2)/(3) Full-fat metadata is required. @@ -1381,18 +1461,23 @@ impl Port { self.port_process_entry_probe(dir, &flow_before, epoch, &pkt); let res = match (&decision, dir) { - // (2) Drop lock, then apply retrieved transform. + // (2) Apply retrieved transform. Lock is dropped. // Store cached l4 hash. - (FastPathDecision::Uft { tx, l4_hash }, _) => { - drop(data); - pkt.set_l4_hash(*l4_hash); + (FastPathDecision::Uft(entry), _) if !reprocess => { + let l4_hash = entry.state().l4_hash; + let tx = Arc::clone(&entry.state().xforms); + + pkt.set_l4_hash(l4_hash); tx.apply(&mut pkt, dir)?; Ok(InternalProcessResult::Modified) } // (3) Full-table processing for the packet, then drop the lock. // Cksum updates are the only thing left undone. 
- (FastPathDecision::Slow, Direction::In) => { + (_, Direction::In) => { + let mut data = lock + .as_mut() + .expect("lock should be held on this codepath"); let res = self.process_in_miss( &mut data, epoch, @@ -1405,7 +1490,10 @@ impl Port { pkt.update_checksums(); res } - (FastPathDecision::Slow, Direction::Out) => { + (_, Direction::Out) => { + let mut data = lock + .as_mut() + .expect("lock should be held on this codepath"); let res = self .process_out_miss(&mut data, epoch, &mut pkt, &mut ameta); Self::update_stats_out(&mut data.stats.vals, &res); @@ -1413,7 +1501,6 @@ impl Port { pkt.update_checksums(); res } - _ => unreachable!(), }; let flow_after = *pkt.flow(); @@ -2035,11 +2122,14 @@ impl Port { let (ufid_out, tfes) = match *dir { TcpDirection::In { ufid_in, ufid_out } => ( ufid_out, - TcpFlowEntryState::new_inbound(*ufid_in, tfs, pkt_len), + TcpFlowEntryState::new_inbound( + *ufid_out, *ufid_in, tfs, pkt_len, + ), + ), + TcpDirection::Out { ufid_out } => ( + ufid_out, + TcpFlowEntryState::new_outbound(*ufid_out, tfs, pkt_len), ), - TcpDirection::Out { ufid_out } => { - (ufid_out, TcpFlowEntryState::new_outbound(tfs, pkt_len)) - } }; match tcp_flows.add_and_return(*ufid_out, tfes) { Ok(entry) => Ok(TcpMaybeClosed::NewState(tcp_state, entry)), @@ -2097,6 +2187,9 @@ impl Port { // Work out atomics shortly... entry.hit(); let tfes_base = entry.state(); + + // let next_state = tfes_base.update(); + let mut tfes = tfes_base.inner.lock(); match *dir { TcpDirection::In { .. } => { @@ -3084,6 +3177,9 @@ pub enum Pos { /// An entry in the TCP flow table. #[derive(Clone, Debug)] pub struct TcpFlowEntryStateInner { + // We store this for the benefit of inbound flows who have UFTs + // but which need to know their partner UFID to perform an invalidation. + outbound_ufid: InnerFlowId, // This must be the UFID of inbound traffic _as it arrives_ from // the network, not after it's processed. 
inbound_ufid: Option, @@ -3100,6 +3196,7 @@ pub struct TcpFlowEntryState { impl TcpFlowEntryState { fn new_inbound( + outbound_ufid: InnerFlowId, inbound_ufid: InnerFlowId, tcp_state: TcpFlowState, bytes_in: u64, @@ -3107,6 +3204,7 @@ impl TcpFlowEntryState { Self { inner: KMutex::new( TcpFlowEntryStateInner { + outbound_ufid, inbound_ufid: Some(inbound_ufid), tcp_state, segs_in: 1, @@ -3120,10 +3218,15 @@ impl TcpFlowEntryState { } } - fn new_outbound(tcp_state: TcpFlowState, bytes_out: u64) -> Self { + fn new_outbound( + outbound_ufid: InnerFlowId, + tcp_state: TcpFlowState, + bytes_out: u64, + ) -> Self { Self { inner: KMutex::new( TcpFlowEntryStateInner { + outbound_ufid, inbound_ufid: None, tcp_state, segs_in: 0, @@ -3141,6 +3244,40 @@ impl TcpFlowEntryState { let lock = self.inner.lock(); lock.tcp_state.tcp_state() } + + #[inline(always)] + fn update( + &self, + port_name: &CStr, + tcp: &impl TcpRef, + dir: Direction, + pkt_len: u64, + ufid_in: Option<&InnerFlowId>, + ) -> result::Result { + let mut tfes = self.inner.lock(); + match dir { + Direction::In { .. } => { + tfes.segs_in += 1; + tfes.bytes_in += pkt_len; + } + Direction::Out { .. } => { + tfes.segs_out += 1; + tfes.bytes_out += pkt_len; + } + } + + if let Some(ufid_in) = ufid_in { + // We need to store the UFID of the inbound packet + // before it was processed so that we can retire the + // correct UFT/LFT entries upon connection + // termination. + tfes.inbound_ufid = Some(*ufid_in); + } + let ufid_out = tfes.outbound_ufid; + let tcp_state = &mut tfes.tcp_state; + + tcp_state.process(port_name, dir, &ufid_out, tcp) + } } impl core::fmt::Debug for TcpFlowEntryState { From a3af0f5d922f576ea7f6ed380c0d181763d2d7e0 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 17 Oct 2024 00:33:14 +0100 Subject: [PATCH 051/115] Tests are green (!!!!!!!!!!!!!!!!!!) Tomorrow I get to take an axe to all the old code. 
--- lib/opte-test-utils/src/lib.rs | 6 ++-- lib/opte/src/engine/port.rs | 46 ++++++++++++++++++------ lib/oxide-vpc/tests/integration_tests.rs | 22 +++++++----- 3 files changed, 53 insertions(+), 21 deletions(-) diff --git a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index 854e7846..18794bef 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -899,6 +899,7 @@ pub fn http_server_ack_fin2( source: 80, destination: dst_port, sequence: 44161353 + 34, + // We are ACKing the FIN, which counts as 1 byte. acknowledgement: 2382112998 + 1, flags: IngotTcpFlags::ACK, ..Default::default() @@ -930,8 +931,8 @@ pub fn http_server_fin2( let tcp = Tcp { source: 80, destination: dst_port, - sequence: 2382112998 + 1, - acknowledgement: 44161353 + 34, + sequence: 44161353 + 34, + acknowledgement: 2382112998 + 1, flags: IngotTcpFlags::ACK | IngotTcpFlags::FIN, ..Default::default() }; @@ -962,6 +963,7 @@ pub fn http_guest_ack_fin2( source: 44490, destination: 80, sequence: 2382112998, + // We are ACKing the FIN, which counts as 1 byte. acknowledgement: 44161353 + 34 + 1, flags: IngotTcpFlags::ACK, ..Default::default() diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index c5aae488..6aa2894c 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -524,10 +524,10 @@ pub enum DumpLayerError { } /// An entry in the Unified Flow Table. -#[derive(Clone, Debug)] +// #[derive(Debug)] pub struct UftEntry { /// The flow ID for the other side. - pair: Option, + pair: KMutex>, /// The transformations to perform. 
xforms: Arc, @@ -572,6 +572,20 @@ impl Display for UftEntry { } } +impl fmt::Debug for UftEntry { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let UftEntry { pair, xforms, l4_hash, epoch, tcp_flow } = self; + + f.debug_struct("UftEntry") + .field("pair", &"") + .field("xforms", xforms) + .field("l4_hash", l4_hash) + .field("epoch", epoch) + .field("tcp_flow", tcp_flow) + .finish() + } +} + /// Cumulative counters for a single [`Port`]. #[derive(KStatProvider)] struct PortStats { @@ -1307,7 +1321,7 @@ impl Port { // entries and proceed to rule processing. Some(entry) => { let epoch = entry.state().epoch; - let owned_pair = entry.state().pair; + let owned_pair = (*entry.state().pair.lock()); let (ufid_in, ufid_out) = match dir { Direction::Out => (owned_pair.as_ref(), Some(&flow_before)), Direction::In => (Some(&flow_before), owned_pair.as_ref()), @@ -1325,6 +1339,7 @@ impl Port { // forces a reprocess, but I believe this is a necessary evil to keep work // out of the portlock today. The correct fix is to AtomicU64 those stats, // which we'll need for later metrics too. + // However, accounting for this below is simple enough. let mut invalidated_tcp = None; let mut reprocess = false; @@ -1485,7 +1500,12 @@ impl Port { &flow_before, &mut ameta, ); - Self::update_stats_in(&mut data.stats.vals, &res); + // Prevent double-counting reprocessed modify entries. + if !(reprocess + && matches!(res, Ok(InternalProcessResult::Modified))) + { + Self::update_stats_in(&mut data.stats.vals, &res); + } drop(data); pkt.update_checksums(); res @@ -1496,7 +1516,12 @@ impl Port { .expect("lock should be held on this codepath"); let res = self .process_out_miss(&mut data, epoch, &mut pkt, &mut ameta); - Self::update_stats_out(&mut data.stats.vals, &res); + // Prevent double-counting reprocessed modify entries. 
+ if !(reprocess + && matches!(res, Ok(InternalProcessResult::Modified))) + { + Self::update_stats_out(&mut data.stats.vals, &res); + } drop(data); pkt.update_checksums(); res @@ -2352,7 +2377,7 @@ impl Port { let ufid_out = pkt.flow().mirror(); let mut hte = UftEntry { - pair: Some(ufid_out), + pair: KMutex::new(Some(ufid_out), KMutexType::Spin), xforms: xforms.compile(pkt.checksums_dirty()), epoch, l4_hash: ufid_in.crc32(), @@ -2368,8 +2393,7 @@ impl Port { // Remember, the inbound UFID is the flow as seen by // the network, before any processing is done by OPTE. - // TODO(kyle) - // out_entry.state().pair = Some(*ufid_in); + *out_entry.state().pair.lock() = Some(*ufid_in); } // Ideally we would simulate the outbound flow if no @@ -2612,7 +2636,7 @@ impl Port { Some(entry) => { let epoch = entry.state().epoch; let ufid_in = Some(ufid_in); - let ufid_out = entry.state().pair; + let ufid_out = (*entry.state().pair.lock()); self.uft_invalidate(data, ufid_out.as_ref(), ufid_in, epoch); } @@ -2747,7 +2771,7 @@ impl Port { let res = self.layers_process(data, Out, pkt, &mut xforms, ameta); // XXXX: may be hashing the wrong thing. 
let hte = UftEntry { - pair: None, + pair: KMutex::new(None, KMutexType::Spin), xforms: xforms.compile(pkt.checksums_dirty()), epoch, l4_hash: flow_before.crc32(), @@ -2915,7 +2939,7 @@ impl Port { Some(entry) => { let epoch = entry.state().epoch; let ufid_out = Some(pkt.flow()); - let ufid_in = entry.state().pair; + let ufid_in = (*entry.state().pair.lock()); self.uft_invalidate(data, ufid_out, ufid_in.as_ref(), epoch); } diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index 2e386115..3907940c 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -3598,12 +3598,15 @@ fn early_tcp_invalidation() { let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); let res = g1.port.process(Out, pkt1); expect_modified!(res, pkt1_m); - incr!( + update!( g1, [ - "stats.port.out_modified, stats.port.out_uft_miss", + "incr:stats.port.out_modified, stats.port.out_uft_miss", // We're hitting the old entry, before it is discarded. - "stats.port.out_uft_hit", + "incr:stats.port.out_uft_hit", + // Both UFTs are wiped out for reprocessing, but OUT is + // re-added. + "decr:uft.in" ] ); assert_eq!(TcpState::SynSent, g1.port.tcp_state(&flow).unwrap()); @@ -3636,7 +3639,7 @@ fn early_tcp_invalidation() { let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); let res = g1.port.process(In, pkt2); expect_modified!(res, pkt2_m); - incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); + incr!(g1, ["stats.port.in_modified, stats.port.in_uft_miss, uft.in"]); assert_eq!(TcpState::Established, g1.port.tcp_state(&flow).unwrap()); let mut pkt1_m = http_syn3( @@ -3654,8 +3657,11 @@ fn early_tcp_invalidation() { update!( g1, [ + // Hit the old flow... "incr:stats.port.in_modified, stats.port.in_uft_hit", - "set:uft.in=0, uft.out=0", + // Then reprocesssed. 
+ "incr:stats.port.in_uft_miss", + "set:uft.in=1, uft.out=0", ] ); assert_eq!(TcpState::Listen, g1.port.tcp_state(&flow).unwrap()); @@ -3705,12 +3711,12 @@ fn early_tcp_invalidation() { let flow = pkt1.flow(); let res = g1.port.process(Out, pkt1); expect_modified!(res, pkt1_m); - incr!( + update!( g1, [ - "stats.port.out_modified, stats.port.out_uft_miss", + "incr:stats.port.out_modified, stats.port.out_uft_miss", // We're hitting the old entry, before it is discarded. - "stats.port.out_uft_hit", + "incr:stats.port.out_uft_hit", ] ); assert_eq!(TcpState::SynSent, g1.port.tcp_state(&flow).unwrap()); From 10c3bb4ab898ed15526b5070e77a17f3ef4f348f Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 17 Oct 2024 00:43:21 +0100 Subject: [PATCH 052/115] ?? --- lib/opte/src/engine/port.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 6aa2894c..8d79bbda 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -1321,7 +1321,7 @@ impl Port { // entries and proceed to rule processing. 
Some(entry) => { let epoch = entry.state().epoch; - let owned_pair = (*entry.state().pair.lock()); + let owned_pair = *entry.state().pair.lock(); let (ufid_in, ufid_out) = match dir { Direction::Out => (owned_pair.as_ref(), Some(&flow_before)), Direction::In => (Some(&flow_before), owned_pair.as_ref()), @@ -2636,7 +2636,7 @@ impl Port { Some(entry) => { let epoch = entry.state().epoch; let ufid_in = Some(ufid_in); - let ufid_out = (*entry.state().pair.lock()); + let ufid_out = *entry.state().pair.lock(); self.uft_invalidate(data, ufid_out.as_ref(), ufid_in, epoch); } @@ -2939,7 +2939,7 @@ impl Port { Some(entry) => { let epoch = entry.state().epoch; let ufid_out = Some(pkt.flow()); - let ufid_in = (*entry.state().pair.lock()); + let ufid_in = *entry.state().pair.lock(); self.uft_invalidate(data, ufid_out, ufid_in.as_ref(), epoch); } From 103d2b93ba866d3ac1a39f1e537cd762187411b8 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 17 Oct 2024 15:56:10 +0100 Subject: [PATCH 053/115] Auto-fix warnings. 
--- crates/opte-api/src/encap.rs | 9 - lib/opte-test-utils/src/dhcp.rs | 1 - lib/opte-test-utils/src/icmp.rs | 10 +- lib/opte/src/ddi/time.rs | 1 - lib/opte/src/engine/flow_table.rs | 1 - lib/opte/src/engine/icmp/mod.rs | 4 - lib/opte/src/engine/icmp/v4.rs | 2 - lib/opte/src/engine/icmp/v6.rs | 9 +- lib/opte/src/engine/ingot_base.rs | 3 - lib/opte/src/engine/ingot_packet.rs | 12 +- lib/opte/src/engine/mod.rs | 5 - lib/opte/src/engine/nat.rs | 3 - lib/opte/src/engine/packet.rs | 1 - lib/opte/src/engine/port.rs | 285 +---------------------- lib/opte/src/engine/predicate.rs | 5 - lib/opte/src/engine/rule.rs | 13 -- lib/opte/src/engine/snat.rs | 3 - lib/opte/src/engine/tcp_state.rs | 2 - lib/oxide-vpc/src/engine/gateway/mod.rs | 2 - lib/oxide-vpc/src/engine/mod.rs | 15 -- lib/oxide-vpc/src/engine/overlay.rs | 3 - lib/oxide-vpc/tests/fuzz_regression.rs | 2 - lib/oxide-vpc/tests/integration_tests.rs | 20 -- 23 files changed, 8 insertions(+), 403 deletions(-) diff --git a/crates/opte-api/src/encap.rs b/crates/opte-api/src/encap.rs index e036633f..dbe25ef3 100644 --- a/crates/opte-api/src/encap.rs +++ b/crates/opte-api/src/encap.rs @@ -4,15 +4,6 @@ // Copyright 2024 Oxide Computer Company -use alloc::string::String; -use alloc::string::ToString; -use core::fmt; -use core::fmt::Debug; -use core::fmt::Display; -use core::str::FromStr; -use serde::Deserialize; -use serde::Serialize; - pub use ingot::geneve::Vni; #[cfg(test)] diff --git a/lib/opte-test-utils/src/dhcp.rs b/lib/opte-test-utils/src/dhcp.rs index 520f70c5..11321e36 100644 --- a/lib/opte-test-utils/src/dhcp.rs +++ b/lib/opte-test-utils/src/dhcp.rs @@ -17,7 +17,6 @@ use opte::engine::ingot_base::Ipv6; use opte::engine::ingot_packet::MsgBlk; use opte::ingot::ethernet::Ethertype; use opte::ingot::ip::IpProtocol; -use opte::ingot::types::Header; use opte::ingot::udp::Udp; pub use smoltcp::wire::DhcpMessageType; pub use smoltcp::wire::DhcpPacket; diff --git a/lib/opte-test-utils/src/icmp.rs 
b/lib/opte-test-utils/src/icmp.rs index 6ffaea5f..347c03ae 100644 --- a/lib/opte-test-utils/src/icmp.rs +++ b/lib/opte-test-utils/src/icmp.rs @@ -7,27 +7,21 @@ //! Routines for ICMP testing. use opte::api::*; -use opte::engine::ether::*; use opte::engine::ingot_base::Ethernet; use opte::engine::ingot_base::Ipv4; use opte::engine::ingot_base::Ipv6; use opte::engine::ingot_base::L3; use opte::engine::ingot_packet::MsgBlk; -use opte::engine::ip4::*; -use opte::engine::ip6::*; use opte::engine::packet::*; -use opte::engine::Direction::*; use opte::ingot::ethernet::Ethertype; use opte::ingot::ip::IpProtocol as IngotIpProto; use opte::ingot::types::HeaderLen; -use oxide_vpc::engine::VpcParser; use smoltcp::phy::ChecksumCapabilities as CsumCapab; use smoltcp::wire::Icmpv4Packet; use smoltcp::wire::Icmpv4Repr; use smoltcp::wire::Icmpv6Packet; use smoltcp::wire::Icmpv6Repr; use smoltcp::wire::IpAddress; -use smoltcp::wire::IpProtocol; use smoltcp::wire::Ipv6Address; use smoltcp::wire::NdiscNeighborFlags; pub use smoltcp::wire::NdiscRepr; @@ -194,7 +188,7 @@ pub fn gen_icmp_echo( while segments.len() > 1 { let chain = segments.pop().unwrap(); - let mut new_el = segments.last_mut().unwrap(); + let new_el = segments.last_mut().unwrap(); new_el.extend_if_one(chain); } @@ -320,7 +314,7 @@ pub fn gen_icmpv6_echo( while segments.len() > 1 { let chain = segments.pop().unwrap(); - let mut new_el = segments.last_mut().unwrap(); + let new_el = segments.last_mut().unwrap(); new_el.extend_if_one(chain); } diff --git a/lib/opte/src/ddi/time.rs b/lib/opte/src/ddi/time.rs index e5d11ddc..09734850 100644 --- a/lib/opte/src/ddi/time.rs +++ b/lib/opte/src/ddi/time.rs @@ -6,7 +6,6 @@ //! Moments, periodics, etc. use core::ops::Add; -use core::sync::atomic::AtomicU64; use core::time::Duration; cfg_if! 
{ diff --git a/lib/opte/src/engine/flow_table.rs b/lib/opte/src/engine/flow_table.rs index 758bd32c..1e761ab8 100644 --- a/lib/opte/src/engine/flow_table.rs +++ b/lib/opte/src/engine/flow_table.rs @@ -12,7 +12,6 @@ use super::packet::InnerFlowId; use crate::ddi::time::Moment; use crate::ddi::time::MILLIS; -use crate::ddi::time::NANOS_TO_MILLIS; use alloc::boxed::Box; use alloc::collections::BTreeMap; use alloc::ffi::CString; diff --git a/lib/opte/src/engine/icmp/mod.rs b/lib/opte/src/engine/icmp/mod.rs index 45e41e63..e5b29ce9 100644 --- a/lib/opte/src/engine/icmp/mod.rs +++ b/lib/opte/src/engine/icmp/mod.rs @@ -16,12 +16,8 @@ use super::headers::RawHeader; use super::packet::PacketReadMut; use super::packet::ReadErr; use crate::d_error::DError; -use crate::engine::ether::EtherHdr; -use crate::engine::ether::EtherMeta; -use crate::engine::ether::EtherType; use crate::engine::headers::HeaderActionModify; use crate::engine::headers::UlpMetaModify; -use crate::engine::packet::Packet; use crate::engine::predicate::DataPredicate; use crate::engine::predicate::EtherAddrMatch; use crate::engine::predicate::IpProtoMatch; diff --git a/lib/opte/src/engine/icmp/v4.rs b/lib/opte/src/engine/icmp/v4.rs index b38fb1f8..a150993c 100644 --- a/lib/opte/src/engine/icmp/v4.rs +++ b/lib/opte/src/engine/icmp/v4.rs @@ -12,8 +12,6 @@ use crate::engine::ingot_base::Ipv4; use crate::engine::ingot_base::L3; use crate::engine::ingot_packet::MsgBlk; use crate::engine::ingot_packet::PacketHeaders2; -use crate::engine::ip4::Ipv4Hdr; -use crate::engine::ip4::Ipv4Meta; use crate::engine::predicate::Ipv4AddrMatch; use ingot::ethernet::Ethertype; use ingot::ip::IpProtocol; diff --git a/lib/opte/src/engine/icmp/v6.rs b/lib/opte/src/engine/icmp/v6.rs index 2bffa62c..90077879 100644 --- a/lib/opte/src/engine/icmp/v6.rs +++ b/lib/opte/src/engine/icmp/v6.rs @@ -12,8 +12,6 @@ use crate::engine::ingot_base::Ipv6; use crate::engine::ingot_base::Ipv6Ref; use crate::engine::ingot_packet::MsgBlk; use 
crate::engine::ingot_packet::PacketHeaders2; -use crate::engine::ip6::Ipv6Hdr; -use crate::engine::ip6::Ipv6Meta; use crate::engine::predicate::Ipv6AddrMatch; use alloc::string::String; use ingot::ethernet::Ethertype; @@ -30,7 +28,6 @@ use smoltcp::wire::Icmpv6Message; use smoltcp::wire::Icmpv6Packet; use smoltcp::wire::Icmpv6Repr; use smoltcp::wire::IpAddress; -use smoltcp::wire::IpProtocol; use smoltcp::wire::Ipv6Address; use smoltcp::wire::NdiscNeighborFlags; use smoltcp::wire::NdiscRepr; @@ -192,7 +189,7 @@ impl HairpinAction for Icmpv6EchoReply { csum.icmpv6 = Checksum::Tx; reply.emit(&dst_ip, &src_ip, &mut icmp_reply, &csum); - let mut ip6 = Ipv6 { + let ip6 = Ipv6 { source: self.dst_ip, destination: self.src_ip, next_header: IngotIpProto::ICMP_V6, @@ -366,7 +363,7 @@ impl HairpinAction for RouterAdvertisement { &csum, ); - let mut ip6 = Ipv6 { + let ip6 = Ipv6 { source: *self.ip(), destination: meta.inner_ip6().unwrap().source(), next_header: IngotIpProto::ICMP_V6, @@ -631,7 +628,7 @@ impl HairpinAction for NeighborAdvertisement { // is addressed to depends on whether we should multicast the packet. 
let dst_mac = dst_ip.multicast_mac().unwrap_or(self.src_mac); - let mut ip6 = Ipv6 { + let ip6 = Ipv6 { source: *self.ip(), destination: dst_ip, next_header: IngotIpProto::ICMP_V6, diff --git a/lib/opte/src/engine/ingot_base.rs b/lib/opte/src/engine/ingot_base.rs index f152e246..862684c2 100644 --- a/lib/opte/src/engine/ingot_base.rs +++ b/lib/opte/src/engine/ingot_base.rs @@ -1,5 +1,4 @@ use super::checksum::Checksum; -use bitflags::bitflags; use ingot::choice; use ingot::ethernet::Ethertype; use ingot::icmp::IcmpV4; @@ -24,9 +23,7 @@ use ingot::types::ByteSlice; use ingot::types::Emit; use ingot::types::Header; use ingot::types::HeaderLen; -use ingot::types::NetworkRepr; use ingot::types::NextLayer; -use ingot::types::ParseError; use ingot::types::Vec; use ingot::udp::Udp; use ingot::udp::UdpMut; diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 43ae281b..3ac0d513 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -3,7 +3,6 @@ use super::checksum::Checksum; use super::checksum::HeaderChecksum; use super::ether::EtherMeta; use super::ether::EtherMod; -use super::geneve::GeneveMeta; use super::geneve::GENEVE_PORT; use super::headers::EncapMeta; use super::headers::EncapMod; @@ -16,7 +15,6 @@ use super::headers::IpMod; use super::headers::IpPush; use super::headers::PushAction; use super::headers::UlpMetaModify; -use super::headers::UlpMod; use super::icmp::QueryEcho; use super::ingot_base::Ethernet; use super::ingot_base::EthernetMut; @@ -34,7 +32,6 @@ use super::ingot_base::L3Repr; use super::ingot_base::Ulp; use super::ingot_base::UlpRepr; use super::ingot_base::ValidEthernet; -use super::ingot_base::ValidIpv6; use super::ingot_base::ValidL3; use super::ingot_base::ValidL4; use super::ingot_base::ValidUlp; @@ -63,12 +60,10 @@ use alloc::boxed::Box; use alloc::sync::Arc; use alloc::vec::Vec; use core::cell::Cell; -use core::cell::RefCell; use core::hash::Hash; use 
core::marker::PhantomData; use core::mem::ManuallyDrop; use core::mem::MaybeUninit; -use core::num::NonZeroU32; use core::ops::Deref; use core::ops::DerefMut; use core::ptr; @@ -93,12 +88,10 @@ use ingot::icmp::IcmpV6Packet; use ingot::icmp::IcmpV6Ref; use ingot::ip::IpProtocol; use ingot::ip::Ipv4Flags; -use ingot::ip::LowRentV6EhRepr; use ingot::tcp::TcpFlags; use ingot::tcp::TcpMut; use ingot::tcp::TcpPacket; use ingot::tcp::TcpRef; -use ingot::types::primitives::*; use ingot::types::util::Repeated; use ingot::types::BoxedHeader; use ingot::types::Emit; @@ -109,7 +102,6 @@ use ingot::types::InlineHeader; use ingot::types::NextLayer; use ingot::types::ParseControl; use ingot::types::ParseError as IngotParseErr; -use ingot::types::ParseResult; use ingot::types::Parsed as IngotParsed; use ingot::types::Read; use ingot::udp::Udp; @@ -119,7 +111,6 @@ use ingot::udp::UdpRef; use ingot::udp::ValidUdp; use ingot::Parse; use opte_api::Direction; -use opte_api::Ipv4Addr; use opte_api::Ipv6Addr; use opte_api::Vni; use zerocopy::ByteSlice; @@ -1031,8 +1022,7 @@ pub type Test2 = ValidNoEncap<&'static [u8]>; pub type Test3 = ValidGeneveOverV6<&'static [u8]>; pub type OpteParsed = IngotParsed::Chunk>, T>; -pub type OpteParsed2::Chunk>> = - IngotParsed; +pub type OpteParsed2 = IngotParsed; impl OpteMeta { #[inline] diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index 5f5e56d6..eea1d7ec 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -47,7 +47,6 @@ use checksum::Checksum; use core::fmt; use core::num::ParseIntError; use ingot::tcp::TcpRef; -use ingot::types::Parsed as IngotParsed; use ingot::types::Read; use ingot_packet::MsgBlk; use ingot_packet::NoEncap; @@ -55,9 +54,7 @@ use ingot_packet::OpteMeta; use ingot_packet::OpteParsed; use ingot_packet::OpteParsed2; use ingot_packet::Packet2; -use ingot_packet::PacketHeaders; use ingot_packet::Parsed2; -use ingot_packet::ParsedMblk; use ingot_packet::ValidNoEncap; use ip4::IpError; 
pub use opte_api::Direction; @@ -197,9 +194,7 @@ pub use dbg_macro as dbg; pub use err_macro as err; use crate::engine::flow_table::FlowTable; -use crate::engine::packet::Initialized; use crate::engine::packet::InnerFlowId; -use crate::engine::packet::Packet; use crate::engine::packet::ParseError; use crate::engine::port::UftEntry; diff --git a/lib/opte/src/engine/nat.rs b/lib/opte/src/engine/nat.rs index b32eb29f..0b226f20 100644 --- a/lib/opte/src/engine/nat.rs +++ b/lib/opte/src/engine/nat.rs @@ -11,8 +11,6 @@ use super::headers::IpMod; use super::ingot_packet::Packet2; use super::ingot_packet::ParsedMblk; use super::packet::InnerFlowId; -use super::packet::Packet; -use super::packet::Parsed; use super::port::meta::ActionMeta; use super::predicate::DataPredicate; use super::predicate::Predicate; @@ -27,7 +25,6 @@ use alloc::sync::Arc; use alloc::vec::Vec; use core::fmt; use core::hash::Hash; -use core::marker::PhantomData; use crc32fast::Hasher; use itertools::Itertools; use opte_api::Direction; diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 1d656d8f..9d98ea8a 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -61,7 +61,6 @@ use dyn_clone::DynClone; use serde::Deserialize; use serde::Serialize; // TODO should probably move these two into this module now. 
-use super::rule::HdrTransform; use super::tcp::TcpHdr; use super::tcp::TcpHdrError; use super::tcp::TcpMeta; diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 8d79bbda..72ace71d 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -40,10 +40,8 @@ use super::layer::LayerStatsSnap; use super::layer::RuleId; use super::packet::BodyTransform; use super::packet::BodyTransformError; -use super::packet::Initialized; use super::packet::InnerFlowId; use super::packet::Packet; -use super::packet::PacketMeta; use super::packet::Parsed; use super::packet::FLOW_ID_DEFAULT; use super::rule::Action; @@ -74,7 +72,6 @@ use crate::engine::flow_table::ExpiryPolicy; use crate::engine::ingot_packet::EmitterSpec; use crate::engine::ingot_packet::EmittestSpec; use crate::engine::rule::CompiledEncap; -use crate::engine::tcp::TcpMeta; use crate::ExecCtx; use alloc::boxed::Box; use alloc::ffi::CString; @@ -1266,8 +1263,7 @@ impl Port { // TODO: fixup types here. // self.port_process_entry_probe(dir, &flow_before, epoch, &pkt); - let mut uft: Option<&Arc>>> = match dir - { + let uft: Option<&Arc>>> = match dir { Direction::Out => data.uft_out.get(&flow_before), Direction::In => data.uft_in.get(&flow_before), }; @@ -1834,7 +1830,7 @@ impl Transforms { if still_permissable { let encap = match (outer_ether, outer_ip, outer_encap) { (Some(eth), Some(ip), Some(encap)) => { - let mut encap_repr = match encap { + let encap_repr = match encap { EncapPush::Geneve(g) => ( Udp { source: g.entropy, @@ -2507,146 +2503,6 @@ impl Port { } } - // TODO: remove. - fn process_in( - &self, - data: &mut PortData, - epoch: u64, - pkt: &mut Packet2, - ufid_in: &InnerFlowId, - ameta: &mut ActionMeta, - ) -> result::Result { - use Direction::In; - - // Use the compiled UFT entry if one exists. Otherwise - // fallback to layer processing. 
- match data.uft_in.get(ufid_in) { - Some(entry) if entry.state().epoch == epoch => { - // TODO At the moment I'm holding the UFT locks not - // just for lookup, but for the entire duration of - // processing. It might be better to ht.clone() or - // Arc; that way we only hold the lock - // for lookup. - entry.hit(); - data.stats.vals.in_uft_hit += 1; - self.uft_hit_probe(In, pkt.flow(), epoch, &entry.last_hit()); - - let transform = Some(Arc::clone(&entry.state().xforms)); - pkt.set_l4_hash(entry.state().l4_hash); - - // for ht in &entry.state().xforms.hdr { - // pkt.hdr_transform(ht)?; - // } - - // for bt in &entry.state().xforms.body { - // pkt.body_transform(In, &**bt)?; - // } - - // For inbound traffic the TCP flow table must be - // checked _after_ processing take place. - // TODO: uncork - // if pkt.meta().is_inner_tcp() { - // match self.process_in_tcp( - // data, - // pkt.meta(), - // ufid_in, - // pkt.len() as u64, - // ) { - // Ok(_) => return Ok(ProcessResult::Modified), - // Err(ProcessError::TcpFlow( - // e @ TcpFlowStateError::NewFlow { .. }, - // )) => { - // self.tcp_err( - // &data.tcp_flows, - // In, - // e.to_string(), - // pkt, - // ); - // // We cant redo processing here like we can in `process_out`: - // // we already modified the packet to check TCP state. - // // However, we *have* deleted and replaced the TCP FSM and - // // removed the UFT. The next packet on this flow (SYN-ACK) will - // // create the UFT, reference the existing TCP flow, and increment - // // all other layers' stats. - // return Ok(ProcessResult::Modified); - // } - // Err(ProcessError::MissingFlow(flow_id)) => { - // let e = format!("Missing TCP flow ID: {flow_id}"); - // self.tcp_err( - // &data.tcp_flows, - // Direction::In, - // e, - // pkt, - // ); - // // If we have a UFT but no TCP flow ID, there is likely a bug - // // and we are now out of sync. 
As above we can't reprocess, - // // but we have regenerated the TCP entry to be less disruptive - // // than a drop. Remove the UFT entry on the same proviso since the - // // next packet to use it will regenerate it. - // self.uft_invalidate( - // data, - // None, - // Some(ufid_in), - // epoch, - // ); - // return Ok(ProcessResult::Modified); - // } - // Err(ProcessError::TcpFlow( - // e @ TcpFlowStateError::UnexpectedSegment { .. }, - // )) => { - // // Technically unreachable, as we filter these out in `update_tcp_entry`. - // // Panicking here would probably be overly fragile, however. - // self.tcp_err( - // &data.tcp_flows, - // Direction::In, - // e.to_string(), - // pkt, - // ); - // return Ok(ProcessResult::Drop { - // reason: DropReason::TcpErr, - // }); - // } - // Err(ProcessError::FlowTableFull { kind, limit }) => { - // let e = format!( - // "{kind} flow table full ({limit} entries)" - // ); - // self.tcp_err( - // &data.tcp_flows, - // Direction::In, - // e, - // pkt, - // ); - // return Ok(ProcessResult::Drop { - // reason: DropReason::TcpErr, - // }); - // } - // _ => unreachable!( - // "Cannot return other errors from process_in_tcp" - // ), - // } - // } else { - // return Ok(ProcessResult::Modified); - // } - - return Ok(InternalProcessResult::Modified); - } - - // The entry is from a previous epoch; invalidate its UFT - // entries and proceed to rule processing. - Some(entry) => { - let epoch = entry.state().epoch; - let ufid_in = Some(ufid_in); - let ufid_out = *entry.state().pair.lock(); - self.uft_invalidate(data, ufid_out.as_ref(), ufid_in, epoch); - } - - // There is no entry; proceed to rule processing; - None => (), - }; - - self.process_in_miss(data, epoch, pkt, ufid_in, ameta) - } - // Process the TCP packet for the purposes of connection tracking // when an outbound UFT entry exists. fn process_out_tcp_existing( @@ -2813,143 +2669,6 @@ impl Port { } } - // TODO: remove. 
- fn process_out( - &self, - data: &mut PortData, - epoch: u64, - pkt: &mut Packet2, - ameta: &mut ActionMeta, - ) -> result::Result { - use Direction::Out; - - let uft_out = &mut data.uft_out; - - // Use the compiled UFT entry if one exists. Otherwise - // fallback to layer processing. - match uft_out.get(&pkt.flow()) { - Some(entry) if entry.state().epoch == epoch => { - entry.hit(); - data.stats.vals.out_uft_hit += 1; - self.uft_hit_probe(Out, pkt.flow(), epoch, &entry.last_hit()); - - let mut invalidated = false; - let mut reprocess = false; - let mut ufid_in = None; - - // TODO: find the best way to unbreak. - - // For outbound traffic the TCP flow table must be - // checked _before_ processing take place. - // if pkt.meta().is_inner_tcp() { - // match self.process_out_tcp_existing( - // &mut data.tcp_flows, - // pkt.flow(), - // pkt.meta(), - // pkt.len() as u64, - // ) { - // // Continue with processing. - // Ok(TcpMaybeClosed::NewState(_)) => (), - - // Ok(TcpMaybeClosed::Closed { ufid_inbound }) => { - // invalidated = true; - // ufid_in = ufid_inbound; - // } - - // Err(ProcessError::TcpFlow( - // e @ TcpFlowStateError::NewFlow { .. }, - // )) => { - // invalidated = true; - // reprocess = true; - // // TODO(kyle) - // // self.tcp_err( - // // &data.tcp_flows, - // // Out, - // // e.to_string(), - // // pkt, - // // ); - // } - - // Err(ProcessError::MissingFlow(flow_id)) => { - // // If we have a UFT but no TCP flow ID, there is likely a bug - // // and we are now out of sync. A full reprocess will be - // // slower for this packet but will sync up the tables again. - // invalidated = true; - // reprocess = true; - // let e = format!("Missing TCP flow ID: {flow_id}"); - // // TODO(kyle) - // // self.tcp_err( - // // &data.tcp_flows, - // // Direction::In, - // // e, - // // pkt, - // // ); - // } - - // Err(ProcessError::TcpFlow( - // e @ TcpFlowStateError::UnexpectedSegment { .. 
}, - // )) => { - // // Technically unreachable, as we filter these out in `update_tcp_entry`. - // // Panicking here would probably be overly fragile, however. - // // TODO(kyle) - // // self.tcp_err( - // // &data.tcp_flows, - // // Direction::In, - // // e.to_string(), - // // pkt, - // // ); - // return Ok(ProcessResult::Drop { - // reason: DropReason::TcpErr, - // }); - // } - - // _ => unreachable!( - // "Cannot return other errors from process_in_tcp_new" - // ), - // } - // } - - let flow_to_invalidate = invalidated.then(|| *pkt.flow()); - - // If we suspect this is a new flow, we need to not perform - // existing transforms if we're going to behave as though we - // have a UFT miss. - if !reprocess { - let transform = Some(Arc::clone(&entry.state().xforms)); - pkt.set_l4_hash(entry.state().l4_hash); - // Due to borrowing constraints from order of operations, we have - // to remove the UFT entry here rather than in `update_tcp_entry`. - // The TCP entry itself is already removed. - if let Some(flow_before) = flow_to_invalidate { - self.uft_tcp_closed( - data, - &flow_before, - ufid_in.as_ref(), - ); - } - - return Ok(InternalProcessResult::Modified); - } else if let Some(flow_before) = flow_to_invalidate { - self.uft_tcp_closed(data, &flow_before, ufid_in.as_ref()); - } - } - - // The entry is from a previous epoch; invalidate its UFT - // entries and proceed to rule processing. - Some(entry) => { - let epoch = entry.state().epoch; - let ufid_out = Some(pkt.flow()); - let ufid_in = *entry.state().pair.lock(); - self.uft_invalidate(data, ufid_out, ufid_in.as_ref(), epoch); - } - - // There is no entry; proceed to layer processing. 
- None => (), - } - - self.process_out_miss(data, epoch, pkt, ameta) - } - fn uft_invalidate( &self, data: &mut PortData, diff --git a/lib/opte/src/engine/predicate.rs b/lib/opte/src/engine/predicate.rs index e640c721..c9548cc1 100644 --- a/lib/opte/src/engine/predicate.rs +++ b/lib/opte/src/engine/predicate.rs @@ -9,7 +9,6 @@ use super::dhcp::MessageType as DhcpMessageType; use super::dhcpv6::MessageType as Dhcpv6MessageType; use super::ether::EtherType; -use super::headers::IpMeta; use super::icmp::v4::MessageType as IcmpMessageType; use super::icmp::v6::MessageType as Icmpv6MessageType; use super::ingot_base::EthernetRef; @@ -18,16 +17,12 @@ use super::ingot_base::Ipv6Ref; use super::ingot_base::L3; use super::ingot_packet::ulp_dst_port; use super::ingot_packet::ulp_src_port; -use super::ingot_packet::PacketHeaders; use super::ingot_packet::PacketHeaders2; use super::ip4::Ipv4Addr; use super::ip4::Ipv4Cidr; -use super::ip4::Ipv4Meta; use super::ip4::Protocol; use super::ip6::Ipv6Addr; use super::ip6::Ipv6Cidr; -use super::ip6::Ipv6Meta; -use super::packet::PacketMeta; use super::packet::PacketRead; use super::port::meta::ActionMeta; use alloc::boxed::Box; diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index f8d5dd0c..29a85034 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -6,19 +6,15 @@ //! Rules and actions. 
-use crate::engine::ingot_base::Ipv4; use crate::engine::ingot_base::Ipv4Mut; -use crate::engine::GenericUlp; use super::ether::EtherMeta; use super::ether::EtherMod; use super::flow_table::StateSummary; -use super::headers::EncapMeta; use super::headers::EncapMod; use super::headers::EncapPush; use super::headers::HeaderAction; use super::headers::HeaderActionError; -use super::headers::IpMeta; use super::headers::IpMod; use super::headers::IpPush; use super::headers::Transform; @@ -34,12 +30,7 @@ use super::ingot_packet::PacketHeaders; use super::ingot_packet::PacketHeaders2; use super::ingot_packet::ParsedMblk; use super::packet::BodyTransform; -use super::packet::Initialized; use super::packet::InnerFlowId; -use super::packet::Packet; -use super::packet::PacketMeta; -use super::packet::PacketReader; -use super::packet::Parsed; use super::port::meta::ActionMeta; use super::predicate::DataPredicate; use super::predicate::Predicate; @@ -53,12 +44,8 @@ use core::ffi::CStr; use core::fmt; use core::fmt::Debug; use core::fmt::Display; -use core::mem::MaybeUninit; use illumos_sys_hdrs::c_char; use illumos_sys_hdrs::uintptr_t; -use ingot::ethernet::Ethertype; -use ingot::ip::IpProtocol; -use ingot::tcp::Tcp; use ingot::types::HeaderLen; use ingot::types::InlineHeader; use ingot::types::Read; diff --git a/lib/opte/src/engine/snat.rs b/lib/opte/src/engine/snat.rs index 2d96bfd1..73868488 100644 --- a/lib/opte/src/engine/snat.rs +++ b/lib/opte/src/engine/snat.rs @@ -14,8 +14,6 @@ use super::headers::UlpMetaModify; use super::ingot_packet::Packet2; use super::ingot_packet::ParsedMblk; use super::packet::InnerFlowId; -use super::packet::Packet; -use super::packet::Parsed; use super::port::meta::ActionMeta; use super::predicate::DataPredicate; use super::predicate::Predicate; @@ -39,7 +37,6 @@ use alloc::sync::Arc; use alloc::vec::Vec; use core::fmt; use core::fmt::Display; -use core::marker::PhantomData; use core::ops::RangeInclusive; use ingot::icmp::IcmpV4Ref; use 
ingot::icmp::IcmpV6Ref; diff --git a/lib/opte/src/engine/tcp_state.rs b/lib/opte/src/engine/tcp_state.rs index eb3bc441..97446e56 100644 --- a/lib/opte/src/engine/tcp_state.rs +++ b/lib/opte/src/engine/tcp_state.rs @@ -7,8 +7,6 @@ //! Basic TCP state machine. use super::packet::InnerFlowId; -use super::tcp::TcpFlags; -use super::tcp::TcpMeta; use super::tcp::TcpState; use core::ffi::CStr; use core::fmt; diff --git a/lib/oxide-vpc/src/engine/gateway/mod.rs b/lib/oxide-vpc/src/engine/gateway/mod.rs index ecc2c42a..f8844923 100644 --- a/lib/oxide-vpc/src/engine/gateway/mod.rs +++ b/lib/oxide-vpc/src/engine/gateway/mod.rs @@ -52,7 +52,6 @@ use alloc::sync::Arc; use alloc::vec::Vec; use core::fmt; use core::fmt::Display; -use core::marker::PhantomData; use opte::api::Direction; use opte::api::OpteError; use opte::engine::ether::EtherMod; @@ -62,7 +61,6 @@ use opte::engine::layer::DefaultAction; use opte::engine::layer::Layer; use opte::engine::layer::LayerActions; use opte::engine::packet::InnerFlowId; -use opte::engine::packet::PacketMeta; use opte::engine::port::meta::ActionMeta; use opte::engine::port::PortBuilder; use opte::engine::port::Pos; diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index 89821a6b..fe0806a6 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -13,29 +13,15 @@ pub mod print; pub mod router; use crate::cfg::VpcCfg; -use opte::engine::ether::EtherType; use opte::engine::flow_table::FlowTable; -use opte::engine::headers::EncapMeta; -use opte::engine::ingot_packet::GeneveOverV6; use opte::engine::ingot_packet::MsgBlk; -use opte::engine::ingot_packet::NoEncap; -use opte::engine::ingot_packet::OpteMeta; -use opte::engine::ingot_packet::OpteParsed; use opte::engine::ingot_packet::OpteParsed2; use opte::engine::ingot_packet::Packet2; use opte::engine::ingot_packet::Parsed2; use opte::engine::ingot_packet::ValidGeneveOverV6; use opte::engine::ingot_packet::ValidNoEncap; -use 
opte::engine::ip4::Protocol; -use opte::engine::packet::HeaderOffsets; use opte::engine::packet::InnerFlowId; -use opte::engine::packet::Packet; -use opte::engine::packet::PacketInfo; -use opte::engine::packet::PacketMeta; -use opte::engine::packet::PacketRead; -use opte::engine::packet::PacketReaderMut; use opte::engine::packet::ParseError; -use opte::engine::packet::Parsed; use opte::engine::port::UftEntry; use opte::engine::Direction; use opte::engine::HdlPktAction; @@ -51,7 +37,6 @@ use opte::engine::ingot_base::EthernetRef; use opte::engine::ip4::Ipv4Addr; use opte::ingot::ethernet::Ethertype; use opte::ingot::types::Read; -use zerocopy::ByteSlice; use zerocopy::ByteSliceMut; #[derive(Clone, Copy, Debug, Default)] diff --git a/lib/oxide-vpc/src/engine/overlay.rs b/lib/oxide-vpc/src/engine/overlay.rs index fbad40d1..6230812e 100644 --- a/lib/oxide-vpc/src/engine/overlay.rs +++ b/lib/oxide-vpc/src/engine/overlay.rs @@ -22,7 +22,6 @@ use alloc::string::ToString; use alloc::sync::Arc; use alloc::vec::Vec; use core::fmt; -use core::marker::PhantomData; use opte::api::Direction; use opte::api::Ipv4Addr; use opte::api::Ipv4Cidr; @@ -38,7 +37,6 @@ use opte::engine::ether::EtherMod; use opte::engine::ether::EtherType; use opte::engine::geneve::GenevePush; use opte::engine::geneve::Vni; -use opte::engine::headers::EncapMeta; use opte::engine::headers::EncapPush; use opte::engine::headers::HeaderAction; use opte::engine::headers::IpAddr; @@ -53,7 +51,6 @@ use opte::engine::layer::DefaultAction; use opte::engine::layer::Layer; use opte::engine::layer::LayerActions; use opte::engine::packet::InnerFlowId; -use opte::engine::packet::PacketMeta; use opte::engine::port::meta::ActionMeta; use opte::engine::port::meta::ActionMetaValue; use opte::engine::port::PortBuilder; diff --git a/lib/oxide-vpc/tests/fuzz_regression.rs b/lib/oxide-vpc/tests/fuzz_regression.rs index 9e091f22..ef7b92d1 100644 --- a/lib/oxide-vpc/tests/fuzz_regression.rs +++ 
b/lib/oxide-vpc/tests/fuzz_regression.rs @@ -11,8 +11,6 @@ use opte::engine::ingot_packet::MsgBlk; use opte::engine::ingot_packet::Packet2; -use opte::engine::packet::Packet; -use opte::engine::Direction; use oxide_vpc::engine::VpcParser; use serde::Deserialize; use serde::Serialize; diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index 3907940c..de246c6f 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -20,17 +20,10 @@ use opte::api::OpteError; use opte::ddi::time::Moment; use opte::engine::arp::ArpEthIpv4; use opte::engine::arp::ArpEthIpv4Raw; -use opte::engine::checksum::Checksum as OpteCsum; use opte::engine::dhcpv6; use opte::engine::ether::EtherHdr; -use opte::engine::ether::EtherHdrRaw; -use opte::engine::ether::EtherMeta; use opte::engine::flow_table::FLOW_DEF_EXPIRE_SECS; use opte::engine::geneve::Vni; -use opte::engine::headers::EncapMeta; -use opte::engine::headers::IpMeta; -use opte::engine::headers::UlpMeta; -use opte::engine::icmp::IcmpHdr; use opte::engine::ingot_base::Ethernet; use opte::engine::ingot_base::EthernetRef; use opte::engine::ingot_base::Ipv4Ref; @@ -39,31 +32,19 @@ use opte::engine::ingot_base::Ipv6Ref; use opte::engine::ingot_base::ValidL3; use opte::engine::ingot_base::ValidUlp; use opte::engine::ingot_base::L3; -use opte::engine::ingot_packet::LightParsedMblk; use opte::engine::ingot_packet::MsgBlk; use opte::engine::ingot_packet::Packet2; -use opte::engine::ingot_packet::Parsed2; use opte::engine::ingot_packet::ParsedMblk; use opte::engine::ip4::Ipv4Addr; use opte::engine::ip4::Ipv4Hdr; -use opte::engine::ip4::Ipv4HdrError; -use opte::engine::ip4::Ipv4Meta; -use opte::engine::ip4::Protocol; use opte::engine::ip6::Ipv6Hdr; -use opte::engine::ip6::Ipv6Meta; -use opte::engine::packet::Initialized; use opte::engine::packet::InnerFlowId; -use opte::engine::packet::Packet; use opte::engine::packet::PacketRead; -use 
opte::engine::packet::ParseError; -use opte::engine::packet::Parsed; use opte::engine::port::ProcessError; use opte::engine::tcp::TcpState; use opte::engine::tcp::TIME_WAIT_EXPIRE_SECS; use opte::engine::udp::UdpHdr; -use opte::engine::udp::UdpMeta; use opte::engine::Direction; -use opte::engine::NetworkParser; use opte::ingot::geneve::GeneveRef; use opte::ingot::icmp::IcmpV6Ref; use opte::ingot::tcp::TcpRef; @@ -78,7 +59,6 @@ use oxide_vpc::api::RouterClass; use oxide_vpc::api::VpcCfg; use oxide_vpc::engine::overlay::BOUNDARY_SERVICES_VNI; use pcap::*; -use smoltcp::phy::Checksum; use smoltcp::phy::ChecksumCapabilities as CsumCapab; use smoltcp::wire::Icmpv4Packet; use smoltcp::wire::Icmpv4Repr; From b17439568d1e4c50c755751f2f4c19376a82bc2c Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 17 Oct 2024 17:55:34 +0100 Subject: [PATCH 054/115] Tweaks. --- lib/opte-test-utils/src/dhcp.rs | 4 +- lib/opte-test-utils/src/icmp.rs | 5 +- lib/opte-test-utils/src/pcap.rs | 1 - lib/opte/src/engine/ingot_base.rs | 3 +- lib/opte/src/engine/ingot_packet.rs | 202 +++++++---------------- lib/opte/src/engine/mod.rs | 17 -- lib/opte/src/engine/port.rs | 182 +++++--------------- lib/opte/src/engine/predicate.rs | 1 - lib/opte/src/engine/rule.rs | 3 - lib/oxide-vpc/src/engine/overlay.rs | 3 +- lib/oxide-vpc/tests/firewall_tests.rs | 11 -- lib/oxide-vpc/tests/integration_tests.rs | 54 ++---- 12 files changed, 118 insertions(+), 368 deletions(-) diff --git a/lib/opte-test-utils/src/dhcp.rs b/lib/opte-test-utils/src/dhcp.rs index 11321e36..a38878d5 100644 --- a/lib/opte-test-utils/src/dhcp.rs +++ b/lib/opte-test-utils/src/dhcp.rs @@ -56,7 +56,7 @@ pub fn packet_from_client_dhcpv4_message( let mut pkt = MsgBlk::new_ethernet(total_len); pkt.emit_back(&headers).unwrap(); let dhcp_off = pkt.len(); - pkt.resize(total_len); + pkt.resize(total_len).unwrap(); let mut dhcp_pkt = DhcpPacket::new_checked(&mut pkt[dhcp_off..]).unwrap(); msg.emit(&mut dhcp_pkt).unwrap(); @@ -104,7 +104,7 @@ pub 
fn write_dhcpv6_packet( let mut pkt = MsgBlk::new_ethernet(total_len); pkt.emit_back(&headers).unwrap(); let dhcp_off = pkt.len(); - pkt.resize(total_len); + pkt.resize(total_len).unwrap(); msg.copy_into(&mut pkt[dhcp_off..]).unwrap(); pkt diff --git a/lib/opte-test-utils/src/icmp.rs b/lib/opte-test-utils/src/icmp.rs index 347c03ae..a092de7e 100644 --- a/lib/opte-test-utils/src/icmp.rs +++ b/lib/opte-test-utils/src/icmp.rs @@ -12,7 +12,6 @@ use opte::engine::ingot_base::Ipv4; use opte::engine::ingot_base::Ipv6; use opte::engine::ingot_base::L3; use opte::engine::ingot_packet::MsgBlk; -use opte::engine::packet::*; use opte::ingot::ethernet::Ethertype; use opte::ingot::ip::IpProtocol as IngotIpProto; use opte::ingot::types::HeaderLen; @@ -278,7 +277,7 @@ pub fn gen_icmpv6_echo( match n_segments { 1 => { let mut pkt = MsgBlk::new_ethernet(total_len); - pkt.emit_back(&(eth, ip)); + pkt.emit_back(&(eth, ip)).unwrap(); pkt.write_bytes_back(&body_bytes).unwrap(); return pkt; @@ -352,7 +351,7 @@ pub fn generate_ndisc( let mut pkt = MsgBlk::new_ethernet(total_len); pkt.emit_back(&headers).unwrap(); let ndisc_off = pkt.len(); - pkt.resize(total_len); + pkt.resize(total_len).unwrap(); let mut req_pkt = Icmpv6Packet::new_unchecked(&mut pkt[ndisc_off..]); let mut csum = CsumCapab::ignored(); diff --git a/lib/opte-test-utils/src/pcap.rs b/lib/opte-test-utils/src/pcap.rs index 3af986e9..4983a810 100644 --- a/lib/opte-test-utils/src/pcap.rs +++ b/lib/opte-test-utils/src/pcap.rs @@ -7,7 +7,6 @@ //! Routines for building packet capture files. 
use opte::engine::ingot_packet::MsgBlk; -use opte::engine::packet::*; use pcap_parser::pcap; use pcap_parser::pcap::LegacyPcapBlock; use pcap_parser::pcap::PcapHeader; diff --git a/lib/opte/src/engine/ingot_base.rs b/lib/opte/src/engine/ingot_base.rs index 862684c2..5f4b5515 100644 --- a/lib/opte/src/engine/ingot_base.rs +++ b/lib/opte/src/engine/ingot_base.rs @@ -22,7 +22,6 @@ use ingot::types::util::Repeated; use ingot::types::ByteSlice; use ingot::types::Emit; use ingot::types::Header; -use ingot::types::HeaderLen; use ingot::types::NextLayer; use ingot::types::Vec; use ingot::udp::Udp; @@ -263,7 +262,7 @@ impl ValidL3 { pub fn csum(&self) -> [u8; 2] { match self { ValidL3::Ipv4(i4) => i4.checksum(), - ValidL3::Ipv6(i6) => 0, + ValidL3::Ipv6(_) => 0, } .to_be_bytes() } diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 3ac0d513..3a07509f 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -104,6 +104,7 @@ use ingot::types::ParseControl; use ingot::types::ParseError as IngotParseErr; use ingot::types::Parsed as IngotParsed; use ingot::types::Read; +use ingot::types::ToOwnedPacket; use ingot::udp::Udp; use ingot::udp::UdpMut; use ingot::udp::UdpPacket; @@ -1100,7 +1101,7 @@ impl Emit for ValidEncapMeta { #[inline] fn emit_raw(&self, buf: V) -> usize { match self { - ValidEncapMeta::Geneve(u, g) => todo!(), + ValidEncapMeta::Geneve(u, g) => (u, g).emit_raw(buf), } } @@ -1830,7 +1831,7 @@ impl Packet2> { #[inline] /// Convert a packet's metadata into a set of instructions /// needed to serialize all its changes to the wire. - pub fn emit_spec(self) -> EmitSpec + pub fn emit_spec(self) -> Result where T::Chunk: ByteSliceMut, { @@ -1867,7 +1868,7 @@ impl Packet2> { if ulp.needs_emit() || l != init_lens.inner_ulp { let inner = push_spec.inner.get_or_insert_with(Default::default); - // TODO: impl InlineHeader / From<&Ulp> for UlpRepr here? generally seems a bit anaemic. 
+ inner.ulp = Some(match ulp { Ulp::Tcp(IngotHeader::Repr(t)) => UlpRepr::Tcp(*t), Ulp::Tcp(IngotHeader::Raw(t)) => { @@ -1890,7 +1891,6 @@ impl Packet2> { UlpRepr::IcmpV6((&t).into()) } }); - // inner.ulp = Some((&ulp).into()); force_serialize = true; rewind += init_lens.inner_ulp; } @@ -1921,7 +1921,9 @@ impl Packet2> { // This needs a fuller InlineHeader due to EHs... // We can't actually do structural mods here today using OPTE. - L3::Ipv6(IngotHeader::Raw(v6)) => todo!(), // L3Repr::Ipv6((&v6).into()), + L3::Ipv6(IngotHeader::Raw(v6)) => { + L3Repr::Ipv6(v6.to_owned(None)?) + } }); force_serialize = true; rewind += init_lens.inner_l3; @@ -2015,22 +2017,20 @@ impl Packet2> { InlineHeader::Raw(_) => todo!(), }); - force_serialize = true; rewind += init_lens.outer_eth; } None if init_lens.outer_eth != 0 => { - force_serialize = true; rewind += init_lens.outer_eth; } _ => {} } - EmitSpec { + Ok(EmitSpec { rewind: rewind as u16, payload_len: payload_len as u16, encapped_len: encapped_len as u16, push_spec, - } + }) } pub fn len(&self) -> usize { @@ -2435,7 +2435,7 @@ fn csum_minus_hdr(ulp: &ValidUlp) -> Option { } } -trait QueryLen { +pub trait QueryLen { fn len(&self) -> usize; } @@ -2486,6 +2486,12 @@ pub struct EmittestSpec { pub ulp_len: u32, } +impl Default for EmittestSpec { + fn default() -> Self { + Self { spec: EmitterSpec::NoOp, l4_hash: 0, rewind: 0, ulp_len: 0 } + } +} + impl EmittestSpec { #[inline] #[must_use] @@ -2500,7 +2506,6 @@ impl EmittestSpec { while to_rewind != 0 { let this = reader.next(); let Some(node) = this else { - to_rewind = 0; break; }; @@ -2512,25 +2517,30 @@ impl EmittestSpec { slots.push(node).unwrap(); } } - - // TODO: put available layers into said slots? } - let mut out = match &self.spec { + // TODO: put available layers into said slots? 
+ pkt.drop_empty_segments(); + + let out = match &self.spec { EmitterSpec::Fastpath(push_spec) => { push_spec.encap.prepend(pkt, self.ulp_len as usize) } EmitterSpec::Slowpath(push_spec) => { // TODO: - // - remove all zero-length nodes. // - actually push in to existing slots we rewound past if needed. - // - actually support pushing dirty segments apart from the encap. - let needed_push = push_spec.outer_eth.packet_length() + let mut needed_push = push_spec.outer_eth.packet_length() + push_spec.outer_ip.packet_length() + push_spec.outer_encap.packet_length(); - let needed_alloc = needed_push; //.saturating_sub(pkt.headroom()); - let mut space_in_front = needed_push - needed_alloc; + + if let Some(inner_new) = &push_spec.inner { + needed_push += inner_new.eth.packet_length() + + inner_new.l3.packet_length() + + inner_new.ulp.packet_length(); + } + + let needed_alloc = needed_push; let mut prepend = if needed_alloc > 0 { let mut new_mblk = MsgBlk::new_ethernet(needed_alloc); @@ -2540,9 +2550,34 @@ impl EmittestSpec { None }; - // NOT NEEDED TODAY. 
if let Some(inner_new) = &push_spec.inner { - todo!() + if let Some(inner_ulp) = &inner_new.ulp { + let target = if prepend.is_none() { + &mut pkt + } else { + prepend.as_mut().unwrap() + }; + + target.emit_front(inner_ulp).unwrap(); + } + + if let Some(inner_l3) = &inner_new.l3 { + let target = if prepend.is_none() { + &mut pkt + } else { + prepend.as_mut().unwrap() + }; + + target.emit_front(inner_l3).unwrap(); + } + + let target = if prepend.is_none() { + &mut pkt + } else { + prepend.as_mut().unwrap() + }; + + target.emit_front(&inner_new.eth).unwrap(); } if let Some(outer_encap) = &push_spec.outer_encap { @@ -2551,13 +2586,9 @@ impl EmittestSpec { meta: &outer_encap, }; - let l = encap.packet_length(); - let target = if prepend.is_none() { - space_in_front -= l; &mut pkt } else { - space_in_front = 0; prepend.as_mut().unwrap() }; @@ -2565,12 +2596,9 @@ impl EmittestSpec { } if let Some(outer_ip) = &push_spec.outer_ip { - let l = outer_ip.packet_length(); let target = if prepend.is_none() { - space_in_front -= l; &mut pkt } else { - space_in_front = 0; prepend.as_mut().unwrap() }; @@ -2578,12 +2606,9 @@ impl EmittestSpec { } if let Some(outer_eth) = &push_spec.outer_eth { - let l = outer_eth.packet_length(); let target = if prepend.is_none() { - space_in_front -= l; &mut pkt } else { - space_in_front = 0; prepend.as_mut().unwrap() }; @@ -2597,10 +2622,9 @@ impl EmittestSpec { pkt } } + EmitterSpec::NoOp => pkt, }; - out.drop_empty_segments(); - out } @@ -2617,6 +2641,7 @@ impl EmittestSpec { Some(EncapMeta::Geneve(g)) => Some(g.vni), _ => None, }, + EmitterSpec::NoOp => None, } } @@ -2633,6 +2658,7 @@ impl EmittestSpec { Some(L3Repr::Ipv6(v6)) => Some((v6.source, v6.destination)), _ => None, }, + EmitterSpec::NoOp => None, } } } @@ -2641,6 +2667,7 @@ impl EmittestSpec { pub enum EmitterSpec { Fastpath(Arc), Slowpath(Box), + NoOp, } #[derive(Clone, Debug)] @@ -2651,113 +2678,6 @@ pub struct EmitSpec { pub push_spec: OpteEmit, } -impl EmitSpec { - #[inline] - pub 
fn apply(&mut self, mut pkt: MsgBlk) -> MsgBlk { - // Rewind - { - let mut slots = heapless::Vec::<&mut MsgBlkNode, 6>::new(); - let mut to_rewind = self.rewind as usize; - - if to_rewind > 0 { - let mut reader = pkt.iter_mut(); - while to_rewind != 0 { - let this = reader.next(); - let Some(node) = this else { - to_rewind = 0; - break; - }; - - let has = node.len(); - let droppable = to_rewind.min(has); - node.drop_front_bytes(droppable); - to_rewind -= droppable; - - slots.push(node).unwrap(); - } - } - - // TODO: put available layers into said slots? - } - - // TODO: - // - remove all zero-length nodes. - // - actually push in to existing slots we rewound past if needed. - // - actually support pushing dirty segments apart from the encap. - - let needed_push = self.push_spec.outer_eth.packet_length() - + self.push_spec.outer_ip.packet_length() - + self.push_spec.outer_encap.packet_length(); - let needed_alloc = needed_push; //.saturating_sub(pkt.headroom()); - let mut space_in_front = needed_push - needed_alloc; - - let mut prepend = if needed_alloc > 0 { - let mut new_mblk = MsgBlk::new_ethernet(needed_alloc); - new_mblk.pop_all(); - Some(new_mblk) - } else { - None - }; - - // NOT NEEDED TODAY. 
- if let Some(inner_new) = &self.push_spec.inner { - todo!() - } - - if let Some(outer_encap) = &self.push_spec.outer_encap { - let encap = SizeHoldingEncap { - encapped_len: self.encapped_len, - meta: &outer_encap, - }; - - let l = encap.packet_length(); - - let target = if prepend.is_none() { - space_in_front -= l; - &mut pkt - } else { - space_in_front = 0; - prepend.as_mut().unwrap() - }; - - target.emit_front(&encap).unwrap(); - } - - if let Some(outer_ip) = &self.push_spec.outer_ip { - let l = outer_ip.packet_length(); - let target = if prepend.is_none() { - space_in_front -= l; - &mut pkt - } else { - space_in_front = 0; - prepend.as_mut().unwrap() - }; - - target.emit_front(outer_ip).unwrap(); - } - - if let Some(outer_eth) = &self.push_spec.outer_eth { - let l = outer_eth.packet_length(); - let target = if prepend.is_none() { - space_in_front -= l; - &mut pkt - } else { - space_in_front = 0; - prepend.as_mut().unwrap() - }; - - target.emit_front(outer_eth).unwrap(); - } - - if let Some(mut prepend) = prepend { - prepend.extend_if_one(pkt); - prepend - } else { - pkt - } - } -} - #[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Ord, PartialOrd, Default)] pub enum Memoised { #[default] @@ -3041,7 +2961,7 @@ impl HeaderActionModify } } ( - InlineHeader::Raw(ValidEncapMeta::Geneve(u, g)), + InlineHeader::Raw(ValidEncapMeta::Geneve(_, g)), EncapMod::Geneve(mod_spec), ) => { if let Some(vni) = mod_spec.vni { diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index eea1d7ec..91ae33d7 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -49,9 +49,7 @@ use core::num::ParseIntError; use ingot::tcp::TcpRef; use ingot::types::Read; use ingot_packet::MsgBlk; -use ingot_packet::NoEncap; use ingot_packet::OpteMeta; -use ingot_packet::OpteParsed; use ingot_packet::OpteParsed2; use ingot_packet::Packet2; use ingot_packet::Parsed2; @@ -354,21 +352,6 @@ pub trait LightweightMeta: Into> { /// itself. 
pub struct GenericUlp {} -impl GenericUlp { - /// Parse a generic L2 + L3 + L4 packet, storing the headers in - /// the inner position. - fn parse_ulp<'a, T: Read + 'a>( - &self, - rdr: T, - ) -> Result, ParseError> - where - T::Chunk: ingot::types::IntoBufPointer<'a>, - { - let v = NoEncap::parse_read(rdr)?; - Ok(OpteMeta::convert_ingot(v)) - } -} - impl NetworkParser for GenericUlp { type InMeta = ValidNoEncap; type OutMeta = ValidNoEncap; diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 72ace71d..ee2c8216 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -41,8 +41,6 @@ use super::layer::RuleId; use super::packet::BodyTransform; use super::packet::BodyTransformError; use super::packet::InnerFlowId; -use super::packet::Packet; -use super::packet::Parsed; use super::packet::FLOW_ID_DEFAULT; use super::rule::Action; use super::rule::CompiledTransform; @@ -114,6 +112,7 @@ pub enum ProcessError { WriteError(super::packet::WriteError), MissingFlow(InnerFlowId), TcpFlow(TcpFlowStateError), + BadEmitSpec, FlowTableFull { kind: &'static str, limit: u64 }, } @@ -174,7 +173,10 @@ pub enum ProcessResult { impl From for ProcessResult { fn from(hpa: HdlPktAction) -> Self { match hpa { - HdlPktAction::Allow => Self::Modified(todo!()), + // TODO: In theory HdlPacket::Allow should have an emit spec, too. + // We are not using any op other than Hairpin, so kick that particular + // can down the road. 
+ HdlPktAction::Allow => Self::Modified(EmittestSpec::default()), HdlPktAction::Deny => Self::Drop { reason: DropReason::HandlePkt }, HdlPktAction::Hairpin(pkt) => Self::Hairpin(pkt), } @@ -182,7 +184,6 @@ impl From for ProcessResult { } enum InternalProcessResult { - Bypass, Drop { reason: DropReason }, Modified, Hairpin(MsgBlk), @@ -571,7 +572,7 @@ impl Display for UftEntry { impl fmt::Debug for UftEntry { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let UftEntry { pair, xforms, l4_hash, epoch, tcp_flow } = self; + let UftEntry { pair: _pair, xforms, l4_hash, epoch, tcp_flow } = self; f.debug_struct("UftEntry") .field("pair", &"") @@ -906,10 +907,10 @@ impl Port { data: &FlowTable, dir: Direction, msg: String, - pkt: &mut Packet, + pkt: &mut Packet2, ) { if unsafe { super::opte_panic_debug != 0 } { - super::err!("mblk: {}", pkt.mblk_ptr_str()); + super::err!("mblk: {}", pkt.mblk_addr()); super::err!("flow: {}", pkt.flow()); super::err!("meta: {:?}", pkt.meta()); super::err!("flows: {:?}", data); @@ -922,7 +923,7 @@ impl Port { fn tcp_err_probe( &self, dir: Direction, - pkt: Option<&Packet>, + pkt: Option<&Packet2>, flow: &InnerFlowId, msg: String, ) { @@ -1268,12 +1269,6 @@ impl Port { Direction::In => data.uft_in.get(&flow_before), }; - // enum FastPathDecision { - // CompiledUft { tx: Arc, l4_hash: u32 }, - // Uft { tx: Arc, l4_hash: u32 }, - // Slow, - // } - enum FastPathDecision { CompiledUft(Arc>>), Uft(Arc>>), @@ -1284,25 +1279,12 @@ impl Port { // We have a valid UFT entry of some kind -- clone out the // saved transforms so that we can drop the lock ASAP. Some(entry) if entry.state().epoch == epoch => { - // entry.hit(); - // let now = entry.last_hit(); - // The Fast Path. 
let xforms = &entry.state().xforms; - let out = if let Some(compiled) = xforms.compiled.as_ref() { + let out = if xforms.compiled.is_some() { FastPathDecision::CompiledUft(Arc::clone(entry)) - // FastPathDecision::CompiledUft { - // tx: Arc::clone(compiled), - // // tx: Arc::clone(entry), - // l4_hash: entry.state().l4_hash, - // } } else { FastPathDecision::Uft(Arc::clone(entry)) - // FastPathDecision::Uft { - // tx: Arc::clone(xforms), - // // tx: Arc::clone(entry), - // l4_hash: entry.state().l4_hash, - // } }; match dir { @@ -1359,10 +1341,9 @@ impl Port { } } - drop(data); + let _ = data; drop(lock.take()); - // entry.hit_at(process_start); self.uft_hit_probe(dir, &flow_before, epoch, &process_start); @@ -1398,9 +1379,7 @@ impl Port { } } } - _ => { - drop(data); - } + _ => {} } // If we're in here, we took a faster-path. We know the lock is dropped. @@ -1502,7 +1481,7 @@ impl Port { { Self::update_stats_in(&mut data.stats.vals, &res); } - drop(data); + drop(lock); pkt.update_checksums(); res } @@ -1518,7 +1497,7 @@ impl Port { { Self::update_stats_out(&mut data.stats.vals, &res); } - drop(data); + drop(lock); pkt.update_checksums(); res } @@ -1526,24 +1505,24 @@ impl Port { let flow_after = *pkt.flow(); - let res = res.map(|v| match v { - InternalProcessResult::Bypass => ProcessResult::Bypass, + let res = res.and_then(|v| match v { InternalProcessResult::Drop { reason } => { - ProcessResult::Drop { reason } + Ok(ProcessResult::Drop { reason }) } - InternalProcessResult::Hairpin(v) => ProcessResult::Hairpin(v), + InternalProcessResult::Hairpin(v) => Ok(ProcessResult::Hairpin(v)), InternalProcessResult::Modified => { let l4_hash = pkt.l4_hash(); - let emit_spec = pkt.emit_spec(); + let emit_spec = + pkt.emit_spec().map_err(|_| ProcessError::BadEmitSpec)?; // TODO: remove EmitSpec and have above method just spit out the new // variant. 
- ProcessResult::Modified(EmittestSpec { + Ok(ProcessResult::Modified(EmittestSpec { spec: EmitterSpec::Slowpath(emit_spec.push_spec.into()), l4_hash, rewind: emit_spec.rewind, ulp_len: emit_spec.encapped_len as u32, - }) + })) } }); self.port_process_return_probe( @@ -2172,7 +2151,7 @@ impl Port { } /// Attempts to lookup and update TCP flowstate in response to a given - /// packet. + /// packet from within the slowpath. /// /// Unexpected TCP segments on existing connections will be allowed, /// but will fire DTrace probes via `Self::tcp_err_probe`. @@ -2188,73 +2167,45 @@ impl Port { /// a packet as a UFT miss (e.g., `process_out_miss`) and reprocessing the flow. fn update_tcp_entry( &self, - mut data: PortDataOrSubset, + data: &mut PortData, tcp: &impl TcpRef, dir: &TcpDirection, pkt_len: u64, ) -> result::Result { - let tcp_flows = data.tcp_flows(); let (ufid_out, ufid_in) = match *dir { TcpDirection::In { ufid_in, ufid_out } => (ufid_out, Some(ufid_in)), TcpDirection::Out { ufid_out } => (ufid_out, None), }; - let Some(entry) = tcp_flows.get(ufid_out) else { + let Some(entry) = data.tcp_flows.get(ufid_out) else { return Err(ProcessError::MissingFlow(*ufid_out)); }; let entry = entry.clone(); - // TODO: need to hit this from a UFT entry. - // Work out atomics shortly... entry.hit(); let tfes_base = entry.state(); - // let next_state = tfes_base.update(); - - let mut tfes = tfes_base.inner.lock(); - match *dir { - TcpDirection::In { .. } => { - tfes.segs_in += 1; - tfes.bytes_in += pkt_len; - } - TcpDirection::Out { .. } => { - tfes.segs_out += 1; - tfes.bytes_out += pkt_len; - } - } - - let next_state = tfes.tcp_state.process( + let next_state = tfes_base.update( self.name_cstr.as_c_str(), - dir.dir(), - ufid_out, tcp, + dir.dir(), + pkt_len, + ufid_in, ); - if let Some(ufid_in) = ufid_in { - // We need to store the UFID of the inbound packet - // before it was processed so that we can retire the - // correct UFT/LFT entries upon connection - // termination. 
- tfes.inbound_ufid = Some(*ufid_in); - } - - drop(tfes); - let ufid_inbound = if matches!( next_state, Ok(TcpState::Closed) | Err(TcpFlowStateError::NewFlow { .. }) ) { // Due to order of operations, out_tcp_existing must // call uft_tcp_closed separately. - let entry = tcp_flows.remove(ufid_out).unwrap(); + let entry = data.tcp_flows.remove(ufid_out).unwrap(); let lock = entry.state().inner.lock(); let state_ufid = lock.inbound_ufid; - if let PortDataOrSubset::Port(data) = data { - // The inbound side of the UFT is based on - // the network-side of the flow (pre-processing). - self.uft_tcp_closed(data, ufid_out, state_ufid.as_ref()); - } + // The inbound side of the UFT is based on + // the network-side of the flow (pre-processing). + self.uft_tcp_closed(data, ufid_out, state_ufid.as_ref()); ufid_in.copied().or(state_ufid) } else { @@ -2305,15 +2256,10 @@ impl Port { let dir = TcpDirection::In { ufid_in, ufid_out: &ufid_out }; - match self.update_tcp_entry( - PortDataOrSubset::Port(data), - tcp, - &dir, - pkt_len, - ) { + match self.update_tcp_entry(data, tcp, &dir, pkt_len) { // We need to create a new TCP entry here because we can't call // `process_in_miss` on the already-modified packet. - e @ Err( + Err( ProcessError::TcpFlow(TcpFlowStateError::NewFlow { .. }) | ProcessError::MissingFlow(_), ) => self.create_new_tcp_entry( @@ -2440,16 +2386,14 @@ impl Port { // already encodes a shortcut from `Closed` to `Established. 
Err(ProcessError::TcpFlow(err)) => { let e = format!("{err}"); - // TODO(kyle) - // self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); + self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); Ok(InternalProcessResult::Drop { reason: DropReason::TcpErr, }) } Err(ProcessError::FlowTableFull { kind, limit }) => { let e = format!("{kind} flow table full ({limit} entries)"); - // TODO(kyle) - // self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); + self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); Ok(InternalProcessResult::Drop { reason: DropReason::TcpErr, }) @@ -2503,24 +2447,6 @@ impl Port { } } - // Process the TCP packet for the purposes of connection tracking - // when an outbound UFT entry exists. - fn process_out_tcp_existing( - &self, - tcp_flows: &mut FlowTable, - ufid_out: &InnerFlowId, - pmeta: &PacketHeaders2, - pkt_len: u64, - ) -> result::Result { - let tcp = pmeta.inner_tcp().unwrap(); - self.update_tcp_entry( - PortDataOrSubset::Tcp(tcp_flows), - tcp, - &TcpDirection::Out { ufid_out }, - pkt_len, - ) - } - // Process the TCP packet for the purposes of connection tracking // when an outbound UFT entry was just created. fn process_out_tcp_new( @@ -2533,12 +2459,7 @@ impl Port { let tcp = pmeta.inner_tcp().unwrap(); let dir = TcpDirection::Out { ufid_out }; - match self.update_tcp_entry( - PortDataOrSubset::Port(data), - tcp, - &dir, - pkt_len, - ) { + match self.update_tcp_entry(data, tcp, &dir, pkt_len) { Err( ProcessError::TcpFlow(TcpFlowStateError::NewFlow { .. }) | ProcessError::MissingFlow(_), @@ -2592,24 +2513,21 @@ impl Port { // already encodes a shortcut from `Closed` to `Established. 
Err(ProcessError::TcpFlow(err)) => { let e = format!("{err}"); - // TODO(kyle) - // self.tcp_err(&data.tcp_flows, Out, e, pkt); + self.tcp_err(&data.tcp_flows, Out, e, pkt); return Ok(InternalProcessResult::Drop { reason: DropReason::TcpErr, }); } Err(ProcessError::MissingFlow(flow_id)) => { let e = format!("Missing TCP flow ID: {flow_id}"); - // TODO(kyle) - // self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); + self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); return Ok(InternalProcessResult::Drop { reason: DropReason::TcpErr, }); } Err(ProcessError::FlowTableFull { kind, limit }) => { let e = format!("{kind} flow table full ({limit} entries)"); - // TODO(kyle) - // self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); + self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); return Ok(InternalProcessResult::Drop { reason: DropReason::TcpErr, }); @@ -2756,8 +2674,6 @@ impl Port { res: &result::Result, ) { match res { - Ok(InternalProcessResult::Bypass) => stats.in_bypass += 1, - Ok(InternalProcessResult::Drop { reason }) => { stats.in_drop += 1; @@ -2789,8 +2705,6 @@ impl Port { res: &result::Result, ) { match res { - Ok(InternalProcessResult::Bypass) => stats.out_bypass += 1, - Ok(InternalProcessResult::Drop { reason }) => { stats.out_drop += 1; @@ -2870,22 +2784,6 @@ impl Port { } } -/// Helper enum used to delay UFT entry removal in case of -/// `tcp_out_existing`. -enum PortDataOrSubset<'a> { - Port(&'a mut PortData), - Tcp(&'a mut FlowTable), -} - -impl<'a> PortDataOrSubset<'a> { - fn tcp_flows(&mut self) -> &mut FlowTable { - match self { - Self::Port(p) => &mut p.tcp_flows, - Self::Tcp(t) => t, - } - } -} - /// Helper enum for encoding what UFIDs are available when /// updating TCP flow state. 
enum TcpDirection<'a> { diff --git a/lib/opte/src/engine/predicate.rs b/lib/opte/src/engine/predicate.rs index c9548cc1..408bc765 100644 --- a/lib/opte/src/engine/predicate.rs +++ b/lib/opte/src/engine/predicate.rs @@ -23,7 +23,6 @@ use super::ip4::Ipv4Cidr; use super::ip4::Protocol; use super::ip6::Ipv6Addr; use super::ip6::Ipv6Cidr; -use super::packet::PacketRead; use super::port::meta::ActionMeta; use alloc::boxed::Box; use alloc::string::String; diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 29a85034..e254f040 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -6,8 +6,6 @@ //! Rules and actions. -use crate::engine::ingot_base::Ipv4Mut; - use super::ether::EtherMeta; use super::ether::EtherMod; use super::flow_table::StateSummary; @@ -46,7 +44,6 @@ use core::fmt::Debug; use core::fmt::Display; use illumos_sys_hdrs::c_char; use illumos_sys_hdrs::uintptr_t; -use ingot::types::HeaderLen; use ingot::types::InlineHeader; use ingot::types::Read; use opte_api::Direction; diff --git a/lib/oxide-vpc/src/engine/overlay.rs b/lib/oxide-vpc/src/engine/overlay.rs index 6230812e..bfa83cd9 100644 --- a/lib/oxide-vpc/src/engine/overlay.rs +++ b/lib/oxide-vpc/src/engine/overlay.rs @@ -203,10 +203,9 @@ impl StaticAction for EncapAction { // The encap action is only used for outgoing. _dir: Direction, flow_id: &InnerFlowId, - pkt_meta: &PacketHeaders2, + _pkt_meta: &PacketHeaders2, action_meta: &mut ActionMeta, ) -> GenHtResult { - // TODO: can't access the memoised form from here.... 
let f_hash = flow_id.crc32(); // The router layer determines a RouterTarget and stores it in diff --git a/lib/oxide-vpc/tests/firewall_tests.rs b/lib/oxide-vpc/tests/firewall_tests.rs index d4c1a6df..aad20b71 100644 --- a/lib/oxide-vpc/tests/firewall_tests.rs +++ b/lib/oxide-vpc/tests/firewall_tests.rs @@ -290,23 +290,12 @@ fn firewall_vni_outbound() { // ================================================================ // Create a packet that is leaving g1 with g2 as its destination. // ================================================================ - let phys_src = TestIpPhys { - ip: g1_cfg.phys_ip, - mac: g1_cfg.guest_mac, - vni: g1_cfg.vni, - }; - let phys_dst = TestIpPhys { - ip: g2_cfg.phys_ip, - mac: g2_cfg.guest_mac, - vni: g2_cfg.vni, - }; let mut pkt1_m = http_syn2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, g1_cfg.guest_mac, g2_cfg.ipv4().private_ip, ); - // pkt1 = encap(pkt1, phys_src, phys_dst); let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); // ================================================================ diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index de246c6f..b639e653 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -21,7 +21,6 @@ use opte::ddi::time::Moment; use opte::engine::arp::ArpEthIpv4; use opte::engine::arp::ArpEthIpv4Raw; use opte::engine::dhcpv6; -use opte::engine::ether::EtherHdr; use opte::engine::flow_table::FLOW_DEF_EXPIRE_SECS; use opte::engine::geneve::Vni; use opte::engine::ingot_base::Ethernet; @@ -36,14 +35,10 @@ use opte::engine::ingot_packet::MsgBlk; use opte::engine::ingot_packet::Packet2; use opte::engine::ingot_packet::ParsedMblk; use opte::engine::ip4::Ipv4Addr; -use opte::engine::ip4::Ipv4Hdr; -use opte::engine::ip6::Ipv6Hdr; use opte::engine::packet::InnerFlowId; -use opte::engine::packet::PacketRead; use opte::engine::port::ProcessError; use opte::engine::tcp::TcpState; use 
opte::engine::tcp::TIME_WAIT_EXPIRE_SECS; -use opte::engine::udp::UdpHdr; use opte::engine::Direction; use opte::ingot::geneve::GeneveRef; use opte::ingot::icmp::IcmpV6Ref; @@ -78,13 +73,6 @@ use uuid::Uuid; use zerocopy::FromBytes; use zerocopy::IntoBytes; -const IP4_SZ: usize = EtherHdr::SIZE + Ipv4Hdr::BASE_SIZE; -const IP6_SZ: usize = EtherHdr::SIZE + Ipv6Hdr::BASE_SIZE; -const TCP4_SZ: usize = IP4_SZ + TcpHdr::BASE_SIZE; -const TCP6_SZ: usize = IP6_SZ + TcpHdr::BASE_SIZE; - -const VPC_ENCAP_SZ: usize = IP6_SZ + UdpHdr::SIZE + GeneveHdr::BASE_SIZE; - // If we are running `cargo test`, then make sure to // register the USDT probes before running any tests. #[cfg(test)] @@ -1234,17 +1222,6 @@ fn external_ip_receive_and_reply_on_all() { fn external_ip_balanced_over_floating_ips() { let (mut g1, g1_cfg, ext_v4, ext_v6) = multi_external_ip_setup(8, true); - let bsvc_phys = TestIpPhys { - ip: BS_IP_ADDR, - mac: BS_MAC_ADDR, - vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), - }; - let g1_phys = TestIpPhys { - ip: g1_cfg.phys_ip, - mac: g1_cfg.guest_mac, - vni: g1_cfg.vni, - }; - let partner_ipv4: IpAddr = "93.184.216.34".parse().unwrap(); let partner_ipv6: IpAddr = "2606:2800:220:1:248:1893:25c8:1946".parse().unwrap(); @@ -1517,7 +1494,6 @@ fn unpack_and_verify_icmp( params: &IcmpSnatParams, dir: Direction, seq_no: u16, - body_seg: usize, ) { // Note the reversed direction -- parse the expected *output* format. 
let parsed = match dir { @@ -1526,13 +1502,12 @@ fn unpack_and_verify_icmp( }; let meta = parsed.meta(); - let (src_eth, dst_eth, src_ip, dst_ip, encapped, ident) = match dir { + let (src_eth, dst_eth, src_ip, dst_ip, ident) = match dir { Direction::Out => ( cfg.guest_mac, BS_MAC_ADDR, params.public_ip, params.partner_ip, - true, params.snat_port, ), Direction::In => ( @@ -1540,7 +1515,6 @@ fn unpack_and_verify_icmp( cfg.guest_mac, params.partner_ip, params.private_ip, - false, params.icmp_id, ), }; @@ -1556,7 +1530,7 @@ fn unpack_and_verify_icmp( assert_eq!(IpAddr::from(meta.destination()), dst_ip); assert_eq!(meta.protocol(), IngotIpProto::ICMP); - unpack_and_verify_icmp4(&parsed, ident, seq_no, encapped, body_seg); + unpack_and_verify_icmp4(&parsed, ident, seq_no); } (IpAddr::Ip6(_), L3::Ipv6(meta)) => { assert_eq!(eth.ethertype(), Ethertype::IPV6); @@ -1568,16 +1542,14 @@ fn unpack_and_verify_icmp( &parsed, ident, seq_no, - encapped, - body_seg, meta.source(), meta.destination(), ); } - (IpAddr::Ip4(_), ip6) => { + (IpAddr::Ip4(_), _) => { panic!("expected inner IPv4 metadata, got IPv6") } - (IpAddr::Ip6(_), ip4) => { + (IpAddr::Ip6(_), _) => { panic!("expected inner IPv6 metadata, got IPv4") } } @@ -1587,8 +1559,6 @@ fn unpack_and_verify_icmp4( pkt: &Packet2, expected_ident: u16, seq_no: u16, - encapped: bool, - body_seg: usize, ) { // Because we treat ICMPv4 as a full-fledged ULP, we need to // unsplit the emitted header from the body. @@ -1606,8 +1576,6 @@ fn unpack_and_verify_icmp6( pkt: &Packet2, expected_ident: u16, seq_no: u16, - encapped: bool, - body_seg: usize, src_ip: Ipv6Addr, dst_ip: Ipv6Addr, ) { @@ -1723,7 +1691,7 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { ] ); - unpack_and_verify_icmp(&mut pkt1_m, &g1_cfg, &params, Out, seq_no, 0); + unpack_and_verify_icmp(&mut pkt1_m, &g1_cfg, &params, Out, seq_no); // ================================================================ // Verify echo reply rewrite. 
@@ -1759,7 +1727,7 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { pcap.add_pkt(&pkt2_m); incr!(g1, ["uft.in", "stats.port.in_modified, stats.port.in_uft_miss"]); - unpack_and_verify_icmp(&mut pkt2_m, &g1_cfg, &params, In, seq_no, 0); + unpack_and_verify_icmp(&mut pkt2_m, &g1_cfg, &params, In, seq_no); // ================================================================ // Send ICMP Echo Req a second time. We want to verify that a) the @@ -1787,7 +1755,7 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); assert_eq!(g1.port.stats_snap().out_uft_hit, 1); - unpack_and_verify_icmp(&mut pkt3_m, &g1_cfg, &params, Out, seq_no, 1); + unpack_and_verify_icmp(&mut pkt3_m, &g1_cfg, &params, Out, seq_no); // ================================================================ // Process ICMP Echo Reply a second time. Once again, this time we @@ -1815,7 +1783,7 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); assert_eq!(g1.port.stats_snap().in_uft_hit, 1); - unpack_and_verify_icmp(&mut pkt4_m, &g1_cfg, &params, In, seq_no, 0); + unpack_and_verify_icmp(&mut pkt4_m, &g1_cfg, &params, In, seq_no); // ================================================================ // Insert a new packet along the same S/D pair: this should occupy @@ -1850,7 +1818,7 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { ] ); - unpack_and_verify_icmp(&mut pkt5_m, &g1_cfg, &new_params, Out, seq_no, 0); + unpack_and_verify_icmp(&mut pkt5_m, &g1_cfg, &new_params, Out, seq_no); } // TODO(kyle) @@ -2712,9 +2680,9 @@ fn write_dhcpv6_packet( let total_len = msg.buffer_len() + (&eth, &ip, &udp).packet_length(); let mut pkt = MsgBlk::new_ethernet(total_len); - pkt.emit_back((eth, ip, udp)); + pkt.emit_back((eth, ip, udp)).unwrap(); let l = pkt.len(); - pkt.resize(total_len); + pkt.resize(total_len).unwrap(); msg.copy_into(&mut pkt[l..]); pkt From 49680e106356151eda5b2d599f7a8f7eecb4fea6 Mon Sep 17 
00:00:00 2001 From: Kyle Simpson Date: Fri, 18 Oct 2024 09:12:54 +0100 Subject: [PATCH 055/115] Strip dead code, fill out the `todo!`s --- lib/opte/src/engine/dhcpv6/protocol.rs | 2 - lib/opte/src/engine/geneve.rs | 68 ++++++++ lib/opte/src/engine/ingot_packet.rs | 231 ++++--------------------- lib/opte/src/engine/layer.rs | 6 - lib/opte/src/engine/nat.rs | 11 +- lib/opte/src/engine/packet.rs | 5 +- lib/opte/src/engine/rule.rs | 14 +- lib/opte/src/engine/snat.rs | 13 +- 8 files changed, 116 insertions(+), 234 deletions(-) diff --git a/lib/opte/src/engine/dhcpv6/protocol.rs b/lib/opte/src/engine/dhcpv6/protocol.rs index 8df11c34..eb1a9e79 100644 --- a/lib/opte/src/engine/dhcpv6/protocol.rs +++ b/lib/opte/src/engine/dhcpv6/protocol.rs @@ -708,13 +708,11 @@ mod test { use super::Message; use super::MessageType; use super::OptionCode; - use super::Packet; use crate::engine::dhcpv6::test_data; use crate::engine::ingot_packet::MsgBlk; use crate::engine::ingot_packet::Packet2; use crate::engine::port::meta::ActionMeta; use crate::engine::GenericUlp; - use opte_api::Direction::*; // Test that we correctly parse out the entire Solicit message from a // snooped packet. 
diff --git a/lib/opte/src/engine/geneve.rs b/lib/opte/src/engine/geneve.rs index 9afa3717..7c6678de 100644 --- a/lib/opte/src/engine/geneve.rs +++ b/lib/opte/src/engine/geneve.rs @@ -18,9 +18,15 @@ use super::udp::UdpHdr; use super::udp::UdpMeta; use crate::d_error::DError; use core::mem; +use ingot::geneve::Geneve; +use ingot::geneve::GeneveOptRef; +use ingot::geneve::GeneveRef; +use ingot::geneve::ValidGeneve; +use ingot::types::Header; pub use opte_api::Vni; use serde::Deserialize; use serde::Serialize; +use zerocopy::ByteSlice; use zerocopy::FromBytes; use zerocopy::Immutable; use zerocopy::IntoBytes; @@ -500,6 +506,68 @@ impl<'a> RawHeader<'a> for GeneveOptHdrRaw { } } +// We probably want a more general way to retrieve all facts we care about +// from the geneve options -- we only have the one today, however. +#[inline] +pub fn geneve_has_oxide_external(pkt: &Geneve) -> bool { + for opt in pkt.options.iter() { + let out = geneve_opt_is_oxide_external::<&[u8]>(opt); + if out { + return true; + } + } + + false +} + +#[inline] +pub fn valid_geneve_has_oxide_external( + pkt: &ValidGeneve, +) -> bool { + let mut out = false; + + match pkt.options_ref() { + ingot::types::FieldRef::Repr(g) => { + for opt in g.iter() { + out = geneve_opt_is_oxide_external::<&[u8]>(opt); + if out { + break; + } + } + } + ingot::types::FieldRef::Raw(Header::Repr(g)) => { + for opt in g.iter() { + out = geneve_opt_is_oxide_external::<&[u8]>(opt); + if out { + break; + } + } + } + ingot::types::FieldRef::Raw(Header::Raw(g)) => { + for opt in g.iter(None) { + let Ok(opt) = opt else { + break; + }; + + out = geneve_opt_is_oxide_external(&opt); + if out { + break; + } + } + } + } + + out +} + +#[inline(always)] +pub fn geneve_opt_is_oxide_external( + opt: &impl GeneveOptRef, +) -> bool { + opt.class() == GENEVE_OPT_CLASS_OXIDE + && opt.option_type().0 == OxideOption::External.opt_type() +} + #[cfg(test)] mod test { use core::matches; diff --git a/lib/opte/src/engine/ingot_packet.rs 
b/lib/opte/src/engine/ingot_packet.rs index 3a07509f..a824d183 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -54,6 +54,8 @@ use super::rule::HdrTransform; use super::rule::HdrTransformError; use super::LightweightMeta; use super::NetworkParser; +use crate::engine::geneve::valid_geneve_has_oxide_external; +use crate::engine::geneve::GeneveMeta; #[cfg(any(feature = "std", test))] use crate::engine::packet::mock_freemsg; use alloc::boxed::Box; @@ -340,7 +342,17 @@ impl LightweightMeta for ValidNoEncap { impl From> for OpteMeta { #[inline] fn from(value: ValidGeneveOverV6) -> Self { - GeneveOverV6::from(value).into() + OpteMeta { + outer_eth: Some(value.outer_eth.into()), + outer_l3: Some(L3::Ipv6(value.outer_v6.into())), + outer_encap: Some(InlineHeader::Raw(ValidEncapMeta::Geneve( + value.outer_udp, + value.outer_encap, + ))), + inner_eth: value.inner_eth.into(), + inner_l3: Some(value.inner_l3.into()), + inner_ulp: Some(value.inner_ulp.into()), + } } } @@ -942,17 +954,6 @@ impl Drop for MsgBlk { } } -pub struct OpteUnified { - pub outer_eth: Option>, - pub outer_v6: Option>, - pub outer_udp: Option>, - pub outer_encap: Option>, - - pub inner_eth: EthernetPacket, - pub inner_l3: Option>, - pub inner_ulp: Option>, -} - pub struct OpteUnifiedLengths { pub outer_eth: usize, pub outer_l3: usize, @@ -975,44 +976,15 @@ impl OpteUnifiedLengths { } } -// TODO: Choices (L3, etc.) don't have Debug in all the right places yet. -impl core::fmt::Debug for OpteUnified { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - f.write_str("OpteUnified{ .. }") - } -} - -// THIS IS THE GOAL. - -// IE -// pub struct OpteEmit { -// outer_eth: Option, -// outer_ip: Option, -// outer_encap: Option, - -// // We can (but do not often) push/pop inner meta. -// // Splitting minimises struct size in the general case. 
-// inner: Option>, -// } - -// pub struct OpteInnerEmit { -// eth: Ethernet, -// l3: Option, -// ulp: Option, -// } - pub enum ValidEncapMeta { Geneve(ValidUdp, ValidGeneve), } pub struct OpteMeta { pub outer_eth: Option>>, - // pub outer_eth: Option>>, pub outer_l3: Option>, - // pub outer_l3: Option>>, - // pub outer_v6: Option>>, pub outer_encap: Option>>, - // pub outer_encap: Option>>, + pub inner_eth: EthernetPacket, pub inner_l3: Option>, pub inner_ulp: Option>, @@ -1140,36 +1112,6 @@ impl HeaderLen for ValidEncapMeta { } } -impl From> for OpteUnified { - #[inline] - fn from(value: GeneveOverV6) -> Self { - Self { - outer_eth: Some(value.outer_eth), - outer_v6: Some(L3::Ipv6(value.outer_v6)), - outer_udp: Some(value.outer_udp), - outer_encap: Some(value.outer_encap), - inner_eth: value.inner_eth, - inner_l3: Some(value.inner_l3), - inner_ulp: Some(value.inner_ulp), - } - } -} - -impl From> for OpteUnified { - #[inline] - fn from(value: NoEncap) -> Self { - Self { - outer_eth: None, - outer_v6: None, - outer_udp: None, - outer_encap: None, - inner_eth: value.inner_eth, - inner_l3: value.inner_l3, - inner_ulp: value.inner_ulp, - } - } -} - // This really needs a rethink, but also I just need to get this working... struct PktBodyWalker { base: Cell, T)>>, @@ -1298,60 +1240,6 @@ impl From> for OpteMeta { } } -impl From> for OpteMeta { - #[inline] - fn from(value: GeneveOverV6) -> Self { - // These are practically all Valid, anyhow. 
- let outer_encap = match (value.outer_udp, value.outer_encap) { - (ingot::types::Header::Raw(u), ingot::types::Header::Raw(g)) => { - Some(InlineHeader::Raw(ValidEncapMeta::Geneve(u, g))) - } - _ => todo!(), - }; - - // let outer_l3 = match value.outer_v6 { - // ingot::types::Header::Repr(v) => { - // Some(InlineHeader::Repr(L3Repr::Ipv6(*v))) - // } - // ingot::types::Header::Raw(v) => { - // Some(InlineHeader::Raw(ValidL3::Ipv6(v))) - // } - // }; - - let outer_l3 = Some(L3::Ipv6(value.outer_v6)); - - OpteMeta { - outer_eth: Some(value.outer_eth.into()), - outer_l3, - outer_encap, - inner_eth: value.inner_eth, - inner_l3: Some(value.inner_l3), - inner_ulp: Some(value.inner_ulp), - } - } -} - -// impl From, T>> for PacketHeaders { -// fn from(value: IngotParsed, T>) -> Self { -// let IngotParsed { stack: HeaderStack(headers), data, last_chunk } = -// value; -// let initial_lens = OpteUnifiedLengths { -// outer_eth: headers.outer_eth.packet_length(), -// outer_l3: headers.outer_v6.packet_length(), -// outer_encap: headers.outer_udp.packet_length() -// + headers.outer_encap.packet_length(), -// inner_eth: headers.inner_eth.packet_length(), -// inner_l3: headers.inner_l3.packet_length(), -// inner_ulp: headers.inner_ulp.packet_length(), -// }; -// let body = PktBodyWalker { -// base: Some((last_chunk, data)).into(), -// slice: Default::default(), -// }; -// Self { headers, initial_lens, body } -// } -// } - impl core::fmt::Debug for PacketHeaders { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.write_str("PacketHeaders(..)") @@ -1657,63 +1545,6 @@ impl<'a, T: Read + 'a> Packet2> where T::Chunk: ingot::types::IntoBufPointer<'a> + ByteSliceMut, { - // #[inline] - // pub fn parse( - // self, - // dir: Direction, - // net: impl NetworkParser, - // ) -> Result>, ParseError> { - // let Packet2 { state: Initialized2 { len, inner } } = self; - // let IngotParsed { stack: HeaderStack(headers), data, last_chunk } = - // match dir { - // 
Direction::Out => net.parse_outbound(inner)?, - // Direction::In => net.parse_inbound(inner)?, - // }; - - // let initial_lens = Some(OpteUnifiedLengths { - // outer_eth: headers.outer_eth.packet_length(), - // outer_l3: headers.outer_l3.packet_length(), - // outer_encap: headers.outer_encap.packet_length(), - // inner_eth: headers.inner_eth.packet_length(), - // inner_l3: headers.inner_l3.packet_length(), - // inner_ulp: headers.inner_ulp.packet_length(), - // } - // .into()); - - // let body = PktBodyWalker { - // base: Some((last_chunk, data)).into(), - // slice: Default::default(), - // }; - - // let meta = Box::new(PacketHeaders { headers, initial_lens, body }); - - // let flow = (&*meta).into(); - - // let body_csum = match (&meta.headers).inner_eth.ethertype() { - // Ethertype::ARP => Memoised::Known(None), - // Ethertype::IPV4 | Ethertype::IPV6 => Memoised::Uninit, - // _ => return Err(IngotParseErr::Unwanted.into()), - // }; - - // let state = Parsed2 { - // meta, - // flow, - // body_csum, - // l4_hash: Memoised::Uninit, - // body_modified: false, - // len, - // inner_csum_dirty: false, - // }; - - // let mut pkt = Packet2 { state }; - // // TODO: we can probably not do this in some cases, but we - // // don't have a way for headeractions to signal that they - // // *may* change the fields we need in the slowpath. - // let _ = pkt.body_csum(); - - // Ok(pkt) - // } - // TODO: cleanup type aliases. #[inline] @@ -1835,7 +1666,7 @@ impl Packet2> { where T::Chunk: ByteSliceMut, { - // Roughly how does this work: + // Roughly how this works: // - Identify rightmost structural-changed field. // - fill out owned versions into the push_spec of all // extant fields we rewound past. @@ -1919,8 +1750,8 @@ impl Packet2> { } L3::Ipv6(IngotHeader::Repr(v6)) => L3Repr::Ipv6(*v6), - // This needs a fuller InlineHeader due to EHs... - // We can't actually do structural mods here today using OPTE. 
+ // We can't actually do structural mods here today using OPTE, + // but account for the possibility at least. L3::Ipv6(IngotHeader::Raw(v6)) => { L3Repr::Ipv6(v6.to_owned(None)?) } @@ -1955,8 +1786,15 @@ impl Packet2> { { push_spec.outer_encap = Some(match encap { InlineHeader::Repr(o) => o, - // Needed in fullness of time, but not here. - InlineHeader::Raw(_) => todo!(), + InlineHeader::Raw(ValidEncapMeta::Geneve(u, g)) => { + EncapMeta::Geneve(GeneveMeta { + entropy: u.source(), + vni: g.vni(), + oxide_external_pkt: valid_geneve_has_oxide_external( + &g, + ), + }) + } }); force_serialize = true; @@ -1980,7 +1818,10 @@ impl Packet2> { push_spec.outer_ip = Some(match l3 { L3::Ipv6(BoxedHeader::Repr(o)) => L3Repr::Ipv6(*o), L3::Ipv4(BoxedHeader::Repr(o)) => L3Repr::Ipv4(*o), - _ => todo!(), + L3::Ipv6(BoxedHeader::Raw(o)) => { + L3Repr::Ipv6((&o).to_owned(None)?) + } + L3::Ipv4(BoxedHeader::Raw(o)) => L3Repr::Ipv4((&o).into()), }); let inner_sz = (encapped_len + encap_len) as u16; @@ -2013,8 +1854,7 @@ impl Packet2> { { push_spec.outer_eth = Some(match eth { InlineHeader::Repr(o) => o, - // Needed in fullness of time, but not here. - InlineHeader::Raw(_) => todo!(), + InlineHeader::Raw(r) => (&r).into(), }); rewind += init_lens.outer_eth; @@ -2519,7 +2359,11 @@ impl EmittestSpec { } } - // TODO: put available layers into said slots? + // TODO: actually push in to existing slots we rewound past if needed, + // then run this step at the end. + // This is not really an issue in practice -- no packets should need + // to rewind *and* prepend new segments with how we're using OPTE today, + // much less so in the fastpath. pkt.drop_empty_segments(); let out = match &self.spec { @@ -2527,9 +2371,6 @@ impl EmittestSpec { push_spec.encap.prepend(pkt, self.ulp_len as usize) } EmitterSpec::Slowpath(push_spec) => { - // TODO: - // - actually push in to existing slots we rewound past if needed. 
- let mut needed_push = push_spec.outer_eth.packet_length() + push_spec.outer_ip.packet_length() + push_spec.outer_encap.packet_length(); diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index 9ee46e3b..6f8eaa84 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -1840,15 +1840,9 @@ mod test { #[test] fn find_rule() { - use crate::engine::headers::IpMeta; - use crate::engine::headers::UlpMeta; - use crate::engine::ip4::Ipv4Meta; - use crate::engine::ip4::Protocol; - use crate::engine::packet::InnerMeta; use crate::engine::predicate::Ipv4AddrMatch; use crate::engine::predicate::Predicate; use crate::engine::rule; - use crate::engine::tcp::TcpMeta; let mut rule_table = RuleTable::new("port", "test", Direction::Out); let mut rule = Rule::new( diff --git a/lib/opte/src/engine/nat.rs b/lib/opte/src/engine/nat.rs index 0b226f20..50c6645e 100644 --- a/lib/opte/src/engine/nat.rs +++ b/lib/opte/src/engine/nat.rs @@ -215,7 +215,7 @@ impl ActionDesc for NatDesc { #[cfg(test)] mod test { use super::*; - use crate::engine::ether::EtherMeta; + use crate::engine::ingot_base::Ethernet; use crate::engine::ingot_base::EthernetRef; use crate::engine::ingot_base::Ipv4; @@ -228,7 +228,6 @@ mod test { use ingot::tcp::TcpFlags; use ingot::tcp::TcpRef; use ingot::types::HeaderLen; - use opte_api::Direction::*; #[derive(Debug)] struct DummyVerify; @@ -241,14 +240,6 @@ mod test { #[test] fn nat4_rewrite() { - use crate::engine::ether::EtherHdr; - use crate::engine::ether::EtherType; - use crate::engine::headers::IpMeta; - use crate::engine::headers::UlpMeta; - use crate::engine::ip4::Ipv4Hdr; - use crate::engine::ip4::Ipv4Meta; - use crate::engine::ip4::Protocol; - use crate::engine::tcp::TcpMeta; use opte_api::MacAddr; let priv_mac = MacAddr::from([0xA8, 0x40, 0x25, 0xF0, 0x00, 0x01]); diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 9d98ea8a..86c97d8a 100644 --- a/lib/opte/src/engine/packet.rs +++ 
b/lib/opte/src/engine/packet.rs @@ -3301,11 +3301,8 @@ mod test { use crate::engine::ether::EtherHdr; use crate::engine::ether::EtherType; use crate::engine::ip4::Ipv4Hdr; - use crate::engine::ip6::Ipv6Hdr; use crate::engine::tcp::TcpFlags; - use crate::engine::tcp::TcpHdr; - use crate::engine::GenericUlp; - use opte_api::Direction::*; + use opte_api::Ipv6Addr; use opte_api::MacAddr; diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index e254f040..feaaea05 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -1007,13 +1007,15 @@ impl From<&Rule> for super::ioctl::RuleDump { #[test] fn rule_matching() { - use super::ip4::Protocol; - use crate::engine::headers::UlpMeta; - use crate::engine::ip4::Ipv4Meta; - use crate::engine::packet::InnerMeta; + use crate::engine::ingot_base::Ipv4; + use crate::engine::ingot_base::Ipv4Mut; use crate::engine::predicate::Ipv4AddrMatch; use crate::engine::predicate::Predicate; - use crate::engine::tcp::TcpMeta; + use crate::engine::GenericUlp; + use ingot::ethernet::Ethertype; + use ingot::ip::IpProtocol; + use ingot::tcp::Tcp; + use ingot::types::HeaderLen; let action = Identity::new("rule_matching"); let mut r1 = Rule::new(1, Action::Static(Arc::new(action))); @@ -1029,7 +1031,7 @@ fn rule_matching() { window_size: 64240, ..Default::default() }; - let mut ip4 = Ipv4 { + let ip4 = Ipv4 { source: src_ip, destination: dst_ip, protocol: IpProtocol::TCP, diff --git a/lib/opte/src/engine/snat.rs b/lib/opte/src/engine/snat.rs index 73868488..4f702535 100644 --- a/lib/opte/src/engine/snat.rs +++ b/lib/opte/src/engine/snat.rs @@ -499,15 +499,6 @@ mod test { #[test] fn snat4_desc_lifecycle() { - use crate::engine::ether::EtherHdr; - use crate::engine::ether::EtherMeta; - use crate::engine::ether::EtherType; - use crate::engine::headers::IpMeta; - use crate::engine::headers::UlpMeta; - use crate::engine::ip4::Ipv4Hdr; - use crate::engine::ip4::Ipv4Meta; - use crate::engine::ip4::Protocol; - use 
crate::engine::tcp::TcpMeta; use crate::engine::GenericUlp; use opte_api::Ipv4Addr; use opte_api::MacAddr; @@ -535,7 +526,7 @@ mod test { destination: outside_port, ..Default::default() }; - let mut ip4 = Ipv4 { + let ip4 = Ipv4 { source: priv_ip, destination: outside_ip, protocol: IpProtocol::TCP, @@ -603,7 +594,7 @@ mod test { destination: pub_port, ..Default::default() }; - let mut ip4 = Ipv4 { + let ip4 = Ipv4 { source: outside_ip, destination: pub_ip, protocol: IpProtocol::TCP, From 7ced0c63ed9b7fa22fa38b9f5d93f8f4623f20ec Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Sat, 19 Oct 2024 19:02:51 +0100 Subject: [PATCH 056/115] The Axe Shall Fall (pt.1) Satisfying but time-consuming. A LOT of code to remove. --- lib/opte-test-utils/src/dhcp.rs | 2 +- lib/opte-test-utils/src/lib.rs | 11 - lib/opte/src/engine/arp.rs | 279 ++---- lib/opte/src/engine/dhcp.rs | 92 +- lib/opte/src/engine/dhcpv6/protocol.rs | 76 +- lib/opte/src/engine/ether.rs | 148 +-- lib/opte/src/engine/geneve.rs | 356 +------ lib/opte/src/engine/headers.rs | 424 -------- lib/opte/src/engine/icmp/mod.rs | 195 +--- lib/opte/src/engine/icmp/v4.rs | 18 - lib/opte/src/engine/icmp/v6.rs | 18 - lib/opte/src/engine/ingot_packet.rs | 185 ++-- lib/opte/src/engine/ip4.rs | 364 ------- lib/opte/src/engine/packet.rs | 1122 +--------------------- lib/opte/src/engine/tcp.rs | 348 ------- lib/opte/src/engine/udp.rs | 210 ---- lib/oxide-vpc/src/engine/mod.rs | 49 +- lib/oxide-vpc/tests/integration_tests.rs | 37 +- 18 files changed, 269 insertions(+), 3665 deletions(-) diff --git a/lib/opte-test-utils/src/dhcp.rs b/lib/opte-test-utils/src/dhcp.rs index a38878d5..28869175 100644 --- a/lib/opte-test-utils/src/dhcp.rs +++ b/lib/opte-test-utils/src/dhcp.rs @@ -85,7 +85,7 @@ pub fn packet_from_client_dhcpv6_message( let udp = Udp { source: dhcpv6::CLIENT_PORT, destination: dhcpv6::SERVER_PORT, - length: (UdpHdr::SIZE + msg.buffer_len()) as u16, + length: ip.payload_len, ..Default::default() }; diff --git 
a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index 18794bef..c1f16579 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -18,10 +18,8 @@ pub mod port_state; // Let's make our lives easier and pub use a bunch of stuff. pub use opte::api::Direction::*; pub use opte::api::MacAddr; -pub use opte::engine::ether::EtherHdr; pub use opte::engine::ether::EtherMeta; pub use opte::engine::ether::EtherType; -pub use opte::engine::geneve::GeneveHdr; pub use opte::engine::geneve::GeneveMeta; pub use opte::engine::geneve::GeneveOption; pub use opte::engine::geneve::OxideOption; @@ -30,8 +28,6 @@ use opte::engine::geneve::GENEVE_OPT_CLASS_OXIDE; use opte::engine::geneve::GENEVE_PORT; pub use opte::engine::headers::IpAddr; pub use opte::engine::headers::IpCidr; -pub use opte::engine::headers::IpMeta; -pub use opte::engine::headers::UlpMeta; use opte::engine::ingot_base::Ethernet; use opte::engine::ingot_base::Ipv4; use opte::engine::ingot_base::Ipv6; @@ -40,8 +36,6 @@ use opte::engine::ingot_packet::LightParsedMblk; use opte::engine::ingot_packet::MsgBlk; use opte::engine::ingot_packet::Packet2; pub use opte::engine::ip4::Ipv4Addr; -pub use opte::engine::ip4::Ipv4Hdr; -pub use opte::engine::ip4::Ipv4Meta; pub use opte::engine::ip4::Protocol; pub use opte::engine::ip6::Ipv6Addr; pub use opte::engine::ip6::Ipv6Hdr; @@ -59,11 +53,6 @@ pub use opte::engine::port::Port; pub use opte::engine::port::PortBuilder; pub use opte::engine::port::ProcessResult; pub use opte::engine::port::ProcessResult::*; -pub use opte::engine::tcp::TcpFlags; -pub use opte::engine::tcp::TcpHdr; -pub use opte::engine::tcp::TcpMeta; -pub use opte::engine::udp::UdpHdr; -pub use opte::engine::udp::UdpMeta; pub use opte::engine::GenericUlp; use opte::engine::NetworkParser; pub use opte::ingot::ethernet::Ethertype; diff --git a/lib/opte/src/engine/arp.rs b/lib/opte/src/engine/arp.rs index be783bdb..59013ad7 100644 --- a/lib/opte/src/engine/arp.rs +++ 
b/lib/opte/src/engine/arp.rs @@ -6,10 +6,10 @@ //! ARP headers and data. -use super::ether::EtherHdr; use super::ether::EtherMeta; use super::ether::EtherType; -use super::headers::RawHeader; +use super::ingot_base::Ethernet; +use super::ingot_packet::MsgBlk; use super::packet::Initialized; use super::packet::Packet; use super::packet::PacketReadMut; @@ -17,10 +17,15 @@ use super::packet::ReadErr; use crate::d_error::DError; use core::fmt; use core::fmt::Display; +use ingot::ethernet::Ethertype; +use ingot::types::primitives::u16be; +use ingot::types::NetworkRepr; +use ingot::Ingot; use opte_api::Ipv4Addr; use opte_api::MacAddr; use serde::Deserialize; use serde::Serialize; +use zerocopy::ByteSlice; use zerocopy::FromBytes; use zerocopy::Immutable; use zerocopy::IntoBytes; @@ -30,73 +35,49 @@ use zerocopy::Unaligned; pub const ARP_HTYPE_ETHERNET: u16 = 1; -#[repr(u16)] #[derive( - Clone, Copy, Debug, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize, + Clone, + Copy, + Debug, + Deserialize, + Eq, + Ord, + PartialEq, + PartialOrd, + Serialize, + Hash, )] -pub enum ArpOp { - Request = 1, - Reply = 2, -} +pub struct ArpOp(u16); impl ArpOp { - pub fn to_be_bytes(self) -> [u8; 2] { - match self { - ArpOp::Request => 1u16.to_be_bytes(), - ArpOp::Reply => 2u16.to_be_bytes(), - } - } + pub const REQUEST: Self = Self(1); + pub const REPLY: Self = Self(2); } -impl TryFrom for ArpOp { - type Error = ArpHdrError; - - fn try_from(val: u16) -> Result { - match val { - 1 => Ok(ArpOp::Request), - 2 => Ok(ArpOp::Reply), - _ => Err(Self::Error::BadOp { op: val }), - } +impl Default for ArpOp { + fn default() -> Self { + Self::REQUEST } } impl Display for ArpOp { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let s = match self { - ArpOp::Request => "Request", - ArpOp::Reply => "Reply", + let s = match *self { + ArpOp::REQUEST => "Request", + ArpOp::REPLY => "Reply", + _ => "Unknown", }; write!(f, "{}", s) } } -#[derive(Clone, Copy, Debug, DError, Eq, PartialEq)] 
-#[derror(leaf_data = ArpHdrError::derror_data)] -pub enum ArpHdrError { - BadOp { op: u16 }, - ReadError(ReadErr), - UnexpectedProtoLen { plen: u8 }, - UnexpectedProtoType { ptype: u16 }, - UnexpectedHwLen { hlen: u8 }, - UnexpectedHwType { htype: u16 }, -} - -impl ArpHdrError { - fn derror_data(&self, data: &mut [u64]) { - data[0] = match self { - Self::BadOp { op } => *op as u64, - Self::UnexpectedProtoLen { plen } => *plen as u64, - Self::UnexpectedProtoType { ptype } => *ptype as u64, - Self::UnexpectedHwLen { hlen } => *hlen as u64, - Self::UnexpectedHwType { htype } => *htype as u64, - _ => 0, - }; +impl NetworkRepr> for ArpOp { + fn to_network(self) -> zerocopy::U16 { + self.0.into() } -} -impl From for ArpHdrError { - fn from(error: ReadErr) -> Self { - Self::ReadError(error) + fn from_network(val: zerocopy::U16) -> Self { + Self(val.into()) } } @@ -106,175 +87,55 @@ pub fn gen_arp_reply( spa: Ipv4Addr, tha: MacAddr, tpa: Ipv4Addr, -) -> Packet { - let len = EtherHdr::SIZE + ArpEthIpv4Raw::SIZE; - let mut pkt = Packet::alloc_and_expand(len); - let mut wtr = pkt.seg0_wtr(); - - let eth = EtherMeta { dst: tha, src: sha, ether_type: EtherType::Arp }; - - let arp = ArpEthIpv4 { - htype: ARP_HTYPE_ETHERNET, - ptype: u16::from(EtherType::Ipv4), - hlen: 6, - plen: 4, - op: ArpOp::Reply, - sha, - spa, - tha, - tpa, - }; - - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - arp.emit(wtr.slice_mut(ArpEthIpv4::SIZE).unwrap()); - pkt +) -> MsgBlk { + MsgBlk::new_ethernet_pkt(( + Ethernet { destination: tha, source: sha, ethertype: Ethertype::ARP }, + ArpEthIpv4 { + op: ArpOp::REPLY, + sha, + spa, + tha, + tpa, + ..Default::default() + }, + )) } -#[derive(Clone, Copy, Debug)] +/// An ARP packet containing Ethernet (MAC) to IPv4 address mappings. 
+#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq, Ingot)] +#[ingot(impl_default)] pub struct ArpEthIpv4 { - pub htype: u16, - pub ptype: u16, + #[ingot(default = ARP_HTYPE_ETHERNET)] + pub htype: u16be, + #[ingot(default = Ethertype::IPV4, is = "u16be")] + pub ptype: Ethertype, + #[ingot(default = size_of::() as u8)] pub hlen: u8, + #[ingot(default = size_of::() as u8)] pub plen: u8, + + // TODO: I think we need to make NetworkRepr fallible when + // reading. + #[ingot(is = "u16be")] pub op: ArpOp, + + #[ingot(is = "[u8; 6]")] pub sha: MacAddr, + #[ingot(is = "[u8; 4]")] pub spa: Ipv4Addr, + + #[ingot(is = "[u8; 6]")] pub tha: MacAddr, + #[ingot(is = "[u8; 4]")] pub tpa: Ipv4Addr, } -impl ArpEthIpv4 { - pub const SIZE: usize = ArpEthIpv4Raw::SIZE; - - pub fn emit(&self, dst: &mut [u8]) { - debug_assert_eq!(dst.len(), ArpEthIpv4Raw::SIZE); - // let mut raw = ArpEthIpv4Raw::new_mut(dst).unwrap(); - // raw.write_to(); - - ArpEthIpv4Raw::from(self).write_to(dst).unwrap() - } - - pub fn parse<'a, 'b, R>(rdr: &'b mut R) -> Result - where - R: PacketReadMut<'a>, - { - let src = rdr.slice_mut(ArpEthIpv4Raw::SIZE)?; - Self::try_from(&ArpEthIpv4Raw::new(src)?) - } - - pub fn parse_normally(rdr: &[&[u8]]) -> Result { - let space_in_front = rdr.get(0).map(|v| !v.is_empty()); - - let to_use = match space_in_front { - None => { - return Err(ArpHdrError::ReadError(ReadErr::NotEnoughBytes)) - } - Some(true) => rdr.get(0), - Some(false) => rdr.get(1), - }; - - if let Some(to_use) = to_use { - Self::try_from(&ArpEthIpv4Raw::new(to_use)?) - } else { - Err(ArpHdrError::ReadError(ReadErr::NotEnoughBytes)) - } - } -} - -impl TryFrom<&Ref<&[u8], ArpEthIpv4Raw>> for ArpEthIpv4 { - type Error = ArpHdrError; - - // NOTE: This only accepts IPv4/Ethernet ARP. 
- fn try_from(raw: &Ref<&[u8], ArpEthIpv4Raw>) -> Result { - let htype = u16::from_be_bytes(raw.htype); - - if htype != ARP_HTYPE_ETHERNET { - return Err(Self::Error::UnexpectedHwType { htype }); - } - - let hlen = raw.hlen; - - if hlen != 6 { - return Err(Self::Error::UnexpectedHwLen { hlen }); - } - - let ptype = u16::from_be_bytes(raw.ptype); - - if ptype != super::ether::ETHER_TYPE_IPV4 { - return Err(Self::Error::UnexpectedProtoType { ptype }); - } - - let plen = raw.plen; - - if plen != 4 { - return Err(Self::Error::UnexpectedProtoLen { plen }); - } - - let op = ArpOp::try_from(u16::from_be_bytes(raw.op))?; - - Ok(Self { - htype, - ptype, - hlen, - plen, - op, - sha: MacAddr::from(raw.sha), - spa: Ipv4Addr::from(u32::from_be_bytes(raw.spa)), - tha: MacAddr::from(raw.tha), - tpa: Ipv4Addr::from(u32::from_be_bytes(raw.tpa)), - }) - } -} - -impl From<&ArpEthIpv4> for ArpEthIpv4Raw { - fn from(arp: &ArpEthIpv4) -> Self { - Self { - htype: arp.htype.to_be_bytes(), - ptype: arp.ptype.to_be_bytes(), - hlen: arp.hlen, - plen: arp.plen, - op: arp.op.to_be_bytes(), - sha: arp.sha.bytes(), - spa: arp.spa.bytes(), - tha: arp.tha.bytes(), - tpa: arp.tpa.bytes(), - } - } -} - -#[repr(C)] -#[derive( - IntoBytes, Clone, Debug, FromBytes, Unaligned, Immutable, KnownLayout, -)] -pub struct ArpEthIpv4Raw { - pub htype: [u8; 2], - pub ptype: [u8; 2], - pub hlen: u8, - pub plen: u8, - pub op: [u8; 2], - pub sha: [u8; 6], - pub spa: [u8; 4], - pub tha: [u8; 6], - pub tpa: [u8; 4], -} - -impl<'a> RawHeader<'a> for ArpEthIpv4Raw { - #[inline] - fn new_mut(src: &mut [u8]) -> Result, ReadErr> { - debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::from_bytes(src).ok() { - Some(hdr) => hdr, - None => return Err(ReadErr::BadLayout), - }; - Ok(hdr) - } - - fn new(src: &[u8]) -> Result, ReadErr> { - debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::from_bytes(src).ok() { - Some(hdr) => hdr, - None => return Err(ReadErr::BadLayout), - }; - Ok(hdr) +impl 
ValidArpEthIpv4 { + pub fn values_valid(&self) -> bool { + self.htype() == ARP_HTYPE_ETHERNET + && self.ptype() == Ethertype::IPV4 + && self.hlen() == (size_of::() as u8) + && self.plen() == (size_of::() as u8) + && (self.op() == ArpOp::REQUEST || self.op() == ArpOp::REPLY) } } diff --git a/lib/opte/src/engine/dhcp.rs b/lib/opte/src/engine/dhcp.rs index ee66ca0e..4da14378 100644 --- a/lib/opte/src/engine/dhcp.rs +++ b/lib/opte/src/engine/dhcp.rs @@ -7,14 +7,13 @@ //! DHCP headers, data, and actions. use super::checksum::HeaderChecksum; -use super::ether::EtherHdr; use super::ether::EtherMeta; use super::ether::EtherType; +use super::ingot_base::Ethernet; +use super::ingot_base::Ipv4; use super::ingot_packet::MsgBlk; use super::ingot_packet::PacketHeaders2; use super::ip4::Ipv4Addr; -use super::ip4::Ipv4Hdr; -use super::ip4::Ipv4Meta; use super::ip4::Protocol; use super::ip6::UlpCsumOpt; use super::packet::Packet; @@ -27,13 +26,15 @@ use super::predicate::Predicate; use super::rule::AllowOrDeny; use super::rule::GenPacketResult; use super::rule::HairpinAction; -use super::udp::UdpHdr; -use super::udp::UdpMeta; use alloc::string::ToString; use alloc::vec::Vec; use core::fmt; use core::fmt::Display; use heapless::Vec as HeaplessVec; +use ingot::ethernet::Ethertype; +use ingot::ip::IpProtocol; +use ingot::types::HeaderLen; +use ingot::udp::Udp; use opte_api::DhcpCfg; use opte_api::DhcpReplyType; use opte_api::DomainName; @@ -570,21 +571,10 @@ impl HairpinAction for DhcpAction { let reply_len = reply.buffer_len(); - // XXX This is temporary until I can add interface to Packet - // to initialize a zero'd mblk of N bytes and then get a - // direct mutable reference to the PacketSeg. - // - // We provide exactly the number of bytes needed guaranteeing - // that emit() should not fail. 
- let mut tmp = vec![0u8; reply_len]; - let mut dhcp = DhcpPacket::new_unchecked(&mut tmp); - reply.emit(&mut dhcp).unwrap(); - - let mut udp = UdpMeta { - src: 67, - dst: 68, - len: (UdpHdr::SIZE + tmp.len()) as u16, - ..Default::default() + let eth_dst = if client_dhcp.broadcast { + MacAddr::BROADCAST + } else { + self.client_mac }; let ip_dst = if client_dhcp.broadcast { @@ -593,47 +583,41 @@ impl HairpinAction for DhcpAction { self.client_ip }; - let mut ip = Ipv4Meta { - src: self.gw_ip, - dst: ip_dst, - proto: Protocol::UDP, - total_len: Ipv4Hdr::BASE_SIZE as u16 + udp.len, + let udp = Udp { + source: DHCP_SERVER_PORT, + destination: DHCP_CLIENT_PORT, + length: (Udp::MINIMUM_LENGTH + reply_len) as u16, ..Default::default() }; - ip.compute_hdr_csum(); - let eth_dst = if client_dhcp.broadcast { - MacAddr::BROADCAST - } else { - self.client_mac + let mut ip = Ipv4 { + source: self.gw_ip, + destination: ip_dst, + protocol: IpProtocol::UDP, + total_len: Ipv4::MINIMUM_LENGTH as u16 + udp.length, + ..Default::default() }; + ip.compute_checksum(); - let eth = EtherMeta { - dst: eth_dst, - src: self.gw_mac, - ether_type: EtherType::Ipv4, + let eth = Ethernet { + destination: eth_dst, + source: self.gw_mac, + ethertype: Ethertype::IPV4, }; - // XXX: Would be preferable to write in here directly rather than - // allocing tmp. 
- let hdr_len = EtherHdr::SIZE + Ipv4Hdr::BASE_SIZE + UdpHdr::SIZE; - let total_len = hdr_len + tmp.len(); - - let mut pkt = Packet::alloc_and_expand(total_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - let mut udp_buf = [0u8; UdpHdr::SIZE]; - udp.emit(&mut udp_buf); - let csum = ip.compute_ulp_csum(UlpCsumOpt::Full, &udp_buf, &tmp); - udp.csum = HeaderChecksum::from(csum).bytes(); - udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); - wtr.write(&tmp).unwrap(); - - Ok(AllowOrDeny::Allow( - unsafe { MsgBlk::wrap_mblk(pkt.unwrap_mblk()) } - .expect("known valid"), - )) + let ingot_layers = (ð, &ip, &udp); + let total_sz = ingot_layers.packet_length() + reply_len; + let mut pkt = MsgBlk::new_ethernet(total_sz); + pkt.emit_back(ingot_layers) + .expect("MsgBlk should have enough bytes by construction"); + let l = pkt.len(); + pkt.resize(total_sz) + .expect("MsgBlk should have enough bytes by construction"); + + let mut dhcp = DhcpPacket::new_unchecked(&mut pkt[l..]); + reply.emit(&mut dhcp).unwrap(); + + Ok(AllowOrDeny::Allow(pkt)) } } diff --git a/lib/opte/src/engine/dhcpv6/protocol.rs b/lib/opte/src/engine/dhcpv6/protocol.rs index eb1a9e79..56ac4ad4 100644 --- a/lib/opte/src/engine/dhcpv6/protocol.rs +++ b/lib/opte/src/engine/dhcpv6/protocol.rs @@ -22,9 +22,10 @@ use crate::engine::dhcpv6::ALL_RELAYS_AND_SERVERS; use crate::engine::dhcpv6::ALL_SERVERS; use crate::engine::dhcpv6::CLIENT_PORT; use crate::engine::dhcpv6::SERVER_PORT; -use crate::engine::ether::EtherHdr; use crate::engine::ether::EtherMeta; use crate::engine::ether::EtherType; +use crate::engine::ingot_base::Ethernet; +use crate::engine::ingot_base::Ipv6; use crate::engine::ingot_base::Ipv6Ref; use crate::engine::ingot_packet::MsgBlk; use crate::engine::ingot_packet::PacketHeaders2; @@ -41,12 +42,14 @@ use crate::engine::predicate::Predicate; use crate::engine::rule::AllowOrDeny; use 
crate::engine::rule::GenPacketResult; use crate::engine::rule::HairpinAction; -use crate::engine::udp::UdpHdr; -use crate::engine::udp::UdpMeta; use alloc::borrow::Cow; use alloc::vec::Vec; use core::fmt; use core::ops::Range; +use ingot::ethernet::Ethertype; +use ingot::ip::IpProtocol as IngotIpProto; +use ingot::types::HeaderLen; +use ingot::udp::Udp; use opte_api::Ipv6Addr; use opte_api::Ipv6Cidr; use opte_api::MacAddr; @@ -615,55 +618,40 @@ fn generate_packet<'a>( meta: &PacketHeaders2, msg: &'a Message<'a>, ) -> GenPacketResult { - let eth = EtherMeta { - dst: action.client_mac, - src: action.server_mac, - ether_type: EtherType::Ipv6, + let udp = Udp { + source: SERVER_PORT, + destination: CLIENT_PORT, + length: (Udp::MINIMUM_LENGTH + msg.buffer_len()) as u16, + ..Default::default() }; - let ip = Ipv6Meta { - src: Ipv6Addr::from_eui64(&action.server_mac), - // Safety: We're only here if the predicates match, one of which is - // IPv6. - dst: meta.inner_ip6().unwrap().source(), - proto: Protocol::UDP, - next_hdr: IpProtocol::Udp, - pay_len: (UdpHdr::SIZE + msg.buffer_len()) as u16, + let ip = Ipv6 { + source: Ipv6Addr::from_eui64(&action.server_mac), + destination: meta.inner_ip6().unwrap().source(), + next_header: IngotIpProto::UDP, + payload_len: udp.length, ..Default::default() }; - let mut udp = UdpMeta { - src: SERVER_PORT, - dst: CLIENT_PORT, - len: (UdpHdr::SIZE + msg.buffer_len()) as u16, - ..Default::default() + let eth = Ethernet { + destination: action.client_mac, + source: action.server_mac, + ethertype: Ethertype::IPV6, }; // Allocate a segment into which we'll write the packet. 
- let reply_len = - msg.buffer_len() + UdpHdr::SIZE + Ipv6Hdr::BASE_SIZE + EtherHdr::SIZE; - let mut pkt = Packet::alloc_and_expand(reply_len); - let mut wtr = pkt.seg0_wtr(); - - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - - // Create the buffer to contain the DHCP message so that we may - // compute the UDP checksum. - let mut msg_buf = vec![0; msg.buffer_len()]; - msg.copy_into(&mut msg_buf).unwrap(); - - // Compute the UDP checksum. Write the UDP header and DHCP message - // to the segment. - let mut udp_buf = [0u8; UdpHdr::SIZE]; - udp.emit(&mut udp_buf); - let csum = ip.compute_ulp_csum(UlpCsumOpt::Full, &udp_buf, &msg_buf); - udp.csum = HeaderChecksum::from(csum).bytes(); - udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); - wtr.write(&msg_buf).unwrap(); - Ok(AllowOrDeny::Allow( - unsafe { MsgBlk::wrap_mblk(pkt.unwrap_mblk()) }.expect("known valid"), - )) + let ingot_layers = (ð, &ip, &udp); + let total_sz = ingot_layers.packet_length() + msg.buffer_len(); + + let mut pkt = MsgBlk::new_ethernet(total_sz); + pkt.emit_back(ingot_layers) + .expect("MsgBlk should have enough bytes by construction"); + let l = pkt.len(); + pkt.resize(total_sz) + .expect("MsgBlk should have enough bytes by construction"); + msg.copy_into(&mut pkt[l..]); + + Ok(AllowOrDeny::Allow(pkt)) } impl HairpinAction for Dhcpv6Action { diff --git a/lib/opte/src/engine/ether.rs b/lib/opte/src/engine/ether.rs index bce4842b..8cb868d5 100644 --- a/lib/opte/src/engine/ether.rs +++ b/lib/opte/src/engine/ether.rs @@ -8,7 +8,6 @@ use super::headers::ModifyAction; use super::headers::PushAction; -use super::headers::RawHeader; use super::packet::PacketReadMut; use super::packet::ReadErr; use crate::d_error::DError; @@ -19,6 +18,8 @@ use core::fmt::Debug; use core::fmt::Display; use core::result; use core::str::FromStr; +use ingot::ethernet::Ethernet; +use ingot::types::HeaderLen; use opte_api::MacAddr; use serde::Deserialize; use 
serde::Serialize; @@ -210,16 +211,6 @@ impl PushAction for EtherMeta { } } -impl<'a> From<&EtherHdr<'a>> for EtherMeta { - fn from(eth: &EtherHdr) -> Self { - EtherMeta { - src: eth.src(), - dst: eth.dst(), - ether_type: eth.ether_type(), - } - } -} - #[derive(Clone, Debug, Default, Deserialize, Serialize)] pub struct EtherMod { pub src: Option, @@ -239,142 +230,9 @@ impl ModifyAction for EtherMod { } impl EtherMeta { - #[inline] - pub fn emit(&self, dst: &mut [u8]) { - debug_assert_eq!(dst.len(), EtherHdrRaw::SIZE); - // let mut raw = EtherHdrRaw::new_mut(dst).unwrap(); - // raw. .write(EtherHdrRaw::from(self)); - - EtherHdrRaw::from(self).write_to(dst).unwrap() - } - #[inline] pub fn hdr_len(&self) -> usize { - EtherHdr::SIZE - } -} - -#[derive(Debug)] -pub struct EtherHdr<'a> { - bytes: Ref<&'a mut [u8], EtherHdrRaw>, -} - -impl<'a> EtherHdr<'a> { - // For the moment, this type is for non-VLAN ethernet headers - // only. - pub const SIZE: usize = EtherHdrRaw::SIZE; - - pub fn as_bytes(&self) -> &[u8] { - self.bytes.as_bytes() - } - - pub fn ether_type(&self) -> EtherType { - EtherType::from(u16::from_be_bytes(self.bytes.ether_type)) - } - - pub fn hdr_len(&self) -> usize { - Self::SIZE - } - - pub fn src(&self) -> MacAddr { - MacAddr::from(self.bytes.src) - } - - pub fn dst(&self) -> MacAddr { - MacAddr::from(self.bytes.dst) - } - - pub fn set_dst(&mut self, dst: MacAddr) { - self.bytes.dst = dst.bytes(); - } - - pub fn parse<'b, R>(rdr: &'b mut R) -> Result - where - R: PacketReadMut<'a>, - { - let src = rdr.slice_mut(EtherHdrRaw::SIZE)?; - Ok(Self { bytes: EtherHdrRaw::new_mut(src)? 
}) - } -} - -#[derive(Clone, Copy, Eq, PartialEq, DError)] -#[derror(leaf_data = EtherHdrError::derror_data)] -pub enum EtherHdrError { - ReadError(ReadErr), - UnsupportedEtherType { ether_type: u16 }, -} - -impl EtherHdrError { - fn derror_data(&self, data: &mut [u64]) { - if let Self::UnsupportedEtherType { ether_type } = self { - data[0] = *ether_type as u64; - } - } -} - -impl From for EtherHdrError { - fn from(error: ReadErr) -> Self { - EtherHdrError::ReadError(error) - } -} - -impl Display for EtherHdrError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Self::UnsupportedEtherType { ether_type } => { - write!(f, "Unsupported Ether Type: 0x{:04X}", ether_type) - } - - Self::ReadError(error) => { - write!(f, "read error: {:?}", error) - } - } - } -} - -impl Debug for EtherHdrError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self) - } -} - -impl From<&EtherMeta> for EtherHdrRaw { - fn from(meta: &EtherMeta) -> Self { - Self { - dst: meta.dst.bytes(), - src: meta.src.bytes(), - ether_type: u16::from(meta.ether_type).to_be_bytes(), - } - } -} - -/// Note: For now we keep this unaligned to be safe. 
-#[repr(C)] -#[derive( - Clone, - Debug, - Default, - FromBytes, - IntoBytes, - Unaligned, - Immutable, - KnownLayout, -)] -pub struct EtherHdrRaw { - pub dst: [u8; 6], - pub src: [u8; 6], - pub ether_type: [u8; 2], -} - -impl<'a> RawHeader<'a> for EtherHdrRaw { - #[inline] - fn new_mut(src: &mut [u8]) -> Result, ReadErr> { - debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::from_bytes(src).ok() { - Some(hdr) => hdr, - None => return Err(ReadErr::BadLayout), - }; - Ok(hdr) + Ethernet::MINIMUM_LENGTH } } diff --git a/lib/opte/src/engine/geneve.rs b/lib/opte/src/engine/geneve.rs index 7c6678de..b86f1a83 100644 --- a/lib/opte/src/engine/geneve.rs +++ b/lib/opte/src/engine/geneve.rs @@ -11,18 +11,18 @@ use super::ether::ETHER_TYPE_ETHER; use super::headers::ModifyAction; use super::headers::PushAction; -use super::headers::RawHeader; use super::packet::PacketReadMut; use super::packet::ReadErr; -use super::udp::UdpHdr; -use super::udp::UdpMeta; use crate::d_error::DError; use core::mem; use ingot::geneve::Geneve; +use ingot::geneve::GeneveOpt; use ingot::geneve::GeneveOptRef; use ingot::geneve::GeneveRef; use ingot::geneve::ValidGeneve; use ingot::types::Header; +use ingot::types::HeaderLen; +use ingot::udp::Udp; pub use opte_api::Vni; use serde::Deserialize; use serde::Serialize; @@ -95,52 +95,17 @@ impl ModifyAction for GeneveMod { } impl GeneveMeta { - /// Emit only the inner Geneve header. - #[inline] - pub fn emit_inner(&self, dst: &mut [u8]) { - debug_assert_eq!(dst.len(), self.hdr_len_inner()); - let (base, remainder) = dst.split_at_mut(GeneveHdrRaw::SIZE); - let mut raw = GeneveHdrRaw::new_mut(base).unwrap(); - Ref::write(&mut raw, GeneveHdrRaw::from(self)); - - // GeneveHdrRaw::from(self).write_to(dst).unwrap(); - - raw.ver_opt_len = if self.oxide_external_pkt { - GeneveOption::Oxide(OxideOption::External).emit(remainder) as u8 - } else { - raw.ver_opt_len - }; - } - - /// Emit a full Geneve encapsulation for an inner packet, including - /// UDP. 
- /// - /// `total_len` should be precomputed as `self.hdr_len() + body.len()`. - #[inline] - pub fn emit(&self, total_len: u16, dst: &mut [u8]) { - let (udp_buf, geneve_buf) = dst.split_at_mut(UdpHdr::SIZE); - let udp = UdpMeta { - src: self.entropy, - dst: GENEVE_PORT, - len: total_len, - csum: [0; 2], - }; - udp.emit(udp_buf); - - self.emit_inner(geneve_buf); - } - /// Return the length of headers needed to fully Geneve-encapsulate /// a packet, including UDP. #[inline] pub fn hdr_len(&self) -> usize { - UdpHdr::SIZE + self.hdr_len_inner() + Udp::MINIMUM_LENGTH + self.hdr_len_inner() } /// Return the length of only the Geneve header. #[inline] pub fn hdr_len_inner(&self) -> usize { - GeneveHdr::BASE_SIZE + self.options_len() + Geneve::MINIMUM_LENGTH + self.options_len() } /// Return the required length (in bytes) needed to store @@ -148,190 +113,13 @@ impl GeneveMeta { pub fn options_len(&self) -> usize { // XXX: This is very special-cased just to enable testing. if self.oxide_external_pkt { - GeneveOptHdrRaw::SIZE + GeneveOpt::MINIMUM_LENGTH } else { 0 } } } -impl<'a> From<(&UdpHdr<'a>, &GeneveHdr<'a>)> for GeneveMeta { - fn from((udp, geneve): (&UdpHdr<'a>, &GeneveHdr<'a>)) -> Self { - let mut out = Self::from(geneve); - out.entropy = udp.src_port(); - out - } -} - -impl<'a> From<&GeneveHdr<'a>> for GeneveMeta { - fn from(geneve: &GeneveHdr<'a>) -> Self { - let mut out = - Self { vni: geneve.vni(), entropy: 0, ..Default::default() }; - - if let Some(ref opts) = geneve.opts { - // XXX: Prevent duplication by making Meta generation fallible - // in same way as Parsing? - // Unwrap safety: Invalid options will have been caught in - // GeneveHdr::parse. - GeneveOption::parse_all(opts, Some(&mut out)).unwrap(); - } - - out - } -} - -pub struct GeneveHdr<'a> { - /// Main body of the Geneve Header. - bytes: Ref<&'a mut [u8], GeneveHdrRaw>, - /// Byte slice occupied by Geneve options. 
- opts: Option<&'a mut [u8]>, -} - -impl<'a> GeneveHdr<'a> { - pub const BASE_SIZE: usize = mem::size_of::(); - - /// Return the header length, in bytes. - pub fn hdr_len(&self) -> usize { - usize::from(self.bytes.options_len() * 4) + Self::BASE_SIZE - } - - pub fn parse<'b, R>(rdr: &'b mut R) -> Result - where - R: PacketReadMut<'a>, - { - let src = rdr.slice_mut(GeneveHdrRaw::SIZE)?; - let bytes = GeneveHdrRaw::new_mut(src)?; - let opt_len = bytes.options_len_bytes().into(); - let opts = if opt_len != 0 { - let opts_body = rdr.slice_mut(opt_len)?; - - // Check for malformed options. - // XXX: Can we use this to elide some checks when building GeneveMeta? - // Otherwise, currently repeated to filter packets at parse time. - GeneveOption::parse_all(opts_body, None)?; - - Some(opts_body) - } else { - None - }; - - Ok(Self { bytes, opts }) - } - - /// Return the VNI. - pub fn vni(&self) -> Vni { - // Unwrap: We know it's legit because we are making sure the - // MSB is zero. - Vni::new(u32::from_be_bytes([ - 0, - self.bytes.vni[0], - self.bytes.vni[1], - self.bytes.vni[2], - ])) - .unwrap() - } -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq, DError)] -#[derror(leaf_data = GeneveHdrError::derror_data)] -pub enum GeneveHdrError { - BadDstPort { dst_port: u16 }, - BadLength { len: u16 }, - BadVersion { vsn: u8 }, - BadVni { vni: u32 }, - ReadError(ReadErr), - UnexpectedProtocol { protocol: u16 }, - UnknownCriticalOption { class: u16, opt_type: u8 }, -} - -impl From for GeneveHdrError { - fn from(error: ReadErr) -> Self { - GeneveHdrError::ReadError(error) - } -} - -impl GeneveHdrError { - fn derror_data(&self, data: &mut [u64]) { - [data[0], data[1]] = match self { - Self::BadDstPort { dst_port } => [*dst_port as u64, 0], - Self::BadLength { len } => [*len as u64, 0], - Self::BadVersion { vsn } => [*vsn as u64, 0], - Self::BadVni { vni } => [*vni as u64, 0], - Self::UnexpectedProtocol { protocol } => [*protocol as u64, 0], - Self::UnknownCriticalOption { class, 
opt_type } => { - [*class as u64, *opt_type as u64] - } - _ => [0, 0], - } - } -} - -/// Note: For now we keep this unaligned to be safe. -#[repr(C)] -#[derive( - Clone, Debug, FromBytes, IntoBytes, Unaligned, Immutable, KnownLayout, -)] -pub struct GeneveHdrRaw { - ver_opt_len: u8, - flags: u8, - proto: [u8; 2], - vni: [u8; 3], - reserved: u8, -} - -impl GeneveHdrRaw { - /// Return the length of the Geneve options in 4-byte units. - pub fn options_len(&self) -> u8 { - self.ver_opt_len & GENEVE_OPT_LEN_MASK - } - - /// Return the length of the Geneve options in bytes. - pub fn options_len_bytes(&self) -> u8 { - self.options_len() << GENEVE_OPT_LEN_SCALE_SHIFT - } - - pub fn version(&self) -> u8 { - (self.ver_opt_len & GENEVE_VER_MASK) >> GENEVE_VER_SHIFT - } -} - -impl<'a> RawHeader<'a> for GeneveHdrRaw { - #[inline] - fn new_mut(src: &mut [u8]) -> Result, ReadErr> { - debug_assert_eq!(src.len(), mem::size_of::()); - let hdr = match Ref::from_bytes(src).ok() { - Some(hdr) => hdr, - None => return Err(ReadErr::BadLayout), - }; - Ok(hdr) - } -} - -impl Default for GeneveHdrRaw { - fn default() -> Self { - Self { - ver_opt_len: 0x0, - flags: 0x0, - proto: ETHER_TYPE_ETHER.to_be_bytes(), - vni: [0x0; 3], - reserved: 0, - } - } -} - -impl From<&GeneveMeta> for GeneveHdrRaw { - fn from(meta: &GeneveMeta) -> Self { - Self { - ver_opt_len: (meta.options_len() >> GENEVE_OPT_LEN_SCALE_SHIFT) - as u8, - flags: 0x0, - proto: ETHER_TYPE_ETHER.to_be_bytes(), - vni: meta.vni.bytes(), - reserved: 0, - } - } -} - /// Parsed form of an individual Geneve option TLV. /// /// These are grouped by the vendor `class`es understood by OPTE. @@ -341,86 +129,12 @@ pub enum GeneveOption { } impl GeneveOption { - /// Parse and check validity for all options attached to a Geneve - /// header, recording known extensions in a [`GeneveMeta`] if - /// given. 
- pub fn parse_all( - mut src: &[u8], - mut meta: Option<&mut GeneveMeta>, - ) -> Result<(), GeneveHdrError> { - while !src.is_empty() { - let option = GeneveOption::parse(&mut src)?; - if let Some(ref mut meta) = meta { - #[allow(clippy::single_match)] - match option { - Some(GeneveOption::Oxide(OxideOption::External)) => { - meta.oxide_external_pkt = true - } - _ => {} - } - } - } - - Ok(()) - } - - /// Parse an individual Geneve option from a byte slice, advancing the - /// read location. - pub fn parse(src: &mut &[u8]) -> Result, GeneveHdrError> { - let (head, tail) = src.split_at(GeneveOptHdrRaw::SIZE); - let opt_header = GeneveOptHdrRaw::new(head)?; - let needed_bytes = opt_header.options_len_bytes() as usize; - if tail.len() < needed_bytes { - return Err(GeneveHdrError::BadLength { len: needed_bytes as u16 }); - } - - let class = u16::from_be_bytes(opt_header.option_class); - let opt_type = opt_header.option_type(); - - // We don't yet have any options which need body parsing. - // This will skip over them regardless. - let (_body, tail) = tail.split_at(needed_bytes); - *src = tail; - - // XXX: Break this out into a trait/impls to handle more cleanly. - Ok(match (class, opt_header.option_type()) { - (GENEVE_OPT_CLASS_OXIDE, 0) => { - Some(GeneveOption::Oxide(OxideOption::External)) - } - _ if opt_header.is_critical() => { - return Err(GeneveHdrError::UnknownCriticalOption { - class, - opt_type, - }) - } - _ => None, - }) - } - /// Return the wire-length of this option in bytes, including headers. pub fn len(&self) -> usize { 4 + match self { GeneveOption::Oxide(o) => o.len(), } } - - /// Emit an option, returning the number of 4-byte chunks written. 
- pub fn emit(&self, dst: &mut [u8]) -> usize { - let mut raw = GeneveOptHdrRaw::new_mut(dst).unwrap(); - - let (class, opt_type, len) = match self { - Self::Oxide(o) => ( - GENEVE_OPT_CLASS_OXIDE, - o.opt_type(), - o.len() >> GENEVE_OPT_LEN_SCALE_SHIFT, - ), - }; - raw.option_class = class.to_be_bytes(); - raw.crit_type = opt_type; - raw.reserved_len = len as u8; - - len + 1 - } } /// Geneve options defined by Oxide, [`GENEVE_OPT_CLASS_OXIDE`]. @@ -448,64 +162,6 @@ impl OxideOption { } } -/// Field layout for a single Geneve option. -/// -/// Note: Unaligned on the same rationale as [`GeneveHdrRaw`]. -#[repr(C)] -#[derive( - Clone, Debug, FromBytes, IntoBytes, Unaligned, Immutable, KnownLayout, -)] -pub struct GeneveOptHdrRaw { - option_class: [u8; 2], - crit_type: u8, - reserved_len: u8, -} - -impl GeneveOptHdrRaw { - /// Indicates whether this option is critical, and MUST be dropped - /// if not understood by a tunnel endpoint. - pub fn is_critical(&self) -> bool { - (self.crit_type >> GENEVE_OPT_CRIT_SHIFT) != 0 - } - - /// Return the type of this header. - pub fn option_type(&self) -> u8 { - self.crit_type & GENEVE_OPT_TYPE_MASK - } - - /// Return the length of this Geneve option's body in 4-byte units. - pub fn options_len(&self) -> u8 { - self.reserved_len & GENEVE_OPT_RESERVED_MASK - } - - /// Return the length of the Geneve options in bytes. 
- pub fn options_len_bytes(&self) -> u8 { - self.options_len() << GENEVE_OPT_LEN_SCALE_SHIFT - } -} - -impl<'a> RawHeader<'a> for GeneveOptHdrRaw { - #[inline] - fn new_mut(src: &mut [u8]) -> Result, ReadErr> { - debug_assert_eq!(src.len(), mem::size_of::()); - let hdr = match Ref::from_bytes(src).ok() { - Some(hdr) => hdr, - None => return Err(ReadErr::BadLayout), - }; - Ok(hdr) - } - - #[inline] - fn new(src: &[u8]) -> Result, ReadErr> { - debug_assert_eq!(src.len(), mem::size_of::()); - let hdr = match Ref::from_bytes(src).ok() { - Some(hdr) => hdr, - None => return Err(ReadErr::BadLayout), - }; - Ok(hdr) - } -} - // We probably want a more general way to retrieve all facts we care about // from the geneve options -- we only have the one today, however. #[inline] diff --git a/lib/opte/src/engine/headers.rs b/lib/opte/src/engine/headers.rs index 530a0f1c..6f067d4c 100644 --- a/lib/opte/src/engine/headers.rs +++ b/lib/opte/src/engine/headers.rs @@ -7,28 +7,16 @@ //! Header metadata combinations for IP, ULP, and Encap. use super::checksum::Checksum; -use super::geneve::GeneveHdr; use super::geneve::GeneveMeta; use super::geneve::GeneveMod; use super::geneve::GenevePush; -use super::icmp::IcmpHdr; -use super::icmp::Icmpv4Meta; -use super::icmp::Icmpv6Meta; -use super::ip4::Ipv4Hdr; -use super::ip4::Ipv4Meta; use super::ip4::Ipv4Mod; use super::ip4::Ipv4Push; -use super::ip6::Ipv6Hdr; -use super::ip6::Ipv6Meta; use super::ip6::Ipv6Mod; use super::ip6::Ipv6Push; use super::packet::ReadErr; -use super::tcp::TcpHdr; -use super::tcp::TcpMeta; use super::tcp::TcpMod; use super::tcp::TcpPush; -use super::udp::UdpHdr; -use super::udp::UdpMeta; use super::udp::UdpMod; use super::udp::UdpPush; use crate::engine::icmp::QueryEcho; @@ -44,27 +32,6 @@ use zerocopy::Ref; pub const AF_INET: i32 = 2; pub const AF_INET6: i32 = 26; -/// A raw header. -/// -/// A raw header is the most basic and raw representation of a given -/// header type. 
A raw header value preserves the bytes as they are, -/// in network order. A raw header undergoes no validation of header -/// fields. A raw header represents only the base header, eschewing -/// any options or extensions. -pub trait RawHeader<'a>: Sized { - const SIZE: usize = core::mem::size_of::(); - - /// Create a mutable, zerocopy version of the raw header from the - /// src. - fn new_mut(src: &mut [u8]) -> Result, ReadErr>; - - /// Create an immutable, zerocopy version of the raw header from the - /// src. - fn new(_src: &[u8]) -> Result, ReadErr> { - Err(ReadErr::NotImplemented) - } -} - pub trait PushAction { fn push(&self) -> HdrM; } @@ -81,131 +48,12 @@ pub enum IpType { Ipv6, } -#[derive(Debug)] -pub enum IpHdr<'a> { - Ip4(Ipv4Hdr<'a>), - Ip6(Ipv6Hdr<'a>), -} - -impl<'a> IpHdr<'a> { - pub fn pseudo_csum(&self) -> Checksum { - match self { - Self::Ip4(ip4) => ip4.pseudo_csum(), - Self::Ip6(ip6) => ip6.pseudo_csum(), - } - } -} - -impl<'a> From> for IpHdr<'a> { - fn from(ip4: Ipv4Hdr<'a>) -> Self { - Self::Ip4(ip4) - } -} - -impl<'a> From> for IpHdr<'a> { - fn from(ip6: Ipv6Hdr<'a>) -> Self { - Self::Ip6(ip6) - } -} - -#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd, Copy)] -pub enum IpMeta { - Ip4(Ipv4Meta), - Ip6(Ipv6Meta), -} - -impl IpMeta { - /// Return the checksum value. - pub fn csum(&self) -> [u8; 2] { - match self { - Self::Ip4(ip4) => ip4.csum, - // IPv6 has no checksum. - Self::Ip6(_) => [0; 2], - } - } - - pub fn has_csum(&self) -> bool { - match self { - Self::Ip4(ip4) => ip4.csum != [0; 2], - // IPv6 has no checksum. - Self::Ip6(_) => false, - } - } - - pub fn emit(&self, dst: &mut [u8]) { - match self { - Self::Ip4(ip4) => ip4.emit(dst), - Self::Ip6(ip6) => ip6.emit(dst), - } - } - - pub fn hdr_len(&self) -> usize { - match self { - Self::Ip4(ip4) => ip4.hdr_len(), - Self::Ip6(ip6) => ip6.hdr_len(), - } - } - - /// Get the [`Ipv4Meta`], if this is IPv4. 
- pub fn ip4(&self) -> Option<&Ipv4Meta> { - match self { - Self::Ip4(meta) => Some(meta), - _ => None, - } - } - - /// Get the [`Ipv6Meta`], if this is IPv6. - pub fn ip6(&self) -> Option<&Ipv6Meta> { - match self { - Self::Ip6(meta) => Some(meta), - _ => None, - } - } - - /// Get the [`Protocol`]. - pub fn proto(&self) -> Protocol { - match self { - Self::Ip4(meta) => meta.proto, - Self::Ip6(meta) => meta.proto, - } - } - - pub fn pseudo_csum(&self) -> Checksum { - match self { - Self::Ip4(ip4) => ip4.pseudo_csum(), - Self::Ip6(ip6) => ip6.pseudo_csum(), - } - } -} - -impl From for IpMeta { - fn from(ip4: Ipv4Meta) -> Self { - IpMeta::Ip4(ip4) - } -} - -impl From for IpMeta { - fn from(ip6: Ipv6Meta) -> Self { - IpMeta::Ip6(ip6) - } -} - #[derive(Clone, Copy, Debug, Deserialize, Serialize)] pub enum IpPush { Ip4(Ipv4Push), Ip6(Ipv6Push), } -impl PushAction for IpPush { - fn push(&self) -> IpMeta { - match self { - Self::Ip4(spec) => IpMeta::from(spec.push()), - - Self::Ip6(spec) => IpMeta::from(spec.push()), - } - } -} - impl From for IpPush { fn from(ip4: Ipv4Push) -> Self { Self::Ip4(ip4) @@ -248,27 +96,6 @@ impl IpMod { } } -impl ModifyAction for IpMod { - fn modify(&self, meta: &mut IpMeta) { - match (self, meta) { - (IpMod::Ip4(spec), IpMeta::Ip4(meta)) => { - spec.modify(meta); - } - - (IpMod::Ip6(spec), IpMeta::Ip6(meta)) => { - spec.modify(meta); - } - - (meta, spec) => { - panic!( - "Different IP versions for meta and spec: {:?} {:?}", - meta, spec - ); - } - } - } -} - impl From for IpMod { fn from(ip4: Ipv4Mod) -> Self { Self::Ip4(ip4) @@ -281,16 +108,6 @@ impl From for IpMod { } } -pub enum EncapHdr<'a> { - Geneve(GeneveHdr<'a>), -} - -impl<'a> From> for EncapHdr<'a> { - fn from(hdr: GeneveHdr<'a>) -> Self { - Self::Geneve(hdr) - } -} - #[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)] pub enum EncapMeta { Geneve(GeneveMeta), @@ -346,147 +163,6 @@ impl EncapMeta { } } -#[derive(Debug)] -pub enum UlpHdr<'a> { - Icmpv4(IcmpHdr<'a>), - 
Icmpv6(IcmpHdr<'a>), - Tcp(TcpHdr<'a>), - Udp(UdpHdr<'a>), -} - -impl<'a> UlpHdr<'a> { - pub fn csum_minus_hdr(&self) -> Option { - match self { - Self::Icmpv4(icmp) | Self::Icmpv6(icmp) => icmp.csum_minus_hdr(), - Self::Tcp(tcp) => tcp.csum_minus_hdr(), - Self::Udp(udp) => udp.csum_minus_hdr(), - } - } - - pub fn hdr_len(&self) -> usize { - match self { - Self::Icmpv4(icmp) | Self::Icmpv6(icmp) => icmp.hdr_len(), - Self::Tcp(tcp) => tcp.hdr_len(), - Self::Udp(udp) => udp.hdr_len(), - } - } - - pub fn set_pay_len(&mut self, len: usize) { - match self { - // Nothing to do for ICMP(v6) or TCP which determine payload len - // from IP header. - Self::Icmpv4(_) | Self::Icmpv6(_) => (), - Self::Tcp(_tcp) => (), - Self::Udp(udp) => udp.set_pay_len(len as u16), - } - } - - pub fn set_total_len(&mut self, len: usize) { - match self { - // Nothing to do for ICMP(v6) or TCP which determine payload len - // from IP header. - Self::Icmpv4(_) | Self::Icmpv6(_) => (), - Self::Tcp(_tcp) => (), - Self::Udp(udp) => udp.set_len(len as u16), - } - } - - pub fn udp(&self) -> Option<&UdpHdr> { - match self { - Self::Udp(udp) => Some(udp), - _ => None, - } - } -} - -impl<'a> From> for UlpHdr<'a> { - fn from(tcp: TcpHdr<'a>) -> Self { - UlpHdr::Tcp(tcp) - } -} - -impl<'a> From> for UlpHdr<'a> { - fn from(udp: UdpHdr<'a>) -> Self { - Self::Udp(udp) - } -} - -#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)] -pub enum UlpMeta { - Icmpv4(Icmpv4Meta), - Icmpv6(Icmpv6Meta), - Tcp(TcpMeta), - Udp(UdpMeta), -} - -impl UlpMeta { - /// Return the checksum value. - pub fn csum(&self) -> [u8; 2] { - match self { - Self::Icmpv4(icmp) => icmp.csum, - Self::Icmpv6(icmp6) => icmp6.csum, - Self::Tcp(tcp) => tcp.csum, - Self::Udp(udp) => udp.csum, - } - } - - pub fn has_csum(&self) -> bool { - self.csum() != [0; 2] - } - - pub fn is_pseudoheader_in_csum(&self) -> bool { - !matches!(self, Self::Icmpv4(_)) - } - - /// Return the destination port, if any. 
- pub fn dst_port(&self) -> Option { - match self { - Self::Icmpv4(_) => None, - Self::Icmpv6(_) => None, - Self::Tcp(tcp) => Some(tcp.dst), - Self::Udp(udp) => Some(udp.dst), - } - } - - pub fn hdr_len(&self) -> usize { - match self { - Self::Icmpv4(icmp) => icmp.hdr_len(), - Self::Icmpv6(icmp6) => icmp6.hdr_len(), - Self::Tcp(tcp) => tcp.hdr_len(), - Self::Udp(udp) => udp.hdr_len(), - } - } - - /// Return a pseudo port used to differentiate flows if the - /// ULP does not include source/dest ports. - pub fn pseudo_port(&self) -> Option { - match self { - Self::Icmpv4(icmp) => icmp.echo_id(), - Self::Icmpv6(icmp6) => icmp6.echo_id(), - _ => None, - } - } - - /// Return the source port, if any. - pub fn src_port(&self) -> Option { - match self { - Self::Icmpv4(_) => None, - Self::Icmpv6(_) => None, - Self::Tcp(tcp) => Some(tcp.src), - Self::Udp(udp) => Some(udp.src), - } - } - - pub fn emit(&self, dst: &mut [u8]) { - match self { - Self::Icmpv4(icmp) => icmp.emit(dst), - Self::Icmpv6(icmp6) => icmp6.emit(dst), - Self::Tcp(tcp) => tcp.emit(dst), - Self::Udp(udp) => udp.emit(dst), - } - } -} - #[derive( Clone, Copy, Debug, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize, )] @@ -495,16 +171,6 @@ pub enum UlpPush { Udp(UdpPush), } -impl PushAction for UlpPush { - fn push(&self) -> UlpMeta { - match self { - Self::Tcp(tcp) => UlpMeta::from(tcp.push()), - - Self::Udp(udp) => UlpMeta::from(udp.push()), - } - } -} - impl From for UlpPush { fn from(tcp: TcpPush) -> Self { UlpPush::Tcp(tcp) @@ -523,24 +189,6 @@ pub enum UlpMod { Udp(UdpMod), } -impl ModifyAction for UlpMod { - fn modify(&self, meta: &mut UlpMeta) { - match (self, meta) { - (Self::Tcp(spec), UlpMeta::Tcp(meta)) => { - spec.modify(meta); - } - - (Self::Udp(spec), UlpMeta::Udp(meta)) => { - spec.modify(meta); - } - - (spec, meta) => { - panic!("differeing ULP meta and spec: {:?} {:?}", meta, spec); - } - } - } -} - impl From for UlpMod { fn from(tcp: TcpMod) -> Self { UlpMod::Tcp(tcp) @@ -553,55 +201,6 
@@ impl From for UlpMod { } } -impl From for UlpMeta { - fn from(icmp: Icmpv4Meta) -> Self { - UlpMeta::Icmpv4(icmp) - } -} - -impl From for UlpMeta { - fn from(icmp6: Icmpv6Meta) -> Self { - UlpMeta::Icmpv6(icmp6) - } -} - -impl From for UlpMeta { - fn from(tcp: TcpMeta) -> Self { - UlpMeta::Tcp(tcp) - } -} - -impl From for UlpMeta { - fn from(udp: UdpMeta) -> Self { - UlpMeta::Udp(udp) - } -} - -impl<'a> From<&UlpHdr<'a>> for UlpMeta { - fn from(ulp: &UlpHdr) -> Self { - match ulp { - UlpHdr::Icmpv4(icmp) => UlpMeta::Icmpv4(Icmpv4Meta::from(icmp)), - UlpHdr::Icmpv6(icmp6) => UlpMeta::Icmpv6(Icmpv6Meta::from(icmp6)), - UlpHdr::Tcp(tcp) => UlpMeta::Tcp(TcpMeta::from(tcp)), - UlpHdr::Udp(udp) => UlpMeta::Udp(UdpMeta::from(udp)), - } - } -} - -impl HeaderActionModify for UlpMeta { - fn run_modify( - &mut self, - spec: &UlpMetaModify, - ) -> Result<(), HeaderActionError> { - match self { - UlpMeta::Icmpv4(icmp_meta) => icmp_meta.run_modify(spec), - UlpMeta::Icmpv6(icmp6_meta) => icmp6_meta.run_modify(spec), - UlpMeta::Tcp(tcp_meta) => tcp_meta.run_modify(spec), - UlpMeta::Udp(udp_meta) => udp_meta.run_modify(spec), - } - } -} - pub trait HasInnerCksum { const HAS_CKSUM: bool; } @@ -624,29 +223,6 @@ impl HasInnerCksum for Option { const HAS_CKSUM: bool = T::HAS_CKSUM; } -// impl Transform for Option -// where -// P: PushAction + fmt::Debug, -// M: ModifyAction + fmt::Debug, -// X: Transform + From -// { -// fn act_on(&mut self, action: &HeaderAction) -> Result { -// match (action, self) { -// (HeaderAction::Ignore, _) => Ok(false), -// (HeaderAction::Push(p), a) => { -// *a = Some(p.push().into()); -// Ok(X::HAS_CKSUM) -// }, -// (HeaderAction::Pop, a) => { -// *a = None; -// Ok(X::HAS_CKSUM) -// } -// (a @ HeaderAction::Modify(..), Some(h)) => h.act_on(a), -// (_, None) => Err(HeaderActionError::MissingHeader), -// } -// } -// } - impl Transform for X where P: PushAction + fmt::Debug, diff --git a/lib/opte/src/engine/icmp/mod.rs b/lib/opte/src/engine/icmp/mod.rs index 
e5b29ce9..d3f47b0d 100644 --- a/lib/opte/src/engine/icmp/mod.rs +++ b/lib/opte/src/engine/icmp/mod.rs @@ -12,7 +12,6 @@ pub mod v6; use super::checksum::Checksum as OpteCsum; use super::checksum::HeaderChecksum; use super::headers::HeaderActionError; -use super::headers::RawHeader; use super::packet::PacketReadMut; use super::packet::ReadErr; use crate::d_error::DError; @@ -29,13 +28,13 @@ use crate::engine::rule::HairpinAction; use alloc::vec::Vec; use core::fmt; use core::fmt::Display; +use ingot::types::primitives::u16be; +use ingot::Ingot; pub use opte_api::ip::Protocol; use serde::Deserialize; use serde::Serialize; use smoltcp::phy::Checksum; use smoltcp::phy::ChecksumCapabilities as Csum; -pub use v4::Icmpv4Meta; -pub use v6::Icmpv6Meta; use zerocopy::ByteSlice; use zerocopy::FromBytes; use zerocopy::Immutable; @@ -44,54 +43,6 @@ use zerocopy::KnownLayout; use zerocopy::Ref; use zerocopy::Unaligned; -#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)] -pub struct IcmpMeta { - pub msg_type: T, - pub msg_code: u8, - pub csum: [u8; 2], - pub rest_of_header: [u8; 4], -} - -impl + Copy> IcmpMeta { - // This assumes the dst is large enough. - #[inline] - pub fn emit(&self, dst: &mut [u8]) { - debug_assert!(dst.len() >= IcmpHdr::SIZE); - dst[0] = self.msg_type.into(); - dst[1] = self.msg_code; - dst[2..4].copy_from_slice(&self.csum); - dst[4..8].copy_from_slice(&self.rest_of_header); - } - - #[inline] - pub fn hdr_len(&self) -> usize { - IcmpHdr::SIZE - } - - #[inline] - pub fn body_echo(&self) -> Ref<&[u8], IcmpEchoRaw> { - // Panic safety: Size *must* be 4B by construction. - IcmpEchoRaw::new(&self.rest_of_header[..]).unwrap() - } - - #[inline] - pub fn body_echo_mut(&mut self) -> Ref<&mut [u8], IcmpEchoRaw> { - // Panic safety: Size *must* be 4B by construction. 
- IcmpEchoRaw::new_mut(&mut self.rest_of_header[..]).unwrap() - } -} - -impl<'a, T: From> From<&IcmpHdr<'a>> for IcmpMeta { - fn from(hdr: &IcmpHdr<'a>) -> Self { - Self { - msg_type: hdr.base.msg_type.into(), - msg_code: hdr.base.msg_code, - csum: hdr.base.csum, - rest_of_header: hdr.base.rest_of_header, - } - } -} - /// Shared methods for handling ICMPv4/v6 Echo fields. pub trait QueryEcho { /// Extract an ID from the body of an ICMP(v6) packet. @@ -100,142 +51,10 @@ pub trait QueryEcho { fn echo_id(&self) -> Option; } -// This covers both v4/v6 ICMP Echo rewriting for SNAT compatibility. -impl + Copy> HeaderActionModify for IcmpMeta -where - IcmpMeta: QueryEcho, -{ - fn run_modify( - &mut self, - spec: &UlpMetaModify, - ) -> Result<(), HeaderActionError> { - let Some(new_id) = spec.icmp_id else { - return Ok(()); - }; - - if self.echo_id().is_none() { - return Ok(()); - } - - let mut echo_data = self.body_echo_mut(); - echo_data.id = new_id.to_be_bytes(); - - Ok(()) - } -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq, DError)] -pub enum IcmpHdrError { - ReadError(ReadErr), -} - -impl From for IcmpHdrError { - fn from(error: ReadErr) -> Self { - IcmpHdrError::ReadError(error) - } -} - -#[derive(Debug)] -pub struct IcmpHdr<'a> { - base: Ref<&'a mut [u8], IcmpHdrRaw>, -} - -impl<'a> IcmpHdr<'a> { - pub const SIZE: usize = IcmpHdrRaw::SIZE; - - /// Offset to the start of the ICMP(v6) checksum field. - pub const CSUM_BEGIN_OFFSET: usize = 2; - - /// Offset to the end of the ICMP(v6) checksum field. - pub const CSUM_END_OFFSET: usize = 4; - - pub fn csum_minus_hdr(&self) -> Option { - if self.base.csum != [0; 2] { - let mut csum = OpteCsum::from(HeaderChecksum::wrap(self.base.csum)); - let bytes = self.base.as_bytes(); - csum.sub_bytes(&bytes[..Self::CSUM_BEGIN_OFFSET]); - csum.sub_bytes(&bytes[Self::CSUM_END_OFFSET..]); - Some(csum) - } else { - None - } - } - - /// Return the header length, in bytes. 
- pub fn hdr_len(&self) -> usize { - Self::SIZE - } - - pub fn parse<'b>( - rdr: &'b mut impl PacketReadMut<'a>, - ) -> Result { - let src = rdr.slice_mut(IcmpHdr::SIZE)?; - Ok(Self { base: IcmpHdrRaw::new_mut(src)? }) - } -} - -/// Note: For now we keep this unaligned to be safe. -#[repr(C)] -#[derive( - Clone, Debug, FromBytes, IntoBytes, Unaligned, Immutable, KnownLayout, -)] -pub struct IcmpHdrRaw { - pub msg_type: u8, - pub msg_code: u8, - pub csum: [u8; 2], - pub rest_of_header: [u8; 4], -} - -impl IcmpHdrRaw { - /// An ICMP(v6) header is always 8 bytes. - pub const SIZE: usize = core::mem::size_of::(); -} - -impl<'a> RawHeader<'a> for IcmpHdrRaw { - #[inline] - fn new_mut(src: &mut [u8]) -> Result, ReadErr> { - debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::from_bytes(src).ok() { - Some(hdr) => hdr, - None => return Err(ReadErr::BadLayout), - }; - Ok(hdr) - } -} - /// Internal structure of an ICMP(v6) Echo(Reply)'s rest_of_header. -#[repr(C)] -#[derive( - Clone, Debug, FromBytes, IntoBytes, Unaligned, Immutable, KnownLayout, -)] -pub struct IcmpEchoRaw { - pub id: [u8; 2], - pub sequence: [u8; 2], -} - -impl IcmpEchoRaw { - /// Echo-specific fields are always 4 bytes. 
- pub const SIZE: usize = core::mem::size_of::(); -} - -impl<'a> RawHeader<'a> for IcmpEchoRaw { - #[inline] - fn new_mut(src: &mut [u8]) -> Result, ReadErr> { - debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::from_bytes(src).ok() { - Some(hdr) => hdr, - None => return Err(ReadErr::BadLayout), - }; - Ok(hdr) - } - - #[inline] - fn new(src: &[u8]) -> Result, ReadErr> { - debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::from_bytes(src).ok() { - Some(hdr) => hdr, - None => return Err(ReadErr::BadLayout), - }; - Ok(hdr) - } +#[derive(Clone, Debug, Eq, Hash, PartialEq, Ingot)] +#[ingot(impl_default)] +pub struct IcmpEcho { + pub id: u16be, + pub sequence: u16be, } diff --git a/lib/opte/src/engine/icmp/v4.rs b/lib/opte/src/engine/icmp/v4.rs index a150993c..0f061b11 100644 --- a/lib/opte/src/engine/icmp/v4.rs +++ b/lib/opte/src/engine/icmp/v4.rs @@ -23,24 +23,6 @@ use smoltcp::wire::Icmpv4Message; use smoltcp::wire::Icmpv4Packet; use smoltcp::wire::Icmpv4Repr; -pub type Icmpv4Meta = IcmpMeta; - -impl QueryEcho for Icmpv4Meta { - /// Extract an ID from the body of an ICMPv4 packet to use as a - /// pseudo port for flow differentiation. - /// - /// This method returns `None` for any non-echo packets. 
- #[inline] - fn echo_id(&self) -> Option { - match self.msg_type.inner { - Icmpv4Message::EchoRequest | Icmpv4Message::EchoReply => { - Some(u16::from_be_bytes(self.body_echo().id)) - } - _ => None, - } - } -} - impl HairpinAction for IcmpEchoReply { fn implicit_preds(&self) -> (Vec, Vec) { let hdr_preds = vec![ diff --git a/lib/opte/src/engine/icmp/v6.rs b/lib/opte/src/engine/icmp/v6.rs index 90077879..e573eade 100644 --- a/lib/opte/src/engine/icmp/v6.rs +++ b/lib/opte/src/engine/icmp/v6.rs @@ -33,24 +33,6 @@ use smoltcp::wire::NdiscNeighborFlags; use smoltcp::wire::NdiscRepr; use smoltcp::wire::RawHardwareAddress; -pub type Icmpv6Meta = IcmpMeta; - -impl QueryEcho for Icmpv6Meta { - /// Extract an ID from the body of an ICMPv6 packet to use as a - /// pseudo port for flow differentiation. - /// - /// This method returns `None` for any non-echo packets. - #[inline] - fn echo_id(&self) -> Option { - match self.msg_type.inner { - Icmpv6Message::EchoRequest | Icmpv6Message::EchoReply => { - Some(u16::from_be_bytes(self.body_echo().id)) - } - _ => None, - } - } -} - /// An ICMPv6 message type #[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)] #[serde(from = "u8", into = "u8")] diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index a824d183..8ca1d45d 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -3,6 +3,9 @@ use super::checksum::Checksum; use super::checksum::HeaderChecksum; use super::ether::EtherMeta; use super::ether::EtherMod; +use super::geneve::geneve_has_oxide_external; +use super::geneve::OxideOption; +use super::geneve::GENEVE_OPT_CLASS_OXIDE; use super::geneve::GENEVE_PORT; use super::headers::EncapMeta; use super::headers::EncapMod; @@ -10,12 +13,13 @@ use super::headers::EncapPush; use super::headers::HasInnerCksum; use super::headers::HeaderActionError; use super::headers::HeaderActionModify; -use super::headers::IpMeta; use super::headers::IpMod; use 
super::headers::IpPush; use super::headers::PushAction; use super::headers::UlpMetaModify; +use super::icmp::IcmpEchoRef; use super::icmp::QueryEcho; +use super::icmp::ValidIcmpEcho; use super::ingot_base::Ethernet; use super::ingot_base::EthernetMut; use super::ingot_base::EthernetPacket; @@ -79,6 +83,8 @@ use illumos_sys_hdrs::uintptr_t; use ingot::ethernet::Ethertype; use ingot::geneve::Geneve; use ingot::geneve::GeneveMut; +use ingot::geneve::GeneveOpt; +use ingot::geneve::GeneveOptionType; use ingot::geneve::GenevePacket; use ingot::geneve::GeneveRef; use ingot::geneve::ValidGeneve; @@ -100,6 +106,7 @@ use ingot::types::Emit; use ingot::types::EmitDoesNotRelyOnBufContents; use ingot::types::Header as IngotHeader; use ingot::types::HeaderLen; +use ingot::types::HeaderParse; use ingot::types::InlineHeader; use ingot::types::NextLayer; use ingot::types::ParseControl; @@ -990,10 +997,6 @@ pub struct OpteMeta { pub inner_ulp: Option>, } -pub type Test = OpteMeta<&'static [u8]>; -pub type Test2 = ValidNoEncap<&'static [u8]>; -pub type Test3 = ValidGeneveOverV6<&'static [u8]>; - pub type OpteParsed = IngotParsed::Chunk>, T>; pub type OpteParsed2 = IngotParsed; @@ -1032,19 +1035,41 @@ impl<'a> Emit for SizeHoldingEncap<'a> { fn emit_raw(&self, buf: V) -> usize { match self.meta { EncapMeta::Geneve(g) => { + let mut opts = vec![]; + + if g.oxide_external_pkt { + opts.push(GeneveOpt { + class: GENEVE_OPT_CLASS_OXIDE, + option_type: GeneveOptionType( + OxideOption::External.opt_type(), + ), + ..Default::default() + }); + } + + let options = Repeated::new(opts); + let opt_len_unscaled = options.packet_length(); + let opt_len = (opt_len_unscaled >> 2) as u8; + + let geneve = Geneve { + protocol_type: Ethertype::ETHERNET, + vni: g.vni, + opt_len, + options, + ..Default::default() + }; + + let length = self.encapped_len + + (Udp::MINIMUM_LENGTH + geneve.packet_length()) as u16; + ( Udp { source: g.entropy, - destination: 6081, - // TODO: account for options. 
- length: self.encapped_len + 16, - ..Default::default() - }, - Geneve { - protocol_type: Ethertype::ETHERNET, - vni: g.vni, + destination: GENEVE_PORT, + length, ..Default::default() }, + &geneve, ) .emit_raw(buf) } @@ -1286,9 +1311,7 @@ impl PacketHeaders { Some((g.vni, g.oxide_external_pkt)) } Some(InlineHeader::Raw(ValidEncapMeta::Geneve(_, g))) => { - // TODO: hack. - let oxide_external = g.1.packet_length() != 0; - Some((g.vni(), oxide_external)) + Some((g.vni(), valid_geneve_has_oxide_external(&g))) } None => None, } @@ -1892,9 +1915,14 @@ impl Packet2> { T::Chunk: ByteSliceMut, { self.state.inner_csum_dirty |= xform.run(&mut self.state.meta)?; - // Given that n_transform layers is 1 or 2, probably won't - // save too much by trying to tie to a generation number. - // TODO: profile. + + // Recomputing this is a little bit wasteful, since we're moving + // rebuilding a static repr from packet fields. This is a necesary + // part of slowpath use because layers are designed around intermediate + // flowkeys. + // + // We *could* elide this on non-compiled UFT transforms, but we do not + // need those today. self.state.flow = InnerFlowId::from(self.meta()); Ok(()) } @@ -2066,9 +2094,16 @@ impl Packet2> { where T::Chunk: ByteSliceMut, { + // If we know that no transform touched a field which features in + // an inner transport cksum (L4/L3 src/dst, most realistically). if !self.state.inner_csum_dirty { return; } + + // Flag to indicate if an IP header/ULP checksums were + // provided. If the checksum is zero, it's assumed heardware + // checksum offload is being used, and OPTE should not update + // the checksum. let update_ip = self.state.meta.has_ip_csum(); let update_ulp = self.state.meta.has_ulp_csum(); @@ -2249,8 +2284,6 @@ fn csum_minus_hdr(ulp: &ValidUlp) -> Option { csum.sub_bytes(&b[0..16]); csum.sub_bytes(&b[18..]); - // TODO: bad bound? 
- // csum.sub_bytes(tcp.1.as_ref()); csum.sub_bytes(match &tcp.1 { ingot::types::Header::Repr(v) => &v[..], ingot::types::Header::Raw(v) => &v[..], @@ -2554,9 +2587,11 @@ impl QueryEcho for IcmpV4Packet { #[inline] fn echo_id(&self) -> Option { match (self.code(), self.ty()) { - (0, 0) | (0, 8) => Some(u16::from_be_bytes( - self.rest_of_hdr()[..2].try_into().unwrap(), - )), + (0, 0) | (0, 8) => { + ValidIcmpEcho::parse(self.rest_of_hdr_ref().as_slice()) + .ok() + .map(|(v, ..)| v.id()) + } _ => None, } } @@ -2566,9 +2601,11 @@ impl QueryEcho for IcmpV6Packet { #[inline] fn echo_id(&self) -> Option { match (self.code(), self.ty()) { - (0, 128) | (0, 129) => Some(u16::from_be_bytes( - self.rest_of_hdr()[..2].try_into().unwrap(), - )), + (0, 128) | (0, 129) => { + ValidIcmpEcho::parse(&self.rest_of_hdr_ref()[..]) + .ok() + .map(|(v, ..)| v.id()) + } _ => None, } } @@ -2894,80 +2931,6 @@ impl From } } -impl From for InlineHeader> { - #[inline] - fn from(value: IpMeta) -> Self { - match value { - IpMeta::Ip4(v4) => InlineHeader::Repr( - Ipv4 { - ihl: (v4.hdr_len / 4) as u8, - total_len: v4.total_len, - identification: v4.ident, - protocol: IpProtocol(u8::from(v4.proto)), - checksum: u16::from_be_bytes(v4.csum), - source: v4.src, - destination: v4.dst, - flags: Ipv4Flags::DONT_FRAGMENT, - ..Default::default() - } - .into(), - ), - IpMeta::Ip6(v6) => InlineHeader::Repr( - Ipv6 { - payload_len: v6.pay_len, - next_header: IpProtocol(u8::from(v6.next_hdr)), - hop_limit: v6.hop_limit, - source: v6.src, - destination: v6.dst, - v6ext: Repeated::default(), // TODO - ..Default::default() - } - .into(), - ), - } - } -} - -impl From for L3 { - #[inline] - fn from(value: IpMeta) -> Self { - match value { - IpMeta::Ip4(v4) => L3::Ipv4( - Ipv4 { - ihl: (v4.hdr_len / 4) as u8, - total_len: v4.total_len, - identification: v4.ident, - protocol: IpProtocol(u8::from(v4.proto)), - checksum: u16::from_be_bytes(v4.csum), - source: v4.src, - destination: v4.dst, - flags: 
Ipv4Flags::DONT_FRAGMENT, - ..Default::default() - } - .into(), - ), - IpMeta::Ip6(v6) => L3::Ipv6( - Ipv6 { - payload_len: v6.pay_len, - next_header: IpProtocol(u8::from(v6.next_hdr)), - hop_limit: v6.hop_limit, - source: v6.src, - destination: v6.dst, - v6ext: Repeated::default(), // TODO - ..Default::default() - } - .into(), - ), - } - } -} - -// impl PushAction for Ethernet { -// fn push(&self) -> Ethernet { -// *self -// } -// } - impl PushAction>> for EtherMeta { @@ -2995,26 +2958,6 @@ impl PushAction> for EtherMeta { } } -// impl PushAction>> for IpPush { -// fn push(&self) -> InlineHeader> { -// InlineHeader::Repr(match self { -// IpPush::Ip4(v4) => L3Repr::Ipv4(Ipv4 { -// protocol: IpProtocol(u8::from(v4.proto)), -// source: v4.src.bytes().into(), -// destination: v4.dst.bytes().into(), -// flags: Ipv4Flags::DONT_FRAGMENT, -// ..Default::default() -// }), -// IpPush::Ip6(v6) => L3Repr::Ipv6(Ipv6 { -// next_header: IpProtocol(u8::from(v6.proto)), -// source: v6.src.bytes().into(), -// destination: v6.dst.bytes().into(), -// ..Default::default() -// }), -// }) -// } -// } - impl PushAction> for IpPush { fn push(&self) -> L3 { match self { diff --git a/lib/opte/src/engine/ip4.rs b/lib/opte/src/engine/ip4.rs index a5ed1afd..a2f33d8b 100644 --- a/lib/opte/src/engine/ip4.rs +++ b/lib/opte/src/engine/ip4.rs @@ -10,7 +10,6 @@ use super::checksum::Checksum; use super::checksum::HeaderChecksum; use super::headers::ModifyAction; use super::headers::PushAction; -use super::headers::RawHeader; use super::packet::PacketReadMut; use super::packet::ReadErr; use super::predicate::MatchExact; @@ -172,113 +171,6 @@ impl MatchExact for Protocol { } } -#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)] -pub struct Ipv4Meta { - pub src: Ipv4Addr, - pub dst: Ipv4Addr, - pub proto: Protocol, - pub ttl: u8, - pub ident: u16, - pub hdr_len: u16, - pub total_len: u16, - pub csum: [u8; 2], -} - -impl Default for Ipv4Meta { - fn default() -> Self { - Self { - src: 
Ipv4Addr::ANY_ADDR, - dst: Ipv4Addr::ANY_ADDR, - proto: Protocol::Unknown(255), - ttl: 64, - ident: 0, - hdr_len: Ipv4Hdr::BASE_SIZE as u16, - total_len: 0, - csum: [0; 2], - } - } -} - -impl Ipv4Meta { - pub fn compute_hdr_csum(&mut self) { - let mut hdr = [0; 20]; - self.csum = [0; 2]; - self.emit(&mut hdr); - let csum = Checksum::compute(&hdr); - self.csum = HeaderChecksum::from(csum).bytes(); - } - - pub fn compute_ulp_csum( - &self, - opt: UlpCsumOpt, - ulp_hdr: &[u8], - body: &[u8], - ) -> Checksum { - match opt { - UlpCsumOpt::Partial => todo!("implement partial csum"), - UlpCsumOpt::Full => { - let mut csum = self.pseudo_csum(); - csum.add_bytes(ulp_hdr); - csum.add_bytes(body); - csum - } - } - } - - #[inline] - pub fn emit(&self, dst: &mut [u8]) { - // The raw header relies on the slice being the exactly length. - debug_assert_eq!(dst.len(), Ipv4Hdr::BASE_SIZE); - let mut raw = Ipv4HdrRaw::new_mut(dst).unwrap(); - Ref::write(&mut raw, Ipv4HdrRaw::from(self)); - } - - /// Return the length of the header needed to emit the metadata. - pub fn hdr_len(&self) -> usize { - Ipv4Hdr::BASE_SIZE - } - - /// Populate `bytes` with the pseudo header bytes. - pub fn pseudo_bytes(&self, bytes: &mut [u8; 12]) { - bytes[0..4].copy_from_slice(&self.src.bytes()); - bytes[4..8].copy_from_slice(&self.dst.bytes()); - let ulp_len = self.total_len - self.hdr_len; - let len_bytes = ulp_len.to_be_bytes(); - bytes[8..12].copy_from_slice(&[ - 0, - u8::from(self.proto), - len_bytes[0], - len_bytes[1], - ]); - } - - /// Return a [`Checksum`] of the pseudo header. 
- pub fn pseudo_csum(&self) -> Checksum { - let mut pseudo_bytes = [0u8; 12]; - self.pseudo_bytes(&mut pseudo_bytes); - Checksum::compute(&pseudo_bytes) - } -} - -impl<'a> From<&Ipv4Hdr<'a>> for Ipv4Meta { - fn from(ip4: &Ipv4Hdr) -> Self { - let raw = &ip4.bytes; - - let hdr_len = u16::from((raw.ver_hdr_len & IPV4_HDR_LEN_MASK) * 4); - - Self { - src: Ipv4Addr::from(raw.src), - dst: Ipv4Addr::from(raw.dst), - proto: Protocol::from(raw.proto), - ttl: raw.ttl, - ident: u16::from_be_bytes(raw.ident), - hdr_len, - total_len: u16::from_be_bytes(raw.total_len), - csum: raw.csum, - } - } -} - #[derive( Clone, Copy, Debug, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize, )] @@ -288,17 +180,6 @@ pub struct Ipv4Push { pub proto: Protocol, } -impl PushAction for Ipv4Push { - fn push(&self) -> Ipv4Meta { - Ipv4Meta { - src: self.src, - dst: self.dst, - proto: self.proto, - ..Default::default() - } - } -} - #[derive(Clone, Debug, Default, Deserialize, Serialize)] pub struct Ipv4Mod { pub src: Option, @@ -306,158 +187,6 @@ pub struct Ipv4Mod { pub proto: Option, } -impl ModifyAction for Ipv4Mod { - fn modify(&self, meta: &mut Ipv4Meta) { - if let Some(src) = self.src { - meta.src = src; - } - - if let Some(dst) = self.dst { - meta.dst = dst; - } - - if let Some(proto) = self.proto { - meta.proto = proto; - } - } -} - -#[derive(Debug)] -pub struct Ipv4Hdr<'a> { - bytes: Ref<&'a mut [u8], Ipv4HdrRaw>, -} - -impl<'a> Ipv4Hdr<'a> { - pub const BASE_SIZE: usize = Ipv4HdrRaw::SIZE; - pub const CSUM_BEGIN: usize = 10; - pub const CSUM_END: usize = 12; - - #[inline] - pub fn csum(&self) -> [u8; 2] { - self.bytes.csum - } - - #[inline] - pub fn dst(&self) -> Ipv4Addr { - Ipv4Addr::from(self.bytes.dst) - } - - /// Return the header length, in bytes. 
- #[inline] - pub fn hdr_len(&self) -> u16 { - u16::from((self.bytes.ver_hdr_len & IPV4_HDR_LEN_MASK) * 4) - } - - #[inline] - pub fn ident(&self) -> u16 { - u16::from_be_bytes(self.bytes.ident) - } - - pub fn parse<'b, R>(rdr: &'b mut R) -> Result - where - R: PacketReadMut<'a>, - { - let src = rdr.slice_mut(Ipv4HdrRaw::SIZE)?; - let ip = Self { bytes: Ipv4HdrRaw::new_mut(src)? }; - - match ip.version() { - 4 => {} - vsn => return Err(Ipv4HdrError::BadVersion { vsn }), - } - - let hdr_len = ip.hdr_len(); - - if (hdr_len as usize) < Ipv4HdrRaw::SIZE { - return Err(Ipv4HdrError::HeaderTruncated { hdr_len }); - } - - if ip.total_len() < hdr_len { - return Err(Ipv4HdrError::BadTotalLen { - total_len: ip.total_len(), - }); - } - - // TODO: actually capture and re-emit ipv4 options. - // before, they were accidentally *becoming* the ULP. - // now, we're at least skipping them. - let remaining_bytes = (hdr_len as usize) - Ipv4HdrRaw::SIZE; - rdr.seek(remaining_bytes) - .map_err(|_| Ipv4HdrError::HeaderTruncated { hdr_len })?; - - let _proto = Protocol::from(ip.bytes.proto); - - Ok(ip) - } - - /// Return the [`Protocol`]. - #[inline] - pub fn proto(&self) -> Protocol { - // Unwrap: We verified the proto is good upon parsing. - Protocol::from(self.bytes.proto) - } - - /// Populate `bytes` with the pseudo header bytes. - pub fn pseudo_bytes(&self, bytes: &mut [u8; 12]) { - bytes[0..4].copy_from_slice(&self.bytes.src); - bytes[4..8].copy_from_slice(&self.bytes.dst); - let len_bytes = self.ulp_len().to_be_bytes(); - bytes[8..12].copy_from_slice(&[ - 0, - self.bytes.proto, - len_bytes[0], - len_bytes[1], - ]); - } - - /// Return a [`Checksum`] of the pseudo header. - pub fn pseudo_csum(&self) -> Checksum { - let mut pseudo_bytes = [0u8; 12]; - self.pseudo_bytes(&mut pseudo_bytes); - Checksum::compute(&pseudo_bytes) - } - - #[inline] - pub fn set_csum(&mut self, csum: [u8; 2]) { - self.bytes.csum = csum; - } - - /// Set the `Total Length` field. 
- #[inline] - pub fn set_total_len(&mut self, len: u16) { - self.bytes.total_len = len.to_be_bytes() - } - - /// Return the source address. - #[inline] - pub fn src(&self) -> Ipv4Addr { - Ipv4Addr::from(self.bytes.src) - } - - /// Return the value of the `Total Length` field. - #[inline] - pub fn total_len(&self) -> u16 { - u16::from_be_bytes(self.bytes.total_len) - } - - #[inline] - pub fn ttl(&self) -> u8 { - self.bytes.ttl - } - - /// Return the length of the Upper Layer Protocol (ULP) portion of - /// the packet. - #[inline] - pub fn ulp_len(&self) -> u16 { - self.total_len() - self.hdr_len() - } - - /// Return the reported IP version field from the packet. - #[inline] - pub fn version(&self) -> u8 { - self.bytes.ver_hdr_len >> IPV4_HDR_VER_SHIFT - } -} - /// Options for computing a ULP checksum. #[derive(Clone, Copy, Debug)] pub enum UlpCsumOpt { @@ -471,99 +200,6 @@ pub enum UlpCsumOpt { Full, } -#[derive(Clone, Copy, Debug, Eq, PartialEq, DError)] -#[derror(leaf_data = Ipv4HdrError::derror_data)] -pub enum Ipv4HdrError { - BadTotalLen { total_len: u16 }, - BadVersion { vsn: u8 }, - HeaderTruncated { hdr_len: u16 }, - ReadError(ReadErr), - UnexpectedProtocol { protocol: u8 }, -} - -impl From for Ipv4HdrError { - fn from(error: ReadErr) -> Self { - Ipv4HdrError::ReadError(error) - } -} - -impl Ipv4HdrError { - fn derror_data(&self, data: &mut [u64]) { - data[0] = match self { - Self::BadTotalLen { total_len } => *total_len as u64, - Self::BadVersion { vsn } => *vsn as u64, - Self::HeaderTruncated { hdr_len } => *hdr_len as u64, - Self::UnexpectedProtocol { protocol } => *protocol as u64, - _ => 0, - } - } -} - -/// Note: For now we keep this unaligned to be safe. 
-#[repr(C)] -#[derive( - Clone, Debug, FromBytes, IntoBytes, Unaligned, Immutable, KnownLayout, -)] -pub struct Ipv4HdrRaw { - pub ver_hdr_len: u8, - pub dscp_ecn: u8, - pub total_len: [u8; 2], - pub ident: [u8; 2], - pub frag_and_flags: [u8; 2], - pub ttl: u8, - pub proto: u8, - pub csum: [u8; 2], - pub src: [u8; 4], - pub dst: [u8; 4], -} - -impl<'a> RawHeader<'a> for Ipv4HdrRaw { - #[inline] - fn new_mut(src: &mut [u8]) -> Result, ReadErr> { - debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::from_bytes(src).ok() { - Some(hdr) => hdr, - None => return Err(ReadErr::BadLayout), - }; - Ok(hdr) - } -} - -impl Default for Ipv4HdrRaw { - fn default() -> Self { - Ipv4HdrRaw { - ver_hdr_len: 0x45, - dscp_ecn: 0x0, - total_len: [0x0; 2], - ident: [0x0; 2], - frag_and_flags: [0x40, 0x0], - ttl: 64, - proto: u8::from(Protocol::Unknown(255)), - csum: [0x0; 2], - src: [0x0; 4], - dst: [0x0; 4], - } - } -} - -impl From<&Ipv4Meta> for Ipv4HdrRaw { - #[inline] - fn from(meta: &Ipv4Meta) -> Self { - Ipv4HdrRaw { - ver_hdr_len: 0x45, - dscp_ecn: 0x0, - total_len: meta.total_len.to_be_bytes(), - ident: meta.ident.to_be_bytes(), - frag_and_flags: [0x40, 0x0], - ttl: meta.ttl, - proto: u8::from(meta.proto), - csum: meta.csum, - src: meta.src.bytes(), - dst: meta.dst.bytes(), - } - } -} - #[cfg(test)] mod test { use super::*; diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 86c97d8a..5b051760 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -8,41 +8,20 @@ //! //! TODO //! -//! * Add a PacketChain type to represent a chain of one or more -//! indepenndent packets. Also consider having chains that represent -//! multiple packets for the same flow if it would be advantageous to -//! do so. -//! //! * Add hardware offload information to [`Packet`]. //! 
-use super::arp::ArpHdrError; use super::checksum::Checksum; use super::checksum::HeaderChecksum; -use super::ether::EtherHdr; -use super::ether::EtherHdrError; use super::ether::EtherMeta; -use super::geneve::GeneveHdr; -use super::geneve::GeneveHdrError; use super::geneve::GeneveMeta; use super::geneve::GENEVE_PORT; use super::headers::EncapMeta; use super::headers::IpAddr; -use super::headers::IpMeta; -use super::headers::UlpHdr; -use super::headers::UlpMeta; use super::headers::AF_INET; use super::headers::AF_INET6; -use super::icmp::IcmpHdr; -use super::icmp::IcmpHdrError; -use super::icmp::IcmpMeta; -use super::icmp::Icmpv4Meta; -use super::icmp::Icmpv6Meta; use super::ingot_packet::MsgBlk; use super::ip4::Ipv4Addr; -use super::ip4::Ipv4Hdr; -use super::ip4::Ipv4HdrError; -use super::ip4::Ipv4Meta; use super::ip4::Protocol; use super::ip6::Ipv6Addr; use super::ip6::Ipv6Hdr; @@ -61,12 +40,6 @@ use dyn_clone::DynClone; use serde::Deserialize; use serde::Serialize; // TODO should probably move these two into this module now. 
-use super::tcp::TcpHdr; -use super::tcp::TcpHdrError; -use super::tcp::TcpMeta; -use super::udp::UdpHdr; -use super::udp::UdpHdrError; -use super::udp::UdpMeta; use super::Direction; use alloc::string::String; use alloc::vec::Vec; @@ -215,219 +188,6 @@ impl Display for InnerFlowId { } } -impl From<&PacketMeta> for InnerFlowId { - fn from(meta: &PacketMeta) -> Self { - let (proto, addrs) = match &meta.inner.ip { - Some(IpMeta::Ip4(ip4)) => { - (ip4.proto, AddrPair::V4 { src: ip4.src, dst: ip4.dst }) - } - Some(IpMeta::Ip6(ip6)) => { - (ip6.proto, AddrPair::V6 { src: ip6.src, dst: ip6.dst }) - } - None => (Protocol::Unknown(255), FLOW_ID_DEFAULT.addrs), - }; - - let (src_port, dst_port) = meta - .inner - .ulp - .map(|ulp| { - ( - ulp.src_port().or_else(|| ulp.pseudo_port()).unwrap_or(0), - ulp.dst_port().or_else(|| ulp.pseudo_port()).unwrap_or(0), - ) - }) - .unwrap_or((0, 0)); - - InnerFlowId { proto: proto.into(), addrs, src_port, dst_port } - } -} - -/// The outer header metadata. -/// -/// All outer headers are always optional. -#[derive(Debug, Default)] -pub struct OuterMeta { - pub ether: Option, - pub ip: Option, - pub encap: Option, -} - -impl OuterMeta { - fn hdr_len(&self) -> usize { - let mut hdr_len = 0; - - if let Some(ether) = self.ether { - hdr_len += ether.hdr_len(); - } - - if let Some(ip) = self.ip { - hdr_len += ip.hdr_len(); - } - - if let Some(encap) = self.encap { - hdr_len += encap.hdr_len(); - } - - hdr_len - } -} - -/// The inner header metadata. -/// -/// There is always an Ethernet frame. 
-#[derive(Debug, Default)] -pub struct InnerMeta { - pub ether: EtherMeta, - pub ip: Option, - pub ulp: Option, -} - -impl InnerMeta { - fn has_ip_csum(&self) -> bool { - match self.ip { - Some(ip) => ip.has_csum(), - None => false, - } - } - - fn has_ulp_csum(&self) -> bool { - match self.ulp { - Some(ulp) => ulp.has_csum(), - None => false, - } - } - - fn hdr_len(&self) -> usize { - let mut hdr_len = self.ether.hdr_len(); - - if let Some(ip) = self.ip { - hdr_len += ip.hdr_len(); - } - - if let Some(ulp) = self.ulp { - hdr_len += ulp.hdr_len(); - } - - hdr_len - } - - pub fn is_tcp(&self) -> bool { - match self.ip.as_ref() { - Some(IpMeta::Ip4(ip4)) => ip4.proto == Protocol::TCP, - Some(IpMeta::Ip6(ip6)) => ip6.proto == Protocol::TCP, - _ => false, - } - } -} - -/// The various metadata of a packet. -/// -/// The packet metadata is a logical representation of the header data -/// that is relevant to processing. -#[derive(Debug, Default)] -pub struct PacketMeta { - pub outer: OuterMeta, - pub inner: InnerMeta, -} - -impl PacketMeta { - /// Return the number of bytes requires to emit the header - /// metadata into full headers. - fn hdr_len(&self) -> usize { - self.outer.hdr_len() + self.inner.hdr_len() - } - - /// Return the inner Ether metadata. - pub fn inner_ether(&self) -> &EtherMeta { - &self.inner.ether - } - - /// Return the inner IPv4 metadata. - pub fn inner_ip4(&self) -> Option<&Ipv4Meta> { - match &self.inner.ip { - Some(IpMeta::Ip4(ip4_meta)) => Some(ip4_meta), - _ => None, - } - } - - /// Return the inner IPv6 metadata. - pub fn inner_ip6(&self) -> Option<&Ipv6Meta> { - match &self.inner.ip { - Some(IpMeta::Ip6(x)) => Some(x), - _ => None, - } - } - - /// Return the inner ICMP metadata, if the inner ULP is ICMP. - pub fn inner_icmp(&self) -> Option<&Icmpv4Meta> { - match &self.inner.ulp { - Some(UlpMeta::Icmpv4(icmp)) => Some(icmp), - _ => None, - } - } - - /// Return the inner ICMPv6 metadata, if the inner ULP is ICMPv6. 
- pub fn inner_icmp6(&self) -> Option<&Icmpv6Meta> { - match &self.inner.ulp { - Some(UlpMeta::Icmpv6(icmp6)) => Some(icmp6), - _ => None, - } - } - - /// Return the inner TCP metadata, if the inner ULP is TCP. - /// Otherwise, return `None`. - pub fn inner_tcp(&self) -> Option<&TcpMeta> { - match &self.inner.ulp { - Some(UlpMeta::Tcp(tcp)) => Some(tcp), - _ => None, - } - } - - /// Return true if the inner ULP is TCP. - pub fn is_inner_tcp(&self) -> bool { - self.inner.is_tcp() - } - - /// Return the inner UDP metadata, if the inner ULP is UDP. - /// Otherwise return `None`. - pub fn inner_udp(&self) -> Option<&UdpMeta> { - match &self.inner.ulp { - Some(UlpMeta::Udp(udp)) => Some(udp), - _ => None, - } - } - - pub fn l4_hash(&self) -> Option { - let ulp = match self.inner.ulp { - Some(ulp) => ulp, - None => return None, - }; - let mut h = Hasher::new(); - match &self.inner.ip { - Some(IpMeta::Ip4(m)) => { - h.update(&m.src.bytes()); - h.update(&m.dst.bytes()); - h.update(&[u8::from(m.proto)]); - } - Some(IpMeta::Ip6(m)) => { - h.update(&m.src.bytes()); - h.update(&m.dst.bytes()); - h.update(&[u8::from(m.proto)]); - } - None => return None, - }; - let (src, dst) = match ulp { - UlpMeta::Tcp(t) => (t.src, t.dst), - UlpMeta::Udp(u) => (u.src, u.dst), - UlpMeta::Icmpv4(_) => (0, 0), //TODO use icmp id - UlpMeta::Icmpv6(_) => (0, 0), //TODO use icmp id - }; - h.update(&src.to_be_bytes()); - h.update(&dst.to_be_bytes()); - Some(h.finalize()) - } -} - /// The head and tail of an mblk_t list. struct PacketChainInner { head: NonNull, @@ -737,7 +497,7 @@ pub struct HdrInfo { } pub struct PacketInfo { - pub meta: PacketMeta, + // pub meta: PacketMeta, pub offsets: HeaderOffsets, // The body's checksum. It is up to the `NetworkImpl::Parser` on // whether to populate this field or not. 
The reason for @@ -776,7 +536,7 @@ pub struct BodyInfo { #[derive(Debug)] pub struct Parsed { len: usize, - meta: PacketMeta, + // meta: PacketMeta, flow: InnerFlowId, hdr_offsets: HeaderOffsets, body_csum: Option, @@ -934,278 +694,6 @@ impl Packet { Packet { avail, segs, state: Initialized { len } } } - pub fn parse_ether<'a>( - rdr: &mut PacketReaderMut<'a>, - ) -> Result<(HdrInfo, EtherHdr<'a>), ParseError> { - let ether = EtherHdr::parse(rdr)?; - let offset = HdrOffset::new(rdr.offset(), ether.hdr_len()); - let meta = EtherMeta::from(ðer); - Ok((HdrInfo { meta, offset }, ether)) - } - - pub fn parse_ip4<'a>( - rdr: &mut PacketReaderMut<'a>, - ) -> Result<(HdrInfo, Ipv4Hdr<'a>), ParseError> { - let ip = Ipv4Hdr::parse(rdr)?; - let offset = HdrOffset::new(rdr.offset(), usize::from(ip.hdr_len())); - let meta = IpMeta::from(Ipv4Meta::from(&ip)); - Ok((HdrInfo { meta, offset }, ip)) - } - - pub fn parse_ip6<'a>( - rdr: &mut PacketReaderMut<'a>, - ) -> Result<(HdrInfo, Ipv6Hdr<'a>), ParseError> { - let ip = Ipv6Hdr::parse(rdr)?; - let offset = HdrOffset::new(rdr.offset(), ip.hdr_len()); - let meta = IpMeta::from(Ipv6Meta::from(&ip)); - Ok((HdrInfo { meta, offset }, ip)) - } - - pub fn parse_icmp<'a>( - rdr: &mut PacketReaderMut<'a>, - ) -> Result<(HdrInfo, UlpHdr<'a>), ParseError> { - let icmp = IcmpHdr::parse(rdr)?; - let offset = HdrOffset::new(rdr.offset(), icmp.hdr_len()); - let icmp_meta = Icmpv4Meta::from(&icmp); - let meta = UlpMeta::from(icmp_meta); - Ok((HdrInfo { meta, offset }, UlpHdr::Icmpv4(icmp))) - } - - pub fn parse_icmp6<'a>( - rdr: &mut PacketReaderMut<'a>, - ) -> Result<(HdrInfo, UlpHdr<'a>), ParseError> { - let icmp6 = IcmpHdr::parse(rdr)?; - let offset = HdrOffset::new(rdr.offset(), icmp6.hdr_len()); - let icmp_meta = Icmpv6Meta::from(&icmp6); - let meta = UlpMeta::from(icmp_meta); - Ok((HdrInfo { meta, offset }, UlpHdr::Icmpv6(icmp6))) - } - - pub fn parse_tcp<'a>( - rdr: &mut PacketReaderMut<'a>, - ) -> Result<(HdrInfo, UlpHdr<'a>), ParseError> 
{ - let tcp = TcpHdr::parse(rdr)?; - let offset = HdrOffset::new(rdr.offset(), tcp.hdr_len()); - let meta = UlpMeta::from(TcpMeta::from(&tcp)); - Ok((HdrInfo { meta, offset }, UlpHdr::from(tcp))) - } - - pub fn parse_udp<'a>( - rdr: &mut PacketReaderMut<'a>, - ) -> Result<(HdrInfo, UlpHdr<'a>), ParseError> { - let udp = UdpHdr::parse(rdr)?; - let offset = HdrOffset::new(rdr.offset(), udp.hdr_len()); - let meta = UlpMeta::from(UdpMeta::from(&udp)); - Ok((HdrInfo { meta, offset }, UlpHdr::from(udp))) - } - - pub fn parse_geneve<'a>( - rdr: &mut PacketReaderMut<'a>, - ) -> Result<(HdrInfo, GeneveHdr<'a>), ParseError> { - // We don't need to store the UDP metadata here because any - // relevant fields can be reconstructed from knowledge of the - // packet body and the encap itself. - let udp_hdr = UdpHdr::parse(rdr)?; - - match udp_hdr.dst_port() { - GENEVE_PORT => { - let geneve = GeneveHdr::parse(rdr)?; - let offset = HdrOffset::new( - rdr.offset(), - geneve.hdr_len() + udp_hdr.hdr_len(), - ); - let meta = GeneveMeta::from((&udp_hdr, &geneve)); - Ok((HdrInfo { meta, offset }, geneve)) - } - port => Err(ParseError::UnexpectedDestPort(port)), - } - } - - pub fn parse_geneve_inner<'a>( - rdr: &mut PacketReaderMut<'a>, - ) -> Result<(HdrInfo, GeneveHdr<'a>), ParseError> { - let geneve = GeneveHdr::parse(rdr)?; - let offset = HdrOffset::new(rdr.offset(), geneve.hdr_len()); - let meta = GeneveMeta::from(&geneve); - Ok((HdrInfo { meta, offset }, geneve)) - } - - // pub fn parse( - // mut self, - // dir: Direction, - // net: impl NetworkParser, - // ) -> Result, ParseError> { - // let mut rdr = self.get_rdr_mut(); - - // let mut info = match dir { - // Direction::Out => net.parse_outbound(&mut rdr)?, - // Direction::In => net.parse_inbound(&mut rdr)?, - // }; - - // let (pkt_offset, mut seg_index, mut seg_offset, end_of_seg) = - // rdr.finish(); - - // // If we finished on the end of a segment, and there are more - // // segments to go, then bump the segment index and reset 
the - // // segment offset to properly indicate the start of the body. - // if end_of_seg && ((seg_index + 1) < self.segs.len()) { - // seg_index += 1; - // seg_offset = 0; - // } - - // assert!( - // self.state.len >= pkt_offset, - // "{} >= {}", - // self.state.len, - // pkt_offset, - // ); - - // let ulp_hdr_len = info.meta.inner.ulp.map(|u| u.hdr_len()).unwrap_or(0); - // let body_len = match info.meta.inner.ip { - // // If we have IP and ULP metadata, we can use those to compute - // // the payload length. - // // If there's no ULP, just return the L3 payload length. - // Some(IpMeta::Ip4(ip4)) => { - // // Total length here refers to the n_bytes in this packet, - // // so we won't get bogus overly long values in case of - // // fragmentation. - // let expected = ip4.hdr_len() + ulp_hdr_len; - - // usize::from(ip4.total_len).checked_sub(expected).ok_or( - // ParseError::BadInnerIpLen { - // expected, - // actual: usize::from(ip4.total_len), - // }, - // )? - // } - // Some(IpMeta::Ip6(ip6)) => usize::from(ip6.pay_len) - // .checked_sub(ulp_hdr_len) - // .ok_or(ParseError::BadInnerIpLen { - // expected: ulp_hdr_len, - // actual: usize::from(ip6.pay_len), - // })?, - - // // If there's no IP metadata, we fallback to considering any - // // remaining bytes in the packet buffer to be the body. - // None => self.state.len - pkt_offset, - // }; - // let mut body = - // BodyInfo { pkt_offset, seg_index, seg_offset, len: body_len }; - // let flow = InnerFlowId::from(&info.meta); - - // // Packet processing logic requires all headers to be in the leading - // // segment. Detect if this is not the case and squash segments - // // containing headers into one segment. This value represents the - // // inclusive upper bound of the squash. - // let squash_to = match (body.seg_index, body.seg_offset) { - // // The body is in the first segment meaning all headers are also in - // // the first segment. No squashing needed. 
- // (0, _) => 0, - - // // The body starts at a zero offset in segment n. This means we need - // // to squash all segments prior to n. - // (n, 0) => n - 1, - - // // The body starts at a non-zero offset in segment n. This means we - // // need to squash all segments up to and including n. - // (n, _) => n, - // }; - - // // If the squash bound is zero, there is nothing left to do here, just - // // return. - // if squash_to == 0 { - // return Ok(Packet { - // avail: self.avail, - // // The new packet is taking ownership of the segments. - // segs: core::mem::take(&mut self.segs), - // state: Parsed { - // len: self.state.len, - // hdr_offsets: info.offsets, - // meta: info.meta, - // flow, - // body_csum: info.body_csum, - // body, - // body_modified: false, - // }, - // }); - // } - - // // Calculate the body offset within the new squashed segment - // if body.seg_offset != 0 { - // for s in &self.segs[..squash_to] { - // body.seg_offset += s.len; - // } - // } - // body.seg_index -= squash_to; - - // // Determine how big the message block for the squashed segment needs to - // // be. - // let mut new_seg_size = 0; - // for s in &self.segs[..squash_to + 1] { - // new_seg_size += s.len; - // } - - // let extra_space = info.extra_hdr_space.unwrap_or(0); - // let mp = allocb(new_seg_size + extra_space); - // unsafe { - // (*mp).b_wptr = (*mp).b_wptr.add(extra_space); - // (*mp).b_rptr = (*mp).b_rptr.add(extra_space); - // for s in &self.segs[..squash_to + 1] { - // core::ptr::copy_nonoverlapping( - // (*s.mp).b_rptr, - // (*mp).b_wptr, - // s.len, - // ); - // (*mp).b_wptr = (*mp).b_wptr.add(s.len); - // } - // } - - // // Construct a new segment vector, tacking on any remaining segments - // // after the header segments. 
- // let orig_segs = core::mem::take(&mut self.segs); - // let mut segs = vec![unsafe { PacketSeg::wrap_mblk(mp) }]; - // if squash_to + 1 < orig_segs.len() { - // segs[0].link(&orig_segs[squash_to + 1]); - // segs.extend_from_slice(&orig_segs[squash_to + 1..]); - // } - // #[cfg(any(feature = "std", test))] - // for s in &orig_segs[..squash_to + 1] { - // mock_freeb(s.mp); - // } - - // let mut off = 0; - // for header_offsets in [ - // info.offsets.outer.ether.as_mut(), - // info.offsets.outer.ip.as_mut(), - // info.offsets.outer.encap.as_mut(), - // Some(&mut info.offsets.inner.ether), - // info.offsets.inner.ip.as_mut(), - // info.offsets.inner.ulp.as_mut(), - // ] - // .into_iter() - // .flatten() - // { - // header_offsets.pkt_pos = off; - // header_offsets.seg_idx = 0; - // header_offsets.seg_pos = off; - // off += header_offsets.hdr_len; - // } - - // Ok(Packet { - // avail: self.avail, - // segs, - // state: Parsed { - // len: self.state.len, - // hdr_offsets: info.offsets, - // meta: info.meta, - // flow, - // body_csum: info.body_csum, - // body, - // body_modified: false, - // }, - // }) - // } - pub fn seg0_wtr(&mut self) -> PacketSegWriter { self.segs[0].get_writer() } @@ -1446,220 +934,10 @@ impl Packet { Some(body_segs) } - /// Compute ULP and IP header checksum from scratch. - /// - /// This should really only be used for testing. - pub fn compute_checksums(&mut self) { - if let Some(ulp_off) = self.state.hdr_offsets.inner.ulp { - let mut body_rdr = self.get_body_rdr(); - let mut csum = Checksum::from(0u32); - loop { - let len = body_rdr.seg_left(); - match body_rdr.slice(len) { - Ok(seg_bytes) => csum.add_bytes(seg_bytes), - _ => break, - } - } - - self.state.body_csum = Some(csum); - - // Unwrap: Can't have a ULP without an IP. - let ip = self.meta().inner.ip.unwrap(); - // Add pseudo header checksum. - let pseudo_csum = ip.pseudo_csum(); - csum += pseudo_csum; - // All headers must reside in the first segment. 
- let seg0_bytes = self.segs[0].slice_mut(); - // Determine ULP slice and add its bytes to the - // checksum. - let ulp_start = ulp_off.seg_pos; - let ulp_end = ulp_start + ulp_off.hdr_len; - let ulp = &mut seg0_bytes[ulp_start..ulp_end]; - - match self.state.meta.inner.ulp.as_mut().unwrap() { - UlpMeta::Icmpv4(icmp) => { - Self::update_icmp_csum( - icmp, - self.state.body_csum.unwrap(), - ulp, - ); - } - - UlpMeta::Icmpv6(icmp) => { - Self::update_icmp_csum(icmp, csum, ulp); - } - - UlpMeta::Tcp(tcp) => { - Self::update_tcp_csum(tcp, csum, ulp); - } - - UlpMeta::Udp(udp) => { - Self::update_udp_csum(udp, csum, ulp); - } - } - } - - // Compute and fill in the IPv4 header checksum. - if let Some(IpMeta::Ip4(ip)) = self.state.meta.inner.ip.as_mut() { - let ip_off = self.state.hdr_offsets.inner.ip.unwrap(); - let all_hdr_bytes = self.segs[0].slice_mut(); - let ip_start = ip_off.seg_pos; - let ip_end = ip_start + ip_off.hdr_len; - let csum = HeaderChecksum::from(Checksum::compute( - &all_hdr_bytes[ip_start..ip_end], - )) - .bytes(); - - // Update the metadata. - ip.csum = csum; - - // Update the header bytes. - let csum_begin = ip_start + Ipv4Hdr::CSUM_BEGIN; - let csum_end = ip_start + Ipv4Hdr::CSUM_END; - all_hdr_bytes[csum_begin..csum_end].copy_from_slice(&csum[..]); - } - } - - fn update_icmp_csum( - icmp: &mut IcmpMeta, - mut csum: Checksum, - ulp: &mut [u8], - ) { - let csum_start = IcmpHdr::CSUM_BEGIN_OFFSET; - let csum_end = IcmpHdr::CSUM_END_OFFSET; - - // First we must zero the existing checksum. - ulp[csum_start..csum_end].copy_from_slice(&[0; 2]); - // Then we can add the ULP header bytes to the checksum. - csum.add_bytes(ulp); - // Convert the checksum to its final form. - let ulp_csum = HeaderChecksum::from(csum).bytes(); - // Update the ICMP(v6) metadata. - icmp.csum = ulp_csum; - // Update the ICMP(v6) header bytes. 
- ulp[csum_start..csum_end].copy_from_slice(&ulp_csum); - } - - fn update_tcp_csum(tcp: &mut TcpMeta, mut csum: Checksum, ulp: &mut [u8]) { - let csum_start = TcpHdr::CSUM_BEGIN_OFFSET; - let csum_end = TcpHdr::CSUM_END_OFFSET; - - // First we must zero the existing checksum. - ulp[csum_start..csum_end].copy_from_slice(&[0; 2]); - // Then we can add the ULP header bytes to the checksum. - csum.add_bytes(ulp); - // Convert the checksum to its final form. - let ulp_csum = HeaderChecksum::from(csum).bytes(); - // Update the TCP metadata. - tcp.csum = ulp_csum; - // Update the TCP header bytes. - ulp[csum_start..csum_end].copy_from_slice(&ulp_csum); - } - - fn update_udp_csum(udp: &mut UdpMeta, mut csum: Checksum, ulp: &mut [u8]) { - let csum_start = UdpHdr::CSUM_BEGIN_OFFSET; - let csum_end = UdpHdr::CSUM_END_OFFSET; - - // First we must zero the existing checksum. - ulp[csum_start..csum_end].copy_from_slice(&[0; 2]); - // Then we can add the ULP header bytes to the checksum. - csum.add_bytes(ulp); - // Convert the checksum to its final form. - let ulp_csum = HeaderChecksum::from(csum).bytes(); - // Update the UDP metadata. - udp.csum = ulp_csum; - // Update the UDP header bytes. - ulp[csum_start..csum_end].copy_from_slice(&ulp_csum); - } - - /// Perform an incremental checksum update for the ULP checksums - /// based on the stored body checksum. - /// - /// This avoids duplicating work already done by the client in the - /// case where checksums are **not** being offloaded to the hardware. - fn update_checksums(&mut self, update_ip: bool, update_ulp: bool) { - // If a ULP exists, then compute and set its checksum. - if let (true, Some(ulp_off)) = - (update_ulp, self.state.hdr_offsets.inner.ulp) - { - // Start by reusing the known checksum of the body. - let mut csum = self.state.body_csum.unwrap(); - // Unwrap: Can't have a ULP without an IP. - let ip = self.meta().inner.ip.unwrap(); - // Add pseudo header checksum. 
- let pseudo_csum = ip.pseudo_csum(); - csum += pseudo_csum; - // All headers must reside in the first segment. - let all_hdr_bytes = self.segs[0].slice_mut(); - // Determine ULP slice and add its bytes to the - // checksum. - let ulp_start = ulp_off.seg_pos; - let ulp_end = ulp_start + ulp_off.hdr_len; - let ulp = &mut all_hdr_bytes[ulp_start..ulp_end]; - - match self.state.meta.inner.ulp.as_mut().unwrap() { - UlpMeta::Icmpv4(icmp) => { - Self::update_icmp_csum( - icmp, - // ICMP4 requires the body_csum *without* - // the pseudoheader added back in. - self.state.body_csum.unwrap(), - ulp, - ); - } - - UlpMeta::Icmpv6(icmp) => { - Self::update_icmp_csum(icmp, csum, ulp); - } - - UlpMeta::Tcp(tcp) => { - Self::update_tcp_csum(tcp, csum, ulp); - } - - UlpMeta::Udp(udp) => { - Self::update_udp_csum(udp, csum, ulp); - } - } - } - - // Compute and fill in the IPv4 header checksum. - if let (true, Some(IpMeta::Ip4(ip))) = - (update_ip, self.state.meta.inner.ip.as_mut()) - { - let ip_off = self.state.hdr_offsets.inner.ip.unwrap(); - let all_hdr_bytes = self.segs[0].slice_mut(); - let ip_start = ip_off.seg_pos; - let ip_end = ip_start + ip_off.hdr_len; - let ip_bytes = &mut all_hdr_bytes[ip_start..ip_end]; - let csum_start = Ipv4Hdr::CSUM_BEGIN; - let csum_end = Ipv4Hdr::CSUM_END; - ip_bytes[csum_start..csum_end].copy_from_slice(&[0; 2]); - let csum = - HeaderChecksum::from(Checksum::compute(ip_bytes)).bytes(); - - // Update the metadata. - ip.csum = csum; - - // Update the header bytes. - ip_bytes[csum_start..csum_end].copy_from_slice(&csum[..]); - } - } - pub fn hdr_offsets(&self) -> HeaderOffsets { self.state.hdr_offsets.clone() } - /// Run the [`HdrTransform`] against this packet. 
- // #[inline] - // pub fn hdr_transform( - // &mut self, - // xform: &HdrTransform, - // ) -> Result<(), HdrTransformError> { - // xform.run(&mut self.state.meta)?; - // self.state.flow = InnerFlowId::from(&self.state.meta); - // Ok(()) - // } - /// Return a reference to the flow ID of this packet. #[inline] pub fn flow(&self) -> &InnerFlowId { @@ -1687,17 +965,9 @@ impl Packet { #[inline] pub fn is_tcp(&self) -> bool { - self.state.meta.inner.is_tcp() - } - - #[inline] - pub fn meta(&self) -> &PacketMeta { - &self.state.meta - } - - #[inline] - pub fn meta_mut(&mut self) -> &mut PacketMeta { - &mut self.state.meta + // self.state.meta.inner.is_tcp() + // TODO: about to gut anyhow. + false } /// Return the mblk pointer value as a formatted String. This is @@ -1811,286 +1081,6 @@ impl Packet { // start after the new headers. body.pkt_offset = new_hdr_len; } - - /// Emit the new headers to the [`Packet`] based on its current - /// metadata. - pub fn emit_new_headers(&mut self) -> Result<(), WriteError> { - // At this point the packet metadata represents the - // transformations made by the pipeline. We take the following - // steps to emit the new headers and update the packet data. - // - // 1. Figure out length required to emit the new headers. - // - // 2. Determine if this length can be met by the current first - // segment. If not, allocate a new segment to prepend to - // the xlist. - // - // 3. Emit the new header bytes based on the current metadata. - // - // 4. Update the headers offsets, body info, and checksums. - let innerm = &self.state.meta.inner; - - // Flag to indicate if an IP header/ULP checksums were - // provided. If the checksum is zero, it's assumed heardware - // checksum offload is being used, and OPTE should not update - // the checksum. - let inner_ip_csum = innerm.has_ip_csum(); - let inner_ulp_csum = innerm.has_ulp_csum(); - - // The length of the new headers. 
- let new_hdr_len = self.state.meta.hdr_len(); - // The total length of the new packet, including headers and - // body. This is used to determine the offset/length values of - // the new headers. - let new_pkt_len = new_hdr_len + self.state.body.len; - - // Given the new header length requirement, determine if it - // can be met with the current segment buffers, or if a new - // segment must be allocated and tacked onto the front of the - // segment list. - // - // Upon returning from this function the header offsets are no - // longer correct. New offsets are calculated as part of - // emitting the new headers below. - // - // The body offset **is** updated as part of this function, - // and is correct upon return. - Self::hdr_seg(&mut self.segs, new_hdr_len, &mut self.state.body); - let mut wtr = self.segs[0].get_writer(); - let new_offsets = Self::emit_headers( - &mut wtr, - &mut self.state.meta.outer, - &mut self.state.meta.inner, - new_pkt_len, - )?; - - // Update the header offsets. - self.state.hdr_offsets = new_offsets; - self.avail = self.segs.iter().map(|s| s.avail).sum(); - self.state.len = self.segs.iter().map(|s| s.len).sum(); - - // Update the ULP and IP header checksums. - self.update_checksums(inner_ip_csum, inner_ulp_csum); - Ok(()) - } - - fn emit_outer_headers( - wtr: &mut PacketSegWriter, - meta: &mut OuterMeta, - new_pkt_len: usize, - ) -> Result<(usize, OuterHeaderOffsets), WriteError> { - let mut offsets = OuterHeaderOffsets::default(); - let mut pkt_offset = 0; - - match &meta.ether { - Some(ether) => { - ether.emit(wtr.slice_mut(EtherHdr::SIZE)?); - offsets.ether = Some(HdrOffset { - pkt_pos: pkt_offset, - seg_idx: 0, - seg_pos: pkt_offset, - hdr_len: EtherHdr::SIZE, - }); - pkt_offset += EtherHdr::SIZE; - } - - // If there is no outer Ethernet, then there can be no - // outer headers at all. 
- None => return Ok((pkt_offset, offsets)), - } - - match meta.ip.as_mut() { - Some(IpMeta::Ip4(ip4)) => { - ip4.total_len = (new_pkt_len - pkt_offset) as u16; - ip4.emit(wtr.slice_mut(ip4.hdr_len())?); - offsets.ip = Some(HdrOffset { - pkt_pos: pkt_offset, - seg_idx: 0, - seg_pos: pkt_offset, - hdr_len: ip4.hdr_len(), - }); - pkt_offset += ip4.hdr_len(); - } - - Some(IpMeta::Ip6(ip6)) => { - // IPv6 Payload Length field is defined in RFC 2640 section 3 - // as: - // - // > Length of the IPv6 payload, i.e., the rest of the packet - // > following this IPv6 header, in octets. (Note that any - // > extension headers [section 4] present are considered part - // > of the payload, i.e., included in the length count.) - // - // So we need to remove the size of the fixed header (40 - // octets), which is included in the total new packet length, - // when setting the payload length. - ip6.pay_len = - (new_pkt_len - pkt_offset - Ipv6Hdr::BASE_SIZE) as u16; - ip6.emit(wtr.slice_mut(ip6.hdr_len())?); - offsets.ip = Some(HdrOffset { - pkt_pos: pkt_offset, - seg_idx: 0, - seg_pos: pkt_offset, - hdr_len: ip6.hdr_len(), - }); - pkt_offset += ip6.hdr_len(); - } - - None => return Ok((pkt_offset, offsets)), - } - - match meta.encap.as_mut() { - Some(EncapMeta::Geneve(geneve)) => { - geneve.emit( - (new_pkt_len - pkt_offset) as u16, - wtr.slice_mut(geneve.hdr_len())?, - ); - // geneve.emit(wtr.slice_mut(geneve.hdr_len())?); - offsets.ip = Some(HdrOffset { - pkt_pos: pkt_offset, - seg_idx: 0, - seg_pos: pkt_offset, - hdr_len: geneve.hdr_len(), - }); - pkt_offset += geneve.hdr_len(); - } - - None => return Ok((pkt_offset, offsets)), - } - - Ok((pkt_offset, offsets)) - } - - fn emit_inner_headers( - wtr: &mut PacketSegWriter, - meta: &mut InnerMeta, - mut pkt_offset: usize, - new_pkt_len: usize, - ) -> Result { - let mut offsets = InnerHeaderOffsets::default(); - - // ================================================================ - // Ether - // 
================================================================ - meta.ether.emit(wtr.slice_mut(EtherHdr::SIZE)?); - offsets.ether = HdrOffset { - pkt_pos: pkt_offset, - seg_idx: 0, - seg_pos: pkt_offset, - hdr_len: EtherHdr::SIZE, - }; - pkt_offset += EtherHdr::SIZE; - - // ================================================================ - // IP - // ================================================================ - match meta.ip.as_mut() { - Some(IpMeta::Ip4(ip4)) => { - ip4.total_len = (new_pkt_len - pkt_offset) as u16; - ip4.emit(wtr.slice_mut(ip4.hdr_len())?); - offsets.ip = Some(HdrOffset { - pkt_pos: pkt_offset, - seg_idx: 0, - seg_pos: pkt_offset, - hdr_len: ip4.hdr_len(), - }); - pkt_offset += ip4.hdr_len(); - } - - Some(IpMeta::Ip6(ip6)) => { - // IPv6 Payload Length field is defined in RFC 2640 section 3 - // as: - // - // > Length of the IPv6 payload, i.e., the rest of the packet - // > following this IPv6 header, in octets. (Note that any - // > extension headers [section 4] present are considered part - // > of the payload, i.e., included in the length count.) - // - // So we need to remove the size of the fixed header (40 - // octets), which is included in the total new packet length, - // when setting the payload length. 
- ip6.pay_len = - (new_pkt_len - pkt_offset - Ipv6Hdr::BASE_SIZE) as u16; - ip6.emit(wtr.slice_mut(ip6.hdr_len())?); - offsets.ip = Some(HdrOffset { - pkt_pos: pkt_offset, - seg_idx: 0, - seg_pos: pkt_offset, - hdr_len: ip6.hdr_len(), - }); - pkt_offset += ip6.hdr_len(); - } - - None => return Ok(offsets), - } - - // ================================================================ - // ULP - // ================================================================ - match meta.ulp.as_mut() { - Some(UlpMeta::Icmpv4(icmp)) => { - icmp.emit(wtr.slice_mut(icmp.hdr_len())?); - offsets.ulp = Some(HdrOffset { - pkt_pos: pkt_offset, - seg_idx: 0, - seg_pos: pkt_offset, - hdr_len: icmp.hdr_len(), - }); - } - - Some(UlpMeta::Icmpv6(icmp6)) => { - icmp6.emit(wtr.slice_mut(icmp6.hdr_len())?); - offsets.ulp = Some(HdrOffset { - pkt_pos: pkt_offset, - seg_idx: 0, - seg_pos: pkt_offset, - hdr_len: icmp6.hdr_len(), - }); - } - - Some(UlpMeta::Udp(udp)) => { - udp.len = (new_pkt_len - pkt_offset) as u16; - udp.emit(wtr.slice_mut(udp.hdr_len())?); - offsets.ulp = Some(HdrOffset { - pkt_pos: pkt_offset, - seg_idx: 0, - seg_pos: pkt_offset, - hdr_len: udp.hdr_len(), - }); - } - - Some(UlpMeta::Tcp(tcp)) => { - tcp.emit(wtr.slice_mut(tcp.hdr_len())?); - offsets.ulp = Some(HdrOffset { - pkt_pos: pkt_offset, - seg_idx: 0, - seg_pos: pkt_offset, - hdr_len: tcp.hdr_len(), - }); - } - - None => return Ok(offsets), - } - - Ok(offsets) - } - - /// Emit header bytes to the given writer based on the passed-in - /// metadata. 
- fn emit_headers( - wtr: &mut PacketSegWriter<'_>, - outer_meta: &mut OuterMeta, - inner_meta: &mut InnerMeta, - new_pkt_len: usize, - ) -> Result { - let (pkt_offset, outer_offsets) = - Self::emit_outer_headers(wtr, outer_meta, new_pkt_len)?; - - let inner_offsets = - Self::emit_inner_headers(wtr, inner_meta, pkt_offset, new_pkt_len)?; - - Ok(HeaderOffsets { outer: outer_offsets, inner: inner_offsets }) - } } impl Packet { @@ -2566,7 +1556,6 @@ impl DError for ingot::types::PacketParseError { pub enum ParseError { // TODO: I think this may be the only err variant? IngotError(ingot::types::PacketParseError), - BadHeader(HeaderReadErr), BadInnerIpLen { expected: usize, actual: usize, @@ -2637,65 +1626,6 @@ impl From for ParseError { } } -impl> From for ParseError { - fn from(value: T) -> Self { - Self::BadHeader(value.into()) - } -} - -#[derive(Clone, Debug, Eq, PartialEq, DError)] -pub enum HeaderReadErr { - EtherHdr(EtherHdrError), - ArpHdr(ArpHdrError), - GeneveHdr(GeneveHdrError), - Ipv4Hdr(Ipv4HdrError), - Ipv6Hdr(Ipv6HdrError), - IcmpHdr(IcmpHdrError), - TcpHdr(TcpHdrError), - UdpHdr(UdpHdrError), -} - -impl From for HeaderReadErr { - fn from(v: EtherHdrError) -> HeaderReadErr { - Self::EtherHdr(v) - } -} -impl From for HeaderReadErr { - fn from(v: ArpHdrError) -> HeaderReadErr { - Self::ArpHdr(v) - } -} -impl From for HeaderReadErr { - fn from(v: GeneveHdrError) -> HeaderReadErr { - Self::GeneveHdr(v) - } -} -impl From for HeaderReadErr { - fn from(v: Ipv4HdrError) -> HeaderReadErr { - Self::Ipv4Hdr(v) - } -} -impl From for HeaderReadErr { - fn from(v: Ipv6HdrError) -> HeaderReadErr { - Self::Ipv6Hdr(v) - } -} -impl From for HeaderReadErr { - fn from(v: IcmpHdrError) -> HeaderReadErr { - Self::IcmpHdr(v) - } -} -impl From for HeaderReadErr { - fn from(v: TcpHdrError) -> HeaderReadErr { - Self::TcpHdr(v) - } -} -impl From for HeaderReadErr { - fn from(v: UdpHdrError) -> HeaderReadErr { - Self::UdpHdr(v) - } -} - #[derive(Clone, Copy, Debug, Eq, PartialEq, 
DError)] pub enum ReadErr { BadLayout, @@ -2710,51 +1640,9 @@ pub enum ReadErr { pub enum WriteError { BadLayout, EndOfPacket, - EtherHdr(EtherHdrError), - GeneveHdr(GeneveHdrError), - Ipv4Hdr(Ipv4HdrError), - Ipv6Hdr(Ipv6HdrError), NotEnoughBytes { available: usize, needed: usize }, Read(ReadErr), StraddledWrite, - TcpHdr(TcpHdrError), - UdpHdr(UdpHdrError), -} - -impl From for WriteError { - fn from(e: TcpHdrError) -> Self { - Self::TcpHdr(e) - } -} - -impl From for WriteError { - fn from(e: UdpHdrError) -> Self { - Self::UdpHdr(e) - } -} - -impl From for WriteError { - fn from(e: EtherHdrError) -> Self { - Self::EtherHdr(e) - } -} - -impl From for WriteError { - fn from(e: GeneveHdrError) -> Self { - Self::GeneveHdr(e) - } -} - -impl From for WriteError { - fn from(e: Ipv4HdrError) -> Self { - Self::Ipv4Hdr(e) - } -} - -impl From for WriteError { - fn from(e: Ipv6HdrError) -> Self { - Self::Ipv6Hdr(e) - } } impl From for WriteError { diff --git a/lib/opte/src/engine/tcp.rs b/lib/opte/src/engine/tcp.rs index c165b988..9b07fea0 100644 --- a/lib/opte/src/engine/tcp.rs +++ b/lib/opte/src/engine/tcp.rs @@ -13,7 +13,6 @@ use super::headers::HeaderActionError; use super::headers::HeaderActionModify; use super::headers::ModifyAction; use super::headers::PushAction; -use super::headers::RawHeader; use super::headers::UlpMetaModify; use super::packet::PacketReadMut; use super::packet::ReadErr; @@ -54,19 +53,6 @@ pub const KEEPALIVE_EXPIRE_SECS: u64 = 8_000; pub const TIME_WAIT_EXPIRE_TTL: Ttl = Ttl::new_seconds(TIME_WAIT_EXPIRE_SECS); pub const KEEPALIVE_EXPIRE_TTL: Ttl = Ttl::new_seconds(KEEPALIVE_EXPIRE_SECS); -/// The standard TCP flags. We don't bother with the experimental NS -/// flag. 
-pub mod TcpFlags { - pub const FIN: u8 = crate::bit_on(0); - pub const SYN: u8 = crate::bit_on(1); - pub const RST: u8 = crate::bit_on(2); - pub const PSH: u8 = crate::bit_on(3); - pub const ACK: u8 = crate::bit_on(4); - pub const URG: u8 = crate::bit_on(5); - pub const ECE: u8 = crate::bit_on(6); - pub const CWR: u8 = crate::bit_on(7); -} - // The standard TCP states. // // See Figure 13-8 of TCP/IP Illustrated Vol. 1 Ed. 2 @@ -102,73 +88,6 @@ impl Display for TcpState { } } -#[derive(Clone, Copy, Debug, Default, Eq, Ord, PartialEq, PartialOrd)] -pub struct TcpMeta { - pub src: u16, - pub dst: u16, - pub flags: u8, - pub seq: u32, - pub ack: u32, - pub window_size: u16, - pub csum: [u8; 2], - // Fow now we keep options as raw bytes, allowing up to 40 bytes - // of options. - pub options_bytes: Option<[u8; 40]>, - pub options_len: usize, -} - -impl TcpMeta { - // This assumes the slice is large enough to hold the header. - #[inline] - pub fn emit(&self, dst: &mut [u8]) { - debug_assert_eq!(dst.len(), self.hdr_len()); - let base = &mut dst[0..TcpHdrRaw::SIZE]; - let mut raw = TcpHdrRaw::new_mut(base).unwrap(); - // raw.write_to(TcpHdrRaw::from(self)); - Ref::write(&mut raw, TcpHdrRaw::from(self)); - if let Some(bytes) = self.options_bytes { - dst[TcpHdr::BASE_SIZE..] 
- .copy_from_slice(&bytes[0..self.options_len]); - } - } - - #[inline] - pub fn has_flag(&self, flag: u8) -> bool { - (self.flags & flag) != 0 - } - - #[inline] - pub fn hdr_len(&self) -> usize { - TcpHdr::BASE_SIZE + self.options_len - } -} - -impl<'a> From<&TcpHdr<'a>> for TcpMeta { - fn from(tcp: &TcpHdr) -> Self { - let (options_bytes, options_len) = match tcp.options_raw() { - None => (None, 0), - Some(src) => { - let mut dst = [0; TcpHdr::MAX_OPTION_SIZE]; - dst[0..src.len()].copy_from_slice(src); - (Some(dst), src.len()) - } - }; - - let raw = &tcp.base; - Self { - src: u16::from_be_bytes(raw.src_port), - dst: u16::from_be_bytes(raw.dst_port), - flags: raw.flags, - seq: u32::from_be_bytes(raw.seq), - ack: u32::from_be_bytes(raw.ack), - window_size: u16::from_be_bytes(raw.window_size), - csum: raw.csum, - options_bytes, - options_len, - } - } -} - #[derive( Clone, Copy, @@ -186,279 +105,12 @@ pub struct TcpPush { pub dst: u16, } -impl PushAction for TcpPush { - fn push(&self) -> TcpMeta { - TcpMeta { src: self.src, dst: self.dst, ..Default::default() } - } -} - #[derive(Clone, Debug, Deserialize, Serialize)] pub struct TcpMod { src: Option, dst: Option, } -impl ModifyAction for TcpMod { - fn modify(&self, meta: &mut TcpMeta) { - if let Some(src) = self.src { - meta.src = src; - } - - if let Some(dst) = self.dst { - meta.dst = dst; - } - } -} - -impl HeaderActionModify for TcpMeta { - fn run_modify( - &mut self, - spec: &UlpMetaModify, - ) -> Result<(), HeaderActionError> { - if spec.generic.src_port.is_some() { - self.src = spec.generic.src_port.unwrap() - } - - if spec.generic.dst_port.is_some() { - self.dst = spec.generic.dst_port.unwrap() - } - - if spec.tcp_flags.is_some() { - self.flags = spec.tcp_flags.unwrap() - } - - Ok(()) - } -} - -#[derive(Debug)] -pub struct TcpHdr<'a> { - base: Ref<&'a mut [u8], TcpHdrRaw>, - options: Option<&'a mut [u8]>, -} - -impl<'a> TcpHdr<'a> { - pub const BASE_SIZE: usize = TcpHdrRaw::SIZE; - pub const CSUM_BEGIN_OFFSET: 
usize = 16; - pub const CSUM_END_OFFSET: usize = 18; - - /// The maximum size of a TCP header. - /// - /// The header length is derived from the data offset field. - /// Given it is a 4-bit field and specifies the size in 32-bit words, - /// the maximum header size is therefore (2^4 - 1) * 4 = 60 bytes. - pub const MAX_SIZE: usize = 60; - - /// The maximum size of any TCP options in a TCP header. - pub const MAX_OPTION_SIZE: usize = Self::MAX_SIZE - Self::BASE_SIZE; - - /// Return the acknowledgement number. - pub fn ack(&self) -> u32 { - u32::from_be_bytes(self.base.ack) - } - - pub fn csum(&self) -> [u8; 2] { - self.base.csum - } - - pub fn base_bytes(&self) -> &[u8] { - self.base.as_bytes() - } - - pub fn options_bytes(&self) -> Option<&[u8]> { - match &self.options { - None => None, - Some(options) => Some(*options), - } - } - - /// Return the checksum value minus header TCP header bytes, - /// producing the checksum value of the body. - pub fn csum_minus_hdr(&self) -> Option { - // There was no checksum to begin with. - if self.base.csum == [0; 2] { - return None; - } - - let mut csum = Checksum::from(HeaderChecksum::wrap(self.base.csum)); - // When a checksum is calculated you treat the checksum field - // bytes themselves as zero; therefore its imperative we do - // not include the checksum field bytes when subtracting from - // the checksum value. - csum.sub_bytes(&self.base.as_bytes()[0..Self::CSUM_BEGIN_OFFSET]); - csum.sub_bytes(&self.base.as_bytes()[Self::CSUM_END_OFFSET..]); - - if let Some(options) = self.options.as_ref() { - csum.sub_bytes(options); - } - Some(csum) - } - - /// Return destination port. - pub fn dst_port(&self) -> u16 { - u16::from_be_bytes(self.base.dst_port) - } - - /// Return the TCP flags. - pub fn flags(&self) -> u8 { - self.base.flags - } - - /// Return the leangth of the TCP header, in bytes. - /// - /// This length includes the TCP options. 
- pub fn hdr_len(&self) -> usize { - usize::from(self.base.offset()) * 4 - } - - /// Return a reference to the options data. - pub fn options_raw(&self) -> Option<&[u8]> { - match &self.options { - None => None, - Some(options) => Some(*options), - } - } - - pub fn parse<'b>( - rdr: &'b mut impl PacketReadMut<'a>, - ) -> Result { - let src = rdr.slice_mut(TcpHdrRaw::SIZE)?; - let mut hdr = Self { base: TcpHdrRaw::new_mut(src)?, options: None }; - - if hdr.src_port() == DYNAMIC_PORT { - return Err(TcpHdrError::BadSrcPort { src_port: hdr.src_port() }); - } - - if hdr.dst_port() == DYNAMIC_PORT { - return Err(TcpHdrError::BadDstPort { dst_port: hdr.dst_port() }); - } - - let hdr_len = hdr.hdr_len(); - - if hdr_len < Self::BASE_SIZE { - return Err(TcpHdrError::TruncatedHdr { - hdr_len_bytes: hdr.hdr_len(), - }); - } - - if hdr_len > Self::BASE_SIZE { - let opts_len = hdr.hdr_len() - Self::BASE_SIZE; - match rdr.slice_mut(opts_len) { - Ok(opts) => hdr.options = Some(opts), - Err(e) => { - return Err(TcpHdrError::TruncatedOptions(e)); - } - } - } - - Ok(hdr) - } - - /// Return the sequence number. - pub fn seq(&self) -> u32 { - u32::from_be_bytes(self.base.seq) - } - - /// Set the checksum value. - pub fn set_csum(&mut self, csum: [u8; 2]) { - self.base.csum = csum - } - - /// Return the source port. - pub fn src_port(&self) -> u16 { - u16::from_be_bytes(self.base.src_port) - } - - /// Return the window size value. 
- pub fn window_size(&self) -> u16 { - u16::from_be_bytes(self.base.window_size) - } -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq, DError)] -#[derror(leaf_data = TcpHdrError::derror_data)] -pub enum TcpHdrError { - BadDstPort { dst_port: u16 }, - BadOffset { offset: u8, len_in_bytes: u8 }, - BadSrcPort { src_port: u16 }, - ReadError(ReadErr), - Straddled, - TruncatedHdr { hdr_len_bytes: usize }, - TruncatedOptions(ReadErr), -} - -impl TcpHdrError { - fn derror_data(&self, data: &mut [u64]) { - [data[0], data[1]] = match self { - Self::BadDstPort { dst_port } => [*dst_port as u64, 0], - Self::BadOffset { offset, len_in_bytes } => { - [*offset as u64, *len_in_bytes as u64] - } - Self::BadSrcPort { src_port } => [*src_port as u64, 0], - Self::TruncatedHdr { hdr_len_bytes } => [*hdr_len_bytes as u64, 0], - _ => [0, 0], - } - } -} - -impl From for TcpHdrError { - fn from(error: ReadErr) -> Self { - TcpHdrError::ReadError(error) - } -} - -/// Note: For now we keep this unaligned to be safe. 
-#[repr(C)] -#[derive( - Clone, Debug, FromBytes, IntoBytes, Unaligned, Immutable, KnownLayout, -)] -pub struct TcpHdrRaw { - pub src_port: [u8; 2], - pub dst_port: [u8; 2], - pub seq: [u8; 4], - pub ack: [u8; 4], - pub offset: u8, - pub flags: u8, - pub window_size: [u8; 2], - pub csum: [u8; 2], - pub urg: [u8; 2], -} - -impl TcpHdrRaw { - fn offset(&self) -> u8 { - (self.offset & TCP_HDR_OFFSET_MASK) >> TCP_HDR_OFFSET_SHIFT - } -} - -impl<'a> RawHeader<'a> for TcpHdrRaw { - #[inline] - fn new_mut(src: &mut [u8]) -> Result, ReadErr> { - debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::from_bytes(src).ok() { - Some(hdr) => hdr, - None => return Err(ReadErr::BadLayout), - }; - Ok(hdr) - } -} - -impl From<&TcpMeta> for TcpHdrRaw { - #[inline] - fn from(meta: &TcpMeta) -> Self { - Self { - src_port: meta.src.to_be_bytes(), - dst_port: meta.dst.to_be_bytes(), - seq: meta.seq.to_be_bytes(), - ack: meta.ack.to_be_bytes(), - offset: ((meta.hdr_len() as u8 / 4) & 0x0F) << 4, - flags: meta.flags, - window_size: meta.window_size.to_be_bytes(), - csum: meta.csum, - urg: [0; 2], - } - } -} - #[cfg(test)] mod test { use super::*; diff --git a/lib/opte/src/engine/udp.rs b/lib/opte/src/engine/udp.rs index fe062f51..338839d0 100644 --- a/lib/opte/src/engine/udp.rs +++ b/lib/opte/src/engine/udp.rs @@ -12,7 +12,6 @@ use crate::engine::checksum::HeaderChecksum; use crate::engine::headers::HeaderActionModify; use crate::engine::headers::ModifyAction; use crate::engine::headers::PushAction; -use crate::engine::headers::RawHeader; use crate::engine::headers::UlpMetaModify; use crate::engine::packet::PacketReadMut; use crate::engine::packet::ReadErr; @@ -29,41 +28,6 @@ use zerocopy::Unaligned; use super::headers::HeaderActionError; -#[derive(Clone, Copy, Debug, Default, Eq, Ord, PartialEq, PartialOrd)] -pub struct UdpMeta { - pub src: u16, - pub dst: u16, - pub len: u16, - pub csum: [u8; 2], -} - -impl UdpMeta { - // This assumes the dst is large enough. 
- #[inline] - pub fn emit(&self, dst: &mut [u8]) { - debug_assert!(dst.len() >= UdpHdr::SIZE); - dst[0..2].copy_from_slice(&self.src.to_be_bytes()); - dst[2..4].copy_from_slice(&self.dst.to_be_bytes()); - dst[4..6].copy_from_slice(&self.len.to_be_bytes()); - dst[6..8].copy_from_slice(&self.csum); - } - - pub fn hdr_len(&self) -> usize { - UdpHdr::SIZE - } -} - -impl<'a> From<&UdpHdr<'a>> for UdpMeta { - fn from(udp: &UdpHdr) -> Self { - UdpMeta { - src: udp.src_port(), - dst: udp.dst_port(), - len: udp.len(), - csum: udp.csum_bytes(), - } - } -} - #[derive( Clone, Copy, @@ -81,186 +45,12 @@ pub struct UdpPush { pub dst: u16, } -impl PushAction for UdpPush { - fn push(&self) -> UdpMeta { - UdpMeta { src: self.src, dst: self.dst, ..Default::default() } - } -} - #[derive(Clone, Debug, Deserialize, Serialize)] pub struct UdpMod { src: Option, dst: Option, } -impl ModifyAction for UdpMod { - fn modify(&self, meta: &mut UdpMeta) { - if let Some(src) = self.src { - meta.src = src; - } - - if let Some(dst) = self.dst { - meta.dst = dst; - } - } -} - -impl HeaderActionModify for UdpMeta { - fn run_modify( - &mut self, - spec: &UlpMetaModify, - ) -> Result<(), HeaderActionError> { - if spec.generic.src_port.is_some() { - self.src = spec.generic.src_port.unwrap() - } - - if spec.generic.dst_port.is_some() { - self.dst = spec.generic.dst_port.unwrap() - } - - Ok(()) - } -} - -#[derive(Debug)] -pub struct UdpHdr<'a> { - base: Ref<&'a mut [u8], UdpHdrRaw>, -} - -impl<'a> UdpHdr<'a> { - pub const SIZE: usize = UdpHdrRaw::SIZE; - pub const CSUM_BEGIN_OFFSET: usize = 6; - pub const CSUM_END_OFFSET: usize = 8; - - pub fn bytes(&self) -> &[u8] { - self.base.as_bytes() - } - - pub fn csum_bytes(&self) -> [u8; 2] { - self.base.csum - } - - pub fn csum_minus_hdr(&self) -> Option { - if self.base.csum != [0; 2] { - let mut csum = Checksum::from(HeaderChecksum::wrap(self.base.csum)); - csum.sub_bytes(&self.base.as_bytes()[0..Self::CSUM_BEGIN_OFFSET]); - Some(csum) - } else { - None - } - 
} - - pub fn dst_port(&self) -> u16 { - u16::from_be_bytes(self.base.dst_port) - } - - /// Return the header length, in bytes. - pub fn hdr_len(&self) -> usize { - Self::SIZE - } - - pub fn parse<'b>( - rdr: &'b mut impl PacketReadMut<'a>, - ) -> Result { - let src = rdr.slice_mut(UdpHdrRaw::SIZE)?; - let udp = Self { base: UdpHdrRaw::new_mut(src)? }; - - let src_port = udp.src_port(); - if src_port == DYNAMIC_PORT { - return Err(UdpHdrError::BadSrcPort { src_port }); - } - - let dst_port = udp.dst_port(); - if dst_port == DYNAMIC_PORT { - return Err(UdpHdrError::BadDstPort { dst_port }); - } - - let length = udp.len(); - if length < Self::SIZE as u16 { - return Err(UdpHdrError::BadLength { length }); - } - - Ok(udp) - } - - pub fn set_csum(&mut self, csum: [u8; 2]) { - self.base.csum = csum; - } - - pub fn len(&self) -> u16 { - u16::from_be_bytes(self.base.length) - } - - /// Set the length, in bytes. - /// - /// The UDP length field includes both header and payload. - pub fn set_len(&mut self, len: u16) { - self.base.length = len.to_be_bytes(); - } - - pub fn set_pay_len(&mut self, len: u16) { - self.base.length = (Self::SIZE as u16 + len).to_be_bytes(); - } - - pub fn src_port(&self) -> u16 { - u16::from_be_bytes(self.base.src_port) - } -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq, DError)] -#[derror(leaf_data = UdpHdrError::derror_data)] -pub enum UdpHdrError { - BadDstPort { dst_port: u16 }, - BadLength { length: u16 }, - BadSrcPort { src_port: u16 }, - ReadError(ReadErr), -} - -impl UdpHdrError { - fn derror_data(&self, data: &mut [u64]) { - data[0] = match self { - Self::BadDstPort { dst_port } => *dst_port as u64, - Self::BadLength { length } => *length as u64, - Self::BadSrcPort { src_port } => *src_port as u64, - _ => 0, - } - } -} - -impl From for UdpHdrError { - fn from(error: ReadErr) -> Self { - UdpHdrError::ReadError(error) - } -} - -/// Note: For now we keep this unaligned to be safe. 
-#[repr(C)] -#[derive( - Clone, Debug, FromBytes, IntoBytes, Unaligned, Immutable, KnownLayout, -)] -pub struct UdpHdrRaw { - pub src_port: [u8; 2], - pub dst_port: [u8; 2], - pub length: [u8; 2], - pub csum: [u8; 2], -} - -impl UdpHdrRaw { - pub const SIZE: usize = mem::size_of::(); -} - -impl<'a> RawHeader<'a> for UdpHdrRaw { - #[inline] - fn new_mut(src: &mut [u8]) -> Result, ReadErr> { - debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::from_bytes(src).ok() { - Some(hdr) => hdr, - None => return Err(ReadErr::BadLayout), - }; - Ok(hdr) - } -} - #[cfg(test)] mod test { use super::*; diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index fe0806a6..2783f88a 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -13,13 +13,22 @@ pub mod print; pub mod router; use crate::cfg::VpcCfg; +use opte::engine::arp; +use opte::engine::arp::ArpEthIpv4; +use opte::engine::arp::ArpEthIpv4Ref; +use opte::engine::arp::ArpOp; +use opte::engine::arp::ValidArpEthIpv4; +use opte::engine::arp::ARP_HTYPE_ETHERNET; +use opte::engine::ether::ETHER_TYPE_IPV4; use opte::engine::flow_table::FlowTable; +use opte::engine::ingot_base::EthernetRef; use opte::engine::ingot_packet::MsgBlk; use opte::engine::ingot_packet::OpteParsed2; use opte::engine::ingot_packet::Packet2; use opte::engine::ingot_packet::Parsed2; use opte::engine::ingot_packet::ValidGeneveOverV6; use opte::engine::ingot_packet::ValidNoEncap; +use opte::engine::ip4::Ipv4Addr; use opte::engine::packet::InnerFlowId; use opte::engine::packet::ParseError; use opte::engine::port::UftEntry; @@ -28,14 +37,8 @@ use opte::engine::HdlPktAction; use opte::engine::HdlPktError; use opte::engine::NetworkImpl; use opte::engine::NetworkParser; - -use opte::engine::arp; -use opte::engine::arp::ArpEthIpv4; -use opte::engine::arp::ArpOp; -use opte::engine::ether::ETHER_TYPE_IPV4; -use opte::engine::ingot_base::EthernetRef; -use opte::engine::ip4::Ipv4Addr; use 
opte::ingot::ethernet::Ethertype; +use opte::ingot::types::HeaderParse; use opte::ingot::types::Read; use zerocopy::ByteSliceMut; @@ -53,17 +56,14 @@ pub struct VpcNetwork { pub cfg: VpcCfg, } -// The ARP HTYPE for Ethernet. -const HTYPE_ETHER: u16 = 1; - -fn is_arp_req(arp: &ArpEthIpv4) -> bool { - arp.htype == HTYPE_ETHER - && arp.ptype == ETHER_TYPE_IPV4 - && arp.op == ArpOp::Request +fn is_arp_req(arp: &impl ArpEthIpv4Ref) -> bool { + arp.htype() == ARP_HTYPE_ETHERNET + && arp.ptype() == Ethertype::IPV4 + && arp.op() == ArpOp::REQUEST } -fn is_arp_req_for_tpa(tpa: Ipv4Addr, arp: &ArpEthIpv4) -> bool { - is_arp_req(arp) && arp.tpa == tpa +fn is_arp_req_for_tpa(tpa: Ipv4Addr, arp: &impl ArpEthIpv4Ref) -> bool { + is_arp_req(arp) && arp.tpa() == tpa } impl VpcNetwork { @@ -76,20 +76,23 @@ impl VpcNetwork { { let body = pkt .body_segs() + .and_then(|v| v.get(0)) .ok_or_else(|| HdlPktError("outbound ARP (no body)"))?; - let arp = ArpEthIpv4::parse_normally(body) + + let (arp, ..) = ValidArpEthIpv4::parse(*body) .map_err(|_| HdlPktError("outbound ARP (parse)"))?; + + if !arp.values_valid() { + return Err(HdlPktError("outbound ARP (parse -- bad values)")); + } + let gw_ip = self.cfg.ipv4_cfg().unwrap().gateway_ip; if is_arp_req_for_tpa(gw_ip, &arp) { let gw_mac = self.cfg.gateway_mac; - let hp = arp::gen_arp_reply(gw_mac, gw_ip, arp.sha, arp.spa); - // TODO: just emit into an mblk normally. 
- return Ok(HdlPktAction::Hairpin( - unsafe { MsgBlk::wrap_mblk(hp.unwrap_mblk()) } - .expect("known valid"), - )); + let hp = arp::gen_arp_reply(gw_mac, gw_ip, arp.sha(), arp.spa()); + return Ok(HdlPktAction::Hairpin(hp)); } Ok(HdlPktAction::Deny) diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index b639e653..73ee6f19 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -19,7 +19,9 @@ use opte::api::MacAddr; use opte::api::OpteError; use opte::ddi::time::Moment; use opte::engine::arp::ArpEthIpv4; -use opte::engine::arp::ArpEthIpv4Raw; +use opte::engine::arp::ArpEthIpv4Ref; +use opte::engine::arp::ValidArpEthIpv4; +use opte::engine::arp::ARP_HTYPE_ETHERNET; use opte::engine::dhcpv6; use opte::engine::flow_table::FLOW_DEF_EXPIRE_SECS; use opte::engine::geneve::Vni; @@ -45,6 +47,7 @@ use opte::ingot::icmp::IcmpV6Ref; use opte::ingot::tcp::TcpRef; use opte::ingot::types::Emit; use opte::ingot::types::HeaderLen; +use opte::ingot::types::HeaderParse; use opte::ingot::udp::Udp; use opte::ingot::udp::UdpRef; use opte_test_utils as common; @@ -1879,21 +1882,21 @@ fn arp_gateway() { // TODO: ingot? 
let arp = ArpEthIpv4 { - htype: 1, - ptype: u16::from(EtherType::Ipv4), + htype: ARP_HTYPE_ETHERNET, + ptype: Ethertype::IPV4, hlen: 6, plen: 4, - op: ArpOp::Request, + op: ArpOp::REQUEST, sha: cfg.guest_mac, spa: cfg.ipv4_cfg().unwrap().private_ip, tha: MacAddr::from([0x00; 6]), tpa: cfg.ipv4_cfg().unwrap().gateway_ip, }; - let mut bytes = eth_hdr.emit_vec(); - bytes.extend_from_slice(ArpEthIpv4Raw::from(&arp).as_bytes()); + // let mut bytes = eth_hdr.emit_vec(); + // bytes.extend_from_slice(ArpEthIpv4Raw::from(&arp).as_bytes()); - let mut pkt_m = MsgBlk::copy(bytes); + let mut pkt_m = MsgBlk::new_ethernet_pkt((eth_hdr, arp)); let pkt = parse_outbound(&mut pkt_m, VpcParser {}).unwrap(); let res = g1.port.process(Out, pkt); @@ -1911,19 +1914,13 @@ fn arp_gateway() { let body = hppkt.to_full_meta().meta().copy_remaining(); - let (arp, _) = ArpEthIpv4Raw::ref_from_prefix(&body[..]).unwrap(); - assert_eq!(arp.op, ArpOp::Reply.to_be_bytes()); - assert_eq!(arp.ptype, Ethertype::IPV4.0.to_be_bytes()); - assert_eq!(MacAddr::from(arp.sha), cfg.gateway_mac); - assert_eq!( - Ipv4Addr::from(arp.spa), - cfg.ipv4_cfg().unwrap().gateway_ip - ); - assert_eq!(MacAddr::from(arp.tha), cfg.guest_mac); - assert_eq!( - Ipv4Addr::from(arp.tpa), - cfg.ipv4_cfg().unwrap().private_ip - ); + let (arp, ..) 
= ValidArpEthIpv4::parse(&body[..]).unwrap(); + assert_eq!(arp.op(), ArpOp::REPLY); + assert_eq!(arp.ptype(), Ethertype::IPV4); + assert_eq!(arp.sha(), cfg.gateway_mac); + assert_eq!(arp.spa(), cfg.ipv4_cfg().unwrap().gateway_ip); + assert_eq!(arp.tha(), cfg.guest_mac); + assert_eq!(arp.tpa(), cfg.ipv4_cfg().unwrap().private_ip); } res => panic!("expected a Hairpin, got {:?}", res), From 496a25cdb0d98e31c4ab3c653e1eab9933b2f0c0 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Sat, 19 Oct 2024 19:06:10 +0100 Subject: [PATCH 057/115] The Axe Shall Fall (pt.2) --- lib/opte/src/engine/arp.rs | 13 ------------- lib/opte/src/engine/dhcp.rs | 5 ----- lib/opte/src/engine/dhcpv6/protocol.rs | 8 -------- lib/opte/src/engine/ether.rs | 9 --------- lib/opte/src/engine/geneve.rs | 11 ----------- lib/opte/src/engine/headers.rs | 4 ---- lib/opte/src/engine/icmp/mod.rs | 14 -------------- lib/opte/src/engine/icmp/v4.rs | 1 - lib/opte/src/engine/ingot_packet.rs | 1 - lib/opte/src/engine/ip4.rs | 13 ------------- lib/opte/src/engine/packet.rs | 8 -------- lib/opte/src/engine/tcp.rs | 17 ----------------- lib/opte/src/engine/udp.rs | 19 ------------------- lib/oxide-vpc/src/engine/mod.rs | 3 --- lib/oxide-vpc/tests/integration_tests.rs | 2 -- 15 files changed, 128 deletions(-) diff --git a/lib/opte/src/engine/arp.rs b/lib/opte/src/engine/arp.rs index 59013ad7..a7508ce9 100644 --- a/lib/opte/src/engine/arp.rs +++ b/lib/opte/src/engine/arp.rs @@ -6,15 +6,8 @@ //! ARP headers and data. 
-use super::ether::EtherMeta; -use super::ether::EtherType; use super::ingot_base::Ethernet; use super::ingot_packet::MsgBlk; -use super::packet::Initialized; -use super::packet::Packet; -use super::packet::PacketReadMut; -use super::packet::ReadErr; -use crate::d_error::DError; use core::fmt; use core::fmt::Display; use ingot::ethernet::Ethertype; @@ -26,12 +19,6 @@ use opte_api::MacAddr; use serde::Deserialize; use serde::Serialize; use zerocopy::ByteSlice; -use zerocopy::FromBytes; -use zerocopy::Immutable; -use zerocopy::IntoBytes; -use zerocopy::KnownLayout; -use zerocopy::Ref; -use zerocopy::Unaligned; pub const ARP_HTYPE_ETHERNET: u16 = 1; diff --git a/lib/opte/src/engine/dhcp.rs b/lib/opte/src/engine/dhcp.rs index 4da14378..406008fb 100644 --- a/lib/opte/src/engine/dhcp.rs +++ b/lib/opte/src/engine/dhcp.rs @@ -6,17 +6,12 @@ //! DHCP headers, data, and actions. -use super::checksum::HeaderChecksum; -use super::ether::EtherMeta; -use super::ether::EtherType; use super::ingot_base::Ethernet; use super::ingot_base::Ipv4; use super::ingot_packet::MsgBlk; use super::ingot_packet::PacketHeaders2; use super::ip4::Ipv4Addr; use super::ip4::Protocol; -use super::ip6::UlpCsumOpt; -use super::packet::Packet; use super::predicate::DataPredicate; use super::predicate::EtherAddrMatch; use super::predicate::IpProtoMatch; diff --git a/lib/opte/src/engine/dhcpv6/protocol.rs b/lib/opte/src/engine/dhcpv6/protocol.rs index 56ac4ad4..322fabf6 100644 --- a/lib/opte/src/engine/dhcpv6/protocol.rs +++ b/lib/opte/src/engine/dhcpv6/protocol.rs @@ -8,7 +8,6 @@ use super::Dhcpv6Action; use super::TransactionId; -use crate::engine::checksum::HeaderChecksum; use crate::engine::dhcpv6::options::Code as OptionCode; use crate::engine::dhcpv6::options::IaAddr; use crate::engine::dhcpv6::options::IaNa; @@ -22,17 +21,11 @@ use crate::engine::dhcpv6::ALL_RELAYS_AND_SERVERS; use crate::engine::dhcpv6::ALL_SERVERS; use crate::engine::dhcpv6::CLIENT_PORT; use crate::engine::dhcpv6::SERVER_PORT; 
-use crate::engine::ether::EtherMeta; -use crate::engine::ether::EtherType; use crate::engine::ingot_base::Ethernet; use crate::engine::ingot_base::Ipv6; use crate::engine::ingot_base::Ipv6Ref; use crate::engine::ingot_packet::MsgBlk; use crate::engine::ingot_packet::PacketHeaders2; -use crate::engine::ip6::Ipv6Hdr; -use crate::engine::ip6::Ipv6Meta; -use crate::engine::ip6::UlpCsumOpt; -use crate::engine::packet::Packet; use crate::engine::predicate::DataPredicate; use crate::engine::predicate::EtherAddrMatch; use crate::engine::predicate::IpProtoMatch; @@ -56,7 +49,6 @@ use opte_api::MacAddr; use opte_api::Protocol; use serde::Deserialize; use serde::Serialize; -use smoltcp::wire::IpProtocol; #[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)] pub enum MessageType { diff --git a/lib/opte/src/engine/ether.rs b/lib/opte/src/engine/ether.rs index 8cb868d5..4e3aa790 100644 --- a/lib/opte/src/engine/ether.rs +++ b/lib/opte/src/engine/ether.rs @@ -8,9 +8,6 @@ use super::headers::ModifyAction; use super::headers::PushAction; -use super::packet::PacketReadMut; -use super::packet::ReadErr; -use crate::d_error::DError; use alloc::string::String; use alloc::vec::Vec; use core::fmt; @@ -23,12 +20,6 @@ use ingot::types::HeaderLen; use opte_api::MacAddr; use serde::Deserialize; use serde::Serialize; -use zerocopy::FromBytes; -use zerocopy::Immutable; -use zerocopy::IntoBytes; -use zerocopy::KnownLayout; -use zerocopy::Ref; -use zerocopy::Unaligned; pub const ETHER_TYPE_ETHER: u16 = 0x6558; pub const ETHER_TYPE_IPV4: u16 = 0x0800; diff --git a/lib/opte/src/engine/geneve.rs b/lib/opte/src/engine/geneve.rs index b86f1a83..e906dd47 100644 --- a/lib/opte/src/engine/geneve.rs +++ b/lib/opte/src/engine/geneve.rs @@ -8,13 +8,8 @@ //! //! 
RFC 8926 Geneve: Generic Network Virtualization Encapsulation -use super::ether::ETHER_TYPE_ETHER; use super::headers::ModifyAction; use super::headers::PushAction; -use super::packet::PacketReadMut; -use super::packet::ReadErr; -use crate::d_error::DError; -use core::mem; use ingot::geneve::Geneve; use ingot::geneve::GeneveOpt; use ingot::geneve::GeneveOptRef; @@ -27,12 +22,6 @@ pub use opte_api::Vni; use serde::Deserialize; use serde::Serialize; use zerocopy::ByteSlice; -use zerocopy::FromBytes; -use zerocopy::Immutable; -use zerocopy::IntoBytes; -use zerocopy::KnownLayout; -use zerocopy::Ref; -use zerocopy::Unaligned; pub const GENEVE_VSN: u8 = 0; pub const GENEVE_VER_MASK: u8 = 0xC0; diff --git a/lib/opte/src/engine/headers.rs b/lib/opte/src/engine/headers.rs index 6f067d4c..4232b8b4 100644 --- a/lib/opte/src/engine/headers.rs +++ b/lib/opte/src/engine/headers.rs @@ -6,7 +6,6 @@ //! Header metadata combinations for IP, ULP, and Encap. -use super::checksum::Checksum; use super::geneve::GeneveMeta; use super::geneve::GeneveMod; use super::geneve::GenevePush; @@ -14,12 +13,10 @@ use super::ip4::Ipv4Mod; use super::ip4::Ipv4Push; use super::ip6::Ipv6Mod; use super::ip6::Ipv6Push; -use super::packet::ReadErr; use super::tcp::TcpMod; use super::tcp::TcpPush; use super::udp::UdpMod; use super::udp::UdpPush; -use crate::engine::icmp::QueryEcho; use core::fmt; pub use opte_api::IpAddr; pub use opte_api::IpCidr; @@ -27,7 +24,6 @@ pub use opte_api::Protocol; pub use opte_api::Vni; use serde::Deserialize; use serde::Serialize; -use zerocopy::Ref; pub const AF_INET: i32 = 2; pub const AF_INET6: i32 = 26; diff --git a/lib/opte/src/engine/icmp/mod.rs b/lib/opte/src/engine/icmp/mod.rs index d3f47b0d..c44e6fd5 100644 --- a/lib/opte/src/engine/icmp/mod.rs +++ b/lib/opte/src/engine/icmp/mod.rs @@ -9,14 +9,6 @@ pub mod v4; pub mod v6; -use super::checksum::Checksum as OpteCsum; -use super::checksum::HeaderChecksum; -use super::headers::HeaderActionError; -use 
super::packet::PacketReadMut; -use super::packet::ReadErr; -use crate::d_error::DError; -use crate::engine::headers::HeaderActionModify; -use crate::engine::headers::UlpMetaModify; use crate::engine::predicate::DataPredicate; use crate::engine::predicate::EtherAddrMatch; use crate::engine::predicate::IpProtoMatch; @@ -36,12 +28,6 @@ use serde::Serialize; use smoltcp::phy::Checksum; use smoltcp::phy::ChecksumCapabilities as Csum; use zerocopy::ByteSlice; -use zerocopy::FromBytes; -use zerocopy::Immutable; -use zerocopy::IntoBytes; -use zerocopy::KnownLayout; -use zerocopy::Ref; -use zerocopy::Unaligned; /// Shared methods for handling ICMPv4/v6 Echo fields. pub trait QueryEcho { diff --git a/lib/opte/src/engine/icmp/v4.rs b/lib/opte/src/engine/icmp/v4.rs index 0f061b11..ca6edc64 100644 --- a/lib/opte/src/engine/icmp/v4.rs +++ b/lib/opte/src/engine/icmp/v4.rs @@ -19,7 +19,6 @@ use ingot::types::Emit; use ingot::types::HeaderLen; pub use opte_api::ip::IcmpEchoReply; use smoltcp::wire; -use smoltcp::wire::Icmpv4Message; use smoltcp::wire::Icmpv4Packet; use smoltcp::wire::Icmpv4Repr; diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 8ca1d45d..7b9ecca9 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -3,7 +3,6 @@ use super::checksum::Checksum; use super::checksum::HeaderChecksum; use super::ether::EtherMeta; use super::ether::EtherMod; -use super::geneve::geneve_has_oxide_external; use super::geneve::OxideOption; use super::geneve::GENEVE_OPT_CLASS_OXIDE; use super::geneve::GENEVE_PORT; diff --git a/lib/opte/src/engine/ip4.rs b/lib/opte/src/engine/ip4.rs index a2f33d8b..0dba674e 100644 --- a/lib/opte/src/engine/ip4.rs +++ b/lib/opte/src/engine/ip4.rs @@ -6,18 +6,11 @@ //! IPv4 headers. 
-use super::checksum::Checksum; -use super::checksum::HeaderChecksum; -use super::headers::ModifyAction; -use super::headers::PushAction; -use super::packet::PacketReadMut; -use super::packet::ReadErr; use super::predicate::MatchExact; use super::predicate::MatchExactVal; use super::predicate::MatchPrefix; use super::predicate::MatchPrefixVal; use super::predicate::MatchRangeVal; -use crate::d_error::DError; use alloc::string::String; use core::fmt; use core::fmt::Debug; @@ -30,12 +23,6 @@ pub use opte_api::Ipv4PrefixLen; pub use opte_api::Protocol; use serde::Deserialize; use serde::Serialize; -use zerocopy::FromBytes; -use zerocopy::Immutable; -use zerocopy::IntoBytes; -use zerocopy::KnownLayout; -use zerocopy::Ref; -use zerocopy::Unaligned; pub const IPV4_HDR_LEN_MASK: u8 = 0x0F; pub const IPV4_HDR_VER_MASK: u8 = 0xF0; diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 5b051760..be0eaf4b 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -12,11 +12,6 @@ //! use super::checksum::Checksum; -use super::checksum::HeaderChecksum; -use super::ether::EtherMeta; -use super::geneve::GeneveMeta; -use super::geneve::GENEVE_PORT; -use super::headers::EncapMeta; use super::headers::IpAddr; use super::headers::AF_INET; use super::headers::AF_INET6; @@ -24,9 +19,6 @@ use super::ingot_packet::MsgBlk; use super::ip4::Ipv4Addr; use super::ip4::Protocol; use super::ip6::Ipv6Addr; -use super::ip6::Ipv6Hdr; -use super::ip6::Ipv6HdrError; -use super::ip6::Ipv6Meta; use crate::d_error::DError; use core::fmt; use core::fmt::Display; diff --git a/lib/opte/src/engine/tcp.rs b/lib/opte/src/engine/tcp.rs index 9b07fea0..81496ab5 100644 --- a/lib/opte/src/engine/tcp.rs +++ b/lib/opte/src/engine/tcp.rs @@ -6,28 +6,11 @@ //! TCP headers. 
-use super::checksum::Checksum; -use super::checksum::HeaderChecksum; use super::flow_table::Ttl; -use super::headers::HeaderActionError; -use super::headers::HeaderActionModify; -use super::headers::ModifyAction; -use super::headers::PushAction; -use super::headers::UlpMetaModify; -use super::packet::PacketReadMut; -use super::packet::ReadErr; -use crate::d_error::DError; use core::fmt; use core::fmt::Display; -use opte_api::DYNAMIC_PORT; use serde::Deserialize; use serde::Serialize; -use zerocopy::FromBytes; -use zerocopy::Immutable; -use zerocopy::IntoBytes; -use zerocopy::KnownLayout; -use zerocopy::Ref; -use zerocopy::Unaligned; pub const TCP_HDR_OFFSET_MASK: u8 = 0xF0; pub const TCP_HDR_OFFSET_SHIFT: u8 = 4; diff --git a/lib/opte/src/engine/udp.rs b/lib/opte/src/engine/udp.rs index 338839d0..615bef45 100644 --- a/lib/opte/src/engine/udp.rs +++ b/lib/opte/src/engine/udp.rs @@ -6,27 +6,8 @@ //! UDP headers. -use crate::d_error::DError; -use crate::engine::checksum::Checksum; -use crate::engine::checksum::HeaderChecksum; -use crate::engine::headers::HeaderActionModify; -use crate::engine::headers::ModifyAction; -use crate::engine::headers::PushAction; -use crate::engine::headers::UlpMetaModify; -use crate::engine::packet::PacketReadMut; -use crate::engine::packet::ReadErr; -use core::mem; -use opte_api::DYNAMIC_PORT; use serde::Deserialize; use serde::Serialize; -use zerocopy::FromBytes; -use zerocopy::Immutable; -use zerocopy::IntoBytes; -use zerocopy::KnownLayout; -use zerocopy::Ref; -use zerocopy::Unaligned; - -use super::headers::HeaderActionError; #[derive( Clone, diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index 2783f88a..15ba4864 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -14,15 +14,12 @@ pub mod router; use crate::cfg::VpcCfg; use opte::engine::arp; -use opte::engine::arp::ArpEthIpv4; use opte::engine::arp::ArpEthIpv4Ref; use opte::engine::arp::ArpOp; use 
opte::engine::arp::ValidArpEthIpv4; use opte::engine::arp::ARP_HTYPE_ETHERNET; -use opte::engine::ether::ETHER_TYPE_IPV4; use opte::engine::flow_table::FlowTable; use opte::engine::ingot_base::EthernetRef; -use opte::engine::ingot_packet::MsgBlk; use opte::engine::ingot_packet::OpteParsed2; use opte::engine::ingot_packet::Packet2; use opte::engine::ingot_packet::Parsed2; diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index 73ee6f19..0fbe3f9b 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -73,8 +73,6 @@ use std::collections::BTreeMap; use std::prelude::v1::*; use std::time::Duration; use uuid::Uuid; -use zerocopy::FromBytes; -use zerocopy::IntoBytes; // If we are running `cargo test`, then make sure to // register the USDT probes before running any tests. From c7be20cc4ab86ad8ef7c85fb96be07f56d91af36 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 21 Oct 2024 15:24:41 -0700 Subject: [PATCH 058/115] The Axe Shall Fall (pt.3) --- lib/opte-test-utils/src/lib.rs | 7 - lib/opte/src/ddi/mblk.rs | 5 + lib/opte/src/ddi/mod.rs | 3 +- lib/opte/src/engine/ingot_base.rs | 6 + lib/opte/src/engine/ingot_packet.rs | 257 ++++- lib/opte/src/engine/ip6.rs | 460 -------- lib/opte/src/engine/packet.rs | 1538 +-------------------------- xde/src/dls/mod.rs | 28 +- xde/src/mac/mod.rs | 42 +- xde/src/xde.rs | 33 +- 10 files changed, 283 insertions(+), 2096 deletions(-) create mode 100644 lib/opte/src/ddi/mblk.rs diff --git a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index c1f16579..8c8e607e 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -38,15 +38,8 @@ use opte::engine::ingot_packet::Packet2; pub use opte::engine::ip4::Ipv4Addr; pub use opte::engine::ip4::Protocol; pub use opte::engine::ip6::Ipv6Addr; -pub use opte::engine::ip6::Ipv6Hdr; -pub use opte::engine::ip6::Ipv6Meta; pub use opte::engine::layer::DenyReason; -pub 
use opte::engine::packet::BodyInfo; -pub use opte::engine::packet::HdrOffset; -pub use opte::engine::packet::Initialized; -pub use opte::engine::packet::Packet; use opte::engine::packet::ParseError; -pub use opte::engine::packet::Parsed; pub use opte::engine::port::meta::ActionMeta; pub use opte::engine::port::DropReason; pub use opte::engine::port::Port; diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs new file mode 100644 index 00000000..5ca7b789 --- /dev/null +++ b/lib/opte/src/ddi/mblk.rs @@ -0,0 +1,5 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2024 Oxide Computer Company diff --git a/lib/opte/src/ddi/mod.rs b/lib/opte/src/ddi/mod.rs index f2de7cca..79f9c257 100644 --- a/lib/opte/src/ddi/mod.rs +++ b/lib/opte/src/ddi/mod.rs @@ -2,10 +2,11 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! Various abstractions for using the illumos DDI/DKI. pub mod kstat; +pub mod mblk; pub mod sync; pub mod time; diff --git a/lib/opte/src/engine/ingot_base.rs b/lib/opte/src/engine/ingot_base.rs index 5f4b5515..715e26b4 100644 --- a/lib/opte/src/engine/ingot_base.rs +++ b/lib/opte/src/engine/ingot_base.rs @@ -1,3 +1,9 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +// Copyright 2024 Oxide Computer Company + use super::checksum::Checksum; use ingot::choice; use ingot::ethernet::Ethertype; diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 7b9ecca9..1f26602e 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -1,3 +1,9 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2024 Oxide Computer Company + use super::checksum::Checksum as OpteCsum; use super::checksum::Checksum; use super::checksum::HeaderChecksum; @@ -44,11 +50,11 @@ use super::packet::allocb; use super::packet::AddrPair; use super::packet::BodyTransform; use super::packet::BodyTransformError; -use super::packet::Initialized; use super::packet::InnerFlowId; -use super::packet::Packet; use super::packet::PacketState; use super::packet::ParseError; +use super::packet::SegAdjustError; +use super::packet::WrapError; use super::packet::WriteError; use super::packet::FLOW_ID_DEFAULT; use super::rule::CompiledEncap; @@ -575,15 +581,38 @@ impl DerefMut for MsgBlkNode { } impl MsgBlkNode { - pub fn drop_front_bytes(&mut self, n: usize) { + /// Shrink the writable/readable area by shifting the `b_rptr` by + /// `len`; effectively removing bytes from the start of the packet. + /// + /// # Errors + /// + /// `SegAdjustError::StartPastEnd`: Shifting the read pointer by + /// `len` would move `b_rptr` past `b_wptr`. + pub fn drop_front_bytes(&mut self, n: usize) -> Result<(), SegAdjustError> { unsafe { - assert!(self.0.b_wptr.offset_from(self.0.b_rptr) >= n as isize); + if self.0.b_wptr.offset_from(self.0.b_rptr) < n as isize { + return Err(SegAdjustError::StartPastEnd); + } self.0.b_rptr = self.0.b_rptr.add(n); } + + Ok(()) } } impl MsgBlk { + /// Allocate a new [`MsgBlk`] containing a data buffer of `len` + /// bytes. 
+ /// + /// The returned packet consists of exactly one segment. + /// + /// In the kernel environment this uses `allocb(9F)` and + /// `freemsg(9F)` under the hood. + /// + /// In the `std` environment this uses a mock implementation of + /// `allocb(9F)` and `freeb(9F)`, which contains enough scaffolding + /// to satisfy OPTE's use of the underlying `mblk_t` and `dblk_t` + /// structures. pub fn new(len: usize) -> Self { let inner = NonNull::new(allocb(len)) .expect("somehow failed to get an mblk..."); @@ -591,19 +620,23 @@ impl MsgBlk { Self { inner } } + /// Allocates a new [`MsgBlk`] of size `buf.len()`, copying its + /// contents. pub fn copy(buf: impl AsRef<[u8]>) -> Self { let mut out = Self::new(buf.as_ref().len()); - // Unwarp safety -- just allocated length of input buffer. + // Unwrap safety -- just allocated length of input buffer. out.write_bytes_back(buf).unwrap(); out } + /// Creates a new [`MsgBlk`] using a given set of packet headers. pub fn new_pkt(emit: impl Emit + EmitDoesNotRelyOnBufContents) -> Self { let mut pkt = Self::new(emit.packet_length()); pkt.emit_back(emit).unwrap(); pkt } + /// Returns the number of bytes available for writing before pub fn headroom(&self) -> usize { unsafe { let inner = self.inner.as_ref(); @@ -612,10 +645,18 @@ impl MsgBlk { } } + /// Creates a new [`MsgBlk`] containing a data buffer of `len` + /// bytes with 2B of headroom/alignment. + /// + /// This sets up 4B alignment on all post-ethernet headers. pub fn new_ethernet(len: usize) -> Self { Self::new_with_headroom(2, len) } + /// Creates a new [`MsgBlk`] using a given set of packet headers + /// with 2B of headroom/alignment. + /// + /// This sets up 4B alignment on all post-ethernet headers. pub fn new_ethernet_pkt( emit: impl Emit + EmitDoesNotRelyOnBufContents, ) -> Self { @@ -624,14 +665,23 @@ impl MsgBlk { pkt } + /// Return the number of initialised bytes in this `MsgBlk` over + /// all linked segments. 
pub fn byte_len(&self) -> usize { self.iter().map(|el| el.len()).sum() } + /// Return the number of segments linked together in this + /// `MsgBlk`. pub fn seg_len(&self) -> usize { self.iter().count() } + /// Allocate a new [`MsgBlk`] containing a data buffer of size + /// `head_len + body_len`. + /// + /// The read/write pointer is set to have `head_len` bytes of + /// headroom and `body_len` bytes of capacity at the back. pub fn new_with_headroom(head_len: usize, body_len: usize) -> Self { let mut out = Self::new(head_len + body_len); @@ -643,7 +693,15 @@ impl MsgBlk { out } - pub unsafe fn write( + /// Provides a slice of length `n_bytes` at the back of an [`MsgBlk`] + /// (if capacity exists) to be initialised, before increasing `len` + /// by `n_bytes`. + /// + /// # Safety + /// Users must write a value to every element of the `MaybeUninit` + /// buffer at least once in the `MsgBlk` lifecycle -- all `n_bytes` + /// are assumed to be initialised. + pub unsafe fn write_back( &mut self, n_bytes: usize, f: impl FnOnce(&mut [MaybeUninit]), @@ -673,6 +731,14 @@ impl MsgBlk { Ok(()) } + /// Provides a slice of length `n_bytes` at the front of an [`MsgBlk`] + /// (if capacity exists) to be initialised, before increasing `len` + /// by `n_bytes`. + /// + /// # Safety + /// Users must write a value to every element of the `MaybeUninit` + /// buffer at least once in the `MsgBlk` lifecycle -- all `n_bytes` + /// are assumed to be initialised. pub unsafe fn write_front( &mut self, n_bytes: usize, @@ -713,7 +779,7 @@ impl MsgBlk { Ok(()) } else if new_len > len { unsafe { - self.write(new_len - len, |v| { + self.write_back(new_len - len, |v| { // MaybeUninit::fill is unstable.
let n = v.len(); v.as_mut_ptr().write_bytes(0, n); @@ -730,7 +796,7 @@ impl MsgBlk { pkt: impl Emit + EmitDoesNotRelyOnBufContents, ) -> Result<(), WriteError> { unsafe { - self.write(pkt.packet_length(), |v| { + self.write_back(pkt.packet_length(), |v| { // Unwrap safety: write will return an Error if // unsuccessful. pkt.emit_uninit(v).unwrap(); @@ -750,14 +816,14 @@ impl MsgBlk { } } - /// XXX + /// Copies a byte slice into the region after any bytes present in this mblk. pub fn write_bytes_back( &mut self, bytes: impl AsRef<[u8]>, ) -> Result<(), WriteError> { let bytes = bytes.as_ref(); unsafe { - self.write(bytes.len(), |v| { + self.write_back(bytes.len(), |v| { // feat(maybe_uninit_write_slice) -> copy_from_slice // is unstable. let uninit_src: &[MaybeUninit] = @@ -767,7 +833,7 @@ impl MsgBlk { } } - /// XXX + /// Copies a byte slice into the region before any bytes present in this mblk. pub fn write_bytes_front( &mut self, bytes: impl AsRef<[u8]>, @@ -792,7 +858,7 @@ impl MsgBlk { panic!("oopsie daisy") } - mut_self.b_cont = other.unwrap_mblk(); + mut_self.b_cont = other.unwrap_mblk().as_ptr(); } /// Drop all bytes and move the cursor to the very back of the dblk. @@ -805,18 +871,16 @@ impl MsgBlk { } } + /// Returns a shared cursor over all segments in this `MsgBlk`. pub fn iter(&self) -> MsgBlkIter { MsgBlkIter { curr: Some(self.inner), marker: PhantomData } } + /// Returns a mutable cursor over all segments in this `MsgBlk`. pub fn iter_mut(&mut self) -> MsgBlkIterMut { MsgBlkIterMut { curr: Some(self.inner), marker: PhantomData } } - pub fn as_pkt(self) -> Packet { - unsafe { Packet::wrap_mblk(self.unwrap_mblk()).expect("already good.") } - } - /// Return the pointer address of the underlying mblk_t. 
/// /// NOTE: This is purely to allow passing the pointer value up to @@ -826,16 +890,42 @@ impl MsgBlk { self.inner.as_ptr() as uintptr_t } - pub fn unwrap_mblk(self) -> *mut mblk_t { - let ptr_out = self.inner.as_ptr(); + /// Return the head of the underlying `mblk_t` segment chain and + /// consume `self`. The caller of this function now owns the + /// `mblk_t` segment chain. + pub fn unwrap_mblk(self) -> NonNull { + let ptr_out = self.inner; _ = ManuallyDrop::new(self); ptr_out } - pub unsafe fn wrap_mblk(ptr: *mut mblk_t) -> Option { - let inner = NonNull::new(ptr)?; - - Some(Self { inner }) + /// Wrap the `mblk_t` packet in a [`MsgBlk`], taking ownership of + /// the `mblk_t` packet as a result. An `mblk_t` packet consists + /// of one or more `mblk_t` segments chained together via + /// `b_cont`. When the [`MsgBlk`] is dropped, the + /// underlying `mblk_t` segment chain is freed. If you wish to + /// pass on ownership you must call the [`MsgBlk::unwrap_mblk()`] + /// function. + /// + /// # Safety + /// + /// The `mp` pointer must point to an `mblk_t` allocated by + /// `allocb(9F)` or provided by some kernel API which itself used + /// one of the DDI/DKI APIs to allocate it. + /// + /// # Errors + /// + /// * Return [`WrapError::NullPtr`] if `mp` is `NULL`. + /// * Return [`WrapError::Chain`] if `mp->b_next` or `mp->b_prev` are set. + pub unsafe fn wrap_mblk(ptr: *mut mblk_t) -> Result { + let inner = NonNull::new(ptr).ok_or(WrapError::NullPtr)?; + let inner_ref = inner.as_ref(); + + if inner_ref.b_next.is_null() && inner_ref.b_prev.is_null() { + Ok(Self { inner }) + } else { + Err(WrapError::Chain) + } } /// Copy out all bytes within this mblk and its successors @@ -853,6 +943,10 @@ impl MsgBlk { /// Drops all empty mblks from the start of this chain where possible /// (i.e., any empty mblk is followed by another mblk). pub fn drop_empty_segments(&mut self) { + // We should not be creating message block continuations to zero + // sized blocks.
This is not a generally expected thing and has + // caused NIC hardware to stop working. + // Stripping these out where possible is necessary. let mut head = self.inner; let mut neighbour = unsafe { (*head.as_ptr()).b_cont }; @@ -888,7 +982,6 @@ pub struct MsgBlkIterMut<'a> { } impl<'a> MsgBlkIterMut<'a> { - /// pub fn next_iter(&self) -> MsgBlkIter { let curr = self .curr @@ -948,10 +1041,19 @@ impl<'a> Read for MsgBlkIterMut<'a> { } } +/// For the `no_std`/illumos kernel environment, we want the `mblk_t` +/// drop to occur at the [`Packet`] level, where we can make use of +/// `freemsg(9F)`. impl Drop for MsgBlk { fn drop(&mut self) { + // Drop the segment chain if there is one. Consumers of MsgBlk + // will never own a packet with no segments. + // This guarantees that we only free the segment chain once. cfg_if! { if #[cfg(all(not(feature = "std"), not(test)))] { + // Safety: This is safe as long as the original + // `mblk_t` came from a call to `allocb(9F)` (or + // similar API). unsafe { ddi::freemsg(self.inner.as_ptr()) }; } else { mock_freemsg(self.inner.as_ptr()); @@ -1545,9 +1647,62 @@ impl From<&PacketHeaders> for InnerFlowId { } } -// GOAL: get to an absolute minimum point where we: -// - parse into an innerflowid -// - use existing transforms if a ULP entry exists. +/// A network packet. +/// +/// The [`Packet`] type presents an abstraction for manipulating +/// network packets in both a `std` and `no_std` environment. The +/// first is useful for writing tests against the OPTE core engine and +/// executing them in userland, without the need for standing up a +/// full-blown virtual machine. To the engine this [`Packet`] is +/// absolutely no different than if it was running in-kernel for a +/// real virtual machine. +/// +/// The `no_std` implementation is used when running in-kernel. 
The +/// main difference is the `mblk_t` and `dblk_t` structures are coming +/// from viona (outbound/Tx) and mac (inbound/Rx), and we consume them +/// via [`Packet::wrap_mblk()`]. In reality this is typically holding +/// an Ethernet _frame_, but we prefer to use the colloquial +/// nomenclature of "packet". +/// +/// A [`Packet`] is made up of one or more segments ([`PacketSeg`]). +/// Any given header is *always* contained in a single segment, i.e. a +/// header never straddles multiple segments. While it's preferable to +/// have all headers in the first segment, it *may* be the case that +/// the headers span multiple segments; but a *single* header type +/// (e.g. the IP header) will *never* straddle two segments. The +/// payload, however, *may* span multiple segments. +/// +/// # illumos terminology +/// +/// In illumos there is no real notion of an mblk "packet" or +/// "segment": a packet is just a linked list of `mblk_t` values. +/// The "packet" is simply a pointer to the first `mblk_t` in the +/// list, which also happens to be the first "segment", and any +/// further segments are linked via `b_cont`. In the illumos +/// kernel code you'll *sometimes* find variables named `mp_head` +/// to indicate that it points to a packet. +/// +/// There is also the notion of a "chain" of packets. This is +/// represented by a list of `mblk_t` structures as well, but instead +/// of using `b_cont` the individual packets are linked via the +/// `b_next` field. In the illumos kernel code this is often +/// referred to with the variable name `mp_chain`, but sometimes also +/// `mp_head` (or just `mp`). It's a bit ambiguous, and something you +/// kind of figure out as you work in the code more.
Though part of me +/// would like to create some rust-like "new type pattern" in C to +/// disambiguate packets from packet chains across APIs so the +/// compiler can detect when your API is working against the wrong +/// contract (for example a function that expects a single packet but +/// is being fed a packet chain). +/// +/// TODOx +/// +/// * Document the various type states, their purpose, their data, and +/// how the [`Packet`] generally transitions between them. +/// +/// * Somewhere we'll want to enforce and document a 2-byte prefix pad +/// to keep IP header alignment (the host expects this). +/// #[derive(Debug)] pub struct Packet2 { state: S, @@ -1916,7 +2071,7 @@ impl Packet2> { self.state.inner_csum_dirty |= xform.run(&mut self.state.meta)?; // Recomputing this is a little bit wasteful, since we're moving - // rebuilding a static repr from packet fields. This is a necesary + // rebuilding a static repr from packet fields. This is a necessary // part of slowpath use because layers are designed around intermediate // flowkeys. // @@ -1985,7 +2140,9 @@ impl Packet2> { /// Compute ULP and IP header checksum from scratch. /// - /// This should really only be used for testing. + /// This should really only be used for testing, or in the case + /// where we have applied body transforms and know that any initial + /// body_csum cannot be valid. pub fn compute_checksums(&mut self) where T::Chunk: ByteSliceMut, @@ -2099,6 +2256,13 @@ impl Packet2> { return; } + // TODO: DOUBLE CHECK LOGIC + + // We expect + if self.state.body_modified { + return self.compute_checksums(); + } + // Flag to indicate if an IP header/ULP checksums were // provided. If the checksum is zero, it's assumed heardware // checksum offload is being used, and OPTE should not update @@ -2190,9 +2354,9 @@ impl Packet2> { /// about which nothing else is known besides the length. #[derive(Debug)] pub struct Initialized2 { - // Total length of packet, in bytes. 
This is equal to the sum of - // the length of the _initialized_ window in all the segments - // (`b_wptr - b_rptr`). + /// Total length of packet, in bytes. This is equal to the sum of + /// the length of the _initialized_ window in all the segments + /// (`b_wptr - b_rptr`). len: usize, inner: T, @@ -2203,13 +2367,41 @@ impl PacketState for Parsed2 {} /// Zerocopy view onto a parsed packet, acompanied by locally /// computed state. +/// XXX: this is 'full meta'. Maybe rename LightweightMeta (???) pub struct Parsed2 { + /// Total length of packet, in bytes. This is equal to the sum of + /// the length of the _initialized_ window in all the segments + /// (`b_wptr - b_rptr`). len: usize, + /// Access to parsed packet headers and the packet body. meta: Box>, + /// Current Flow ID of this packet, accounting for any applied + /// transforms. flow: InnerFlowId, + + /// The body's checksum. It is up to the `NetworkImpl::Parser` on + /// whether to populate this field or not. The reason for + /// populating this field is to avoid duplicate work if the client + /// has provided a ULP checksum. Rather than redoing the body + /// checksum calculation, we can use incremental checksum + /// techniques to stash the body's checksum for reuse when emitting + /// the new headers. + /// + /// However, if the client does not provide a checksum, presumably + /// because they are relying on checksum offload, this value should + /// be `None`. In such case, `emit_headers()` will perform no ULP + /// checksum update. + /// + /// This value may also be none if the packet has no notion of a + /// ULP checksum; e.g., ARP. body_csum: Option, + /// L4 hash for this packet, computed from the flow ID. l4_hash: Memoised, + /// Tracks whether any body transforms have been executed on this + /// packet. body_modified: bool, + /// Tracks whether any transform has been applied to this packet + /// which would dirty the inner L3 and/or ULP header checksums. inner_csum_dirty: bool,
inner_csum_dirty: bool, } @@ -2383,7 +2575,8 @@ impl EmittestSpec { let has = node.len(); let droppable = to_rewind.min(has); - node.drop_front_bytes(droppable); + node.drop_front_bytes(droppable) + .expect("droppable should be bounded above by len"); to_rewind -= droppable; slots.push(node).unwrap(); diff --git a/lib/opte/src/engine/ip6.rs b/lib/opte/src/engine/ip6.rs index 2c0d4ad1..8ab06d9f 100644 --- a/lib/opte/src/engine/ip6.rs +++ b/lib/opte/src/engine/ip6.rs @@ -6,14 +6,8 @@ //! IPv6 headers. -use super::checksum::Checksum; -use super::headers::ModifyAction; -use super::headers::PushAction; use super::ip4::Protocol; pub use super::ip4::UlpCsumOpt; -use super::packet::PacketReadMut; -use super::packet::ReadErr; -use crate::d_error::DError; use crate::engine::predicate::MatchExact; use crate::engine::predicate::MatchExactVal; use crate::engine::predicate::MatchPrefix; @@ -22,22 +16,11 @@ pub use opte_api::Ipv6Addr; pub use opte_api::Ipv6Cidr; use serde::Deserialize; use serde::Serialize; -use smoltcp::wire::IpProtocol; -use smoltcp::wire::Ipv6ExtHeader; -use smoltcp::wire::Ipv6FragmentHeader; -use smoltcp::wire::Ipv6HopByHopHeader; -use smoltcp::wire::Ipv6Packet; -use smoltcp::wire::Ipv6RoutingHeader; pub const IPV6_HDR_VSN_MASK: u8 = 0xF0; pub const IPV6_HDR_VSN_SHIFT: u8 = 4; pub const IPV6_VERSION: u8 = 6; pub const DDM_HEADER_ID: u8 = 0xFE; -/// Current maximum bytes for extension headers which fit -/// in IPv6Meta. -/// -/// TODO: refactor so as *not* to need this. -pub const IPV6_MAX_EXT_LEN: usize = 64; impl MatchExactVal for Ipv6Addr {} impl MatchPrefixVal for Ipv6Cidr {} @@ -54,141 +37,6 @@ impl MatchPrefix for Ipv6Addr { } } -#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)] -pub struct Ipv6Meta { - pub src: Ipv6Addr, - pub dst: Ipv6Addr, - pub next_hdr: IpProtocol, - pub proto: Protocol, - pub hop_limit: u8, - pub pay_len: u16, - - // For now we hold extensions as raw bytes. 
Ideally, each extension - // we support should get its own meta-like type and be declared - // optional. - // - // ``` - // pub hbh: Option, - // pub routing: Option, - // pub frag: Option, - // ... - // ``` - pub ext: Option<[u8; 64]>, - // NOTE: We need `ext_len` explicitly, because `ext` is a fixed-size array. - pub ext_len: usize, -} - -impl Default for Ipv6Meta { - fn default() -> Self { - Self { - src: Ipv6Addr::from([0; 16]), - dst: Ipv6Addr::from([0; 16]), - next_hdr: IpProtocol::Unknown(255), - proto: Protocol::Unknown(255), - hop_limit: 128, - pay_len: 0, - ext: None, - ext_len: 0, - } - } -} - -impl Ipv6Meta { - /// Compute the [`Checksum`] of the contained ULP datagram. - /// - /// This computes the checksum of the pseudo-header, and adds to it the sum - /// of the ULP header and body. - pub fn compute_ulp_csum( - &self, - opt: UlpCsumOpt, - ulp_hdr: &[u8], - body: &[u8], - ) -> Checksum { - match opt { - UlpCsumOpt::Partial => todo!("implement partial csum"), - UlpCsumOpt::Full => { - let mut csum = self.pseudo_csum(); - csum.add_bytes(ulp_hdr); - csum.add_bytes(body); - csum - } - } - } - - #[inline] - pub fn emit(&self, dst: &mut [u8]) { - debug_assert_eq!(dst.len(), self.hdr_len()); - let base = &mut dst[0..Ipv6Hdr::BASE_SIZE]; - let mut pkt = Ipv6Packet::new_unchecked(base); - pkt.set_version(6); - // For now assume no traffic class or flow label. - pkt.set_traffic_class(0); - pkt.set_flow_label(0); - pkt.set_payload_len(self.pay_len); - pkt.set_next_header(self.next_hdr); - pkt.set_hop_limit(self.hop_limit); - pkt.set_src_addr(self.src.into()); - pkt.set_dst_addr(self.dst.into()); - - if let Some(ext_bytes) = self.ext { - dst[Ipv6Hdr::BASE_SIZE..] - .copy_from_slice(&ext_bytes[0..self.ext_len]); - } - } - - /// Return the length of the IPv6 header, including the base header and - /// extension headers. - pub fn hdr_len(&self) -> usize { - Ipv6Hdr::BASE_SIZE + self.ext_len - } - - /// Return the pseudo header bytes. 
- pub fn pseudo_bytes(&self, bytes: &mut [u8; 40]) { - bytes[0..16].copy_from_slice(&self.src.bytes()); - bytes[16..32].copy_from_slice(&self.dst.bytes()); - bytes[32..36].copy_from_slice(&((self.pay_len as u32).to_be_bytes())); - bytes[36..40].copy_from_slice(&[0u8, 0u8, 0u8, u8::from(self.proto)]); - } - - /// Return a [`Checksum`] of the pseudo header. - pub fn pseudo_csum(&self) -> Checksum { - let mut bytes = [0u8; 40]; - self.pseudo_bytes(&mut bytes); - Checksum::compute(&bytes) - } - - /// Return the total length of the packet, including the base header, any - /// extension headers, and the payload itself. - pub fn total_len(&self) -> u16 { - Ipv6Hdr::BASE_SIZE as u16 + self.pay_len - } -} - -impl<'a> From<&Ipv6Hdr<'a>> for Ipv6Meta { - fn from(ip6: &Ipv6Hdr) -> Self { - let (ext, ext_len) = if let Some((ext_bytes, _proto_off)) = &ip6.ext { - let ext_len = ext_bytes.len(); - assert!(ext_len <= 64); - let mut ext = [0; 64]; - ext[0..ext_len].copy_from_slice(ext_bytes); - (Some(ext), ext_len) - } else { - (None, 0) - }; - - Ipv6Meta { - src: ip6.src(), - dst: ip6.dst(), - proto: ip6.proto(), - next_hdr: ip6.next_hdr(), - hop_limit: ip6.hop_limit(), - pay_len: ip6.pay_len() as u16, - ext, - ext_len, - } - } -} - #[derive( Clone, Copy, Debug, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize, )] @@ -198,19 +46,6 @@ pub struct Ipv6Push { pub proto: Protocol, } -impl PushAction for Ipv6Push { - fn push(&self) -> Ipv6Meta { - Ipv6Meta { - src: self.src, - dst: self.dst, - proto: self.proto, - // For now you cannot push extension headers. 
- next_hdr: IpProtocol::from(self.proto), - ..Default::default() - } - } -} - #[derive(Clone, Debug, Default, Deserialize, Serialize)] pub struct Ipv6Mod { pub src: Option, @@ -218,301 +53,6 @@ pub struct Ipv6Mod { pub proto: Option, } -impl ModifyAction for Ipv6Mod { - fn modify(&self, meta: &mut Ipv6Meta) { - if let Some(src) = self.src { - meta.src = src; - } - if let Some(dst) = self.dst { - meta.dst = dst; - } - if let Some(proto) = self.proto { - meta.proto = proto; - } - } -} - -/// An IPv6 packet header. -#[derive(Debug)] -pub struct Ipv6Hdr<'a> { - base: Ipv6Packet<&'a mut [u8]>, - // The proto reference points to the last next_header value (aka - // the upper-layer protocol number). - // proto: &'a mut u8, - /// Byteslice verified to be smaller than `IPV6_MAX_EXT_LEN`. - /// (extensions bytes, protocol field offset). - ext: Option<(&'a mut [u8], usize)>, -} - -impl<'a> Ipv6Hdr<'a> { - /// The size of the fixed IPv6 header. - /// - /// IPv6 headers are variable length, including a fixed, 40-byte portion as - /// well as a variable number of extension headers, each with potentially - /// different sizes. This size describes the fixed portion. - pub const BASE_SIZE: usize = 40; - - /// The offset of the Protocol (Next Header) field in the base header. - pub const BASE_HDR_PROTO_OFFSET: usize = 6; - - /// Return the destination address. - pub fn dst(&self) -> Ipv6Addr { - Ipv6Addr::from(self.base.dst_addr()) - } - - /// Return the length of the extensions headers, or 0 if there are - /// none. - fn ext_len(&self) -> usize { - match &self.ext { - None => 0, - Some((ext_bytes, _)) => ext_bytes.len(), - } - } - - /// Return the length of the header portion of the packet, including - /// extension headers - pub fn hdr_len(&self) -> usize { - Self::BASE_SIZE + self.ext_len() - } - - /// Return the hop limit value. 
- pub fn hop_limit(&self) -> u8 { - self.base.hop_limit() - } - - fn next_hdr(&self) -> IpProtocol { - self.base.next_header() - } - - /// Parse an IPv6 packet out of a reader, if possible. - pub fn parse<'b>( - rdr: &'b mut impl PacketReadMut<'a>, - ) -> Result { - // Parse the base IPv6 header. - let buf = rdr.slice_mut(Self::BASE_SIZE)?; - let base = Ipv6Packet::new_unchecked(buf); - match base.version() { - 6 => {} - vsn => return Err(Ipv6HdrError::BadVersion { vsn }), - } - - // Parse any extension headers. - // - // At this point, we don't need any information out of the headers other - // than their length (to determine the boundary with the ULP). We'll - // verify that the headers are supported, but otherwise maintain only a - // byte array with their contents. - let mut ext_len = 0; - let mut next_header = base.next_header(); - - // Either we have no extensions or we are parsing zero'd - // header data for the purpose of emitting. - if is_ulp_protocol(next_header) { - return Ok(Self { base, ext: None }); - } - - let mut proto_offset: usize = 0; - while !is_ulp_protocol(next_header) { - let n_bytes = match V6ExtClass::from(next_header) { - V6ExtClass::Rfc6564 => { - let buf = rdr.slice_mut(rdr.seg_left())?; - let mut header = Ipv6ExtHeader::new_checked(buf)?; - - // verify carried protocol if possible. - match next_header { - IpProtocol::HopByHop => { - _ = Ipv6HopByHopHeader::new_checked( - header.payload_mut(), - )? - } - IpProtocol::Ipv6Route => { - _ = Ipv6RoutingHeader::new_checked( - header.payload_mut(), - )? - } - _ => {} - } - - let n_bytes = 8 * (usize::from(header.header_len()) + 1); - next_header = header.next_header(); - let buf = header.into_inner(); - ext_len += n_bytes; - - // Put back any bytes in the segment not needed - // for this header. - rdr.seek_back(buf.len() - n_bytes)?; - - n_bytes - } - V6ExtClass::Frag => { - // This header's length is fixed. 
- // - // We'd like to use `size_of::()`, but - // that is not `repr(packed)`, so we'd possibly count - // padding. - const FRAGMENT_HDR_SIZE: usize = 8; - let buf = rdr.slice_mut(FRAGMENT_HDR_SIZE)?; - ext_len += buf.len(); - let mut header = Ipv6ExtHeader::new_checked(buf)?; - _ = Ipv6FragmentHeader::new_checked(header.payload_mut())?; - next_header = header.next_header(); - - FRAGMENT_HDR_SIZE - } - _ => { - return Err(Ipv6HdrError::UnexpectedNextHeader { - next_header: next_header.into(), - }); - } - }; - - if !is_ulp_protocol(next_header) { - proto_offset += n_bytes; - } - } - - // Panic: The protocol is the last value of next header, and since - // we've matched on everything we support in the `try_from` impl, this - // unwrap can't panic. - let _protocol = Protocol::from(next_header); - - if ext_len > IPV6_MAX_EXT_LEN { - return Err(Ipv6HdrError::ExtensionsTooLarge); - } - - // Seek back to the start of the extensions, then take a slice of - // all the options. - rdr.seek_back(ext_len)?; - let ext = Some((rdr.slice_mut(ext_len)?, proto_offset)); - Ok(Self { base, ext }) - } - - /// Return the payload length. - /// - /// This length includes any extension headers along with the - /// body. - pub fn pay_len(&self) -> usize { - usize::from(self.base.payload_len()) - } - - /// Return the Upper Layer Protocol in use. - /// - /// Even when extension headers are in play, this call always - /// returns the ULP. In other words, it always returns the final - /// "Next Header" value at the end of the extension header chain. - pub fn proto(&self) -> Protocol { - // Unwrap: We verified the proto is good upon parsing. - if let Some((bytes, proto_offset)) = &self.ext { - Protocol::from(bytes[*proto_offset]) - } else { - Protocol::from(self.base.next_header()) - } - } - - /// Populate `bytes` with the pseudo header bytes. 
- pub fn pseudo_bytes(&self, bytes: &mut [u8; 40]) { - bytes[0..16].copy_from_slice(self.base.src_addr().as_bytes()); - bytes[16..32].copy_from_slice(self.base.dst_addr().as_bytes()); - bytes[32..36].copy_from_slice(&(self.pay_len() as u32).to_be_bytes()); - bytes[36..40].copy_from_slice(&[0u8, 0u8, 0u8, u8::from(self.proto())]); - } - - /// Return a [`Checksum`] of the pseudo header. - pub fn pseudo_csum(&self) -> Checksum { - let mut pseudo_bytes = [0u8; 40]; - self.pseudo_bytes(&mut pseudo_bytes); - Checksum::compute(&pseudo_bytes) - } - - /// Set the total length of the packet. - /// - /// There is no "total length" for IPv6; it keeps a payload - /// length. However, this API is useful for having a consistent - /// method for setting lengths when emitting headers. - pub fn set_total_len(&mut self, len: u16) { - // The Payload Length field of the IPv6 header includes the ULP payload - // _and_ the length of any extension headers. - self.base.set_payload_len(len - Self::BASE_SIZE as u16); - } - - /// Return the source address. - pub fn src(&self) -> Ipv6Addr { - Ipv6Addr::from(self.base.src_addr()) - } - - /// Return the total length of the packet, including the base header, any - /// extension headers, and the payload itself. - pub fn total_len(&self) -> usize { - self.pay_len() + Self::BASE_SIZE - } - - /// Return the length of the upper-layer protocol payload. 
- pub fn ulp_len(&self) -> usize { - self.pay_len() - self.ext_len() - } -} - -fn is_ulp_protocol(proto: IpProtocol) -> bool { - matches!(V6ExtClass::from(proto), V6ExtClass::Ulp) -} - -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -enum V6ExtClass { - Ulp, - Frag, - Rfc6564, - Unknown, -} - -impl From for V6ExtClass { - #[inline] - fn from(value: IpProtocol) -> Self { - use IpProtocol::*; - - match value { - Icmp | Igmp | Tcp | Udp | Icmpv6 => Self::Ulp, - Ipv6Frag => Self::Frag, - HopByHop | Ipv6Route | Ipv6Opts => Self::Rfc6564, - // Also follow RFC6564: - // 135 (RFC6275), 139 (RFC7401), 140 (RFC5533) - Unknown(x) if x == DDM_HEADER_ID => Self::Rfc6564, - _ => Self::Unknown, - } - } -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq, DError)] -#[derror(leaf_data = Ipv6HdrError::derror_data)] -pub enum Ipv6HdrError { - BadVersion { vsn: u8 }, - ReadError(ReadErr), - UnexpectedNextHeader { next_header: u8 }, - Malformed, - ExtensionsTooLarge, -} - -impl Ipv6HdrError { - fn derror_data(&self, data: &mut [u64]) { - data[0] = match self { - Self::BadVersion { vsn } => *vsn as u64, - Self::UnexpectedNextHeader { next_header } => *next_header as u64, - _ => 0, - } - } -} - -impl From for Ipv6HdrError { - fn from(_error: smoltcp::wire::Error) -> Ipv6HdrError { - Ipv6HdrError::Malformed - } -} - -impl From for Ipv6HdrError { - fn from(error: ReadErr) -> Self { - Ipv6HdrError::ReadError(error) - } -} - #[cfg(test)] pub(crate) mod test { use super::*; diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index be0eaf4b..151cf60b 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -11,7 +11,6 @@ //! * Add hardware offload information to [`Packet`]. //! 
-use super::checksum::Checksum; use super::headers::IpAddr; use super::headers::AF_INET; use super::headers::AF_INET6; @@ -26,7 +25,6 @@ use core::hash::Hash; use core::ptr; use core::ptr::NonNull; use core::result; -use core::slice; use crc32fast::Hasher; use dyn_clone::DynClone; use serde::Deserialize; @@ -37,7 +35,6 @@ use alloc::string::String; use alloc::vec::Vec; use illumos_sys_hdrs::dblk_t; use illumos_sys_hdrs::mblk_t; -use illumos_sys_hdrs::uintptr_t; cfg_if! { if #[cfg(all(not(feature = "std"), not(test)))] { @@ -108,6 +105,8 @@ impl Default for InnerFlowId { } } +/// Tagged union of a source-dest IP address pair, used to avoid +/// duplicating the discriminator. #[derive( Clone, Copy, @@ -208,7 +207,7 @@ impl PacketChain { Self { inner: None } } - /// Convert an mblk_t packet chain into a safe source of `Packet`s. + /// Convert an mblk_t packet chain into a safe source of `MsgBlk`s. /// /// # Safety /// The `mp` pointer must point to an `mblk_t` allocated by @@ -230,40 +229,7 @@ impl PacketChain { /// Removes the next packet from the top of the chain and returns /// it, taking ownership. - pub fn pop_front(&mut self) -> Option> { - if let Some(ref mut list) = &mut self.inner { - unsafe { - let curr = list.head.as_ptr(); - let next = NonNull::new((*curr).b_next); - - // Break the forward link on the packet we have access to, - // and the backward link on the next element if possible. - if let Some(next) = next { - (*next.as_ptr()).b_prev = ptr::null_mut(); - } - (*curr).b_next = ptr::null_mut(); - - // Update the current head. If the next element is null, - // we're now empty. - if let Some(next) = next { - list.head = next; - } else { - self.inner = None; - } - - // Unwrap safety: We have already guaranteed that this - // ptr is NonNull in this case, and violating that is - // the only failure mode for wrap_mblk. 
- Some(Packet::wrap_mblk(curr).unwrap()) - } - } else { - None - } - } - - /// Removes the next packet from the top of the chain and returns - /// it, taking ownership. - pub fn pop_front2(&mut self) -> Option { + pub fn pop_front(&mut self) -> Option { if let Some(ref mut list) = &mut self.inner { unsafe { let curr_b = list.head; @@ -292,15 +258,15 @@ impl PacketChain { } } - /// Adds an owned `Packet` to the end of this chain. + /// Adds an owned `MsgBlk` to the end of this chain. /// - /// Internally, this unwraps the `Packet` back into an mblk_t, + /// Internally, this unwraps the `MsgBlk` back into an mblk_t, /// before placing it at the tail. - pub fn append(&mut self, packet: Packet) { + pub fn append(&mut self, packet: MsgBlk) { // Unwrap safety: a valid Packet implies a non-null mblk_t. // Jamming `NonNull` into PacketSeg/Packet might take some // work just to avoid this unwrap. - let pkt = NonNull::new(packet.unwrap_mblk()).unwrap(); + let pkt = packet.unwrap_mblk(); // We're guaranteeing today that a 'static Packet has // no neighbours and is not part of a chain. @@ -354,446 +320,8 @@ impl Drop for PacketChain { } } -/// A network packet. -/// -/// The [`Packet`] type presents an abstraction for manipulating -/// network packets in both a `std` and `no_std` environment. The -/// first is useful for writing tests against the OPTE core engine and -/// executing them in userland, without the need for standing up a -/// full-blown virtual machine. To the engine this [`Packet`] is -/// absolutely no different than if it was running in-kernel for a -/// real virtual machine. -/// -/// The `no_std` implementation is used when running in-kernel. The -/// main difference is the `mblk_t` and `dblk_t` structures are coming -/// from viona (outbound/Tx) and mac (inbound/Rx), and we consume them -/// via [`Packet::wrap_mblk()`]. In reality this is typically holding -/// an Ethernet _frame_, but we prefer to use the colloquial -/// nomenclature of "packet". 
-/// -/// A [`Packet`] is made up of one or more segments ([`PacketSeg`]). -/// Any given header is *always* contained in a single segment, i.e. a -/// header never straddles multiple segments. While it's preferable to -/// have all headers in the first segment, it *may* be the case that -/// the headers span multiple segments; but a *single* header type -/// (e.g. the IP header) will *never* straddle two segments. The -/// payload, however, *may* span multiple segments. -/// -/// # illumos terminology -/// -/// In illumos there is no real notion of an mblk "packet" or -/// "segment": a packet is just a linked list of `mblk_t` values. -/// The "packet" is simply a pointer to the first `mblk_t` in the -/// list, which also happens to be the first "segment", and any -/// further segments are linked via `b_cont`. In the illumos -/// kernel code you'll *sometimes* find variables named `mp_head` -/// to indicate that it points to a packet. -/// -/// There is also the notion of a "chain" of packets. This is -/// represented by a list of `mblk_t` structure as well, but instead -/// of using `b_cont` the individual packets are linked via the -/// `b_next` field. In the illumos kernel code this this is often -/// referred to with the variable name `mp_chain`, but sometimes also -/// `mp_head` (or just `mp`). It's a bit ambiguous, and something you -/// kind of figure out as you work in the code more. Though part of me -/// would like to create some rust-like "new type pattern" in C to -/// disambiguate packets from packet chains across APIs so the -/// compiler can detect when your API is working against the wrong -/// contract (for example a function that expects a single packet but -/// is being fed a packet chain). -/// -/// TODOx -/// -/// * Document the various type states, their purpose, their data, and -/// how the [`Packet`] generally transitions between them. 
-/// -/// * Somewhere we'll want to enforce and document a 2-byte prefix pad -/// to keep IP header alignment (the host expects this). -/// -#[derive(Debug)] -pub struct Packet { - avail: usize, - segs: Vec, - state: S, -} - -/// The type state of a packet that has been initialized and allocated, but -/// about which nothing else is known besides the length. -#[derive(Debug)] -pub struct Initialized { - // Total length of packet, in bytes. This is equal to the sum of - // the length of the _initialized_ window in all the segments - // (`b_wptr - b_rptr`). - len: usize, -} - -/// The offset and length of a header. -#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] -pub struct HdrOffset { - /// The header's offset from start of packet, in bytes. - pub pkt_pos: usize, - - /// The index of the segment the header lives in, starting at 0. - pub seg_idx: usize, - - /// The header's offset from the start of the segment, in bytes. - pub seg_pos: usize, - - /// The length of the header. - pub hdr_len: usize, -} - -impl HdrOffset { - fn new(rdr_offset: ReaderOffset, hdr_len: usize) -> Self { - // We always take the reader offset _after_ parsing, thus we - // need to adjust the positions based on the header length. - Self { - pkt_pos: rdr_offset.pkt_pos - hdr_len, - seg_idx: rdr_offset.seg_idx, - seg_pos: rdr_offset.seg_pos - hdr_len, - hdr_len, - } - } -} - -/// Bytes offsets for the outer headers. -/// -/// All outer headers are optional. -#[derive(Clone, Debug, Default)] -pub struct OuterHeaderOffsets { - pub ether: Option, - pub ip: Option, - pub encap: Option, -} - -/// Byte offsets for the inner headers. -/// -/// The inner headers must consist of at least an Ethernet header. -#[derive(Clone, Debug, Default)] -pub struct InnerHeaderOffsets { - pub ether: HdrOffset, - pub ip: Option, - pub ulp: Option, -} - -/// Byte offsets for all headers. 
-#[derive(Clone, Debug, Default)] -pub struct HeaderOffsets { - pub outer: OuterHeaderOffsets, - pub inner: InnerHeaderOffsets, -} - -pub struct HdrInfo { - pub meta: M, - pub offset: HdrOffset, -} - -pub struct PacketInfo { - // pub meta: PacketMeta, - pub offsets: HeaderOffsets, - // The body's checksum. It is up to the `NetworkImpl::Parser` on - // whether to populate this field or not. The reason for - // populating this field is to avoid duplicate work if the client - // has provided a ULP checksum. Rather than redoing the body - // checksum calculation, we can use incremental checksum - // techniques to stash the body's checksum for reuse when emitting - // the new headers. - // - // However, if the client does not provide a checksum, presumably - // because they are relying on checksum offload, this value should - // be `None`. In such case, `emit_headers()` will perform no ULP - // checksum update. - // - // This value may also be none if the packet has no notion of a - // ULP checksum; e.g., ARP. - pub body_csum: Option, - // Extra header space to avoid multiple allocations during encapsulation. - pub extra_hdr_space: Option, -} - -/// Body offset and length information. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub struct BodyInfo { - pub pkt_offset: usize, - pub seg_index: usize, - pub seg_offset: usize, - pub len: usize, -} - -/// The type state of a parsed packet. -/// -/// The parsed type state represents that a packet has been -/// successfully parsed and contains all pertinent information derived -/// from parsing. 
-#[derive(Debug)] -pub struct Parsed { - len: usize, - // meta: PacketMeta, - flow: InnerFlowId, - hdr_offsets: HeaderOffsets, - body_csum: Option, - body: BodyInfo, - body_modified: bool, -} - pub trait PacketState {} -pub trait CanRead { - fn len(&self) -> usize; -} - -impl PacketState for Initialized {} -impl PacketState for Parsed {} - -impl CanRead for Initialized { - fn len(&self) -> usize { - self.len - } -} - -impl CanRead for Parsed { - fn len(&self) -> usize { - self.len - } -} - -impl Packet { - /// Return the amount of buffer space available to this packet. - pub fn avail(&self) -> usize { - self.avail - } - - /// Return the pointer address of the underlying mblk_t. - /// - /// NOTE: This is purely to allow passing the pointer value up to - /// DTrace so that the mblk can be inspected (read only) in probe - /// context. - pub fn mblk_addr(&self) -> uintptr_t { - self.segs[0].mp as uintptr_t - } - - /// Return the number of segments that make up this packet. - pub fn num_segs(&self) -> usize { - self.segs.len() - } - - /// Return the head of the underlying `mblk_t` segment chain and - /// consume `self`. The caller of this function now owns the - /// `mblk_t` segment chain. - pub fn unwrap_mblk(mut self) -> *mut mblk_t { - let mp_head = self.segs[0].mp; - // We need to make sure to NULL out the mp pointer or else - // `drop()` will `freemsg(9F)` even though ownership of the - // mblk has passed on to someone else. - self.segs[0].mp = ptr::null_mut(); - mp_head - } -} - -/// For the `no_std`/illumos kernel environment, we want the `mblk_t` -/// drop to occur at the [`Packet`] level, where we can make use of -/// `freemsg(9F)`. -impl Drop for Packet { - fn drop(&mut self) { - // Drop the segment chain if there is one. Consumers of Packet - // will never own a packet with no segments. Rather, this - // happens when a Packet transitions from one type-state to - // another, and the segments are passed onto the new Packet. 
- // This guarantees that we only free the segment chain once. - if !self.segs.is_empty() { - let head_mp = self.segs[0].mp; - drop(core::mem::take(&mut self.segs)); - cfg_if! { - if #[cfg(all(not(feature = "std"), not(test)))] { - // Safety: This is safe as long as the original - // `mblk_t` came from a call to `allocb(9F)` (or - // similar API). - unsafe { ddi::freemsg(head_mp) }; - } else { - mock_freemsg(head_mp); - } - } - } - } -} - -impl Packet { - /// Allocate a new [`Packet`] containing a data buffer of `size` - /// bytes. - /// - /// The returned packet consists of exactly one [`PacketSeg`]. - /// - /// In the kernel environment this uses `allocb(9F)` and - /// `freemsg(9F)` under the hood. - /// - /// In the `std` environment this uses a mock implementation of - /// `allocb(9F)` and `freeb(9F)`, which contains enough scaffolding - /// to satisfy OPTE's use of the underlying `mblk_t` and `dblk_t` - /// structures. - pub fn alloc(size: usize) -> Self { - let mp = allocb(size); - - // Safety: We know this is safe because we just built the `mp` - // in a safe manner. - let seg = unsafe { PacketSeg::wrap_mblk(mp) }; - Packet::new(seg) - } - - pub fn alloc_and_expand(size: usize) -> Self { - let mut seg = PacketSeg::alloc(size); - seg.expand_end(size).unwrap(); - Packet::new(seg) - } - - /// Create a [`Packet`] value from the passed in - /// `bytes`. - /// - /// The returned packet consists of exactly one [`PacketSeg`] with - /// enough space to hold `bytes.len()`. - pub fn copy(bytes: &[u8]) -> Self { - let mut pkt = Packet::alloc_and_expand(bytes.len()); - let mut wtr = pkt.seg0_wtr(); - // Unwrap: We know there cannot be an error because we - // allocate a packet large enough to hold all bytes. 
- wtr.write(bytes).unwrap(); - pkt.state.len = bytes.len(); - pkt - } - - pub fn get_rdr(&self) -> PacketReader { - PacketReader::new(&self.segs) - } - - pub fn get_rdr_mut(&mut self) -> PacketReaderMut { - PacketReaderMut::new(&mut self.segs) - } - - /// Create a new packet from `seg0`. - fn new(seg0: PacketSeg) -> Self { - let segs = vec![seg0]; - let len: usize = segs.iter().map(|s| s.len).sum(); - let avail: usize = segs.iter().map(|s| s.avail).sum(); - - Packet { avail, segs, state: Initialized { len } } - } - - #[cfg(test)] - fn new2(seg0: PacketSeg, seg1: PacketSeg) -> Self { - let segs = vec![seg0, seg1]; - let len: usize = segs.iter().map(|s| s.len).sum(); - let avail: usize = segs.iter().map(|s| s.avail).sum(); - - Packet { avail, segs, state: Initialized { len } } - } - - pub fn seg0_wtr(&mut self) -> PacketSegWriter { - self.segs[0].get_writer() - } - - pub fn seg_wtr(&mut self, i: usize) -> PacketSegWriter { - self.segs[i].get_writer() - } - - pub fn add_seg( - &mut self, - size: usize, - ) -> Result { - let mut seg = PacketSeg::alloc(size); - seg.expand_end(size)?; - let len = self.segs.len(); - if len > 0 { - let last_seg = &mut self.segs[len - 1]; - last_seg.link(&seg); - } - self.segs.push(seg); - self.state.len += size; - - Ok(self.seg_wtr(len)) - } - - /// Wrap the `mblk_t` packet in a [`Packet`], taking ownership of - /// the `mblk_t` packet as a result. An `mblk_t` packet consists - /// of one or more `mblk_t` segments chained together via - /// `b_cont`. As a result, this [`Packet`] may consist of *one or - /// more* [`PacketSeg`]s. When the [`Packet`] is dropped, the - /// underlying `mblk_t` segment chain is freed. If you wish to - /// pass on ownership you must call the [`Packet::unwrap_mblk()`] - /// function. - /// - /// # Safety - /// - /// The `mp` pointer must point to an `mblk_t` allocated by - /// `allocb(9F)` or provided by some kernel API which itself used - /// one of the DDI/DKI APIs to allocate it. 
- /// - /// # Errors - /// - /// * Return [`WrapError::NullPtr`] is `mp` is `NULL`. - pub unsafe fn wrap_mblk(mp: *mut mblk_t) -> Result { - if mp.is_null() { - return Err(WrapError::NullPtr); - } - - // Compute the number of `mblk_t`s in this segment chain. - // - // We are currently forced to take at least one memory allocation. - // That's because we're wrapping each `mblk_t` in a segment chain (the - // `b_cont` items) in a `PacketSeg`, and then storing all those in - // `self`. We previously had a statically-sized array here, of length 4, - // to avoid those allocs. However, that obviously assumes we never have - // chains of more than 4 elements, which we've now hit. - // - // We pass over the linked-list twice here: once to compute the length, - // so that we can allocate exactly once, and once to actually wrap - // everything. - let mut n_segments = 1; - let mut next_seg = (*mp).b_cont; - while !next_seg.is_null() { - n_segments += 1; - next_seg = (*next_seg).b_cont; - } - let mut segs = Vec::with_capacity(n_segments); - - // Restore `next_seg`, since we iterate over the list another time to - // actually wrap the `mblk_t`s. - let mut next_seg = (*mp).b_cont; - let mut len = 0; - let mut avail = 0; - let mut seg = PacketSeg::wrap_mblk(mp); - avail += seg.avail; - len += seg.len; - segs.push(seg); - - while !next_seg.is_null() { - let tmp = (*next_seg).b_cont; - seg = PacketSeg::wrap_mblk(next_seg); - avail += seg.avail; - len += seg.len; - segs.push(seg); - next_seg = tmp; - } - - Ok(Packet { avail, segs, state: Initialized { len } }) - } - - // /// A combination of [`Self::wrap_mblk()`] followed by [`Self::parse()`]. - // /// - // /// This is a bit more convenient than dealing with the possible - // /// error from each separately. - // /// - // /// # Safety - // /// - // /// See [`Self::wrap_mblk()`]. 
- // pub unsafe fn wrap_mblk_and_parse( - // mp: *mut mblk_t, - // dir: Direction, - // net: N, - // ) -> Result, PacketError> { - // let pkt = Self::wrap_mblk(mp)?; - // pkt.parse(dir, net).map_err(PacketError::from) - // } -} - /// A packet body transformation. /// /// A body transformation allows an action to modify zero, one, or @@ -835,286 +363,6 @@ impl From for BodyTransformError { } } -impl Packet { - pub fn body_csum(&self) -> Option { - self.state.body_csum - } - - pub fn body_info(&self) -> BodyInfo { - self.state.body - } - - pub fn body_offset(&self) -> usize { - self.state.body.pkt_offset - } - - /// Run the [`BodyTransform`] against this packet. - pub fn body_transform( - &mut self, - dir: Direction, - xform: &dyn BodyTransform, - ) -> Result<(), BodyTransformError> { - // We set the flag now with the assumption that the transform - // could fail after modifying part of the body. In the future - // we could have something more sophisticated that only sets - // the flag if at least one byte was modified, but for now - // this does the job as nothing that needs top performance - // should make use of body transformations. - self.state.body_modified = true; - - match self.body_segs_mut() { - Some(mut body_segs) => xform.run(dir, &mut body_segs), - - None => { - self.state.body_modified = false; - Err(BodyTransformError::NoPayload) - } - } - } - - pub fn body_seg(&self) -> usize { - self.state.body.seg_index - } - - /// Return a list of the body segments, or `None` if there is no - /// body. - pub fn body_segs(&self) -> Option> { - if self.state.body.len == 0 { - return None; - } - - let mut body_segs = vec![]; - let body_seg = self.state.body.seg_index; - - for (i, seg) in self.segs[body_seg..].iter().enumerate() { - if i == 0 { - // Panic: We are slicing with the parse data. If - // we parsed correctly, this should not panic. 
- body_segs.push( - seg.slice_unchecked(self.state.body.seg_offset, None), - ); - } else { - body_segs.push(seg.slice()); - } - } - - Some(body_segs) - } - - /// Return a list of mutable body segments, or `None` if there is - /// no body. - pub fn body_segs_mut(&mut self) -> Option> { - if self.state.body.len == 0 { - return None; - } - - let mut body_segs = vec![]; - let body_seg = self.state.body.seg_index; - - for (i, seg) in self.segs[body_seg..].iter_mut().enumerate() { - if i == 0 { - // Panic: We are slicing with the parse data. If - // we parsed correctly, this should not panic. - body_segs.push( - seg.slice_mut_unchecked(self.state.body.seg_offset, None), - ); - } else { - body_segs.push(seg.slice_mut()); - } - } - - Some(body_segs) - } - - pub fn hdr_offsets(&self) -> HeaderOffsets { - self.state.hdr_offsets.clone() - } - - /// Return a reference to the flow ID of this packet. - #[inline] - pub fn flow(&self) -> &InnerFlowId { - &self.state.flow - } - - pub fn get_body_rdr(&self) -> PacketReader { - let mut rdr = PacketReader::new(&self.segs); - // XXX While this works for now it might be nice to have a - // better mechanism for dealing with the body. For example, we - // know this seek() call can't fail, but the current - // abstraction isn't powerful enough to encode that in the - // type system. - rdr.seek(self.body_offset()).expect("failed to seek to body"); - rdr - } - - pub fn get_rdr(&self) -> PacketReader { - PacketReader::new(&self.segs) - } - - pub fn get_rdr_mut(&mut self) -> PacketReaderMut { - PacketReaderMut::new(&mut self.segs) - } - - #[inline] - pub fn is_tcp(&self) -> bool { - // self.state.meta.inner.is_tcp() - // TODO: about to gut anyhow. - false - } - - /// Return the mblk pointer value as a formatted String. This is - /// for debugging purposes. - pub fn mblk_ptr_str(&self) -> String { - format!("{:p}", self.segs[0].mp) - } - - // Determine if the new header fits in the existing first segment. 
- // If it does, then modify the mblk pointers to reflect the length - // of the new header. If it does not, then insert a new segment to - // the front. - fn hdr_seg( - segs: &mut Vec, - new_hdr_len: usize, - body: &mut BodyInfo, - ) { - let prefix_len = segs[0].prefix_len(); - // Determine the length of the original headers. This is - // equivalent to where the body starts. - let old_hdr_len = body.pkt_offset; - - #[allow(clippy::comparison_chain)] - if new_hdr_len > old_hdr_len { - if prefix_len + old_hdr_len >= new_hdr_len { - // In this case we can fix the new headers in the existing - // first segment. - let delta = new_hdr_len - old_hdr_len; - segs[0].expand_start(delta).unwrap(); - - // If the body starts in this first segment, then make - // sure to update its segment offset. - if body.seg_index == 0 { - body.seg_offset = new_hdr_len; - } - } else { - // In this case we need to "erase" the old headers and - // allocate an mblk to hold the new headers. - // - // This assumes that the headers all reside in the - // first segment. This is checked for in parsing and if the - // headers are not all in the first segment, the leading - // segments are squashed into one until this becomes true. - segs[0].shrink_start(old_hdr_len).unwrap(); - - // Create the new segment for holding the new headers. - let mut seg = unsafe { - let mp = allocb(new_hdr_len); - PacketSeg::wrap_mblk(mp) - }; - - // Make room to write the new headers. - seg.expand_end(new_hdr_len).unwrap(); - - // We shrunk the first segment to erase the old - // headers. If the body starts in this same segment, - // then we need to adjust its segment offset to - // reflect the fact that there is no header data - // before it. That is, since we know we are erasing - // the entirety of the original headers in the - // original first segment, we also know that the body - // must now start at segment offset 0. 
- // - // If the body **does not** start in the same segment - // as the original headers, then its offset does not - // change, because its segment is not adjusted. - if body.seg_index == 0 { - assert_eq!(body.seg_offset - old_hdr_len, 0); - body.seg_offset = 0; - } - if segs[0].len() > 0 { - seg.link(&segs[0]); - // TODO-performance: This may necessitate another allocation. We - // will want to measure how often we hit this branch, and the - // impact of the allocation. - segs.insert(0, seg); - } else { - // If we shrunk the segment to nothing, do not link a zero - // sized segment as a continuation block. This is not a - // generally expected thing and has caused NIC hardware to - // stop working. - if segs.len() > 1 { - seg.link(&segs[1]); - } - let mut zero_sized = core::mem::replace(&mut segs[0], seg); - zero_sized.unlink(); - zero_sized.free(); - } - - // We've added a segment to the front of the list; the - // body segment moves over by one. - body.seg_index += 1; - } - } else if new_hdr_len < old_hdr_len { - let delta = old_hdr_len - new_hdr_len; - segs[0].shrink_start(delta).unwrap(); - - // If the body starts in this first segment, then make - // sure to update its segment offset. - if body.seg_index == 0 { - body.seg_offset = new_hdr_len; - } - } - - unsafe { - assert!((*segs[0].mp).b_rptr >= (*segs[0].dblk).db_base); - assert!((*segs[0].mp).b_rptr <= (*segs[0].mp).b_wptr); - } - - // With regards to the overall packet, we know the body should - // start after the new headers. - body.pkt_offset = new_hdr_len; - } -} - -impl Packet { - /// Clone and return all bytes. This is used for testing. - pub fn all_bytes(&self) -> Vec { - let mut bytes = Vec::with_capacity(self.state.len()); - for seg in &self.segs { - let s = unsafe { slice::from_raw_parts((*seg.mp).b_rptr, seg.len) }; - bytes.extend_from_slice(s); - } - bytes - } - - /// Return the length of the packet. - /// - /// NOTE: This length only includes the _initialized_ bytes of the - /// packet. 
Each [`PacketSeg`] may contain _uninitialized_ bytes - /// at the head or tail (or both) of the segment. - /// - /// This is equivalent to the `msgsize(9F)` function in illumos. - pub fn len(&self) -> usize { - self.state.len() - } - - /// Return a byte slice of the bytes in `seg`. - pub fn seg_bytes(&self, seg: usize) -> &[u8] { - let seg = &self.segs[seg]; - // Safety: As long as the `mp` pointer is legit this is safe. - unsafe { slice::from_raw_parts((*seg.mp).b_rptr, seg.len) } - } -} - -/// A packet segment represents one or more (or all) bytes of a -/// [`Packet`]. -#[derive(Clone, Debug)] -pub struct PacketSeg { - mp: *mut mblk_t, - dblk: *mut dblk_t, - len: usize, - avail: usize, -} - #[derive(Clone, Copy, Debug)] pub enum SegAdjustError { /// Attempt to place the end of the writable/readable area of the @@ -1130,378 +378,18 @@ pub enum SegAdjustError { StartPastEnd, } -impl PacketSeg { - fn alloc(len: usize) -> Self { - // Safety: We know this is safe because we are literally - // passing in an mblk derived from `allocb(9F)`. - unsafe { PacketSeg::wrap_mblk(allocb(len)) } - } - - fn free(&mut self) { - cfg_if! { - if #[cfg(all(not(feature = "std"), not(test)))] { - unsafe { ddi::freemsg(self.mp) }; - } else { - mock_freemsg(self.mp); - } - } - } - - /// Return the bytes of the packet. - /// - /// This is useful for testing. - #[cfg(test)] - pub fn bytes(&self) -> &[u8] { - unsafe { slice::from_raw_parts((*self.mp).b_rptr, self.len) } - } - - /// Expand the writable/readable area by pushing `b_wptr` out by - /// len. - /// - /// # Errors - /// - /// `SegAdjustError::EndPastLimit`: Expanding by `len` would put the - /// `b_wptr` past the underlying buffer's limit (`db_lim`). 
- pub fn expand_end(&mut self, len: usize) -> Result<(), SegAdjustError> { - let wptr = unsafe { (*self.mp).b_wptr }; - let lim = unsafe { (*self.dblk).db_lim }; - let new_wptr = unsafe { wptr.add(len) }; - - if new_wptr > lim { - return Err(SegAdjustError::EndPastLimit); - } - - unsafe { - (*self.mp).b_wptr = new_wptr; - } - self.len = unsafe { - (*self.mp).b_wptr.offset_from((*self.mp).b_rptr) as usize - }; - Ok(()) - } - - /// Expand the writable/readable area by shifting `b_rptr` by len; - /// effectively adding bytes to the start of the packet. - /// - /// # Errors - /// - /// `SegAdjustError::StartBeforeBase`: Shift the read pointer left - /// by `len` bytes would place `b_rptr` before the underlying - /// buffer's base (`db_base`). - pub fn expand_start(&mut self, len: usize) -> Result<(), SegAdjustError> { - let rptr = unsafe { (*self.mp).b_rptr }; - let base = unsafe { (*self.dblk).db_base }; - let new_rptr = unsafe { rptr.sub(len) }; - - if new_rptr < base { - return Err(SegAdjustError::StartBeforeBase); - } - - unsafe { - (*self.mp).b_rptr = new_rptr; - } - self.len = unsafe { - (*self.mp).b_wptr.offset_from((*self.mp).b_rptr) as usize - }; - Ok(()) - } - - /// Shrink the writable/readable area by shifting the `b_rptr` by - /// `len`; effectively removing bytes from the start of the packet. - /// - /// # Errors - /// - /// `SegAdjustError::StartPastEnd`: Shifting the read pointer by - /// `len` would move `b_rptr` past `b_wptr`. 
- pub fn shrink_start(&mut self, len: usize) -> Result<(), SegAdjustError> { - let wptr = unsafe { (*self.mp).b_wptr }; - let rptr = unsafe { (*self.mp).b_rptr }; - let new_rptr = unsafe { rptr.add(len) }; - - if new_rptr > wptr { - return Err(SegAdjustError::StartPastEnd); - } - - unsafe { - (*self.mp).b_rptr = new_rptr; - } - self.len = unsafe { - (*self.mp).b_wptr.offset_from((*self.mp).b_rptr) as usize - }; - Ok(()) - } - - pub fn get_writer(&mut self) -> PacketSegWriter { - PacketSegWriter::new(self, 0, self.len).unwrap() - } - - pub fn len(&self) -> usize { - self.len - } - - fn link(&mut self, seg: &PacketSeg) { - unsafe { - // We should not be creating message block continuations to zero - // sized blocks. This is not a generally expected thing and has - // caused NIC hardware to stop working. Stopping short of a - // production panic, but this should fail any tests. - debug_assert!( - (*seg.mp).b_wptr != (*seg.mp).b_rptr, - "zero-length continuation", - ); - (*self.mp).b_cont = seg.mp - }; - } - - fn unlink(&mut self) { - unsafe { - (*self.mp).b_cont = ptr::null_mut(); - } - } - - // The amount of space available between the data buffer's base - // (`dblk_t.db_base`) and the packet's start (`mblk_t.b_rptr`). - fn prefix_len(&self) -> usize { - let prefix = - unsafe { (*self.mp).b_rptr.offset_from((*self.dblk).db_base) }; - assert!(prefix >= 0); - prefix as usize - } - - /// Get a slice of the entire segment. - fn slice(&self) -> &[u8] { - // Panic: We are using the segment's own data to take a slice - // of the entire segment. - self.slice_unchecked(0, None) - } - - /// Get a mutable slice of the entire segment. - fn slice_mut(&mut self) -> &mut [u8] { - // Panic: We are using the segment's own data to take a slice - // of the entire segment. - self.slice_mut_unchecked(0, None) - } - - /// Get a slice of the segment. - /// - /// The slice starts at `offset` and consists of `len` bytes. 
If - /// the length is `None`, then the slice extends to the end of the - /// segment. This includes only the part of the dblk which has - /// been written, i.e. the bytes from `mblk.b_rptr` to - /// `mblk.b_wptr`. - /// - /// # Safety - /// - /// It is up to the caller to ensure that `offset` and `offset + - /// len` reside within the segment boundaries. - /// - /// # Panic - /// - /// The slice formed by the `offset` and `offset + len` MUST be - /// within the bounds of the segment, otherwise panic. - fn slice_unchecked(&self, offset: usize, len: Option) -> &[u8] { - if offset > self.len { - panic!( - "offset is outside the bounds of the mblk: \ - offset: {} len: {} mblk: {:p}", - offset, self.len, self.mp - ); - } - - // Safety: This pointer was handed to us by the system. - let start = unsafe { (*self.mp).b_rptr.add(offset) }; - let len = len.unwrap_or(self.len - offset); - // Safety: If this end is outside the bound of the segment we - // panic below. - let end = unsafe { start.add(len) }; - // Safety: This pointer was handed to us by the system. - let b_wptr = unsafe { (*self.mp).b_wptr }; - assert!( - end <= b_wptr, - "slice past end of segment: offset: {} len: {} end: {:p} \ - mblk: {:p} b_wptr: {:p}", - offset, - len, - end, - self.mp, - b_wptr, - ); - - // Safety: We have verified that the slice is within the - // bounds of the segment. - unsafe { slice::from_raw_parts(start, len) } - } - - /// Get a mutable slice of the segment. - /// - /// The slice starts at `offset` and consists of `len` bytes. If - /// the length is `None`, then the slice extends to the end of the - /// segment. This includes only the part of the dblk which has - /// been written, i.e. the bytes from `mblk.b_rptr` to - /// `mblk.b_wptr`. - /// - /// # Panic - /// - /// The slice formed by the `offset` and `offset + len` MUST be - /// within the bounds of the segment, otherwise panic. 
- fn slice_mut_unchecked( - &mut self, - offset: usize, - len: Option, - ) -> &mut [u8] { - if offset > self.len { - panic!( - "offset is outside the bounds of the mblk: \ - offset: {} len: {} mblk: {:p}", - offset, self.len, self.mp - ); - } - - // Safety: This pointer was handed to us by the system. - let start = unsafe { (*self.mp).b_rptr.add(offset) }; - let len = len.unwrap_or(self.len - offset); - // Safety: If this end is outside the bound of the segment we - // panic below. - let end = unsafe { start.add(len) }; - // Safety: This pointer was handed to us by the system. - let b_wptr = unsafe { (*self.mp).b_wptr }; - assert!( - end <= b_wptr, - "slice past end of segment: offset: {} len: {} end: {:p} \ - mblk: {:p} b_wptr: {:p}", - offset, - len, - end, - self.mp, - b_wptr, - ); - - // Safety: We have verified that the slice is within the - // bounds of the segment. - unsafe { slice::from_raw_parts_mut(start, len) } - } - - // Wrap an existing `mblk_t`, taking ownership of it. - // - // # Safety - // - // The `mp` passed must be a non-NULL pointer to an `mblk_t` - // created by one of the `allocb(9F)` family of calls. - // - // After calling this function, the original mp pointer should - // not be dereferenced. - unsafe fn wrap_mblk(mp: *mut mblk_t) -> Self { - let dblk = (*mp).b_datap as *mut dblk_t; - let len = (*mp).b_wptr.offset_from((*mp).b_rptr) as usize; - let avail = (*dblk).db_lim.offset_from((*dblk).db_base) as usize; - PacketSeg { mp, dblk, avail, len } - } -} - -/// Modify the bytes of a packet segment. -/// -/// This type allows one to modify all or some of the bytes of a -/// [`PacketSeg`]. This is limited to the initialized bytes of the -/// segment, i.e., those that sit between `b_rptr` and `b_wptr`. -pub struct PacketSegWriter<'a> { - // Current position in the bytes slice. 
- pos: usize, - avail: usize, - bytes: &'a mut [u8], -} - #[derive(Clone, Copy, Debug)] pub enum ModifierCreateError { StartOutOfRange, EndOutOfRange, } -impl<'a> PacketSegWriter<'a> { - /// Create a new [`PacketSegWriter`], starting at `offset` from - /// `b_rptr`, and running for `len` bytes. - /// - /// The slice of bytes selected must be within `b_rptr` and `b_wptr`. - /// - /// # Errors - /// - /// `ModifierCreateError::StartOutOfRange`: The `offset` value has - /// gone beyond `b_wptr`. - /// - /// `ModifierCreateError::EndOutOfRange`: The `b_rptr + offset + - /// len` has gone beyond `b_wptr`. - fn new( - seg: &'a mut PacketSeg, - offset: usize, - len: usize, - ) -> Result { - let b_rptr = unsafe { (*seg.mp).b_rptr }; - let b_wptr = unsafe { (*seg.mp).b_wptr }; - let start = unsafe { b_rptr.add(offset) }; - - if start > b_wptr { - return Err(ModifierCreateError::StartOutOfRange); - } - - let end = unsafe { start.add(len) }; - - if end > b_wptr { - return Err(ModifierCreateError::EndOutOfRange); - } - - let bytes = unsafe { slice::from_raw_parts_mut(start, len) }; - - Ok(Self { pos: 0, bytes, avail: len }) - } - - pub fn slice_mut(&mut self, len: usize) -> Result<&mut [u8], WriteError> { - if len > self.avail { - return Err(WriteError::NotEnoughBytes { - available: self.avail, - needed: len, - }); - } - - let end = self.pos + len; - let slice = &mut self.bytes[self.pos..end]; - self.pos += len; - self.avail -= len; - Ok(slice) - } - - pub fn write(&mut self, src: &[u8]) -> Result<(), WriteError> { - debug_assert!(self.bytes[self.pos..].len() >= src.len()); - let len = src.len(); - if len > self.avail { - return Err(WriteError::NotEnoughBytes { - available: self.avail, - needed: len, - }); - } - - let end = self.pos + len; - self.bytes[self.pos..end].copy_from_slice(src); - self.pos += len; - self.avail -= len; - Ok(()) - } - - pub fn write_u8(&mut self, val: u8) -> Result<(), WriteError> { - self.write(&[val]) - } - - pub fn write_u16(&mut self, val: u16) -> 
Result<(), WriteError> { - self.write(&val.to_be_bytes()) - } - - pub fn write_u32(&mut self, val: u32) -> Result<(), WriteError> { - self.write(&val.to_be_bytes()) - } -} - #[derive(Clone, Copy, Debug, DError)] pub enum WrapError { /// We tried to wrap a NULL pointer. NullPtr, + /// We tried to wrap a packet chain as though it were a single mblk. + Chain, } /// Some functions may return multiple types of errors. @@ -1646,412 +534,6 @@ impl From for WriteError { pub type ReadResult = result::Result; pub type WriteResult = result::Result; -/// A trait for reading bytes from packets. -/// -/// All operations start from the current position and move it -/// forward, with the exception of `seek_back()`, which moves the -/// position backwards within the current segment. -pub trait PacketRead<'a> { - /// Copy all bytes from current position to the end of the packet - /// leaving the reader's internal state untouched. - fn copy_remaining(&self) -> Vec; - - /// Return the current position in the packet. - fn pos(&self) -> usize; - - /// Seek forwards from the current position by `amount`. The seek - /// may cross segment boundaries. - /// - /// # Errors - /// - /// If the seek would move beyond the end of the packet, then a - /// [`ReadErr::EndOfPacket`] is returned. - fn seek(&mut self, amount: usize) -> ReadResult<()>; - - /// Seek backwards from the current position by `amount`. - /// - /// # Errors - /// - /// If the seek would move beyond the beginning of the current - /// segment, then an error is returned. - fn seek_back(&mut self, amount: usize) -> ReadResult<()>; - - fn seg_left(&self) -> usize; - fn seg_idx(&self) -> usize; - fn seg_pos(&self) -> usize; - - /// Return the slice of `len` bytes starting from the current - /// position. - /// - /// The slice *must* exist entirely in a single packet segment -- - /// it can never straddle multiple segments. - /// - /// # Errors - /// - /// If `self` cannot satisfy this request a `ReadErr` is returned. 
- fn slice<'b>(&'b mut self, len: usize) -> ReadResult<&'a [u8]>; -} - -/// Append: Append to the end of the segment or packet, i.e. start at -/// `b_wptr`. -/// -/// Modify(offset): Modify bytes starting at `offset` from the -/// beginning of the segment or packet (`b_rptr`). The length of the -/// write must fit within the end of the current segment (`b_wptr`). -pub enum WritePos { - Append, - Modify(u16), -} - -#[derive(Debug)] -pub struct PacketReader<'a> { - pkt_segs: &'a [PacketSeg], - pkt_pos: usize, - seg_idx: usize, - seg_pos: usize, - seg_len: usize, -} - -impl<'a> PacketReader<'a> { - pub fn finish(self) -> (usize, usize, usize, bool) { - let end_of_seg = self.seg_pos == self.seg_len; - (self.pkt_pos, self.seg_idx, self.seg_pos, end_of_seg) - } - - pub fn new(pkt_segs: &'a [PacketSeg]) -> Self { - let seg_len = pkt_segs[0].len; - - PacketReader { pkt_segs, pkt_pos: 0, seg_idx: 0, seg_pos: 0, seg_len } - } - - pub fn pkt_pos(&self) -> usize { - self.pkt_pos - } -} - -impl<'a> PacketRead<'a> for PacketReader<'a> { - fn pos(&self) -> usize { - self.pkt_pos - } - - fn seek(&mut self, mut amount: usize) -> ReadResult<()> { - while self.seg_pos + amount > self.seg_len { - if self.seg_idx + 1 == self.pkt_segs.len() { - return Err(ReadErr::OutOfRange); - } - - self.seg_idx += 1; - amount -= self.seg_len - self.seg_pos; - self.pkt_pos += self.seg_len - self.seg_pos; - self.seg_len = self.pkt_segs[self.seg_idx].len; - self.seg_pos = 0; - } - - self.seg_pos += amount; - self.pkt_pos += amount; - Ok(()) - } - - /// Seek backwards by `offset`. - /// - /// NOTE: Currently we only allow seeking back to the beginning of - /// the current segment, which should be enough in all situations - /// this is needed (this API is in flux so no point putting in - /// work that isn't needed at the moment). 
- fn seek_back(&mut self, amount: usize) -> ReadResult<()> { - if amount > self.seg_pos { - return Err(ReadErr::NotEnoughBytes); - } - - self.seg_pos -= amount; - self.pkt_pos -= amount; - Ok(()) - } - - fn seg_left(&self) -> usize { - self.seg_len - self.seg_pos - } - - fn seg_idx(&self) -> usize { - self.seg_idx - } - - fn seg_pos(&self) -> usize { - self.seg_pos - } - - fn slice<'b>(&'b mut self, len: usize) -> ReadResult<&'a [u8]> { - let mut seg = &self.pkt_segs[self.seg_idx]; - - // If we've reached the end of the initialized bytes in this - // segment. - if self.seg_pos == seg.len { - // There are no more segments to be read. - if (self.seg_idx + 1) == self.pkt_segs.len() { - return Err(ReadErr::EndOfPacket); - } - - // Move onto next segment. - self.seg_idx += 1; - seg = &self.pkt_segs[self.seg_idx]; - self.seg_pos = 0; - self.seg_len = seg.len; - } - - if self.seg_pos + len > self.seg_len { - return Err(ReadErr::NotEnoughBytes); - } - - let ret = unsafe { - let start = (*seg.mp).b_rptr.add(self.seg_pos); - slice::from_raw_parts(start, len) - }; - - self.pkt_pos += len; - self.seg_pos += len; - Ok(ret) - } - - fn copy_remaining(&self) -> Vec { - let total_len: usize = self.pkt_segs.iter().map(|s| s.len).sum(); - let mut bytes = Vec::with_capacity(total_len - self.pkt_pos); - let mut seg_idx = self.seg_idx; - let mut seg_pos = self.seg_pos; - let mut seg_len = self.seg_len; - let mut seg = &self.pkt_segs[seg_idx]; - - loop { - let seg_slice = unsafe { - let start = (*seg.mp).b_rptr.add(seg_pos); - slice::from_raw_parts(start, seg_len - seg_pos) - }; - bytes.extend_from_slice(seg_slice); - - seg_idx += 1; - - if seg_idx >= self.pkt_segs.len() { - break; - } - - seg = &self.pkt_segs[seg_idx]; - seg_pos = 0; - seg_len = seg.len - } - - bytes - } -} - -/// A trait for getting mutable slices of bytes from packets. -/// -/// All operations start from the current position and move it -/// forward. 
-pub trait PacketReadMut<'a>: PacketRead<'a> { - /// Reutrn the current offset into the packet. - fn offset(&self) -> ReaderOffset; - - /// Return a mutable reference to a slice of `len` bytes starting - /// from the current position. - /// - /// The slice *must* exist entirely in a single packet segment -- - /// it can never straddle multiple segments. - /// - /// # Errors - /// - /// If `self` cannot satisfy this request a `ReadErr` is returned. - fn slice_mut<'b>(&'b mut self, len: usize) -> ReadResult<&'a mut [u8]>; -} - -#[derive(Debug)] -pub struct PacketReaderMut<'a> { - pkt_segs: &'a mut [PacketSeg], - pkt_pos: usize, - seg_idx: usize, - seg_pos: usize, - seg_len: usize, -} - -impl<'a> PacketReaderMut<'a> { - pub fn finish(self) -> (usize, usize, usize, bool) { - let end_of_seg = self.seg_pos == self.seg_len; - (self.pkt_pos, self.seg_idx, self.seg_pos, end_of_seg) - } - - pub fn new(pkt_segs: &'a mut [PacketSeg]) -> Self { - let seg_len = pkt_segs[0].len; - - PacketReaderMut { - pkt_segs, - pkt_pos: 0, - seg_idx: 0, - seg_pos: 0, - seg_len, - } - } -} - -#[derive(Clone, Copy, Debug)] -pub struct ReaderOffset { - pub pkt_pos: usize, - pub seg_idx: usize, - pub seg_pos: usize, -} - -impl<'a> PacketRead<'a> for PacketReaderMut<'a> { - fn pos(&self) -> usize { - self.pkt_pos - } - - fn seek(&mut self, mut amount: usize) -> ReadResult<()> { - while self.seg_pos + amount > self.seg_len { - if self.seg_idx + 1 == self.pkt_segs.len() { - return Err(ReadErr::OutOfRange); - } - - self.seg_idx += 1; - amount -= self.seg_len - self.seg_pos; - self.pkt_pos += self.seg_len - self.seg_pos; - self.seg_len = self.pkt_segs[self.seg_idx].len; - self.seg_pos = 0; - } - - self.seg_pos += amount; - self.pkt_pos += amount; - Ok(()) - } - - /// Seek backwards by `offset`. 
- /// - /// NOTE: Currently we only allow seeking back to the beginning of - /// the current segment, which should be enough in all situations - /// this is needed (this API is in flux so no point putting in - /// work that isn't needed at the moment). - fn seek_back(&mut self, amount: usize) -> ReadResult<()> { - if amount > self.seg_pos { - return Err(ReadErr::NotEnoughBytes); - } - - self.seg_pos -= amount; - self.pkt_pos -= amount; - Ok(()) - } - - fn seg_idx(&self) -> usize { - self.seg_idx - } - - fn seg_left(&self) -> usize { - self.seg_len - self.seg_pos - } - - fn seg_pos(&self) -> usize { - self.seg_pos - } - - fn slice<'b>(&'b mut self, len: usize) -> ReadResult<&'a [u8]> { - let mut seg = &self.pkt_segs[self.seg_idx]; - - // If we've reached the end of the initialized bytes in this - // segment. - if self.seg_pos == seg.len { - // There are no more segments to be read. - if (self.seg_idx + 1) == self.pkt_segs.len() { - return Err(ReadErr::EndOfPacket); - } - - // Move onto next segment. 
- self.seg_idx += 1; - seg = &self.pkt_segs[self.seg_idx]; - self.seg_pos = 0; - self.seg_len = seg.len; - } - - if self.seg_pos + len > self.seg_len { - return Err(ReadErr::NotEnoughBytes); - } - - let ret = unsafe { - let start = (*seg.mp).b_rptr.add(self.seg_pos); - slice::from_raw_parts(start, len) - }; - - self.pkt_pos += len; - self.seg_pos += len; - Ok(ret) - } - - fn copy_remaining(&self) -> Vec { - let total_len: usize = self.pkt_segs.iter().map(|s| s.len).sum(); - let mut bytes = Vec::with_capacity(total_len - self.pkt_pos); - let mut seg_idx = self.seg_idx; - let mut seg_pos = self.seg_pos; - let mut seg_len = self.seg_len; - let mut seg = &self.pkt_segs[seg_idx]; - - loop { - let seg_slice = unsafe { - let start = (*seg.mp).b_rptr.add(seg_pos); - slice::from_raw_parts(start, seg_len - seg_pos) - }; - bytes.extend_from_slice(seg_slice); - - seg_idx += 1; - - if seg_idx >= self.pkt_segs.len() { - break; - } - - seg = &self.pkt_segs[seg_idx]; - seg_pos = 0; - seg_len = seg.len - } - - bytes - } -} - -impl<'a> PacketReadMut<'a> for PacketReaderMut<'a> { - fn offset(&self) -> ReaderOffset { - ReaderOffset { - pkt_pos: self.pkt_pos, - seg_idx: self.seg_idx, - seg_pos: self.seg_pos, - } - } - - fn slice_mut<'b>(&'b mut self, len: usize) -> ReadResult<&'a mut [u8]> { - let mut seg = &self.pkt_segs[self.seg_idx]; - - // If we've reached the end of the initialized bytes in this - // segment. - if self.seg_pos == seg.len { - // There are no more segments to be read. - if (self.seg_idx + 1) == self.pkt_segs.len() { - return Err(ReadErr::EndOfPacket); - } - - // Move onto next segment. 
- self.seg_idx += 1; - seg = &self.pkt_segs[self.seg_idx]; - self.seg_pos = 0; - self.seg_len = seg.len; - } - - if self.seg_pos + len > self.seg_len { - return Err(ReadErr::NotEnoughBytes); - } - - let ret = unsafe { - let start = (*seg.mp).b_rptr.add(self.seg_pos); - slice::from_raw_parts_mut(start, len) - }; - - self.pkt_pos += len; - self.seg_pos += len; - Ok(ret) - } -} - /// The common entry into an `allocb(9F)` implementation that works in /// both std and `no_std` environments. /// diff --git a/xde/src/dls/mod.rs b/xde/src/dls/mod.rs index 46ad98ef..a3e2bdbd 100644 --- a/xde/src/dls/mod.rs +++ b/xde/src/dls/mod.rs @@ -22,7 +22,6 @@ use illumos_sys_hdrs::datalink_id_t; use illumos_sys_hdrs::uintptr_t; use illumos_sys_hdrs::ENOENT; use opte::engine::ingot_packet::MsgBlk; -use opte::engine::packet::Packet; use opte::engine::packet::PacketState; pub use sys::*; @@ -200,31 +199,6 @@ impl DlsStream { /// XXX The underlying mac_tx() function accepts a packet chain, /// but for now we pass only a single packet at a time. pub fn tx_drop_on_no_desc( - &self, - pkt: Packet, - hint: uintptr_t, - flags: MacTxFlags, - ) { - let Some(inner) = self.inner.as_ref() else { - // XXX: probably handle or signal an error here. - return; - }; - // We must unwrap the raw `mblk_t` out of the `pkt` here, - // otherwise the mblk_t would be dropped at the end of this - // function along with `pkt`. 
- let mut raw_flags = flags.bits(); - raw_flags |= MAC_DROP_ON_NO_DESC; - unsafe { - str_mdata_fastpath_put( - inner.dld_str.as_ptr(), - pkt.unwrap_mblk(), - hint, - raw_flags, - ) - }; - } - - pub fn tx_drop_on_no_desc2( &self, pkt: MsgBlk, hint: uintptr_t, @@ -243,7 +217,7 @@ impl DlsStream { // mac_tx(self.mch, pkt.unwrap_mblk(), hint, raw_flags, &mut ret_mp) str_mdata_fastpath_put( inner.dld_str.as_ptr(), - pkt.unwrap_mblk(), + pkt.unwrap_mblk().as_ptr(), hint, raw_flags, ) diff --git a/xde/src/mac/mod.rs b/xde/src/mac/mod.rs index 2faea330..568ba6bb 100644 --- a/xde/src/mac/mod.rs +++ b/xde/src/mac/mod.rs @@ -22,8 +22,6 @@ use core::ptr; use illumos_sys_hdrs::*; use opte::engine::ether::EtherAddr; use opte::engine::ingot_packet::MsgBlk; -use opte::engine::packet::Initialized; -use opte::engine::packet::Packet; use opte::engine::packet::PacketState; pub use sys::*; @@ -210,16 +208,22 @@ impl MacClientHandle { /// but for now we pass only a single packet at a time. pub fn tx( &self, - pkt: Packet, + pkt: MsgBlk, hint: uintptr_t, flags: MacTxFlags, - ) -> Option> { + ) -> Option { // We must unwrap the raw `mblk_t` out of the `pkt` here, // otherwise the mblk_t would be dropped at the end of this // function along with `pkt`. let mut ret_mp = ptr::null_mut(); unsafe { - mac_tx(self.mch, pkt.unwrap_mblk(), hint, flags.bits(), &mut ret_mp) + mac_tx( + self.mch, + pkt.unwrap_mblk().as_ptr(), + hint, + flags.bits(), + &mut ret_mp, + ) }; if !ret_mp.is_null() { // Unwrap: We know the ret_mp is valid because we gave @@ -230,7 +234,7 @@ impl MacClientHandle { // XXX Technically we are still only passing single // packets, but eventually we will pass packet chains and // the sentence above will hold. 
- Some(unsafe { Packet::wrap_mblk(ret_mp).unwrap() }) + Some(unsafe { MsgBlk::wrap_mblk(ret_mp).unwrap() }) } else { None } @@ -244,24 +248,6 @@ impl MacClientHandle { /// XXX The underlying mac_tx() function accepts a packet chain, /// but for now we pass only a single packet at a time. pub fn tx_drop_on_no_desc( - &self, - pkt: Packet, - hint: uintptr_t, - flags: MacTxFlags, - ) { - // We must unwrap the raw `mblk_t` out of the `pkt` here, - // otherwise the mblk_t would be dropped at the end of this - // function along with `pkt`. - let mut raw_flags = flags.bits(); - raw_flags |= MAC_DROP_ON_NO_DESC; - let mut ret_mp = ptr::null_mut(); - unsafe { - mac_tx(self.mch, pkt.unwrap_mblk(), hint, raw_flags, &mut ret_mp) - }; - debug_assert_eq!(ret_mp, ptr::null_mut()); - } - - pub fn tx_drop_on_no_desc2( &self, pkt: MsgBlk, hint: uintptr_t, @@ -274,7 +260,13 @@ impl MacClientHandle { raw_flags |= MAC_DROP_ON_NO_DESC; let mut ret_mp = ptr::null_mut(); unsafe { - mac_tx(self.mch, pkt.unwrap_mblk(), hint, raw_flags, &mut ret_mp) + mac_tx( + self.mch, + pkt.unwrap_mblk().as_ptr(), + hint, + raw_flags, + &mut ret_mp, + ) }; debug_assert_eq!(ret_mp, ptr::null_mut()); } diff --git a/xde/src/xde.rs b/xde/src/xde.rs index c283de68..e0472ef6 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -88,12 +88,9 @@ use opte::engine::ingot_packet::Parsed2; use opte::engine::ingot_packet::ParsedMblk; use opte::engine::ioctl::{self as api}; use opte::engine::ip6::Ipv6Addr; -use opte::engine::packet::Initialized; use opte::engine::packet::InnerFlowId; -use opte::engine::packet::Packet; use opte::engine::packet::PacketChain; use opte::engine::packet::PacketError; -use opte::engine::packet::Parsed; use opte::engine::port::meta::ActionMeta; use opte::engine::port::Port; use opte::engine::port::PortBuilder; @@ -1469,7 +1466,7 @@ fn guest_loopback<'a>( mac::mac_rx( dest_dev.mh, ptr::null_mut(), - pkt.unwrap_mblk(), + pkt.unwrap_mblk().as_ptr(), ) }; } @@ -1491,7 +1488,7 @@ fn 
guest_loopback<'a>( mac::mac_rx( dest_dev.mh, ptr::null_mut(), - pkt.unwrap_mblk(), + pkt.unwrap_mblk().as_ptr(), ) }; } @@ -1556,7 +1553,7 @@ unsafe extern "C" fn xde_mc_tx( // by the mch they're being targeted to. E.g., either build a list // of chains (u1, u2, port0, port1, ...), or hold tx until another // packet breaks the run targeting the same dest. - while let Some(pkt) = chain.pop_front2() { + while let Some(pkt) = chain.pop_front() { xde_mc_tx_one(src_dev, pkt); } @@ -1605,7 +1602,7 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { // // TODO Is there way to set mac_tx to must use result? drop(parsed_pkt); - stream.tx_drop_on_no_desc2(pkt, hint, MacTxFlags::empty()); + stream.tx_drop_on_no_desc(pkt, hint, MacTxFlags::empty()); return ptr::null_mut(); } @@ -1674,7 +1671,7 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { // Get a pointer to the beginning of the outer frame and // fill in the dst/src addresses before sending out the // device. - let mblk = out_pkt.unwrap_mblk(); + let mblk = out_pkt.unwrap_mblk().as_ptr(); let rptr = (*mblk).b_rptr; ptr::copy(dst.as_ptr(), rptr, 6); ptr::copy(src.as_ptr(), rptr.add(6), 6); @@ -1682,7 +1679,7 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { // unwrapped it above. 
let new_pkt = MsgBlk::wrap_mblk(mblk).unwrap(); - underlay_dev.stream.tx_drop_on_no_desc2( + underlay_dev.stream.tx_drop_on_no_desc( new_pkt, hint, MacTxFlags::empty(), @@ -1694,11 +1691,15 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { } Ok(ProcessResult::Hairpin(hpkt)) => { - mac::mac_rx(src_dev.mh, ptr::null_mut(), hpkt.unwrap_mblk()); + mac::mac_rx( + src_dev.mh, + ptr::null_mut(), + hpkt.unwrap_mblk().as_ptr(), + ); } Ok(ProcessResult::Bypass) => { - stream.tx_drop_on_no_desc2(pkt, hint, MacTxFlags::empty()); + stream.tx_drop_on_no_desc(pkt, hint, MacTxFlags::empty()); } Err(_) => {} @@ -1863,7 +1864,7 @@ unsafe extern "C" fn xde_rx( // by the mch they're being targeted to. E.g., either build a list // of chains (port0, port1, ...), or hold tx until another // packet breaks the run targeting the same dest. - while let Some(pkt) = chain.pop_front2() { + while let Some(pkt) = chain.pop_front() { xde_rx_one(&stream, mrh, pkt); } } @@ -1924,7 +1925,7 @@ unsafe fn xde_rx_one( // We are in passthrough mode, skip OPTE processing. if dev.passthrough { drop(parsed_pkt); - mac::mac_rx(dev.mh, mrh, pkt.unwrap_mblk()); + mac::mac_rx(dev.mh, mrh, pkt.unwrap_mblk().as_ptr()); return; } @@ -1934,15 +1935,15 @@ unsafe fn xde_rx_one( match res { Ok(ProcessResult::Bypass) => { - mac::mac_rx(dev.mh, mrh, pkt.unwrap_mblk()); + mac::mac_rx(dev.mh, mrh, pkt.unwrap_mblk().as_ptr()); } Ok(ProcessResult::Modified(mut emit_spec)) => { let npkt = emit_spec.apply(pkt); - mac::mac_rx(dev.mh, mrh, npkt.unwrap_mblk()); + mac::mac_rx(dev.mh, mrh, npkt.unwrap_mblk().as_ptr()); } Ok(ProcessResult::Hairpin(hppkt)) => { - stream.tx_drop_on_no_desc2(hppkt, 0, MacTxFlags::empty()); + stream.tx_drop_on_no_desc(hppkt, 0, MacTxFlags::empty()); } _ => {} } From 96153628bc0de34f864c1aac0bd9528b2d60630a Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 21 Oct 2024 16:34:07 -0700 Subject: [PATCH 059/115] Some warnings. More test migration to come. 
--- xde/src/dls/mod.rs | 1 - xde/src/lib.rs | 1 - xde/src/mac/mod.rs | 1 - xde/src/xde.rs | 51 ++++++++++------------------------------------ 4 files changed, 11 insertions(+), 43 deletions(-) diff --git a/xde/src/dls/mod.rs b/xde/src/dls/mod.rs index a3e2bdbd..c56c2637 100644 --- a/xde/src/dls/mod.rs +++ b/xde/src/dls/mod.rs @@ -22,7 +22,6 @@ use illumos_sys_hdrs::datalink_id_t; use illumos_sys_hdrs::uintptr_t; use illumos_sys_hdrs::ENOENT; use opte::engine::ingot_packet::MsgBlk; -use opte::engine::packet::PacketState; pub use sys::*; /// An integer ID used by DLS to refer to a given link. diff --git a/xde/src/lib.rs b/xde/src/lib.rs index 94360ff8..f562ecc3 100644 --- a/xde/src/lib.rs +++ b/xde/src/lib.rs @@ -6,7 +6,6 @@ // xde - A mac provider for OPTE-based network implementations. #![feature(extern_types)] -#![feature(panic_info_message)] #![no_std] #![allow(non_upper_case_globals)] // XXX We do not use double in the kernel. We should not allow diff --git a/xde/src/mac/mod.rs b/xde/src/mac/mod.rs index 568ba6bb..80198a44 100644 --- a/xde/src/mac/mod.rs +++ b/xde/src/mac/mod.rs @@ -22,7 +22,6 @@ use core::ptr; use illumos_sys_hdrs::*; use opte::engine::ether::EtherAddr; use opte::engine::ingot_packet::MsgBlk; -use opte::engine::packet::PacketState; pub use sys::*; /// Errors while opening a MAC handle. 
diff --git a/xde/src/xde.rs b/xde/src/xde.rs index e0472ef6..0cddc33f 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -35,31 +35,16 @@ use alloc::string::ToString; use alloc::sync::Arc; use alloc::vec::Vec; use core::ffi::CStr; -use core::hash::Hash; -use core::mem::MaybeUninit; use core::num::NonZeroU32; use core::ptr; use core::ptr::addr_of; use core::ptr::addr_of_mut; use core::time::Duration; -use crc32fast::Hasher; use illumos_sys_hdrs::*; -use ingot::geneve::GeneveFlags; -use ingot::geneve::GeneveMut; use ingot::geneve::GeneveRef; -use ingot::geneve::ValidGeneve; -use ingot::ip::IpProtocol; -use ingot::ip::Ipv6Mut; -use ingot::ip::ValidIpv6; -use ingot::types::Emit; -use ingot::types::Header; -use ingot::types::HeaderParse; -use ingot::udp::UdpMut; -use ingot::udp::ValidUdp; use opte::api::ClearXdeUnderlayReq; use opte::api::CmdOk; use opte::api::Direction; -use opte::api::MacAddr; use opte::api::NoResp; use opte::api::OpteCmd; use opte::api::OpteCmdIoctl; @@ -75,23 +60,15 @@ use opte::ddi::sync::KRwLockType; use opte::ddi::time::Interval; use opte::ddi::time::Periodic; use opte::engine::geneve::Vni; -use opte::engine::headers::EncapMeta; -use opte::engine::headers::EncapPush; use opte::engine::headers::IpAddr; -use opte::engine::headers::IpPush; -use opte::engine::ingot_base::EthernetMut; use opte::engine::ingot_base::EthernetRef; -use opte::engine::ingot_base::ValidEthernet; use opte::engine::ingot_packet::MsgBlk; use opte::engine::ingot_packet::Packet2; -use opte::engine::ingot_packet::Parsed2; -use opte::engine::ingot_packet::ParsedMblk; use opte::engine::ioctl::{self as api}; use opte::engine::ip6::Ipv6Addr; use opte::engine::packet::InnerFlowId; use opte::engine::packet::PacketChain; use opte::engine::packet::PacketError; -use opte::engine::port::meta::ActionMeta; use opte::engine::port::Port; use opte::engine::port::PortBuilder; use opte::engine::port::ProcessResult; @@ -1299,19 +1276,17 @@ static mut xde_devops: dev_ops = dev_ops { // Safety: Yes, 
this is a mutable static. No, there is no race as // it's mutated only during `_init()`. Yes, it needs to be mutable // to allow `dld_init_ops()` to set `cb_str`. - devo_cb_ops: unsafe { addr_of!(xde_cb_ops) }, + devo_cb_ops: addr_of!(xde_cb_ops), devo_bus_ops: 0 as *const bus_ops, devo_power: nodev_power, devo_quiesce: ddi_quiesce_not_needed, }; #[no_mangle] -static xde_modldrv: modldrv = unsafe { - modldrv { - drv_modops: addr_of!(mod_driverops), - drv_linkinfo: XDE_STR, - drv_dev_ops: addr_of!(xde_devops), - } +static xde_modldrv: modldrv = modldrv { + drv_modops: addr_of!(mod_driverops), + drv_linkinfo: XDE_STR, + drv_dev_ops: addr_of!(xde_devops), }; #[no_mangle] @@ -1436,7 +1411,7 @@ fn guest_loopback<'a>( // TODO: Rework currently requires a reparse on loopback to account for UFT fastpath. - let mut parsed_pkt = match parsed_pkt.parse_inbound(VpcParser {}) { + let parsed_pkt = match parsed_pkt.parse_inbound(VpcParser {}) { Ok(pkt) => pkt, Err(e) => { opte::engine::dbg!("Loopback bad packet: {:?}", e); @@ -1460,7 +1435,7 @@ fn guest_loopback<'a>( // the packet into the inbound processing path of the // destination Port. match dest_dev.port.process(In, parsed_pkt) { - Ok(ProcessResult::Modified(mut emit_spec)) => { + Ok(ProcessResult::Modified(emit_spec)) => { let pkt = emit_spec.apply(pkt); unsafe { mac::mac_rx( @@ -1562,13 +1537,10 @@ unsafe extern "C" fn xde_mc_tx( #[inline] unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { - let mblk_addr = pkt.mblk_addr(); - let pkt_len_old = pkt.byte_len(); - let parser = src_dev.port.network().parser(); let mblk_addr = pkt.mblk_addr(); let parsed_pkt = Packet2::new(pkt.iter_mut()); - let mut parsed_pkt = match parsed_pkt.parse_outbound(parser) { + let parsed_pkt = match parsed_pkt.parse_outbound(parser) { Ok(pkt) => pkt, Err(e) => { // TODO Add bad packet stat. 
@@ -1614,7 +1586,7 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { let res = port.process(Direction::Out, parsed_pkt); match res { - Ok(ProcessResult::Modified(mut emit_spec)) => { + Ok(ProcessResult::Modified(emit_spec)) => { // If the outer IPv6 destination is the same as the // source, then we need to loop the packet inbound to the // guest on this same host. @@ -1876,14 +1848,13 @@ unsafe fn xde_rx_one( mut pkt: MsgBlk, ) { let mblk_addr = pkt.mblk_addr(); - let pkt_len_old = pkt.byte_len(); let parsed_pkt = Packet2::new(pkt.iter_mut()); // We must first parse the packet in order to determine where it // is to be delivered. let parser = VpcParser {}; // let mblk_addr = parsed_pkt.mblk_addr(); - let mut parsed_pkt = match parsed_pkt.parse_inbound(parser) { + let parsed_pkt = match parsed_pkt.parse_inbound(parser) { Ok(pkt) => pkt, Err(e) => { // TODO Add bad packet stat. @@ -1937,7 +1908,7 @@ unsafe fn xde_rx_one( Ok(ProcessResult::Bypass) => { mac::mac_rx(dev.mh, mrh, pkt.unwrap_mblk().as_ptr()); } - Ok(ProcessResult::Modified(mut emit_spec)) => { + Ok(ProcessResult::Modified(emit_spec)) => { let npkt = emit_spec.apply(pkt); mac::mac_rx(dev.mh, mrh, npkt.unwrap_mblk().as_ptr()); From 48f19f2bc993599ea0d3e998772c428e38011194 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 21 Oct 2024 22:14:22 -0700 Subject: [PATCH 060/115] Cleaning up names --- lib/opte-test-utils/src/lib.rs | 7 +- lib/opte/src/engine/dhcp.rs | 4 +- lib/opte/src/engine/dhcpv6/protocol.rs | 8 +- lib/opte/src/engine/icmp/v4.rs | 4 +- lib/opte/src/engine/icmp/v6.rs | 8 +- lib/opte/src/engine/ingot_packet.rs | 226 +++++++++++------------ lib/opte/src/engine/layer.rs | 16 +- lib/opte/src/engine/mod.rs | 4 +- lib/opte/src/engine/nat.rs | 6 +- lib/opte/src/engine/packet.rs | 15 +- lib/opte/src/engine/port.rs | 44 ++--- lib/opte/src/engine/predicate.rs | 6 +- lib/opte/src/engine/rule.rs | 20 +- lib/opte/src/engine/snat.rs | 6 +- 
lib/oxide-vpc/src/engine/gateway/mod.rs | 4 +- lib/oxide-vpc/src/engine/mod.rs | 6 +- lib/oxide-vpc/src/engine/overlay.rs | 6 +- lib/oxide-vpc/tests/integration_tests.rs | 6 +- xde/src/xde.rs | 2 +- 19 files changed, 194 insertions(+), 204 deletions(-) diff --git a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index 8c8e607e..f9d52bd9 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -32,7 +32,7 @@ use opte::engine::ingot_base::Ethernet; use opte::engine::ingot_base::Ipv4; use opte::engine::ingot_base::Ipv6; use opte::engine::ingot_base::L3Repr; -use opte::engine::ingot_packet::LightParsedMblk; +use opte::engine::ingot_packet::MblkLiteParsed; use opte::engine::ingot_packet::MsgBlk; use opte::engine::ingot_packet::Packet2; pub use opte::engine::ip4::Ipv4Addr; @@ -111,8 +111,7 @@ macro_rules! expect_modified { pub fn parse_inbound( pkt: &mut MsgBlk, parser: NP, -) -> Result>>, ParseError> -{ +) -> Result>>, ParseError> { let pkt = Packet2::new(pkt.iter_mut()); pkt.parse_inbound(parser) } @@ -120,7 +119,7 @@ pub fn parse_inbound( pub fn parse_outbound( pkt: &mut MsgBlk, parser: NP, -) -> Result>>, ParseError> +) -> Result>>, ParseError> { let pkt = Packet2::new(pkt.iter_mut()); pkt.parse_outbound(parser) diff --git a/lib/opte/src/engine/dhcp.rs b/lib/opte/src/engine/dhcp.rs index 406008fb..95c6e907 100644 --- a/lib/opte/src/engine/dhcp.rs +++ b/lib/opte/src/engine/dhcp.rs @@ -8,8 +8,8 @@ use super::ingot_base::Ethernet; use super::ingot_base::Ipv4; +use super::ingot_packet::MblkPacketData; use super::ingot_packet::MsgBlk; -use super::ingot_packet::PacketHeaders2; use super::ip4::Ipv4Addr; use super::ip4::Protocol; use super::predicate::DataPredicate; @@ -480,7 +480,7 @@ impl HairpinAction for DhcpAction { (hdr_preds, data_preds) } - fn gen_packet(&self, meta: &PacketHeaders2) -> GenPacketResult { + fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { // TODO: fold reader access into PacketHeaders2 let 
body = meta.copy_remaining(); let client_pkt = DhcpPacket::new_checked(&body)?; diff --git a/lib/opte/src/engine/dhcpv6/protocol.rs b/lib/opte/src/engine/dhcpv6/protocol.rs index 322fabf6..845fc6bc 100644 --- a/lib/opte/src/engine/dhcpv6/protocol.rs +++ b/lib/opte/src/engine/dhcpv6/protocol.rs @@ -24,8 +24,8 @@ use crate::engine::dhcpv6::SERVER_PORT; use crate::engine::ingot_base::Ethernet; use crate::engine::ingot_base::Ipv6; use crate::engine::ingot_base::Ipv6Ref; +use crate::engine::ingot_packet::MblkPacketData; use crate::engine::ingot_packet::MsgBlk; -use crate::engine::ingot_packet::PacketHeaders2; use crate::engine::predicate::DataPredicate; use crate::engine::predicate::EtherAddrMatch; use crate::engine::predicate::IpProtoMatch; @@ -585,7 +585,7 @@ fn process_confirm_message<'a>( // Process a DHCPv6 message from the a client. fn process_client_message<'a>( action: &'a Dhcpv6Action, - _meta: &'a PacketHeaders2, + _meta: &'a MblkPacketData, client_msg: &'a Message<'a>, ) -> Option> { match client_msg.typ { @@ -607,7 +607,7 @@ fn process_client_message<'a>( // the request and the actual DHCPv6 message to send out. fn generate_packet<'a>( action: &Dhcpv6Action, - meta: &PacketHeaders2, + meta: &MblkPacketData, msg: &'a Message<'a>, ) -> GenPacketResult { let udp = Udp { @@ -665,7 +665,7 @@ impl HairpinAction for Dhcpv6Action { // Rather than put this logic into DataPredicates, we just parse the packet // here and reply accordingly. So the `Dhcpv6Action` is really a full // server, to the extent we emulate one. 
- fn gen_packet(&self, meta: &PacketHeaders2) -> GenPacketResult { + fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { let body = meta.copy_remaining(); if let Some(client_msg) = Message::from_bytes(&body) { if let Some(reply) = process_client_message(self, meta, &client_msg) diff --git a/lib/opte/src/engine/icmp/v4.rs b/lib/opte/src/engine/icmp/v4.rs index ca6edc64..57d86bbc 100644 --- a/lib/opte/src/engine/icmp/v4.rs +++ b/lib/opte/src/engine/icmp/v4.rs @@ -10,8 +10,8 @@ use super::*; use crate::engine::ingot_base::Ethernet; use crate::engine::ingot_base::Ipv4; use crate::engine::ingot_base::L3; +use crate::engine::ingot_packet::MblkPacketData; use crate::engine::ingot_packet::MsgBlk; -use crate::engine::ingot_packet::PacketHeaders2; use crate::engine::predicate::Ipv4AddrMatch; use ingot::ethernet::Ethertype; use ingot::ip::IpProtocol; @@ -47,7 +47,7 @@ impl HairpinAction for IcmpEchoReply { (hdr_preds, data_preds) } - fn gen_packet(&self, meta: &PacketHeaders2) -> GenPacketResult { + fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { let Some(icmp) = meta.inner_icmp() else { // Getting here implies the predicate matched, but that the // extracted metadata indicates this isn't an ICMP packet. 
That diff --git a/lib/opte/src/engine/icmp/v6.rs b/lib/opte/src/engine/icmp/v6.rs index e573eade..ef87bad1 100644 --- a/lib/opte/src/engine/icmp/v6.rs +++ b/lib/opte/src/engine/icmp/v6.rs @@ -10,8 +10,8 @@ use super::*; use crate::engine::ingot_base::Ethernet; use crate::engine::ingot_base::Ipv6; use crate::engine::ingot_base::Ipv6Ref; +use crate::engine::ingot_packet::MblkPacketData; use crate::engine::ingot_packet::MsgBlk; -use crate::engine::ingot_packet::PacketHeaders2; use crate::engine::predicate::Ipv6AddrMatch; use alloc::string::String; use ingot::ethernet::Ethertype; @@ -101,7 +101,7 @@ impl HairpinAction for Icmpv6EchoReply { (hdr_preds, data_preds) } - fn gen_packet(&self, meta: &PacketHeaders2) -> GenPacketResult { + fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { // TODO: fold reader access into PacketHeaders2 let Some(icmp6) = meta.inner_icmp6() else { // Getting here implies the predicate matched, but that the @@ -229,7 +229,7 @@ impl HairpinAction for RouterAdvertisement { (hdr_preds, data_preds) } - fn gen_packet(&self, meta: &PacketHeaders2) -> GenPacketResult { + fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { // TODO: fold reader access into PacketHeaders2 use smoltcp::time::Duration; use smoltcp::wire::NdiscRouterFlags; @@ -552,7 +552,7 @@ impl HairpinAction for NeighborAdvertisement { (hdr_preds, data_preds) } - fn gen_packet(&self, meta: &PacketHeaders2) -> GenPacketResult { + fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { let Some(icmp6) = meta.inner_icmp6() else { // Getting here implies the predicate matched, but that the // extracted metadata indicates this isn't an ICMPv6 packet. 
That diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 1f26602e..29ddecd3 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -22,6 +22,7 @@ use super::headers::IpMod; use super::headers::IpPush; use super::headers::PushAction; use super::headers::UlpMetaModify; +use super::icmp::IcmpEchoMut; use super::icmp::IcmpEchoRef; use super::icmp::QueryEcho; use super::icmp::ValidIcmpEcho; @@ -525,7 +526,21 @@ impl LightweightMeta for ValidGeneveOverV6 { } } -// --- REWRITE IN PROGRESS --- +/// An individual illumos `mblk_t` -- a single bytestream +/// comprised of a linked list of data segments. +/// +/// To facilitate testing the OPTE core, [`MsgBlk`] is an abstraction for +/// manipulating network packets in both a `std` and `no_std` environment. +/// The first is useful for writing tests against the OPTE core engine and +/// executing them in userland, without the need for standing up a full-blown +/// virtual machine. +/// +/// The `no_std` implementation is used when running in-kernel. The +/// main difference is the `mblk_t` and `dblk_t` structures are coming +/// from viona (outbound/Tx) and mac (inbound/Rx), and we consume them +/// via [`Packet::wrap_mblk()`]. In reality this is typically holding +/// an Ethernet _frame_, but we prefer to use the colloquial +/// nomenclature of "packet". 
#[derive(Debug)] pub struct MsgBlk { pub inner: NonNull, @@ -1346,7 +1361,7 @@ impl PktBodyWalker { } } -pub struct PacketHeaders { +pub struct PacketData { pub(crate) headers: OpteMeta, initial_lens: Option>, body: PktBodyWalker, @@ -1366,7 +1381,7 @@ impl From> for OpteMeta { } } -impl core::fmt::Debug for PacketHeaders { +impl core::fmt::Debug for PacketData { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.write_str("PacketHeaders(..)") } @@ -1388,7 +1403,7 @@ pub fn ulp_dst_port(pkt: &Ulp) -> Option { } } -impl PacketHeaders { +impl PacketData { pub fn initial_lens(&self) -> Option<&OpteUnifiedLengths> { self.initial_lens.as_ref().map(|v| &**v) } @@ -1614,9 +1629,9 @@ fn pseudo_port_v(chunk: &ValidUlp) -> Option { } } -impl From<&PacketHeaders> for InnerFlowId { +impl From<&PacketData> for InnerFlowId { #[inline] - fn from(meta: &PacketHeaders) -> Self { + fn from(meta: &PacketData) -> Self { let (proto, addrs) = match meta.inner_l3() { Some(L3::Ipv4(pkt)) => ( pkt.protocol().0, @@ -1649,28 +1664,12 @@ impl From<&PacketHeaders> for InnerFlowId { /// A network packet. /// -/// The [`Packet`] type presents an abstraction for manipulating -/// network packets in both a `std` and `no_std` environment. The -/// first is useful for writing tests against the OPTE core engine and -/// executing them in userland, without the need for standing up a -/// full-blown virtual machine. To the engine this [`Packet`] is -/// absolutely no different than if it was running in-kernel for a -/// real virtual machine. -/// -/// The `no_std` implementation is used when running in-kernel. The -/// main difference is the `mblk_t` and `dblk_t` structures are coming -/// from viona (outbound/Tx) and mac (inbound/Rx), and we consume them -/// via [`Packet::wrap_mblk()`]. In reality this is typically holding -/// an Ethernet _frame_, but we prefer to use the colloquial -/// nomenclature of "packet". 
-/// -/// A [`Packet`] is made up of one or more segments ([`PacketSeg`]). -/// Any given header is *always* contained in a single segment, i.e. a -/// header never straddles multiple segments. While it's preferable to -/// have all headers in the first segment, it *may* be the case that -/// the headers span multiple segments; but a *single* header type -/// (e.g. the IP header) will *never* straddle two segments. The -/// payload, however, *may* span multiple segments. +/// A packet is made up of one or more segments. Any given header is +/// *always* contained in a single segment, i.e. a header never straddles +/// multiple segments. While it's preferable to have all headers in the +/// first segment, it *may* be the case that the headers span multiple +/// segments; but a *single* header type (e.g. the IP header) will *never* +/// straddle two segments. The payload, however, *may* span multiple segments. /// /// # illumos terminology /// @@ -1694,15 +1693,12 @@ impl From<&PacketHeaders> for InnerFlowId { /// compiler can detect when your API is working against the wrong /// contract (for example a function that expects a single packet but /// is being fed a packet chain). -/// -/// TODOx -/// -/// * Document the various type states, their purpose, their data, and -/// how the [`Packet`] generally transitions between them. -/// -/// * Somewhere we'll want to enforce and document a 2-byte prefix pad -/// to keep IP header alignment (the host expects this). -/// +// +// TODO: In theory, this can be any `Read` type giving us `&mut [u8]`s, +// but in practice we are internally reliant on returning `MsgBlk`s in +// hairpin actions and the like. Fighting the battle of making this generic +// is a bridge too far for the `ingot` datapath rewrite. This might have +// value in future. 
#[derive(Debug)] pub struct Packet2 { state: S, @@ -1733,12 +1729,11 @@ where pub fn parse_inbound( self, net: NP, - ) -> Result>>, ParseError> - { + ) -> Result>>, ParseError> { let Packet2 { state: Initialized2 { len, inner } } = self; Ok(Packet2 { - state: ParsedStage1 { meta: net.parse_inbound(inner)?, len }, + state: LiteParsed { meta: net.parse_inbound(inner)?, len }, }) } @@ -1746,23 +1741,22 @@ where pub fn parse_outbound( self, net: NP, - ) -> Result>>, ParseError> - { + ) -> Result>>, ParseError> { let Packet2 { state: Initialized2 { len, inner } } = self; Ok(Packet2 { - state: ParsedStage1 { meta: net.parse_outbound(inner)?, len }, + state: LiteParsed { meta: net.parse_outbound(inner)?, len }, }) } } -impl<'a, T: Read + 'a, M: LightweightMeta> Packet2> +impl<'a, T: Read + 'a, M: LightweightMeta> Packet2> where T::Chunk: ingot::types::IntoBufPointer<'a>, { #[inline] - pub fn to_full_meta(self) -> Packet2> { - let Packet2 { state: ParsedStage1 { len, meta } } = self; + pub fn to_full_meta(self) -> Packet2> { + let Packet2 { state: LiteParsed { len, meta } } = self; let IngotParsed { stack: headers, data, last_chunk } = meta; // TODO: we can probably not do this in some cases, but we @@ -1787,10 +1781,10 @@ where base: Some((last_chunk, data)).into(), slice: Default::default(), }; - let meta = Box::new(PacketHeaders { headers, initial_lens, body }); + let meta = Box::new(PacketData { headers, initial_lens, body }); Packet2 { - state: Parsed2 { + state: FullParsed { meta, flow, body_csum, @@ -1823,12 +1817,12 @@ where } } -impl Packet2> { - pub fn meta(&self) -> &PacketHeaders { +impl Packet2> { + pub fn meta(&self) -> &PacketData { &self.state.meta } - pub fn meta_mut(&mut self) -> &mut PacketHeaders { + pub fn meta_mut(&mut self) -> &mut PacketData { &mut self.state.meta } @@ -1839,7 +1833,7 @@ impl Packet2> { #[inline] /// Convert a packet's metadata into a set of instructions /// needed to serialize all its changes to the wire. 
- pub fn emit_spec(self) -> Result + pub fn emit_spec(self) -> Result where T::Chunk: ByteSliceMut, { @@ -2042,7 +2036,7 @@ impl Packet2> { _ => {} } - Ok(EmitSpec { + Ok(OldEmitSpec { rewind: rewind as u16, payload_len: payload_len as u16, encapped_len: encapped_len as u16, @@ -2251,14 +2245,14 @@ impl Packet2> { T::Chunk: ByteSliceMut, { // If we know that no transform touched a field which features in - // an inner transport cksum (L4/L3 src/dst, most realistically). - if !self.state.inner_csum_dirty { + // an inner transport cksum (L4/L3 src/dst, most realistically), + // and no body transform occurred then we can exit early. + if !self.checksums_dirty() && !self.state.body_modified { return; } - // TODO: DOUBLE CHECK LOGIC - - // We expect + // We expect that any body transform will necessarily invalidate + // the body_csum. Recompute from scratch. if self.state.body_modified { return self.compute_checksums(); } @@ -2363,18 +2357,17 @@ pub struct Initialized2 { } impl PacketState for Initialized2 {} -impl PacketState for Parsed2 {} +impl PacketState for FullParsed {} -/// Zerocopy view onto a parsed packet, acompanied by locally +/// Zerocopy view onto a parsed packet, accompanied by locally /// computed state. -/// XXX: this is 'full meta'. Maybe rename LightweightMeta (???) -pub struct Parsed2 { +pub struct FullParsed { /// Total length of packet, in bytes. This is equal to the sum of /// the length of the _initialized_ window in all the segments /// (`b_wptr - b_rptr`). len: usize, /// Access to parsed packet headers and the packet body. - meta: Box>, + meta: Box>, /// Current Flow ID of this packet, accountgin for any applied /// transforms. flow: InnerFlowId, @@ -2405,30 +2398,23 @@ pub struct Parsed2 { inner_csum_dirty: bool, } -pub struct ParsedStage1> { +/// Minimum-size zerocopy view onto a parsed packet, sufficient for fast +/// packet transformation. +pub struct LiteParsed> { len: usize, - // This type is... pretty fat. 
- // But we need to hang onto this to allow hairpins/body txms/ARP - // to function. meta: IngotParsed, - // REMOVED: - // flow: InnerFlowId, // (can be computed out) - // body_csum: Option, // (can be computed based on header class) - // l4_hash: Option, // Should be stored in emitspec. } -impl> PacketState for ParsedStage1 {} - -impl> ParsedStage1 {} +impl> PacketState for LiteParsed {} -// Needed for now to account for not wanting to redesign ActionDescs -// to be generic over T (trait object safety rules, etc.). -pub type PacketMeta3<'a> = Parsed2>; -pub type PacketHeaders2<'a> = PacketHeaders>; +impl> LiteParsed {} -pub type InitMblk<'a> = Initialized2>; -pub type ParsedMblk<'a> = Parsed2>; -pub type LightParsedMblk<'a, M> = ParsedStage1, M>; +// XXX: Needed for now to account for not wanting to redesign +// ActionDescs to be generic over T (trait object safety rules, etc.), +// in addition to needing to rework Hairpin actions. +pub type MblkPacketData<'a> = PacketData>; +pub type MblkFullParsed<'a> = FullParsed>; +pub type MblkLiteParsed<'a, M> = LiteParsed, M>; #[inline] fn csum_minus_hdr(ulp: &ValidUlp) -> Option { @@ -2518,11 +2504,6 @@ impl<'a> QueryLen for MsgBlkIterMut<'a> { } } -pub enum Emitter { - Repr(Box), - Cached(Arc<[u8]>), -} - // TODO: don't really care about pushing 'inner' reprs today. 
#[derive(Clone, Debug, Default)] pub struct OpteEmit { @@ -2543,20 +2524,26 @@ pub struct OpteInnerEmit { } #[derive(Clone, Debug)] -pub struct EmittestSpec { - pub spec: EmitterSpec, - pub l4_hash: u32, - pub rewind: u16, - pub ulp_len: u32, +pub struct EmitSpec { + pub(crate) prepend: PushSpec, + pub(crate) l4_hash: u32, + pub(crate) rewind: u16, + pub(crate) ulp_len: u32, } -impl Default for EmittestSpec { +impl Default for EmitSpec { fn default() -> Self { - Self { spec: EmitterSpec::NoOp, l4_hash: 0, rewind: 0, ulp_len: 0 } + Self { prepend: PushSpec::NoOp, l4_hash: 0, rewind: 0, ulp_len: 0 } } } -impl EmittestSpec { +impl EmitSpec { + #[inline] + #[must_use] + pub fn l4_hash(&self) -> u32 { + self.l4_hash + } + #[inline] #[must_use] pub fn apply(&self, mut pkt: MsgBlk) -> MsgBlk { @@ -2591,11 +2578,11 @@ impl EmittestSpec { // much less so in the fastpath. pkt.drop_empty_segments(); - let out = match &self.spec { - EmitterSpec::Fastpath(push_spec) => { + let out = match &self.prepend { + PushSpec::Fastpath(push_spec) => { push_spec.encap.prepend(pkt, self.ulp_len as usize) } - EmitterSpec::Slowpath(push_spec) => { + PushSpec::Slowpath(push_spec) => { let mut needed_push = push_spec.outer_eth.packet_length() + push_spec.outer_ip.packet_length() + push_spec.outer_encap.packet_length(); @@ -2688,7 +2675,7 @@ impl EmittestSpec { pkt } } - EmitterSpec::NoOp => pkt, + PushSpec::NoOp => pkt, }; out @@ -2696,48 +2683,48 @@ impl EmittestSpec { #[inline] pub fn outer_encap_vni(&self) -> Option { - match &self.spec { - EmitterSpec::Fastpath(c) => match &c.encap { + match &self.prepend { + PushSpec::Fastpath(c) => match &c.encap { CompiledEncap::Push { encap: EncapPush::Geneve(g), .. 
} => { Some(g.vni) } _ => None, }, - EmitterSpec::Slowpath(s) => match &s.outer_encap { + PushSpec::Slowpath(s) => match &s.outer_encap { Some(EncapMeta::Geneve(g)) => Some(g.vni), _ => None, }, - EmitterSpec::NoOp => None, + PushSpec::NoOp => None, } } #[inline] pub fn outer_ip6_addrs(&self) -> Option<(Ipv6Addr, Ipv6Addr)> { - match &self.spec { - EmitterSpec::Fastpath(c) => match &c.encap { + match &self.prepend { + PushSpec::Fastpath(c) => match &c.encap { CompiledEncap::Push { ip: IpPush::Ip6(v6), .. } => { Some((v6.src, v6.dst)) } _ => None, }, - EmitterSpec::Slowpath(s) => match &s.outer_ip { + PushSpec::Slowpath(s) => match &s.outer_ip { Some(L3Repr::Ipv6(v6)) => Some((v6.source, v6.destination)), _ => None, }, - EmitterSpec::NoOp => None, + PushSpec::NoOp => None, } } } #[derive(Clone, Debug)] -pub enum EmitterSpec { +pub enum PushSpec { Fastpath(Arc), Slowpath(Box), NoOp, } #[derive(Clone, Debug)] -pub struct EmitSpec { +pub struct OldEmitSpec { pub rewind: u16, pub encapped_len: u16, pub payload_len: u16, @@ -2803,7 +2790,6 @@ impl QueryEcho for IcmpV6Packet { } } -// TODO: generate ref/mut traits on InlineHeader AND BoxPacket in ingot to halve the code here... impl HeaderActionModify for InlineHeader> { @@ -2852,7 +2838,6 @@ impl HeaderActionModify for EthernetPacket { } } -// TODO: generate ref/mut traits on InlineHeader AND BoxPacket in ingot to halve the code here... impl HeaderActionModify for InlineHeader> { @@ -2888,7 +2873,6 @@ impl HeaderActionModify v4.set_protocol(IpProtocol(u8::from(p))); } } - // run_modify should be capable of returning error... 
_ => return Err(HeaderActionError::MissingHeader), }, IpMod::Ip6(mods) => match self { @@ -2900,6 +2884,7 @@ impl HeaderActionModify >::set_destination(v6, dst); } if let Some(p) = mods.proto { + // TODO(kyle) // NOTE: I know this is broken for V6EHs >::set_next_header( v6, @@ -2915,11 +2900,11 @@ impl HeaderActionModify v6.set_destination(dst); } if let Some(p) = mods.proto { + // TODO(kyle) // NOTE: I know this is broken for V6EHs v6.set_next_header(IpProtocol(u8::from(p))); } } - // run_modify should be capable of returning error... _ => return Err(HeaderActionError::MissingHeader), }, } @@ -2995,7 +2980,12 @@ impl HeaderActionModify for Ulp { if let Some(id) = mod_spec.icmp_id { if i4.echo_id().is_some() { let roh = i4.rest_of_hdr_mut(); - roh[..2].copy_from_slice(&id.to_be_bytes()) + ValidIcmpEcho::parse(&mut roh[..]) + .expect( + "ICMP ROH is exactly as large as ValidIcmpEcho", + ) + .0 + .set_id(id); } } } @@ -3003,7 +2993,12 @@ impl HeaderActionModify for Ulp { if let Some(id) = mod_spec.icmp_id { if i6.echo_id().is_some() { let roh = i6.rest_of_hdr_mut(); - roh[..2].copy_from_slice(&id.to_be_bytes()) + ValidIcmpEcho::parse(&mut roh[..]) + .expect( + "ICMP ROH is exactly as large as ValidIcmpEcho", + ) + .0 + .set_id(id); } } } @@ -3070,9 +3065,6 @@ impl HasInnerCksum for Ulp { const HAS_CKSUM: bool = true; } -// papering over a lot here... -// need to briefly keep both around while I systematically rewrite the test suite. 
- impl From for ingot::types::Header> { diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index 6f8eaa84..1843d7a9 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -11,10 +11,10 @@ use super::flow_table::FlowEntry; use super::flow_table::FlowTable; use super::flow_table::FlowTableDump; use super::flow_table::FLOW_DEF_EXPIRE_SECS; +use super::ingot_packet::MblkFullParsed; +use super::ingot_packet::MblkPacketData; use super::ingot_packet::MsgBlk; use super::ingot_packet::Packet2; -use super::ingot_packet::PacketHeaders2; -use super::ingot_packet::ParsedMblk; use super::ioctl; use super::ioctl::ActionDescEntryDump; use super::packet::BodyTransformError; @@ -798,7 +798,7 @@ impl Layer { &mut self, ectx: &ExecCtx, dir: Direction, - pkt: &mut Packet2, + pkt: &mut Packet2, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { @@ -816,7 +816,7 @@ impl Layer { fn process_in( &mut self, ectx: &ExecCtx, - pkt: &mut Packet2, + pkt: &mut Packet2, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { @@ -886,7 +886,7 @@ impl Layer { fn process_in_rules( &mut self, ectx: &ExecCtx, - pkt: &mut Packet2, + pkt: &mut Packet2, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { @@ -1103,7 +1103,7 @@ impl Layer { fn process_out( &mut self, ectx: &ExecCtx, - pkt: &mut Packet2, + pkt: &mut Packet2, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { @@ -1173,7 +1173,7 @@ impl Layer { fn process_out_rules( &mut self, ectx: &ExecCtx, - pkt: &mut Packet2, + pkt: &mut Packet2, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { @@ -1602,7 +1602,7 @@ impl<'a> RuleTable { fn find_match<'b>( &mut self, ifid: &InnerFlowId, - pmeta: &PacketHeaders2, + pmeta: &MblkPacketData, ameta: &ActionMeta, ) -> Option<&Rule> { for rte in self.rules.iter_mut() { diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index 91ae33d7..b2a813fc 100644 --- 
a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -48,11 +48,11 @@ use core::fmt; use core::num::ParseIntError; use ingot::tcp::TcpRef; use ingot::types::Read; +use ingot_packet::FullParsed; use ingot_packet::MsgBlk; use ingot_packet::OpteMeta; use ingot_packet::OpteParsed2; use ingot_packet::Packet2; -use ingot_packet::Parsed2; use ingot_packet::ValidNoEncap; use ip4::IpError; pub use opte_api::Direction; @@ -278,7 +278,7 @@ pub trait NetworkImpl { fn handle_pkt( &self, dir: Direction, - pkt: &mut Packet2>, + pkt: &mut Packet2>, uft_in: &FlowTable>, uft_out: &FlowTable>, ) -> Result diff --git a/lib/opte/src/engine/nat.rs b/lib/opte/src/engine/nat.rs index 50c6645e..1d6c56d9 100644 --- a/lib/opte/src/engine/nat.rs +++ b/lib/opte/src/engine/nat.rs @@ -8,8 +8,8 @@ use super::headers::HeaderAction; use super::headers::IpMod; +use super::ingot_packet::MblkFullParsed; use super::ingot_packet::Packet2; -use super::ingot_packet::ParsedMblk; use super::packet::InnerFlowId; use super::port::meta::ActionMeta; use super::predicate::DataPredicate; @@ -85,7 +85,7 @@ impl StatefulAction for OutboundNat { fn gen_desc( &self, flow_id: &InnerFlowId, - _pkt: &Packet2, + _pkt: &Packet2, _meta: &mut ActionMeta, ) -> rule::GenDescResult { // When we have several external IPs at our disposal, we are @@ -148,7 +148,7 @@ impl StatefulAction for InboundNat { fn gen_desc( &self, flow_id: &InnerFlowId, - _pkt: &Packet2, + _pkt: &Packet2, _meta: &mut ActionMeta, ) -> rule::GenDescResult { // We rely on the attached predicates to filter out IPs which are *not* diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 151cf60b..621704ba 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -47,8 +47,6 @@ cfg_if! 
{ pub static MBLK_MAX_SIZE: usize = u16::MAX as usize; -// --- REWRITE IN PROGRESS --- - pub static FLOW_ID_DEFAULT: InnerFlowId = InnerFlowId { proto: 255, addrs: AddrPair::V4 { src: Ipv4Addr::ANY_ADDR, dst: Ipv4Addr::ANY_ADDR }, @@ -190,13 +188,14 @@ struct PacketChainInner { /// Network packets are provided by illumos as a linked list, using /// the `b_next` and `b_prev` fields. /// -/// See the documentation for [`Packet`] for full context. -// TODO: We might modify Packet to do away with the `Vec`. -// I could see Chain being retooled accordingly (i.e., Packets could -// be allocated a lifetime via PhantomData based on whether we want -// to remove them from the chain or modify in place). +/// See the documentation for [`Packet`] and/or [`MsgBlk`] for full context. +// TODO: We might retool this type now that MsgBlk does not decompose +// each mblk_t into individual segments (i.e., packets could be allocated +// a lifetime via PhantomData based on whether we want to remove them from the chain or modify in place). // Today's code is all equivalent to always using 'static, because // we remove and re-add the mblks to work on them. +// We might want also want to return either a chain/mblk_t in an enum, but +// practically XDE will always assume it has a chain from MAC. pub struct PacketChain { inner: Option, } @@ -300,7 +299,7 @@ impl PacketChain { impl Drop for PacketChain { fn drop(&mut self) { - // This is a minor variation on Packet's logic. illumos + // This is a minor variation on MsgBlk's logic. illumos // contains helper functions from STREAMS to just drop a whole // chain. cfg_if! 
{ diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index ee2c8216..cf50388f 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -21,13 +21,13 @@ use super::ingot_base::Ethernet; use super::ingot_base::Ipv4; use super::ingot_base::Ipv6; use super::ingot_base::L3Repr; +use super::ingot_packet::FullParsed; +use super::ingot_packet::LiteParsed; +use super::ingot_packet::MblkFullParsed; +use super::ingot_packet::MblkPacketData; use super::ingot_packet::MsgBlk; use super::ingot_packet::MsgBlkIterMut; use super::ingot_packet::Packet2; -use super::ingot_packet::PacketHeaders2; -use super::ingot_packet::Parsed2; -use super::ingot_packet::ParsedMblk; -use super::ingot_packet::ParsedStage1; use super::ioctl; use super::ioctl::TcpFlowEntryDump; use super::ioctl::TcpFlowStateDump; @@ -67,8 +67,8 @@ use crate::ddi::sync::KMutex; use crate::ddi::sync::KMutexType; use crate::ddi::time::Moment; use crate::engine::flow_table::ExpiryPolicy; -use crate::engine::ingot_packet::EmitterSpec; -use crate::engine::ingot_packet::EmittestSpec; +use crate::engine::ingot_packet::EmitSpec; +use crate::engine::ingot_packet::PushSpec; use crate::engine::rule::CompiledEncap; use crate::ExecCtx; use alloc::boxed::Box; @@ -163,7 +163,7 @@ pub enum ProcessResult { reason: DropReason, }, #[leaf] - Modified(EmittestSpec), + Modified(EmitSpec), // TODO: it would be nice if this packet type could be user-specified, but might // be tricky. #[leaf] @@ -176,7 +176,7 @@ impl From for ProcessResult { // TODO: In theory HdlPacket::Allow should have an emit spec, too. // We are not using any op other than Hairpin, so kick that particular // can down the road. 
- HdlPktAction::Allow => Self::Modified(EmittestSpec::default()), + HdlPktAction::Allow => Self::Modified(EmitSpec::default()), HdlPktAction::Deny => Self::Drop { reason: DropReason::HandlePkt }, HdlPktAction::Hairpin(pkt) => Self::Hairpin(pkt), } @@ -907,7 +907,7 @@ impl Port { data: &FlowTable, dir: Direction, msg: String, - pkt: &mut Packet2, + pkt: &mut Packet2, ) { if unsafe { super::opte_panic_debug != 0 } { super::err!("mblk: {}", pkt.mblk_addr()); @@ -923,7 +923,7 @@ impl Port { fn tcp_err_probe( &self, dir: Direction, - pkt: Option<&Packet2>, + pkt: Option<&Packet2>, flow: &InnerFlowId, msg: String, ) { @@ -1218,7 +1218,7 @@ impl Port { // which can advance to (and hold) light->full-fat metadata. // My gutfeel is that there's a perf cost here -- this struct // is pretty fat, but expressing the transform on a &mut also sucks. - mut pkt: Packet2, M>>, + mut pkt: Packet2, M>>, ) -> result::Result where M: LightweightMeta< as Read>::Chunk>, @@ -1420,8 +1420,8 @@ impl Port { CompiledEncap::Pop => encap_len, _ => 0, }; - let out = EmittestSpec { - spec: EmitterSpec::Fastpath(tx), + let out = EmitSpec { + prepend: PushSpec::Fastpath(tx), l4_hash, rewind, ulp_len, @@ -1517,8 +1517,8 @@ impl Port { // TODO: remove EmitSpec and have above method just spit out the new // variant. 
- Ok(ProcessResult::Modified(EmittestSpec { - spec: EmitterSpec::Slowpath(emit_spec.push_spec.into()), + Ok(ProcessResult::Modified(EmitSpec { + prepend: PushSpec::Slowpath(emit_spec.push_spec.into()), l4_hash, rewind: emit_spec.rewind, ulp_len: emit_spec.encapped_len as u32, @@ -1698,7 +1698,7 @@ impl Transforms { #[inline] fn apply( &self, - pkt: &mut Packet2>, + pkt: &mut Packet2>, dir: Direction, ) -> result::Result<(), ProcessError> where @@ -1932,7 +1932,7 @@ impl Port { &self, data: &mut PortData, dir: Direction, - pkt: &mut Packet2, + pkt: &mut Packet2, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { @@ -1976,7 +1976,7 @@ impl Port { dir: Direction, flow: &InnerFlowId, epoch: u64, - pkt: &Packet2, + pkt: &Packet2, ) { cfg_if::cfg_if! { if #[cfg(all(not(feature = "std"), not(test)))] { @@ -2237,7 +2237,7 @@ impl Port { fn process_in_tcp( &self, data: &mut PortData, - pmeta: &PacketHeaders2, + pmeta: &MblkPacketData, ufid_in: &InnerFlowId, pkt_len: u64, ) -> result::Result { @@ -2277,7 +2277,7 @@ impl Port { &self, data: &mut PortData, epoch: u64, - pkt: &mut Packet2, + pkt: &mut Packet2, ufid_in: &InnerFlowId, ameta: &mut ActionMeta, ) -> result::Result { @@ -2453,7 +2453,7 @@ impl Port { &self, data: &mut PortData, ufid_out: &InnerFlowId, - pmeta: &PacketHeaders2, + pmeta: &MblkPacketData, pkt_len: u64, ) -> result::Result { let tcp = pmeta.inner_tcp().unwrap(); @@ -2477,7 +2477,7 @@ impl Port { &self, data: &mut PortData, epoch: u64, - pkt: &mut Packet2, + pkt: &mut Packet2, ameta: &mut ActionMeta, ) -> result::Result { use Direction::Out; diff --git a/lib/opte/src/engine/predicate.rs b/lib/opte/src/engine/predicate.rs index 408bc765..53ea8cb8 100644 --- a/lib/opte/src/engine/predicate.rs +++ b/lib/opte/src/engine/predicate.rs @@ -17,7 +17,7 @@ use super::ingot_base::Ipv6Ref; use super::ingot_base::L3; use super::ingot_packet::ulp_dst_port; use super::ingot_packet::ulp_src_port; -use super::ingot_packet::PacketHeaders2; +use 
super::ingot_packet::MblkPacketData; use super::ip4::Ipv4Addr; use super::ip4::Ipv4Cidr; use super::ip4::Protocol; @@ -356,7 +356,7 @@ impl Display for Predicate { impl Predicate { pub(crate) fn is_match( &self, - meta: &PacketHeaders2, + meta: &MblkPacketData, action_meta: &ActionMeta, ) -> bool { match self { @@ -594,7 +594,7 @@ impl DataPredicate { // use `PacketMeta` to determine if there is a suitable payload to // be inspected. That is, if there is no metadata for a given // header, there is certainly no payload. - pub(crate) fn is_match<'a>(&self, meta: &PacketHeaders2) -> bool { + pub(crate) fn is_match<'a>(&self, meta: &MblkPacketData) -> bool { match self { Self::Not(pred) => !pred.is_match(meta), diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index feaaea05..35b4d888 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -22,11 +22,11 @@ use super::ingot_base::Ethernet; use super::ingot_base::EthernetPacket; use super::ingot_base::ValidEthernet; use super::ingot_base::L3; +use super::ingot_packet::MblkFullParsed; +use super::ingot_packet::MblkPacketData; use super::ingot_packet::MsgBlk; use super::ingot_packet::Packet2; -use super::ingot_packet::PacketHeaders; -use super::ingot_packet::PacketHeaders2; -use super::ingot_packet::ParsedMblk; +use super::ingot_packet::PacketData; use super::packet::BodyTransform; use super::packet::InnerFlowId; use super::port::meta::ActionMeta; @@ -159,7 +159,7 @@ pub trait ActionDesc { fn gen_bt( &self, _dir: Direction, - _meta: &PacketHeaders2, + _meta: &MblkPacketData, _payload_segs: &[&[u8]], ) -> Result>, GenBtError> { Ok(None) @@ -257,7 +257,7 @@ impl StaticAction for Identity { &self, _dir: Direction, _flow_id: &InnerFlowId, - _pkt_meta: &PacketHeaders2, + _pkt_meta: &MblkPacketData, _action_meta: &mut ActionMeta, ) -> GenHtResult { Ok(AllowOrDeny::Allow(HdrTransform::identity(&self.name))) @@ -471,7 +471,7 @@ impl HdrTransform { /// [`HdrTransformError::MissingHeader`] 
is returned. pub fn run( &self, - meta: &mut PacketHeaders, + meta: &mut PacketData, ) -> Result where T::Chunk: ByteSliceMut, @@ -565,7 +565,7 @@ pub trait StatefulAction: Display { fn gen_desc( &self, flow_id: &InnerFlowId, - pkt: &Packet2, + pkt: &Packet2, meta: &mut ActionMeta, ) -> GenDescResult; @@ -585,7 +585,7 @@ pub trait StaticAction: Display { &self, dir: Direction, flow_id: &InnerFlowId, - packet_meta: &PacketHeaders2, + packet_meta: &MblkPacketData, action_meta: &mut ActionMeta, ) -> GenHtResult; @@ -664,7 +664,7 @@ pub trait HairpinAction: Display { /// `rdr` argument provides a [`PacketReader`] against /// [`Packet`], with its starting position set to the /// beginning of the packet's payload. - fn gen_packet(&self, meta: &PacketHeaders2) -> GenPacketResult; + fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult; /// Return the predicates implicit to this action. /// @@ -943,7 +943,7 @@ impl Rule { impl<'a> Rule { pub fn is_match<'b>( &self, - meta: &PacketHeaders2, + meta: &MblkPacketData, action_meta: &ActionMeta, ) -> bool { #[cfg(debug_assertions)] diff --git a/lib/opte/src/engine/snat.rs b/lib/opte/src/engine/snat.rs index 4f702535..f28d4f96 100644 --- a/lib/opte/src/engine/snat.rs +++ b/lib/opte/src/engine/snat.rs @@ -11,8 +11,8 @@ use super::headers::IpMod; use super::headers::UlpGenericModify; use super::headers::UlpHeaderAction; use super::headers::UlpMetaModify; +use super::ingot_packet::MblkFullParsed; use super::ingot_packet::Packet2; -use super::ingot_packet::ParsedMblk; use super::packet::InnerFlowId; use super::port::meta::ActionMeta; use super::predicate::DataPredicate; @@ -241,7 +241,7 @@ impl SNat { fn gen_icmp_desc( &self, nat: SNatAlloc, - pkt: &Packet2, + pkt: &Packet2, ) -> GenDescResult { let meta = pkt.meta(); @@ -303,7 +303,7 @@ where fn gen_desc( &self, flow_id: &InnerFlowId, - pkt: &Packet2, + pkt: &Packet2, _meta: &mut ActionMeta, ) -> GenDescResult { let priv_port = flow_id.src_port; diff --git 
a/lib/oxide-vpc/src/engine/gateway/mod.rs b/lib/oxide-vpc/src/engine/gateway/mod.rs index f8844923..4c508e78 100644 --- a/lib/oxide-vpc/src/engine/gateway/mod.rs +++ b/lib/oxide-vpc/src/engine/gateway/mod.rs @@ -56,7 +56,7 @@ use opte::api::Direction; use opte::api::OpteError; use opte::engine::ether::EtherMod; use opte::engine::headers::HeaderAction; -use opte::engine::ingot_packet::PacketHeaders2; +use opte::engine::ingot_packet::MblkPacketData; use opte::engine::layer::DefaultAction; use opte::engine::layer::Layer; use opte::engine::layer::LayerActions; @@ -143,7 +143,7 @@ impl StaticAction for RewriteSrcMac { &self, _dir: Direction, _flow_id: &InnerFlowId, - _packet_meta: &PacketHeaders2, + _packet_meta: &MblkPacketData, _action_meta: &mut ActionMeta, ) -> GenHtResult { Ok(AllowOrDeny::Allow(HdrTransform { diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index 15ba4864..5642b5d6 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -20,9 +20,9 @@ use opte::engine::arp::ValidArpEthIpv4; use opte::engine::arp::ARP_HTYPE_ETHERNET; use opte::engine::flow_table::FlowTable; use opte::engine::ingot_base::EthernetRef; +use opte::engine::ingot_packet::FullParsed; use opte::engine::ingot_packet::OpteParsed2; use opte::engine::ingot_packet::Packet2; -use opte::engine::ingot_packet::Parsed2; use opte::engine::ingot_packet::ValidGeneveOverV6; use opte::engine::ingot_packet::ValidNoEncap; use opte::engine::ip4::Ipv4Addr; @@ -66,7 +66,7 @@ fn is_arp_req_for_tpa(tpa: Ipv4Addr, arp: &impl ArpEthIpv4Ref) -> bool { impl VpcNetwork { fn handle_arp_out( &self, - pkt: &mut Packet2>, + pkt: &mut Packet2>, ) -> Result where T::Chunk: ByteSliceMut, @@ -102,7 +102,7 @@ impl NetworkImpl for VpcNetwork { fn handle_pkt( &self, dir: Direction, - pkt: &mut Packet2>, + pkt: &mut Packet2>, _uft_in: &FlowTable>, _uft_out: &FlowTable>, ) -> Result diff --git a/lib/oxide-vpc/src/engine/overlay.rs b/lib/oxide-vpc/src/engine/overlay.rs 
index bfa83cd9..01cf8452 100644 --- a/lib/oxide-vpc/src/engine/overlay.rs +++ b/lib/oxide-vpc/src/engine/overlay.rs @@ -42,7 +42,7 @@ use opte::engine::headers::HeaderAction; use opte::engine::headers::IpAddr; use opte::engine::headers::IpCidr; use opte::engine::headers::IpPush; -use opte::engine::ingot_packet::PacketHeaders2; +use opte::engine::ingot_packet::MblkPacketData; use opte::engine::ip4::Protocol; use opte::engine::ip6::Ipv6Addr; use opte::engine::ip6::Ipv6Cidr; @@ -203,7 +203,7 @@ impl StaticAction for EncapAction { // The encap action is only used for outgoing. _dir: Direction, flow_id: &InnerFlowId, - _pkt_meta: &PacketHeaders2, + _pkt_meta: &MblkPacketData, action_meta: &mut ActionMeta, ) -> GenHtResult { let f_hash = flow_id.crc32(); @@ -382,7 +382,7 @@ impl StaticAction for DecapAction { // The decap action is only used for inbound. _dir: Direction, _flow_id: &InnerFlowId, - pkt_meta: &PacketHeaders2, + pkt_meta: &MblkPacketData, action_meta: &mut ActionMeta, ) -> GenHtResult { match pkt_meta.outer_encap_geneve_vni_and_origin() { diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index 0fbe3f9b..5194c1b3 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -33,9 +33,9 @@ use opte::engine::ingot_base::Ipv6Ref; use opte::engine::ingot_base::ValidL3; use opte::engine::ingot_base::ValidUlp; use opte::engine::ingot_base::L3; +use opte::engine::ingot_packet::MblkFullParsed; use opte::engine::ingot_packet::MsgBlk; use opte::engine::ingot_packet::Packet2; -use opte::engine::ingot_packet::ParsedMblk; use opte::engine::ip4::Ipv4Addr; use opte::engine::packet::InnerFlowId; use opte::engine::port::ProcessError; @@ -1557,7 +1557,7 @@ fn unpack_and_verify_icmp( } fn unpack_and_verify_icmp4( - pkt: &Packet2, + pkt: &Packet2, expected_ident: u16, seq_no: u16, ) { @@ -1574,7 +1574,7 @@ fn unpack_and_verify_icmp4( } fn unpack_and_verify_icmp6( - pkt: &Packet2, + pkt: 
&Packet2, expected_ident: u16, seq_no: u16, src_ip: Ipv6Addr, diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 0cddc33f..d6e1af3a 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -1614,7 +1614,7 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { // emitspec in the same place, then send elsewhere. let devs = unsafe { xde_devs.read() }; - let l4_hash = emit_spec.l4_hash; + let l4_hash = emit_spec.l4_hash(); let out_pkt = emit_spec.apply(pkt); From 445d8c0152a2fba39167121f4764ed686b6db737 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 21 Oct 2024 22:32:03 -0700 Subject: [PATCH 061/115] Fixup ubench. --- bench/src/packet.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/bench/src/packet.rs b/bench/src/packet.rs index cb197876..05a85c6f 100644 --- a/bench/src/packet.rs +++ b/bench/src/packet.rs @@ -11,8 +11,6 @@ use opte::engine::ingot_base::Ipv6; use opte::engine::ingot_base::L3Repr; use opte::engine::ingot_base::UlpRepr; use opte::engine::ingot_packet::MsgBlk; -use opte::engine::packet::Initialized; -use opte::engine::packet::Packet; use opte::engine::Direction; use opte::ingot::tcp::Tcp; use opte::ingot::tcp::TcpFlags; From d27c77ce21db170fe6a77502b672919dfa8eb0d2 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 23 Oct 2024 09:28:04 -0700 Subject: [PATCH 062/115] Wherein I push code around. 
--- bench/src/packet.rs | 10 +- lib/opte-test-utils/src/dhcp.rs | 6 +- lib/opte-test-utils/src/icmp.rs | 8 +- lib/opte-test-utils/src/lib.rs | 44 +- lib/opte/src/engine/arp.rs | 2 +- lib/opte/src/engine/dhcp.rs | 4 +- lib/opte/src/engine/dhcpv6/protocol.rs | 6 +- lib/opte/src/engine/ether.rs | 14 +- lib/opte/src/engine/flow_table.rs | 11 +- lib/opte/src/engine/icmp/v4.rs | 6 +- lib/opte/src/engine/icmp/v6.rs | 6 +- lib/opte/src/engine/ingot_base.rs | 345 ----------- lib/opte/src/engine/ingot_packet.rs | 578 +------------------ lib/opte/src/engine/ip/mod.rs | 143 +++++ lib/opte/src/engine/ip/v4.rs | 87 +++ lib/opte/src/engine/ip/v6.rs | 38 ++ lib/opte/src/engine/mod.rs | 5 +- lib/opte/src/engine/parse.rs | 693 +++++++++++++++++++++++ lib/opte/src/engine/port.rs | 8 +- lib/opte/src/engine/predicate.rs | 8 +- lib/opte/src/engine/rule.rs | 8 +- lib/oxide-vpc/src/engine/mod.rs | 6 +- lib/oxide-vpc/tests/integration_tests.rs | 16 +- 23 files changed, 1069 insertions(+), 983 deletions(-) delete mode 100644 lib/opte/src/engine/ingot_base.rs create mode 100644 lib/opte/src/engine/ip/mod.rs create mode 100644 lib/opte/src/engine/ip/v4.rs create mode 100644 lib/opte/src/engine/ip/v6.rs create mode 100644 lib/opte/src/engine/parse.rs diff --git a/bench/src/packet.rs b/bench/src/packet.rs index 05a85c6f..dd5f5682 100644 --- a/bench/src/packet.rs +++ b/bench/src/packet.rs @@ -5,12 +5,12 @@ // Copyright 2024 Oxide Computer Company use opte::engine::dhcpv6::MessageType; -use opte::engine::ingot_base::Ethernet; -use opte::engine::ingot_base::Ipv4; -use opte::engine::ingot_base::Ipv6; -use opte::engine::ingot_base::L3Repr; -use opte::engine::ingot_base::UlpRepr; +use opte::engine::ether::Ethernet; use opte::engine::ingot_packet::MsgBlk; +use opte::engine::ip::v4::Ipv4; +use opte::engine::ip::v6::Ipv6; +use opte::engine::ip::L3Repr; +use opte::engine::parse::UlpRepr; use opte::engine::Direction; use opte::ingot::tcp::Tcp; use opte::ingot::tcp::TcpFlags; diff --git 
a/lib/opte-test-utils/src/dhcp.rs b/lib/opte-test-utils/src/dhcp.rs index 28869175..5029d363 100644 --- a/lib/opte-test-utils/src/dhcp.rs +++ b/lib/opte-test-utils/src/dhcp.rs @@ -11,10 +11,10 @@ use dhcpv6::protocol::MessageType; use opte::engine::dhcp::DHCP_CLIENT_PORT; use opte::engine::dhcp::DHCP_SERVER_PORT; use opte::engine::dhcpv6; -use opte::engine::ingot_base::Ethernet; -use opte::engine::ingot_base::Ipv4; -use opte::engine::ingot_base::Ipv6; +use opte::engine::ether::Ethernet; use opte::engine::ingot_packet::MsgBlk; +use opte::engine::ip::v4::Ipv4; +use opte::engine::ip::v6::Ipv6; use opte::ingot::ethernet::Ethertype; use opte::ingot::ip::IpProtocol; use opte::ingot::udp::Udp; diff --git a/lib/opte-test-utils/src/icmp.rs b/lib/opte-test-utils/src/icmp.rs index a092de7e..2053dcab 100644 --- a/lib/opte-test-utils/src/icmp.rs +++ b/lib/opte-test-utils/src/icmp.rs @@ -7,11 +7,11 @@ //! Routines for ICMP testing. use opte::api::*; -use opte::engine::ingot_base::Ethernet; -use opte::engine::ingot_base::Ipv4; -use opte::engine::ingot_base::Ipv6; -use opte::engine::ingot_base::L3; +use opte::engine::ether::Ethernet; use opte::engine::ingot_packet::MsgBlk; +use opte::engine::ip::v4::Ipv4; +use opte::engine::ip::v6::Ipv6; +use opte::engine::ip::L3; use opte::ingot::ethernet::Ethertype; use opte::ingot::ip::IpProtocol as IngotIpProto; use opte::ingot::types::HeaderLen; diff --git a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index f9d52bd9..389f7344 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -20,26 +20,26 @@ pub use opte::api::Direction::*; pub use opte::api::MacAddr; pub use opte::engine::ether::EtherMeta; pub use opte::engine::ether::EtherType; +pub use opte::engine::ether::Ethernet; pub use opte::engine::geneve::GeneveMeta; pub use opte::engine::geneve::GeneveOption; pub use opte::engine::geneve::OxideOption; pub use opte::engine::geneve::Vni; -use opte::engine::geneve::GENEVE_OPT_CLASS_OXIDE; -use 
opte::engine::geneve::GENEVE_PORT; +pub use opte::engine::geneve::GENEVE_OPT_CLASS_OXIDE; +pub use opte::engine::geneve::GENEVE_PORT; pub use opte::engine::headers::IpAddr; pub use opte::engine::headers::IpCidr; -use opte::engine::ingot_base::Ethernet; -use opte::engine::ingot_base::Ipv4; -use opte::engine::ingot_base::Ipv6; -use opte::engine::ingot_base::L3Repr; -use opte::engine::ingot_packet::MblkLiteParsed; -use opte::engine::ingot_packet::MsgBlk; -use opte::engine::ingot_packet::Packet2; +pub use opte::engine::ingot_packet::MblkLiteParsed; +pub use opte::engine::ingot_packet::MsgBlk; +pub use opte::engine::ingot_packet::Packet2; +pub use opte::engine::ip::v4::Ipv4; +pub use opte::engine::ip::v6::Ipv6; +pub use opte::engine::ip::L3Repr; pub use opte::engine::ip4::Ipv4Addr; pub use opte::engine::ip4::Protocol; pub use opte::engine::ip6::Ipv6Addr; pub use opte::engine::layer::DenyReason; -use opte::engine::packet::ParseError; +pub use opte::engine::packet::ParseError; pub use opte::engine::port::meta::ActionMeta; pub use opte::engine::port::DropReason; pub use opte::engine::port::Port; @@ -47,18 +47,18 @@ pub use opte::engine::port::PortBuilder; pub use opte::engine::port::ProcessResult; pub use opte::engine::port::ProcessResult::*; pub use opte::engine::GenericUlp; -use opte::engine::NetworkParser; +pub use opte::engine::NetworkParser; pub use opte::ingot::ethernet::Ethertype; -use opte::ingot::geneve::Geneve; -use opte::ingot::geneve::GeneveOpt; -use opte::ingot::geneve::GeneveOptionType; +pub use opte::ingot::geneve::Geneve; +pub use opte::ingot::geneve::GeneveOpt; +pub use opte::ingot::geneve::GeneveOptionType; pub use opte::ingot::ip::IpProtocol as IngotIpProto; -use opte::ingot::tcp::Tcp; -use opte::ingot::tcp::TcpFlags as IngotTcpFlags; -use opte::ingot::types::Emit; -use opte::ingot::types::EmitDoesNotRelyOnBufContents; -use opte::ingot::types::HeaderLen; -use opte::ingot::udp::Udp; +pub use opte::ingot::tcp::Tcp; +pub use opte::ingot::tcp::TcpFlags as 
IngotTcpFlags; +pub use opte::ingot::types::Emit; +pub use opte::ingot::types::EmitDoesNotRelyOnBufContents; +pub use opte::ingot::types::HeaderLen; +pub use opte::ingot::udp::Udp; pub use opte::ExecCtx; pub use oxide_vpc::api::AddFwRuleReq; pub use oxide_vpc::api::DhcpCfg; @@ -82,8 +82,8 @@ pub use oxide_vpc::engine::overlay; pub use oxide_vpc::engine::overlay::Virt2Boundary; pub use oxide_vpc::engine::overlay::Virt2Phys; pub use oxide_vpc::engine::overlay::VpcMappings; -use oxide_vpc::engine::overlay::BOUNDARY_SERVICES_VNI; -use oxide_vpc::engine::overlay::TUNNEL_ENDPOINT_MAC; +pub use oxide_vpc::engine::overlay::BOUNDARY_SERVICES_VNI; +pub use oxide_vpc::engine::overlay::TUNNEL_ENDPOINT_MAC; pub use oxide_vpc::engine::router; pub use oxide_vpc::engine::VpcNetwork; pub use oxide_vpc::engine::VpcParser; diff --git a/lib/opte/src/engine/arp.rs b/lib/opte/src/engine/arp.rs index a7508ce9..bbabfe22 100644 --- a/lib/opte/src/engine/arp.rs +++ b/lib/opte/src/engine/arp.rs @@ -6,7 +6,7 @@ //! ARP headers and data. -use super::ingot_base::Ethernet; +use super::ether::Ethernet; use super::ingot_packet::MsgBlk; use core::fmt; use core::fmt::Display; diff --git a/lib/opte/src/engine/dhcp.rs b/lib/opte/src/engine/dhcp.rs index 95c6e907..2fb8967a 100644 --- a/lib/opte/src/engine/dhcp.rs +++ b/lib/opte/src/engine/dhcp.rs @@ -6,10 +6,10 @@ //! DHCP headers, data, and actions. 
-use super::ingot_base::Ethernet; -use super::ingot_base::Ipv4; +use super::ether::Ethernet; use super::ingot_packet::MblkPacketData; use super::ingot_packet::MsgBlk; +use super::ip::v4::Ipv4; use super::ip4::Ipv4Addr; use super::ip4::Protocol; use super::predicate::DataPredicate; diff --git a/lib/opte/src/engine/dhcpv6/protocol.rs b/lib/opte/src/engine/dhcpv6/protocol.rs index 845fc6bc..e7f19404 100644 --- a/lib/opte/src/engine/dhcpv6/protocol.rs +++ b/lib/opte/src/engine/dhcpv6/protocol.rs @@ -21,11 +21,11 @@ use crate::engine::dhcpv6::ALL_RELAYS_AND_SERVERS; use crate::engine::dhcpv6::ALL_SERVERS; use crate::engine::dhcpv6::CLIENT_PORT; use crate::engine::dhcpv6::SERVER_PORT; -use crate::engine::ingot_base::Ethernet; -use crate::engine::ingot_base::Ipv6; -use crate::engine::ingot_base::Ipv6Ref; +use crate::engine::ether::Ethernet; use crate::engine::ingot_packet::MblkPacketData; use crate::engine::ingot_packet::MsgBlk; +use crate::engine::ip::v6::Ipv6; +use crate::engine::ip::v6::Ipv6Ref; use crate::engine::predicate::DataPredicate; use crate::engine::predicate::EtherAddrMatch; use crate::engine::predicate::IpProtoMatch; diff --git a/lib/opte/src/engine/ether.rs b/lib/opte/src/engine/ether.rs index 4e3aa790..8bee880c 100644 --- a/lib/opte/src/engine/ether.rs +++ b/lib/opte/src/engine/ether.rs @@ -15,8 +15,9 @@ use core::fmt::Debug; use core::fmt::Display; use core::result; use core::str::FromStr; -use ingot::ethernet::Ethernet; +use ingot::ethernet::Ethertype; use ingot::types::HeaderLen; +use ingot::Ingot; use opte_api::MacAddr; use serde::Deserialize; use serde::Serialize; @@ -28,6 +29,17 @@ pub const ETHER_TYPE_IPV6: u16 = 0x86DD; pub const ETHER_ADDR_LEN: usize = 6; +#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq, Ingot)] +#[ingot(impl_default)] +pub struct Ethernet { + #[ingot(is = "[u8; 6]")] + pub destination: MacAddr, + #[ingot(is = "[u8; 6]")] + pub source: MacAddr, + #[ingot(is = "u16be", next_layer)] + pub ethertype: Ethertype, +} + #[repr(u16)] 
#[derive( Clone, Copy, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize, diff --git a/lib/opte/src/engine/flow_table.rs b/lib/opte/src/engine/flow_table.rs index 1e761ab8..0f80ebfe 100644 --- a/lib/opte/src/engine/flow_table.rs +++ b/lib/opte/src/engine/flow_table.rs @@ -197,15 +197,6 @@ where self.map.get(flow_id) } - /// Get a mutable reference to the flow entry for a given flow, if - /// one exists. - // pub fn get_mut( - // &mut self, - // flow_id: &InnerFlowId, - // ) -> Option<&mut FlowEntry> { - // self.map.get_mut(flow_id) - // } - /// Mark all flow table entries as requiring revalidation after a /// reset or removal of rules. /// @@ -294,7 +285,7 @@ pub struct FlowEntry { /// This tracks the last time the flow was matched. /// - /// These are raw u64s sourced from `Moment`, which track time + /// These are raw u64s sourced from a `Moment`, which tracks time /// in nanoseconds. last_hit: AtomicU64, diff --git a/lib/opte/src/engine/icmp/v4.rs b/lib/opte/src/engine/icmp/v4.rs index 57d86bbc..0e210d95 100644 --- a/lib/opte/src/engine/icmp/v4.rs +++ b/lib/opte/src/engine/icmp/v4.rs @@ -7,11 +7,11 @@ //! ICMPv4 headers and processing. use super::*; -use crate::engine::ingot_base::Ethernet; -use crate::engine::ingot_base::Ipv4; -use crate::engine::ingot_base::L3; +use crate::engine::ether::Ethernet; use crate::engine::ingot_packet::MblkPacketData; use crate::engine::ingot_packet::MsgBlk; +use crate::engine::ip::v4::Ipv4; +use crate::engine::ip::L3; use crate::engine::predicate::Ipv4AddrMatch; use ingot::ethernet::Ethertype; use ingot::ip::IpProtocol; diff --git a/lib/opte/src/engine/icmp/v6.rs b/lib/opte/src/engine/icmp/v6.rs index ef87bad1..c95016c6 100644 --- a/lib/opte/src/engine/icmp/v6.rs +++ b/lib/opte/src/engine/icmp/v6.rs @@ -7,11 +7,11 @@ //! ICMPv6 headers and processing. 
use super::*; -use crate::engine::ingot_base::Ethernet; -use crate::engine::ingot_base::Ipv6; -use crate::engine::ingot_base::Ipv6Ref; +use crate::engine::ether::Ethernet; use crate::engine::ingot_packet::MblkPacketData; use crate::engine::ingot_packet::MsgBlk; +use crate::engine::ip::v6::Ipv6; +use crate::engine::ip::v6::Ipv6Ref; use crate::engine::predicate::Ipv6AddrMatch; use alloc::string::String; use ingot::ethernet::Ethertype; diff --git a/lib/opte/src/engine/ingot_base.rs b/lib/opte/src/engine/ingot_base.rs deleted file mode 100644 index 715e26b4..00000000 --- a/lib/opte/src/engine/ingot_base.rs +++ /dev/null @@ -1,345 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -// Copyright 2024 Oxide Computer Company - -use super::checksum::Checksum; -use ingot::choice; -use ingot::ethernet::Ethertype; -use ingot::icmp::IcmpV4; -use ingot::icmp::IcmpV4Mut; -use ingot::icmp::IcmpV4Ref; -use ingot::icmp::IcmpV6; -use ingot::icmp::IcmpV6Mut; -use ingot::icmp::IcmpV6Ref; -use ingot::icmp::ValidIcmpV4; -use ingot::icmp::ValidIcmpV6; -use ingot::ip::Ecn; -use ingot::ip::IpProtocol; -use ingot::ip::Ipv4Flags; -use ingot::ip::LowRentV6EhRepr; -use ingot::tcp::Tcp; -use ingot::tcp::TcpMut; -use ingot::tcp::TcpRef; -use ingot::tcp::ValidTcp; -use ingot::types::primitives::*; -use ingot::types::util::Repeated; -use ingot::types::ByteSlice; -use ingot::types::Emit; -use ingot::types::Header; -use ingot::types::NextLayer; -use ingot::types::Vec; -use ingot::udp::Udp; -use ingot::udp::UdpMut; -use ingot::udp::UdpRef; -use ingot::udp::ValidUdp; -use ingot::Ingot; -use opte_api::Ipv4Addr; -use opte_api::Ipv6Addr; -use opte_api::MacAddr; -use zerocopy::ByteSliceMut; -use zerocopy::IntoBytes; - -// Redefine Ethernet and v4/v6 because we have our own, internal, -// address types already. 
- -#[choice(on = Ethertype)] -pub enum L3 { - Ipv4 = Ethertype::IPV4, - Ipv6 = Ethertype::IPV6, -} - -impl L3 { - pub fn pseudo_header(&self) -> Checksum { - match self { - L3::Ipv4(v4) => { - let mut pseudo_hdr_bytes = [0u8; 12]; - pseudo_hdr_bytes[0..4].copy_from_slice(v4.source().as_ref()); - pseudo_hdr_bytes[4..8] - .copy_from_slice(v4.destination().as_ref()); - pseudo_hdr_bytes[9] = v4.protocol().0; - let ulp_len = v4.total_len() - 4 * (v4.ihl() as u16); - pseudo_hdr_bytes[10..].copy_from_slice(&ulp_len.to_be_bytes()); - - Checksum::compute(&pseudo_hdr_bytes) - } - L3::Ipv6(v6) => { - let mut pseudo_hdr_bytes = [0u8; 40]; - pseudo_hdr_bytes[0..16].copy_from_slice(&v6.source().as_ref()); - pseudo_hdr_bytes[16..32] - .copy_from_slice(&v6.destination().as_ref()); - pseudo_hdr_bytes[39] = v6.next_layer().unwrap_or_default().0; - let ulp_len = v6.payload_len() as u32; - pseudo_hdr_bytes[32..36] - .copy_from_slice(&ulp_len.to_be_bytes()); - Checksum::compute(&pseudo_hdr_bytes) - } - } - } -} - -impl ValidL3 { - pub fn pseudo_header(&self) -> Checksum { - match self { - ValidL3::Ipv4(v4) => { - let mut pseudo_hdr_bytes = [0u8; 12]; - pseudo_hdr_bytes[0..4].copy_from_slice(v4.source().as_ref()); - pseudo_hdr_bytes[4..8] - .copy_from_slice(v4.destination().as_ref()); - // pseudo_hdr_bytes[8] reserved - pseudo_hdr_bytes[9] = v4.protocol().0; - let ulp_len = v4.total_len() - 4 * (v4.ihl() as u16); - pseudo_hdr_bytes[10..].copy_from_slice(&ulp_len.to_be_bytes()); - - Checksum::compute(&pseudo_hdr_bytes) - } - ValidL3::Ipv6(v6) => { - let mut pseudo_hdr_bytes = [0u8; 40]; - pseudo_hdr_bytes[0..16].copy_from_slice(&v6.source().as_ref()); - pseudo_hdr_bytes[16..32] - .copy_from_slice(&v6.destination().as_ref()); - pseudo_hdr_bytes[39] = v6.next_layer().unwrap_or_default().0; - let ulp_len = v6.payload_len() as u32; - pseudo_hdr_bytes[32..36] - .copy_from_slice(&ulp_len.to_be_bytes()); - - Checksum::compute(&pseudo_hdr_bytes) - } - } - } -} - -impl Ipv4 { - #[inline] - pub fn 
compute_checksum(&mut self) { - self.checksum = 0; - - let mut csum = Checksum::new(); - - let mut bytes = [0u8; 56]; - self.emit_raw(&mut bytes[..]); - csum.add_bytes(&bytes[..]); - - self.checksum = csum.finalize_for_ingot(); - } -} - -impl ValidIpv4 { - #[inline] - pub fn compute_checksum(&mut self) { - self.set_checksum(0); - - let mut csum = Checksum::new(); - - csum.add_bytes(self.0.as_bytes()); - - match &self.1 { - Header::Repr(opts) => { - csum.add_bytes(&*opts); - } - Header::Raw(opts) => { - csum.add_bytes(&*opts); - } - } - - self.set_checksum(csum.finalize_for_ingot()); - } -} - -impl L3 { - #[inline] - pub fn compute_checksum(&mut self) { - if let L3::Ipv4(ip) = self { - match ip { - Header::Repr(ip) => ip.compute_checksum(), - Header::Raw(ip) => ip.compute_checksum(), - } - } - } -} - -impl ValidL3 { - #[inline] - pub fn compute_checksum(&mut self) { - if let ValidL3::Ipv4(ip) = self { - ip.set_checksum(0); - - let mut csum = Checksum::new(); - csum.add_bytes(ip.0.as_bytes()); - match &ip.1 { - Header::Repr(opts) => { - csum.add_bytes(&*opts); - } - Header::Raw(opts) => { - csum.add_bytes(&*opts); - } - } - - ip.set_checksum(csum.finalize_for_ingot()); - } - } -} - -#[choice(on = IpProtocol)] -pub enum L4 { - Tcp = IpProtocol::TCP, - Udp = IpProtocol::UDP, -} - -#[choice(on = IpProtocol)] -pub enum Ulp { - Tcp = IpProtocol::TCP, - Udp = IpProtocol::UDP, - IcmpV4 = IpProtocol::ICMP, - IcmpV6 = IpProtocol::ICMP_V6, -} - -impl ValidUlp { - pub fn csum(&self) -> [u8; 2] { - match self { - ValidUlp::Tcp(t) => t.checksum(), - ValidUlp::Udp(u) => u.checksum(), - ValidUlp::IcmpV4(i4) => i4.checksum(), - ValidUlp::IcmpV6(i6) => i6.checksum(), - } - .to_be_bytes() - } -} - -impl ValidUlp { - pub fn compute_checksum( - &mut self, - mut body_csum: Checksum, - l3: &ValidL3, - ) { - match self { - // ICMP4 requires the body_csum *without* - // the pseudoheader added back in. 
- ValidUlp::IcmpV4(i4) => { - i4.set_checksum(0); - body_csum.add_bytes(i4.0.as_bytes()); - i4.set_checksum(body_csum.finalize_for_ingot()); - } - ValidUlp::IcmpV6(i6) => { - body_csum += l3.pseudo_header(); - - i6.set_checksum(0); - body_csum.add_bytes(i6.0.as_bytes()); - i6.set_checksum(body_csum.finalize_for_ingot()); - } - ValidUlp::Tcp(tcp) => { - body_csum += l3.pseudo_header(); - - tcp.set_checksum(0); - body_csum.add_bytes(tcp.0.as_bytes()); - match &tcp.1 { - Header::Repr(opts) => { - body_csum.add_bytes(&*opts); - } - Header::Raw(opts) => { - body_csum.add_bytes(&*opts); - } - } - tcp.set_checksum(body_csum.finalize_for_ingot()); - } - ValidUlp::Udp(udp) => { - body_csum += l3.pseudo_header(); - - udp.set_checksum(0); - body_csum.add_bytes(udp.0.as_bytes()); - udp.set_checksum(body_csum.finalize_for_ingot()); - } - } - } -} - -impl Ulp { - pub fn src_port(&self) -> Option { - match self { - Ulp::Tcp(t) => Some(t.source()), - Ulp::Udp(u) => Some(u.source()), - _ => None, - } - } -} - -impl ValidL3 { - pub fn csum(&self) -> [u8; 2] { - match self { - ValidL3::Ipv4(i4) => i4.checksum(), - ValidL3::Ipv6(_) => 0, - } - .to_be_bytes() - } -} - -#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq, Ingot)] -#[ingot(impl_default)] -pub struct Ethernet { - #[ingot(is = "[u8; 6]")] - pub destination: MacAddr, - #[ingot(is = "[u8; 6]")] - pub source: MacAddr, - #[ingot(is = "u16be", next_layer)] - pub ethertype: Ethertype, -} - -#[derive(Clone, Debug, Eq, Hash, PartialEq, Ingot)] -#[ingot(impl_default)] -pub struct Ipv4 { - #[ingot(default = 4)] - pub version: u4, - #[ingot(default = 5)] - pub ihl: u4, - pub dscp: u6, - #[ingot(is = "u2")] - pub ecn: Ecn, - // #[ingot(payload_len() + packet_len())] - pub total_len: u16be, - - pub identification: u16be, - #[ingot(is = "u3")] - pub flags: Ipv4Flags, - pub fragment_offset: u13be, - - #[ingot(default = 128)] - pub hop_limit: u8, - #[ingot(is = "u8", next_layer)] - pub protocol: IpProtocol, - pub checksum: u16be, - - 
#[ingot(is = "[u8; 4]", default = Ipv4Addr::ANY_ADDR)] - pub source: Ipv4Addr, - #[ingot(is = "[u8; 4]", default = Ipv4Addr::ANY_ADDR)] - pub destination: Ipv4Addr, - - #[ingot(var_len = "(ihl * 4).saturating_sub(20)")] - pub options: Vec, -} - -#[derive(Debug, Clone, Ingot, Eq, PartialEq)] -#[ingot(impl_default)] -pub struct Ipv6 { - #[ingot(default = "6")] - pub version: u4, - pub dscp: u6, - #[ingot(is = "u2")] - pub ecn: Ecn, - pub flow_label: u20be, - - // #[ingot(payload_len)] - pub payload_len: u16be, - #[ingot(is = "u8", next_layer)] - pub next_header: IpProtocol, - // #[ingot(default = 128)] - pub hop_limit: u8, - - #[ingot(is = "[u8; 16]", default = Ipv6Addr::ANY_ADDR)] - pub source: Ipv6Addr, - #[ingot(is = "[u8; 16]", default = Ipv6Addr::ANY_ADDR)] - pub destination: Ipv6Addr, - - #[ingot(subparse(on_next_layer))] - pub v6ext: Repeated, -} diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 29ddecd3..a477894a 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -4,11 +4,13 @@ // Copyright 2024 Oxide Computer Company -use super::checksum::Checksum as OpteCsum; use super::checksum::Checksum; -use super::checksum::HeaderChecksum; use super::ether::EtherMeta; use super::ether::EtherMod; +use super::ether::Ethernet; +use super::ether::EthernetMut; +use super::ether::EthernetPacket; +use super::ether::ValidEthernet; use super::geneve::OxideOption; use super::geneve::GENEVE_OPT_CLASS_OXIDE; use super::geneve::GENEVE_PORT; @@ -26,27 +28,17 @@ use super::icmp::IcmpEchoMut; use super::icmp::IcmpEchoRef; use super::icmp::QueryEcho; use super::icmp::ValidIcmpEcho; -use super::ingot_base::Ethernet; -use super::ingot_base::EthernetMut; -use super::ingot_base::EthernetPacket; -use super::ingot_base::EthernetRef; -use super::ingot_base::Ipv4; -use super::ingot_base::Ipv4Mut; -use super::ingot_base::Ipv4Packet; -use super::ingot_base::Ipv4Ref; -use super::ingot_base::Ipv6; -use 
super::ingot_base::Ipv6Mut; -use super::ingot_base::Ipv6Packet; -use super::ingot_base::Ipv6Ref; -use super::ingot_base::L3Repr; -use super::ingot_base::Ulp; -use super::ingot_base::UlpRepr; -use super::ingot_base::ValidEthernet; -use super::ingot_base::ValidL3; -use super::ingot_base::ValidL4; -use super::ingot_base::ValidUlp; -use super::ingot_base::L3; -use super::ingot_base::L4; +use super::ip::v4::Ipv4; +use super::ip::v4::Ipv4Mut; +use super::ip::v4::Ipv4Packet; +use super::ip::v4::Ipv4Ref; +use super::ip::v6::Ipv6; +use super::ip::v6::Ipv6Mut; +use super::ip::v6::Ipv6Packet; +use super::ip::v6::Ipv6Ref; +use super::ip::L3Repr; +use super::ip::ValidL3; +use super::ip::L3; use super::packet::allocb; use super::packet::AddrPair; use super::packet::BodyTransform; @@ -58,6 +50,9 @@ use super::packet::SegAdjustError; use super::packet::WrapError; use super::packet::WriteError; use super::packet::FLOW_ID_DEFAULT; +use super::parse::NoEncap; +use super::parse::Ulp; +use super::parse::UlpRepr; use super::rule::CompiledEncap; use super::rule::CompiledTransform; use super::rule::HdrTransform; @@ -91,7 +86,6 @@ use ingot::geneve::Geneve; use ingot::geneve::GeneveMut; use ingot::geneve::GeneveOpt; use ingot::geneve::GeneveOptionType; -use ingot::geneve::GenevePacket; use ingot::geneve::GeneveRef; use ingot::geneve::ValidGeneve; use ingot::icmp::IcmpV4Mut; @@ -115,7 +109,6 @@ use ingot::types::HeaderLen; use ingot::types::HeaderParse; use ingot::types::InlineHeader; use ingot::types::NextLayer; -use ingot::types::ParseControl; use ingot::types::ParseError as IngotParseErr; use ingot::types::Parsed as IngotParsed; use ingot::types::Read; @@ -125,7 +118,6 @@ use ingot::udp::UdpMut; use ingot::udp::UdpPacket; use ingot::udp::UdpRef; use ingot::udp::ValidUdp; -use ingot::Parse; use opte_api::Direction; use opte_api::Ipv6Addr; use opte_api::Vni; @@ -133,399 +125,6 @@ use zerocopy::ByteSlice; use zerocopy::ByteSliceMut; use zerocopy::IntoBytes; -#[derive(Parse)] -pub struct 
GeneveOverV6 { - pub outer_eth: EthernetPacket, - #[ingot(from = "L3")] - pub outer_v6: Ipv6Packet, - #[ingot(from = "L4", control = geneve_dst_port)] - pub outer_udp: UdpPacket, - pub outer_encap: GenevePacket, - - pub inner_eth: EthernetPacket, - pub inner_l3: L3, - pub inner_ulp: Ulp, -} - -#[inline] -fn geneve_dst_port(l4: &ValidL4) -> ParseControl { - match l4 { - ValidL4::Udp(u) if u.destination() == GENEVE_PORT => { - ParseControl::Continue - } - _ => ParseControl::Reject, - } -} - -#[inline] -fn exit_on_arp(eth: &ValidEthernet) -> ParseControl { - if eth.ethertype() == Ethertype::ARP { - ParseControl::Accept - } else { - ParseControl::Continue - } -} - -#[derive(Parse)] -pub struct NoEncap { - #[ingot(control = exit_on_arp)] - pub inner_eth: EthernetPacket, - pub inner_l3: Option>, - pub inner_ulp: Option>, -} - -impl From> for OpteMeta { - #[inline] - fn from(value: ValidNoEncap) -> Self { - NoEncap::from(value).into() - } -} - -impl LightweightMeta for ValidNoEncap { - #[inline] - fn flow(&self) -> InnerFlowId { - let (proto, addrs) = match &self.inner_l3 { - Some(ValidL3::Ipv4(pkt)) => ( - pkt.protocol().0, - AddrPair::V4 { src: pkt.source(), dst: pkt.destination() }, - ), - Some(ValidL3::Ipv6(pkt)) => ( - pkt.next_layer().unwrap_or_default().0, - AddrPair::V6 { src: pkt.source(), dst: pkt.destination() }, - ), - None => (255, FLOW_ID_DEFAULT.addrs), - }; - - let (src_port, dst_port) = self - .inner_ulp - .as_ref() - .map(|ulp| { - ( - actual_src_port_v(ulp) - .or_else(|| pseudo_port_v(ulp)) - .unwrap_or(0), - actual_dst_port_v(ulp) - .or_else(|| pseudo_port_v(ulp)) - .unwrap_or(0), - ) - }) - .unwrap_or((0, 0)); - - InnerFlowId { proto: proto.into(), addrs, src_port, dst_port } - } - - #[inline] - fn run_compiled_transform(&mut self, transform: &CompiledTransform) - where - V: ByteSliceMut, - { - // TODO: break out commonalities for this and geneve. 
- if let Some(ether_tx) = &transform.inner_ether { - if let Some(new_src) = ðer_tx.src { - self.inner_eth.set_source(*new_src); - } - if let Some(new_dst) = ðer_tx.dst { - self.inner_eth.set_destination(*new_dst); - } - } - match (&mut self.inner_l3, &transform.inner_ip) { - (Some(ValidL3::Ipv4(pkt)), Some(IpMod::Ip4(tx))) => { - if let Some(new_src) = &tx.src { - pkt.set_source(*new_src); - } - if let Some(new_dst) = &tx.dst { - pkt.set_destination(*new_dst); - } - if let Some(new_proto) = &tx.proto { - pkt.set_protocol(IpProtocol(u8::from(*new_proto))); - } - } - (Some(ValidL3::Ipv6(pkt)), Some(IpMod::Ip6(tx))) => { - if let Some(new_src) = &tx.src { - pkt.set_source(*new_src); - } - if let Some(new_dst) = &tx.dst { - pkt.set_destination(*new_dst); - } - if let Some(new_proto) = &tx.proto { - // TODO: wrong in the face of EHs... - // For now, we never use this on our dataplane. - pkt.set_next_header(IpProtocol(u8::from(*new_proto))); - } - } - _ => {} - } - - match (&mut self.inner_ulp, &transform.inner_ulp) { - (Some(ValidUlp::Tcp(pkt)), Some(tx)) => { - if let Some(flags) = tx.tcp_flags { - pkt.set_flags(TcpFlags::from_bits_retain(flags)); - } - - if let Some(new_src) = &tx.generic.src_port { - pkt.set_source(*new_src); - } - - if let Some(new_dst) = &tx.generic.dst_port { - pkt.set_destination(*new_dst); - } - } - (Some(ValidUlp::Udp(pkt)), Some(tx)) => { - if let Some(new_src) = &tx.generic.src_port { - pkt.set_source(*new_src); - } - - if let Some(new_dst) = &tx.generic.dst_port { - pkt.set_destination(*new_dst); - } - } - (Some(ValidUlp::IcmpV4(pkt)), Some(tx)) - if pkt.ty() == 0 || pkt.ty() == 8 => - { - if let Some(new_id) = tx.icmp_id { - pkt.rest_of_hdr_mut()[..2] - .copy_from_slice(&new_id.to_be_bytes()) - } - } - (Some(ValidUlp::IcmpV6(pkt)), Some(tx)) - if pkt.ty() == 128 || pkt.ty() == 129 => - { - if let Some(new_id) = tx.icmp_id { - pkt.rest_of_hdr_mut()[..2] - .copy_from_slice(&new_id.to_be_bytes()) - } - } - _ => {} - } - } - - // FIXME: 
identical to Geneve. - #[inline] - fn compute_body_csum(&self) -> Option { - let use_pseudo = if let Some(v) = &self.inner_ulp { - !matches!(v, ValidUlp::IcmpV4(_)) - } else { - false - }; - - let pseudo_csum = match self.inner_eth.ethertype() { - Ethertype::IPV4 | Ethertype::IPV6 => { - self.inner_l3.as_ref().map(|v| v.pseudo_header()) - } - // Includes ARP. - _ => return None, - }; - - let Some(pseudo_csum) = pseudo_csum else { - return None; - }; - - self.inner_ulp.as_ref().and_then(csum_minus_hdr).map(|mut v| { - if use_pseudo { - v -= pseudo_csum; - } - v - }) - } - - #[inline] - fn encap_len(&self) -> u16 { - 0 - } - - #[inline] - fn update_inner_checksums(&mut self, body_csum: OpteCsum) { - if let Some(l3) = self.inner_l3.as_mut() { - if let Some(ulp) = self.inner_ulp.as_mut() { - ulp.compute_checksum(body_csum, l3); - } - l3.compute_checksum(); - } - } - - #[inline] - fn inner_tcp(&self) -> Option<&impl TcpRef> { - match self.inner_ulp.as_ref() { - Some(ValidUlp::Tcp(t)) => Some(t), - _ => None, - } - } -} - -impl From> for OpteMeta { - #[inline] - fn from(value: ValidGeneveOverV6) -> Self { - OpteMeta { - outer_eth: Some(value.outer_eth.into()), - outer_l3: Some(L3::Ipv6(value.outer_v6.into())), - outer_encap: Some(InlineHeader::Raw(ValidEncapMeta::Geneve( - value.outer_udp, - value.outer_encap, - ))), - inner_eth: value.inner_eth.into(), - inner_l3: Some(value.inner_l3.into()), - inner_ulp: Some(value.inner_ulp.into()), - } - } -} - -impl LightweightMeta for ValidGeneveOverV6 { - #[inline] - fn flow(&self) -> InnerFlowId { - let (proto, addrs) = match &self.inner_l3 { - ValidL3::Ipv4(pkt) => ( - pkt.protocol().0, - AddrPair::V4 { src: pkt.source(), dst: pkt.destination() }, - ), - ValidL3::Ipv6(pkt) => ( - pkt.next_layer().unwrap_or_default().0, - AddrPair::V6 { src: pkt.source(), dst: pkt.destination() }, - ), - }; - - let src_port = actual_src_port_v(&self.inner_ulp) - .or_else(|| pseudo_port_v(&self.inner_ulp)) - .unwrap_or(0); - - let dst_port = 
actual_dst_port_v(&self.inner_ulp) - .or_else(|| pseudo_port_v(&self.inner_ulp)) - .unwrap_or(0); - - InnerFlowId { proto: proto.into(), addrs, src_port, dst_port } - } - - #[inline] - fn run_compiled_transform(&mut self, transform: &CompiledTransform) - where - V: ByteSliceMut, - { - // TODO: break out commonalities for this and geneve. - if let Some(ether_tx) = &transform.inner_ether { - if let Some(new_src) = ðer_tx.src { - self.inner_eth.set_source(*new_src); - } - if let Some(new_dst) = ðer_tx.dst { - self.inner_eth.set_destination(*new_dst); - } - } - match (&mut self.inner_l3, &transform.inner_ip) { - (ValidL3::Ipv4(pkt), Some(IpMod::Ip4(tx))) => { - if let Some(new_src) = &tx.src { - pkt.set_source(*new_src); - } - if let Some(new_dst) = &tx.dst { - pkt.set_destination(*new_dst); - } - if let Some(new_proto) = &tx.proto { - pkt.set_protocol(IpProtocol(u8::from(*new_proto))); - } - } - (ValidL3::Ipv6(pkt), Some(IpMod::Ip6(tx))) => { - if let Some(new_src) = &tx.src { - pkt.set_source(*new_src); - } - if let Some(new_dst) = &tx.dst { - pkt.set_destination(*new_dst); - } - if let Some(new_proto) = &tx.proto { - // TODO: wrong in the face of EHs... - // For now, we never use this on our dataplane. 
- pkt.set_next_header(IpProtocol(u8::from(*new_proto))); - } - } - _ => {} - } - - match (&mut self.inner_ulp, &transform.inner_ulp) { - (ValidUlp::Tcp(pkt), Some(tx)) => { - if let Some(flags) = tx.tcp_flags { - pkt.set_flags(TcpFlags::from_bits_retain(flags)); - } - - if let Some(new_src) = &tx.generic.src_port { - pkt.set_source(*new_src); - } - - if let Some(new_dst) = &tx.generic.dst_port { - pkt.set_destination(*new_dst); - } - } - (ValidUlp::Udp(pkt), Some(tx)) => { - if let Some(new_src) = &tx.generic.src_port { - pkt.set_source(*new_src); - } - - if let Some(new_dst) = &tx.generic.dst_port { - pkt.set_destination(*new_dst); - } - } - (ValidUlp::IcmpV4(pkt), Some(tx)) - if pkt.ty() == 0 || pkt.ty() == 8 => - { - if let Some(new_id) = tx.icmp_id { - pkt.rest_of_hdr_mut()[..2] - .copy_from_slice(&new_id.to_be_bytes()) - } - } - (ValidUlp::IcmpV6(pkt), Some(tx)) - if pkt.ty() == 128 || pkt.ty() == 129 => - { - if let Some(new_id) = tx.icmp_id { - pkt.rest_of_hdr_mut()[..2] - .copy_from_slice(&new_id.to_be_bytes()) - } - } - _ => {} - } - } - - #[inline] - fn compute_body_csum(&self) -> Option { - let use_pseudo = !matches!(self.inner_ulp, ValidUlp::IcmpV4(_)); - - let pseudo_csum = match self.inner_eth.ethertype() { - Ethertype::IPV4 | Ethertype::IPV6 => { - Some(self.inner_l3.pseudo_header()) - } - // Includes ARP. 
- _ => return None, - }; - - let Some(pseudo_csum) = pseudo_csum else { - return None; - }; - - csum_minus_hdr(&self.inner_ulp).map(|mut v| { - if use_pseudo { - v -= pseudo_csum; - } - v - }) - } - - #[inline] - fn encap_len(&self) -> u16 { - (self.outer_eth.packet_length() - + self.outer_v6.packet_length() - + self.outer_udp.packet_length() - + self.outer_encap.packet_length()) as u16 - } - - #[inline] - fn update_inner_checksums(&mut self, body_csum: OpteCsum) { - self.inner_ulp.compute_checksum(body_csum, &self.inner_l3); - self.inner_l3.compute_checksum(); - } - - #[inline] - fn inner_tcp(&self) -> Option<&impl TcpRef> { - match &self.inner_ulp { - ValidUlp::Tcp(t) => Some(t), - _ => None, - } - } -} - /// An individual illumos `mblk_t` -- a single bytestream /// comprised of a linked list of data segments. /// @@ -1565,70 +1164,6 @@ impl PacketData { } } -fn actual_src_port(chunk: &Ulp) -> Option { - match chunk { - Ulp::Tcp(pkt) => Some(pkt.source()), - Ulp::Udp(pkt) => Some(pkt.source()), - _ => None, - } -} - -fn actual_src_port_v(chunk: &ValidUlp) -> Option { - match chunk { - ValidUlp::Tcp(pkt) => Some(pkt.source()), - ValidUlp::Udp(pkt) => Some(pkt.source()), - _ => None, - } -} - -fn actual_dst_port(chunk: &Ulp) -> Option { - match chunk { - Ulp::Tcp(pkt) => Some(pkt.destination()), - Ulp::Udp(pkt) => Some(pkt.destination()), - _ => None, - } -} - -fn actual_dst_port_v(chunk: &ValidUlp) -> Option { - match chunk { - ValidUlp::Tcp(pkt) => Some(pkt.destination()), - ValidUlp::Udp(pkt) => Some(pkt.destination()), - _ => None, - } -} - -fn pseudo_port(chunk: &Ulp) -> Option { - match chunk { - Ulp::IcmpV4(pkt) - if pkt.code() == 0 && (pkt.ty() == 0 || pkt.ty() == 8) => - { - Some(u16::from_be_bytes(pkt.rest_of_hdr()[..2].try_into().unwrap())) - } - Ulp::IcmpV6(pkt) - if pkt.code() == 0 && (pkt.ty() == 128 || pkt.ty() == 129) => - { - Some(u16::from_be_bytes(pkt.rest_of_hdr()[..2].try_into().unwrap())) - } - _ => None, - } -} - -fn pseudo_port_v(chunk: 
&ValidUlp) -> Option { - match chunk { - ValidUlp::IcmpV4(pkt) - if pkt.code() == 0 && (pkt.ty() == 0 || pkt.ty() == 8) => - { - Some(u16::from_be_bytes(pkt.rest_of_hdr()[..2].try_into().unwrap())) - } - ValidUlp::IcmpV6(pkt) - if pkt.code() == 0 && (pkt.ty() == 128 || pkt.ty() == 129) => - { - Some(u16::from_be_bytes(pkt.rest_of_hdr()[..2].try_into().unwrap())) - } - _ => None, - } -} - impl From<&PacketData> for InnerFlowId { #[inline] fn from(meta: &PacketData) -> Self { @@ -1648,11 +1183,11 @@ impl From<&PacketData> for InnerFlowId { .inner_ulp() .map(|ulp| { ( - actual_src_port(ulp) - .or_else(|| pseudo_port(ulp)) + ulp.true_src_port() + .or_else(|| ulp.pseudo_port()) .unwrap_or(0), - actual_dst_port(ulp) - .or_else(|| pseudo_port(ulp)) + ulp.true_dst_port() + .or_else(|| ulp.pseudo_port()) .unwrap_or(0), ) }) @@ -2416,75 +1951,6 @@ pub type MblkPacketData<'a> = PacketData>; pub type MblkFullParsed<'a> = FullParsed>; pub type MblkLiteParsed<'a, M> = LiteParsed, M>; -#[inline] -fn csum_minus_hdr(ulp: &ValidUlp) -> Option { - match ulp { - ValidUlp::IcmpV4(icmp) => { - if icmp.checksum() == 0 { - return None; - } - - let mut csum = OpteCsum::from(HeaderChecksum::wrap( - icmp.checksum().to_be_bytes(), - )); - - csum.sub_bytes(&[icmp.ty(), icmp.code()]); - csum.sub_bytes(icmp.rest_of_hdr_ref()); - - Some(csum) - } - ValidUlp::IcmpV6(icmp) => { - if icmp.checksum() == 0 { - return None; - } - - let mut csum = OpteCsum::from(HeaderChecksum::wrap( - icmp.checksum().to_be_bytes(), - )); - - csum.sub_bytes(&[icmp.ty(), icmp.code()]); - csum.sub_bytes(icmp.rest_of_hdr_ref()); - - Some(csum) - } - ValidUlp::Tcp(tcp) => { - if tcp.checksum() == 0 { - return None; - } - - let mut csum = OpteCsum::from(HeaderChecksum::wrap( - tcp.checksum().to_be_bytes(), - )); - - let b = tcp.0.as_bytes(); - - csum.sub_bytes(&b[0..16]); - csum.sub_bytes(&b[18..]); - - csum.sub_bytes(match &tcp.1 { - ingot::types::Header::Repr(v) => &v[..], - ingot::types::Header::Raw(v) => &v[..], - }); - 
- Some(csum) - } - ValidUlp::Udp(udp) => { - if udp.checksum() == 0 { - return None; - } - - let mut csum = OpteCsum::from(HeaderChecksum::wrap( - udp.checksum().to_be_bytes(), - )); - - let b = udp.0.as_bytes(); - csum.sub_bytes(&b[0..6]); - - Some(csum) - } - } -} - pub trait QueryLen { fn len(&self) -> usize; } diff --git a/lib/opte/src/engine/ip/mod.rs b/lib/opte/src/engine/ip/mod.rs new file mode 100644 index 00000000..8bbe0f22 --- /dev/null +++ b/lib/opte/src/engine/ip/mod.rs @@ -0,0 +1,143 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2024 Oxide Computer Company + +pub mod v4; +pub mod v6; + +use super::checksum::Checksum; +use ingot::choice; +use ingot::ethernet::Ethertype; +use ingot::types::ByteSlice; +use ingot::types::Header; +use ingot::types::NextLayer; +use ingot::Ingot; +use opte_api::MacAddr; +use v4::*; +use v6::*; +use zerocopy::ByteSliceMut; +use zerocopy::IntoBytes; + +// Redefine Ethernet and v4/v6 because we have our own, internal, +// address types already. 
+ +#[choice(on = Ethertype)] +pub enum L3 { + Ipv4 = Ethertype::IPV4, + Ipv6 = Ethertype::IPV6, +} + +impl L3 { + pub fn pseudo_header(&self) -> Checksum { + match self { + L3::Ipv4(v4) => { + let mut pseudo_hdr_bytes = [0u8; 12]; + pseudo_hdr_bytes[0..4].copy_from_slice(v4.source().as_ref()); + pseudo_hdr_bytes[4..8] + .copy_from_slice(v4.destination().as_ref()); + pseudo_hdr_bytes[9] = v4.protocol().0; + let ulp_len = v4.total_len() - 4 * (v4.ihl() as u16); + pseudo_hdr_bytes[10..].copy_from_slice(&ulp_len.to_be_bytes()); + + Checksum::compute(&pseudo_hdr_bytes) + } + L3::Ipv6(v6) => { + let mut pseudo_hdr_bytes = [0u8; 40]; + pseudo_hdr_bytes[0..16].copy_from_slice(&v6.source().as_ref()); + pseudo_hdr_bytes[16..32] + .copy_from_slice(&v6.destination().as_ref()); + pseudo_hdr_bytes[39] = v6.next_layer().unwrap_or_default().0; + let ulp_len = v6.payload_len() as u32; + pseudo_hdr_bytes[32..36] + .copy_from_slice(&ulp_len.to_be_bytes()); + Checksum::compute(&pseudo_hdr_bytes) + } + } + } +} + +impl L3 { + #[inline] + pub fn compute_checksum(&mut self) { + if let L3::Ipv4(ip) = self { + match ip { + Header::Repr(ip) => ip.compute_checksum(), + Header::Raw(ip) => ip.compute_checksum(), + } + } + } +} + +impl ValidL3 { + pub fn pseudo_header(&self) -> Checksum { + match self { + ValidL3::Ipv4(v4) => { + let mut pseudo_hdr_bytes = [0u8; 12]; + pseudo_hdr_bytes[0..4].copy_from_slice(v4.source().as_ref()); + pseudo_hdr_bytes[4..8] + .copy_from_slice(v4.destination().as_ref()); + // pseudo_hdr_bytes[8] reserved + pseudo_hdr_bytes[9] = v4.protocol().0; + let ulp_len = v4.total_len() - 4 * (v4.ihl() as u16); + pseudo_hdr_bytes[10..].copy_from_slice(&ulp_len.to_be_bytes()); + + Checksum::compute(&pseudo_hdr_bytes) + } + ValidL3::Ipv6(v6) => { + let mut pseudo_hdr_bytes = [0u8; 40]; + pseudo_hdr_bytes[0..16].copy_from_slice(&v6.source().as_ref()); + pseudo_hdr_bytes[16..32] + .copy_from_slice(&v6.destination().as_ref()); + pseudo_hdr_bytes[39] = 
v6.next_layer().unwrap_or_default().0; + let ulp_len = v6.payload_len() as u32; + pseudo_hdr_bytes[32..36] + .copy_from_slice(&ulp_len.to_be_bytes()); + + Checksum::compute(&pseudo_hdr_bytes) + } + } + } + + pub fn csum(&self) -> [u8; 2] { + match self { + ValidL3::Ipv4(i4) => i4.checksum(), + ValidL3::Ipv6(_) => 0, + } + .to_be_bytes() + } +} + +impl ValidL3 { + #[inline] + pub fn compute_checksum(&mut self) { + if let ValidL3::Ipv4(ip) = self { + ip.set_checksum(0); + + let mut csum = Checksum::new(); + csum.add_bytes(ip.0.as_bytes()); + match &ip.1 { + Header::Repr(opts) => { + csum.add_bytes(&*opts); + } + Header::Raw(opts) => { + csum.add_bytes(&*opts); + } + } + + ip.set_checksum(csum.finalize_for_ingot()); + } + } +} + +#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq, Ingot)] +#[ingot(impl_default)] +pub struct Ethernet { + #[ingot(is = "[u8; 6]")] + pub destination: MacAddr, + #[ingot(is = "[u8; 6]")] + pub source: MacAddr, + #[ingot(is = "u16be", next_layer)] + pub ethertype: Ethertype, +} diff --git a/lib/opte/src/engine/ip/v4.rs b/lib/opte/src/engine/ip/v4.rs new file mode 100644 index 00000000..d63c7e95 --- /dev/null +++ b/lib/opte/src/engine/ip/v4.rs @@ -0,0 +1,87 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +// Copyright 2024 Oxide Computer Company + +use crate::engine::checksum::Checksum; +use ingot::ip::Ecn; +use ingot::ip::IpProtocol; +use ingot::ip::Ipv4Flags; +use ingot::types::primitives::*; +use ingot::types::Emit; +use ingot::types::Header; +use ingot::types::Vec; +use ingot::Ingot; +use opte_api::Ipv4Addr; +use zerocopy::ByteSliceMut; +use zerocopy::IntoBytes; + +#[derive(Clone, Debug, Eq, Hash, PartialEq, Ingot)] +#[ingot(impl_default)] +pub struct Ipv4 { + #[ingot(default = 4)] + pub version: u4, + #[ingot(default = 5)] + pub ihl: u4, + pub dscp: u6, + #[ingot(is = "u2")] + pub ecn: Ecn, + pub total_len: u16be, + + pub identification: u16be, + #[ingot(is = "u3")] + pub flags: Ipv4Flags, + pub fragment_offset: u13be, + + #[ingot(default = 128)] + pub hop_limit: u8, + #[ingot(is = "u8", next_layer)] + pub protocol: IpProtocol, + pub checksum: u16be, + + #[ingot(is = "[u8; 4]", default = Ipv4Addr::ANY_ADDR)] + pub source: Ipv4Addr, + #[ingot(is = "[u8; 4]", default = Ipv4Addr::ANY_ADDR)] + pub destination: Ipv4Addr, + + #[ingot(var_len = "(ihl * 4).saturating_sub(20)")] + pub options: Vec, +} + +impl Ipv4 { + #[inline] + pub fn compute_checksum(&mut self) { + self.checksum = 0; + + let mut csum = Checksum::new(); + + let mut bytes = [0u8; 56]; + self.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + + self.checksum = csum.finalize_for_ingot(); + } +} + +impl ValidIpv4 { + #[inline] + pub fn compute_checksum(&mut self) { + self.set_checksum(0); + + let mut csum = Checksum::new(); + + csum.add_bytes(self.0.as_bytes()); + + match &self.1 { + Header::Repr(opts) => { + csum.add_bytes(&*opts); + } + Header::Raw(opts) => { + csum.add_bytes(&*opts); + } + } + + self.set_checksum(csum.finalize_for_ingot()); + } +} diff --git a/lib/opte/src/engine/ip/v6.rs b/lib/opte/src/engine/ip/v6.rs new file mode 100644 index 00000000..f4b33220 --- /dev/null +++ b/lib/opte/src/engine/ip/v6.rs @@ -0,0 +1,38 @@ +// This Source Code Form is subject to the terms of the Mozilla 
Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2024 Oxide Computer Company + +use ingot::ip::Ecn; +use ingot::ip::IpProtocol; +use ingot::ip::LowRentV6EhRepr; +use ingot::types::primitives::*; +use ingot::types::util::Repeated; +use ingot::Ingot; +use opte_api::Ipv6Addr; + +#[derive(Debug, Clone, Ingot, Eq, PartialEq)] +#[ingot(impl_default)] +pub struct Ipv6 { + #[ingot(default = "6")] + pub version: u4, + pub dscp: u6, + #[ingot(is = "u2")] + pub ecn: Ecn, + pub flow_label: u20be, + + pub payload_len: u16be, + #[ingot(is = "u8", next_layer)] + pub next_header: IpProtocol, + #[ingot(default = 128)] + pub hop_limit: u8, + + #[ingot(is = "[u8; 16]", default = Ipv6Addr::ANY_ADDR)] + pub source: Ipv6Addr, + #[ingot(is = "[u8; 16]", default = Ipv6Addr::ANY_ADDR)] + pub destination: Ipv6Addr, + + #[ingot(subparse(on_next_layer))] + pub v6ext: Repeated, +} diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index b2a813fc..777fea80 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -19,6 +19,7 @@ pub mod geneve; pub mod headers; pub mod icmp; pub mod ioctl; +pub mod ip; #[macro_use] pub mod ip4; #[macro_use] @@ -27,6 +28,7 @@ pub mod layer; pub mod nat; #[macro_use] pub mod packet; +pub mod parse; pub mod port; pub mod predicate; #[cfg(any(feature = "std", test))] @@ -39,7 +41,6 @@ pub mod tcp_state; #[macro_use] pub mod udp; -pub mod ingot_base; pub mod ingot_packet; use alloc::string::String; @@ -53,9 +54,9 @@ use ingot_packet::MsgBlk; use ingot_packet::OpteMeta; use ingot_packet::OpteParsed2; use ingot_packet::Packet2; -use ingot_packet::ValidNoEncap; use ip4::IpError; pub use opte_api::Direction; +use parse::ValidNoEncap; use rule::CompiledTransform; use zerocopy::ByteSlice; use zerocopy::ByteSliceMut; diff --git a/lib/opte/src/engine/parse.rs b/lib/opte/src/engine/parse.rs new file mode 100644 index 
00000000..1c5c9982 --- /dev/null +++ b/lib/opte/src/engine/parse.rs @@ -0,0 +1,693 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2024 Oxide Computer Company + +//! Constructs used in packet parsing, such as choices over protocol +//! and complete packet definitions. + +use super::checksum::Checksum; +use super::checksum::HeaderChecksum; +use super::ether::EthernetMut; +use super::ether::EthernetPacket; +use super::ether::EthernetRef; +use super::ether::ValidEthernet; +use super::geneve::GENEVE_PORT; +use super::headers::IpMod; +use super::ingot_packet::OpteMeta; +use super::ingot_packet::ValidEncapMeta; +use super::ip::v4::Ipv4Mut; +use super::ip::v4::Ipv4Ref; +use super::ip::v6::Ipv6Mut; +use super::ip::v6::Ipv6Packet; +use super::ip::v6::Ipv6Ref; +use super::ip::ValidL3; +use super::ip::L3; +use super::packet::AddrPair; +use super::packet::InnerFlowId; +use super::packet::FLOW_ID_DEFAULT; +use super::rule::CompiledTransform; +use super::LightweightMeta; +use ingot::choice; +use ingot::ethernet::Ethertype; +use ingot::geneve::GenevePacket; +use ingot::icmp::IcmpV4; +use ingot::icmp::IcmpV4Mut; +use ingot::icmp::IcmpV4Ref; +use ingot::icmp::IcmpV6; +use ingot::icmp::IcmpV6Mut; +use ingot::icmp::IcmpV6Ref; +use ingot::icmp::ValidIcmpV4; +use ingot::icmp::ValidIcmpV6; +use ingot::ip::IpProtocol; +use ingot::tcp::Tcp; +use ingot::tcp::TcpFlags; +use ingot::tcp::TcpMut; +use ingot::tcp::TcpRef; +use ingot::tcp::ValidTcp; +use ingot::types::ByteSlice; +use ingot::types::Header; +use ingot::types::HeaderLen; +use ingot::types::InlineHeader; +use ingot::types::NextLayer; +use ingot::types::ParseControl; +use ingot::udp::Udp; +use ingot::udp::UdpMut; +use ingot::udp::UdpPacket; +use ingot::udp::UdpRef; +use ingot::udp::ValidUdp; +use ingot::Parse; +use zerocopy::ByteSliceMut; +use 
zerocopy::IntoBytes; + +#[choice(on = IpProtocol)] +pub enum L4 { + Tcp = IpProtocol::TCP, + Udp = IpProtocol::UDP, +} + +#[choice(on = IpProtocol)] +pub enum Ulp { + Tcp = IpProtocol::TCP, + Udp = IpProtocol::UDP, + IcmpV4 = IpProtocol::ICMP, + IcmpV6 = IpProtocol::ICMP_V6, +} + +impl ValidUlp { + pub fn csum(&self) -> [u8; 2] { + match self { + ValidUlp::Tcp(t) => t.checksum(), + ValidUlp::Udp(u) => u.checksum(), + ValidUlp::IcmpV4(i4) => i4.checksum(), + ValidUlp::IcmpV6(i6) => i6.checksum(), + } + .to_be_bytes() + } +} + +impl ValidUlp { + pub fn compute_checksum( + &mut self, + mut body_csum: Checksum, + l3: &ValidL3, + ) { + match self { + // ICMP4 requires the body_csum *without* + // the pseudoheader added back in. + ValidUlp::IcmpV4(i4) => { + i4.set_checksum(0); + body_csum.add_bytes(i4.0.as_bytes()); + i4.set_checksum(body_csum.finalize_for_ingot()); + } + ValidUlp::IcmpV6(i6) => { + body_csum += l3.pseudo_header(); + + i6.set_checksum(0); + body_csum.add_bytes(i6.0.as_bytes()); + i6.set_checksum(body_csum.finalize_for_ingot()); + } + ValidUlp::Tcp(tcp) => { + body_csum += l3.pseudo_header(); + + tcp.set_checksum(0); + body_csum.add_bytes(tcp.0.as_bytes()); + match &tcp.1 { + Header::Repr(opts) => { + body_csum.add_bytes(&*opts); + } + Header::Raw(opts) => { + body_csum.add_bytes(&*opts); + } + } + tcp.set_checksum(body_csum.finalize_for_ingot()); + } + ValidUlp::Udp(udp) => { + body_csum += l3.pseudo_header(); + + udp.set_checksum(0); + body_csum.add_bytes(udp.0.as_bytes()); + udp.set_checksum(body_csum.finalize_for_ingot()); + } + } + } +} + +impl Ulp { + pub fn src_port(&self) -> Option { + match self { + Ulp::Tcp(t) => Some(t.source()), + Ulp::Udp(u) => Some(u.source()), + _ => None, + } + } +} + +#[derive(Parse)] +pub struct GeneveOverV6 { + pub outer_eth: EthernetPacket, + #[ingot(from = "L3")] + pub outer_v6: Ipv6Packet, + #[ingot(from = "L4", control = geneve_dst_port)] + pub outer_udp: UdpPacket, + pub outer_encap: GenevePacket, + + pub 
inner_eth: EthernetPacket, + pub inner_l3: L3, + pub inner_ulp: Ulp, +} + +#[inline] +fn geneve_dst_port(l4: &ValidL4) -> ParseControl { + match l4 { + ValidL4::Udp(u) if u.destination() == GENEVE_PORT => { + ParseControl::Continue + } + _ => ParseControl::Reject, + } +} + +#[inline] +fn exit_on_arp(eth: &ValidEthernet) -> ParseControl { + if eth.ethertype() == Ethertype::ARP { + ParseControl::Accept + } else { + ParseControl::Continue + } +} + +#[derive(Parse)] +pub struct NoEncap { + #[ingot(control = exit_on_arp)] + pub inner_eth: EthernetPacket, + pub inner_l3: Option>, + pub inner_ulp: Option>, +} + +impl From> for OpteMeta { + #[inline] + fn from(value: ValidNoEncap) -> Self { + NoEncap::from(value).into() + } +} + +impl LightweightMeta for ValidNoEncap { + #[inline] + fn flow(&self) -> InnerFlowId { + let (proto, addrs) = match &self.inner_l3 { + Some(ValidL3::Ipv4(pkt)) => ( + pkt.protocol().0, + AddrPair::V4 { src: pkt.source(), dst: pkt.destination() }, + ), + Some(ValidL3::Ipv6(pkt)) => ( + pkt.next_layer().unwrap_or_default().0, + AddrPair::V6 { src: pkt.source(), dst: pkt.destination() }, + ), + None => (255, FLOW_ID_DEFAULT.addrs), + }; + + let (src_port, dst_port) = self + .inner_ulp + .as_ref() + .map(|ulp| { + ( + ulp.true_src_port() + .or_else(|| ulp.pseudo_port()) + .unwrap_or(0), + ulp.true_dst_port() + .or_else(|| ulp.pseudo_port()) + .unwrap_or(0), + ) + }) + .unwrap_or((0, 0)); + + InnerFlowId { proto: proto.into(), addrs, src_port, dst_port } + } + + #[inline] + fn run_compiled_transform(&mut self, transform: &CompiledTransform) + where + V: ByteSliceMut, + { + // TODO: break out commonalities for this and geneve. 
+ if let Some(ether_tx) = &transform.inner_ether { + if let Some(new_src) = ðer_tx.src { + self.inner_eth.set_source(*new_src); + } + if let Some(new_dst) = ðer_tx.dst { + self.inner_eth.set_destination(*new_dst); + } + } + match (&mut self.inner_l3, &transform.inner_ip) { + (Some(ValidL3::Ipv4(pkt)), Some(IpMod::Ip4(tx))) => { + if let Some(new_src) = &tx.src { + pkt.set_source(*new_src); + } + if let Some(new_dst) = &tx.dst { + pkt.set_destination(*new_dst); + } + if let Some(new_proto) = &tx.proto { + pkt.set_protocol(IpProtocol(u8::from(*new_proto))); + } + } + (Some(ValidL3::Ipv6(pkt)), Some(IpMod::Ip6(tx))) => { + if let Some(new_src) = &tx.src { + pkt.set_source(*new_src); + } + if let Some(new_dst) = &tx.dst { + pkt.set_destination(*new_dst); + } + if let Some(new_proto) = &tx.proto { + // TODO: wrong in the face of EHs... + // For now, we never use this on our dataplane. + pkt.set_next_header(IpProtocol(u8::from(*new_proto))); + } + } + _ => {} + } + + match (&mut self.inner_ulp, &transform.inner_ulp) { + (Some(ValidUlp::Tcp(pkt)), Some(tx)) => { + if let Some(flags) = tx.tcp_flags { + pkt.set_flags(TcpFlags::from_bits_retain(flags)); + } + + if let Some(new_src) = &tx.generic.src_port { + pkt.set_source(*new_src); + } + + if let Some(new_dst) = &tx.generic.dst_port { + pkt.set_destination(*new_dst); + } + } + (Some(ValidUlp::Udp(pkt)), Some(tx)) => { + if let Some(new_src) = &tx.generic.src_port { + pkt.set_source(*new_src); + } + + if let Some(new_dst) = &tx.generic.dst_port { + pkt.set_destination(*new_dst); + } + } + (Some(ValidUlp::IcmpV4(pkt)), Some(tx)) + if pkt.ty() == 0 || pkt.ty() == 8 => + { + if let Some(new_id) = tx.icmp_id { + pkt.rest_of_hdr_mut()[..2] + .copy_from_slice(&new_id.to_be_bytes()) + } + } + (Some(ValidUlp::IcmpV6(pkt)), Some(tx)) + if pkt.ty() == 128 || pkt.ty() == 129 => + { + if let Some(new_id) = tx.icmp_id { + pkt.rest_of_hdr_mut()[..2] + .copy_from_slice(&new_id.to_be_bytes()) + } + } + _ => {} + } + } + + // FIXME: 
identical to Geneve. + #[inline] + fn compute_body_csum(&self) -> Option { + let use_pseudo = if let Some(v) = &self.inner_ulp { + !matches!(v, ValidUlp::IcmpV4(_)) + } else { + false + }; + + let pseudo_csum = match self.inner_eth.ethertype() { + Ethertype::IPV4 | Ethertype::IPV6 => { + self.inner_l3.as_ref().map(|v| v.pseudo_header()) + } + // Includes ARP. + _ => return None, + }; + + let Some(pseudo_csum) = pseudo_csum else { + return None; + }; + + self.inner_ulp.as_ref().and_then(csum_minus_hdr).map(|mut v| { + if use_pseudo { + v -= pseudo_csum; + } + v + }) + } + + #[inline] + fn encap_len(&self) -> u16 { + 0 + } + + #[inline] + fn update_inner_checksums(&mut self, body_csum: Checksum) { + if let Some(l3) = self.inner_l3.as_mut() { + if let Some(ulp) = self.inner_ulp.as_mut() { + ulp.compute_checksum(body_csum, l3); + } + l3.compute_checksum(); + } + } + + #[inline] + fn inner_tcp(&self) -> Option<&impl TcpRef> { + match self.inner_ulp.as_ref() { + Some(ValidUlp::Tcp(t)) => Some(t), + _ => None, + } + } +} + +impl From> for OpteMeta { + #[inline] + fn from(value: ValidGeneveOverV6) -> Self { + OpteMeta { + outer_eth: Some(value.outer_eth.into()), + outer_l3: Some(L3::Ipv6(value.outer_v6.into())), + outer_encap: Some(InlineHeader::Raw(ValidEncapMeta::Geneve( + value.outer_udp, + value.outer_encap, + ))), + inner_eth: value.inner_eth.into(), + inner_l3: Some(value.inner_l3.into()), + inner_ulp: Some(value.inner_ulp.into()), + } + } +} + +impl LightweightMeta for ValidGeneveOverV6 { + #[inline] + fn flow(&self) -> InnerFlowId { + let (proto, addrs) = match &self.inner_l3 { + ValidL3::Ipv4(pkt) => ( + pkt.protocol().0, + AddrPair::V4 { src: pkt.source(), dst: pkt.destination() }, + ), + ValidL3::Ipv6(pkt) => ( + pkt.next_layer().unwrap_or_default().0, + AddrPair::V6 { src: pkt.source(), dst: pkt.destination() }, + ), + }; + + let src_port = self + .inner_ulp + .true_src_port() + .or_else(|| self.inner_ulp.pseudo_port()) + .unwrap_or(0); + + let dst_port = self 
+ .inner_ulp + .true_dst_port() + .or_else(|| self.inner_ulp.pseudo_port()) + .unwrap_or(0); + + InnerFlowId { proto: proto.into(), addrs, src_port, dst_port } + } + + #[inline] + fn run_compiled_transform(&mut self, transform: &CompiledTransform) + where + V: ByteSliceMut, + { + // TODO: break out commonalities for this and geneve. + if let Some(ether_tx) = &transform.inner_ether { + if let Some(new_src) = ðer_tx.src { + self.inner_eth.set_source(*new_src); + } + if let Some(new_dst) = ðer_tx.dst { + self.inner_eth.set_destination(*new_dst); + } + } + match (&mut self.inner_l3, &transform.inner_ip) { + (ValidL3::Ipv4(pkt), Some(IpMod::Ip4(tx))) => { + if let Some(new_src) = &tx.src { + pkt.set_source(*new_src); + } + if let Some(new_dst) = &tx.dst { + pkt.set_destination(*new_dst); + } + if let Some(new_proto) = &tx.proto { + pkt.set_protocol(IpProtocol(u8::from(*new_proto))); + } + } + (ValidL3::Ipv6(pkt), Some(IpMod::Ip6(tx))) => { + if let Some(new_src) = &tx.src { + pkt.set_source(*new_src); + } + if let Some(new_dst) = &tx.dst { + pkt.set_destination(*new_dst); + } + if let Some(new_proto) = &tx.proto { + // TODO: wrong in the face of EHs... + // For now, we never use this on our dataplane. 
+ pkt.set_next_header(IpProtocol(u8::from(*new_proto))); + } + } + _ => {} + } + + match (&mut self.inner_ulp, &transform.inner_ulp) { + (ValidUlp::Tcp(pkt), Some(tx)) => { + if let Some(flags) = tx.tcp_flags { + pkt.set_flags(TcpFlags::from_bits_retain(flags)); + } + + if let Some(new_src) = &tx.generic.src_port { + pkt.set_source(*new_src); + } + + if let Some(new_dst) = &tx.generic.dst_port { + pkt.set_destination(*new_dst); + } + } + (ValidUlp::Udp(pkt), Some(tx)) => { + if let Some(new_src) = &tx.generic.src_port { + pkt.set_source(*new_src); + } + + if let Some(new_dst) = &tx.generic.dst_port { + pkt.set_destination(*new_dst); + } + } + (ValidUlp::IcmpV4(pkt), Some(tx)) + if pkt.ty() == 0 || pkt.ty() == 8 => + { + if let Some(new_id) = tx.icmp_id { + pkt.rest_of_hdr_mut()[..2] + .copy_from_slice(&new_id.to_be_bytes()) + } + } + (ValidUlp::IcmpV6(pkt), Some(tx)) + if pkt.ty() == 128 || pkt.ty() == 129 => + { + if let Some(new_id) = tx.icmp_id { + pkt.rest_of_hdr_mut()[..2] + .copy_from_slice(&new_id.to_be_bytes()) + } + } + _ => {} + } + } + + #[inline] + fn compute_body_csum(&self) -> Option { + let use_pseudo = !matches!(self.inner_ulp, ValidUlp::IcmpV4(_)); + + let pseudo_csum = match self.inner_eth.ethertype() { + Ethertype::IPV4 | Ethertype::IPV6 => { + Some(self.inner_l3.pseudo_header()) + } + // Includes ARP. 
+ _ => return None, + }; + + let Some(pseudo_csum) = pseudo_csum else { + return None; + }; + + csum_minus_hdr(&self.inner_ulp).map(|mut v| { + if use_pseudo { + v -= pseudo_csum; + } + v + }) + } + + #[inline] + fn encap_len(&self) -> u16 { + (self.outer_eth.packet_length() + + self.outer_v6.packet_length() + + self.outer_udp.packet_length() + + self.outer_encap.packet_length()) as u16 + } + + #[inline] + fn update_inner_checksums(&mut self, body_csum: Checksum) { + self.inner_ulp.compute_checksum(body_csum, &self.inner_l3); + self.inner_l3.compute_checksum(); + } + + #[inline] + fn inner_tcp(&self) -> Option<&impl TcpRef> { + match &self.inner_ulp { + ValidUlp::Tcp(t) => Some(t), + _ => None, + } + } +} + +#[inline] +fn csum_minus_hdr(ulp: &ValidUlp) -> Option { + match ulp { + ValidUlp::IcmpV4(icmp) => { + if icmp.checksum() == 0 { + return None; + } + + let mut csum = Checksum::from(HeaderChecksum::wrap( + icmp.checksum().to_be_bytes(), + )); + + csum.sub_bytes(&[icmp.ty(), icmp.code()]); + csum.sub_bytes(icmp.rest_of_hdr_ref()); + + Some(csum) + } + ValidUlp::IcmpV6(icmp) => { + if icmp.checksum() == 0 { + return None; + } + + let mut csum = Checksum::from(HeaderChecksum::wrap( + icmp.checksum().to_be_bytes(), + )); + + csum.sub_bytes(&[icmp.ty(), icmp.code()]); + csum.sub_bytes(icmp.rest_of_hdr_ref()); + + Some(csum) + } + ValidUlp::Tcp(tcp) => { + if tcp.checksum() == 0 { + return None; + } + + let mut csum = Checksum::from(HeaderChecksum::wrap( + tcp.checksum().to_be_bytes(), + )); + + let b = tcp.0.as_bytes(); + + csum.sub_bytes(&b[0..16]); + csum.sub_bytes(&b[18..]); + + csum.sub_bytes(match &tcp.1 { + ingot::types::Header::Repr(v) => &v[..], + ingot::types::Header::Raw(v) => &v[..], + }); + + Some(csum) + } + ValidUlp::Udp(udp) => { + if udp.checksum() == 0 { + return None; + } + + let mut csum = Checksum::from(HeaderChecksum::wrap( + udp.checksum().to_be_bytes(), + )); + + let b = udp.0.as_bytes(); + csum.sub_bytes(&b[0..6]); + + Some(csum) + } + } +} + 
+impl Ulp { + #[inline] + pub fn true_src_port(&self) -> Option { + match self { + Ulp::Tcp(pkt) => Some(pkt.source()), + Ulp::Udp(pkt) => Some(pkt.source()), + _ => None, + } + } + + #[inline] + pub fn true_dst_port(&self) -> Option { + match self { + Ulp::Tcp(pkt) => Some(pkt.destination()), + Ulp::Udp(pkt) => Some(pkt.destination()), + _ => None, + } + } + + #[inline] + pub fn pseudo_port(&self) -> Option { + match self { + Ulp::IcmpV4(pkt) + if pkt.code() == 0 && (pkt.ty() == 0 || pkt.ty() == 8) => + { + Some(u16::from_be_bytes( + pkt.rest_of_hdr()[..2].try_into().unwrap(), + )) + } + Ulp::IcmpV6(pkt) + if pkt.code() == 0 && (pkt.ty() == 128 || pkt.ty() == 129) => + { + Some(u16::from_be_bytes( + pkt.rest_of_hdr()[..2].try_into().unwrap(), + )) + } + _ => None, + } + } +} + +impl ValidUlp { + #[inline] + pub fn true_src_port(&self) -> Option { + match self { + ValidUlp::Tcp(pkt) => Some(pkt.source()), + ValidUlp::Udp(pkt) => Some(pkt.source()), + _ => None, + } + } + + #[inline] + pub fn true_dst_port(&self) -> Option { + match self { + ValidUlp::Tcp(pkt) => Some(pkt.destination()), + ValidUlp::Udp(pkt) => Some(pkt.destination()), + _ => None, + } + } + + #[inline] + pub fn pseudo_port(&self) -> Option { + match self { + ValidUlp::IcmpV4(pkt) + if pkt.code() == 0 && (pkt.ty() == 0 || pkt.ty() == 8) => + { + Some(u16::from_be_bytes( + pkt.rest_of_hdr()[..2].try_into().unwrap(), + )) + } + ValidUlp::IcmpV6(pkt) + if pkt.code() == 0 && (pkt.ty() == 128 || pkt.ty() == 129) => + { + Some(u16::from_be_bytes( + pkt.rest_of_hdr()[..2].try_into().unwrap(), + )) + } + _ => None, + } + } +} diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index cf50388f..0e835e87 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -8,6 +8,7 @@ use self::meta::ActionMeta; use super::ether::EtherMeta; +use super::ether::Ethernet; use super::flow_table::Dump; use super::flow_table::FlowEntry; use super::flow_table::FlowTable; @@ -17,10 +18,6 @@ 
use super::headers::EncapPush; use super::headers::HeaderAction; use super::headers::IpPush; use super::headers::UlpHeaderAction; -use super::ingot_base::Ethernet; -use super::ingot_base::Ipv4; -use super::ingot_base::Ipv6; -use super::ingot_base::L3Repr; use super::ingot_packet::FullParsed; use super::ingot_packet::LiteParsed; use super::ingot_packet::MblkFullParsed; @@ -32,6 +29,9 @@ use super::ioctl; use super::ioctl::TcpFlowEntryDump; use super::ioctl::TcpFlowStateDump; use super::ioctl::UftEntryDump; +use super::ip::v4::Ipv4; +use super::ip::v6::Ipv6; +use super::ip::L3Repr; use super::layer; use super::layer::Layer; use super::layer::LayerError; diff --git a/lib/opte/src/engine/predicate.rs b/lib/opte/src/engine/predicate.rs index 53ea8cb8..5cc06e46 100644 --- a/lib/opte/src/engine/predicate.rs +++ b/lib/opte/src/engine/predicate.rs @@ -9,15 +9,15 @@ use super::dhcp::MessageType as DhcpMessageType; use super::dhcpv6::MessageType as Dhcpv6MessageType; use super::ether::EtherType; +use super::ether::EthernetRef; use super::icmp::v4::MessageType as IcmpMessageType; use super::icmp::v6::MessageType as Icmpv6MessageType; -use super::ingot_base::EthernetRef; -use super::ingot_base::Ipv4Ref; -use super::ingot_base::Ipv6Ref; -use super::ingot_base::L3; use super::ingot_packet::ulp_dst_port; use super::ingot_packet::ulp_src_port; use super::ingot_packet::MblkPacketData; +use super::ip::v4::Ipv4Ref; +use super::ip::v6::Ipv6Ref; +use super::ip::L3; use super::ip4::Ipv4Addr; use super::ip4::Ipv4Cidr; use super::ip4::Protocol; diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 35b4d888..5b00643e 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -8,6 +8,9 @@ use super::ether::EtherMeta; use super::ether::EtherMod; +use super::ether::Ethernet; +use super::ether::EthernetPacket; +use super::ether::ValidEthernet; use super::flow_table::StateSummary; use super::headers::EncapMod; use super::headers::EncapPush; @@ -18,15 
+21,12 @@ use super::headers::IpPush; use super::headers::Transform; use super::headers::UlpHeaderAction; use super::headers::UlpMetaModify; -use super::ingot_base::Ethernet; -use super::ingot_base::EthernetPacket; -use super::ingot_base::ValidEthernet; -use super::ingot_base::L3; use super::ingot_packet::MblkFullParsed; use super::ingot_packet::MblkPacketData; use super::ingot_packet::MsgBlk; use super::ingot_packet::Packet2; use super::ingot_packet::PacketData; +use super::ip::L3; use super::packet::BodyTransform; use super::packet::InnerFlowId; use super::port::meta::ActionMeta; diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index 5642b5d6..7b9d9774 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -18,16 +18,16 @@ use opte::engine::arp::ArpEthIpv4Ref; use opte::engine::arp::ArpOp; use opte::engine::arp::ValidArpEthIpv4; use opte::engine::arp::ARP_HTYPE_ETHERNET; +use opte::engine::ether::EthernetRef; use opte::engine::flow_table::FlowTable; -use opte::engine::ingot_base::EthernetRef; use opte::engine::ingot_packet::FullParsed; use opte::engine::ingot_packet::OpteParsed2; use opte::engine::ingot_packet::Packet2; -use opte::engine::ingot_packet::ValidGeneveOverV6; -use opte::engine::ingot_packet::ValidNoEncap; use opte::engine::ip4::Ipv4Addr; use opte::engine::packet::InnerFlowId; use opte::engine::packet::ParseError; +use opte::engine::parse::ValidGeneveOverV6; +use opte::engine::parse::ValidNoEncap; use opte::engine::port::UftEntry; use opte::engine::Direction; use opte::engine::HdlPktAction; diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index 5194c1b3..013847ba 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -23,21 +23,21 @@ use opte::engine::arp::ArpEthIpv4Ref; use opte::engine::arp::ValidArpEthIpv4; use opte::engine::arp::ARP_HTYPE_ETHERNET; use opte::engine::dhcpv6; +use 
opte::engine::ether::Ethernet; +use opte::engine::ether::EthernetRef; use opte::engine::flow_table::FLOW_DEF_EXPIRE_SECS; use opte::engine::geneve::Vni; -use opte::engine::ingot_base::Ethernet; -use opte::engine::ingot_base::EthernetRef; -use opte::engine::ingot_base::Ipv4Ref; -use opte::engine::ingot_base::Ipv6; -use opte::engine::ingot_base::Ipv6Ref; -use opte::engine::ingot_base::ValidL3; -use opte::engine::ingot_base::ValidUlp; -use opte::engine::ingot_base::L3; use opte::engine::ingot_packet::MblkFullParsed; use opte::engine::ingot_packet::MsgBlk; use opte::engine::ingot_packet::Packet2; +use opte::engine::ip::v4::Ipv4Ref; +use opte::engine::ip::v6::Ipv6; +use opte::engine::ip::v6::Ipv6Ref; +use opte::engine::ip::ValidL3; +use opte::engine::ip::L3; use opte::engine::ip4::Ipv4Addr; use opte::engine::packet::InnerFlowId; +use opte::engine::parse::ValidUlp; use opte::engine::port::ProcessError; use opte::engine::tcp::TcpState; use opte::engine::tcp::TIME_WAIT_EXPIRE_SECS; From 06584e3f00abd7bd2f110a0f90ca6a6c6c8e8ab2 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 23 Oct 2024 16:02:14 -0700 Subject: [PATCH 063/115] More movement, reclaim some validation we didn't get via ingot. 
--- lib/opte-test-utils/src/lib.rs | 6 +- lib/opte/src/d_error.rs | 1 + lib/opte/src/engine/dhcp.rs | 6 +- lib/opte/src/engine/headers.rs | 10 +- lib/opte/src/engine/ingot_packet.rs | 14 +- lib/opte/src/engine/ip/mod.rs | 9 + lib/opte/src/engine/ip/v4.rs | 174 ++++++++- lib/opte/src/engine/ip/v6.rs | 458 +++++++++++++++++++++- lib/opte/src/engine/ip4.rs | 241 ------------ lib/opte/src/engine/ip6.rs | 463 ----------------------- lib/opte/src/engine/mod.rs | 58 +-- lib/opte/src/engine/packet.rs | 134 ++----- lib/opte/src/engine/parse.rs | 63 +++ lib/opte/src/engine/predicate.rs | 10 +- lib/opte/src/engine/rule.rs | 7 - lib/oxide-vpc/src/engine/gateway/dhcp.rs | 2 +- lib/oxide-vpc/src/engine/mod.rs | 2 +- lib/oxide-vpc/src/engine/overlay.rs | 8 +- lib/oxide-vpc/tests/integration_tests.rs | 2 +- xde/src/route.rs | 2 +- xde/src/xde.rs | 8 +- 21 files changed, 772 insertions(+), 906 deletions(-) delete mode 100644 lib/opte/src/engine/ip4.rs delete mode 100644 lib/opte/src/engine/ip6.rs diff --git a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index 389f7344..5f3845ff 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -33,11 +33,11 @@ pub use opte::engine::ingot_packet::MblkLiteParsed; pub use opte::engine::ingot_packet::MsgBlk; pub use opte::engine::ingot_packet::Packet2; pub use opte::engine::ip::v4::Ipv4; +pub use opte::engine::ip::v4::Ipv4Addr; +pub use opte::engine::ip::v4::Protocol; pub use opte::engine::ip::v6::Ipv6; +pub use opte::engine::ip::v6::Ipv6Addr; pub use opte::engine::ip::L3Repr; -pub use opte::engine::ip4::Ipv4Addr; -pub use opte::engine::ip4::Protocol; -pub use opte::engine::ip6::Ipv6Addr; pub use opte::engine::layer::DenyReason; pub use opte::engine::packet::ParseError; pub use opte::engine::port::meta::ActionMeta; diff --git a/lib/opte/src/d_error.rs b/lib/opte/src/d_error.rs index edbe61f4..5abba90a 100644 --- a/lib/opte/src/d_error.rs +++ b/lib/opte/src/d_error.rs @@ -8,6 +8,7 @@ //! 
static strings to avoid paying the `fmt` tax when calling an SDT. use core::ffi::CStr; +use core::fmt; pub use derror_macro::DError; // XXX: I think we want some way of doing the whole thing in one big chunk diff --git a/lib/opte/src/engine/dhcp.rs b/lib/opte/src/engine/dhcp.rs index 2fb8967a..6ad7b482 100644 --- a/lib/opte/src/engine/dhcp.rs +++ b/lib/opte/src/engine/dhcp.rs @@ -2,16 +2,14 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! DHCP headers, data, and actions. use super::ether::Ethernet; use super::ingot_packet::MblkPacketData; use super::ingot_packet::MsgBlk; -use super::ip::v4::Ipv4; -use super::ip4::Ipv4Addr; -use super::ip4::Protocol; +use super::ip::v4::*; use super::predicate::DataPredicate; use super::predicate::EtherAddrMatch; use super::predicate::IpProtoMatch; diff --git a/lib/opte/src/engine/headers.rs b/lib/opte/src/engine/headers.rs index 4232b8b4..5a983207 100644 --- a/lib/opte/src/engine/headers.rs +++ b/lib/opte/src/engine/headers.rs @@ -2,17 +2,17 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2022 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! Header metadata combinations for IP, ULP, and Encap. 
use super::geneve::GeneveMeta; use super::geneve::GeneveMod; use super::geneve::GenevePush; -use super::ip4::Ipv4Mod; -use super::ip4::Ipv4Push; -use super::ip6::Ipv6Mod; -use super::ip6::Ipv6Push; +use super::ip::v4::Ipv4Mod; +use super::ip::v4::Ipv4Push; +use super::ip::v6::Ipv6Mod; +use super::ip::v6::Ipv6Push; use super::tcp::TcpMod; use super::tcp::TcpPush; use super::udp::UdpMod; diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index a477894a..55c3c7ce 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -1267,9 +1267,10 @@ where ) -> Result>>, ParseError> { let Packet2 { state: Initialized2 { len, inner } } = self; - Ok(Packet2 { - state: LiteParsed { meta: net.parse_inbound(inner)?, len }, - }) + let meta = net.parse_inbound(inner)?; + meta.stack.validate(len)?; + + Ok(Packet2 { state: LiteParsed { meta, len } }) } #[inline] @@ -1279,9 +1280,10 @@ where ) -> Result>>, ParseError> { let Packet2 { state: Initialized2 { len, inner } } = self; - Ok(Packet2 { - state: LiteParsed { meta: net.parse_outbound(inner)?, len }, - }) + let meta = net.parse_outbound(inner)?; + meta.stack.validate(len)?; + + Ok(Packet2 { state: LiteParsed { meta, len } }) } } diff --git a/lib/opte/src/engine/ip/mod.rs b/lib/opte/src/engine/ip/mod.rs index 8bbe0f22..0db2963e 100644 --- a/lib/opte/src/engine/ip/mod.rs +++ b/lib/opte/src/engine/ip/mod.rs @@ -8,6 +8,7 @@ pub mod v4; pub mod v6; use super::checksum::Checksum; +use super::packet::ParseError; use ingot::choice; use ingot::ethernet::Ethertype; use ingot::types::ByteSlice; @@ -107,6 +108,14 @@ impl ValidL3 { } .to_be_bytes() } + + #[inline] + pub fn validate(&self, bytes_after: usize) -> Result<(), ParseError> { + match self { + ValidL3::Ipv4(i4) => i4.validate(bytes_after), + ValidL3::Ipv6(i6) => i6.validate(bytes_after), + } + } } impl ValidL3 { diff --git a/lib/opte/src/engine/ip/v4.rs b/lib/opte/src/engine/ip/v4.rs index d63c7e95..4a5688b8 100644 
--- a/lib/opte/src/engine/ip/v4.rs +++ b/lib/opte/src/engine/ip/v4.rs @@ -4,16 +4,36 @@ // Copyright 2024 Oxide Computer Company +//! IPv4 headers. + +use core::fmt; +use core::fmt::Display; +use core::num::ParseIntError; + use crate::engine::checksum::Checksum; +use crate::engine::packet::MismatchError; +use crate::engine::packet::ParseError; +use crate::engine::predicate::MatchExact; +use crate::engine::predicate::MatchExactVal; +use crate::engine::predicate::MatchPrefix; +use crate::engine::predicate::MatchPrefixVal; +use crate::engine::predicate::MatchRangeVal; use ingot::ip::Ecn; use ingot::ip::IpProtocol; use ingot::ip::Ipv4Flags; use ingot::types::primitives::*; use ingot::types::Emit; use ingot::types::Header; +use ingot::types::HeaderLen; use ingot::types::Vec; use ingot::Ingot; -use opte_api::Ipv4Addr; +pub use opte_api::Ipv4Addr; +pub use opte_api::Ipv4Cidr; +pub use opte_api::Ipv4PrefixLen; +pub use opte_api::Protocol; +use serde::Deserialize; +use serde::Serialize; +use zerocopy::ByteSlice; use zerocopy::ByteSliceMut; use zerocopy::IntoBytes; @@ -85,3 +105,155 @@ impl ValidIpv4 { self.set_checksum(csum.finalize_for_ingot()); } } + +impl ValidIpv4 { + #[inline] + pub fn validate(&self, bytes_after: usize) -> Result<(), ParseError> { + let v = self.version(); + if self.version() != 4 { + return Err(ParseError::IllegalValue(MismatchError { + location: c"Ipv4.version", + expected: 4, + actual: v as u64, + })); + } + + let own_len = self.packet_length(); + let ihl = self.ihl(); + let expt_ihl = (own_len >> 2) as u8; + if expt_ihl != ihl { + return Err(ParseError::IllegalValue(MismatchError { + location: c"Ipv4.ihl", + expected: expt_ihl as u64, + actual: ihl as u64, + })); + } + + let expt_total_len = bytes_after + own_len; + if expt_total_len != self.total_len() as usize { + return Err(ParseError::BadLength(MismatchError { + location: c"Ipv4.total_len", + expected: expt_total_len as u64, + actual: self.total_len() as u64, + })); + } + + Ok(()) + } +} + 
+impl MatchPrefixVal for Ipv4Cidr {} +impl MatchExactVal for Ipv4Addr {} +impl MatchRangeVal for Ipv4Addr {} + +impl MatchExact for Ipv4Addr { + fn match_exact(&self, val: &Ipv4Addr) -> bool { + *self == *val + } +} + +impl MatchPrefix for Ipv4Addr { + fn match_prefix(&self, prefix: &Ipv4Cidr) -> bool { + prefix.is_member(*self) + } +} + +impl MatchExactVal for Protocol {} + +impl MatchExact for Protocol { + fn match_exact(&self, val: &Protocol) -> bool { + *self == *val + } +} + +#[derive( + Clone, Copy, Debug, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize, +)] +pub struct Ipv4Push { + pub src: Ipv4Addr, + pub dst: Ipv4Addr, + pub proto: Protocol, +} + +#[derive(Clone, Debug, Default, Deserialize, Serialize)] +pub struct Ipv4Mod { + pub src: Option, + pub dst: Option, + pub proto: Option, +} + +#[cfg(test)] +mod test { + use super::*; + use ingot::types::HeaderLen; + + pub const DEF_ROUTE: &str = "0.0.0.0/0"; + + #[test] + fn match_check() { + let ip = "192.168.2.11".parse::().unwrap(); + assert!(ip.match_exact(&ip)); + assert!(ip.match_prefix(&"192.168.2.0/24".parse::().unwrap())); + } + + #[test] + fn cidr_match() { + let ip1 = "192.168.2.22".parse::().unwrap(); + let cidr1 = "192.168.2.0/24".parse().unwrap(); + assert!(ip1.match_prefix(&cidr1)); + + let ip2 = "10.7.7.7".parse::().unwrap(); + let cidr2 = "10.0.0.0/8".parse().unwrap(); + assert!(ip2.match_prefix(&cidr2)); + + let ip3 = "52.10.128.69".parse::().unwrap(); + let cidr3 = DEF_ROUTE.parse().unwrap(); + assert!(ip3.match_prefix(&cidr3)); + } + + #[test] + fn emit() { + let ip = Ipv4 { + source: Ipv4Addr::from([10, 0, 0, 54]), + destination: Ipv4Addr::from([52, 10, 128, 69]), + protocol: IpProtocol::TCP, + hop_limit: 64, + identification: 2662, + ihl: 5, + total_len: 60, + + ..Default::default() + }; + + let len = ip.packet_length(); + assert_eq!(len, 20); + + let bytes = ip.emit_vec(); + assert_eq!(len, bytes.len()); + + #[rustfmt::skip] + let expected_bytes = vec![ + // version + IHL + 0x45, + 
// DSCP + ECN + 0x00, + // total length + 0x00, 0x3C, + // ident + 0x0A, 0x66, + // flags + frag offset + 0x40, 0x00, + // TTL + 0x40, + // protocol + 0x06, + // checksum + 0x00, 0x00, + // source + 0x0A, 0x00, 0x00, 0x36, + // dest + 0x34, 0x0A, 0x80, 0x45, + ]; + assert_eq!(&expected_bytes, &bytes); + } +} diff --git a/lib/opte/src/engine/ip/v6.rs b/lib/opte/src/engine/ip/v6.rs index f4b33220..0970ade7 100644 --- a/lib/opte/src/engine/ip/v6.rs +++ b/lib/opte/src/engine/ip/v6.rs @@ -4,13 +4,27 @@ // Copyright 2024 Oxide Computer Company +use crate::engine::packet::MismatchError; +use crate::engine::packet::ParseError; +use crate::engine::predicate::MatchExact; +use crate::engine::predicate::MatchExactVal; +use crate::engine::predicate::MatchPrefix; +use crate::engine::predicate::MatchPrefixVal; use ingot::ip::Ecn; use ingot::ip::IpProtocol; use ingot::ip::LowRentV6EhRepr; use ingot::types::primitives::*; use ingot::types::util::Repeated; +use ingot::types::HeaderLen; use ingot::Ingot; -use opte_api::Ipv6Addr; +pub use opte_api::Ipv6Addr; +pub use opte_api::Ipv6Cidr; +use opte_api::Protocol; +use serde::Deserialize; +use serde::Serialize; +use zerocopy::ByteSlice; + +pub const DDM_HEADER_ID: u8 = 0xFE; #[derive(Debug, Clone, Ingot, Eq, PartialEq)] #[ingot(impl_default)] @@ -36,3 +50,445 @@ pub struct Ipv6 { #[ingot(subparse(on_next_layer))] pub v6ext: Repeated, } + +impl MatchExactVal for Ipv6Addr {} +impl MatchPrefixVal for Ipv6Cidr {} + +impl MatchExact for Ipv6Addr { + fn match_exact(&self, val: &Ipv6Addr) -> bool { + *self == *val + } +} + +impl MatchPrefix for Ipv6Addr { + fn match_prefix(&self, prefix: &Ipv6Cidr) -> bool { + prefix.is_member(*self) + } +} + +impl ValidIpv6 { + #[inline] + pub fn validate(&self, bytes_after: usize) -> Result<(), ParseError> { + let v = self.version(); + if self.version() != 6 { + return Err(ParseError::IllegalValue(MismatchError { + location: c"Ipv6.version", + expected: 6, + actual: v as u64, + })); + } + + let ex_len = 
bytes_after + self.1.packet_length(); + let pll = self.payload_len(); + if ex_len != (self.payload_len() as usize) { + return Err(ParseError::BadLength(MismatchError { + location: c"Ipv6.payload_len", + expected: ex_len as u64, + actual: pll as u64, + })); + } + + Ok(()) + } +} + +#[derive( + Clone, Copy, Debug, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize, +)] +pub struct Ipv6Push { + pub src: Ipv6Addr, + pub dst: Ipv6Addr, + pub proto: Protocol, +} + +#[derive(Clone, Debug, Default, Deserialize, Serialize)] +pub struct Ipv6Mod { + pub src: Option, + pub dst: Option, + pub proto: Option, +} + +#[cfg(test)] +pub(crate) mod test { + use super::*; + use crate::engine::packet::Packet; + use itertools::Itertools; + use smoltcp::wire::IpProtocol; + use smoltcp::wire::Ipv6Address; + use smoltcp::wire::Ipv6FragmentHeader; + use smoltcp::wire::Ipv6FragmentRepr; + use smoltcp::wire::Ipv6HopByHopHeader; + use smoltcp::wire::Ipv6HopByHopRepr; + use smoltcp::wire::Ipv6OptionRepr; + use smoltcp::wire::Ipv6Packet; + use smoltcp::wire::Ipv6Repr; + use smoltcp::wire::Ipv6RoutingHeader; + use smoltcp::wire::Ipv6RoutingRepr; + use std::vec::Vec; + + // Test packet size and payload length + const BUFFER_LEN: usize = 512; + const PAYLOAD_LEN: usize = 512 - Ipv6Hdr::BASE_SIZE; + pub(crate) const SUPPORTED_EXTENSIONS: [IpProtocol; 4] = [ + IpProtocol::HopByHop, + IpProtocol::Ipv6Route, + IpProtocol::Ipv6Frag, + IpProtocol::Unknown(DDM_HEADER_ID), + ]; + + #[test] + fn from_pairs() { + let ip6 = super::Ipv6Addr::from([ + 0x2601, 0x0284, 0x4100, 0xE240, 0x0000, 0x0000, 0xC0A8, 0x01F5, + ]); + + assert_eq!( + ip6.bytes(), + [ + 0x26, 0x01, 0x02, 0x84, 0x41, 0x00, 0xE2, 0x40, 0x00, 0x00, + 0x00, 0x00, 0xC0, 0xA8, 0x01, 0xF5 + ] + ); + } + + fn base_header() -> Ipv6Repr { + Ipv6Repr { + src_addr: Ipv6Address::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + dst_addr: Ipv6Address::new(0xfd00, 0, 0, 0, 0, 0, 0, 2), + next_header: IpProtocol::Tcp, + payload_len: PAYLOAD_LEN, + hop_limit: 6, + } + 
} + + fn hop_by_hop_header() -> Ipv6HopByHopRepr<'static> { + // in 8-octet units, not including the first + const OPTION_LEN: usize = 1; + // SmolTCP limits us to 2 max HBH options in its repr. + // Pad to the next multiple of 8, then one more 8-octet unit. + // - Ext header takes 2B + // - PadN(n) takes 2B, then n bytes. + // => 4 + fill + const LEN: usize = 4 + OPTION_LEN * 8; + static OPTIONS: [Ipv6OptionRepr; 1] = + [Ipv6OptionRepr::PadN(LEN as u8); 1]; + Ipv6HopByHopRepr { + options: heapless::Vec::from_slice(&OPTIONS).unwrap(), + } + } + + fn route_header() -> Ipv6RoutingRepr<'static> { + // In 8-octet units, not including the first, i.e., this just needs the + // home address, 128 bits. + let segments_left = 1; + let home_address = Ipv6Address::new(0xfd00, 0, 0, 0, 0, 0, 0, 1); + Ipv6RoutingRepr::Type2 { segments_left, home_address } + } + + fn fragment_header() -> Ipv6FragmentRepr { + Ipv6FragmentRepr { frag_offset: 128, more_frags: false, ident: 0x17 } + } + + // Generate a test packet. + // + // This creates a base IPv6 header, and any extension headers with protocols + // defined by `extensions`. There is always a base header, and the ULP is + // always defined to be TCP. `extensions` can be empty. + // + // This returns the byte array of the packet, plus the size of the entire + // header, including extensions. + pub(crate) fn generate_test_packet( + extensions: &[IpProtocol], + ) -> (Vec, usize) { + // Create a chain of headers, starting with the base. Emit them into + // byte arrays, to test parsing. + let mut data = vec![0; BUFFER_LEN]; + let mut header_start = 0; + let mut next_header_pos = 6; + let mut header_end = Ipv6Hdr::BASE_SIZE; + let mut buf = &mut data[header_start..]; + + // The base header. The payload length is always the same, but the base + // protocol may be updated. 
+ let base = base_header(); + let mut packet = Ipv6Packet::new_checked(&mut buf).unwrap(); + base.emit(&mut packet); + + if extensions.is_empty() { + // No extensions at all, just base header with a TCP ULP + return (buf.to_vec(), Ipv6Hdr::BASE_SIZE); + } + + for extension in extensions { + // First, update the _previous_ next_header with the type of this + // extension header. They form a linked-list. We do this first, so + // that in the case of the first extension header, we're rewriting + // the `next_header` value in the base header. + buf[next_header_pos] = u8::from(*extension); + + // For every extension header, the `next_header` is the first octet. + // That is, the base header is the only one where it's a different + // position. + next_header_pos = 0; + + // Grab the remaining packet buffer, from the end of the previous + // header. This is where we'll start inserting the current extension + // header. + buf = &mut data[header_end..]; + + // Insert the bytes of each extension header, returning the number + // of octets written. + // + // For each extension header, we need to build the top level ExtHeader + // and set length manually: this is (inner_len / 8) := the number of + // 8-byte blocks FOLLOWING the first. + use IpProtocol::*; + let mut ext_packet = Ipv6ExtHeader::new_checked(&mut buf).unwrap(); + ext_packet.set_next_header(IpProtocol::Tcp); + // Temporarily set high enough to give us enough bytes to emit into. + // XXX: propose a joint emit + set_len for smoltcp. 
+ ext_packet.set_header_len(3); + let len = 2 + match extension { + HopByHop => { + let hbh = hop_by_hop_header(); + let mut hbh_packet = Ipv6HopByHopHeader::new_checked( + ext_packet.payload_mut(), + ) + .unwrap(); + hbh.emit(&mut hbh_packet); + hbh.buffer_len() + } + Ipv6Frag => { + let frag = fragment_header(); + let mut frag_packet = Ipv6FragmentHeader::new_checked( + ext_packet.payload_mut(), + ) + .unwrap(); + fragment_header().emit(&mut frag_packet); + frag.buffer_len() + } + Ipv6Route => { + let route = route_header(); + let mut route_packet = Ipv6RoutingHeader::new_checked( + ext_packet.payload_mut(), + ) + .unwrap(); + route.emit(&mut route_packet); + route.buffer_len() + } + Unknown(x) if x == &DDM_HEADER_ID => { + // TODO: actually build DDM ID + Timestamp values here. + // for now we just emit an empty header here. + 14 + } + _ => unimplemented!( + "Extension header {:#?} unsupported", + extension + ), + }; + ext_packet.set_header_len(match V6ExtClass::from(*extension) { + V6ExtClass::Frag => 0, + V6ExtClass::Rfc6564 => u8::try_from((len - 8) / 8).unwrap(), + _ => unreachable!(), + }); + + // Move the position markers to the new header. 
+ header_start = header_end; + header_end += len; + } + + // Set the last header to point to the ULP + data[header_start] = u8::from(IpProtocol::Tcp); + + (data, header_end) + } + + // Test every permuation of the supported extension headers, verifying the + // computed lengths of: + // + // - Payload length + // - ULP length + // - Extension header length + // - Full header length + #[test] + fn test_extension_header_lengths_ok() { + for n_extensions in 0..SUPPORTED_EXTENSIONS.len() { + for extensions in + SUPPORTED_EXTENSIONS.into_iter().permutations(n_extensions) + { + let (buf, pos) = generate_test_packet(extensions.as_slice()); + let mut pkt = Packet::copy(&buf); + let mut reader = pkt.get_rdr_mut(); + let header = Ipv6Hdr::parse(&mut reader).unwrap(); + assert_all_lengths_ok(&header, pos); + } + } + } + + fn assert_all_lengths_ok(header: &Ipv6Hdr, header_end: usize) { + assert_eq!( + header.hdr_len(), + header_end, + "Header length does not include all extension headers" + ); + assert_eq!( + header.pay_len(), + PAYLOAD_LEN, + "Payload length does not include all extension headers", + ); + assert_eq!( + header.ext_len(), + header_end - Ipv6Hdr::BASE_SIZE, + "Extension header size is incorrect", + ); + assert_eq!( + header.ulp_len(), + PAYLOAD_LEN - header.ext_len(), + "ULP length is not correct" + ); + assert_eq!( + header.total_len(), + PAYLOAD_LEN + Ipv6Hdr::BASE_SIZE, + "Total packet length is not correct", + ); + } + + #[test] + fn test_ipv6_addr_match_exact() { + let addr: Ipv6Addr = "fd00::1".parse().unwrap(); + assert!(addr.match_exact(&addr)); + assert!(!addr.match_exact(&("fd00::2".parse().unwrap()))); + } + + #[test] + fn test_ipv6_cidr_match_prefix() { + let cidr: Ipv6Cidr = "fd00::1/16".parse().unwrap(); + let addr: Ipv6Addr = "fd00::1".parse().unwrap(); + assert!(addr.match_prefix(&cidr)); + + let addr: Ipv6Addr = "fd00::2".parse().unwrap(); + assert!(addr.match_prefix(&cidr)); + + let addr: Ipv6Addr = "fd01::1".parse().unwrap(); + 
assert!(!addr.match_prefix(&cidr)); + + let addr: Ipv6Addr = "fd01::2".parse().unwrap(); + assert!(!addr.match_prefix(&cidr)); + } + + #[test] + fn emit() { + let ip = Ipv6Meta { + src: Ipv6Addr::from_const([ + 0xFE80, 0x0000, 0x0000, 0x0000, 0xBAF8, 0x53FF, 0xFEAF, 0x537D, + ]), + dst: Ipv6Addr::from_const([ + 0xFE80, 0x000, 0x0000, 0x0000, 0x56BE, 0xF7FF, 0xFE0B, 0x09EC, + ]), + proto: Protocol::ICMPv6, + next_hdr: IpProtocol::Icmpv6, + hop_limit: 255, + pay_len: 32, + ext: None, + ext_len: 0, + }; + + let len = ip.hdr_len(); + let mut pkt = Packet::alloc_and_expand(len); + let mut wtr = pkt.seg0_wtr(); + ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); + assert_eq!(len, pkt.len()); + + #[rustfmt::skip] + let expected_bytes = [ + // version + class + label + 0x60, 0x00, 0x00, 0x00, + // payload len + 0x00, 0x20, + // next header + hop limit + 0x3A, 0xFF, + // source address + 0xFE, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xBA, 0xF8, 0x53, 0xFF, 0xFE, 0xAF, 0x53, 0x7D, + // dest address + 0xFE, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x56, 0xBE, 0xF7, 0xFF, 0xFE, 0x0B, 0x09, 0xEC, + ]; + assert_eq!(&expected_bytes, pkt.seg_bytes(0)); + } + + #[test] + fn test_set_total_len() { + // Create a packet with one extension header. + let (buf, _) = generate_test_packet(&[IpProtocol::Ipv6Frag]); + let mut pkt = Packet::copy(&buf); + let mut reader = pkt.get_rdr_mut(); + let mut header = Ipv6Hdr::parse(&mut reader).unwrap(); + + // Set the total length to 128. + // + // The Payload Length field contains the length of both the extension + // headers and the actual ULP. Because we have the Fragmentation header, + // which is a fixed 8-octet thing, this should result in a Payload + // Length of 128 - Ipv6Hdr::BASE_SIZE = 78. 
+ const NEW_SIZE: usize = 128; + header.set_total_len(NEW_SIZE as _); + assert_eq!(header.total_len(), NEW_SIZE); + assert_eq!(header.hdr_len(), Ipv6Hdr::BASE_SIZE + 8); + assert_eq!(header.pay_len(), NEW_SIZE - Ipv6Hdr::BASE_SIZE); + } + + #[test] + fn test_ip6_meta_total_len() { + // Create a packet with one extension header. + let (buf, _) = generate_test_packet(&[IpProtocol::Ipv6Frag]); + let mut pkt = Packet::copy(&buf); + let mut reader = pkt.get_rdr_mut(); + let header = Ipv6Hdr::parse(&mut reader).unwrap(); + + // Previously, the `Ipv6Meta::total_len` method double-counted the + // extension header length. Assert we don't do that here. + let meta = Ipv6Meta::from(&header); + assert!(meta.ext.is_some()); + assert_eq!(meta.ext_len, 8); // Fixed size + assert_eq!( + meta.total_len() as usize, + header.hdr_len() + header.ulp_len() + ); + } + + #[test] + fn bad_ipv6_version_caught() { + // This packet was produced due to prior sidecar testing, + // and put 4B between Eth and IPv6. This should fail to + // parse 0x00 as a v6 version. 
+ #[rustfmt::skip] + let buf: &[u8] = &[ + // Garbage + 0x00, 0xc8, 0x08, 0x00, + // IPv6 + 0x60, 0x00, 0x00, 0x00, 0x02, 0x27, 0x11, 0xfe, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xfd, 0x00, 0x11, 0x22, 0x33, 0x44, 0x01, 0x11, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x17, 0xc1, 0x17, 0xc1, + 0x02, 0x27, 0xcf, 0x4e, 0x01, 0x00, 0x65, 0x58, 0x00, 0x00, 0x64, + 0x00, 0x01, 0x29, 0x00, 0x00, 0xa8, 0x40, 0x25, 0xff, 0xe8, 0x5f, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x81, 0x00, 0x45, 0x00, 0x02, + 0x05, 0xe0, 0x80, 0x40, 0x00, 0x37, 0x06, 0x1a, 0x9f, 0xc6, 0xd3, + 0x7a, 0x40, 0x2d, 0x9a, 0xd8, 0x25, 0xa1, 0x22, 0x01, 0xbb, 0xad, + 0x22, 0x51, 0x93, 0xa5, 0xf8, 0x01, 0x58, 0x80, 0x18, 0x01, 0x26, + 0x02, 0x24, 0x00, 0x00, 0x01, 0x01, 0x08, 0x0a, 0x48, 0xd7, 0x9a, + 0x23, 0x04, 0x31, 0x9f, 0x43, 0x14, 0x03, 0x03, 0x00, 0x01, 0x01, + 0x17, 0x03, 0x03, 0x00, 0x45, 0xf6, 0xcd, 0xe2, 0xc1, 0xe5, 0xa0, + 0x65, 0xa7, 0xfe, 0x29, 0xa8, 0xa2, 0xb0, 0x57, 0x91, 0x7e, 0xac, + 0xc8, 0x34, 0xdd, 0x6b, 0xfa, 0x21, + ]; + + let mut pkt = Packet::copy(buf); + let mut reader = pkt.get_rdr_mut(); + assert!(matches!( + Ipv6Hdr::parse(&mut reader), + Err(Ipv6HdrError::BadVersion { vsn: 0 }) + )); + } +} diff --git a/lib/opte/src/engine/ip4.rs b/lib/opte/src/engine/ip4.rs deleted file mode 100644 index 0dba674e..00000000 --- a/lib/opte/src/engine/ip4.rs +++ /dev/null @@ -1,241 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -// Copyright 2024 Oxide Computer Company - -//! IPv4 headers. 
- -use super::predicate::MatchExact; -use super::predicate::MatchExactVal; -use super::predicate::MatchPrefix; -use super::predicate::MatchPrefixVal; -use super::predicate::MatchRangeVal; -use alloc::string::String; -use core::fmt; -use core::fmt::Debug; -use core::fmt::Display; -use core::num::ParseIntError; -use core::result; -pub use opte_api::Ipv4Addr; -pub use opte_api::Ipv4Cidr; -pub use opte_api::Ipv4PrefixLen; -pub use opte_api::Protocol; -use serde::Deserialize; -use serde::Serialize; - -pub const IPV4_HDR_LEN_MASK: u8 = 0x0F; -pub const IPV4_HDR_VER_MASK: u8 = 0xF0; -pub const IPV4_HDR_VER_SHIFT: u8 = 4; -pub const IPV4_VERSION: u8 = 4; - -pub const DEF_ROUTE: &str = "0.0.0.0/0"; - -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum IpError { - BadPrefix(u8), - Ipv4NonPrivateNetwork(Ipv4Addr), - MalformedCidr(String), - MalformedInt, - MalformedIp(String), - MalformedPrefix(String), - Other(String), -} - -impl From for IpError { - fn from(_err: ParseIntError) -> Self { - IpError::MalformedInt - } -} - -impl From for IpError { - fn from(err: String) -> Self { - IpError::Other(err) - } -} - -impl Display for IpError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - use IpError::*; - - match self { - BadPrefix(prefix) => { - write!(f, "bad prefix: {}", prefix) - } - - Ipv4NonPrivateNetwork(addr) => { - write!(f, "non-private network: {}", addr) - } - - MalformedCidr(cidr) => { - write!(f, "malformed CIDR: {}", cidr) - } - - MalformedInt => { - write!(f, "malformed integer") - } - - MalformedIp(ip) => { - write!(f, "malformed IP: {}", ip) - } - - MalformedPrefix(prefix) => { - write!(f, "malformed prefix: {}", prefix) - } - - Other(msg) => { - write!(f, "{}", msg) - } - } - } -} - -impl From for String { - fn from(err: IpError) -> Self { - format!("{}", err) - } -} - -impl MatchPrefixVal for Ipv4Cidr {} - -#[test] -fn cidr_match() { - let ip1 = "192.168.2.22".parse::().unwrap(); - let cidr1 = "192.168.2.0/24".parse().unwrap(); - 
assert!(ip1.match_prefix(&cidr1)); - - let ip2 = "10.7.7.7".parse::().unwrap(); - let cidr2 = "10.0.0.0/8".parse().unwrap(); - assert!(ip2.match_prefix(&cidr2)); - - let ip3 = "52.10.128.69".parse::().unwrap(); - let cidr3 = DEF_ROUTE.parse().unwrap(); - assert!(ip3.match_prefix(&cidr3)); -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub struct Ipv4CidrPrefix { - val: u8, -} - -impl Ipv4CidrPrefix { - pub fn new(net_prefix: u8) -> result::Result { - if net_prefix > 32 { - return Err(IpError::BadPrefix(net_prefix)); - } - - Ok(Ipv4CidrPrefix { val: net_prefix }) - } -} - -impl MatchExactVal for Ipv4Addr {} -impl MatchRangeVal for Ipv4Addr {} - -impl MatchExact for Ipv4Addr { - fn match_exact(&self, val: &Ipv4Addr) -> bool { - *self == *val - } -} - -impl MatchPrefix for Ipv4Addr { - fn match_prefix(&self, prefix: &Ipv4Cidr) -> bool { - prefix.is_member(*self) - } -} - -#[test] -fn match_check() { - let ip = "192.168.2.11".parse::().unwrap(); - assert!(ip.match_exact(&ip)); - assert!(ip.match_prefix(&"192.168.2.0/24".parse::().unwrap())); -} - -impl MatchExactVal for Protocol {} - -impl MatchExact for Protocol { - fn match_exact(&self, val: &Protocol) -> bool { - *self == *val - } -} - -#[derive( - Clone, Copy, Debug, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize, -)] -pub struct Ipv4Push { - pub src: Ipv4Addr, - pub dst: Ipv4Addr, - pub proto: Protocol, -} - -#[derive(Clone, Debug, Default, Deserialize, Serialize)] -pub struct Ipv4Mod { - pub src: Option, - pub dst: Option, - pub proto: Option, -} - -/// Options for computing a ULP checksum. -#[derive(Clone, Copy, Debug)] -pub enum UlpCsumOpt { - /// Compute a partial checksum, using only the pseudo-header. - /// - /// This is intended in situations in which computing the checksum of the - /// body itself can be offloaded to hardware. - Partial, - /// Compute the full checksum, including the pseudo-header, ULP header and - /// the ULP body. 
- Full, -} - -#[cfg(test)] -mod test { - use super::*; - use crate::engine::packet::Packet; - - #[test] - fn emit() { - let ip = Ipv4Meta { - src: Ipv4Addr::from([10, 0, 0, 54]), - dst: Ipv4Addr::from([52, 10, 128, 69]), - proto: Protocol::TCP, - ttl: 64, - ident: 2662, - hdr_len: 20, - total_len: 60, - csum: [0; 2], - }; - - let len = ip.hdr_len(); - assert_eq!(20, len); - - let mut pkt = Packet::alloc_and_expand(len); - let mut wtr = pkt.seg0_wtr(); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - assert_eq!(len, pkt.len()); - - #[rustfmt::skip] - let expected_bytes = vec![ - // version + IHL - 0x45, - // DSCP + ECN - 0x00, - // total length - 0x00, 0x3C, - // ident - 0x0A, 0x66, - // flags + frag offset - 0x40, 0x00, - // TTL - 0x40, - // protocol - 0x06, - // checksum - 0x00, 0x00, - // source - 0x0A, 0x00, 0x00, 0x36, - // dest - 0x34, 0x0A, 0x80, 0x45, - ]; - assert_eq!(&expected_bytes, pkt.seg_bytes(0)); - } -} diff --git a/lib/opte/src/engine/ip6.rs b/lib/opte/src/engine/ip6.rs deleted file mode 100644 index 8ab06d9f..00000000 --- a/lib/opte/src/engine/ip6.rs +++ /dev/null @@ -1,463 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -// Copyright 2024 Oxide Computer Company - -//! IPv6 headers. 
- -use super::ip4::Protocol; -pub use super::ip4::UlpCsumOpt; -use crate::engine::predicate::MatchExact; -use crate::engine::predicate::MatchExactVal; -use crate::engine::predicate::MatchPrefix; -use crate::engine::predicate::MatchPrefixVal; -pub use opte_api::Ipv6Addr; -pub use opte_api::Ipv6Cidr; -use serde::Deserialize; -use serde::Serialize; - -pub const IPV6_HDR_VSN_MASK: u8 = 0xF0; -pub const IPV6_HDR_VSN_SHIFT: u8 = 4; -pub const IPV6_VERSION: u8 = 6; -pub const DDM_HEADER_ID: u8 = 0xFE; - -impl MatchExactVal for Ipv6Addr {} -impl MatchPrefixVal for Ipv6Cidr {} - -impl MatchExact for Ipv6Addr { - fn match_exact(&self, val: &Ipv6Addr) -> bool { - *self == *val - } -} - -impl MatchPrefix for Ipv6Addr { - fn match_prefix(&self, prefix: &Ipv6Cidr) -> bool { - prefix.is_member(*self) - } -} - -#[derive( - Clone, Copy, Debug, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize, -)] -pub struct Ipv6Push { - pub src: Ipv6Addr, - pub dst: Ipv6Addr, - pub proto: Protocol, -} - -#[derive(Clone, Debug, Default, Deserialize, Serialize)] -pub struct Ipv6Mod { - pub src: Option, - pub dst: Option, - pub proto: Option, -} - -#[cfg(test)] -pub(crate) mod test { - use super::*; - use crate::engine::packet::Packet; - use itertools::Itertools; - use smoltcp::wire::IpProtocol; - use smoltcp::wire::Ipv6Address; - use smoltcp::wire::Ipv6FragmentHeader; - use smoltcp::wire::Ipv6FragmentRepr; - use smoltcp::wire::Ipv6HopByHopHeader; - use smoltcp::wire::Ipv6HopByHopRepr; - use smoltcp::wire::Ipv6OptionRepr; - use smoltcp::wire::Ipv6Packet; - use smoltcp::wire::Ipv6Repr; - use smoltcp::wire::Ipv6RoutingHeader; - use smoltcp::wire::Ipv6RoutingRepr; - use std::vec::Vec; - - // Test packet size and payload length - const BUFFER_LEN: usize = 512; - const PAYLOAD_LEN: usize = 512 - Ipv6Hdr::BASE_SIZE; - pub(crate) const SUPPORTED_EXTENSIONS: [IpProtocol; 4] = [ - IpProtocol::HopByHop, - IpProtocol::Ipv6Route, - IpProtocol::Ipv6Frag, - IpProtocol::Unknown(DDM_HEADER_ID), - ]; - - 
#[test] - fn from_pairs() { - let ip6 = super::Ipv6Addr::from([ - 0x2601, 0x0284, 0x4100, 0xE240, 0x0000, 0x0000, 0xC0A8, 0x01F5, - ]); - - assert_eq!( - ip6.bytes(), - [ - 0x26, 0x01, 0x02, 0x84, 0x41, 0x00, 0xE2, 0x40, 0x00, 0x00, - 0x00, 0x00, 0xC0, 0xA8, 0x01, 0xF5 - ] - ); - } - - fn base_header() -> Ipv6Repr { - Ipv6Repr { - src_addr: Ipv6Address::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), - dst_addr: Ipv6Address::new(0xfd00, 0, 0, 0, 0, 0, 0, 2), - next_header: IpProtocol::Tcp, - payload_len: PAYLOAD_LEN, - hop_limit: 6, - } - } - - fn hop_by_hop_header() -> Ipv6HopByHopRepr<'static> { - // in 8-octet units, not including the first - const OPTION_LEN: usize = 1; - // SmolTCP limits us to 2 max HBH options in its repr. - // Pad to the next multiple of 8, then one more 8-octet unit. - // - Ext header takes 2B - // - PadN(n) takes 2B, then n bytes. - // => 4 + fill - const LEN: usize = 4 + OPTION_LEN * 8; - static OPTIONS: [Ipv6OptionRepr; 1] = - [Ipv6OptionRepr::PadN(LEN as u8); 1]; - Ipv6HopByHopRepr { - options: heapless::Vec::from_slice(&OPTIONS).unwrap(), - } - } - - fn route_header() -> Ipv6RoutingRepr<'static> { - // In 8-octet units, not including the first, i.e., this just needs the - // home address, 128 bits. - let segments_left = 1; - let home_address = Ipv6Address::new(0xfd00, 0, 0, 0, 0, 0, 0, 1); - Ipv6RoutingRepr::Type2 { segments_left, home_address } - } - - fn fragment_header() -> Ipv6FragmentRepr { - Ipv6FragmentRepr { frag_offset: 128, more_frags: false, ident: 0x17 } - } - - // Generate a test packet. - // - // This creates a base IPv6 header, and any extension headers with protocols - // defined by `extensions`. There is always a base header, and the ULP is - // always defined to be TCP. `extensions` can be empty. - // - // This returns the byte array of the packet, plus the size of the entire - // header, including extensions. 
- pub(crate) fn generate_test_packet( - extensions: &[IpProtocol], - ) -> (Vec, usize) { - // Create a chain of headers, starting with the base. Emit them into - // byte arrays, to test parsing. - let mut data = vec![0; BUFFER_LEN]; - let mut header_start = 0; - let mut next_header_pos = 6; - let mut header_end = Ipv6Hdr::BASE_SIZE; - let mut buf = &mut data[header_start..]; - - // The base header. The payload length is always the same, but the base - // protocol may be updated. - let base = base_header(); - let mut packet = Ipv6Packet::new_checked(&mut buf).unwrap(); - base.emit(&mut packet); - - if extensions.is_empty() { - // No extensions at all, just base header with a TCP ULP - return (buf.to_vec(), Ipv6Hdr::BASE_SIZE); - } - - for extension in extensions { - // First, update the _previous_ next_header with the type of this - // extension header. They form a linked-list. We do this first, so - // that in the case of the first extension header, we're rewriting - // the `next_header` value in the base header. - buf[next_header_pos] = u8::from(*extension); - - // For every extension header, the `next_header` is the first octet. - // That is, the base header is the only one where it's a different - // position. - next_header_pos = 0; - - // Grab the remaining packet buffer, from the end of the previous - // header. This is where we'll start inserting the current extension - // header. - buf = &mut data[header_end..]; - - // Insert the bytes of each extension header, returning the number - // of octets written. - // - // For each extension header, we need to build the top level ExtHeader - // and set length manually: this is (inner_len / 8) := the number of - // 8-byte blocks FOLLOWING the first. - use IpProtocol::*; - let mut ext_packet = Ipv6ExtHeader::new_checked(&mut buf).unwrap(); - ext_packet.set_next_header(IpProtocol::Tcp); - // Temporarily set high enough to give us enough bytes to emit into. - // XXX: propose a joint emit + set_len for smoltcp. 
- ext_packet.set_header_len(3); - let len = 2 + match extension { - HopByHop => { - let hbh = hop_by_hop_header(); - let mut hbh_packet = Ipv6HopByHopHeader::new_checked( - ext_packet.payload_mut(), - ) - .unwrap(); - hbh.emit(&mut hbh_packet); - hbh.buffer_len() - } - Ipv6Frag => { - let frag = fragment_header(); - let mut frag_packet = Ipv6FragmentHeader::new_checked( - ext_packet.payload_mut(), - ) - .unwrap(); - fragment_header().emit(&mut frag_packet); - frag.buffer_len() - } - Ipv6Route => { - let route = route_header(); - let mut route_packet = Ipv6RoutingHeader::new_checked( - ext_packet.payload_mut(), - ) - .unwrap(); - route.emit(&mut route_packet); - route.buffer_len() - } - Unknown(x) if x == &DDM_HEADER_ID => { - // TODO: actually build DDM ID + Timestamp values here. - // for now we just emit an empty header here. - 14 - } - _ => unimplemented!( - "Extension header {:#?} unsupported", - extension - ), - }; - ext_packet.set_header_len(match V6ExtClass::from(*extension) { - V6ExtClass::Frag => 0, - V6ExtClass::Rfc6564 => u8::try_from((len - 8) / 8).unwrap(), - _ => unreachable!(), - }); - - // Move the position markers to the new header. 
- header_start = header_end; - header_end += len; - } - - // Set the last header to point to the ULP - data[header_start] = u8::from(IpProtocol::Tcp); - - (data, header_end) - } - - // Test every permuation of the supported extension headers, verifying the - // computed lengths of: - // - // - Payload length - // - ULP length - // - Extension header length - // - Full header length - #[test] - fn test_extension_header_lengths_ok() { - for n_extensions in 0..SUPPORTED_EXTENSIONS.len() { - for extensions in - SUPPORTED_EXTENSIONS.into_iter().permutations(n_extensions) - { - let (buf, pos) = generate_test_packet(extensions.as_slice()); - let mut pkt = Packet::copy(&buf); - let mut reader = pkt.get_rdr_mut(); - let header = Ipv6Hdr::parse(&mut reader).unwrap(); - assert_all_lengths_ok(&header, pos); - } - } - } - - fn assert_all_lengths_ok(header: &Ipv6Hdr, header_end: usize) { - assert_eq!( - header.hdr_len(), - header_end, - "Header length does not include all extension headers" - ); - assert_eq!( - header.pay_len(), - PAYLOAD_LEN, - "Payload length does not include all extension headers", - ); - assert_eq!( - header.ext_len(), - header_end - Ipv6Hdr::BASE_SIZE, - "Extension header size is incorrect", - ); - assert_eq!( - header.ulp_len(), - PAYLOAD_LEN - header.ext_len(), - "ULP length is not correct" - ); - assert_eq!( - header.total_len(), - PAYLOAD_LEN + Ipv6Hdr::BASE_SIZE, - "Total packet length is not correct", - ); - } - - #[test] - fn test_ipv6_addr_match_exact() { - let addr: Ipv6Addr = "fd00::1".parse().unwrap(); - assert!(addr.match_exact(&addr)); - assert!(!addr.match_exact(&("fd00::2".parse().unwrap()))); - } - - #[test] - fn test_ipv6_cidr_match_prefix() { - let cidr: Ipv6Cidr = "fd00::1/16".parse().unwrap(); - let addr: Ipv6Addr = "fd00::1".parse().unwrap(); - assert!(addr.match_prefix(&cidr)); - - let addr: Ipv6Addr = "fd00::2".parse().unwrap(); - assert!(addr.match_prefix(&cidr)); - - let addr: Ipv6Addr = "fd01::1".parse().unwrap(); - 
assert!(!addr.match_prefix(&cidr)); - - let addr: Ipv6Addr = "fd01::2".parse().unwrap(); - assert!(!addr.match_prefix(&cidr)); - } - - #[test] - fn emit() { - let ip = Ipv6Meta { - src: Ipv6Addr::from_const([ - 0xFE80, 0x0000, 0x0000, 0x0000, 0xBAF8, 0x53FF, 0xFEAF, 0x537D, - ]), - dst: Ipv6Addr::from_const([ - 0xFE80, 0x000, 0x0000, 0x0000, 0x56BE, 0xF7FF, 0xFE0B, 0x09EC, - ]), - proto: Protocol::ICMPv6, - next_hdr: IpProtocol::Icmpv6, - hop_limit: 255, - pay_len: 32, - ext: None, - ext_len: 0, - }; - - let len = ip.hdr_len(); - let mut pkt = Packet::alloc_and_expand(len); - let mut wtr = pkt.seg0_wtr(); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - assert_eq!(len, pkt.len()); - - #[rustfmt::skip] - let expected_bytes = [ - // version + class + label - 0x60, 0x00, 0x00, 0x00, - // payload len - 0x00, 0x20, - // next header + hop limit - 0x3A, 0xFF, - // source address - 0xFE, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xBA, 0xF8, 0x53, 0xFF, 0xFE, 0xAF, 0x53, 0x7D, - // dest address - 0xFE, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x56, 0xBE, 0xF7, 0xFF, 0xFE, 0x0B, 0x09, 0xEC, - ]; - assert_eq!(&expected_bytes, pkt.seg_bytes(0)); - } - - #[test] - fn test_set_total_len() { - // Create a packet with one extension header. - let (buf, _) = generate_test_packet(&[IpProtocol::Ipv6Frag]); - let mut pkt = Packet::copy(&buf); - let mut reader = pkt.get_rdr_mut(); - let mut header = Ipv6Hdr::parse(&mut reader).unwrap(); - - // Set the total length to 128. - // - // The Payload Length field contains the length of both the extension - // headers and the actual ULP. Because we have the Fragmentation header, - // which is a fixed 8-octet thing, this should result in a Payload - // Length of 128 - Ipv6Hdr::BASE_SIZE = 78. 
- const NEW_SIZE: usize = 128; - header.set_total_len(NEW_SIZE as _); - assert_eq!(header.total_len(), NEW_SIZE); - assert_eq!(header.hdr_len(), Ipv6Hdr::BASE_SIZE + 8); - assert_eq!(header.pay_len(), NEW_SIZE - Ipv6Hdr::BASE_SIZE); - } - - #[test] - fn test_ip6_meta_total_len() { - // Create a packet with one extension header. - let (buf, _) = generate_test_packet(&[IpProtocol::Ipv6Frag]); - let mut pkt = Packet::copy(&buf); - let mut reader = pkt.get_rdr_mut(); - let header = Ipv6Hdr::parse(&mut reader).unwrap(); - - // Previously, the `Ipv6Meta::total_len` method double-counted the - // extension header length. Assert we don't do that here. - let meta = Ipv6Meta::from(&header); - assert!(meta.ext.is_some()); - assert_eq!(meta.ext_len, 8); // Fixed size - assert_eq!( - meta.total_len() as usize, - header.hdr_len() + header.ulp_len() - ); - } - - #[test] - fn bad_ipv6_version_caught() { - // This packet was produced due to prior sidecar testing, - // and put 4B between Eth and IPv6. This should fail to - // parse 0x00 as a v6 version. 
- #[rustfmt::skip] - let buf: &[u8] = &[ - // Garbage - 0x00, 0xc8, 0x08, 0x00, - // IPv6 - 0x60, 0x00, 0x00, 0x00, 0x02, 0x27, 0x11, 0xfe, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0xfd, 0x00, 0x11, 0x22, 0x33, 0x44, 0x01, 0x11, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x17, 0xc1, 0x17, 0xc1, - 0x02, 0x27, 0xcf, 0x4e, 0x01, 0x00, 0x65, 0x58, 0x00, 0x00, 0x64, - 0x00, 0x01, 0x29, 0x00, 0x00, 0xa8, 0x40, 0x25, 0xff, 0xe8, 0x5f, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x81, 0x00, 0x45, 0x00, 0x02, - 0x05, 0xe0, 0x80, 0x40, 0x00, 0x37, 0x06, 0x1a, 0x9f, 0xc6, 0xd3, - 0x7a, 0x40, 0x2d, 0x9a, 0xd8, 0x25, 0xa1, 0x22, 0x01, 0xbb, 0xad, - 0x22, 0x51, 0x93, 0xa5, 0xf8, 0x01, 0x58, 0x80, 0x18, 0x01, 0x26, - 0x02, 0x24, 0x00, 0x00, 0x01, 0x01, 0x08, 0x0a, 0x48, 0xd7, 0x9a, - 0x23, 0x04, 0x31, 0x9f, 0x43, 0x14, 0x03, 0x03, 0x00, 0x01, 0x01, - 0x17, 0x03, 0x03, 0x00, 0x45, 0xf6, 0xcd, 0xe2, 0xc1, 0xe5, 0xa0, - 0x65, 0xa7, 0xfe, 0x29, 0xa8, 0xa2, 0xb0, 0x57, 0x91, 0x7e, 0xac, - 0xc8, 0x34, 0xdd, 0x6b, 0xfa, 0x21, - ]; - - let mut pkt = Packet::copy(buf); - let mut reader = pkt.get_rdr_mut(); - assert!(matches!( - Ipv6Hdr::parse(&mut reader), - Err(Ipv6HdrError::BadVersion { vsn: 0 }) - )); - } - - #[test] - fn too_many_exts_are_parse_error() { - // Create a packet with entirely too many extension headers. 80B! 
- let (buf, _) = generate_test_packet(&[ - IpProtocol::Ipv6Route, - IpProtocol::Ipv6Route, - IpProtocol::Ipv6Route, - IpProtocol::Ipv6Route, - IpProtocol::Ipv6Route, - IpProtocol::Ipv6Route, - IpProtocol::Ipv6Route, - IpProtocol::Ipv6Route, - IpProtocol::Ipv6Route, - IpProtocol::Ipv6Route, - IpProtocol::Ipv6Route, - ]); - let mut pkt = Packet::copy(&buf); - let mut reader = pkt.get_rdr_mut(); - assert!(matches!( - Ipv6Hdr::parse(&mut reader), - Err(Ipv6HdrError::ExtensionsTooLarge) - )); - } -} diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index 777fea80..b0852ec5 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -20,10 +20,6 @@ pub mod headers; pub mod icmp; pub mod ioctl; pub mod ip; -#[macro_use] -pub mod ip4; -#[macro_use] -pub mod ip6; pub mod layer; pub mod nat; #[macro_use] @@ -54,63 +50,12 @@ use ingot_packet::MsgBlk; use ingot_packet::OpteMeta; use ingot_packet::OpteParsed2; use ingot_packet::Packet2; -use ip4::IpError; pub use opte_api::Direction; use parse::ValidNoEncap; use rule::CompiledTransform; use zerocopy::ByteSlice; use zerocopy::ByteSliceMut; -// TODO Currently I'm using this for parsing many different things. It -// might be wise to have different parse error types. E.g., one for -// parsing ioctl strings, another for parsing IPv4 strings, for IPv6, -// etc. -// -// TODO This probably doesn't belong in this module. 
-#[derive(Clone, Debug, Eq, PartialEq)] -pub enum ParseErr { - BadAction, - BadAddrError, - BadDirectionError, - BadProtoError, - BadToken(String), - InvalidPort, - IpError(IpError), - Malformed, - MalformedInt, - MalformedPort, - MissingField, - Other(String), - UnknownToken(String), - ValTooLong(String, usize), -} - -impl fmt::Display for ParseErr { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{:?}", self) - } -} - -pub type ParseResult = core::result::Result; - -impl From for ParseErr { - fn from(err: IpError) -> Self { - ParseErr::IpError(err) - } -} - -impl From for ParseErr { - fn from(_err: ParseIntError) -> Self { - ParseErr::MalformedInt - } -} - -impl From for ParseErr { - fn from(err: String) -> Self { - ParseErr::Other(err) - } -} - /// When set to 1 we will panic in some situations instead of just /// flagging in error. This can be useful for debugging certain /// scenarios in development. @@ -347,6 +292,9 @@ pub trait LightweightMeta: Into> { /// Provide a view of internal TCP state. fn inner_tcp(&self) -> Option<&impl TcpRef>; + + /// Determines whether headers have consistent lengths/mandatory fields set. 
+ fn validate(&self, pkt_len: usize) -> Result<(), ParseError>; } /// A generic ULP parser, useful for testing inside of the opte crate diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 621704ba..a1875c37 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -15,10 +15,11 @@ use super::headers::IpAddr; use super::headers::AF_INET; use super::headers::AF_INET6; use super::ingot_packet::MsgBlk; -use super::ip4::Ipv4Addr; -use super::ip4::Protocol; -use super::ip6::Ipv6Addr; +use super::ip::v4::Ipv4Addr; +use super::ip::v4::Protocol; +use super::ip::v6::Ipv6Addr; use crate::d_error::DError; +use core::ffi::CStr; use core::fmt; use core::fmt::Display; use core::hash::Hash; @@ -391,22 +392,20 @@ pub enum WrapError { Chain, } -/// Some functions may return multiple types of errors. -#[derive(Clone, Debug, DError)] -pub enum PacketError { - Parse(ParseError), - Wrap(WrapError), +#[derive(Clone, Debug, Eq, PartialEq, DError)] +pub enum ParseError { + IngotError(ingot::types::PacketParseError), + IllegalValue(MismatchError), + BadLength(MismatchError), } -impl From for PacketError { - fn from(e: ParseError) -> Self { - Self::Parse(e) +impl DError for ingot::types::PacketParseError { + fn discriminant(&self) -> &'static core::ffi::CStr { + self.header().as_cstr() } -} -impl From for PacketError { - fn from(e: WrapError) -> Self { - Self::Wrap(e) + fn child(&self) -> Option<&dyn DError> { + Some(self.error()) } } @@ -420,75 +419,28 @@ impl DError for ingot::types::ParseError { } } -impl DError for ingot::types::PacketParseError { - fn discriminant(&self) -> &'static core::ffi::CStr { - self.header().as_cstr() +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct MismatchError { + pub location: &'static CStr, + pub expected: u64, + pub actual: u64, +} + +impl DError for MismatchError { + fn discriminant(&self) -> &'static CStr { + self.location } fn child(&self) -> Option<&dyn DError> { - Some(self.error()) + None } -} - 
-#[derive(Clone, Debug, Eq, PartialEq, DError)] -#[derror(leaf_data = ParseError::data)] -pub enum ParseError { - // TODO: I think this may be the only err variant? - IngotError(ingot::types::PacketParseError), - BadInnerIpLen { - expected: usize, - actual: usize, - }, - BadInnerUlpLen { - expected: usize, - actual: usize, - }, - BadOuterIpLen { - expected: usize, - actual: usize, - }, - BadOuterUlpLen { - expected: usize, - actual: usize, - }, - BadRead(ReadErr), - TruncatedBody { - expected: usize, - actual: usize, - }, - #[leaf] - UnexpectedEtherType(super::ether::EtherType), - #[leaf] - UnsupportedEtherType(u16), - #[leaf] - UnexpectedProtocol(Protocol), - #[leaf] - UnexpectedDestPort(u16), - #[leaf] - UnsupportedProtocol(Protocol), -} - -impl ParseError { - fn data(&self, data: &mut [u64]) { - match self { - Self::BadInnerIpLen { expected, actual } - | Self::BadInnerUlpLen { expected, actual } - | Self::BadOuterIpLen { expected, actual } - | Self::BadOuterUlpLen { expected, actual } - | Self::TruncatedBody { expected, actual } => { - [data[0], data[1]] = [*expected as u64, *actual as u64] - } - Self::UnexpectedEtherType(eth) => data[0] = u16::from(*eth).into(), - Self::UnsupportedEtherType(eth) => data[0] = *eth as u64, - Self::UnexpectedProtocol(proto) => { - data[0] = u8::from(*proto).into() - } - Self::UnexpectedDestPort(port) => data[0] = (*port).into(), - Self::UnsupportedProtocol(proto) => { - data[0] = u8::from(*proto).into() - } - _ => {} + fn leaf_data(&self, data: &mut [u64]) { + if let Some(v) = data.get_mut(0) { + *v = self.expected; + } + if let Some(v) = data.get_mut(1) { + *v = self.expected; } } } @@ -499,38 +451,14 @@ impl From for ParseError { } } -impl From for ParseError { - fn from(err: ReadErr) -> Self { - Self::BadRead(err) - } -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq, DError)] -pub enum ReadErr { - BadLayout, - EndOfPacket, - NotEnoughBytes, - OutOfRange, - StraddledRead, - NotImplemented, -} - #[derive(Clone, Copy, Debug, Eq, 
PartialEq)] pub enum WriteError { BadLayout, EndOfPacket, NotEnoughBytes { available: usize, needed: usize }, - Read(ReadErr), StraddledWrite, } -impl From for WriteError { - fn from(e: ReadErr) -> Self { - Self::Read(e) - } -} - -pub type ReadResult = result::Result; pub type WriteResult = result::Result; /// The common entry into an `allocb(9F)` implementation that works in diff --git a/lib/opte/src/engine/parse.rs b/lib/opte/src/engine/parse.rs index 1c5c9982..35b4c958 100644 --- a/lib/opte/src/engine/parse.rs +++ b/lib/opte/src/engine/parse.rs @@ -26,6 +26,8 @@ use super::ip::ValidL3; use super::ip::L3; use super::packet::AddrPair; use super::packet::InnerFlowId; +use super::packet::MismatchError; +use super::packet::ParseError; use super::packet::FLOW_ID_DEFAULT; use super::rule::CompiledTransform; use super::LightweightMeta; @@ -361,6 +363,47 @@ impl LightweightMeta for ValidNoEncap { _ => None, } } + + #[inline] + fn validate(&self, pkt_len: usize) -> Result<(), ParseError> { + if let Some(l3) = &self.inner_l3 { + let rem_len = pkt_len - &(&self.inner_eth, l3).packet_length(); + l3.validate(rem_len)?; + if let Some(ulp) = &self.inner_ulp { + let rem_len = rem_len - ulp.packet_length(); + ulp.validate(rem_len)?; + } + } + + Ok(()) + } +} + +#[inline] +fn validate_udp( + pkt: &ValidUdp, + bytes_after: usize, +) -> Result<(), ParseError> { + let wanted_len = bytes_after + pkt.packet_length(); + if pkt.length() as usize == wanted_len { + Ok(()) + } else { + Err(ParseError::BadLength(MismatchError { + location: c"Udp.length", + expected: wanted_len as u64, + actual: pkt.length() as u64, + })) + } +} + +impl ValidUlp { + #[inline] + fn validate(&self, bytes_after: usize) -> Result<(), ParseError> { + match self { + ValidUlp::Udp(u) => validate_udp(u, bytes_after), + _ => Ok(()), + } + } } impl From> for OpteMeta { @@ -539,6 +582,26 @@ impl LightweightMeta for ValidGeneveOverV6 { _ => None, } } + + #[inline] + fn validate(&self, pkt_len: usize) -> Result<(), 
ParseError> { + let rem_len = + pkt_len - (&self.outer_eth, &self.outer_v6).packet_length(); + self.outer_v6.validate(rem_len)?; + + let rem_len = rem_len - self.outer_udp.packet_length(); + validate_udp(&self.outer_udp, rem_len)?; + + let rem_len = rem_len + - &(&self.outer_encap, &self.outer_eth, &self.inner_l3) + .packet_length(); + self.inner_l3.validate(rem_len)?; + + let rem_len = rem_len - self.inner_ulp.packet_length(); + self.inner_ulp.validate(rem_len)?; + + Ok(()) + } } #[inline] diff --git a/lib/opte/src/engine/predicate.rs b/lib/opte/src/engine/predicate.rs index 5cc06e46..ca74ad7f 100644 --- a/lib/opte/src/engine/predicate.rs +++ b/lib/opte/src/engine/predicate.rs @@ -15,14 +15,14 @@ use super::icmp::v6::MessageType as Icmpv6MessageType; use super::ingot_packet::ulp_dst_port; use super::ingot_packet::ulp_src_port; use super::ingot_packet::MblkPacketData; +use super::ip::v4::Ipv4Addr; +use super::ip::v4::Ipv4Cidr; use super::ip::v4::Ipv4Ref; +use super::ip::v4::Protocol; +use super::ip::v6::Ipv6Addr; +use super::ip::v6::Ipv6Cidr; use super::ip::v6::Ipv6Ref; use super::ip::L3; -use super::ip4::Ipv4Addr; -use super::ip4::Ipv4Cidr; -use super::ip4::Protocol; -use super::ip6::Ipv6Addr; -use super::ip6::Ipv6Cidr; use super::port::meta::ActionMeta; use alloc::boxed::Box; use alloc::string::String; diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 5b00643e..cd2dfb45 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -620,18 +620,11 @@ pub trait MetaAction: Display { #[derive(Debug)] pub enum GenErr { - BadPayload(super::packet::ReadErr), Malformed, MissingMeta, Unexpected(String), } -impl From for GenErr { - fn from(err: super::packet::ReadErr) -> Self { - Self::BadPayload(err) - } -} - impl From for GenErr { fn from(_err: smoltcp::wire::Error) -> Self { Self::Malformed diff --git a/lib/oxide-vpc/src/engine/gateway/dhcp.rs b/lib/oxide-vpc/src/engine/gateway/dhcp.rs index ee616441..21c78373 100644 --- 
a/lib/oxide-vpc/src/engine/gateway/dhcp.rs +++ b/lib/oxide-vpc/src/engine/gateway/dhcp.rs @@ -17,7 +17,7 @@ use opte::api::Ipv4PrefixLen; use opte::api::OpteError; use opte::api::SubnetRouterPair; use opte::engine::dhcp::DhcpAction; -use opte::engine::ip4::Ipv4Cidr; +use opte::engine::ip::v4::Ipv4Cidr; use opte::engine::layer::Layer; use opte::engine::rule::Action; use opte::engine::rule::Rule; diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index 7b9d9774..8061fd92 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -23,7 +23,7 @@ use opte::engine::flow_table::FlowTable; use opte::engine::ingot_packet::FullParsed; use opte::engine::ingot_packet::OpteParsed2; use opte::engine::ingot_packet::Packet2; -use opte::engine::ip4::Ipv4Addr; +use opte::engine::ip::v4::Ipv4Addr; use opte::engine::packet::InnerFlowId; use opte::engine::packet::ParseError; use opte::engine::parse::ValidGeneveOverV6; diff --git a/lib/oxide-vpc/src/engine/overlay.rs b/lib/oxide-vpc/src/engine/overlay.rs index 01cf8452..bea86a66 100644 --- a/lib/oxide-vpc/src/engine/overlay.rs +++ b/lib/oxide-vpc/src/engine/overlay.rs @@ -43,10 +43,10 @@ use opte::engine::headers::IpAddr; use opte::engine::headers::IpCidr; use opte::engine::headers::IpPush; use opte::engine::ingot_packet::MblkPacketData; -use opte::engine::ip4::Protocol; -use opte::engine::ip6::Ipv6Addr; -use opte::engine::ip6::Ipv6Cidr; -use opte::engine::ip6::Ipv6Push; +use opte::engine::ip::v4::Protocol; +use opte::engine::ip::v6::Ipv6Addr; +use opte::engine::ip::v6::Ipv6Cidr; +use opte::engine::ip::v6::Ipv6Push; use opte::engine::layer::DefaultAction; use opte::engine::layer::Layer; use opte::engine::layer::LayerActions; diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index 013847ba..5047ccdc 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -30,12 +30,12 @@ use 
opte::engine::geneve::Vni; use opte::engine::ingot_packet::MblkFullParsed; use opte::engine::ingot_packet::MsgBlk; use opte::engine::ingot_packet::Packet2; +use opte::engine::ip::v4::Ipv4Addr; use opte::engine::ip::v4::Ipv4Ref; use opte::engine::ip::v6::Ipv6; use opte::engine::ip::v6::Ipv6Ref; use opte::engine::ip::ValidL3; use opte::engine::ip::L3; -use opte::engine::ip4::Ipv4Addr; use opte::engine::packet::InnerFlowId; use opte::engine::parse::ValidUlp; use opte::engine::port::ProcessError; diff --git a/xde/src/route.rs b/xde/src/route.rs index 089bc36c..ba863ae3 100644 --- a/xde/src/route.rs +++ b/xde/src/route.rs @@ -20,7 +20,7 @@ use opte::ddi::sync::KRwLock; use opte::ddi::sync::KRwLockType; use opte::ddi::time::Moment; use opte::engine::ether::EtherAddr; -use opte::engine::ip6::Ipv6Addr; +use opte::engine::ip::v6::Ipv6Addr; // XXX: completely arbitrary timeouts. /// The duration a cached route remains valid for before it must be diff --git a/xde/src/xde.rs b/xde/src/xde.rs index d6e1af3a..910211db 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -59,16 +59,16 @@ use opte::ddi::sync::KRwLockReadGuard; use opte::ddi::sync::KRwLockType; use opte::ddi::time::Interval; use opte::ddi::time::Periodic; +use opte::engine::ether::EthernetRef; use opte::engine::geneve::Vni; use opte::engine::headers::IpAddr; -use opte::engine::ingot_base::EthernetRef; use opte::engine::ingot_packet::MsgBlk; use opte::engine::ingot_packet::Packet2; use opte::engine::ioctl::{self as api}; -use opte::engine::ip6::Ipv6Addr; +use opte::engine::ip::v6::Ipv6Addr; use opte::engine::packet::InnerFlowId; use opte::engine::packet::PacketChain; -use opte::engine::packet::PacketError; +use opte::engine::packet::ParseError; use opte::engine::port::Port; use opte::engine::port::PortBuilder; use opte::engine::port::ProcessResult; @@ -153,7 +153,7 @@ fn bad_packet_parse_probe( port: Option<&CString>, dir: Direction, mp: uintptr_t, - err: &PacketError, + err: &ParseError, ) { let port_str = match 
port { None => c"unknown", From 90c79b5e79e0179087f8dbc7f28271e3fca4a6f3 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 24 Oct 2024 12:34:54 -0700 Subject: [PATCH 064/115] ExternalOpt critical checking, as req'd by RFC 8926 --- lib/opte/src/d_error.rs | 1 - lib/opte/src/engine/geneve.rs | 112 ++++++++++++++++++++++++++-- lib/opte/src/engine/ingot_packet.rs | 12 +-- lib/opte/src/engine/ip/v4.rs | 4 - lib/opte/src/engine/mod.rs | 3 - lib/opte/src/engine/packet.rs | 13 ++++ 6 files changed, 125 insertions(+), 20 deletions(-) diff --git a/lib/opte/src/d_error.rs b/lib/opte/src/d_error.rs index 5abba90a..edbe61f4 100644 --- a/lib/opte/src/d_error.rs +++ b/lib/opte/src/d_error.rs @@ -8,7 +8,6 @@ //! static strings to avoid paying the `fmt` tax when calling an SDT. use core::ffi::CStr; -use core::fmt; pub use derror_macro::DError; // XXX: I think we want some way of doing the whole thing in one big chunk diff --git a/lib/opte/src/engine/geneve.rs b/lib/opte/src/engine/geneve.rs index e906dd47..c912ea2d 100644 --- a/lib/opte/src/engine/geneve.rs +++ b/lib/opte/src/engine/geneve.rs @@ -10,7 +10,10 @@ use super::headers::ModifyAction; use super::headers::PushAction; +use super::packet::MismatchError; +use super::packet::ParseError; use ingot::geneve::Geneve; +use ingot::geneve::GeneveFlags; use ingot::geneve::GeneveOpt; use ingot::geneve::GeneveOptRef; use ingot::geneve::GeneveRef; @@ -36,6 +39,67 @@ pub const GENEVE_OPT_RESERVED_SHIFT: u8 = 5; pub const GENEVE_OPT_RESERVED_MASK: u8 = (1 << GENEVE_OPT_RESERVED_SHIFT) - 1; pub const GENEVE_OPT_CLASS_OXIDE: u16 = 0x0129; +#[inline] +pub fn validate_geneve( + pkt: &ValidGeneve, + bytes_after: usize, +) -> Result<(), ParseError> { + if pkt.version() != 0 { + return Err(ParseError::IllegalValue(MismatchError { + location: c"Geneve.version", + expected: 0, + actual: pkt.version() as u64, + })); + } + + if pkt.flags().contains(GeneveFlags::CRITICAL_OPTS) { + match pkt.options_ref() { + ingot::types::FieldRef::Repr(g) => { + 
for opt in g.iter() { + if !opt.option_type.is_critical() { + continue; + } + + GeneveOption::from_code_and_ty( + opt.class, + opt.option_type.0, + )?; + } + } + ingot::types::FieldRef::Raw(Header::Repr(g)) => { + for opt in g.iter() { + if !opt.option_type.is_critical() { + continue; + } + + GeneveOption::from_code_and_ty( + opt.class, + opt.option_type.0, + )?; + } + } + ingot::types::FieldRef::Raw(Header::Raw(g)) => { + for opt in g.iter(None) { + let Ok(opt) = opt else { + break; + }; + + if !opt.option_type().is_critical() { + continue; + } + + GeneveOption::from_code_and_ty( + opt.class(), + opt.option_type().0, + )?; + } + } + } + } + + Ok(()) +} + #[derive(Clone, Copy, Debug, Default, Eq, Ord, PartialEq, PartialOrd)] pub struct GeneveMeta { pub entropy: u16, @@ -118,6 +182,20 @@ pub enum GeneveOption { } impl GeneveOption { + #[inline] + pub fn from_code_and_ty(class: u16, ty: u8) -> Result { + match (class, ty) { + (GENEVE_OPT_CLASS_OXIDE, v) + if OxideOption::External.opt_type() == v => + { + Ok(Self::Oxide(OxideOption::External)) + } + _ => { + Err(ParseError::UnrecognisedTunnelOpt { class: class, ty: ty }) + } + } + } + /// Return the wire-length of this option in bytes, including headers. pub fn len(&self) -> usize { 4 + match self { @@ -144,7 +222,7 @@ impl OxideOption { } /// Return the option type number. - pub fn opt_type(&self) -> u8 { + pub const fn opt_type(&self) -> u8 { match self { OxideOption::External => 0, } @@ -155,14 +233,18 @@ impl OxideOption { // from the geneve options -- we only have the one today, however. 
#[inline] pub fn geneve_has_oxide_external(pkt: &Geneve) -> bool { + let mut out = false; for opt in pkt.options.iter() { - let out = geneve_opt_is_oxide_external::<&[u8]>(opt); + out = matches!( + GeneveOption::from_code_and_ty(opt.class, opt.option_type.0,), + Ok(GeneveOption::Oxide(OxideOption::External)) + ); if out { break; } } - false + out } #[inline] @@ -174,7 +256,13 @@ pub fn valid_geneve_has_oxide_external( match pkt.options_ref() { ingot::types::FieldRef::Repr(g) => { for opt in g.iter() { - out = geneve_opt_is_oxide_external::<&[u8]>(opt); + out = matches!( + GeneveOption::from_code_and_ty( + opt.class, + opt.option_type.0, + ), + Ok(GeneveOption::Oxide(OxideOption::External)) + ); if out { break; } @@ -182,7 +270,13 @@ pub fn valid_geneve_has_oxide_external( } ingot::types::FieldRef::Raw(Header::Repr(g)) => { for opt in g.iter() { - out = geneve_opt_is_oxide_external::<&[u8]>(opt); + out = matches!( + GeneveOption::from_code_and_ty( + opt.class, + opt.option_type.0, + ), + Ok(GeneveOption::Oxide(OxideOption::External)) + ); if out { break; } @@ -194,7 +288,13 @@ pub fn valid_geneve_has_oxide_external( break; }; - out = geneve_opt_is_oxide_external(&opt); + out = matches!( + GeneveOption::from_code_and_ty( + opt.class(), + opt.option_type().0, + ), + Ok(GeneveOption::Oxide(OxideOption::External)) + ); if out { break; } diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 55c3c7ce..bdef64f8 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -1788,12 +1788,6 @@ impl Packet2> { return; } - // We expect that any body transform will necessarily invalidate - // the body_csum. Recompute from scratch. - if self.state.body_modified { - return self.compute_checksums(); - } - // Flag to indicate if an IP header/ULP checksums were // provided. 
If the checksum is zero, it's assumed heardware // checksum offload is being used, and OPTE should not update @@ -1801,6 +1795,12 @@ impl Packet2> { let update_ip = self.state.meta.has_ip_csum(); let update_ulp = self.state.meta.has_ulp_csum(); + // We expect that any body transform will necessarily invalidate + // the body_csum. Recompute from scratch. + if self.state.body_modified && (update_ip || update_ulp) { + return self.compute_checksums(); + } + // Start by reusing the known checksum of the body. let mut body_csum = self.body_csum().unwrap_or_default(); diff --git a/lib/opte/src/engine/ip/v4.rs b/lib/opte/src/engine/ip/v4.rs index 4a5688b8..04007698 100644 --- a/lib/opte/src/engine/ip/v4.rs +++ b/lib/opte/src/engine/ip/v4.rs @@ -6,10 +6,6 @@ //! IPv4 headers. -use core::fmt; -use core::fmt::Display; -use core::num::ParseIntError; - use crate::engine::checksum::Checksum; use crate::engine::packet::MismatchError; use crate::engine::packet::ParseError; diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index b0852ec5..2c8fa6f1 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -39,10 +39,7 @@ pub mod udp; pub mod ingot_packet; -use alloc::string::String; use checksum::Checksum; -use core::fmt; -use core::num::ParseIntError; use ingot::tcp::TcpRef; use ingot::types::Read; use ingot_packet::FullParsed; diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index a1875c37..3d654810 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -393,10 +393,23 @@ pub enum WrapError { } #[derive(Clone, Debug, Eq, PartialEq, DError)] +#[derror(leaf_data = ParseError::data)] pub enum ParseError { IngotError(ingot::types::PacketParseError), IllegalValue(MismatchError), BadLength(MismatchError), + UnrecognisedTunnelOpt { class: u16, ty: u8 }, +} + +impl ParseError { + fn data(&self, data: &mut [u64]) { + match self { + ParseError::UnrecognisedTunnelOpt { class, ty } => { + [data[0], 
data[1]] = [*class as u64, *ty as u64]; + } + _ => {} + } + } } impl DError for ingot::types::PacketParseError { From bf667f461e09ca7d299668572ee45190fca2767c Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 24 Oct 2024 16:53:03 -0700 Subject: [PATCH 065/115] Maybe actually do the Geneve validation. Also, less sucky append of mblks via b_cont. --- lib/opte-test-utils/src/icmp.rs | 4 ++-- lib/opte-test-utils/src/lib.rs | 2 +- lib/opte/src/engine/geneve.rs | 1 - lib/opte/src/engine/ingot_packet.rs | 20 ++++++++++++-------- lib/opte/src/engine/parse.rs | 3 +++ lib/opte/src/engine/rule.rs | 2 +- 6 files changed, 19 insertions(+), 13 deletions(-) diff --git a/lib/opte-test-utils/src/icmp.rs b/lib/opte-test-utils/src/icmp.rs index 2053dcab..14272a91 100644 --- a/lib/opte-test-utils/src/icmp.rs +++ b/lib/opte-test-utils/src/icmp.rs @@ -189,7 +189,7 @@ pub fn gen_icmp_echo( let chain = segments.pop().unwrap(); let new_el = segments.last_mut().unwrap(); - new_el.extend_if_one(chain); + new_el.append(chain); } segments.pop().unwrap() @@ -315,7 +315,7 @@ pub fn gen_icmpv6_echo( let chain = segments.pop().unwrap(); let new_el = segments.last_mut().unwrap(); - new_el.extend_if_one(chain); + new_el.append(chain); } segments.pop().unwrap() diff --git a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index 5f3845ff..26f3614f 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -1044,7 +1044,7 @@ fn _encap( outer_udp, outer_geneve, )); - encap_pkt.extend_if_one(inner_pkt); + encap_pkt.append(inner_pkt); encap_pkt } diff --git a/lib/opte/src/engine/geneve.rs b/lib/opte/src/engine/geneve.rs index c912ea2d..559b7cea 100644 --- a/lib/opte/src/engine/geneve.rs +++ b/lib/opte/src/engine/geneve.rs @@ -42,7 +42,6 @@ pub const GENEVE_OPT_CLASS_OXIDE: u16 = 0x0129; #[inline] pub fn validate_geneve( pkt: &ValidGeneve, - bytes_after: usize, ) -> Result<(), ParseError> { if pkt.version() != 0 { return 
Err(ParseError::IllegalValue(MismatchError { diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index bdef64f8..702940ef 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -464,15 +464,19 @@ impl MsgBlk { } } - // TODO: I really need to rethink this one in practice. - // hacked together for POC. - pub fn extend_if_one(&mut self, other: Self) { - let mut_self = unsafe { self.inner.as_mut() }; - if !mut_self.b_cont.is_null() { - panic!("oopsie daisy") + /// Places another `MsgBlk` at the end of this packet's + /// b_cont chain. + pub fn append(&mut self, other: Self) { + // Find the last element in the pkt chain + // i.e., whose b_cont is null. + let mut curr = self.inner.as_ptr(); + while unsafe { !(*curr).b_cont.is_null() } { + curr = unsafe { (*curr).b_cont }; } - mut_self.b_cont = other.unwrap_mblk().as_ptr(); + unsafe { + (*curr).b_cont = other.unwrap_mblk().as_ptr(); + } } /// Drop all bytes and move the cursor to the very back of the dblk. 
@@ -2137,7 +2141,7 @@ impl EmitSpec { } if let Some(mut prepend) = prepend { - prepend.extend_if_one(pkt); + prepend.append(pkt); prepend } else { pkt diff --git a/lib/opte/src/engine/parse.rs b/lib/opte/src/engine/parse.rs index 35b4c958..16286f5a 100644 --- a/lib/opte/src/engine/parse.rs +++ b/lib/opte/src/engine/parse.rs @@ -13,6 +13,7 @@ use super::ether::EthernetMut; use super::ether::EthernetPacket; use super::ether::EthernetRef; use super::ether::ValidEthernet; +use super::geneve::validate_geneve; use super::geneve::GENEVE_PORT; use super::headers::IpMod; use super::ingot_packet::OpteMeta; @@ -592,6 +593,8 @@ impl LightweightMeta for ValidGeneveOverV6 { let rem_len = rem_len - self.outer_udp.packet_length(); validate_udp(&self.outer_udp, rem_len)?; + validate_geneve(&self.outer_encap)?; + let rem_len = rem_len - &(&self.outer_encap, &self.outer_eth, &self.inner_l3) .packet_length(); diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index cd2dfb45..3cfad131 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -385,7 +385,7 @@ impl CompiledEncap { *l4_len_slot = (l4_len as u16).to_be_bytes(); if let Some(mut prepend) = prepend { - prepend.extend_if_one(pkt); + prepend.append(pkt); prepend } else { pkt From 6e9ed60e72cb9e4fe7e064538dfe111642358d86 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 25 Oct 2024 12:51:52 -0700 Subject: [PATCH 066/115] Wherein I Moev Things. 
--- bench/benches/userland.rs | 12 +- lib/opte-test-utils/src/dhcp.rs | 2 +- lib/opte-test-utils/src/icmp.rs | 2 +- lib/opte-test-utils/src/lib.rs | 17 +- lib/opte-test-utils/src/pcap.rs | 2 +- lib/opte/src/ddi/mblk.rs | 828 +++++++++++++++++++++++ lib/opte/src/engine/arp.rs | 2 +- lib/opte/src/engine/dhcp.rs | 2 +- lib/opte/src/engine/dhcpv6/protocol.rs | 8 +- lib/opte/src/engine/geneve.rs | 131 ++-- lib/opte/src/engine/icmp/v4.rs | 4 +- lib/opte/src/engine/icmp/v6.rs | 4 +- lib/opte/src/engine/ingot_packet.rs | 616 +---------------- lib/opte/src/engine/layer.rs | 20 +- lib/opte/src/engine/mod.rs | 6 +- lib/opte/src/engine/nat.rs | 20 +- lib/opte/src/engine/packet.rs | 232 ------- lib/opte/src/engine/port.rs | 22 +- lib/opte/src/engine/rule.rs | 12 +- lib/opte/src/engine/snat.rs | 20 +- lib/oxide-vpc/src/engine/mod.rs | 6 +- lib/oxide-vpc/tests/firewall_tests.rs | 2 +- lib/oxide-vpc/tests/fuzz_regression.rs | 8 +- lib/oxide-vpc/tests/integration_tests.rs | 8 +- xde/src/dls/mod.rs | 2 +- xde/src/mac/mod.rs | 2 +- xde/src/xde.rs | 16 +- 27 files changed, 986 insertions(+), 1020 deletions(-) diff --git a/bench/benches/userland.rs b/bench/benches/userland.rs index e0e07db9..f8dbd493 100644 --- a/bench/benches/userland.rs +++ b/bench/benches/userland.rs @@ -10,7 +10,7 @@ use criterion::criterion_group; use criterion::criterion_main; use criterion::BenchmarkId; use criterion::Criterion; -use opte::engine::ingot_packet::Packet2; +use opte::engine::ingot_packet::Packet; use opte_bench::alloc::*; use opte_bench::packet::BenchPacket; use opte_bench::packet::BenchPacketInstance; @@ -83,8 +83,7 @@ pub fn test_parse( match parser { ParserKind::Generic => { |(mut in_pkt, direction): TestCase| { - let pkt = - black_box(Packet2::new(in_pkt.iter_mut())); + let pkt = black_box(Packet::new(in_pkt.iter_mut())); black_box(match direction { In => pkt.parse_inbound(GenericUlp {}), Out => pkt.parse_outbound(GenericUlp {}), @@ -94,8 +93,7 @@ pub fn test_parse( } ParserKind::OxideVpc => { 
|(mut in_pkt, direction): TestCase| { - let pkt = - black_box(Packet2::new(in_pkt.iter_mut())); + let pkt = black_box(Packet::new(in_pkt.iter_mut())); black_box(match direction { In => { pkt.parse_inbound(VpcParser {}).unwrap(); @@ -154,7 +152,7 @@ pub fn test_handle( // packet is now a view over the generated pkt. |(mut pkt_m, dir): TestCase| match parser { ParserKind::Generic => { - let pkt = Packet2::new(pkt_m.iter_mut()); + let pkt = Packet::new(pkt_m.iter_mut()); let res = match dir { In => { let pkt = @@ -173,7 +171,7 @@ pub fn test_handle( } } ParserKind::OxideVpc => { - let pkt = Packet2::new(pkt_m.iter_mut()); + let pkt = Packet::new(pkt_m.iter_mut()); let res = match dir { In => { let pkt = diff --git a/lib/opte-test-utils/src/dhcp.rs b/lib/opte-test-utils/src/dhcp.rs index 5029d363..84ca5ce2 100644 --- a/lib/opte-test-utils/src/dhcp.rs +++ b/lib/opte-test-utils/src/dhcp.rs @@ -8,11 +8,11 @@ use super::*; use dhcpv6::protocol::MessageType; +use opte::ddi::mblk::MsgBlk; use opte::engine::dhcp::DHCP_CLIENT_PORT; use opte::engine::dhcp::DHCP_SERVER_PORT; use opte::engine::dhcpv6; use opte::engine::ether::Ethernet; -use opte::engine::ingot_packet::MsgBlk; use opte::engine::ip::v4::Ipv4; use opte::engine::ip::v6::Ipv6; use opte::ingot::ethernet::Ethertype; diff --git a/lib/opte-test-utils/src/icmp.rs b/lib/opte-test-utils/src/icmp.rs index 14272a91..17039536 100644 --- a/lib/opte-test-utils/src/icmp.rs +++ b/lib/opte-test-utils/src/icmp.rs @@ -7,8 +7,8 @@ //! Routines for ICMP testing. 
use opte::api::*; +use opte::ddi::mblk::MsgBlk; use opte::engine::ether::Ethernet; -use opte::engine::ingot_packet::MsgBlk; use opte::engine::ip::v4::Ipv4; use opte::engine::ip::v6::Ipv6; use opte::engine::ip::L3; diff --git a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index 26f3614f..b482d678 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -18,6 +18,7 @@ pub mod port_state; // Let's make our lives easier and pub use a bunch of stuff. pub use opte::api::Direction::*; pub use opte::api::MacAddr; +pub use opte::ddi::mblk::MsgBlk; pub use opte::engine::ether::EtherMeta; pub use opte::engine::ether::EtherType; pub use opte::engine::ether::Ethernet; @@ -30,8 +31,7 @@ pub use opte::engine::geneve::GENEVE_PORT; pub use opte::engine::headers::IpAddr; pub use opte::engine::headers::IpCidr; pub use opte::engine::ingot_packet::MblkLiteParsed; -pub use opte::engine::ingot_packet::MsgBlk; -pub use opte::engine::ingot_packet::Packet2; +pub use opte::engine::ingot_packet::Packet; pub use opte::engine::ip::v4::Ipv4; pub use opte::engine::ip::v4::Ipv4Addr; pub use opte::engine::ip::v4::Protocol; @@ -111,17 +111,16 @@ macro_rules! 
expect_modified { pub fn parse_inbound( pkt: &mut MsgBlk, parser: NP, -) -> Result>>, ParseError> { - let pkt = Packet2::new(pkt.iter_mut()); +) -> Result>>, ParseError> { + let pkt = Packet::new(pkt.iter_mut()); pkt.parse_inbound(parser) } pub fn parse_outbound( pkt: &mut MsgBlk, parser: NP, -) -> Result>>, ParseError> -{ - let pkt = Packet2::new(pkt.iter_mut()); +) -> Result>>, ParseError> { + let pkt = Packet::new(pkt.iter_mut()); pkt.parse_outbound(parser) } @@ -481,7 +480,7 @@ pub fn ulp_pkt< ) -> MsgBlk { let mut pkt = MsgBlk::new_ethernet_pkt((eth, ip, ulp, body)); - let view = Packet2::new(pkt.iter_mut()); + let view = Packet::new(pkt.iter_mut()); let view = view.parse_outbound(GenericUlp {}).unwrap(); let mut view = view.to_full_meta(); view.compute_checksums(); @@ -999,7 +998,7 @@ fn _encap( dst: TestIpPhys, external_snat: bool, ) -> MsgBlk { - let pkt = Packet2::new(inner_pkt.iter_mut()); + let pkt = Packet::new(inner_pkt.iter_mut()); let base_len = pkt.len(); drop(pkt); diff --git a/lib/opte-test-utils/src/pcap.rs b/lib/opte-test-utils/src/pcap.rs index 4983a810..6244ddbd 100644 --- a/lib/opte-test-utils/src/pcap.rs +++ b/lib/opte-test-utils/src/pcap.rs @@ -6,7 +6,7 @@ //! Routines for building packet capture files. -use opte::engine::ingot_packet::MsgBlk; +use opte::ddi::mblk::MsgBlk; use pcap_parser::pcap; use pcap_parser::pcap::LegacyPcapBlock; use pcap_parser::pcap::PcapHeader; diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index 5ca7b789..da8769c1 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -3,3 +3,831 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
// Copyright 2024 Oxide Computer Company + +use crate::engine::ingot_packet::QueryLen; +use crate::engine::packet::allocb; +#[cfg(any(feature = "std", test))] +use crate::engine::packet::mock_freemsg; +use crate::engine::packet::SegAdjustError; +use crate::engine::packet::WrapError; +use crate::engine::packet::WriteError; +use alloc::vec::Vec; +use core::marker::PhantomData; +use core::mem::ManuallyDrop; +use core::mem::MaybeUninit; +use core::ops::Deref; +use core::ops::DerefMut; +use core::ptr; +use core::ptr::NonNull; +use core::slice; +#[cfg(all(not(feature = "std"), not(test)))] +use illumos_sys_hdrs as ddi; +use illumos_sys_hdrs::mblk_t; +use illumos_sys_hdrs::uintptr_t; +use ingot::types::Emit; +use ingot::types::EmitDoesNotRelyOnBufContents; +use ingot::types::ParseError as IngotParseErr; +use ingot::types::Read; + +/// The head and tail of an mblk_t list. +struct MsgBlkChainInner { + head: NonNull, + tail: NonNull, +} + +/// A chain of illumos MsgBlk/`mblk_t` buffers. +/// +/// Network packets are provided by illumos as a linked list of linked lists, +/// using the `b_next` and `b_prev` fields. +/// +/// See the documentation for [`super::ingot_packet::Packet`] and/or [`MsgBlk`] for full context. +// TODO: We might retool this type now that MsgBlk does not decompose +// each mblk_t into individual segments (i.e., packets could be allocated +// a lifetime via PhantomData based on whether we want to remove them from the chain or modify in place). +// Today's code is all equivalent to always using 'static, because +// we remove and re-add the mblks to work on them. +// We might also want to return either a chain/mblk_t in an enum, but +// practically XDE will always assume it has a chain from MAC. +pub struct MsgBlkChain { + inner: Option, +} + +impl MsgBlkChain { + /// Create an empty packet chain. + pub fn empty() -> Self { + Self { inner: None } + } + + /// Convert an mblk_t packet chain into a safe source of `MsgBlk`s.
+ /// + /// # Safety + /// The `mp` pointer must point to an `mblk_t` allocated by + /// `allocb(9F)` or provided by some kernel API which itself used + /// one of the DDI/DKI APIs to allocate it. + /// Packets must form a valid linked list (no loops). + /// The original mblk_t pointer must not be used again. + pub unsafe fn new(mp: *mut mblk_t) -> Result { + let head = NonNull::new(mp).ok_or(WrapError::NullPtr)?; + + // Walk the chain to find the tail, and support faster append. + let mut tail = head; + while let Some(next_ptr) = NonNull::new((*tail.as_ptr()).b_next) { + tail = next_ptr; + } + + Ok(Self { inner: Some(MsgBlkChainInner { head, tail }) }) + } + + /// Removes the next packet from the top of the chain and returns + /// it, taking ownership. + pub fn pop_front(&mut self) -> Option { + if let Some(ref mut list) = &mut self.inner { + unsafe { + let curr_b = list.head; + let curr = curr_b.as_ptr(); + let next = NonNull::new((*curr).b_next); + + // Break the forward link on the packet we have access to, + // and the backward link on the next element if possible. + if let Some(next) = next { + (*next.as_ptr()).b_prev = ptr::null_mut(); + } + (*curr).b_next = ptr::null_mut(); + + // Update the current head. If the next element is null, + // we're now empty. + if let Some(next) = next { + list.head = next; + } else { + self.inner = None; + } + + Some(MsgBlk { inner: curr_b }) + } + } else { + None + } + } + + /// Adds an owned `MsgBlk` to the end of this chain. + /// + /// Internally, this unwraps the `MsgBlk` back into an mblk_t, + /// before placing it at the tail. + pub fn append(&mut self, packet: MsgBlk) { + // Unwrap safety: a valid Packet implies a non-null mblk_t. + // Jamming `NonNull` into PacketSeg/Packet might take some + // work just to avoid this unwrap. + let pkt = packet.unwrap_mblk(); + + // We're guaranteeing today that a 'static Packet has + // no neighbours and is not part of a chain. 
+ // This simplifies tail updates in both cases (no chain walk). + unsafe { + assert!((*pkt.as_ptr()).b_prev.is_null()); + assert!((*pkt.as_ptr()).b_next.is_null()); + } + + if let Some(ref mut list) = &mut self.inner { + let pkt_p = pkt.as_ptr(); + let tail_p = list.tail.as_ptr(); + unsafe { + (*tail_p).b_next = pkt_p; + (*pkt_p).b_prev = tail_p; + // pkt_p->b_next is already null. + } + list.tail = pkt; + } else { + self.inner = Some(MsgBlkChainInner { head: pkt, tail: pkt }); + } + } + + /// Return the head of the underlying `mblk_t` packet chain and + /// consume `self`. The caller of this function now owns the + /// `mblk_t` segment chain. + pub fn unwrap_mblk(mut self) -> Option> { + self.inner.take().map(|v| v.head) + } +} + +impl Drop for MsgBlkChain { + fn drop(&mut self) { + // This is a minor variation on MsgBlk's logic. illumos + // contains helper functions from STREAMS to just drop a whole + // chain. + cfg_if! { + if #[cfg(all(not(feature = "std"), not(test)))] { + // Safety: This is safe as long as the original + // `mblk_t` came from a call to `allocb(9F)` (or + // similar API). + if let Some(list) = &self.inner { + unsafe { ddi::freemsgchain(list.head.as_ptr()) }; + } + } else { + while let Some(pkt) = self.pop_front() { + drop(pkt); + } + } + } + } +} + +/// An individual illumos `mblk_t` -- a single bytestream +/// comprised of a linked list of data segments. +/// +/// To facilitate testing the OPTE core, [`MsgBlk`] is an abstraction for +/// manipulating network packets in both a `std` and `no_std` environment. +/// The first is useful for writing tests against the OPTE core engine and +/// executing them in userland, without the need for standing up a full-blown +/// virtual machine. +/// +/// The `no_std` implementation is used when running in-kernel. The +/// main difference is the `mblk_t` and `dblk_t` structures are coming +/// from viona (outbound/Tx) and mac (inbound/Rx), and we consume them +/// via [`Packet::wrap_mblk()`]. 
In reality this is typically holding +/// an Ethernet _frame_, but we prefer to use the colloquial +/// nomenclature of "packet". +#[derive(Debug)] +pub struct MsgBlk { + pub inner: NonNull, +} + +impl Deref for MsgBlk { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + unsafe { + let self_ref = self.inner.as_ref(); + let rptr = self_ref.b_rptr; + let len = self_ref.b_wptr.offset_from(rptr) as usize; + slice::from_raw_parts(rptr, len) + } + } +} + +impl DerefMut for MsgBlk { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { + let self_ref = self.inner.as_mut(); + let rptr = self_ref.b_rptr; + let len = self_ref.b_wptr.offset_from(rptr) as usize; + slice::from_raw_parts_mut(rptr, len) + } + } +} + +#[derive(Debug)] +pub struct MsgBlkNode(mblk_t); + +impl Deref for MsgBlkNode { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + unsafe { + let rptr = self.0.b_rptr; + let len = self.0.b_wptr.offset_from(rptr) as usize; + slice::from_raw_parts(rptr, len) + } + } +} + +impl DerefMut for MsgBlkNode { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { + let rptr = self.0.b_rptr; + let len = self.0.b_wptr.offset_from(rptr) as usize; + slice::from_raw_parts_mut(rptr, len) + } + } +} + +impl MsgBlkNode { + /// Shrink the writable/readable area by shifting the `b_rptr` by + /// `len`; effectively removing bytes from the start of the packet. + /// + /// # Errors + /// + /// `SegAdjustError::StartPastEnd`: Shifting the read pointer by + /// `len` would move `b_rptr` past `b_wptr`. + pub fn drop_front_bytes(&mut self, n: usize) -> Result<(), SegAdjustError> { + unsafe { + if self.0.b_wptr.offset_from(self.0.b_rptr) < n as isize { + return Err(SegAdjustError::StartPastEnd); + } + self.0.b_rptr = self.0.b_rptr.add(n); + } + + Ok(()) + } +} + +impl MsgBlk { + /// Allocate a new [`MsgBlk`] containing a data buffer of `len` + /// bytes. + /// + /// The returned packet consists of exactly one segment. 
+ /// + /// In the kernel environment this uses `allocb(9F)` and + /// `freemsg(9F)` under the hood. + /// + /// In the `std` environment this uses a mock implementation of + /// `allocb(9F)` and `freeb(9F)`, which contains enough scaffolding + /// to satisfy OPTE's use of the underlying `mblk_t` and `dblk_t` + /// structures. + pub fn new(len: usize) -> Self { + let inner = NonNull::new(allocb(len)) + .expect("somehow failed to get an mblk..."); + + Self { inner } + } + + /// Allocates a new [`MsgBlk`] of size `buf.len()`, copying its + /// contents. + pub fn copy(buf: impl AsRef<[u8]>) -> Self { + let mut out = Self::new(buf.as_ref().len()); + // Unwrap safety -- just allocated length of input buffer. + out.write_bytes_back(buf).unwrap(); + out + } + + /// Creates a new [`MsgBlk`] using a given set of packet headers. + pub fn new_pkt(emit: impl Emit + EmitDoesNotRelyOnBufContents) -> Self { + let mut pkt = Self::new(emit.packet_length()); + pkt.emit_back(emit).unwrap(); + pkt + } + + /// Returns the number of bytes available for writing before `b_rptr`. + pub fn headroom(&self) -> usize { + unsafe { + let inner = self.inner.as_ref(); + + inner.b_rptr.offset_from((*inner.b_datap).db_base) as usize + } + } + + /// Creates a new [`MsgBlk`] containing a data buffer of `len` + /// bytes with 2B of headroom/alignment. + /// + /// This sets up 4B alignment on all post-ethernet headers. + pub fn new_ethernet(len: usize) -> Self { + Self::new_with_headroom(2, len) + } + + /// Creates a new [`MsgBlk`] using a given set of packet headers + /// with 2B of headroom/alignment. + /// + /// This sets up 4B alignment on all post-ethernet headers. + pub fn new_ethernet_pkt( + emit: impl Emit + EmitDoesNotRelyOnBufContents, + ) -> Self { + let mut pkt = Self::new_ethernet(emit.packet_length()); + pkt.emit_back(emit).unwrap(); + pkt + } + + /// Return the number of initialised bytes in this `MsgBlk` over + /// all linked segments.
+ pub fn byte_len(&self) -> usize { + self.iter().map(|el| el.len()).sum() + } + + /// Return the number of segments linked together in this + /// `MsgBlk`. + pub fn seg_len(&self) -> usize { + self.iter().count() + } + + /// Allocate a new [`MsgBlk`] containing a data buffer of size + /// `head_len + body_len`. + /// + /// The read/write pointer is set to have `head_len` bytes of + /// headroom and `body_len` bytes of capacity at the back. + pub fn new_with_headroom(head_len: usize, body_len: usize) -> Self { + let mut out = Self::new(head_len + body_len); + + // SAFETY: alloc is contiguous and always larger than head_len. + let mut_out = unsafe { out.inner.as_mut() }; + mut_out.b_rptr = unsafe { mut_out.b_rptr.add(head_len) }; + mut_out.b_wptr = mut_out.b_rptr; + + out + } + + /// Provides a slice of length `n_bytes` at the back of an [`MsgBlk`] + /// (if capacity exists) to be initialised, before increasing `len` + /// by `n_bytes`. + /// + /// # Safety + /// Users must write a value to every element of the `MaybeUninit` + /// buffer at least once in the `MsgBlk` lifecycle -- all `n_bytes` + /// are assumed to be initialised. + pub unsafe fn write_back( + &mut self, + n_bytes: usize, + f: impl FnOnce(&mut [MaybeUninit]), + ) -> Result<(), WriteError> { + let mut_out = unsafe { self.inner.as_mut() }; + let avail_bytes = + unsafe { (*mut_out.b_datap).db_lim.offset_from(mut_out.b_wptr) }; + + if avail_bytes < 0 || (avail_bytes as usize) < n_bytes { + return Err(WriteError::NotEnoughBytes { + available: avail_bytes.max(0) as usize, + needed: n_bytes, + }); + } + + let in_slice = unsafe { + slice::from_raw_parts_mut( + mut_out.b_wptr as *mut MaybeUninit, + n_bytes, + ) + }; + + f(in_slice); + + mut_out.b_wptr = unsafe { mut_out.b_wptr.add(n_bytes) }; + + Ok(()) + } + + /// Provides a slice of length `n_bytes` at the front of an [`MsgBlk`] + /// (if capacity exists) to be initialised, before increasing `len` + /// by `n_bytes`.
+ /// + /// # Safety + /// Users must write a value to every element of the `MaybeUninit` + /// buffer at least once in the `MsgBlk` lifecycle -- all `n_bytes` + /// are assumed to be initialised. + pub unsafe fn write_front( + &mut self, + n_bytes: usize, + f: impl FnOnce(&mut [MaybeUninit]), + ) -> Result<(), WriteError> { + let mut_out = unsafe { self.inner.as_mut() }; + let avail_bytes = + unsafe { mut_out.b_rptr.offset_from((*mut_out.b_datap).db_base) }; + + if avail_bytes < 0 || (avail_bytes as usize) < n_bytes { + return Err(WriteError::NotEnoughBytes { + available: avail_bytes.max(0) as usize, + needed: n_bytes, + }); + } + + let new_head = unsafe { mut_out.b_rptr.sub(n_bytes) }; + + let in_slice = unsafe { + slice::from_raw_parts_mut(new_head as *mut MaybeUninit, n_bytes) + }; + + f(in_slice); + + mut_out.b_rptr = new_head; + + Ok(()) + } + + /// Adjusts the write pointer for this MsgBlk, initialising any extra bytes to 0. + pub fn resize(&mut self, new_len: usize) -> Result<(), WriteError> { + let len = self.len(); + if new_len < len { + unsafe { + let mut_inner = self.inner.as_mut(); + mut_inner.b_wptr = mut_inner.b_wptr.sub(len - new_len); + } + Ok(()) + } else if new_len > len { + unsafe { + self.write_back(new_len - len, |v| { + // MaybeUninit::fill is unstable. + let n = v.len(); + v.as_mut_ptr().write_bytes(0, n); + }) + } + } else { + Ok(()) + } + } + + /// Emits an `ingot` packet after any bytes present in this mblk. + pub fn emit_back( + &mut self, + pkt: impl Emit + EmitDoesNotRelyOnBufContents, + ) -> Result<(), WriteError> { + unsafe { + self.write_back(pkt.packet_length(), |v| { + // Unwrap safety: write will return an Error if + // unsuccessful. + pkt.emit_uninit(v).unwrap(); + }) + } + } + + /// Emits an `ingot` packet before any bytes present in this mblk. 
+ pub fn emit_front( + &mut self, + pkt: impl Emit + EmitDoesNotRelyOnBufContents, + ) -> Result<(), WriteError> { + unsafe { + self.write_front(pkt.packet_length(), |v| { + pkt.emit_uninit(v).unwrap(); + }) + } + } + + /// Copies a byte slice into the region after any bytes present in this mblk. + pub fn write_bytes_back( + &mut self, + bytes: impl AsRef<[u8]>, + ) -> Result<(), WriteError> { + let bytes = bytes.as_ref(); + unsafe { + self.write_back(bytes.len(), |v| { + // feat(maybe_uninit_write_slice) -> copy_from_slice + // is unstable. + let uninit_src: &[MaybeUninit] = + core::mem::transmute(bytes); + v.copy_from_slice(uninit_src); + }) + } + } + + /// Copies a byte slice into the region before any bytes present in this mblk. + pub fn write_bytes_front( + &mut self, + bytes: impl AsRef<[u8]>, + ) -> Result<(), WriteError> { + let bytes = bytes.as_ref(); + unsafe { + self.write_front(bytes.len(), |v| { + // feat(maybe_uninit_write_slice) -> copy_from_slice + // is unstable. + let uninit_src: &[MaybeUninit] = + core::mem::transmute(bytes); + v.copy_from_slice(uninit_src); + }) + } + } + + /// Places another `MsgBlk` at the end of this packet's + /// b_cont chain. + pub fn append(&mut self, other: Self) { + // Find the last element in the pkt chain + // i.e., whose b_cont is null. + let mut curr = self.inner.as_ptr(); + while unsafe { !(*curr).b_cont.is_null() } { + curr = unsafe { (*curr).b_cont }; + } + + unsafe { + (*curr).b_cont = other.unwrap_mblk().as_ptr(); + } + } + + /// Drop all bytes and move the cursor to the very back of the dblk. + pub fn pop_all(&mut self) { + unsafe { + (*self.inner.as_ptr()).b_rptr = + (*(*self.inner.as_ptr()).b_datap).db_lim; + (*self.inner.as_ptr()).b_wptr = + (*(*self.inner.as_ptr()).b_datap).db_lim; + } + } + + /// Returns a shared cursor over all segments in this `MsgBlk`. 
+ pub fn iter(&self) -> MsgBlkIter { + MsgBlkIter { curr: Some(self.inner), marker: PhantomData } + } + + /// Returns a mutable cursor over all segments in this `MsgBlk`. + pub fn iter_mut(&mut self) -> MsgBlkIterMut { + MsgBlkIterMut { curr: Some(self.inner), marker: PhantomData } + } + + /// Return the pointer address of the underlying mblk_t. + /// + /// NOTE: This is purely to allow passing the pointer value up to + /// DTrace so that the mblk can be inspected (read only) in probe + /// context. + pub fn mblk_addr(&self) -> uintptr_t { + self.inner.as_ptr() as uintptr_t + } + + /// Return the head of the underlying `mblk_t` segment chain and + /// consume `self`. The caller of this function now owns the + /// `mblk_t` segment chain. + pub fn unwrap_mblk(self) -> NonNull { + let ptr_out = self.inner; + _ = ManuallyDrop::new(self); + ptr_out + } + + /// Wrap the `mblk_t` packet in a [`MsgBlk`], taking ownership of + /// the `mblk_t` packet as a result. An `mblk_t` packet consists + /// of one or more `mblk_t` segments chained together via + /// `b_cont`. When the [`MsgBlk`] is dropped, the + /// underlying `mblk_t` segment chain is freed. If you wish to + /// pass on ownership you must call the [`MsgBlk::unwrap_mblk()`] + /// function. + /// + /// # Safety + /// + /// The `mp` pointer must point to an `mblk_t` allocated by + /// `allocb(9F)` or provided by some kernel API which itself used + /// one of the DDI/DKI APIs to allocate it. + /// + /// # Errors + /// + /// * Return [`WrapError::NullPtr`] is `mp` is `NULL`. + /// * Return [`WrapError::Chain`] is `mp->b_next` or `mp->b_next` are set. 
+ pub unsafe fn wrap_mblk(ptr: *mut mblk_t) -> Result { + let inner = NonNull::new(ptr).ok_or(WrapError::NullPtr)?; + let inner_ref = inner.as_ref(); + + if inner_ref.b_next.is_null() && inner_ref.b_prev.is_null() { + Ok(Self { inner }) + } else { + Err(WrapError::Chain) + } + } + + /// Copy out all bytes within this mblk and its successors + /// to a single contiguous buffer. + pub fn copy_all(&self) -> Vec { + let mut out = vec![]; + + for node in self.iter() { + out.extend_from_slice(node) + } + + out + } + + /// Drops all empty mblks from the start of this chain where possible + /// (i.e., any empty mblk is followed by another mblk). + pub fn drop_empty_segments(&mut self) { + // We should not be creating message block continuations to zero + // sized blocks. This is not a generally expected thing and has + // caused NIC hardware to stop working. + // Stripping these out where possible is necessary. + let mut head = self.inner; + let mut neighbour = unsafe { (*head.as_ptr()).b_cont }; + + while !neighbour.is_null() + && unsafe { (*head.as_ptr()).b_rptr == (*head.as_ptr()).b_wptr } + { + // Replace head with neighbour. + // Disconnect head from neighbour, and drop head. + unsafe { + (*head.as_ptr()).b_cont = ptr::null_mut(); + drop(MsgBlk::wrap_mblk(head.as_ptr())); + + // SAFETY: we know neighbour is non_null. 
+ head = NonNull::new_unchecked(neighbour); + neighbour = (*head.as_ptr()).b_cont + } + } + + self.inner = head; + } +} + +#[derive(Debug)] +pub struct MsgBlkIter<'a> { + curr: Option>, + marker: PhantomData<&'a MsgBlk>, +} + +#[derive(Debug)] +pub struct MsgBlkIterMut<'a> { + curr: Option>, + marker: PhantomData<&'a mut MsgBlk>, +} + +impl<'a> MsgBlkIterMut<'a> { + pub fn next_iter(&self) -> MsgBlkIter { + let curr = self + .curr + .and_then(|ptr| NonNull::new(unsafe { ptr.as_ref() }.b_cont)); + MsgBlkIter { curr, marker: PhantomData } + } + + pub fn next_iter_mut(&mut self) -> MsgBlkIterMut { + let curr = self + .curr + .and_then(|ptr| NonNull::new(unsafe { ptr.as_ref() }.b_cont)); + MsgBlkIterMut { curr, marker: PhantomData } + } +} + +impl<'a> Iterator for MsgBlkIter<'a> { + type Item = &'a MsgBlkNode; + + fn next(&mut self) -> Option { + if let Some(ptr) = self.curr { + self.curr = NonNull::new(unsafe { (*ptr.as_ptr()).b_cont }); + // SAFETY: MsgBlkNode is identical to mblk_t. + unsafe { Some(&*(ptr.as_ptr() as *const MsgBlkNode)) } + } else { + None + } + } +} + +impl<'a> Read for MsgBlkIter<'a> { + type Chunk = &'a [u8]; + + fn next_chunk(&mut self) -> ingot::types::ParseResult { + self.next().ok_or(IngotParseErr::TooSmall).map(|v| v.as_ref()) + } +} + +impl<'a> Iterator for MsgBlkIterMut<'a> { + type Item = &'a mut MsgBlkNode; + + fn next(&mut self) -> Option { + if let Some(ptr) = self.curr { + self.curr = NonNull::new(unsafe { (*ptr.as_ptr()).b_cont }); + // SAFETY: MsgBlkNode is identical to mblk_t. 
+ unsafe { Some(&mut *(ptr.as_ptr() as *mut MsgBlkNode)) } + } else { + None + } + } +} + +impl<'a> Read for MsgBlkIterMut<'a> { + type Chunk = &'a mut [u8]; + + fn next_chunk(&mut self) -> ingot::types::ParseResult { + self.next().ok_or(IngotParseErr::TooSmall).map(|v| v.as_mut()) + } +} + +impl<'a> QueryLen for MsgBlkIterMut<'a> { + #[inline] + fn len(&self) -> usize { + let own_blk_len = self + .curr + .map(|v| unsafe { + let v = v.as_ref(); + v.b_wptr.offset_from(v.b_rptr) as usize + }) + .unwrap_or_default(); + + own_blk_len + self.next_iter().map(|v| v.len()).sum::() + } +} + +/// For the `no_std`/illumos kernel environment, we want the `mblk_t` +/// drop to occur at the [`Packet`] level, where we can make use of +/// `freemsg(9F)`. +impl Drop for MsgBlk { + fn drop(&mut self) { + // Drop the segment chain if there is one. Consumers of MsgBlk + // will never own a packet with no segments. + // This guarantees that we only free the segment chain once. + cfg_if! { + if #[cfg(all(not(feature = "std"), not(test)))] { + // Safety: This is safe as long as the original + // `mblk_t` came from a call to `allocb(9F)` (or + // similar API). 
+ unsafe { ddi::freemsg(self.inner.as_ptr()) }; + } else { + mock_freemsg(self.inner.as_ptr()); + } + } + } +} + +#[cfg(test)] +mod test { + fn create_linked_mblks(n: usize) -> Vec<*mut mblk_t> { + let mut els = vec![]; + for _ in 0..n { + els.push(allocb(8)); + } + + // connect the elements in a chain + for (lhs, rhs) in els.iter().zip(els[1..].iter()) { + unsafe { + (**lhs).b_next = *rhs; + (**rhs).b_prev = *lhs; + } + } + + els + } + + #[test] + fn chain_has_correct_ends() { + let els = create_linked_mblks(3); + + let chain = unsafe { MsgBlkChain::new(els[0]) }.unwrap(); + let chain_inner = chain.inner.as_ref().unwrap(); + assert_eq!(chain_inner.head.as_ptr(), els[0]); + assert_eq!(chain_inner.tail.as_ptr(), els[2]); + } + + #[test] + fn chain_breaks_links() { + let els = create_linked_mblks(3); + + let mut chain = unsafe { MsgBlkChain::new(els[0]) }.unwrap(); + + let p0 = chain.pop_front().unwrap(); + assert_eq!(p0.mblk_addr(), els[0] as uintptr_t); + unsafe { + assert!((*els[0]).b_prev.is_null()); + assert!((*els[0]).b_next.is_null()); + } + + // Chain head/tail ptrs are correct + let chain_inner = chain.inner.as_ref().unwrap(); + assert_eq!(chain_inner.head.as_ptr(), els[1]); + assert_eq!(chain_inner.tail.as_ptr(), els[2]); + unsafe { + assert!((*els[1]).b_prev.is_null()); + assert!((*els[2]).b_next.is_null()); + } + } + + #[test] + fn chain_append_links() { + let els = create_linked_mblks(3); + let new_el = allocb(8); + + let mut chain = unsafe { MsgBlkChain::new(els[0]) }.unwrap(); + let pkt = unsafe { Packet::wrap_mblk(new_el) }.unwrap(); + + chain.append(pkt); + + // Chain head/tail ptrs are correct + let chain_inner = chain.inner.as_ref().unwrap(); + assert_eq!(chain_inner.head.as_ptr(), els[0]); + assert_eq!(chain_inner.tail.as_ptr(), new_el); + + // Last el has been linked to the new pkt, and it has a valid + // backward link. 
+ unsafe { + assert_eq!((*new_el).b_prev, els[2]); + assert!((*new_el).b_next.is_null()); + assert_eq!((*els[2]).b_next, new_el); + } + } + + #[test] + fn chain_drain_complete() { + let els = create_linked_mblks(64); + + let mut chain = unsafe { MsgBlkChain::new(els[0]) }.unwrap(); + + for i in 0..els.len() { + let pkt = chain.pop_front().unwrap(); + assert_eq!(pkt.mblk_addr(), els[i] as uintptr_t); + } + + assert!(chain.pop_front().is_none()); + } +} diff --git a/lib/opte/src/engine/arp.rs b/lib/opte/src/engine/arp.rs index bbabfe22..185b1f43 100644 --- a/lib/opte/src/engine/arp.rs +++ b/lib/opte/src/engine/arp.rs @@ -7,7 +7,7 @@ //! ARP headers and data. use super::ether::Ethernet; -use super::ingot_packet::MsgBlk; +use crate::ddi::mblk::MsgBlk; use core::fmt; use core::fmt::Display; use ingot::ethernet::Ethertype; diff --git a/lib/opte/src/engine/dhcp.rs b/lib/opte/src/engine/dhcp.rs index 6ad7b482..1d456aa3 100644 --- a/lib/opte/src/engine/dhcp.rs +++ b/lib/opte/src/engine/dhcp.rs @@ -8,7 +8,6 @@ use super::ether::Ethernet; use super::ingot_packet::MblkPacketData; -use super::ingot_packet::MsgBlk; use super::ip::v4::*; use super::predicate::DataPredicate; use super::predicate::EtherAddrMatch; @@ -19,6 +18,7 @@ use super::predicate::Predicate; use super::rule::AllowOrDeny; use super::rule::GenPacketResult; use super::rule::HairpinAction; +use crate::ddi::mblk::MsgBlk; use alloc::string::ToString; use alloc::vec::Vec; use core::fmt; diff --git a/lib/opte/src/engine/dhcpv6/protocol.rs b/lib/opte/src/engine/dhcpv6/protocol.rs index e7f19404..3bac64ea 100644 --- a/lib/opte/src/engine/dhcpv6/protocol.rs +++ b/lib/opte/src/engine/dhcpv6/protocol.rs @@ -2,12 +2,13 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! Implementation of the main message types for DHCPv6. 
use super::Dhcpv6Action; use super::TransactionId; +use crate::ddi::mblk::MsgBlk; use crate::engine::dhcpv6::options::Code as OptionCode; use crate::engine::dhcpv6::options::IaAddr; use crate::engine::dhcpv6::options::IaNa; @@ -23,7 +24,6 @@ use crate::engine::dhcpv6::CLIENT_PORT; use crate::engine::dhcpv6::SERVER_PORT; use crate::engine::ether::Ethernet; use crate::engine::ingot_packet::MblkPacketData; -use crate::engine::ingot_packet::MsgBlk; use crate::engine::ip::v6::Ipv6; use crate::engine::ip::v6::Ipv6Ref; use crate::engine::predicate::DataPredicate; @@ -690,7 +690,7 @@ mod test { use super::OptionCode; use crate::engine::dhcpv6::test_data; use crate::engine::ingot_packet::MsgBlk; - use crate::engine::ingot_packet::Packet2; + use crate::engine::ingot_packet::Packet; use crate::engine::port::meta::ActionMeta; use crate::engine::GenericUlp; @@ -722,7 +722,7 @@ mod test { #[test] fn test_predicates_match_snooped_solicit_message() { let mut pkt = MsgBlk::copy(test_data::TEST_SOLICIT_PACKET); - let pkt = Packet2::new(pkt.iter_mut()) + let pkt = Packet::new(pkt.iter_mut()) .parse_outbound(GenericUlp {}) .unwrap() .to_full_meta(); diff --git a/lib/opte/src/engine/geneve.rs b/lib/opte/src/engine/geneve.rs index 559b7cea..fc910b81 100644 --- a/lib/opte/src/engine/geneve.rs +++ b/lib/opte/src/engine/geneve.rs @@ -316,8 +316,21 @@ pub fn geneve_opt_is_oxide_external( mod test { use core::matches; + use ingot::ethernet::Ethernet; + use ingot::ethernet::Ethertype; + use ingot::ip::IpProtocol; + use ingot::ip::Ipv6; + use ingot::types::Emit; + use ingot::types::HeaderParse; + use ingot::udp::UdpRef; + use ingot::udp::ValidUdp; + use super::*; + use crate::engine::headers::EncapMeta; + use crate::engine::ingot_packet::MsgBlk; + use crate::engine::ingot_packet::Packet; use crate::engine::packet::Packet; + use crate::engine::parse::ValidGeneveOverV6; #[test] fn emit_no_opts() { @@ -329,13 +342,9 @@ mod test { }; let len = geneve.hdr_len(); - let mut pkt = 
Packet::alloc_and_expand(len); - let mut wtr = pkt.seg0_wtr(); - geneve.emit( - geneve.hdr_len().try_into().unwrap(), - wtr.slice_mut(len).unwrap(), - ); + let emitted = EncapMeta::Geneve(geneve).emit_vec(); assert_eq!(len, pkt.len()); + #[rustfmt::skip] let expected_bytes = vec![ // source @@ -355,7 +364,7 @@ mod test { // vni + reserved 0x00, 0x04, 0xD2, 0x00 ]; - assert_eq!(&expected_bytes, pkt.seg_bytes(0)); + assert_eq!(expected_bytes, emitted); } #[test] @@ -367,13 +376,9 @@ mod test { }; let len = geneve.hdr_len(); - let mut pkt = Packet::alloc_and_expand(len); - let mut wtr = pkt.seg0_wtr(); - geneve.emit( - geneve.hdr_len().try_into().unwrap(), - wtr.slice_mut(len).unwrap(), - ); + let emitted = EncapMeta::Geneve(geneve).emit_vec(); assert_eq!(len, pkt.len()); + #[rustfmt::skip] let expected_bytes = vec![ // source @@ -400,7 +405,7 @@ mod test { // rsvd + len 0x00, ]; - assert_eq!(&expected_bytes, pkt.seg_bytes(0)); + assert_eq!(&expected_bytes, emitted); } #[test] @@ -432,64 +437,20 @@ mod test { // rsvd + len 0x00, ]; - let mut pkt = Packet::copy(&buf); - let mut reader = pkt.get_rdr_mut(); - let udp = UdpHdr::parse(&mut reader).unwrap(); - let header = GeneveHdr::parse(&mut reader).unwrap(); - - // Previously, the `Ipv6Meta::total_len` method double-counted the - // extension header length. Assert we don't do that here. - let meta = GeneveMeta::from((&udp, &header)); - assert_eq!( - meta.entropy, - u16::from_be_bytes(buf[0..2].try_into().unwrap()) - ); - assert!(meta.oxide_external_pkt); - } - #[test] - fn bad_opt_len_fails() { - // Create a packet with one extension header. - #[rustfmt::skip] - let buf = vec![ - // source - 0x1E, 0x61, - // dest - 0x17, 0xC1, - // length - 0x00, 0x14, - // csum - 0x00, 0x00, - // ver + BAD opt len - 0x01, - // flags - 0x00, - // proto - 0x65, 0x58, - // vni + reserved - 0x00, 0x04, 0xD2, 0x00, + let (.., rem) = ValidUdp::parse(&buf[..]).unwrap(); + let (geneve, ..) 
= ValidGeneve::parse(rem).unwrap(); - // option class - 0x01, 0x29, - // crt + type - 0x01, - // rsvd + len - 0x01, - // body - 0x00, 0x00, 0x00, 0x00 - ]; - let mut pkt = Packet::copy(&buf); - let mut reader = pkt.get_rdr_mut(); - UdpHdr::parse(&mut reader).unwrap(); - assert!(matches!( - GeneveHdr::parse(&mut reader), - Err(GeneveHdrError::BadLength { .. }), - )); + validate_geneve(&geneve).unwrap(); + + assert!(geneve_opt_is_oxide_external(&geneve)); } #[test] fn unknown_crit_option_fails() { - // Create a packet with one extension header. + // Create a packet with one extension header with the critical + // flag set. + // We do not unsdertand this extension, so must drop the packet. #[rustfmt::skip] let buf = vec![ // source @@ -516,21 +477,22 @@ mod test { // rsvd + len 0x00, ]; - let mut pkt = Packet::copy(&buf); - let mut reader = pkt.get_rdr_mut(); - UdpHdr::parse(&mut reader).unwrap(); + + let (_udp, _, rem) = ValidUdp::parse(&buf[..]).unwrap(); + let (geneve, ..) = ValidGeneve::parse(rem).unwrap(); + assert!(matches!( - GeneveHdr::parse(&mut reader), - Err(GeneveHdrError::UnknownCriticalOption { - class: 0xff_ff, - opt_type: 0 - }), + validate_geneve(&geneve), + Err(ParseError::UnrecognisedTunnelOpt { class: 0xffff, ty: 0x80 }), )); } #[test] fn parse_multi_opt() { - // Create a packet with one extension header. + // Create a packet with three extension headers. + // None are critical, so the fact that we + // We shoukld also be able to extract info on the options we *do* + // care about. #[rustfmt::skip] let buf = vec![ // source @@ -575,18 +537,11 @@ mod test { // body 0x00, 0x00, 0x00, 0x00, ]; - let mut pkt = Packet::copy(&buf); - let mut reader = pkt.get_rdr_mut(); - let udp = UdpHdr::parse(&mut reader).unwrap(); - let header = GeneveHdr::parse(&mut reader).unwrap(); - - // Previously, the `Ipv6Meta::total_len` method double-counted the - // extension header length. Assert we don't do that here. 
- let meta = GeneveMeta::from((&udp, &header)); - assert_eq!( - meta.entropy, - u16::from_be_bytes(buf[0..2].try_into().unwrap()) - ); - assert!(meta.oxide_external_pkt); + + let (.., rem) = ValidUdp::parse(&buf[..]).unwrap(); + let (geneve, ..) = ValidGeneve::parse(rem).unwrap(); + + validate_geneve(&geneve).unwrap(); + assert!(geneve_opt_is_oxide_external(&geneve)); } } diff --git a/lib/opte/src/engine/icmp/v4.rs b/lib/opte/src/engine/icmp/v4.rs index 0e210d95..abcb4773 100644 --- a/lib/opte/src/engine/icmp/v4.rs +++ b/lib/opte/src/engine/icmp/v4.rs @@ -2,14 +2,14 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! ICMPv4 headers and processing. use super::*; +use crate::ddi::mblk::MsgBlk; use crate::engine::ether::Ethernet; use crate::engine::ingot_packet::MblkPacketData; -use crate::engine::ingot_packet::MsgBlk; use crate::engine::ip::v4::Ipv4; use crate::engine::ip::L3; use crate::engine::predicate::Ipv4AddrMatch; diff --git a/lib/opte/src/engine/icmp/v6.rs b/lib/opte/src/engine/icmp/v6.rs index c95016c6..d443303a 100644 --- a/lib/opte/src/engine/icmp/v6.rs +++ b/lib/opte/src/engine/icmp/v6.rs @@ -2,14 +2,14 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! ICMPv6 headers and processing. 
use super::*; +use crate::ddi::mblk::MsgBlk; use crate::engine::ether::Ethernet; use crate::engine::ingot_packet::MblkPacketData; -use crate::engine::ingot_packet::MsgBlk; use crate::engine::ip::v6::Ipv6; use crate::engine::ip::v6::Ipv6Ref; use crate::engine::predicate::Ipv6AddrMatch; diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 702940ef..16d32c9b 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -39,16 +39,12 @@ use super::ip::v6::Ipv6Ref; use super::ip::L3Repr; use super::ip::ValidL3; use super::ip::L3; -use super::packet::allocb; use super::packet::AddrPair; use super::packet::BodyTransform; use super::packet::BodyTransformError; use super::packet::InnerFlowId; use super::packet::PacketState; use super::packet::ParseError; -use super::packet::SegAdjustError; -use super::packet::WrapError; -use super::packet::WriteError; use super::packet::FLOW_ID_DEFAULT; use super::parse::NoEncap; use super::parse::Ulp; @@ -59,27 +55,21 @@ use super::rule::HdrTransform; use super::rule::HdrTransformError; use super::LightweightMeta; use super::NetworkParser; +use crate::ddi::mblk::MsgBlk; +use crate::ddi::mblk::MsgBlkIterMut; +use crate::ddi::mblk::MsgBlkNode; use crate::engine::geneve::valid_geneve_has_oxide_external; use crate::engine::geneve::GeneveMeta; -#[cfg(any(feature = "std", test))] -use crate::engine::packet::mock_freemsg; use alloc::boxed::Box; use alloc::sync::Arc; use alloc::vec::Vec; use core::cell::Cell; use core::hash::Hash; -use core::marker::PhantomData; -use core::mem::ManuallyDrop; -use core::mem::MaybeUninit; use core::ops::Deref; use core::ops::DerefMut; -use core::ptr; -use core::ptr::NonNull; -use core::slice; use core::sync::atomic::AtomicPtr; #[cfg(all(not(feature = "std"), not(test)))] use illumos_sys_hdrs as ddi; -use illumos_sys_hdrs::mblk_t; use illumos_sys_hdrs::uintptr_t; use ingot::ethernet::Ethertype; use ingot::geneve::Geneve; @@ -103,13 +93,11 @@ use 
ingot::tcp::TcpRef; use ingot::types::util::Repeated; use ingot::types::BoxedHeader; use ingot::types::Emit; -use ingot::types::EmitDoesNotRelyOnBufContents; use ingot::types::Header as IngotHeader; use ingot::types::HeaderLen; use ingot::types::HeaderParse; use ingot::types::InlineHeader; use ingot::types::NextLayer; -use ingot::types::ParseError as IngotParseErr; use ingot::types::Parsed as IngotParsed; use ingot::types::Read; use ingot::types::ToOwnedPacket; @@ -125,561 +113,6 @@ use zerocopy::ByteSlice; use zerocopy::ByteSliceMut; use zerocopy::IntoBytes; -/// An individual illumos `mblk_t` -- a single bytestream -/// comprised of a linked list of data segments. -/// -/// To facilitate testing the OPTE core, [`MsgBlk`] is an abstraction for -/// manipulating network packets in both a `std` and `no_std` environment. -/// The first is useful for writing tests against the OPTE core engine and -/// executing them in userland, without the need for standing up a full-blown -/// virtual machine. -/// -/// The `no_std` implementation is used when running in-kernel. The -/// main difference is the `mblk_t` and `dblk_t` structures are coming -/// from viona (outbound/Tx) and mac (inbound/Rx), and we consume them -/// via [`Packet::wrap_mblk()`]. In reality this is typically holding -/// an Ethernet _frame_, but we prefer to use the colloquial -/// nomenclature of "packet". 
-#[derive(Debug)] -pub struct MsgBlk { - pub inner: NonNull, -} - -impl Deref for MsgBlk { - type Target = [u8]; - - fn deref(&self) -> &Self::Target { - unsafe { - let self_ref = self.inner.as_ref(); - let rptr = self_ref.b_rptr; - let len = self_ref.b_wptr.offset_from(rptr) as usize; - slice::from_raw_parts(rptr, len) - } - } -} - -impl DerefMut for MsgBlk { - fn deref_mut(&mut self) -> &mut Self::Target { - unsafe { - let self_ref = self.inner.as_mut(); - let rptr = self_ref.b_rptr; - let len = self_ref.b_wptr.offset_from(rptr) as usize; - slice::from_raw_parts_mut(rptr, len) - } - } -} - -#[derive(Debug)] -pub struct MsgBlkNode(mblk_t); - -impl Deref for MsgBlkNode { - type Target = [u8]; - - fn deref(&self) -> &Self::Target { - unsafe { - let rptr = self.0.b_rptr; - let len = self.0.b_wptr.offset_from(rptr) as usize; - slice::from_raw_parts(rptr, len) - } - } -} - -impl DerefMut for MsgBlkNode { - fn deref_mut(&mut self) -> &mut Self::Target { - unsafe { - let rptr = self.0.b_rptr; - let len = self.0.b_wptr.offset_from(rptr) as usize; - slice::from_raw_parts_mut(rptr, len) - } - } -} - -impl MsgBlkNode { - /// Shrink the writable/readable area by shifting the `b_rptr` by - /// `len`; effectively removing bytes from the start of the packet. - /// - /// # Errors - /// - /// `SegAdjustError::StartPastEnd`: Shifting the read pointer by - /// `len` would move `b_rptr` past `b_wptr`. - pub fn drop_front_bytes(&mut self, n: usize) -> Result<(), SegAdjustError> { - unsafe { - if self.0.b_wptr.offset_from(self.0.b_rptr) < n as isize { - return Err(SegAdjustError::StartPastEnd); - } - self.0.b_rptr = self.0.b_rptr.add(n); - } - - Ok(()) - } -} - -impl MsgBlk { - /// Allocate a new [`MsgBlk`] containing a data buffer of `len` - /// bytes. - /// - /// The returned packet consists of exactly one segment. - /// - /// In the kernel environment this uses `allocb(9F)` and - /// `freemsg(9F)` under the hood. 
- /// - /// In the `std` environment this uses a mock implementation of - /// `allocb(9F)` and `freeb(9F)`, which contains enough scaffolding - /// to satisfy OPTE's use of the underlying `mblk_t` and `dblk_t` - /// structures. - pub fn new(len: usize) -> Self { - let inner = NonNull::new(allocb(len)) - .expect("somehow failed to get an mblk..."); - - Self { inner } - } - - /// Allocates a new [`MsgBlk`] of size `buf.len()`, copying its - /// contents. - pub fn copy(buf: impl AsRef<[u8]>) -> Self { - let mut out = Self::new(buf.as_ref().len()); - // Unwrap safety -- just allocated length of input buffer. - out.write_bytes_back(buf).unwrap(); - out - } - - /// Creates a new [`MsgBlk`] using a given set of packet headers. - pub fn new_pkt(emit: impl Emit + EmitDoesNotRelyOnBufContents) -> Self { - let mut pkt = Self::new(emit.packet_length()); - pkt.emit_back(emit).unwrap(); - pkt - } - - /// Returns the number of bytes available for writing before - pub fn headroom(&self) -> usize { - unsafe { - let inner = self.inner.as_ref(); - - inner.b_rptr.offset_from((*inner.b_datap).db_base) as usize - } - } - - /// Creates a new [`MsgBlk`] containing a data buffer of `len` - /// bytes with 2B of headroom/alignment. - /// - /// This sets up 4B alignment on all post-ethernet headers. - pub fn new_ethernet(len: usize) -> Self { - Self::new_with_headroom(2, len) - } - - /// Creates a new [`MsgBlk`] using a given set of packet headers - /// with 2B of headroom/alignment. - /// - /// This sets up 4B alignment on all post-ethernet headers. - pub fn new_ethernet_pkt( - emit: impl Emit + EmitDoesNotRelyOnBufContents, - ) -> Self { - let mut pkt = Self::new_ethernet(emit.packet_length()); - pkt.emit_back(emit).unwrap(); - pkt - } - - /// Return the number of initialised bytes in this `MsgBlk` over - /// all linked segments. 
- pub fn byte_len(&self) -> usize { - self.iter().map(|el| el.len()).sum() - } - - /// Return the number of initialised bytes in this `MsgBlk` in - /// the head segment. - pub fn seg_len(&self) -> usize { - self.iter().count() - } - - /// Allocate a new [`MsgBlk`] containing a data buffer of size - /// `head_len + body_len`. - /// - /// The read/write pointer is set to have `head_len` bytes of - /// headroom and `body_len` bytes of capacity at the back. - pub fn new_with_headroom(head_len: usize, body_len: usize) -> Self { - let mut out = Self::new(head_len + body_len); - - // SAFETY: alloc is contiguous and always larger than head_len. - let mut_out = unsafe { out.inner.as_mut() }; - mut_out.b_rptr = unsafe { mut_out.b_rptr.add(head_len) }; - mut_out.b_wptr = mut_out.b_rptr; - - out - } - - /// Provides a slice of length `n_bytes` at the back of an [`MsgBlk`] - /// (if capacity exists) to be initialised, before increasing `len` - /// by `n_bytes`. - /// - /// # Safety - /// Users must write a value to every element of the `MaybeUninit` - /// buffer at least once in the `MsgBlk` lifecycle -- all `n_bytes` - /// are assumed to be initialised. - pub unsafe fn write_back( - &mut self, - n_bytes: usize, - f: impl FnOnce(&mut [MaybeUninit]), - ) -> Result<(), WriteError> { - let mut_out = unsafe { self.inner.as_mut() }; - let avail_bytes = - unsafe { (*mut_out.b_datap).db_lim.offset_from(mut_out.b_wptr) }; - - if avail_bytes < 0 || (avail_bytes as usize) < n_bytes { - return Err(WriteError::NotEnoughBytes { - available: avail_bytes.max(0) as usize, - needed: n_bytes, - }); - } - - let in_slice = unsafe { - slice::from_raw_parts_mut( - mut_out.b_wptr as *mut MaybeUninit, - n_bytes, - ) - }; - - f(in_slice); - - mut_out.b_wptr = unsafe { mut_out.b_wptr.add(n_bytes) }; - - Ok(()) - } - - /// Provides a slice of length `n_bytes` at the front of an [`MsgBlk`] - /// (if capacity exists) to be initialised, before increasing `len` - /// by `n_bytes`. 
- /// - /// # Safety - /// Users must write a value to every element of the `MaybeUninit` - /// buffer at least once in the `MsgBlk` lifecycle -- all `n_bytes` - /// are assumed to be initialised. - pub unsafe fn write_front( - &mut self, - n_bytes: usize, - f: impl FnOnce(&mut [MaybeUninit]), - ) -> Result<(), WriteError> { - let mut_out = unsafe { self.inner.as_mut() }; - let avail_bytes = - unsafe { mut_out.b_rptr.offset_from((*mut_out.b_datap).db_base) }; - - if avail_bytes < 0 || (avail_bytes as usize) < n_bytes { - return Err(WriteError::NotEnoughBytes { - available: avail_bytes.max(0) as usize, - needed: n_bytes, - }); - } - - let new_head = unsafe { mut_out.b_rptr.sub(n_bytes) }; - - let in_slice = unsafe { - slice::from_raw_parts_mut(new_head as *mut MaybeUninit, n_bytes) - }; - - f(in_slice); - - mut_out.b_rptr = new_head; - - Ok(()) - } - - /// Adjusts the write pointer for this MsgBlk, initialising any extra bytes to 0. - pub fn resize(&mut self, new_len: usize) -> Result<(), WriteError> { - let len = self.len(); - if new_len < len { - unsafe { - let mut_inner = self.inner.as_mut(); - mut_inner.b_wptr = mut_inner.b_wptr.sub(len - new_len); - } - Ok(()) - } else if new_len > len { - unsafe { - self.write_back(new_len - len, |v| { - // MaybeUninit::fill is unstable. - let n = v.len(); - v.as_mut_ptr().write_bytes(0, n); - }) - } - } else { - Ok(()) - } - } - - /// Emits an `ingot` packet after any bytes present in this mblk. - pub fn emit_back( - &mut self, - pkt: impl Emit + EmitDoesNotRelyOnBufContents, - ) -> Result<(), WriteError> { - unsafe { - self.write_back(pkt.packet_length(), |v| { - // Unwrap safety: write will return an Error if - // unsuccessful. - pkt.emit_uninit(v).unwrap(); - }) - } - } - - /// Emits an `ingot` packet before any bytes present in this mblk. 
- pub fn emit_front( - &mut self, - pkt: impl Emit + EmitDoesNotRelyOnBufContents, - ) -> Result<(), WriteError> { - unsafe { - self.write_front(pkt.packet_length(), |v| { - pkt.emit_uninit(v).unwrap(); - }) - } - } - - /// Copies a byte slice into the region after any bytes present in this mblk. - pub fn write_bytes_back( - &mut self, - bytes: impl AsRef<[u8]>, - ) -> Result<(), WriteError> { - let bytes = bytes.as_ref(); - unsafe { - self.write_back(bytes.len(), |v| { - // feat(maybe_uninit_write_slice) -> copy_from_slice - // is unstable. - let uninit_src: &[MaybeUninit] = - core::mem::transmute(bytes); - v.copy_from_slice(uninit_src); - }) - } - } - - /// Copies a byte slice into the region before any bytes present in this mblk. - pub fn write_bytes_front( - &mut self, - bytes: impl AsRef<[u8]>, - ) -> Result<(), WriteError> { - let bytes = bytes.as_ref(); - unsafe { - self.write_front(bytes.len(), |v| { - // feat(maybe_uninit_write_slice) -> copy_from_slice - // is unstable. - let uninit_src: &[MaybeUninit] = - core::mem::transmute(bytes); - v.copy_from_slice(uninit_src); - }) - } - } - - /// Places another `MsgBlk` at the end of this packet's - /// b_cont chain. - pub fn append(&mut self, other: Self) { - // Find the last element in the pkt chain - // i.e., whose b_cont is null. - let mut curr = self.inner.as_ptr(); - while unsafe { !(*curr).b_cont.is_null() } { - curr = unsafe { (*curr).b_cont }; - } - - unsafe { - (*curr).b_cont = other.unwrap_mblk().as_ptr(); - } - } - - /// Drop all bytes and move the cursor to the very back of the dblk. - pub fn pop_all(&mut self) { - unsafe { - (*self.inner.as_ptr()).b_rptr = - (*(*self.inner.as_ptr()).b_datap).db_lim; - (*self.inner.as_ptr()).b_wptr = - (*(*self.inner.as_ptr()).b_datap).db_lim; - } - } - - /// Returns a shared cursor over all segments in this `MsgBlk`. 
- pub fn iter(&self) -> MsgBlkIter { - MsgBlkIter { curr: Some(self.inner), marker: PhantomData } - } - - /// Returns a mutable cursor over all segments in this `MsgBlk`. - pub fn iter_mut(&mut self) -> MsgBlkIterMut { - MsgBlkIterMut { curr: Some(self.inner), marker: PhantomData } - } - - /// Return the pointer address of the underlying mblk_t. - /// - /// NOTE: This is purely to allow passing the pointer value up to - /// DTrace so that the mblk can be inspected (read only) in probe - /// context. - pub fn mblk_addr(&self) -> uintptr_t { - self.inner.as_ptr() as uintptr_t - } - - /// Return the head of the underlying `mblk_t` segment chain and - /// consume `self`. The caller of this function now owns the - /// `mblk_t` segment chain. - pub fn unwrap_mblk(self) -> NonNull { - let ptr_out = self.inner; - _ = ManuallyDrop::new(self); - ptr_out - } - - /// Wrap the `mblk_t` packet in a [`MsgBlk`], taking ownership of - /// the `mblk_t` packet as a result. An `mblk_t` packet consists - /// of one or more `mblk_t` segments chained together via - /// `b_cont`. When the [`MsgBlk`] is dropped, the - /// underlying `mblk_t` segment chain is freed. If you wish to - /// pass on ownership you must call the [`MsgBlk::unwrap_mblk()`] - /// function. - /// - /// # Safety - /// - /// The `mp` pointer must point to an `mblk_t` allocated by - /// `allocb(9F)` or provided by some kernel API which itself used - /// one of the DDI/DKI APIs to allocate it. - /// - /// # Errors - /// - /// * Return [`WrapError::NullPtr`] is `mp` is `NULL`. - /// * Return [`WrapError::Chain`] is `mp->b_next` or `mp->b_next` are set. 
- pub unsafe fn wrap_mblk(ptr: *mut mblk_t) -> Result { - let inner = NonNull::new(ptr).ok_or(WrapError::NullPtr)?; - let inner_ref = inner.as_ref(); - - if inner_ref.b_next.is_null() && inner_ref.b_prev.is_null() { - Ok(Self { inner }) - } else { - Err(WrapError::Chain) - } - } - - /// Copy out all bytes within this mblk and its successors - /// to a single contiguous buffer. - pub fn copy_all(&self) -> Vec { - let mut out = vec![]; - - for node in self.iter() { - out.extend_from_slice(node) - } - - out - } - - /// Drops all empty mblks from the start of this chain where possible - /// (i.e., any empty mblk is followed by another mblk). - pub fn drop_empty_segments(&mut self) { - // We should not be creating message block continuations to zero - // sized blocks. This is not a generally expected thing and has - // caused NIC hardware to stop working. - // Stripping these out where possible is necessary. - let mut head = self.inner; - let mut neighbour = unsafe { (*head.as_ptr()).b_cont }; - - while !neighbour.is_null() - && unsafe { (*head.as_ptr()).b_rptr == (*head.as_ptr()).b_wptr } - { - // Replace head with neighbour. - // Disconnect head from neighbour, and drop head. - unsafe { - (*head.as_ptr()).b_cont = ptr::null_mut(); - drop(MsgBlk::wrap_mblk(head.as_ptr())); - - // SAFETY: we know neighbour is non_null. 
- head = NonNull::new_unchecked(neighbour); - neighbour = (*head.as_ptr()).b_cont - } - } - - self.inner = head; - } -} - -#[derive(Debug)] -pub struct MsgBlkIter<'a> { - curr: Option>, - marker: PhantomData<&'a MsgBlk>, -} - -#[derive(Debug)] -pub struct MsgBlkIterMut<'a> { - curr: Option>, - marker: PhantomData<&'a mut MsgBlk>, -} - -impl<'a> MsgBlkIterMut<'a> { - pub fn next_iter(&self) -> MsgBlkIter { - let curr = self - .curr - .and_then(|ptr| NonNull::new(unsafe { ptr.as_ref() }.b_cont)); - MsgBlkIter { curr, marker: PhantomData } - } - - pub fn next_iter_mut(&mut self) -> MsgBlkIterMut { - let curr = self - .curr - .and_then(|ptr| NonNull::new(unsafe { ptr.as_ref() }.b_cont)); - MsgBlkIterMut { curr, marker: PhantomData } - } -} - -impl<'a> Iterator for MsgBlkIter<'a> { - type Item = &'a MsgBlkNode; - - fn next(&mut self) -> Option { - if let Some(ptr) = self.curr { - self.curr = NonNull::new(unsafe { (*ptr.as_ptr()).b_cont }); - // SAFETY: MsgBlkNode is identical to mblk_t. - unsafe { Some(&*(ptr.as_ptr() as *const MsgBlkNode)) } - } else { - None - } - } -} - -impl<'a> Read for MsgBlkIter<'a> { - type Chunk = &'a [u8]; - - fn next_chunk(&mut self) -> ingot::types::ParseResult { - self.next().ok_or(IngotParseErr::TooSmall).map(|v| v.as_ref()) - } -} - -impl<'a> Iterator for MsgBlkIterMut<'a> { - type Item = &'a mut MsgBlkNode; - - fn next(&mut self) -> Option { - if let Some(ptr) = self.curr { - self.curr = NonNull::new(unsafe { (*ptr.as_ptr()).b_cont }); - // SAFETY: MsgBlkNode is identical to mblk_t. - unsafe { Some(&mut *(ptr.as_ptr() as *mut MsgBlkNode)) } - } else { - None - } - } -} - -impl<'a> Read for MsgBlkIterMut<'a> { - type Chunk = &'a mut [u8]; - - fn next_chunk(&mut self) -> ingot::types::ParseResult { - self.next().ok_or(IngotParseErr::TooSmall).map(|v| v.as_mut()) - } -} - -/// For the `no_std`/illumos kernel environment, we want the `mblk_t` -/// drop to occur at the [`Packet`] level, where we can make use of -/// `freemsg(9F)`. 
-impl Drop for MsgBlk { - fn drop(&mut self) { - // Drop the segment chain if there is one. Consumers of MsgBlk - // will never own a packet with no segments. - // This guarantees that we only free the segment chain once. - cfg_if! { - if #[cfg(all(not(feature = "std"), not(test)))] { - // Safety: This is safe as long as the original - // `mblk_t` came from a call to `allocb(9F)` (or - // similar API). - unsafe { ddi::freemsg(self.inner.as_ptr()) }; - } else { - mock_freemsg(self.inner.as_ptr()); - } - } - } -} - pub struct OpteUnifiedLengths { pub outer_eth: usize, pub outer_l3: usize, @@ -1239,11 +672,11 @@ impl From<&PacketData> for InnerFlowId { // is a bridge too far for the `ingot` datapath rewrite. This might have // value in future. #[derive(Debug)] -pub struct Packet2 { +pub struct Packet { state: S, } -impl Packet2> { +impl Packet> { pub fn new(pkt: T) -> Self where Initialized2: PacketState, @@ -1253,7 +686,7 @@ impl Packet2> { } } -impl<'a, T: Read + 'a> Packet2> +impl<'a, T: Read + 'a> Packet> where T::Chunk: ingot::types::IntoBufPointer<'a> + ByteSliceMut, { @@ -1268,36 +701,36 @@ where pub fn parse_inbound( self, net: NP, - ) -> Result>>, ParseError> { - let Packet2 { state: Initialized2 { len, inner } } = self; + ) -> Result>>, ParseError> { + let Packet { state: Initialized2 { len, inner } } = self; let meta = net.parse_inbound(inner)?; meta.stack.validate(len)?; - Ok(Packet2 { state: LiteParsed { meta, len } }) + Ok(Packet { state: LiteParsed { meta, len } }) } #[inline] pub fn parse_outbound( self, net: NP, - ) -> Result>>, ParseError> { - let Packet2 { state: Initialized2 { len, inner } } = self; + ) -> Result>>, ParseError> { + let Packet { state: Initialized2 { len, inner } } = self; let meta = net.parse_outbound(inner)?; meta.stack.validate(len)?; - Ok(Packet2 { state: LiteParsed { meta, len } }) + Ok(Packet { state: LiteParsed { meta, len } }) } } -impl<'a, T: Read + 'a, M: LightweightMeta> Packet2> +impl<'a, T: Read + 'a, M: 
LightweightMeta> Packet> where T::Chunk: ingot::types::IntoBufPointer<'a>, { #[inline] - pub fn to_full_meta(self) -> Packet2> { - let Packet2 { state: LiteParsed { len, meta } } = self; + pub fn to_full_meta(self) -> Packet> { + let Packet { state: LiteParsed { len, meta } } = self; let IngotParsed { stack: headers, data, last_chunk } = meta; // TODO: we can probably not do this in some cases, but we @@ -1324,7 +757,7 @@ where }; let meta = Box::new(PacketData { headers, initial_lens, body }); - Packet2 { + Packet { state: FullParsed { meta, flow, @@ -1358,7 +791,7 @@ where } } -impl Packet2> { +impl Packet> { pub fn meta(&self) -> &PacketData { &self.state.meta } @@ -1961,21 +1394,6 @@ pub trait QueryLen { fn len(&self) -> usize; } -impl<'a> QueryLen for MsgBlkIterMut<'a> { - #[inline] - fn len(&self) -> usize { - let own_blk_len = self - .curr - .map(|v| unsafe { - let v = v.as_ref(); - v.b_wptr.offset_from(v.b_rptr) as usize - }) - .unwrap_or_default(); - - own_blk_len + self.next_iter().map(|v| v.len()).sum::() - } -} - // TODO: don't really care about pushing 'inner' reprs today. #[derive(Clone, Debug, Default)] pub struct OpteEmit { diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index 1843d7a9..13e5732e 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! A layer in a port. 
@@ -13,8 +13,7 @@ use super::flow_table::FlowTableDump; use super::flow_table::FLOW_DEF_EXPIRE_SECS; use super::ingot_packet::MblkFullParsed; use super::ingot_packet::MblkPacketData; -use super::ingot_packet::MsgBlk; -use super::ingot_packet::Packet2; +use super::ingot_packet::Packet; use super::ioctl; use super::ioctl::ActionDescEntryDump; use super::packet::BodyTransformError; @@ -38,6 +37,7 @@ use crate::ddi::kstat; use crate::ddi::kstat::KStatNamed; use crate::ddi::kstat::KStatProvider; use crate::ddi::kstat::KStatU64; +use crate::ddi::mblk::MsgBlk; use crate::ddi::time::Moment; use crate::ExecCtx; use crate::LogLevel; @@ -798,7 +798,7 @@ impl Layer { &mut self, ectx: &ExecCtx, dir: Direction, - pkt: &mut Packet2, + pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { @@ -816,7 +816,7 @@ impl Layer { fn process_in( &mut self, ectx: &ExecCtx, - pkt: &mut Packet2, + pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { @@ -886,7 +886,7 @@ impl Layer { fn process_in_rules( &mut self, ectx: &ExecCtx, - pkt: &mut Packet2, + pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { @@ -1103,7 +1103,7 @@ impl Layer { fn process_out( &mut self, ectx: &ExecCtx, - pkt: &mut Packet2, + pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { @@ -1173,7 +1173,7 @@ impl Layer { fn process_out_rules( &mut self, ectx: &ExecCtx, - pkt: &mut Packet2, + pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { @@ -1833,7 +1833,7 @@ mod test { use ingot::tcp::Tcp; use ingot::types::HeaderLen; - use crate::engine::ingot_base::Ipv4; + use crate::engine::ip::v4::Ipv4; use crate::engine::GenericUlp; use super::*; @@ -1874,7 +1874,7 @@ mod test { }, )); - let pkt_view = Packet2::new(test_pkt.iter_mut()); + let pkt_view = Packet::new(test_pkt.iter_mut()); let pmeta = pkt_view.parse_outbound(GenericUlp 
{}).unwrap().to_full_meta(); diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index 2c8fa6f1..146e1319 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -39,14 +39,14 @@ pub mod udp; pub mod ingot_packet; +use crate::ddi::mblk::MsgBlk; use checksum::Checksum; use ingot::tcp::TcpRef; use ingot::types::Read; use ingot_packet::FullParsed; -use ingot_packet::MsgBlk; use ingot_packet::OpteMeta; use ingot_packet::OpteParsed2; -use ingot_packet::Packet2; +use ingot_packet::Packet; pub use opte_api::Direction; use parse::ValidNoEncap; use rule::CompiledTransform; @@ -221,7 +221,7 @@ pub trait NetworkImpl { fn handle_pkt( &self, dir: Direction, - pkt: &mut Packet2>, + pkt: &mut Packet>, uft_in: &FlowTable>, uft_out: &FlowTable>, ) -> Result diff --git a/lib/opte/src/engine/nat.rs b/lib/opte/src/engine/nat.rs index 1d6c56d9..f1e87abd 100644 --- a/lib/opte/src/engine/nat.rs +++ b/lib/opte/src/engine/nat.rs @@ -9,7 +9,7 @@ use super::headers::HeaderAction; use super::headers::IpMod; use super::ingot_packet::MblkFullParsed; -use super::ingot_packet::Packet2; +use super::ingot_packet::Packet; use super::packet::InnerFlowId; use super::port::meta::ActionMeta; use super::predicate::DataPredicate; @@ -85,7 +85,7 @@ impl StatefulAction for OutboundNat { fn gen_desc( &self, flow_id: &InnerFlowId, - _pkt: &Packet2, + _pkt: &Packet, _meta: &mut ActionMeta, ) -> rule::GenDescResult { // When we have several external IPs at our disposal, we are @@ -148,7 +148,7 @@ impl StatefulAction for InboundNat { fn gen_desc( &self, flow_id: &InnerFlowId, - _pkt: &Packet2, + _pkt: &Packet, _meta: &mut ActionMeta, ) -> rule::GenDescResult { // We rely on the attached predicates to filter out IPs which are *not* @@ -216,11 +216,11 @@ impl ActionDesc for NatDesc { mod test { use super::*; - use crate::engine::ingot_base::Ethernet; - use crate::engine::ingot_base::EthernetRef; - use crate::engine::ingot_base::Ipv4; - use crate::engine::ingot_base::Ipv4Ref; - 
use crate::engine::ingot_packet::MsgBlk; + use crate::engine::ether::Ethernet; + use crate::engine::ether::EthernetRef; + use crate::engine::ip::v4::Ipv4; + use crate::engine::ip::v4::Ipv4Ref; + use crate::ddi::mblk::MsgBlk; use crate::engine::GenericUlp; use ingot::ethernet::Ethertype; use ingot::ip::IpProtocol; @@ -278,7 +278,7 @@ mod test { }; let mut pkt_m = MsgBlk::new_ethernet_pkt((ð, &ip4, &tcp, &body)); - let mut pkt = Packet2::new(pkt_m.iter_mut()) + let mut pkt = Packet::new(pkt_m.iter_mut()) .parse_outbound(GenericUlp {}) .unwrap() .to_full_meta(); @@ -347,7 +347,7 @@ mod test { }; let mut pkt_m = MsgBlk::new_ethernet_pkt((ð, &ip4, &tcp, &body)); - let mut pkt = Packet2::new(pkt_m.iter_mut()) + let mut pkt = Packet::new(pkt_m.iter_mut()) .parse_inbound(GenericUlp {}) .unwrap() .to_full_meta(); diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 3d654810..d5c04e63 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -14,7 +14,6 @@ use super::headers::IpAddr; use super::headers::AF_INET; use super::headers::AF_INET6; -use super::ingot_packet::MsgBlk; use super::ip::v4::Ipv4Addr; use super::ip::v4::Protocol; use super::ip::v6::Ipv6Addr; @@ -24,7 +23,6 @@ use core::fmt; use core::fmt::Display; use core::hash::Hash; use core::ptr; -use core::ptr::NonNull; use core::result; use crc32fast::Hasher; use dyn_clone::DynClone; @@ -178,148 +176,6 @@ impl Display for InnerFlowId { } } -/// The head and tail of an mblk_t list. -struct PacketChainInner { - head: NonNull, - tail: NonNull, -} - -/// A chain of network packets. -/// -/// Network packets are provided by illumos as a linked list, using -/// the `b_next` and `b_prev` fields. -/// -/// See the documentation for [`Packet`] and/or [`MsgBlk`] for full context. 
-// TODO: We might retool this type now that MsgBlk does not decompose -// each mblk_t into individual segments (i.e., packets could be allocated -// a lifetime via PhantomData based on whether we want to remove them from the chain or modify in place). -// Today's code is all equivalent to always using 'static, because -// we remove and re-add the mblks to work on them. -// We might want also want to return either a chain/mblk_t in an enum, but -// practically XDE will always assume it has a chain from MAC. -pub struct PacketChain { - inner: Option, -} - -impl PacketChain { - /// Create an empty packet chain. - pub fn empty() -> Self { - Self { inner: None } - } - - /// Convert an mblk_t packet chain into a safe source of `MsgBlk`s. - /// - /// # Safety - /// The `mp` pointer must point to an `mblk_t` allocated by - /// `allocb(9F)` or provided by some kernel API which itself used - /// one of the DDI/DKI APIs to allocate it. - /// Packets must form a valid linked list (no loops). - /// The original mblk_t pointer must not be used again. - pub unsafe fn new(mp: *mut mblk_t) -> Result { - let head = NonNull::new(mp).ok_or(WrapError::NullPtr)?; - - // Walk the chain to find the tail, and support faster append. - let mut tail = head; - while let Some(next_ptr) = NonNull::new((*tail.as_ptr()).b_next) { - tail = next_ptr; - } - - Ok(Self { inner: Some(PacketChainInner { head, tail }) }) - } - - /// Removes the next packet from the top of the chain and returns - /// it, taking ownership. - pub fn pop_front(&mut self) -> Option { - if let Some(ref mut list) = &mut self.inner { - unsafe { - let curr_b = list.head; - let curr = curr_b.as_ptr(); - let next = NonNull::new((*curr).b_next); - - // Break the forward link on the packet we have access to, - // and the backward link on the next element if possible. - if let Some(next) = next { - (*next.as_ptr()).b_prev = ptr::null_mut(); - } - (*curr).b_next = ptr::null_mut(); - - // Update the current head. 
If the next element is null, - // we're now empty. - if let Some(next) = next { - list.head = next; - } else { - self.inner = None; - } - - Some(MsgBlk { inner: curr_b }) - } - } else { - None - } - } - - /// Adds an owned `MsgBlk` to the end of this chain. - /// - /// Internally, this unwraps the `MsgBlk` back into an mblk_t, - /// before placing it at the tail. - pub fn append(&mut self, packet: MsgBlk) { - // Unwrap safety: a valid Packet implies a non-null mblk_t. - // Jamming `NonNull` into PacketSeg/Packet might take some - // work just to avoid this unwrap. - let pkt = packet.unwrap_mblk(); - - // We're guaranteeing today that a 'static Packet has - // no neighbours and is not part of a chain. - // This simplifies tail updates in both cases (no chain walk). - unsafe { - assert!((*pkt.as_ptr()).b_prev.is_null()); - assert!((*pkt.as_ptr()).b_next.is_null()); - } - - if let Some(ref mut list) = &mut self.inner { - let pkt_p = pkt.as_ptr(); - let tail_p = list.tail.as_ptr(); - unsafe { - (*tail_p).b_next = pkt_p; - (*pkt_p).b_prev = tail_p; - // pkt_p->b_next is already null. - } - list.tail = pkt; - } else { - self.inner = Some(PacketChainInner { head: pkt, tail: pkt }); - } - } - - /// Return the head of the underlying `mblk_t` packet chain and - /// consume `self`. The caller of this function now owns the - /// `mblk_t` segment chain. - pub fn unwrap_mblk(mut self) -> Option> { - self.inner.take().map(|v| v.head) - } -} - -impl Drop for PacketChain { - fn drop(&mut self) { - // This is a minor variation on MsgBlk's logic. illumos - // contains helper functions from STREAMS to just drop a whole - // chain. - cfg_if! { - if #[cfg(all(not(feature = "std"), not(test)))] { - // Safety: This is safe as long as the original - // `mblk_t` came from a call to `allocb(9F)` (or - // similar API). 
- if let Some(list) = &self.inner { - unsafe { ddi::freemsgchain(list.head.as_ptr()) }; - } - } else { - while let Some(pkt) = self.pop_front() { - drop(pkt); - } - } - } - } -} - pub trait PacketState {} /// A packet body transformation. @@ -1317,92 +1173,4 @@ mod test { // // And make sure they don't include the padding bytes // assert_eq!(ip6_hdr.pay_len(), udp_hdr.hdr_len() + body.len()); // } - - fn create_linked_mblks(n: usize) -> Vec<*mut mblk_t> { - let mut els = vec![]; - for _ in 0..n { - els.push(allocb(8)); - } - - // connect the elements in a chain - for (lhs, rhs) in els.iter().zip(els[1..].iter()) { - unsafe { - (**lhs).b_next = *rhs; - (**rhs).b_prev = *lhs; - } - } - - els - } - - #[test] - fn chain_has_correct_ends() { - let els = create_linked_mblks(3); - - let chain = unsafe { PacketChain::new(els[0]) }.unwrap(); - let chain_inner = chain.inner.as_ref().unwrap(); - assert_eq!(chain_inner.head.as_ptr(), els[0]); - assert_eq!(chain_inner.tail.as_ptr(), els[2]); - } - - #[test] - fn chain_breaks_links() { - let els = create_linked_mblks(3); - - let mut chain = unsafe { PacketChain::new(els[0]) }.unwrap(); - - let p0 = chain.pop_front().unwrap(); - assert_eq!(p0.mblk_addr(), els[0] as uintptr_t); - unsafe { - assert!((*els[0]).b_prev.is_null()); - assert!((*els[0]).b_next.is_null()); - } - - // Chain head/tail ptrs are correct - let chain_inner = chain.inner.as_ref().unwrap(); - assert_eq!(chain_inner.head.as_ptr(), els[1]); - assert_eq!(chain_inner.tail.as_ptr(), els[2]); - unsafe { - assert!((*els[1]).b_prev.is_null()); - assert!((*els[2]).b_next.is_null()); - } - } - - #[test] - fn chain_append_links() { - let els = create_linked_mblks(3); - let new_el = allocb(8); - - let mut chain = unsafe { PacketChain::new(els[0]) }.unwrap(); - let pkt = unsafe { Packet::wrap_mblk(new_el) }.unwrap(); - - chain.append(pkt); - - // Chain head/tail ptrs are correct - let chain_inner = chain.inner.as_ref().unwrap(); - assert_eq!(chain_inner.head.as_ptr(), 
els[0]); - assert_eq!(chain_inner.tail.as_ptr(), new_el); - - // Last el has been linked to the new pkt, and it has a valid - // backward link. - unsafe { - assert_eq!((*new_el).b_prev, els[2]); - assert!((*new_el).b_next.is_null()); - assert_eq!((*els[2]).b_next, new_el); - } - } - - #[test] - fn chain_drain_complete() { - let els = create_linked_mblks(64); - - let mut chain = unsafe { PacketChain::new(els[0]) }.unwrap(); - - for i in 0..els.len() { - let pkt = chain.pop_front().unwrap(); - assert_eq!(pkt.mblk_addr(), els[i] as uintptr_t); - } - - assert!(chain.pop_front().is_none()); - } } diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 0e835e87..a2e4e9d0 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -22,9 +22,7 @@ use super::ingot_packet::FullParsed; use super::ingot_packet::LiteParsed; use super::ingot_packet::MblkFullParsed; use super::ingot_packet::MblkPacketData; -use super::ingot_packet::MsgBlk; -use super::ingot_packet::MsgBlkIterMut; -use super::ingot_packet::Packet2; +use super::ingot_packet::Packet; use super::ioctl; use super::ioctl::TcpFlowEntryDump; use super::ioctl::TcpFlowStateDump; @@ -63,6 +61,8 @@ use crate::ddi::kstat; use crate::ddi::kstat::KStatNamed; use crate::ddi::kstat::KStatProvider; use crate::ddi::kstat::KStatU64; +use crate::ddi::mblk::MsgBlk; +use crate::ddi::mblk::MsgBlkIterMut; use crate::ddi::sync::KMutex; use crate::ddi::sync::KMutexType; use crate::ddi::time::Moment; @@ -907,7 +907,7 @@ impl Port { data: &FlowTable, dir: Direction, msg: String, - pkt: &mut Packet2, + pkt: &mut Packet, ) { if unsafe { super::opte_panic_debug != 0 } { super::err!("mblk: {}", pkt.mblk_addr()); @@ -923,7 +923,7 @@ impl Port { fn tcp_err_probe( &self, dir: Direction, - pkt: Option<&Packet2>, + pkt: Option<&Packet>, flow: &InnerFlowId, msg: String, ) { @@ -1218,7 +1218,7 @@ impl Port { // which can advance to (and hold) light->full-fat metadata. 
// My gutfeel is that there's a perf cost here -- this struct // is pretty fat, but expressing the transform on a &mut also sucks. - mut pkt: Packet2, M>>, + mut pkt: Packet, M>>, ) -> result::Result where M: LightweightMeta< as Read>::Chunk>, @@ -1698,7 +1698,7 @@ impl Transforms { #[inline] fn apply( &self, - pkt: &mut Packet2>, + pkt: &mut Packet>, dir: Direction, ) -> result::Result<(), ProcessError> where @@ -1932,7 +1932,7 @@ impl Port { &self, data: &mut PortData, dir: Direction, - pkt: &mut Packet2, + pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { @@ -1976,7 +1976,7 @@ impl Port { dir: Direction, flow: &InnerFlowId, epoch: u64, - pkt: &Packet2, + pkt: &Packet, ) { cfg_if::cfg_if! { if #[cfg(all(not(feature = "std"), not(test)))] { @@ -2277,7 +2277,7 @@ impl Port { &self, data: &mut PortData, epoch: u64, - pkt: &mut Packet2, + pkt: &mut Packet, ufid_in: &InnerFlowId, ameta: &mut ActionMeta, ) -> result::Result { @@ -2477,7 +2477,7 @@ impl Port { &self, data: &mut PortData, epoch: u64, - pkt: &mut Packet2, + pkt: &mut Packet, ameta: &mut ActionMeta, ) -> result::Result { use Direction::Out; diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 3cfad131..f03d3e3e 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -23,8 +23,7 @@ use super::headers::UlpHeaderAction; use super::headers::UlpMetaModify; use super::ingot_packet::MblkFullParsed; use super::ingot_packet::MblkPacketData; -use super::ingot_packet::MsgBlk; -use super::ingot_packet::Packet2; +use super::ingot_packet::Packet; use super::ingot_packet::PacketData; use super::ip::L3; use super::packet::BodyTransform; @@ -32,6 +31,7 @@ use super::packet::InnerFlowId; use super::port::meta::ActionMeta; use super::predicate::DataPredicate; use super::predicate::Predicate; +use crate::ddi::mblk::MsgBlk; use alloc::boxed::Box; use alloc::ffi::CString; use alloc::string::String; @@ -565,7 +565,7 @@ pub trait StatefulAction: 
Display { fn gen_desc( &self, flow_id: &InnerFlowId, - pkt: &Packet2, + pkt: &Packet, meta: &mut ActionMeta, ) -> GenDescResult; @@ -1000,8 +1000,8 @@ impl From<&Rule> for super::ioctl::RuleDump { #[test] fn rule_matching() { - use crate::engine::ingot_base::Ipv4; - use crate::engine::ingot_base::Ipv4Mut; + use crate::engine::ip::v4::Ipv4; + use crate::engine::ip::v4::Ipv4Mut; use crate::engine::predicate::Ipv4AddrMatch; use crate::engine::predicate::Predicate; use crate::engine::GenericUlp; @@ -1035,7 +1035,7 @@ fn rule_matching() { let eth = Ethernet { ethertype: Ethertype::IPV4, ..Default::default() }; let mut pkt_m = MsgBlk::new_ethernet_pkt((ð, &ip4, &tcp)); - let mut pkt = Packet2::new(pkt_m.iter_mut()) + let mut pkt = Packet::new(pkt_m.iter_mut()) .parse_outbound(GenericUlp {}) .unwrap() .to_full_meta(); diff --git a/lib/opte/src/engine/snat.rs b/lib/opte/src/engine/snat.rs index f28d4f96..f89f5c8a 100644 --- a/lib/opte/src/engine/snat.rs +++ b/lib/opte/src/engine/snat.rs @@ -12,7 +12,7 @@ use super::headers::UlpGenericModify; use super::headers::UlpHeaderAction; use super::headers::UlpMetaModify; use super::ingot_packet::MblkFullParsed; -use super::ingot_packet::Packet2; +use super::ingot_packet::Packet; use super::packet::InnerFlowId; use super::port::meta::ActionMeta; use super::predicate::DataPredicate; @@ -241,7 +241,7 @@ impl SNat { fn gen_icmp_desc( &self, nat: SNatAlloc, - pkt: &Packet2, + pkt: &Packet, ) -> GenDescResult { let meta = pkt.meta(); @@ -303,7 +303,7 @@ where fn gen_desc( &self, flow_id: &InnerFlowId, - pkt: &Packet2, + pkt: &Packet, _meta: &mut ActionMeta, ) -> GenDescResult { let priv_port = flow_id.src_port; @@ -469,11 +469,11 @@ mod test { use ingot::tcp::TcpRef; use ingot::types::HeaderLen; - use crate::engine::ingot_base::Ethernet; - use crate::engine::ingot_base::EthernetRef; - use crate::engine::ingot_base::Ipv4; - use crate::engine::ingot_base::Ipv4Ref; - use crate::engine::ingot_packet::MsgBlk; + use 
crate::engine::ether::Ethernet; + use crate::engine::ether::EthernetRef; + use crate::engine::ip::v4::Ipv4; + use crate::engine::ip::v4::Ipv4Ref; + use crate::ddi::mblk::MsgBlk; use super::*; @@ -541,7 +541,7 @@ mod test { }; let mut pkt_m = MsgBlk::new_ethernet_pkt((ð, &ip4, &tcp, &body)); - let mut pkt = Packet2::new(pkt_m.iter_mut()) + let mut pkt = Packet::new(pkt_m.iter_mut()) .parse_outbound(GenericUlp {}) .unwrap() .to_full_meta(); @@ -609,7 +609,7 @@ mod test { }; let mut pkt_m = MsgBlk::new_ethernet_pkt((ð, &ip4, &tcp, &body)); - let mut pkt = Packet2::new(pkt_m.iter_mut()) + let mut pkt = Packet::new(pkt_m.iter_mut()) .parse_inbound(GenericUlp {}) .unwrap() .to_full_meta(); diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index 8061fd92..f7c9e4bf 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -22,7 +22,7 @@ use opte::engine::ether::EthernetRef; use opte::engine::flow_table::FlowTable; use opte::engine::ingot_packet::FullParsed; use opte::engine::ingot_packet::OpteParsed2; -use opte::engine::ingot_packet::Packet2; +use opte::engine::ingot_packet::Packet; use opte::engine::ip::v4::Ipv4Addr; use opte::engine::packet::InnerFlowId; use opte::engine::packet::ParseError; @@ -66,7 +66,7 @@ fn is_arp_req_for_tpa(tpa: Ipv4Addr, arp: &impl ArpEthIpv4Ref) -> bool { impl VpcNetwork { fn handle_arp_out( &self, - pkt: &mut Packet2>, + pkt: &mut Packet>, ) -> Result where T::Chunk: ByteSliceMut, @@ -102,7 +102,7 @@ impl NetworkImpl for VpcNetwork { fn handle_pkt( &self, dir: Direction, - pkt: &mut Packet2>, + pkt: &mut Packet>, _uft_in: &FlowTable>, _uft_out: &FlowTable>, ) -> Result diff --git a/lib/oxide-vpc/tests/firewall_tests.rs b/lib/oxide-vpc/tests/firewall_tests.rs index aad20b71..c7db7882 100644 --- a/lib/oxide-vpc/tests/firewall_tests.rs +++ b/lib/oxide-vpc/tests/firewall_tests.rs @@ -1,4 +1,4 @@ -use opte::engine::ingot_packet::MsgBlk; +use opte::ddi::mblk::MsgBlk; use opte_test_utils as 
common; use common::*; diff --git a/lib/oxide-vpc/tests/fuzz_regression.rs b/lib/oxide-vpc/tests/fuzz_regression.rs index ef7b92d1..ab52ee04 100644 --- a/lib/oxide-vpc/tests/fuzz_regression.rs +++ b/lib/oxide-vpc/tests/fuzz_regression.rs @@ -9,8 +9,8 @@ //! These tests capture past known-bad packets which have made some part //! of OPTE panic in the past, and ensure that it does not today. -use opte::engine::ingot_packet::MsgBlk; -use opte::engine::ingot_packet::Packet2; +use opte::ddi::mblk::MsgBlk; +use opte::engine::ingot_packet::Packet; use oxide_vpc::engine::VpcParser; use serde::Deserialize; use serde::Serialize; @@ -111,7 +111,7 @@ fn run_tests( fn parse_in_regression() { run_tests("parse_in", |data| { let mut msg = MsgBlk::copy(data); - let parsed = Packet2::new(msg.iter_mut()); + let parsed = Packet::new(msg.iter_mut()); let _ = parsed.parse_inbound(VpcParser {}); }); } @@ -120,7 +120,7 @@ fn parse_in_regression() { fn parse_out_regression() { run_tests("parse_out", |data| { let mut msg = MsgBlk::copy(data); - let parsed = Packet2::new(msg.iter_mut()); + let parsed = Packet::new(msg.iter_mut()); let _ = parsed.parse_outbound(VpcParser {}); }); } diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index 5047ccdc..0a3e646e 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -17,6 +17,7 @@ use common::icmp::*; use common::*; use opte::api::MacAddr; use opte::api::OpteError; +use opte::ddi::mblk::MsgBlk; use opte::ddi::time::Moment; use opte::engine::arp::ArpEthIpv4; use opte::engine::arp::ArpEthIpv4Ref; @@ -28,8 +29,7 @@ use opte::engine::ether::EthernetRef; use opte::engine::flow_table::FLOW_DEF_EXPIRE_SECS; use opte::engine::geneve::Vni; use opte::engine::ingot_packet::MblkFullParsed; -use opte::engine::ingot_packet::MsgBlk; -use opte::engine::ingot_packet::Packet2; +use opte::engine::ingot_packet::Packet; use opte::engine::ip::v4::Ipv4Addr; use 
opte::engine::ip::v4::Ipv4Ref; use opte::engine::ip::v6::Ipv6; @@ -1557,7 +1557,7 @@ fn unpack_and_verify_icmp( } fn unpack_and_verify_icmp4( - pkt: &Packet2, + pkt: &Packet, expected_ident: u16, seq_no: u16, ) { @@ -1574,7 +1574,7 @@ fn unpack_and_verify_icmp4( } fn unpack_and_verify_icmp6( - pkt: &Packet2, + pkt: &Packet, expected_ident: u16, seq_no: u16, src_ip: Ipv6Addr, diff --git a/xde/src/dls/mod.rs b/xde/src/dls/mod.rs index c56c2637..32bf2482 100644 --- a/xde/src/dls/mod.rs +++ b/xde/src/dls/mod.rs @@ -21,7 +21,7 @@ use illumos_sys_hdrs::c_int; use illumos_sys_hdrs::datalink_id_t; use illumos_sys_hdrs::uintptr_t; use illumos_sys_hdrs::ENOENT; -use opte::engine::ingot_packet::MsgBlk; +use opte::ddi::mblk::MsgBlk; pub use sys::*; /// An integer ID used by DLS to refer to a given link. diff --git a/xde/src/mac/mod.rs b/xde/src/mac/mod.rs index 80198a44..568e9792 100644 --- a/xde/src/mac/mod.rs +++ b/xde/src/mac/mod.rs @@ -21,7 +21,7 @@ use core::fmt; use core::ptr; use illumos_sys_hdrs::*; use opte::engine::ether::EtherAddr; -use opte::engine::ingot_packet::MsgBlk; +use opte::ddi::mblk::MsgBlk; pub use sys::*; /// Errors while opening a MAC handle. 
diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 910211db..0731bf11 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -62,12 +62,12 @@ use opte::ddi::time::Periodic; use opte::engine::ether::EthernetRef; use opte::engine::geneve::Vni; use opte::engine::headers::IpAddr; -use opte::engine::ingot_packet::MsgBlk; -use opte::engine::ingot_packet::Packet2; +use opte::ddi::mblk::MsgBlk; +use opte::engine::ingot_packet::Packet; use opte::engine::ioctl::{self as api}; use opte::engine::ip::v6::Ipv6Addr; use opte::engine::packet::InnerFlowId; -use opte::engine::packet::PacketChain; +use opte::ddi::mblk::MsgBlkChain; use opte::engine::packet::ParseError; use opte::engine::port::Port; use opte::engine::port::PortBuilder; @@ -1407,7 +1407,7 @@ fn guest_loopback<'a>( use Direction::*; let mblk_addr = pkt.mblk_addr(); - let parsed_pkt = Packet2::new(pkt.iter_mut()); + let parsed_pkt = Packet::new(pkt.iter_mut()); // TODO: Rework currently requires a reparse on loopback to account for UFT fastpath. @@ -1514,7 +1514,7 @@ unsafe extern "C" fn xde_mc_tx( // pointers are `Copy`. 
// ================================================================ __dtrace_probe_tx(mp_chain as uintptr_t); - let Ok(mut chain) = PacketChain::new(mp_chain) else { + let Ok(mut chain) = MsgBlkChain::new(mp_chain) else { bad_packet_probe( Some(src_dev.port.name_cstr()), Direction::Out, @@ -1539,7 +1539,7 @@ unsafe extern "C" fn xde_mc_tx( unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { let parser = src_dev.port.network().parser(); let mblk_addr = pkt.mblk_addr(); - let parsed_pkt = Packet2::new(pkt.iter_mut()); + let parsed_pkt = Packet::new(pkt.iter_mut()); let parsed_pkt = match parsed_pkt.parse_outbound(parser) { Ok(pkt) => pkt, Err(e) => { @@ -1822,7 +1822,7 @@ unsafe extern "C" fn xde_rx( Arc::increment_strong_count(mch_ptr); let stream: Arc = Arc::from_raw(mch_ptr); - let Ok(mut chain) = PacketChain::new(mp_chain) else { + let Ok(mut chain) = MsgBlkChain::new(mp_chain) else { bad_packet_probe( None, Direction::Out, @@ -1848,7 +1848,7 @@ unsafe fn xde_rx_one( mut pkt: MsgBlk, ) { let mblk_addr = pkt.mblk_addr(); - let parsed_pkt = Packet2::new(pkt.iter_mut()); + let parsed_pkt = Packet::new(pkt.iter_mut()); // We must first parse the packet in order to determine where it // is to be delivered. From d187bbedaf6b6e73afcf7c53d43b7a71a31d135f Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 28 Oct 2024 11:51:06 +0000 Subject: [PATCH 067/115] A test suite. 
--- lib/opte/src/ddi/mblk.rs | 209 +++++- lib/opte/src/engine/dhcp.rs | 4 +- lib/opte/src/engine/dhcpv6/protocol.rs | 2 +- lib/opte/src/engine/ether.rs | 23 +- lib/opte/src/engine/flow_table.rs | 2 +- lib/opte/src/engine/geneve.rs | 28 +- lib/opte/src/engine/icmp/v4.rs | 35 - lib/opte/src/engine/ip/v4.rs | 8 +- lib/opte/src/engine/ip/v6.rs | 135 ++-- lib/opte/src/engine/nat.rs | 2 +- lib/opte/src/engine/packet.rs | 943 ++++++++----------------- lib/opte/src/engine/parse.rs | 41 +- lib/opte/src/engine/rule.rs | 2 +- lib/opte/src/engine/snat.rs | 2 +- lib/opte/src/engine/tcp.rs | 260 ------- lib/opte/src/engine/udp.rs | 25 - rust-toolchain.toml | 2 +- xde/src/mac/mod.rs | 2 +- xde/src/xde.rs | 4 +- 19 files changed, 637 insertions(+), 1092 deletions(-) diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index da8769c1..739f6e22 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -296,8 +296,9 @@ impl MsgBlk { pkt } - /// Returns the number of bytes available for writing before - pub fn headroom(&self) -> usize { + /// Returns the number of bytes available for writing ahead of the + /// read pointer in the current datablock. + pub fn head_capacity(&self) -> usize { unsafe { let inner = self.inner.as_ref(); @@ -305,6 +306,29 @@ impl MsgBlk { } } + /// Returns the number of bytes available for writing after the + /// write pointer in the current datablock. + pub fn tail_capacity(&self) -> usize { + unsafe { + let inner = self.inner.as_ref(); + + (*inner.b_datap).db_lim.offset_from(inner.b_wptr) as usize + } + } + + /// Returns the number of bytes allocated in all datablocks in + /// this message. + pub fn all_segs_capacity(&self) -> usize { + self.iter() + .map(|v| unsafe { + let tail = (*v.0.b_datap).db_lim; + let head = (*v.0.b_datap).db_base; + + tail.offset_from(head) as usize + }) + .sum() + } + /// Creates a new [`MsgBlk`] containing a data buffer of `len` /// bytes with 2B of headroom/alignment. 
/// @@ -450,6 +474,34 @@ impl MsgBlk { } } + /// Adjusts the read pointer for this MsgBlk, initialising any extra bytes to 0. + pub fn expand_front(&mut self, n: usize) -> Result<(), SegAdjustError> { + unsafe { + self.write_front(n, |v| { + // MaybeUninit::fill is unstable. + let n = v.len(); + v.as_mut_ptr().write_bytes(0, n); + }) + .map_err(|_| SegAdjustError::StartBeforeBase) + } + } + + /// Shrink the writable/readable area by shifting the `b_rptr` by + /// `n`; effectively removing bytes from the start of the packet. + /// + /// # Errors + /// + /// `SegAdjustError::StartPastEnd`: Shifting the read pointer by + /// `n` would move `b_rptr` past `b_wptr`. + pub fn drop_front_bytes(&mut self, n: usize) -> Result<(), SegAdjustError> { + let node = self + .iter_mut() + .next() + .expect("There will always be a front element by definition"); + + node.drop_front_bytes(n) + } + /// Emits an `ingot` packet after any bytes present in this mblk. pub fn emit_back( &mut self, @@ -743,6 +795,157 @@ impl Drop for MsgBlk { #[cfg(test)] mod test { + use ingot::types::PacketParseError; + use ingot::types::ParseError as IngotParseError; + + use crate::engine::ingot_packet::Packet; + use crate::engine::packet::mock_desballoc; + use crate::engine::packet::ParseError; + use crate::engine::GenericUlp; + + use super::*; + + #[test] + fn zero_byte_packet() { + let mut pkt = MsgBlk::new(0); + assert_eq!(pkt.len(), 0); + assert_eq!(pkt.seg_len(), 1); + assert_eq!(pkt.tail_capacity(), 16); + + let res = Packet::new(pkt.iter_mut()).parse_outbound(GenericUlp {}); + match res { + Err(ParseError::IngotError(err)) => { + assert_eq!(err.header().as_str(), "inner_eth"); + assert_eq!(err.error(), &IngotParseError::TooSmall); + } + + Err(e) => panic!("expected read error, got: {:?}", e), + _ => panic!("expected failure, accidentally succeeded at parsing"), + } + + let pkt2 = MsgBlk::copy(&[]); + assert_eq!(pkt2.len(), 0); + assert_eq!(pkt2.seg_len(), 1); + assert_eq!(pkt2.tail_capacity(), 
16); + let res = Packet::new(pkt.iter_mut()).parse_outbound(GenericUlp {}); + match res { + Err(ParseError::IngotError(err)) => { + assert_eq!(err.header().as_str(), "inner_eth"); + assert_eq!(err.error(), &IngotParseError::TooSmall); + } + + Err(e) => panic!("expected read error, got: {:?}", e), + _ => panic!("expected failure, accidentally succeeded at parsing"), + } + } + + #[test] + fn wrap() { + let mut buf1 = Vec::with_capacity(20); + let mut buf2 = Vec::with_capacity(2); + buf1.extend_from_slice(&[0x1, 0x2, 0x3, 0x4]); + buf2.extend_from_slice(&[0x5, 0x6]); + let mp1 = mock_desballoc(buf1); + let mp2 = mock_desballoc(buf2); + + unsafe { + (*mp1).b_cont = mp2; + } + + let pkt = unsafe { MsgBlk::wrap_mblk(mp1).unwrap() }; + assert_eq!(pkt.seg_len(), 2); + assert_eq!(pkt.all_segs_capacity(), 22); + assert_eq!(pkt.byte_len(), 6); + } + + #[test] + fn read_seg() { + let buf1 = vec![0x1, 0x2, 0x3, 0x4]; + let buf2 = vec![0x5, 0x6]; + let mp1 = mock_desballoc(buf1); + let mp2 = mock_desballoc(buf2); + + unsafe { + (*mp1).b_cont = mp2; + } + + let pkt = unsafe { MsgBlk::wrap_mblk(mp1).unwrap() }; + assert_eq!(pkt.byte_len(), 6); + assert_eq!(pkt.seg_len(), 2); + + let mut segs = pkt.iter(); + assert_eq!(segs.next().map(|v| &v[..]).unwrap(), &[0x1, 0x2, 0x3, 0x4]); + assert_eq!(segs.next().map(|v| &v[..]).unwrap(), &[0x5, 0x6]); + } + + // Verify uninitialized packet. 
+ #[test] + fn uninitialized_packet() { + let pkt = MsgBlk::new(200); + assert_eq!(pkt.len(), 0); + assert_eq!(pkt.seg_len(), 1); + assert_eq!(pkt.tail_capacity(), 200); + } + + #[test] + fn expand_and_shrink() { + let mut seg = MsgBlk::new(18); + assert_eq!(seg.len(), 0); + seg.resize(18).unwrap(); + assert_eq!(seg.len(), 18); + seg.drop_front_bytes(4).unwrap(); + assert_eq!(seg.len(), 14); + seg.expand_front(4).unwrap(); + assert_eq!(seg.len(), 18); + + assert!(seg.resize(20).is_err()); + assert!(seg.drop_front_bytes(20).is_err()); + assert!(seg.expand_front(4).is_err()); + } + + #[test] + fn prefix_len() { + let mut seg = MsgBlk::new(18); + assert_eq!(seg.head_capacity(), 0); + seg.resize(18).unwrap(); + assert_eq!(seg.head_capacity(), 0); + seg.drop_front_bytes(4).unwrap(); + assert_eq!(seg.head_capacity(), 4); + seg.expand_front(4).unwrap(); + assert_eq!(seg.head_capacity(), 0); + } + + // Verify that we do not panic when we get long chains of mblks linked by + // `b_cont`. This is a regression test for + // https://github.com/oxidecomputer/opte/issues/335 + #[test] + fn test_long_packet_continuation() { + const N_SEGMENTS: usize = 8; + let mut blocks: Vec<*mut mblk_t> = Vec::with_capacity(N_SEGMENTS); + for i in 0..N_SEGMENTS { + let mp = allocb(32); + + // Link previous block to this one. + if i > 0 { + let prev = blocks[i - 1]; + unsafe { + (*prev).b_cont = mp; + } + } + blocks.push(mp); + } + + // Wrap the first mblk in a Packet, and check that we still have a + // reference to everything. 
+ let packet = unsafe { MsgBlk::wrap_mblk(blocks[0]) } + .expect("Failed to wrap mblk chain with many segments"); + + assert_eq!(packet.seg_len(), N_SEGMENTS); + for (seg, mblk) in packet.iter().zip(blocks) { + assert_eq!(core::ptr::addr_of!(seg.0) as *mut _, mblk); + } + } + fn create_linked_mblks(n: usize) -> Vec<*mut mblk_t> { let mut els = vec![]; for _ in 0..n { @@ -799,7 +1002,7 @@ mod test { let new_el = allocb(8); let mut chain = unsafe { MsgBlkChain::new(els[0]) }.unwrap(); - let pkt = unsafe { Packet::wrap_mblk(new_el) }.unwrap(); + let pkt = unsafe { MsgBlk::wrap_mblk(new_el) }.unwrap(); chain.append(pkt); diff --git a/lib/opte/src/engine/dhcp.rs b/lib/opte/src/engine/dhcp.rs index 1d456aa3..233ec174 100644 --- a/lib/opte/src/engine/dhcp.rs +++ b/lib/opte/src/engine/dhcp.rs @@ -617,8 +617,8 @@ impl HairpinAction for DhcpAction { #[cfg(test)] mod test { use super::*; - use crate::engine::ip4::Ipv4Addr; - use crate::engine::ip4::Ipv4Cidr; + use crate::engine::ip::v4::Ipv4Addr; + use crate::engine::ip::v4::Ipv4Cidr; fn test_option_emit(opt: impl DhcpOption, truth: Vec) { let buf = gen_dhcp_from_option(opt); diff --git a/lib/opte/src/engine/dhcpv6/protocol.rs b/lib/opte/src/engine/dhcpv6/protocol.rs index 3bac64ea..05f0e093 100644 --- a/lib/opte/src/engine/dhcpv6/protocol.rs +++ b/lib/opte/src/engine/dhcpv6/protocol.rs @@ -688,8 +688,8 @@ mod test { use super::Message; use super::MessageType; use super::OptionCode; + use crate::ddi::mblk::MsgBlk; use crate::engine::dhcpv6::test_data; - use crate::engine::ingot_packet::MsgBlk; use crate::engine::ingot_packet::Packet; use crate::engine::port::meta::ActionMeta; use crate::engine::GenericUlp; diff --git a/lib/opte/src/engine/ether.rs b/lib/opte/src/engine/ether.rs index 8bee880c..217a0360 100644 --- a/lib/opte/src/engine/ether.rs +++ b/lib/opte/src/engine/ether.rs @@ -242,21 +242,20 @@ impl EtherMeta { #[cfg(test)] mod test { use super::*; - use crate::engine::packet::Packet; + use ingot::types::Emit; + use 
ingot::types::HeaderParse; #[test] fn emit() { - let eth = EtherMeta { - dst: MacAddr::from([0xA8, 0x40, 0x25, 0xFF, 0x77, 0x77]), - src: MacAddr::from([0xA8, 0x40, 0x25, 0xFA, 0xFA, 0x37]), - ether_type: EtherType::Ipv4, + let eth = Ethernet { + destination: MacAddr::from([0xA8, 0x40, 0x25, 0xFF, 0x77, 0x77]), + source: MacAddr::from([0xA8, 0x40, 0x25, 0xFA, 0xFA, 0x37]), + ethertype: Ethertype::IPV4, }; // Verify bytes are written and segment length is correct. - let mut pkt = Packet::alloc_and_expand(14); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - assert_eq!(pkt.len(), 14); + let out = eth.emit_vec(); + assert_eq!(out.len(), 14); #[rustfmt::skip] let expected_bytes = vec![ // destination @@ -266,11 +265,9 @@ mod test { // ether type 0x08, 0x00, ]; - assert_eq!(&expected_bytes, pkt.seg_bytes(0)); + assert_eq!(expected_bytes, out); // Verify error when the mblk is not large enough. - let mut pkt = Packet::alloc_and_expand(10); - let mut wtr = pkt.seg0_wtr(); - assert!(wtr.slice_mut(EtherHdr::SIZE).is_err()); + assert!(ValidEthernet::parse(&[0; 10][..]).is_err()); } } diff --git a/lib/opte/src/engine/flow_table.rs b/lib/opte/src/engine/flow_table.rs index 0f80ebfe..a3809ed9 100644 --- a/lib/opte/src/engine/flow_table.rs +++ b/lib/opte/src/engine/flow_table.rs @@ -391,7 +391,7 @@ impl Dump for () { #[cfg(test)] mod test { use super::*; - use crate::engine::ip4::Protocol; + use crate::engine::ip::v4::Protocol; use crate::engine::packet::AddrPair; use crate::engine::packet::FLOW_ID_DEFAULT; use core::time::Duration; diff --git a/lib/opte/src/engine/geneve.rs b/lib/opte/src/engine/geneve.rs index fc910b81..f3d0f119 100644 --- a/lib/opte/src/engine/geneve.rs +++ b/lib/opte/src/engine/geneve.rs @@ -314,8 +314,11 @@ pub fn geneve_opt_is_oxide_external( #[cfg(test)] mod test { - use core::matches; - + use super::*; + use crate::ddi::mblk::MsgBlk; + use crate::engine::headers::EncapMeta; + use crate::engine::ingot_packet::Packet; + 
use crate::engine::parse::ValidGeneveOverV6; use ingot::ethernet::Ethernet; use ingot::ethernet::Ethertype; use ingot::ip::IpProtocol; @@ -325,13 +328,6 @@ mod test { use ingot::udp::UdpRef; use ingot::udp::ValidUdp; - use super::*; - use crate::engine::headers::EncapMeta; - use crate::engine::ingot_packet::MsgBlk; - use crate::engine::ingot_packet::Packet; - use crate::engine::packet::Packet; - use crate::engine::parse::ValidGeneveOverV6; - #[test] fn emit_no_opts() { let geneve = GeneveMeta { @@ -342,8 +338,8 @@ mod test { }; let len = geneve.hdr_len(); - let emitted = EncapMeta::Geneve(geneve).emit_vec(); - assert_eq!(len, pkt.len()); + let emitted = EncapMeta::Geneve(geneve).to_vec(); + assert_eq!(len, emitted.len()); #[rustfmt::skip] let expected_bytes = vec![ @@ -376,8 +372,8 @@ mod test { }; let len = geneve.hdr_len(); - let emitted = EncapMeta::Geneve(geneve).emit_vec(); - assert_eq!(len, pkt.len()); + let emitted = EncapMeta::Geneve(geneve).to_vec(); + assert_eq!(len, emitted.len()); #[rustfmt::skip] let expected_bytes = vec![ @@ -405,7 +401,7 @@ mod test { // rsvd + len 0x00, ]; - assert_eq!(&expected_bytes, emitted); + assert_eq!(&expected_bytes, &emitted[..]); } #[test] @@ -443,7 +439,7 @@ mod test { validate_geneve(&geneve).unwrap(); - assert!(geneve_opt_is_oxide_external(&geneve)); + assert!(valid_geneve_has_oxide_external(&geneve)); } #[test] @@ -542,6 +538,6 @@ mod test { let (geneve, ..) 
= ValidGeneve::parse(rem).unwrap(); validate_geneve(&geneve).unwrap(); - assert!(geneve_opt_is_oxide_external(&geneve)); + assert!(valid_geneve_has_oxide_external(&geneve)); } } diff --git a/lib/opte/src/engine/icmp/v4.rs b/lib/opte/src/engine/icmp/v4.rs index abcb4773..689ccec6 100644 --- a/lib/opte/src/engine/icmp/v4.rs +++ b/lib/opte/src/engine/icmp/v4.rs @@ -172,38 +172,3 @@ impl Display for MessageType { write!(f, "{}", self.inner) } } - -#[cfg(test)] -mod test { - use crate::engine::checksum::Checksum as OpteCsum; - use crate::engine::headers::RawHeader; - use crate::engine::icmp::IcmpHdr; - use crate::engine::icmp::IcmpHdrRaw; - use smoltcp::wire::Icmpv4Packet; - use smoltcp::wire::Icmpv4Repr; - - use super::*; - - #[test] - fn icmp4_body_csum_equals_body() { - let data = b"reunion\0"; - let mut body_csum = OpteCsum::default(); - body_csum.add_bytes(data); - - let mut cksum_cfg = Csum::ignored(); - cksum_cfg.icmpv4 = Checksum::Both; - - let test_pkt = Icmpv4Repr::EchoRequest { ident: 7, seq_no: 7777, data }; - let mut out = vec![0u8; test_pkt.buffer_len()]; - let mut packet = Icmpv4Packet::new_unchecked(&mut out); - test_pkt.emit(&mut packet, &cksum_cfg); - - let src = &mut out[..IcmpHdr::SIZE]; - let icmp = IcmpHdr { base: IcmpHdrRaw::new_mut(src).unwrap() }; - - assert_eq!( - Some(body_csum.finalize()), - icmp.csum_minus_hdr().map(|mut v| v.finalize()) - ); - } -} diff --git a/lib/opte/src/engine/ip/v4.rs b/lib/opte/src/engine/ip/v4.rs index 04007698..77109959 100644 --- a/lib/opte/src/engine/ip/v4.rs +++ b/lib/opte/src/engine/ip/v4.rs @@ -125,8 +125,12 @@ impl ValidIpv4 { })); } + // Packets can have arbitrary zero-padding at the end so + // our length *could* be larger than the packet reports. + // Unlikely in practice as Encap headers push us past the 64B + // minimum packet size. 
let expt_total_len = bytes_after + own_len; - if expt_total_len != self.total_len() as usize { + if expt_total_len < self.total_len() as usize { return Err(ParseError::BadLength(MismatchError { location: c"Ipv4.total_len", expected: expt_total_len as u64, @@ -181,6 +185,7 @@ pub struct Ipv4Mod { #[cfg(test)] mod test { use super::*; + use ingot::tcp::TcpFlags; use ingot::types::HeaderLen; pub const DEF_ROUTE: &str = "0.0.0.0/0"; @@ -213,6 +218,7 @@ mod test { source: Ipv4Addr::from([10, 0, 0, 54]), destination: Ipv4Addr::from([52, 10, 128, 69]), protocol: IpProtocol::TCP, + flags: Ipv4Flags::DONT_FRAGMENT, hop_limit: 64, identification: 2662, ihl: 5, diff --git a/lib/opte/src/engine/ip/v6.rs b/lib/opte/src/engine/ip/v6.rs index 0970ade7..aae031cd 100644 --- a/lib/opte/src/engine/ip/v6.rs +++ b/lib/opte/src/engine/ip/v6.rs @@ -23,6 +23,7 @@ use opte_api::Protocol; use serde::Deserialize; use serde::Serialize; use zerocopy::ByteSlice; +use zerocopy::ByteSliceMut; pub const DDM_HEADER_ID: u8 = 0xFE; @@ -78,9 +79,13 @@ impl ValidIpv6 { })); } + // Packets can have arbitrary zero-padding at the end so + // our length *could* be larger than the packet reports. + // Unlikely in practice as Encap headers push us past the 64B + // minimum packet size. 
let ex_len = bytes_after + self.1.packet_length(); let pll = self.payload_len(); - if ex_len != (self.payload_len() as usize) { + if ex_len < (self.payload_len() as usize) { return Err(ParseError::BadLength(MismatchError { location: c"Ipv6.payload_len", expected: ex_len as u64, @@ -90,6 +95,21 @@ impl ValidIpv6 { Ok(()) } + + pub fn ulp_len(&self) -> usize { + self.payload_len() as usize - self.1.packet_length() + } + + pub fn set_ulp_len(&mut self, len: usize) + where + V: ByteSliceMut, + { + self.set_payload_len((self.1.packet_length() + len) as u16) + } + + pub fn ext_len(&self) -> usize { + self.1.packet_length() + } } #[derive( @@ -111,10 +131,17 @@ pub struct Ipv6Mod { #[cfg(test)] pub(crate) mod test { use super::*; - use crate::engine::packet::Packet; + use crate::ddi::mblk::MsgBlk; + use crate::engine::ingot_packet::Packet; + use ingot::ip::IpProtocol as IngotIpProtocol; + use ingot::types::Accessor; + use ingot::types::Emit; + use ingot::types::Header; + use ingot::types::HeaderParse; use itertools::Itertools; use smoltcp::wire::IpProtocol; use smoltcp::wire::Ipv6Address; + use smoltcp::wire::Ipv6ExtHeader; use smoltcp::wire::Ipv6FragmentHeader; use smoltcp::wire::Ipv6FragmentRepr; use smoltcp::wire::Ipv6HopByHopHeader; @@ -128,7 +155,7 @@ pub(crate) mod test { // Test packet size and payload length const BUFFER_LEN: usize = 512; - const PAYLOAD_LEN: usize = 512 - Ipv6Hdr::BASE_SIZE; + const PAYLOAD_LEN: usize = 512 - Ipv6::MINIMUM_LENGTH; pub(crate) const SUPPORTED_EXTENSIONS: [IpProtocol; 4] = [ IpProtocol::HopByHop, IpProtocol::Ipv6Route, @@ -205,7 +232,7 @@ pub(crate) mod test { let mut data = vec![0; BUFFER_LEN]; let mut header_start = 0; let mut next_header_pos = 6; - let mut header_end = Ipv6Hdr::BASE_SIZE; + let mut header_end = Ipv6::MINIMUM_LENGTH; let mut buf = &mut data[header_start..]; // The base header. 
The payload length is always the same, but the base @@ -216,7 +243,7 @@ pub(crate) mod test { if extensions.is_empty() { // No extensions at all, just base header with a TCP ULP - return (buf.to_vec(), Ipv6Hdr::BASE_SIZE); + return (buf.to_vec(), Ipv6::MINIMUM_LENGTH); } for extension in extensions { @@ -286,10 +313,9 @@ pub(crate) mod test { extension ), }; - ext_packet.set_header_len(match V6ExtClass::from(*extension) { - V6ExtClass::Frag => 0, - V6ExtClass::Rfc6564 => u8::try_from((len - 8) / 8).unwrap(), - _ => unreachable!(), + ext_packet.set_header_len(match extension { + Ipv6Frag => 0, + _ => u8::try_from((len - 8) / 8).unwrap(), }); // Move the position markers to the new header. @@ -317,28 +343,29 @@ pub(crate) mod test { SUPPORTED_EXTENSIONS.into_iter().permutations(n_extensions) { let (buf, pos) = generate_test_packet(extensions.as_slice()); - let mut pkt = Packet::copy(&buf); - let mut reader = pkt.get_rdr_mut(); - let header = Ipv6Hdr::parse(&mut reader).unwrap(); + let (header, ..) 
= ValidIpv6::parse(&buf[..]).unwrap(); assert_all_lengths_ok(&header, pos); } } } - fn assert_all_lengths_ok(header: &Ipv6Hdr, header_end: usize) { + fn assert_all_lengths_ok( + header: &ValidIpv6, + header_end: usize, + ) { assert_eq!( - header.hdr_len(), + header.packet_length() as usize, header_end, "Header length does not include all extension headers" ); assert_eq!( - header.pay_len(), + header.payload_len() as usize, PAYLOAD_LEN, "Payload length does not include all extension headers", ); assert_eq!( - header.ext_len(), - header_end - Ipv6Hdr::BASE_SIZE, + header.1.packet_length(), + header_end - Ipv6::MINIMUM_LENGTH, "Extension header size is incorrect", ); assert_eq!( @@ -346,11 +373,6 @@ pub(crate) mod test { PAYLOAD_LEN - header.ext_len(), "ULP length is not correct" ); - assert_eq!( - header.total_len(), - PAYLOAD_LEN + Ipv6Hdr::BASE_SIZE, - "Total packet length is not correct", - ); } #[test] @@ -378,25 +400,21 @@ pub(crate) mod test { #[test] fn emit() { - let ip = Ipv6Meta { - src: Ipv6Addr::from_const([ + let ip = Ipv6 { + source: Ipv6Addr::from_const([ 0xFE80, 0x0000, 0x0000, 0x0000, 0xBAF8, 0x53FF, 0xFEAF, 0x537D, ]), - dst: Ipv6Addr::from_const([ + destination: Ipv6Addr::from_const([ 0xFE80, 0x000, 0x0000, 0x0000, 0x56BE, 0xF7FF, 0xFE0B, 0x09EC, ]), - proto: Protocol::ICMPv6, - next_hdr: IpProtocol::Icmpv6, + next_header: IngotIpProtocol::ICMP_V6, hop_limit: 255, - pay_len: 32, - ext: None, - ext_len: 0, + payload_len: 32, + ..Default::default() }; - let len = ip.hdr_len(); - let mut pkt = Packet::alloc_and_expand(len); - let mut wtr = pkt.seg0_wtr(); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); + let len = ip.packet_length(); + let pkt = ip.emit_vec(); assert_eq!(len, pkt.len()); #[rustfmt::skip] @@ -414,16 +432,14 @@ pub(crate) mod test { 0xFE, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x56, 0xBE, 0xF7, 0xFF, 0xFE, 0x0B, 0x09, 0xEC, ]; - assert_eq!(&expected_bytes, pkt.seg_bytes(0)); + assert_eq!(&expected_bytes, &pkt[..]); } #[test] fn 
test_set_total_len() { // Create a packet with one extension header. - let (buf, _) = generate_test_packet(&[IpProtocol::Ipv6Frag]); - let mut pkt = Packet::copy(&buf); - let mut reader = pkt.get_rdr_mut(); - let mut header = Ipv6Hdr::parse(&mut reader).unwrap(); + let (mut buf, _) = generate_test_packet(&[IpProtocol::Ipv6Frag]); + let (mut header, ..) = ValidIpv6::parse(&mut buf[..]).unwrap(); // Set the total length to 128. // @@ -432,29 +448,10 @@ pub(crate) mod test { // which is a fixed 8-octet thing, this should result in a Payload // Length of 128 - Ipv6Hdr::BASE_SIZE = 78. const NEW_SIZE: usize = 128; - header.set_total_len(NEW_SIZE as _); - assert_eq!(header.total_len(), NEW_SIZE); - assert_eq!(header.hdr_len(), Ipv6Hdr::BASE_SIZE + 8); - assert_eq!(header.pay_len(), NEW_SIZE - Ipv6Hdr::BASE_SIZE); - } - - #[test] - fn test_ip6_meta_total_len() { - // Create a packet with one extension header. - let (buf, _) = generate_test_packet(&[IpProtocol::Ipv6Frag]); - let mut pkt = Packet::copy(&buf); - let mut reader = pkt.get_rdr_mut(); - let header = Ipv6Hdr::parse(&mut reader).unwrap(); - - // Previously, the `Ipv6Meta::total_len` method double-counted the - // extension header length. Assert we don't do that here. 
- let meta = Ipv6Meta::from(&header); - assert!(meta.ext.is_some()); - assert_eq!(meta.ext_len, 8); // Fixed size - assert_eq!( - meta.total_len() as usize, - header.hdr_len() + header.ulp_len() - ); + header.set_ulp_len(NEW_SIZE); + assert_eq!(header.ulp_len(), NEW_SIZE); + assert_eq!(header.packet_length(), Ipv6::MINIMUM_LENGTH + 8); + assert_eq!(header.payload_len() as usize, NEW_SIZE + 8); } #[test] @@ -484,11 +481,13 @@ pub(crate) mod test { 0xc8, 0x34, 0xdd, 0x6b, 0xfa, 0x21, ]; - let mut pkt = Packet::copy(buf); - let mut reader = pkt.get_rdr_mut(); - assert!(matches!( - Ipv6Hdr::parse(&mut reader), - Err(Ipv6HdrError::BadVersion { vsn: 0 }) - )); + // Parsing this one will fail -- next header is hop-by-hop, which is + // an RFC6564 header -- we don't have (0xc1 * 8) bytes here!! + assert!(ValidIpv6::parse(&buf[..]).is_err()); + + // We can construct this manually via ingot... + let (v6, rem) = Accessor::read_from_prefix(&buf[..]).unwrap(); + let ip = ValidIpv6(v6, Header::Repr(Default::default())); + assert!(ip.validate(120).is_err()); } } diff --git a/lib/opte/src/engine/nat.rs b/lib/opte/src/engine/nat.rs index f1e87abd..ec62edb7 100644 --- a/lib/opte/src/engine/nat.rs +++ b/lib/opte/src/engine/nat.rs @@ -216,11 +216,11 @@ impl ActionDesc for NatDesc { mod test { use super::*; + use crate::ddi::mblk::MsgBlk; use crate::engine::ether::Ethernet; use crate::engine::ether::EthernetRef; use crate::engine::ip::v4::Ipv4; use crate::engine::ip::v4::Ipv4Ref; - use crate::ddi::mblk::MsgBlk; use crate::engine::GenericUlp; use ingot::ethernet::Ethertype; use ingot::ip::IpProtocol; diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index d5c04e63..e95523d0 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -26,6 +26,7 @@ use core::ptr; use core::result; use crc32fast::Hasher; use dyn_clone::DynClone; +use ingot::types::PacketParseError; use serde::Deserialize; use serde::Serialize; // TODO should probably move 
these two into this module now. @@ -251,7 +252,7 @@ pub enum WrapError { #[derive(Clone, Debug, Eq, PartialEq, DError)] #[derror(leaf_data = ParseError::data)] pub enum ParseError { - IngotError(ingot::types::PacketParseError), + IngotError(PacketParseError), IllegalValue(MismatchError), BadLength(MismatchError), UnrecognisedTunnelOpt { class: u16, ty: u8 }, @@ -268,7 +269,7 @@ impl ParseError { } } -impl DError for ingot::types::PacketParseError { +impl DError for PacketParseError { fn discriminant(&self) -> &'static core::ffi::CStr { self.header().as_cstr() } @@ -314,8 +315,8 @@ impl DError for MismatchError { } } -impl From for ParseError { - fn from(value: ingot::types::PacketParseError) -> Self { +impl From for ParseError { + fn from(value: PacketParseError) -> Self { Self::IngotError(value) } } @@ -456,11 +457,23 @@ fn mock_freeb(mp: *mut mblk_t) { #[cfg(test)] mod test { use super::*; - use crate::engine::ether::EtherHdr; - use crate::engine::ether::EtherType; - use crate::engine::ip4::Ipv4Hdr; - use crate::engine::tcp::TcpFlags; - + use crate::ddi::mblk::MsgBlk; + use crate::engine::ether::Ethernet; + use crate::engine::ether::EthernetRef; + use crate::engine::ingot_packet::OpteMeta; + use crate::engine::ingot_packet::Packet; + use crate::engine::ip::v4::Ipv4; + use crate::engine::ip::v4::Ipv4Ref; + use crate::engine::ip::v6::Ipv6; + use crate::engine::ip::v6::Ipv6Ref; + use crate::engine::GenericUlp; + use ingot::ethernet::Ethertype; + use ingot::ip::IpProtocol; + use ingot::tcp::Tcp; + use ingot::tcp::TcpFlags; + use ingot::tcp::TcpRef; + use ingot::types::HeaderLen; + use ingot::udp::Udp; use opte_api::Ipv6Addr; use opte_api::MacAddr; @@ -477,700 +490,312 @@ mod test { const DST_IP6: Ipv6Addr = Ipv6Addr::from_const([0xFD00, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2]); - fn tcp_pkt(body: &[u8]) -> Packet { - let tcp = TcpMeta { - src: 3839, - dst: 80, - seq: 4224936861, + fn tcp_pkt(body: &[u8]) -> MsgBlk { + let tcp = Tcp { + source: 3839, + destination: 80, + 
sequence: 4224936861, flags: TcpFlags::SYN, ..Default::default() }; - let ip4_total_len = Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len(); - let ip4 = Ipv4Meta { - src: SRC_IP4, - dst: DST_IP4, - proto: Protocol::TCP, - ttl: 64, - ident: 99, - hdr_len: Ipv4Hdr::BASE_SIZE.try_into().unwrap(), - total_len: ip4_total_len.try_into().unwrap(), - csum: [0; 2], + let ip4_total_len = + Ipv4::MINIMUM_LENGTH + (&tcp, &body).packet_length(); + let ip4 = Ipv4 { + source: SRC_IP4, + destination: DST_IP4, + protocol: IpProtocol::TCP, + hop_limit: 64, + identification: 99, + total_len: ip4_total_len as u16, + ..Default::default() }; - let eth = EtherMeta { - ether_type: EtherType::Ipv4, - src: SRC_MAC, - dst: DST_MAC, + let eth = Ethernet { + destination: DST_MAC, + source: SRC_MAC, + ethertype: Ethertype::IPV4, }; - let pkt_sz = EtherHdr::SIZE + ip4_total_len; - let mut seg = PacketSeg::alloc(pkt_sz); - seg.expand_end(pkt_sz).unwrap(); - let mut wtr = seg.get_writer(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - tcp.emit(wtr.slice_mut(tcp.hdr_len()).unwrap()); - wtr.write(body).unwrap(); - let pkt = Packet::new(seg); - assert_eq!(pkt.len(), pkt_sz); - pkt + MsgBlk::new_ethernet_pkt((eth, ip4, tcp, body)) } - // TODO(kyle): equivalent for MsgBlk - // #[test] - // fn zero_byte_packet() { - // let pkt = Packet::alloc(0); - // assert_eq!(pkt.len(), 0); - // assert_eq!(pkt.num_segs(), 1); - // assert_eq!(pkt.avail(), 16); - // let res = pkt.parse(Out, GenericUlp {}); - // match res { - // Err(ParseError::BadHeader(msg)) => { - // assert_eq!( - // msg, - // EtherHdrError::ReadError(ReadErr::EndOfPacket).into() - // ); - // } - - // _ => panic!("expected read error, got: {:?}", res), - // } - - // let pkt2 = Packet::copy(&[]); - // assert_eq!(pkt2.len(), 0); - // assert_eq!(pkt2.num_segs(), 1); - // assert_eq!(pkt2.avail(), 16); - // let res = pkt2.parse(Out, GenericUlp {}); - // match res { - // 
Err(ParseError::BadHeader(msg)) => { - // assert_eq!( - // msg, - // EtherHdrError::ReadError(ReadErr::EndOfPacket).into() - // ); - // } - - // _ => panic!("expected read error, got: {:?}", res), - // } - // } - - // Verify uninitialized packet. #[test] - fn uninitialized_packet() { - let pkt = Packet::alloc(200); - assert_eq!(pkt.avail(), 200); - assert_eq!(pkt.num_segs(), 1); + fn read_single_segment() { + let mut pkt = tcp_pkt(&[]); + let parsed = Packet::new(pkt.iter_mut()) + .parse_outbound(GenericUlp {}) + .unwrap() + .to_full_meta(); + + let eth_meta = parsed.meta().inner_ether(); + assert_eq!(eth_meta.destination(), DST_MAC); + assert_eq!(eth_meta.source(), SRC_MAC); + assert_eq!(eth_meta.ethertype(), Ethertype::IPV4); + + let ip4_meta = parsed.meta().inner_ip4().unwrap(); + assert_eq!(ip4_meta.source(), SRC_IP4); + assert_eq!(ip4_meta.destination(), DST_IP4); + assert_eq!(ip4_meta.protocol(), IpProtocol::TCP); + + let tcp_meta = parsed.meta().inner_tcp().unwrap(); + assert_eq!(tcp_meta.source(), 3839); + assert_eq!(tcp_meta.destination(), 80); + assert_eq!(tcp_meta.flags(), TcpFlags::SYN); + assert_eq!(tcp_meta.sequence(), 4224936861); + assert_eq!(tcp_meta.acknowledgement(), 0); } - // Verify that a segment's bytes can be read in the CanRead state. 
#[test] - fn read_seg() { - let buf1 = vec![0x1, 0x2, 0x3, 0x4]; - let buf2 = vec![0x5, 0x6]; - let mp1 = mock_desballoc(buf1); - let mp2 = mock_desballoc(buf2); + fn read_multi_segment() { + let mp1 = allocb(34); + let mp2 = allocb(20); unsafe { (*mp1).b_cont = mp2; } - let pkt = unsafe { Packet::wrap_mblk(mp1).unwrap() }; - assert_eq!(pkt.len(), 6); - assert_eq!(pkt.num_segs(), 2); - assert_eq!(pkt.seg_bytes(0), &[0x1, 0x2, 0x3, 0x4]); - assert_eq!(pkt.seg_bytes(1), &[0x5, 0x6]); - } + let mut mp1 = MsgBlk::new_ethernet_pkt(Ethernet { + destination: DST_MAC, + source: SRC_MAC, + ethertype: Ethertype::IPV4, + }); - #[test] - fn wrap() { - let mut buf1 = Vec::with_capacity(20); - let mut buf2 = Vec::with_capacity(2); - buf1.extend_from_slice(&[0x1, 0x2, 0x3, 0x4]); - buf2.extend_from_slice(&[0x5, 0x6]); - let mp1 = mock_desballoc(buf1); - let mp2 = mock_desballoc(buf2); + let tcp = Tcp { + source: 3839, + destination: 80, + flags: TcpFlags::SYN, + sequence: 4224936861, + ..Default::default() + }; - unsafe { - (*mp1).b_cont = mp2; - } + let ip4 = Ipv4 { + source: SRC_IP4, + destination: DST_IP4, + protocol: IpProtocol::TCP, + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length()) as u16, + ..Default::default() + }; - let pkt = unsafe { Packet::wrap_mblk(mp1).unwrap() }; - assert_eq!(pkt.num_segs(), 2); - assert_eq!(pkt.avail(), 22); - assert_eq!(pkt.len(), 6); - } + let mp2 = MsgBlk::new_pkt((ip4, tcp)); - // TODO(kyle): equivalents for MsgBlk? 
- // #[test] - // fn read_single_segment() { - // let parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); - // assert_eq!(parsed.state.hdr_offsets.inner.ether.seg_idx, 0); - // assert_eq!(parsed.state.hdr_offsets.inner.ether.seg_pos, 0); - - // let eth_meta = parsed.state.meta.inner.ether; - // assert_eq!(eth_meta.ether_type, EtherType::Ipv4); - // assert_eq!(eth_meta.dst, DST_MAC); - // assert_eq!(eth_meta.src, SRC_MAC); - - // let offsets = &parsed.state.hdr_offsets; - - // let ip4_meta = match parsed.state.meta.inner.ip.as_ref().unwrap() { - // IpMeta::Ip4(v) => v, - // _ => panic!("expected IPv4"), - // }; - // assert_eq!(ip4_meta.src, SRC_IP4); - // assert_eq!(ip4_meta.dst, DST_IP4); - // assert_eq!(ip4_meta.proto, Protocol::TCP); - // assert_eq!(offsets.inner.ip.as_ref().unwrap().seg_idx, 0); - // assert_eq!(offsets.inner.ip.as_ref().unwrap().seg_pos, 14); - - // let tcp_meta = match parsed.state.meta.inner.ulp.as_ref().unwrap() { - // UlpMeta::Tcp(v) => v, - // _ => panic!("expected TCP"), - // }; - // assert_eq!(tcp_meta.src, 3839); - // assert_eq!(tcp_meta.dst, 80); - // assert_eq!(tcp_meta.flags, TcpFlags::SYN); - // assert_eq!(tcp_meta.seq, 4224936861); - // assert_eq!(tcp_meta.ack, 0); - // assert_eq!(offsets.inner.ulp.as_ref().unwrap().seg_idx, 0); - // assert_eq!(offsets.inner.ulp.as_ref().unwrap().seg_pos, 34); - // } - - // TODO(kyle): equivalents for MsgBlk? 
- // #[test] - // fn write_and_read_multi_segment() { - // let mp1 = allocb(34); - // let mp2 = allocb(20); - - // unsafe { - // (*mp1).b_cont = mp2; - // } - - // let mut seg1 = unsafe { PacketSeg::wrap_mblk(mp1) }; - // let mut seg2 = unsafe { PacketSeg::wrap_mblk(mp2) }; - - // let tcp = TcpMeta { - // src: 3839, - // dst: 80, - // flags: TcpFlags::SYN, - // seq: 4224936861, - // ..Default::default() - // }; - // let ip4 = Ipv4Meta { - // src: SRC_IP4, - // dst: DST_IP4, - // proto: Protocol::TCP, - // total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len()) as u16, - // ..Default::default() - // }; - // let eth = EtherMeta { - // ether_type: EtherType::Ipv4, - // src: SRC_MAC, - // dst: DST_MAC, - // }; - // seg1.expand_end(34).unwrap(); - // let mut wtr1 = seg1.get_writer(); - // eth.emit(wtr1.slice_mut(EtherHdr::SIZE).unwrap()); - // ip4.emit(wtr1.slice_mut(ip4.hdr_len()).unwrap()); - - // seg2.expand_end(20).unwrap(); - // let mut wtr2 = seg2.get_writer(); - // tcp.emit(wtr2.slice_mut(tcp.hdr_len()).unwrap()); - // let pkt = Packet::new2(seg1, seg2); - // let parsed = pkt.parse(Out, GenericUlp {}).unwrap(); - - // let eth_parsed = parsed.state.meta.inner.ether; - // assert_eq!(parsed.state.hdr_offsets.inner.ether.seg_idx, 0); - // assert_eq!(parsed.state.hdr_offsets.inner.ether.seg_pos, 0); - // assert_eq!(eth_parsed.ether_type, EtherType::Ipv4); - // assert_eq!(eth_parsed.dst, DST_MAC); - // assert_eq!(eth_parsed.src, SRC_MAC); - - // let offsets = &parsed.state.hdr_offsets; - - // let ip4_parsed = match parsed.state.meta.inner.ip.unwrap() { - // IpMeta::Ip4(v) => v, - // _ => panic!("expected IPv4"), - // }; - // assert_eq!(ip4_parsed.src, SRC_IP4); - // assert_eq!(ip4_parsed.dst, DST_IP4); - // assert_eq!(ip4_parsed.proto, Protocol::TCP); - // assert_eq!(offsets.inner.ip.as_ref().unwrap().seg_idx, 0); - // assert_eq!(offsets.inner.ip.as_ref().unwrap().seg_pos, 14); - - // let tcp_parsed = match parsed.state.meta.inner.ulp.unwrap() { - // UlpMeta::Tcp(v) => v, - // 
_ => panic!("expected TCP"), - // }; - // assert_eq!(tcp_parsed.src, 3839); - // assert_eq!(tcp_parsed.dst, 80); - // assert_eq!(tcp_parsed.flags, TcpFlags::SYN); - // assert_eq!(tcp_parsed.seq, 4224936861); - // assert_eq!(tcp_parsed.ack, 0); - // assert_eq!(offsets.inner.ulp.as_ref().unwrap().seg_idx, 0); - // assert_eq!(offsets.inner.ulp.as_ref().unwrap().seg_pos, 34); - // } - - // Verify that we catch when a read requires more bytes than are - // available. - #[test] - fn not_enough_bytes_read() { - let eth = EtherMeta { - ether_type: EtherType::Ipv4, - src: SRC_MAC, - dst: DST_MAC, - }; + mp1.append(mp2); - let mut seg = PacketSeg::alloc(34); - seg.expand_end(24).unwrap(); - let mut wtr = seg.get_writer(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - // The actual bytes do not matter for this test. - let ip4_partial = [0xA; 10]; - wtr.write(&ip4_partial).unwrap(); - let pkt = Packet::new(seg); - assert_eq!(pkt.num_segs(), 1); - assert_eq!(pkt.len(), 24); - assert_eq!(pkt.avail(), 34); - let mut rdr = pkt.get_rdr(); - let _ = rdr.slice(EtherHdr::SIZE); - assert!(matches!( - rdr.slice(Ipv4Hdr::BASE_SIZE), - Err(ReadErr::NotEnoughBytes) - )); - } + let pkt = Packet::new(mp1.iter_mut()) + .parse_outbound(GenericUlp {}) + .unwrap() + .to_full_meta(); - // TODO(kyle): equivalents for MsgBlk? - // #[test] - // #[should_panic] - // fn slice_unchecked_bad_offset() { - // let parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); - // // Offset past end of segment. - // parsed.segs[0].slice_unchecked(99, None); - // } - - // #[test] - // #[should_panic] - // fn slice_mut_unchecked_bad_offset() { - // let mut parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); - // // Offset past end of segment. - // parsed.segs[0].slice_mut_unchecked(99, None); - // } - - // #[test] - // #[should_panic] - // fn slice_unchecked_bad_len() { - // let parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); - // // Length past end of segment. 
- // parsed.segs[0].slice_unchecked(0, Some(99)); - // } - - // #[test] - // #[should_panic] - // fn slice_mut_unchecked_bad_len() { - // let mut parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); - // // Length past end of segment. - // parsed.segs[0].slice_mut_unchecked(0, Some(99)); - // } - - // #[test] - // fn slice_unchecked_zero() { - // let parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); - // // Set offset to end of packet and slice the "rest" by - // // passing None. - // assert_eq!(parsed.segs[0].slice_unchecked(54, None).len(), 0); - // } - - // #[test] - // fn slice_mut_unchecked_zero() { - // let mut parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); - // // Set offset to end of packet and slice the "rest" by - // // passing None. - // assert_eq!(parsed.segs[0].slice_mut_unchecked(54, None).len(), 0); - // } + let eth_parsed = pkt.meta().inner_ether(); + assert_eq!(eth_parsed.destination(), DST_MAC); + assert_eq!(eth_parsed.source(), SRC_MAC); + assert_eq!(eth_parsed.ethertype(), Ethertype::IPV4); + + let ip4_parsed = pkt.meta().inner_ip4().unwrap(); + assert_eq!(ip4_parsed.source(), SRC_IP4); + assert_eq!(ip4_parsed.destination(), DST_IP4); + assert_eq!(ip4_parsed.protocol(), IpProtocol::TCP); + + let tcp_parsed = pkt.meta().inner_tcp().unwrap(); + assert_eq!(tcp_parsed.source(), 3839); + assert_eq!(tcp_parsed.destination(), 80); + assert_eq!(tcp_parsed.flags(), TcpFlags::SYN); + assert_eq!(tcp_parsed.sequence(), 4224936861); + assert_eq!(tcp_parsed.acknowledgement(), 0); + } - // TODO(kyle): equivalent for MsgBlk // Verify that if the TCP header straddles an mblk we return an // error. 
- // #[test] - // fn straddled_tcp() { - // let mp1 = allocb(46); - // let mp2 = allocb(8); - - // unsafe { - // (*mp1).b_cont = mp2; - // } - - // let mut seg1 = unsafe { PacketSeg::wrap_mblk(mp1) }; - // let mut seg2 = unsafe { PacketSeg::wrap_mblk(mp2) }; - - // let tcp = TcpMeta { src: 3839, dst: 80, ..Default::default() }; - // let ip4 = Ipv4Meta { - // src: SRC_IP4, - // dst: DST_IP4, - // proto: Protocol::TCP, - // total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len()) as u16, - // ..Default::default() - // }; - // let eth = EtherMeta { - // ether_type: EtherType::Ipv4, - // src: SRC_MAC, - // dst: DST_MAC, - // }; - // seg1.expand_end(46).unwrap(); - // let mut wtr1 = seg1.get_writer(); - // eth.emit(wtr1.slice_mut(EtherHdr::SIZE).unwrap()); - // ip4.emit(wtr1.slice_mut(ip4.hdr_len()).unwrap()); - // let mut tcp_bytes = vec![0u8; tcp.hdr_len()]; - // tcp.emit(&mut tcp_bytes); - // wtr1.write(&tcp_bytes[0..12]).unwrap(); - - // seg2.expand_end(8).unwrap(); - // let mut wtr2 = seg2.get_writer(); - // wtr2.write(&tcp_bytes[12..]).unwrap(); - // let pkt = Packet::new2(seg1, seg2); - // assert_eq!(pkt.num_segs(), 2); - // assert_eq!( - // pkt.len(), - // EtherHdr::SIZE + Ipv4Hdr::BASE_SIZE + TcpHdr::BASE_SIZE - // ); - // assert!(matches!( - // pkt.parse(Out, GenericUlp {}), - // Err(ParseError::BadHeader(_)) - // )); - // } + #[test] + fn straddled_tcp() { + let base = tcp_pkt(&[]); - // TODO(kyle): equivalent for MsgBlk - // Verify that we correctly parse an IPv6 packet with extension headers - // #[test] - // fn parse_ipv6_extension_headers_ok() { - // use crate::engine::ip6::test::generate_test_packet; - // use crate::engine::ip6::test::SUPPORTED_EXTENSIONS; - // use itertools::Itertools; - // use smoltcp::wire::IpProtocol; - // for n_extensions in 0..SUPPORTED_EXTENSIONS.len() { - // for extensions in - // SUPPORTED_EXTENSIONS.into_iter().permutations(n_extensions) - // { - // // Generate a full IPv6 test packet, but pull out the extension - // // headers as a 
byte array. - // let (buf, ipv6_header_size) = - // generate_test_packet(extensions.as_slice()); - - // let next_hdr = - // *(extensions.first().unwrap_or(&IpProtocol::Tcp)); - // let ext_hdrs = &buf[Ipv6Hdr::BASE_SIZE..ipv6_header_size]; - - // // Append a TCP header - // let tcp = TcpMeta { - // src: 3839, - // dst: 80, - // seq: 4224936861, - // ..Default::default() - // }; - // let mut ext_bytes = [0; 64]; - // let ext_len = ext_hdrs.len(); - // assert!(ext_len <= 64); - // ext_bytes[0..ext_len].copy_from_slice(ext_hdrs); - - // let pay_len = tcp.hdr_len() + ext_len; - // let ip6 = Ipv6Meta { - // src: SRC_IP6, - // dst: DST_IP6, - // proto: Protocol::TCP, - // next_hdr, - // hop_limit: 255, - // pay_len: pay_len as u16, - // ext: Some(ext_bytes), - // ext_len, - // }; - // let eth = EtherMeta { - // ether_type: EtherType::Ipv6, - // src: SRC_MAC, - // dst: DST_MAC, - // }; - - // let mut seg = PacketSeg::alloc(1024); - // seg.expand_end(14 + ipv6_header_size + tcp.hdr_len()).unwrap(); - // let mut wtr = seg.get_writer(); - // eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - // ip6.emit(wtr.slice_mut(ip6.hdr_len()).unwrap()); - // tcp.emit(wtr.slice_mut(tcp.hdr_len()).unwrap()); - // let parsed = - // Packet::new(seg).parse(Out, GenericUlp {}).unwrap(); - - // // Assert that the computed offsets of the headers and payloads - // // are accurate - // let offsets = &parsed.state.hdr_offsets; - // let ip = offsets - // .inner - // .ip - // .as_ref() - // .expect("Expected IP header offsets"); - // assert_eq!( - // ip.seg_idx, 0, - // "Expected IP headers to be in segment 0" - // ); - // assert_eq!( - // ip.seg_pos, - // EtherHdr::SIZE, - // "Expected the IP header to start immediately \ - // after the Ethernet header" - // ); - // assert_eq!( - // ip.pkt_pos, - // EtherHdr::SIZE, - // "Expected the IP header to start immediately \ - // after the Ethernet header" - // ); - // let ulp = &offsets - // .inner - // .ulp - // .as_ref() - // .expect("Expected ULP 
header offsets"); - // assert_eq!( - // ulp.seg_idx, 0, - // "Expected the ULP header to be in segment 0" - // ); - // assert_eq!( - // ulp.seg_pos, - // EtherHdr::SIZE + ipv6_header_size, - // "Expected the ULP header to start immediately \ - // after the IP header", - // ); - // assert_eq!( - // ulp.pkt_pos, - // EtherHdr::SIZE + ipv6_header_size, - // "Expected the ULP header to start immediately \ - // after the IP header", - // ); - // } - // } - // } + let mut st1 = MsgBlk::copy(&base[..42]); + let st2 = MsgBlk::copy(&base[42..]); - #[test] - fn seg_writer() { - let mut seg = PacketSeg::alloc(18); - seg.expand_end(18).unwrap(); + st1.append(st2); - // Verify that an offset past the end results in error. - assert!(matches!( - PacketSegWriter::new(&mut seg, 20, 20), - Err(ModifierCreateError::StartOutOfRange), - )); + assert_eq!(st1.seg_len(), 2); + assert_eq!(st1.byte_len(), base.len()); - // Verify that a length past the end results in error. assert!(matches!( - PacketSegWriter::new(&mut seg, 0, 20), - Err(ModifierCreateError::EndOutOfRange), + Packet::new(st1.iter_mut()).parse_outbound(GenericUlp {}), + Err(ParseError::IngotError(_)) )); - - // Writer for entire segment. - let wtr = PacketSegWriter::new(&mut seg, 0, 18).unwrap(); - assert_eq!(wtr.pos, 0); - assert_eq!(wtr.avail, 18); - - // Writer for last 4 bytes of segment. 
- let wtr = PacketSegWriter::new(&mut seg, 14, 4).unwrap(); - assert_eq!(wtr.pos, 0); - assert_eq!(wtr.avail, 4); } + // Verify that we correctly parse an IPv6 packet with extension headers #[test] - fn expand_and_shrink() { - let mut seg = PacketSeg::alloc(18); - assert_eq!(seg.len(), 0); - seg.expand_end(18).unwrap(); - assert_eq!(seg.len(), 18); - seg.shrink_start(4).unwrap(); - assert_eq!(seg.len(), 14); - seg.expand_start(4).unwrap(); - assert_eq!(seg.len(), 18); - assert!(seg.expand_end(20).is_err()); - assert!(seg.shrink_start(20).is_err()); - assert!(seg.expand_start(4).is_err()); + fn parse_ipv6_extension_headers_ok() { + use crate::engine::ip::v6::test::generate_test_packet; + use crate::engine::ip::v6::test::SUPPORTED_EXTENSIONS; + use itertools::Itertools; + use smoltcp::wire::IpProtocol; + for n_extensions in 0..SUPPORTED_EXTENSIONS.len() { + for extensions in + SUPPORTED_EXTENSIONS.into_iter().permutations(n_extensions) + { + // Generate a full IPv6 test packet, but pull out the extension + // headers as a byte array. + let (buf, ipv6_header_size) = + generate_test_packet(extensions.as_slice()); + + let next_hdr = + *(extensions.first().unwrap_or(&IpProtocol::Tcp)); + let ext_hdrs = &buf[Ipv6::MINIMUM_LENGTH..ipv6_header_size]; + + // Append a TCP header + let tcp = Tcp { + source: 3839, + destination: 80, + sequence: 4224936861, + ..Default::default() + }; + + let pay_len = tcp.packet_length() + ext_hdrs.len(); + let ip6 = Ipv6 { + source: SRC_IP6, + destination: DST_IP6, + next_header: IpProtocol(u8::from(next_hdr)), + hop_limit: 255, + payload_len: pay_len as u16, + + // Manually append extension hdrs rather than including + // here -- either way will test ingot's parsing logic. 
+ ..Default::default() + }; + let eth = Ethernet { + destination: DST_MAC, + source: SRC_MAC, + ethertype: Ethertype::IPV6, + }; + + let mut pkt = + MsgBlk::new_ethernet_pkt((eth, ip6, ext_hdrs, tcp)); + let pkt = Packet::new(pkt.iter_mut()) + .parse_outbound(GenericUlp {}) + .unwrap() + .to_full_meta(); + + // Assert that the packet parses back out, and we can reach + // the TCP meta no matter which permutation of EHs we have. + assert_eq!( + pkt.meta().inner_ip6().unwrap().v6ext_ref().packet_length(), + ipv6_header_size - Ipv6::MINIMUM_LENGTH + ); + let tcp_meta = pkt.meta().inner_tcp().unwrap(); + assert_eq!(tcp_meta.source(), 3839); + assert_eq!(tcp_meta.destination(), 80); + assert_eq!(tcp_meta.sequence(), 4224936861); + } + } } #[test] - fn prefix_len() { - let mut seg = PacketSeg::alloc(18); - assert_eq!(seg.prefix_len(), 0); - seg.expand_end(18).unwrap(); - assert_eq!(seg.prefix_len(), 0); - seg.shrink_start(4).unwrap(); - assert_eq!(seg.prefix_len(), 4); - seg.expand_start(4).unwrap(); - assert_eq!(seg.prefix_len(), 0); + fn small_packet_with_padding() { + const MINIMUM_ETH_FRAME_SZ: usize = 64; + const FRAME_CHECK_SEQ_SZ: usize = 4; + + // Start with a test packet that's smaller than the minimum + // ethernet frame size (64). + let body = []; + let mut pkt = tcp_pkt(&body); + assert!(pkt.len() < MINIMUM_ETH_FRAME_SZ); + + // Many (most?) NICs will pad out any such frames so that + // the total size is 64. + let padding_len = MINIMUM_ETH_FRAME_SZ + - pkt.len() + // Discount the 4 bytes for the Frame Check Sequence (FCS) + // which is usually not visible to upstack software. + - FRAME_CHECK_SEQ_SZ; + + // Tack on a new segment filled with zero to pad the packet so that + // it meets the minimum frame size. + // Note that we do NOT update any of the packet headers themselves + // as this padding process should be transparent to the upper + // layers. 
+ let mut padding_seg = MsgBlk::new(padding_len); + padding_seg.resize(padding_len).unwrap(); + + pkt.append(padding_seg); + assert_eq!(pkt.byte_len(), MINIMUM_ETH_FRAME_SZ - FRAME_CHECK_SEQ_SZ); + + // Generate the metadata by parsing the packet + let mut parsed = Packet::new(pkt.iter_mut()) + .parse_inbound(GenericUlp {}) + .unwrap() + .to_full_meta(); + + // Grab parsed metadata + let ip4_meta = parsed.meta().inner_ip4().unwrap(); + let tcp_meta = parsed.meta().inner_tcp().unwrap(); + + // Length in packet headers shouldn't reflect include padding + // This should not fail even though there are more bytes in + // the initialised area ofthe mblk chain than the packet expects. + assert_eq!( + usize::from(ip4_meta.total_len()), + (ip4_meta, tcp_meta, &body[..]).packet_length(), + ); } - // Verify that we do not panic when we get long chains of mblks linked by - // `b_cont`. This is a regression test for - // https://github.com/oxidecomputer/opte/issues/335 + // TODO(kyle): equivalent for MsgBlk #[test] - fn test_long_packet_continuation() { - const N_SEGMENTS: usize = 8; - let mut blocks: Vec<*mut mblk_t> = Vec::with_capacity(N_SEGMENTS); - for i in 0..N_SEGMENTS { - let mp = allocb(32); - - // Link previous block to this one. - if i > 0 { - let prev = blocks[i - 1]; - unsafe { - (*prev).b_cont = mp; - } - } - blocks.push(mp); - } - - // Wrap the first mblk in a Packet, and check that we still have a - // reference to everything. 
- let packet = unsafe { Packet::wrap_mblk(blocks[0]) } - .expect("Failed to wrap mblk chain with many segments"); + fn udp6_packet_with_padding() { + let body = [1, 2, 3, 4]; + let udp = Udp { + source: 124, + destination: 5673, + length: u16::try_from(Udp::MINIMUM_LENGTH + body.len()).unwrap(), + ..Default::default() + }; + let ip6 = Ipv6 { + source: SRC_IP6, + destination: DST_IP6, + next_header: IpProtocol::UDP, + hop_limit: 255, + payload_len: (&udp, &body[..]).packet_length() as u16, - assert_eq!(packet.segs.len(), N_SEGMENTS); - assert_eq!(packet.segs.len(), blocks.len()); - for (seg, mblk) in packet.segs.iter().zip(blocks) { - assert_eq!(seg.mp, mblk); - } - } + ..Default::default() + }; + let eth = Ethernet { + destination: DST_MAC, + source: SRC_MAC, + ethertype: Ethertype::IPV6, + }; - // TODO(kyle): equivalent for MsgBlk - // #[test] - // fn small_packet_with_padding() { - // const MINIMUM_ETH_FRAME_SZ: usize = 64; - // const FRAME_CHECK_SEQ_SZ: usize = 4; - - // // Start with a test packet that's smaller than the minimum - // // ethernet frame size (64). - // let body = []; - // let mut pkt = tcp_pkt(&body); - // assert!(pkt.len() < MINIMUM_ETH_FRAME_SZ); - - // // Many (most?) NICs will pad out any such frames so that - // // the total size is 64. - // let padding_len = MINIMUM_ETH_FRAME_SZ - // - pkt.len() - // // Discount the 4 bytes for the Frame Check Sequence (FCS) - // // which is usually not visible to upstack software. - // - FRAME_CHECK_SEQ_SZ; - - // // Tack on a new segment filled with zero to pad the packet so that - // // it meets the minimum frame size. - // // Note that we do NOT update any of the packet headers themselves - // // as this padding process should be transparent to the upper - // // layers. 
- // let mut padding_seg_wtr = pkt.add_seg(padding_len).unwrap(); - // padding_seg_wtr.write(&vec![0; padding_len]).unwrap(); - // assert_eq!(pkt.len(), MINIMUM_ETH_FRAME_SZ - FRAME_CHECK_SEQ_SZ); - - // // Generate the metadata by parsing the packet - // let mut pkt = pkt.parse(Direction::In, GenericUlp {}).unwrap(); - - // // Grab parsed metadata - // let ip4_meta = pkt.meta().inner_ip4().cloned().unwrap(); - // let tcp_meta = pkt.meta().inner_tcp().cloned().unwrap(); - - // // Length in packet headers shouldn't reflect include padding - // assert_eq!( - // usize::from(ip4_meta.total_len), - // ip4_meta.hdr_len() + tcp_meta.hdr_len() + body.len(), - // ); - - // // The computed body length also shouldn't include the padding - // assert_eq!(pkt.state.body.len, body.len()); - - // // Pretend some processing happened... - // // And now we need to update the packet headers based on the - // // modified packet metadata. - // pkt.emit_new_headers().unwrap(); - - // // Grab the actual packet headers - // let ip4_off = pkt.hdr_offsets().inner.ip.unwrap().pkt_pos; - // let mut rdr = pkt.get_rdr_mut(); - // rdr.seek(ip4_off).unwrap(); - // let ip4_hdr = Ipv4Hdr::parse(&mut rdr).unwrap(); - // let tcp_hdr = TcpHdr::parse(&mut rdr).unwrap(); - - // // And make sure they don't include the padding bytes - // assert_eq!( - // usize::from(ip4_hdr.total_len()), - // usize::from(ip4_hdr.hdr_len()) + tcp_hdr.hdr_len() + body.len() - // ); - // } + let pkt_sz = eth.packet_length() + + ip6.packet_length() + + usize::from(ip6.payload_len); + let mut pkt = MsgBlk::new_ethernet_pkt((eth, ip6, udp, &body[..])); + assert_eq!(pkt.len(), pkt_sz); - // TODO(kyle): equivalent for MsgBlk - // #[test] - // fn udp6_packet_with_padding() { - // let body = [1, 2, 3, 4]; - // let udp = UdpMeta { - // src: 124, - // dst: 5673, - // len: u16::try_from(UdpHdr::SIZE + body.len()).unwrap(), - // ..Default::default() - // }; - // let ip6 = Ipv6Meta { - // src: SRC_IP6, - // dst: DST_IP6, - // proto: 
Protocol::UDP, - // next_hdr: smoltcp::wire::IpProtocol::Udp, - // hop_limit: 255, - // pay_len: udp.len, - // ext: None, - // ext_len: 0, - // }; - // let eth = EtherMeta { - // ether_type: EtherType::Ipv6, - // src: SRC_MAC, - // dst: DST_MAC, - // }; - - // let pkt_sz = eth.hdr_len() + ip6.hdr_len() + usize::from(ip6.pay_len); - // let mut pkt = Packet::alloc_and_expand(pkt_sz); - // let mut wtr = pkt.seg0_wtr(); - // eth.emit(wtr.slice_mut(eth.hdr_len()).unwrap()); - // ip6.emit(wtr.slice_mut(ip6.hdr_len()).unwrap()); - // udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); - // wtr.write(&body).unwrap(); - // assert_eq!(pkt.len(), pkt_sz); - - // // Tack on a new segment filled zero padding at - // // the end that's not part of the payload as indicated - // // by the packet headers. - // let padding_len = 8; - // let mut padding_seg_wtr = pkt.add_seg(padding_len).unwrap(); - // padding_seg_wtr.write(&vec![0; padding_len]).unwrap(); - // assert_eq!(pkt.len(), pkt_sz + padding_len); - - // // Generate the metadata by parsing the packet - // let mut pkt = pkt.parse(Direction::In, GenericUlp {}).unwrap(); - - // // Grab parsed metadata - // let ip6_meta = pkt.meta().inner_ip6().cloned().unwrap(); - // let udp_meta = pkt.meta().inner_udp().cloned().unwrap(); - - // // Length in packet headers shouldn't reflect include padding - // assert_eq!( - // usize::from(ip6_meta.pay_len), - // udp_meta.hdr_len() + body.len(), - // ); - - // // The computed body length also shouldn't include the padding - // assert_eq!(pkt.state.body.len, body.len()); - - // // Pretend some processing happened... - // // And now we need to update the packet headers based on the - // // modified packet metadata. 
- // pkt.emit_new_headers().unwrap(); - - // // Grab the actual packet headers - // let ip6_off = pkt.hdr_offsets().inner.ip.unwrap().pkt_pos; - // let mut rdr = pkt.get_rdr_mut(); - // rdr.seek(ip6_off).unwrap(); - // let ip6_hdr = Ipv6Hdr::parse(&mut rdr).unwrap(); - // let udp_hdr = UdpHdr::parse(&mut rdr).unwrap(); - - // // And make sure they don't include the padding bytes - // assert_eq!(ip6_hdr.pay_len(), udp_hdr.hdr_len() + body.len()); - // } + // Tack on a new segment filled zero padding at + // the end that's not part of the payload as indicated + // by the packet headers. + let padding_len = 8; + let mut padding_seg = MsgBlk::new(padding_len); + padding_seg.resize(padding_len).unwrap(); + pkt.append(padding_seg); + assert_eq!(pkt.byte_len(), pkt_sz + padding_len); + + // Generate the metadata by parsing the packet. + // This should not fail even though there are more bytes in + // the initialised area ofthe mblk chain than the packet expects. + let mut pkt = Packet::new(pkt.iter_mut()) + .parse_inbound(GenericUlp {}) + .unwrap() + .to_full_meta(); + + // Grab parsed metadata + let ip6_meta = pkt.meta().inner_ip6().unwrap(); + let udp_meta = pkt.meta().inner_udp().unwrap(); + + // Length in packet headers shouldn't reflect include padding + assert_eq!( + usize::from(ip6_meta.payload_len()), + udp_meta.packet_length() + body.len(), + ); + } } diff --git a/lib/opte/src/engine/parse.rs b/lib/opte/src/engine/parse.rs index 16286f5a..44b5b571 100644 --- a/lib/opte/src/engine/parse.rs +++ b/lib/opte/src/engine/parse.rs @@ -385,8 +385,12 @@ fn validate_udp( pkt: &ValidUdp, bytes_after: usize, ) -> Result<(), ParseError> { + // Packets can have arbitrary zero-padding at the end so + // our length *could* be larger than the packet reports. + // Unlikely in practice as Encap headers push us past the 64B + // minimum packet size. 
let wanted_len = bytes_after + pkt.packet_length(); - if pkt.length() as usize == wanted_len { + if pkt.length() as usize <= wanted_len { Ok(()) } else { Err(ParseError::BadLength(MismatchError { @@ -757,3 +761,38 @@ impl ValidUlp { } } } + +#[cfg(test)] +mod test { + use crate::engine::checksum::Checksum as OpteCsum; + use ingot::types::ParseChoice; + use smoltcp::phy::ChecksumCapabilities; + use smoltcp::wire::Icmpv4Packet; + use smoltcp::wire::Icmpv4Repr; + + use super::*; + + #[test] + fn icmp4_body_csum_equals_body() { + let data = b"reunion\0"; + let mut body_csum = OpteCsum::default(); + body_csum.add_bytes(data); + + let mut cksum_cfg = ChecksumCapabilities::ignored(); + cksum_cfg.icmpv4 = smoltcp::phy::Checksum::Both; + + let test_pkt = Icmpv4Repr::EchoRequest { ident: 7, seq_no: 7777, data }; + let mut out = vec![0u8; test_pkt.buffer_len()]; + let mut packet = Icmpv4Packet::new_unchecked(&mut out); + test_pkt.emit(&mut packet, &cksum_cfg); + + let src = &mut out[..IcmpV4::MINIMUM_LENGTH]; + let (ulp, ..) 
= + ValidUlp::parse_choice(src, Some(IpProtocol::ICMP)).unwrap(); + + assert_eq!( + Some(body_csum.finalize()), + csum_minus_hdr(&ulp).map(|mut v| v.finalize()), + ); + } +} diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index f03d3e3e..fb5a6784 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -350,7 +350,7 @@ impl CompiledEncap { return pkt; }; - let mut prepend = if pkt.headroom() < bytes.len() { + let mut prepend = if pkt.head_capacity() < bytes.len() { let mut pkt = MsgBlk::new_ethernet(bytes.len()); pkt.pop_all(); Some(pkt) diff --git a/lib/opte/src/engine/snat.rs b/lib/opte/src/engine/snat.rs index f89f5c8a..9795d043 100644 --- a/lib/opte/src/engine/snat.rs +++ b/lib/opte/src/engine/snat.rs @@ -469,11 +469,11 @@ mod test { use ingot::tcp::TcpRef; use ingot::types::HeaderLen; + use crate::ddi::mblk::MsgBlk; use crate::engine::ether::Ethernet; use crate::engine::ether::EthernetRef; use crate::engine::ip::v4::Ipv4; use crate::engine::ip::v4::Ipv4Ref; - use crate::ddi::mblk::MsgBlk; use super::*; diff --git a/lib/opte/src/engine/tcp.rs b/lib/opte/src/engine/tcp.rs index 81496ab5..5517002f 100644 --- a/lib/opte/src/engine/tcp.rs +++ b/lib/opte/src/engine/tcp.rs @@ -93,263 +93,3 @@ pub struct TcpMod { src: Option, dst: Option, } - -#[cfg(test)] -mod test { - use super::*; - use crate::engine::packet::Packet; - - #[test] - fn emit_no_opts() { - let tcp = TcpMeta { - src: 49154, - dst: 80, - seq: 2511121667, - ack: 754208397, - flags: TcpFlags::ACK, - window_size: 64436, - options_bytes: None, - options_len: 0, - csum: [0; 2], - }; - - let len = tcp.hdr_len(); - let mut pkt = Packet::alloc_and_expand(len); - let mut wtr = pkt.seg0_wtr(); - tcp.emit(wtr.slice_mut(tcp.hdr_len()).unwrap()); - assert_eq!(len, pkt.len()); - #[rustfmt::skip] - let expected_bytes = vec![ - // source - 0xC0, 0x02, - // dest - 0x00, 0x50, - // seq - 0x95, 0xAC, 0xAD, 0x03, - // ack - 0x2C, 0xF4, 0x4E, 0x8D, - // offset + flags - 0x50, 0x10, 
- // window - 0xFB, 0xB4, - // checksum - 0x00, 0x00, - // URG pointer - 0x00, 0x00, - ]; - assert_eq!(&expected_bytes, pkt.seg_bytes(0)); - } - - #[test] - fn emit_opts() { - let mut opts = [0x00; TcpHdr::MAX_OPTION_SIZE]; - let bytes = [ - 0x02, 0x04, 0x05, 0xB4, 0x04, 0x02, 0x08, 0x0A, 0x09, 0xB4, 0x2A, - 0xA9, 0x00, 0x00, 0x00, 0x00, 0x01, 0x03, 0x03, 0x01, - ]; - opts[0..bytes.len()].copy_from_slice(&bytes); - - let tcp = TcpMeta { - src: 49154, - dst: 80, - seq: 2511121590, - ack: 0, - flags: TcpFlags::SYN, - window_size: 64240, - options_bytes: Some(opts), - options_len: bytes.len(), - csum: [0; 2], - }; - - let len = tcp.hdr_len(); - assert_eq!(40, len); - let mut pkt = Packet::alloc_and_expand(len); - let mut wtr = pkt.seg0_wtr(); - tcp.emit(wtr.slice_mut(tcp.hdr_len()).unwrap()); - assert_eq!(len, pkt.len()); - - #[rustfmt::skip] - let expected_bytes = vec![ - // source - 0xC0, 0x02, - // dest - 0x00, 0x50, - // seq - 0x95, 0xAC, 0xAC, 0xB6, - // ack - 0x00, 0x00, 0x00, 0x00, - // offset + flags - 0xA0, 0x02, - // window - 0xFA, 0xF0, - // checksum - 0x00, 0x00, - // URG pointer - 0x00, 0x00, - // MSS - 0x02, 0x04, 0x05, 0xB4, - // SACK permitted - 0x04, 0x02, - // Timestamps - 0x08, 0x0A, 0x09, 0xB4, 0x2A, 0xA9, 0x00, 0x00, 0x00, 0x00, - // No-op - 0x01, - // Window Scale - 0x03, 0x03, 0x01, - - ]; - assert_eq!(&expected_bytes, pkt.seg_bytes(0)); - } - - #[test] - fn parse_no_opts() { - let hdr_len = TcpHdr::BASE_SIZE; - #[rustfmt::skip] - let base_bytes = vec![ - // source - 0xC0, 0x02, - // dest - 0x00, 0x50, - // seq - 0x95, 0xAC, 0xAC, 0xB6, - // ack - 0x00, 0x00, 0x00, 0x00, - // offset - ((hdr_len / 4) as u8) << TCP_HDR_OFFSET_SHIFT, - // flags - 0x02, - // window - 0xFA, 0xF0, - // checksum - 0x00, 0x00, - // URG pointer - 0x00, 0x00, - ]; - assert_eq!(base_bytes.len(), TcpHdr::BASE_SIZE); - - let mut pkt = Packet::copy(&base_bytes); - let mut rdr = pkt.get_rdr_mut(); - let tcp_hdr = TcpHdr::parse(&mut rdr).unwrap(); - - 
assert_eq!(tcp_hdr.base_bytes(), &base_bytes); - assert_eq!(tcp_hdr.options_bytes(), None); - } - - #[test] - fn parse_max_opts() { - #[rustfmt::skip] - let option_bytes = [ - // MSS - 0x02, 0x04, 0x05, 0xB4, - // SACK permitted - 0x04, 0x02, - // Timestamps - 0x08, 0x0A, 0x09, 0xB4, 0x2A, 0xA9, 0x00, 0x00, 0x00, 0x00, - // No-op - 0x01, - // Window Scale - 0x03, 0x03, 0x01, - // No-ops - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - ]; - - let hdr_len = TcpHdr::BASE_SIZE + option_bytes.len(); - #[rustfmt::skip] - let base_bytes = [ - // source - 0xC0, 0x02, - // dest - 0x00, 0x50, - // seq - 0x95, 0xAC, 0xAC, 0xB6, - // ack - 0x00, 0x00, 0x00, 0x00, - // offset - ((hdr_len / 4) as u8) << TCP_HDR_OFFSET_SHIFT, - // flags - 0x02, - // window - 0xFA, 0xF0, - // checksum - 0x00, 0x00, - // URG pointer - 0x00, 0x00, - ]; - assert_eq!(base_bytes.len(), TcpHdr::BASE_SIZE); - - let pkt_bytes = base_bytes - .iter() - .copied() - .chain(option_bytes.iter().copied()) - .collect::>(); - - let mut pkt = Packet::copy(&pkt_bytes); - let mut rdr = pkt.get_rdr_mut(); - let tcp_hdr = TcpHdr::parse(&mut rdr).unwrap(); - - assert_eq!(tcp_hdr.base_bytes(), &base_bytes); - assert_eq!(tcp_hdr.options_bytes(), Some(&option_bytes[..])); - } - - #[test] - fn parse_opts_truncated() { - #[rustfmt::skip] - let option_bytes = [ - // MSS - 0x02, 0x04, 0x05, 0xB4, - // SACK permitted - 0x04, 0x02, - // Timestamps - 0x08, 0x0A, 0x09, 0xB4, 0x2A, 0xA9, 0x00, 0x00, 0x00, 0x00, - // No-op - 0x01, - // Window Scale - 0x03, 0x03, 0x01, - ]; - - let hdr_len = TcpHdr::BASE_SIZE - + option_bytes.len() - // Indicate there's an extra 32-bit word of options - + 4; - - #[rustfmt::skip] - let base_bytes = [ - // source - 0xC0, 0x02, - // dest - 0x00, 0x50, - // seq - 0x95, 0xAC, 0xAC, 0xB6, - // ack - 0x00, 0x00, 0x00, 0x00, - // offset - ((hdr_len / 4) as u8) << TCP_HDR_OFFSET_SHIFT, - // flags - 0x02, - // window - 0xFA, 0xF0, - 
// checksum - 0x00, 0x00, - // URG pointer - 0x00, 0x00, - ]; - assert_eq!(base_bytes.len(), TcpHdr::BASE_SIZE); - - let pkt_bytes = base_bytes - .iter() - .copied() - .chain(option_bytes.iter().copied()) - .collect::>(); - - let mut pkt = Packet::copy(&pkt_bytes); - let mut rdr = pkt.get_rdr_mut(); - let tcp_hdr_err = TcpHdr::parse(&mut rdr) - .expect_err("expected to fail parsing malformed TCP header"); - - assert_eq!( - tcp_hdr_err, - TcpHdrError::TruncatedOptions(ReadErr::NotEnoughBytes) - ); - } -} diff --git a/lib/opte/src/engine/udp.rs b/lib/opte/src/engine/udp.rs index 615bef45..3fac26ab 100644 --- a/lib/opte/src/engine/udp.rs +++ b/lib/opte/src/engine/udp.rs @@ -31,28 +31,3 @@ pub struct UdpMod { src: Option, dst: Option, } - -#[cfg(test)] -mod test { - use super::*; - use crate::engine::packet::Packet; - - #[test] - fn emit() { - let udp = UdpMeta { src: 5353, dst: 5353, len: 142, csum: [0; 2] }; - let len = udp.hdr_len(); - let mut pkt = Packet::alloc_and_expand(len); - let mut wtr = pkt.seg0_wtr(); - udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); - assert_eq!(len, pkt.len()); - - #[rustfmt::skip] - let expected_bytes = [ - // source port + dest port - 0x14, 0xE9, 0x14, 0xE9, - // length + checksum - 0x00, 0x8E, 0x00, 0x00, - ]; - assert_eq!(&expected_bytes, pkt.seg_bytes(0)); - } -} diff --git a/rust-toolchain.toml b/rust-toolchain.toml index bbf217f2..5f3ff177 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,3 @@ [toolchain] -channel = "1.81.0" +channel = "1.82.0" profile = "default" diff --git a/xde/src/mac/mod.rs b/xde/src/mac/mod.rs index 568e9792..28dcbd2d 100644 --- a/xde/src/mac/mod.rs +++ b/xde/src/mac/mod.rs @@ -20,8 +20,8 @@ use core::ffi::CStr; use core::fmt; use core::ptr; use illumos_sys_hdrs::*; -use opte::engine::ether::EtherAddr; use opte::ddi::mblk::MsgBlk; +use opte::engine::ether::EtherAddr; pub use sys::*; /// Errors while opening a MAC handle. 
diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 0731bf11..cd0e1dc1 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -52,6 +52,8 @@ use opte::api::OpteError; use opte::api::SetXdeUnderlayReq; use opte::api::XDE_IOC_OPTE_CMD; use opte::d_error::LabelBlock; +use opte::ddi::mblk::MsgBlk; +use opte::ddi::mblk::MsgBlkChain; use opte::ddi::sync::KMutex; use opte::ddi::sync::KMutexType; use opte::ddi::sync::KRwLock; @@ -62,12 +64,10 @@ use opte::ddi::time::Periodic; use opte::engine::ether::EthernetRef; use opte::engine::geneve::Vni; use opte::engine::headers::IpAddr; -use opte::ddi::mblk::MsgBlk; use opte::engine::ingot_packet::Packet; use opte::engine::ioctl::{self as api}; use opte::engine::ip::v6::Ipv6Addr; use opte::engine::packet::InnerFlowId; -use opte::ddi::mblk::MsgBlkChain; use opte::engine::packet::ParseError; use opte::engine::port::Port; use opte::engine::port::PortBuilder; From 5294b9944f865e8465a2bc15be5bcf7a5468cded Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 28 Oct 2024 12:21:03 +0000 Subject: [PATCH 068/115] One more test to reintroduce. 
--- lib/opte/src/ddi/mblk.rs | 2 +- lib/opte/src/engine/geneve.rs | 8 --- lib/opte/src/engine/ip/v4.rs | 15 ++++- lib/opte/src/engine/ip/v6.rs | 4 +- lib/opte/src/engine/packet.rs | 5 +- lib/oxide-vpc/tests/integration_tests.rs | 79 ++++++++++++------------ 6 files changed, 59 insertions(+), 54 deletions(-) diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index 739f6e22..0f4354e6 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -795,7 +795,7 @@ impl Drop for MsgBlk { #[cfg(test)] mod test { - use ingot::types::PacketParseError; + use ingot::types::ParseError as IngotParseError; use crate::engine::ingot_packet::Packet; diff --git a/lib/opte/src/engine/geneve.rs b/lib/opte/src/engine/geneve.rs index f3d0f119..5dd5d004 100644 --- a/lib/opte/src/engine/geneve.rs +++ b/lib/opte/src/engine/geneve.rs @@ -315,17 +315,9 @@ pub fn geneve_opt_is_oxide_external( #[cfg(test)] mod test { use super::*; - use crate::ddi::mblk::MsgBlk; use crate::engine::headers::EncapMeta; - use crate::engine::ingot_packet::Packet; - use crate::engine::parse::ValidGeneveOverV6; - use ingot::ethernet::Ethernet; - use ingot::ethernet::Ethertype; - use ingot::ip::IpProtocol; - use ingot::ip::Ipv6; use ingot::types::Emit; use ingot::types::HeaderParse; - use ingot::udp::UdpRef; use ingot::udp::ValidUdp; #[test] diff --git a/lib/opte/src/engine/ip/v4.rs b/lib/opte/src/engine/ip/v4.rs index 77109959..bff6f911 100644 --- a/lib/opte/src/engine/ip/v4.rs +++ b/lib/opte/src/engine/ip/v4.rs @@ -125,6 +125,19 @@ impl ValidIpv4 { })); } + // Packets can have arbitrary zero-padding at the end so + // our length *could* be larger than the packet reports. + // Unlikely in practice as Encap headers push us past the 64B + // minimum packet size. 
+ let expt_internal_len = (self.ihl() as usize) << 2; + if (self.total_len() as usize) < expt_internal_len { + return Err(ParseError::BadLength(MismatchError { + location: c"Ipv4.total_len(min)", + expected: expt_internal_len as u64, + actual: self.total_len() as u64, + })); + } + // Packets can have arbitrary zero-padding at the end so // our length *could* be larger than the packet reports. // Unlikely in practice as Encap headers push us past the 64B @@ -185,7 +198,7 @@ pub struct Ipv4Mod { #[cfg(test)] mod test { use super::*; - use ingot::tcp::TcpFlags; + use ingot::types::HeaderLen; pub const DEF_ROUTE: &str = "0.0.0.0/0"; diff --git a/lib/opte/src/engine/ip/v6.rs b/lib/opte/src/engine/ip/v6.rs index aae031cd..cef00f3f 100644 --- a/lib/opte/src/engine/ip/v6.rs +++ b/lib/opte/src/engine/ip/v6.rs @@ -131,8 +131,6 @@ pub struct Ipv6Mod { #[cfg(test)] pub(crate) mod test { use super::*; - use crate::ddi::mblk::MsgBlk; - use crate::engine::ingot_packet::Packet; use ingot::ip::IpProtocol as IngotIpProtocol; use ingot::types::Accessor; use ingot::types::Emit; @@ -486,7 +484,7 @@ pub(crate) mod test { assert!(ValidIpv6::parse(&buf[..]).is_err()); // We can construct this manually via ingot... 
- let (v6, rem) = Accessor::read_from_prefix(&buf[..]).unwrap(); + let (v6, _rem) = Accessor::read_from_prefix(&buf[..]).unwrap(); let ip = ValidIpv6(v6, Header::Repr(Default::default())); assert!(ip.validate(120).is_err()); } diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index e95523d0..ee0b2a32 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -460,7 +460,6 @@ mod test { use crate::ddi::mblk::MsgBlk; use crate::engine::ether::Ethernet; use crate::engine::ether::EthernetRef; - use crate::engine::ingot_packet::OpteMeta; use crate::engine::ingot_packet::Packet; use crate::engine::ip::v4::Ipv4; use crate::engine::ip::v4::Ipv4Ref; @@ -722,7 +721,7 @@ mod test { assert_eq!(pkt.byte_len(), MINIMUM_ETH_FRAME_SZ - FRAME_CHECK_SEQ_SZ); // Generate the metadata by parsing the packet - let mut parsed = Packet::new(pkt.iter_mut()) + let parsed = Packet::new(pkt.iter_mut()) .parse_inbound(GenericUlp {}) .unwrap() .to_full_meta(); @@ -783,7 +782,7 @@ mod test { // Generate the metadata by parsing the packet. // This should not fail even though there are more bytes in // the initialised area ofthe mblk chain than the packet expects. 
- let mut pkt = Packet::new(pkt.iter_mut()) + let pkt = Packet::new(pkt.iter_mut()) .parse_inbound(GenericUlp {}) .unwrap() .to_full_meta(); diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index 0a3e646e..4e4636c4 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -37,6 +37,7 @@ use opte::engine::ip::v6::Ipv6Ref; use opte::engine::ip::ValidL3; use opte::engine::ip::L3; use opte::engine::packet::InnerFlowId; +use opte::engine::packet::MismatchError; use opte::engine::parse::ValidUlp; use opte::engine::port::ProcessError; use opte::engine::tcp::TcpState; @@ -1822,44 +1823,46 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { unpack_and_verify_icmp(&mut pkt5_m, &g1_cfg, &new_params, Out, seq_no); } -// TODO(kyle) -// #[test] -// fn bad_ip_len() { -// let cfg = lab_cfg(); - -// let eth = EtherMeta { -// src: cfg.guest_mac, -// dst: MacAddr::BROADCAST, -// ether_type: EtherType::Ipv4, -// }; - -// let ip = Ipv4Meta { -// src: "0.0.0.0".parse().unwrap(), -// dst: Ipv4Addr::LOCAL_BCAST, -// proto: Protocol::UDP, -// ttl: 64, -// ident: 1, -// hdr_len: 20, -// // We write a total legnth of 4 bytes, which is completely -// // bogus for an IP header and should return an error during -// // processing. 
-// total_len: 4, -// ..Default::default() -// }; - -// let udp = UdpMeta { src: 68, dst: 67, ..Default::default() }; -// let total_len = EtherHdr::SIZE + usize::from(ip.hdr_len) + udp.hdr_len(); -// let mut pkt = Packet::alloc_and_expand(total_len); -// let mut wtr = pkt.seg0_wtr(); -// eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); -// ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); -// udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); -// let res = pkt.parse(Out, VpcParser::new()); -// assert_eq!( -// res.err().unwrap(), -// Ipv4HdrError::BadTotalLen { total_len: 4 }.into() -// ); -// } +#[test] +fn bad_ip_len() { + let cfg = lab_cfg(); + + let eth = Ethernet { + destination: MacAddr::BROADCAST, + source: cfg.guest_mac, + ethertype: Ethertype::IPV4, + }; + + let ip = Ipv4 { + source: "0.0.0.0".parse().unwrap(), + destination: Ipv4Addr::LOCAL_BCAST, + protocol: IngotIpProto::UDP, + hop_limit: 64, + identification: 1, + ihl: 5, + // We write a total length of 4 bytes, which is completely + // bogus for an IP header and should return an error during + // processing. + total_len: 4, + ..Default::default() + }; + + let udp = Udp { source: 68, destination: 67, ..Default::default() }; + + let mut pkt_m = MsgBlk::new_ethernet_pkt((eth, ip, udp)); + let res = Packet::new(pkt_m.iter_mut()) + .parse_outbound(VpcParser {}) + .err() + .unwrap(); + assert_eq!( + res, + ParseError::BadLength(MismatchError { + location: c"Ipv4.total_len(min)", + expected: 20, + actual: 4 + }) + ); +} // Verify that OPTE generates a hairpin ARP reply when the guest // queries for the gateway. From cec6ebee897b8337ba0d4594dd257ddfe3106863 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 28 Oct 2024 12:37:55 +0000 Subject: [PATCH 069/115] More Movement of Stuff. 
--- lib/opte/src/ddi/mblk.rs | 141 ++++++++++++++++++++++++++++++-- lib/opte/src/engine/packet.rs | 150 +--------------------------------- 2 files changed, 134 insertions(+), 157 deletions(-) diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index 0f4354e6..5ad022b3 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -5,12 +5,11 @@ // Copyright 2024 Oxide Computer Company use crate::engine::ingot_packet::QueryLen; -use crate::engine::packet::allocb; -#[cfg(any(feature = "std", test))] -use crate::engine::packet::mock_freemsg; use crate::engine::packet::SegAdjustError; use crate::engine::packet::WrapError; use crate::engine::packet::WriteError; +#[cfg(any(feature = "std", test))] +use alloc::boxed::Box; use alloc::vec::Vec; use core::marker::PhantomData; use core::mem::ManuallyDrop; @@ -22,6 +21,9 @@ use core::ptr::NonNull; use core::slice; #[cfg(all(not(feature = "std"), not(test)))] use illumos_sys_hdrs as ddi; +#[cfg(any(feature = "std", test))] +use illumos_sys_hdrs::c_uchar; +use illumos_sys_hdrs::dblk_t; use illumos_sys_hdrs::mblk_t; use illumos_sys_hdrs::uintptr_t; use ingot::types::Emit; @@ -29,6 +31,8 @@ use ingot::types::EmitDoesNotRelyOnBufContents; use ingot::types::ParseError as IngotParseErr; use ingot::types::Read; +pub static MBLK_MAX_SIZE: usize = u16::MAX as usize; + /// The head and tail of an mblk_t list. struct MsgBlkChainInner { head: NonNull, @@ -793,17 +797,136 @@ impl Drop for MsgBlk { } } -#[cfg(test)] -mod test { +/// The common entry into an `allocb(9F)` implementation that works in +/// both std and `no_std` environments. +/// +/// NOTE: We do not emulate the priority argument as it is not +/// relevant to OPTE's implementation. In the case of `no_std`, we +/// always pass a priority value of `0` to `allocb(9F)`. 
+pub fn allocb(size: usize) -> *mut mblk_t { + assert!(size <= MBLK_MAX_SIZE); + + #[cfg(any(feature = "std", test))] + return mock_allocb(size); + + // Safety: allocb(9F) should be safe for any size equal to or + // less than MBLK_MAX_SIZE. + #[cfg(all(not(feature = "std"), not(test)))] + unsafe { + ddi::allocb(size, 0) + } +} - use ingot::types::ParseError as IngotParseError; +#[cfg(any(feature = "std", test))] +pub fn mock_allocb(size: usize) -> *mut mblk_t { + // If the requested size is 0 we mimic allocb(9F) and allocate 16 + // bytes. See `uts/common/io/stream.c`. + let size = if size == 0 { 16 } else { size }; + let buf = Vec::with_capacity(size); + mock_desballoc(buf) +} + +#[cfg(any(feature = "std", test))] +pub fn mock_desballoc(buf: Vec) -> *mut mblk_t { + let mut buf = std::mem::ManuallyDrop::new(buf); + let ptr = buf.as_mut_ptr(); + let len = buf.len(); + let avail = buf.capacity(); + + // For the purposes of mocking in std the only fields that + // matter here are the ones relating to the data buffer: + // db_base and db_lim. + let dblk = Box::new(dblk_t { + db_frtnp: ptr::null(), + db_base: ptr, + // Safety: We rely on the Vec implementation to give us + // the correct value for avail. + db_lim: unsafe { ptr.add(avail) }, + db_ref: 0, + db_type: 0, + db_flags: 0, + db_struioflag: 0, + db_cpid: 0, + db_cache: ptr::null(), + db_mblk: ptr::null(), + db_free: ptr::null(), + db_lastfree: ptr::null(), + db_cksumstart: 0, + db_cksumend: 0, + db_cksumstuff: 0, + db_struioun: 0, + db_fthdr: ptr::null(), + db_credp: ptr::null(), + }); + + let dbp = Box::into_raw(dblk); + + // For the purposes of mocking in std the only fields that + // matter are b_rptr and b_wptr. However, in the future we + // will probably want to mock segments packets via b_cont and + // packet chains via b_next. + let mblk = Box::new(mblk_t { + b_next: ptr::null_mut(), + b_prev: ptr::null_mut(), + b_cont: ptr::null_mut(), + // Safety: We know dbp is valid because we just created it. 
+ b_rptr: unsafe { (*dbp).db_base as *mut c_uchar }, + b_wptr: unsafe { (*dbp).db_base.add(len) as *mut c_uchar }, + b_datap: dbp, + b_band: 0, + b_tag: 0, + b_flag: 0, + b_queue: ptr::null(), + }); + + let mp = Box::into_raw(mblk); + // Safety: We know dbp is valid because we just created it. + unsafe { (*dbp).db_mblk = mp as *const mblk_t }; + + mp +} +// The std equivalent to `freemsg(9F)`. +#[cfg(any(feature = "std", test))] +pub(crate) fn mock_freemsg(mut mp: *mut mblk_t) { + while !mp.is_null() { + let cont = unsafe { (*mp).b_cont }; + mock_freeb(mp); + mp = cont; + } +} + +// The std equivalent to `freeb(9F)`. +#[cfg(any(feature = "std", test))] +fn mock_freeb(mp: *mut mblk_t) { + // Safety: All of these were created safely in `mock_alloc()`. + // As long as the other methods don't do any of the following, + // this is safe: + // + // * Modify the `mp`/`dblk` pointers. + // * Increase `len` beyond `limit`. + // * Modify `limit`. + unsafe { + let bmblk = Box::from_raw(mp); + let bdblk = Box::from_raw(bmblk.b_datap as *mut dblk_t); + let buffer = Vec::from_raw_parts( + bdblk.db_base, + bmblk.b_wptr.offset_from(bmblk.b_rptr) as usize, + bdblk.db_lim.offset_from(bdblk.db_base) as usize, + ); + drop(buffer); + drop(bdblk); + drop(bmblk); + } +} + +#[cfg(test)] +mod test { + use super::*; use crate::engine::ingot_packet::Packet; - use crate::engine::packet::mock_desballoc; use crate::engine::packet::ParseError; use crate::engine::GenericUlp; - - use super::*; + use ingot::types::ParseError as IngotParseError; #[test] fn zero_byte_packet() { diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index ee0b2a32..357976bb 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -17,35 +17,19 @@ use super::headers::AF_INET6; use super::ip::v4::Ipv4Addr; use super::ip::v4::Protocol; use super::ip::v6::Ipv6Addr; +use super::Direction; use crate::d_error::DError; +use alloc::string::String; use core::ffi::CStr; use core::fmt; 
use core::fmt::Display; use core::hash::Hash; -use core::ptr; use core::result; use crc32fast::Hasher; use dyn_clone::DynClone; use ingot::types::PacketParseError; use serde::Deserialize; use serde::Serialize; -// TODO should probably move these two into this module now. -use super::Direction; -use alloc::string::String; -use alloc::vec::Vec; -use illumos_sys_hdrs::dblk_t; -use illumos_sys_hdrs::mblk_t; - -cfg_if! { - if #[cfg(all(not(feature = "std"), not(test)))] { - use illumos_sys_hdrs as ddi; - } else { - use std::boxed::Box; - use illumos_sys_hdrs::c_uchar; - } -} - -pub static MBLK_MAX_SIZE: usize = u16::MAX as usize; pub static FLOW_ID_DEFAULT: InnerFlowId = InnerFlowId { proto: 255, @@ -331,129 +315,6 @@ pub enum WriteError { pub type WriteResult = result::Result; -/// The common entry into an `allocb(9F)` implementation that works in -/// both std and `no_std` environments. -/// -/// NOTE: We do not emulate the priority argument as it is not -/// relevant to OPTE's implementation. In the case of `no_std`, we -/// always pass a priority value of `0` to `allocb(9F)`. -pub fn allocb(size: usize) -> *mut mblk_t { - assert!(size <= MBLK_MAX_SIZE); - - #[cfg(any(feature = "std", test))] - return mock_allocb(size); - - // Safety: allocb(9F) should be safe for any size equal to or - // less than MBLK_MAX_SIZE. - #[cfg(all(not(feature = "std"), not(test)))] - unsafe { - ddi::allocb(size, 0) - } -} - -#[cfg(any(feature = "std", test))] -pub fn mock_allocb(size: usize) -> *mut mblk_t { - // If the requested size is 0 we mimic allocb(9F) and allocate 16 - // bytes. See `uts/common/io/stream.c`. 
- let size = if size == 0 { 16 } else { size }; - let buf = Vec::with_capacity(size); - mock_desballoc(buf) -} - -#[cfg(any(feature = "std", test))] -pub fn mock_desballoc(buf: Vec) -> *mut mblk_t { - let mut buf = std::mem::ManuallyDrop::new(buf); - let ptr = buf.as_mut_ptr(); - let len = buf.len(); - let avail = buf.capacity(); - - // For the purposes of mocking in std the only fields that - // matter here are the ones relating to the data buffer: - // db_base and db_lim. - let dblk = Box::new(dblk_t { - db_frtnp: ptr::null(), - db_base: ptr, - // Safety: We rely on the Vec implementation to give us - // the correct value for avail. - db_lim: unsafe { ptr.add(avail) }, - db_ref: 0, - db_type: 0, - db_flags: 0, - db_struioflag: 0, - db_cpid: 0, - db_cache: ptr::null(), - db_mblk: ptr::null(), - db_free: ptr::null(), - db_lastfree: ptr::null(), - db_cksumstart: 0, - db_cksumend: 0, - db_cksumstuff: 0, - db_struioun: 0, - db_fthdr: ptr::null(), - db_credp: ptr::null(), - }); - - let dbp = Box::into_raw(dblk); - - // For the purposes of mocking in std the only fields that - // matter are b_rptr and b_wptr. However, in the future we - // will probably want to mock segments packets via b_cont and - // packet chains via b_next. - let mblk = Box::new(mblk_t { - b_next: ptr::null_mut(), - b_prev: ptr::null_mut(), - b_cont: ptr::null_mut(), - // Safety: We know dbp is valid because we just created it. - b_rptr: unsafe { (*dbp).db_base as *mut c_uchar }, - b_wptr: unsafe { (*dbp).db_base.add(len) as *mut c_uchar }, - b_datap: dbp, - b_band: 0, - b_tag: 0, - b_flag: 0, - b_queue: ptr::null(), - }); - - let mp = Box::into_raw(mblk); - // Safety: We know dbp is valid because we just created it. - unsafe { (*dbp).db_mblk = mp as *const mblk_t }; - - mp -} - -// The std equivalent to `freemsg(9F)`. 
-#[cfg(any(feature = "std", test))] -pub(crate) fn mock_freemsg(mut mp: *mut mblk_t) { - while !mp.is_null() { - let cont = unsafe { (*mp).b_cont }; - mock_freeb(mp); - mp = cont; - } -} - -// The std equivalent to `freeb(9F)`. -#[cfg(any(feature = "std", test))] -fn mock_freeb(mp: *mut mblk_t) { - // Safety: All of these were created safely in `mock_alloc()`. - // As long as the other methods don't do any of the following, - // this is safe: - // - // * Modify the `mp`/`dblk` pointers. - // * Increase `len` beyond `limit`. - // * Modify `limit`. - unsafe { - let bmblk = Box::from_raw(mp); - let bdblk = Box::from_raw(bmblk.b_datap as *mut dblk_t); - let buffer = Vec::from_raw_parts( - bdblk.db_base, - bmblk.b_wptr.offset_from(bmblk.b_rptr) as usize, - bdblk.db_lim.offset_from(bdblk.db_base) as usize, - ); - drop(buffer); - drop(bdblk); - drop(bmblk); - } -} - #[cfg(test)] mod test { use super::*; @@ -547,13 +408,6 @@ mod test { #[test] fn read_multi_segment() { - let mp1 = allocb(34); - let mp2 = allocb(20); - - unsafe { - (*mp1).b_cont = mp2; - } - let mut mp1 = MsgBlk::new_ethernet_pkt(Ethernet { destination: DST_MAC, source: SRC_MAC, From af6bbe871c90cbafec7bc43f03a16c8dd2351756 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 28 Oct 2024 12:42:30 +0000 Subject: [PATCH 070/115] More tweaks. 
--- lib/opte/src/ddi/mblk.rs | 1 + lib/opte/src/engine/ingot_packet.rs | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index 5ad022b3..cd00a2a0 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -23,6 +23,7 @@ use core::slice; use illumos_sys_hdrs as ddi; #[cfg(any(feature = "std", test))] use illumos_sys_hdrs::c_uchar; +#[cfg(any(feature = "std", test))] use illumos_sys_hdrs::dblk_t; use illumos_sys_hdrs::mblk_t; use illumos_sys_hdrs::uintptr_t; diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 16d32c9b..0d71350a 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -68,8 +68,6 @@ use core::hash::Hash; use core::ops::Deref; use core::ops::DerefMut; use core::sync::atomic::AtomicPtr; -#[cfg(all(not(feature = "std"), not(test)))] -use illumos_sys_hdrs as ddi; use illumos_sys_hdrs::uintptr_t; use ingot::ethernet::Ethertype; use ingot::geneve::Geneve; From e14bf222c2d6128ef5c8e1f0e627f46aa08b5d47 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 28 Oct 2024 14:08:52 +0000 Subject: [PATCH 071/115] Fix ubench. --- bench/src/kbench/remote.rs | 2 +- bench/src/packet.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bench/src/kbench/remote.rs b/bench/src/kbench/remote.rs index 022f776d..7e6b1965 100644 --- a/bench/src/kbench/remote.rs +++ b/bench/src/kbench/remote.rs @@ -8,7 +8,6 @@ //! over physical links. 
use super::*; -use std::collections::HashSet; use std::io::Read; use std::io::Write; use std::net::Ipv6Addr; @@ -26,6 +25,7 @@ pub struct Routes { pub underlay: Ipv6Addr, } +#[cfg_attr(not(target_os = "illumos"), allow(unused))] pub fn server_session( mut stream: TcpStream, route: Arc, diff --git a/bench/src/packet.rs b/bench/src/packet.rs index dd5f5682..c0f7f76b 100644 --- a/bench/src/packet.rs +++ b/bench/src/packet.rs @@ -4,9 +4,9 @@ // Copyright 2024 Oxide Computer Company +use opte::ddi::mblk::MsgBlk; use opte::engine::dhcpv6::MessageType; use opte::engine::ether::Ethernet; -use opte::engine::ingot_packet::MsgBlk; use opte::engine::ip::v4::Ipv4; use opte::engine::ip::v6::Ipv6; use opte::engine::ip::L3Repr; From d5fa92eb483cf45f28bb9b27ef0a24ac332d0c2c Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 28 Oct 2024 16:25:59 +0000 Subject: [PATCH 072/115] Fixup port entry/return probes --- lib/opte/src/ddi/mblk.rs | 9 ++++- lib/opte/src/engine/ingot_packet.rs | 61 +++++++++++++++++++---------- lib/opte/src/engine/port.rs | 28 ++++++------- 3 files changed, 58 insertions(+), 40 deletions(-) diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index cd00a2a0..2f1b9f6a 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -4,7 +4,7 @@ // Copyright 2024 Oxide Computer Company -use crate::engine::ingot_packet::QueryLen; +use crate::engine::ingot_packet::BufferState; use crate::engine::packet::SegAdjustError; use crate::engine::packet::WrapError; use crate::engine::packet::WriteError; @@ -762,7 +762,7 @@ impl<'a> Read for MsgBlkIterMut<'a> { } } -impl<'a> QueryLen for MsgBlkIterMut<'a> { +impl<'a> BufferState for MsgBlkIterMut<'a> { #[inline] fn len(&self) -> usize { let own_blk_len = self @@ -775,6 +775,11 @@ impl<'a> QueryLen for MsgBlkIterMut<'a> { own_blk_len + self.next_iter().map(|v| v.len()).sum::() } + + #[inline] + fn base_ptr(&self) -> uintptr_t { + self.curr.map(|v| v.as_ptr() as uintptr_t).unwrap_or(0) + } } /// For the 
`no_std`/illumos kernel environment, we want the `mblk_t` diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index 0d71350a..d27aee65 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -674,17 +674,16 @@ pub struct Packet { state: S, } -impl Packet> { +impl Packet> { pub fn new(pkt: T) -> Self where - Initialized2: PacketState, + Initialized: PacketState, { - let len = pkt.len(); - Self { state: Initialized2 { len, inner: pkt } } + Self { state: Initialized { inner: pkt } } } } -impl<'a, T: Read + 'a> Packet> +impl<'a, T: Read + BufferState + 'a> Packet> where T::Chunk: ingot::types::IntoBufPointer<'a> + ByteSliceMut, { @@ -692,7 +691,12 @@ where #[inline] pub fn len(&self) -> usize { - self.state.len + self.state.inner.len() + } + + #[inline] + pub fn mblk_addr(&self) -> uintptr_t { + self.state.inner.base_ptr() } #[inline] @@ -700,12 +704,14 @@ where self, net: NP, ) -> Result>>, ParseError> { - let Packet { state: Initialized2 { len, inner } } = self; + let len = self.len(); + let base_ptr = self.mblk_addr(); + let Packet { state: Initialized { inner } } = self; let meta = net.parse_inbound(inner)?; meta.stack.validate(len)?; - Ok(Packet { state: LiteParsed { meta, len } }) + Ok(Packet { state: LiteParsed { meta, base_ptr, len } }) } #[inline] @@ -713,12 +719,14 @@ where self, net: NP, ) -> Result>>, ParseError> { - let Packet { state: Initialized2 { len, inner } } = self; + let len = self.len(); + let base_ptr = self.mblk_addr(); + let Packet { state: Initialized { inner } } = self; let meta = net.parse_outbound(inner)?; meta.stack.validate(len)?; - Ok(Packet { state: LiteParsed { meta, len } }) + Ok(Packet { state: LiteParsed { meta, base_ptr, len } }) } } @@ -728,7 +736,7 @@ where { #[inline] pub fn to_full_meta(self) -> Packet> { - let Packet { state: LiteParsed { len, meta } } = self; + let Packet { state: LiteParsed { len, base_ptr, meta } } = self; let IngotParsed { stack: 
headers, data, last_chunk } = meta; // TODO: we can probably not do this in some cases, but we @@ -760,6 +768,7 @@ where meta, flow, body_csum, + base_ptr, l4_hash: Memoised::Uninit, body_modified: false, len, @@ -783,6 +792,11 @@ where self.state.len } + #[inline] + pub fn mblk_addr(&self) -> uintptr_t { + self.state.base_ptr + } + #[inline] pub fn flow(&self) -> InnerFlowId { self.meta().flow() @@ -1099,9 +1113,9 @@ impl Packet> { } } + #[inline] pub fn mblk_addr(&self) -> uintptr_t { - // TODO. - 0 + self.state.base_ptr } /// Compute ULP and IP header checksum from scratch. @@ -1319,16 +1333,11 @@ impl Packet> { /// The type state of a packet that has been initialized and allocated, but /// about which nothing else is known besides the length. #[derive(Debug)] -pub struct Initialized2 { - /// Total length of packet, in bytes. This is equal to the sum of - /// the length of the _initialized_ window in all the segments - /// (`b_wptr - b_rptr`). - len: usize, - +pub struct Initialized { inner: T, } -impl PacketState for Initialized2 {} +impl PacketState for Initialized {} impl PacketState for FullParsed {} /// Zerocopy view onto a parsed packet, accompanied by locally @@ -1338,6 +1347,9 @@ pub struct FullParsed { /// the length of the _initialized_ window in all the segments /// (`b_wptr - b_rptr`). len: usize, + /// Base pointer of the contained T, used in dtrace SDTs and the like + /// for correlation and inspection of packet events. + base_ptr: uintptr_t, /// Access to parsed packet headers and the packet body. meta: Box>, /// Current Flow ID of this packet, accountgin for any applied @@ -1373,7 +1385,13 @@ pub struct FullParsed { /// Minimum-size zerocopy view onto a parsed packet, sufficient for fast /// packet transformation. pub struct LiteParsed> { + /// Total length of packet, in bytes. This is equal to the sum of + /// the length of the _initialized_ window in all the segments + /// (`b_wptr - b_rptr`). 
len: usize, + /// Base pointer of the contained T, used in dtrace SDTs and the like + /// for correlation and inspection of packet events. + base_ptr: uintptr_t, meta: IngotParsed, } @@ -1388,8 +1406,9 @@ pub type MblkPacketData<'a> = PacketData>; pub type MblkFullParsed<'a> = FullParsed>; pub type MblkLiteParsed<'a, M> = LiteParsed, M>; -pub trait QueryLen { +pub trait BufferState { fn len(&self) -> usize; + fn base_ptr(&self) -> uintptr_t; } // TODO: don't really care about pushing 'inner' reprs today. diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index a2e4e9d0..72654265 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -85,7 +85,6 @@ use core::result; use core::str::FromStr; use core::sync::atomic::AtomicU64; use core::sync::atomic::Ordering::SeqCst; -#[cfg(all(not(feature = "std"), not(test)))] use illumos_sys_hdrs::uintptr_t; use ingot::geneve::Geneve; use ingot::tcp::TcpRef; @@ -1225,6 +1224,7 @@ impl Port { { let process_start = Moment::now(); let flow_before = pkt.flow(); + let mblk_addr = pkt.mblk_addr(); // Packet processing is split into a few mechanisms based on // expected speed, based on actions and the size of required metadata: @@ -1261,8 +1261,7 @@ impl Port { check_state!(data.state, [PortState::Running]) .map_err(|_| ProcessError::BadState(data.state))?; - // TODO: fixup types here. - // self.port_process_entry_probe(dir, &flow_before, epoch, &pkt); + self.port_process_entry_probe(dir, &flow_before, epoch, mblk_addr); let uft: Option<&Arc>>> = match dir { Direction::Out => data.uft_out.get(&flow_before), @@ -1434,7 +1433,7 @@ impl Port { &flow_before, &flow_after, epoch, - // &pkt, + mblk_addr, &res, ); return res; @@ -1447,9 +1446,6 @@ impl Port { let mut pkt = pkt.to_full_meta(); let mut ameta = ActionMeta::new(); - // TODO: remove/convert to a slopath indicator? 
- self.port_process_entry_probe(dir, &flow_before, epoch, &pkt); - let res = match (&decision, dir) { // (2) Apply retrieved transform. Lock is dropped. // Store cached l4 hash. @@ -1530,7 +1526,7 @@ impl Port { &flow_before, &flow_after, epoch, - // &pkt, + mblk_addr, &res, ); res @@ -1971,12 +1967,13 @@ impl Port { Ok(LayerResult::Allow) } + #[inline] fn port_process_entry_probe( &self, dir: Direction, flow: &InnerFlowId, epoch: u64, - pkt: &Packet, + mblk_addr: uintptr_t, ) { cfg_if::cfg_if! { if #[cfg(all(not(feature = "std"), not(test)))] { @@ -1986,16 +1983,16 @@ impl Port { self.name_cstr.as_ptr() as uintptr_t, flow, epoch as uintptr_t, - pkt.mblk_addr(), + mblk_addr, ); } } else if #[cfg(feature = "usdt")] { let flow_s = flow.to_string(); crate::opte_provider::port__process__entry!( - || (dir, &self.name, flow_s, epoch, pkt.mblk_addr()) + || (dir, &self.name, flow_s, epoch, mblk_addr) ); } else { - let (..) = (dir, flow, epoch, pkt); + let (..) = (dir, flow, epoch, mblk_addr); } } } @@ -2007,12 +2004,9 @@ impl Port { flow_before: &InnerFlowId, flow_after: &InnerFlowId, epoch: u64, - // pkt: &Packet2, + mblk_addr: uintptr_t, res: &result::Result, ) { - // let flow_after = pkt.flow(); - let mblk_addr = 0; // TODO. - cfg_if! { if #[cfg(all(not(feature = "std"), not(test)))] { @@ -2080,7 +2074,7 @@ impl Port { ) ); } else { - let (..) = (dir, flow_before, flow_after, epoch, /*pkt,*/ res); + let (..) = (dir, flow_before, flow_after, epoch, mblk_addr, res); } } } From fbc661dc4002419d45d9a012bc32d1dbca8f84ca Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 29 Oct 2024 14:55:06 +0000 Subject: [PATCH 073/115] Cleanup, find meaningful TODOs. 
--- Cargo.lock | 6 +- Cargo.toml | 2 +- lib/opte/src/engine/ether.rs | 120 +++++ lib/opte/src/engine/headers.rs | 210 +++++++- lib/opte/src/engine/icmp/v4.rs | 17 + lib/opte/src/engine/icmp/v6.rs | 17 + lib/opte/src/engine/ingot_packet.rs | 753 +++------------------------- lib/opte/src/engine/ip/mod.rs | 157 +++++- lib/opte/src/engine/ip/v6.rs | 72 +++ lib/opte/src/engine/mod.rs | 19 +- lib/opte/src/engine/packet.rs | 1 - lib/opte/src/engine/parse.rs | 80 ++- lib/opte/src/engine/port.rs | 18 +- lib/opte/src/engine/predicate.rs | 6 +- lib/opte/src/engine/rule.rs | 9 +- lib/oxide-vpc/src/engine/mod.rs | 6 +- 16 files changed, 755 insertions(+), 738 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 85ce71de..d82593a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -895,7 +895,7 @@ dependencies = [ [[package]] name = "ingot" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=8cdf5c25833f485d9574aa3dc5c3d15964d19400#8cdf5c25833f485d9574aa3dc5c3d15964d19400" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=3b38859ca143eaa1287308359d0f1ddea07826fd#3b38859ca143eaa1287308359d0f1ddea07826fd" dependencies = [ "bitflags 2.6.0", "ingot-macros", @@ -908,7 +908,7 @@ dependencies = [ [[package]] name = "ingot-macros" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=8cdf5c25833f485d9574aa3dc5c3d15964d19400#8cdf5c25833f485d9574aa3dc5c3d15964d19400" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=3b38859ca143eaa1287308359d0f1ddea07826fd#3b38859ca143eaa1287308359d0f1ddea07826fd" dependencies = [ "darling", "itertools 0.13.0", @@ -921,7 +921,7 @@ dependencies = [ [[package]] name = "ingot-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=8cdf5c25833f485d9574aa3dc5c3d15964d19400#8cdf5c25833f485d9574aa3dc5c3d15964d19400" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=3b38859ca143eaa1287308359d0f1ddea07826fd#3b38859ca143eaa1287308359d0f1ddea07826fd" 
dependencies = [ "ingot-macros", "macaddr", diff --git a/Cargo.toml b/Cargo.toml index 6b14d832..d385a481 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,7 +50,7 @@ ctor = "0.2" darling = "0.20" dyn-clone = "1.0" heapless = "0.8" -ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "8cdf5c25833f485d9574aa3dc5c3d15964d19400"} +ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "3b38859ca143eaa1287308359d0f1ddea07826fd"} ipnetwork = { version = "0.20", default-features = false } itertools = { version = "0.13", default-features = false } libc = "0.2" diff --git a/lib/opte/src/engine/ether.rs b/lib/opte/src/engine/ether.rs index 217a0360..cbef3b24 100644 --- a/lib/opte/src/engine/ether.rs +++ b/lib/opte/src/engine/ether.rs @@ -6,6 +6,9 @@ //! Ethernet frames. +use super::headers::HasInnerCksum; +use super::headers::HeaderActionError; +use super::headers::HeaderActionModify; use super::headers::ModifyAction; use super::headers::PushAction; use alloc::string::String; @@ -16,11 +19,15 @@ use core::fmt::Display; use core::result; use core::str::FromStr; use ingot::ethernet::Ethertype; +use ingot::types::Header; use ingot::types::HeaderLen; +use ingot::types::InlineHeader; use ingot::Ingot; use opte_api::MacAddr; use serde::Deserialize; use serde::Serialize; +use zerocopy::ByteSlice; +use zerocopy::ByteSliceMut; pub const ETHER_TYPE_ETHER: u16 = 0x6558; pub const ETHER_TYPE_IPV4: u16 = 0x0800; @@ -239,6 +246,119 @@ impl EtherMeta { } } +impl HeaderActionModify for EthernetPacket { + #[inline] + fn run_modify( + &mut self, + mod_spec: &EtherMod, + ) -> Result<(), HeaderActionError> { + if let Some(src) = mod_spec.src { + self.set_source(src); + } + if let Some(dst) = mod_spec.dst { + self.set_destination(dst); + } + + Ok(()) + } +} + +impl HeaderActionModify + for InlineHeader> +{ + #[inline] + fn run_modify( + &mut self, + mod_spec: &EtherMod, + ) -> Result<(), HeaderActionError> { + match self { + InlineHeader::Repr(a) => { + if let 
Some(src) = mod_spec.src { + a.set_source(src); + } + if let Some(dst) = mod_spec.dst { + a.set_destination(dst); + } + } + InlineHeader::Raw(a) => { + if let Some(src) = mod_spec.src { + a.set_source(src); + } + if let Some(dst) = mod_spec.dst { + a.set_destination(dst); + } + } + } + + Ok(()) + } +} + +impl HasInnerCksum for InlineHeader> { + const HAS_CKSUM: bool = false; +} + +impl HasInnerCksum for EthernetPacket { + const HAS_CKSUM: bool = false; +} + +impl From for Header> { + #[inline] + fn from(value: EtherMeta) -> Self { + Header::Repr( + Ethernet { + destination: value.dst, + source: value.src, + ethertype: Ethertype(u16::from(value.ether_type)), + } + .into(), + ) + } +} + +impl From + for InlineHeader> +{ + #[inline] + fn from(value: EtherMeta) -> Self { + InlineHeader::Repr( + Ethernet { + destination: value.dst, + source: value.src, + ethertype: Ethertype(u16::from(value.ether_type)), + } + .into(), + ) + } +} + +impl PushAction>> + for EtherMeta +{ + #[inline] + fn push(&self) -> InlineHeader> { + InlineHeader::Repr(Ethernet { + destination: self.dst, + source: self.src, + ethertype: Ethertype(u16::from(self.ether_type)), + }) + } +} + +impl PushAction> for EtherMeta { + #[inline] + fn push(&self) -> EthernetPacket { + Header::Repr( + Ethernet { + destination: self.dst, + source: self.src, + ethertype: Ethertype(u16::from(self.ether_type)), + } + .into(), + ) + } +} + #[cfg(test)] mod test { use super::*; diff --git a/lib/opte/src/engine/headers.rs b/lib/opte/src/engine/headers.rs index 5a983207..637f755a 100644 --- a/lib/opte/src/engine/headers.rs +++ b/lib/opte/src/engine/headers.rs @@ -4,11 +4,14 @@ // Copyright 2024 Oxide Computer Company -//! Header metadata combinations for IP, ULP, and Encap. +//! Header metadata modifications for IP, ULP, and Encap. 
use super::geneve::GeneveMeta; use super::geneve::GeneveMod; use super::geneve::GenevePush; +use super::geneve::OxideOption; +use super::geneve::GENEVE_OPT_CLASS_OXIDE; +use super::geneve::GENEVE_PORT; use super::ip::v4::Ipv4Mod; use super::ip::v4::Ipv4Push; use super::ip::v6::Ipv6Mod; @@ -18,12 +21,27 @@ use super::tcp::TcpPush; use super::udp::UdpMod; use super::udp::UdpPush; use core::fmt; +use ingot::ethernet::Ethertype; +use ingot::geneve::Geneve; +use ingot::geneve::GeneveMut; +use ingot::geneve::GeneveOpt; +use ingot::geneve::GeneveOptionType; +use ingot::geneve::ValidGeneve; +use ingot::types::util::Repeated; +use ingot::types::Emit; +use ingot::types::Header; +use ingot::types::HeaderLen; +use ingot::types::InlineHeader; +use ingot::udp::Udp; +use ingot::udp::ValidUdp; pub use opte_api::IpAddr; pub use opte_api::IpCidr; pub use opte_api::Protocol; pub use opte_api::Vni; use serde::Deserialize; use serde::Serialize; +use zerocopy::ByteSlice; +use zerocopy::ByteSliceMut; pub const AF_INET: i32 = 2; pub const AF_INET6: i32 = 26; @@ -38,12 +56,6 @@ pub trait ModifyAction { fn modify(&self, meta: &mut HdrM); } -#[derive(Clone, Copy, Debug)] -pub enum IpType { - Ipv4, - Ipv6, -} - #[derive(Clone, Copy, Debug, Deserialize, Serialize)] pub enum IpPush { Ip4(Ipv4Push), @@ -159,6 +171,189 @@ impl EncapMeta { } } +impl HeaderActionModify + for InlineHeader> +{ + #[inline] + fn run_modify( + &mut self, + mod_spec: &EncapMod, + ) -> Result<(), HeaderActionError> { + match (self, mod_spec) { + ( + InlineHeader::Repr(EncapMeta::Geneve(g)), + EncapMod::Geneve(mod_spec), + ) => { + if let Some(vni) = mod_spec.vni { + g.vni = vni; + } + } + ( + InlineHeader::Raw(ValidEncapMeta::Geneve(_, g)), + EncapMod::Geneve(mod_spec), + ) => { + if let Some(vni) = mod_spec.vni { + g.set_vni(vni); + } + } + } + + Ok(()) + } +} + +impl HasInnerCksum + for InlineHeader> +{ + const HAS_CKSUM: bool = false; +} + +impl From for Header> { + #[inline] + fn from(value: EncapMeta) -> Self { + 
Header::Repr(value.into()) + } +} + +impl From + for InlineHeader> +{ + #[inline] + fn from(value: EncapMeta) -> Self { + InlineHeader::Repr(value) + } +} + +pub enum ValidEncapMeta { + Geneve(ValidUdp, ValidGeneve), +} + +impl Emit for EncapMeta { + #[inline] + fn emit_raw(&self, buf: V) -> usize { + SizeHoldingEncap { encapped_len: 0, meta: self }.emit_raw(buf) + } + + #[inline] + fn needs_emit(&self) -> bool { + true + } +} + +impl Emit for ValidEncapMeta { + #[inline] + fn emit_raw(&self, buf: V) -> usize { + match self { + ValidEncapMeta::Geneve(u, g) => (u, g).emit_raw(buf), + } + } + + #[inline] + fn needs_emit(&self) -> bool { + match self { + ValidEncapMeta::Geneve(u, g) => u.needs_emit() && g.needs_emit(), + } + } +} + +impl HeaderLen for EncapMeta { + const MINIMUM_LENGTH: usize = Udp::MINIMUM_LENGTH + Geneve::MINIMUM_LENGTH; + + #[inline] + fn packet_length(&self) -> usize { + match self { + EncapMeta::Geneve(g) => { + Self::MINIMUM_LENGTH + + g.oxide_external_pkt.then_some(4).unwrap_or_default() + } + } + } +} + +impl HeaderLen for ValidEncapMeta { + const MINIMUM_LENGTH: usize = Udp::MINIMUM_LENGTH + Geneve::MINIMUM_LENGTH; + + #[inline] + fn packet_length(&self) -> usize { + match self { + ValidEncapMeta::Geneve(u, g) => { + u.packet_length() + g.packet_length() + } + } + } +} + +pub struct SizeHoldingEncap<'a> { + pub encapped_len: u16, + pub meta: &'a EncapMeta, +} + +unsafe impl<'a> ingot::types::EmitDoesNotRelyOnBufContents + for SizeHoldingEncap<'a> +{ +} + +impl<'a> HeaderLen for SizeHoldingEncap<'a> { + const MINIMUM_LENGTH: usize = EncapMeta::MINIMUM_LENGTH; + + #[inline] + fn packet_length(&self) -> usize { + self.meta.packet_length() + } +} + +impl<'a> Emit for SizeHoldingEncap<'a> { + #[inline] + fn emit_raw(&self, buf: V) -> usize { + match self.meta { + EncapMeta::Geneve(g) => { + let mut opts = vec![]; + + if g.oxide_external_pkt { + opts.push(GeneveOpt { + class: GENEVE_OPT_CLASS_OXIDE, + option_type: GeneveOptionType( + 
OxideOption::External.opt_type(), + ), + ..Default::default() + }); + } + + let options = Repeated::new(opts); + let opt_len_unscaled = options.packet_length(); + let opt_len = (opt_len_unscaled >> 2) as u8; + + let geneve = Geneve { + protocol_type: Ethertype::ETHERNET, + vni: g.vni, + opt_len, + options, + ..Default::default() + }; + + let length = self.encapped_len + + (Udp::MINIMUM_LENGTH + geneve.packet_length()) as u16; + + ( + Udp { + source: g.entropy, + destination: GENEVE_PORT, + length, + ..Default::default() + }, + &geneve, + ) + .emit_raw(buf) + } + } + } + + #[inline] + fn needs_emit(&self) -> bool { + true + } +} + #[derive( Clone, Copy, Debug, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize, )] @@ -313,6 +508,7 @@ impl HeaderAction { pub enum HeaderActionError { MissingHeader, CantPop, + MalformedExtension, } pub trait ModifyActionArg {} diff --git a/lib/opte/src/engine/icmp/v4.rs b/lib/opte/src/engine/icmp/v4.rs index 689ccec6..43871d15 100644 --- a/lib/opte/src/engine/icmp/v4.rs +++ b/lib/opte/src/engine/icmp/v4.rs @@ -14,9 +14,12 @@ use crate::engine::ip::v4::Ipv4; use crate::engine::ip::L3; use crate::engine::predicate::Ipv4AddrMatch; use ingot::ethernet::Ethertype; +use ingot::icmp::IcmpV4Packet; +use ingot::icmp::IcmpV4Ref; use ingot::ip::IpProtocol; use ingot::types::Emit; use ingot::types::HeaderLen; +use ingot::types::HeaderParse; pub use opte_api::ip::IcmpEchoReply; use smoltcp::wire; use smoltcp::wire::Icmpv4Packet; @@ -172,3 +175,17 @@ impl Display for MessageType { write!(f, "{}", self.inner) } } + +impl QueryEcho for IcmpV4Packet { + #[inline] + fn echo_id(&self) -> Option { + match (self.code(), self.ty()) { + (0, 0) | (0, 8) => { + ValidIcmpEcho::parse(self.rest_of_hdr_ref().as_slice()) + .ok() + .map(|(v, ..)| v.id()) + } + _ => None, + } + } +} diff --git a/lib/opte/src/engine/icmp/v6.rs b/lib/opte/src/engine/icmp/v6.rs index d443303a..281d7e8b 100644 --- a/lib/opte/src/engine/icmp/v6.rs +++ b/lib/opte/src/engine/icmp/v6.rs 
@@ -15,8 +15,11 @@ use crate::engine::ip::v6::Ipv6Ref; use crate::engine::predicate::Ipv6AddrMatch; use alloc::string::String; use ingot::ethernet::Ethertype; +use ingot::icmp::IcmpV6Packet; +use ingot::icmp::IcmpV6Ref; use ingot::ip::IpProtocol as IngotIpProto; use ingot::types::Emit; +use ingot::types::HeaderParse; pub use opte_api::ip::Icmpv6EchoReply; pub use opte_api::ip::Ipv6Addr; pub use opte_api::ip::Ipv6Cidr; @@ -632,3 +635,17 @@ impl HairpinAction for NeighborAdvertisement { )))) } } + +impl QueryEcho for IcmpV6Packet { + #[inline] + fn echo_id(&self) -> Option { + match (self.code(), self.ty()) { + (0, 128) | (0, 129) => { + ValidIcmpEcho::parse(&self.rest_of_hdr_ref()[..]) + .ok() + .map(|(v, ..)| v.id()) + } + _ => None, + } + } +} diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs index d27aee65..180fa13a 100644 --- a/lib/opte/src/engine/ingot_packet.rs +++ b/lib/opte/src/engine/ingot_packet.rs @@ -5,39 +5,19 @@ // Copyright 2024 Oxide Computer Company use super::checksum::Checksum; -use super::ether::EtherMeta; -use super::ether::EtherMod; use super::ether::Ethernet; -use super::ether::EthernetMut; use super::ether::EthernetPacket; use super::ether::ValidEthernet; -use super::geneve::OxideOption; -use super::geneve::GENEVE_OPT_CLASS_OXIDE; -use super::geneve::GENEVE_PORT; use super::headers::EncapMeta; -use super::headers::EncapMod; use super::headers::EncapPush; -use super::headers::HasInnerCksum; -use super::headers::HeaderActionError; -use super::headers::HeaderActionModify; -use super::headers::IpMod; use super::headers::IpPush; -use super::headers::PushAction; -use super::headers::UlpMetaModify; -use super::icmp::IcmpEchoMut; -use super::icmp::IcmpEchoRef; -use super::icmp::QueryEcho; -use super::icmp::ValidIcmpEcho; -use super::ip::v4::Ipv4; -use super::ip::v4::Ipv4Mut; +use super::headers::SizeHoldingEncap; +use super::headers::ValidEncapMeta; use super::ip::v4::Ipv4Packet; use super::ip::v4::Ipv4Ref; -use 
super::ip::v6::Ipv6; -use super::ip::v6::Ipv6Mut; use super::ip::v6::Ipv6Packet; use super::ip::v6::Ipv6Ref; use super::ip::L3Repr; -use super::ip::ValidL3; use super::ip::L3; use super::packet::AddrPair; use super::packet::BodyTransform; @@ -69,41 +49,29 @@ use core::ops::Deref; use core::ops::DerefMut; use core::sync::atomic::AtomicPtr; use illumos_sys_hdrs::uintptr_t; -use ingot::ethernet::Ethertype; -use ingot::geneve::Geneve; -use ingot::geneve::GeneveMut; -use ingot::geneve::GeneveOpt; -use ingot::geneve::GeneveOptionType; use ingot::geneve::GeneveRef; -use ingot::geneve::ValidGeneve; use ingot::icmp::IcmpV4Mut; use ingot::icmp::IcmpV4Packet; use ingot::icmp::IcmpV4Ref; use ingot::icmp::IcmpV6Mut; use ingot::icmp::IcmpV6Packet; use ingot::icmp::IcmpV6Ref; -use ingot::ip::IpProtocol; -use ingot::ip::Ipv4Flags; -use ingot::tcp::TcpFlags; use ingot::tcp::TcpMut; use ingot::tcp::TcpPacket; use ingot::tcp::TcpRef; -use ingot::types::util::Repeated; use ingot::types::BoxedHeader; use ingot::types::Emit; -use ingot::types::Header as IngotHeader; +use ingot::types::Header; use ingot::types::HeaderLen; -use ingot::types::HeaderParse; use ingot::types::InlineHeader; +use ingot::types::IntoBufPointer; use ingot::types::NextLayer; use ingot::types::Parsed as IngotParsed; use ingot::types::Read; use ingot::types::ToOwnedPacket; -use ingot::udp::Udp; use ingot::udp::UdpMut; use ingot::udp::UdpPacket; use ingot::udp::UdpRef; -use ingot::udp::ValidUdp; use opte_api::Direction; use opte_api::Ipv6Addr; use opte_api::Vni; @@ -111,7 +79,7 @@ use zerocopy::ByteSlice; use zerocopy::ByteSliceMut; use zerocopy::IntoBytes; -pub struct OpteUnifiedLengths { +pub struct InitialLayerLens { pub outer_eth: usize, pub outer_l3: usize, pub outer_encap: usize, @@ -121,7 +89,7 @@ pub struct OpteUnifiedLengths { pub inner_ulp: usize, } -impl OpteUnifiedLengths { +impl InitialLayerLens { #[inline] pub fn hdr_len(&self) -> usize { self.outer_eth @@ -133,10 +101,8 @@ impl OpteUnifiedLengths { } } 
-pub enum ValidEncapMeta { - Geneve(ValidUdp, ValidGeneve), -} - +/// Full metadata representation for a packet entering the standard ULP +/// path, or a full table walk over the slowpath. pub struct OpteMeta { pub outer_eth: Option>>, pub outer_l3: Option>, @@ -147,147 +113,13 @@ pub struct OpteMeta { pub inner_ulp: Option>, } -pub type OpteParsed = IngotParsed::Chunk>, T>; -pub type OpteParsed2 = IngotParsed; - -impl OpteMeta { - #[inline] - pub fn convert_ingot, Q: Read>( - value: IngotParsed, - ) -> OpteParsed { - let IngotParsed { stack: headers, data, last_chunk } = value; - - IngotParsed { stack: headers.into(), data, last_chunk } - } -} - -struct SizeHoldingEncap<'a> { - encapped_len: u16, - meta: &'a EncapMeta, -} - -unsafe impl<'a> ingot::types::EmitDoesNotRelyOnBufContents - for SizeHoldingEncap<'a> -{ -} - -impl<'a> HeaderLen for SizeHoldingEncap<'a> { - const MINIMUM_LENGTH: usize = EncapMeta::MINIMUM_LENGTH; - - #[inline] - fn packet_length(&self) -> usize { - self.meta.packet_length() - } -} - -impl<'a> Emit for SizeHoldingEncap<'a> { - #[inline] - fn emit_raw(&self, buf: V) -> usize { - match self.meta { - EncapMeta::Geneve(g) => { - let mut opts = vec![]; - - if g.oxide_external_pkt { - opts.push(GeneveOpt { - class: GENEVE_OPT_CLASS_OXIDE, - option_type: GeneveOptionType( - OxideOption::External.opt_type(), - ), - ..Default::default() - }); - } - - let options = Repeated::new(opts); - let opt_len_unscaled = options.packet_length(); - let opt_len = (opt_len_unscaled >> 2) as u8; - - let geneve = Geneve { - protocol_type: Ethertype::ETHERNET, - vni: g.vni, - opt_len, - options, - ..Default::default() - }; - - let length = self.encapped_len - + (Udp::MINIMUM_LENGTH + geneve.packet_length()) as u16; - - ( - Udp { - source: g.entropy, - destination: GENEVE_PORT, - length, - ..Default::default() - }, - &geneve, - ) - .emit_raw(buf) - } - } - } - - #[inline] - fn needs_emit(&self) -> bool { - true - } -} - -impl Emit for EncapMeta { - #[inline] - fn 
emit_raw(&self, buf: V) -> usize { - SizeHoldingEncap { encapped_len: 0, meta: self }.emit_raw(buf) - } - - #[inline] - fn needs_emit(&self) -> bool { - true - } -} - -impl Emit for ValidEncapMeta { - #[inline] - fn emit_raw(&self, buf: V) -> usize { - match self { - ValidEncapMeta::Geneve(u, g) => (u, g).emit_raw(buf), - } - } - - #[inline] - fn needs_emit(&self) -> bool { - match self { - ValidEncapMeta::Geneve(u, g) => u.needs_emit() && g.needs_emit(), - } - } -} - -impl HeaderLen for EncapMeta { - const MINIMUM_LENGTH: usize = Udp::MINIMUM_LENGTH + Geneve::MINIMUM_LENGTH; - - #[inline] - fn packet_length(&self) -> usize { - match self { - EncapMeta::Geneve(g) => { - Self::MINIMUM_LENGTH - + g.oxide_external_pkt.then_some(4).unwrap_or_default() - } - } - } -} - -impl HeaderLen for ValidEncapMeta { - const MINIMUM_LENGTH: usize = Udp::MINIMUM_LENGTH + Geneve::MINIMUM_LENGTH; - - #[inline] - fn packet_length(&self) -> usize { - match self { - ValidEncapMeta::Geneve(u, g) => { - u.packet_length() + g.packet_length() - } - } - } -} - -// This really needs a rethink, but also I just need to get this working... +/// Helper for reusing access to all packet body segments. +/// +/// This is necessary because `MsgBlk`s in particular do not +/// allow us to walk backward within a packet -- if we need them, +/// then we need to save them out for all future uses. +/// The other part is that the majority of packets (ULP hits) +/// do not want to interact with body segments at all. struct PktBodyWalker { base: Cell, T)>>, slice: AtomicPtr>, @@ -320,7 +152,7 @@ impl PktBodyWalker { // The next question is one of ownership. // We know that these chunks are at least &[u8]s, they // *will* be exclusive if ByteSliceMut is met (because they are - // sourced from an exclusive borrow on something which ownas a [u8]). + // sourced from an exclusive borrow on something which owns a [u8]). // This allows us to cast to &mut later, but not here! 
let mut to_hold = vec![]; if let Some(ref mut chunk) = first { @@ -365,7 +197,6 @@ impl PktBodyWalker { } assert!(!slice_ptr.is_null()); - // let use_ref: &[_] = &b; unsafe { let a = (&*(*slice_ptr)) as *const _; core::mem::transmute(a) @@ -395,9 +226,10 @@ impl PktBodyWalker { } } +/// Packet state for the standard ULP path, or a full table walk over the slowpath. pub struct PacketData { pub(crate) headers: OpteMeta, - initial_lens: Option>, + initial_lens: Option>, body: PktBodyWalker, } @@ -421,24 +253,8 @@ impl core::fmt::Debug for PacketData { } } -pub fn ulp_src_port(pkt: &Ulp) -> Option { - match pkt { - Ulp::Tcp(t) => Some(t.source()), - Ulp::Udp(t) => Some(t.source()), - _ => None, - } -} - -pub fn ulp_dst_port(pkt: &Ulp) -> Option { - match pkt { - Ulp::Tcp(t) => Some(t.destination()), - Ulp::Udp(t) => Some(t.destination()), - _ => None, - } -} - impl PacketData { - pub fn initial_lens(&self) -> Option<&OpteUnifiedLengths> { + pub fn initial_lens(&self) -> Option<&InitialLayerLens> { self.initial_lens.as_ref().map(|v| &**v) } @@ -452,7 +268,6 @@ impl PacketData { self.headers.outer_l3.as_ref() } - // Need to expose this a lil cleaner... /// Returns whether this packet is sourced from outside the rack, /// in addition to its VNI. pub fn outer_encap_geneve_vni_and_origin(&self) -> Option<(Vni, bool)> { @@ -467,15 +282,6 @@ impl PacketData { } } - // Again: really need to make Owned/Direct choices better-served by ingot. - // this interface sucks. - pub fn outer_ip6_addrs(&self) -> Option<(Ipv6Addr, Ipv6Addr)> { - match &self.headers.outer_l3 { - Some(L3::Ipv6(v6)) => Some((v6.source(), v6.destination())), - _ => None, - } - } - pub fn inner_ether(&self) -> &EthernetPacket { &self.headers.inner_eth } @@ -541,7 +347,6 @@ impl PacketData { self.body.body_segs() } - // right place for this to live? Or is `meta()` misnamed? 
pub fn copy_remaining(&self) -> Vec where T::Chunk: ByteSliceMut, @@ -685,10 +490,8 @@ impl Packet> { impl<'a, T: Read + BufferState + 'a> Packet> where - T::Chunk: ingot::types::IntoBufPointer<'a> + ByteSliceMut, + T::Chunk: IntoBufPointer<'a> + ByteSliceMut, { - // TODO: cleanup type aliases. - #[inline] pub fn len(&self) -> usize { self.state.inner.len() @@ -732,7 +535,7 @@ where impl<'a, T: Read + 'a, M: LightweightMeta> Packet> where - T::Chunk: ingot::types::IntoBufPointer<'a>, + T::Chunk: IntoBufPointer<'a>, { #[inline] pub fn to_full_meta(self) -> Packet> { @@ -747,7 +550,7 @@ where let headers: OpteMeta<_> = headers.into(); let initial_lens = Some( - OpteUnifiedLengths { + InitialLayerLens { outer_eth: headers.outer_eth.packet_length(), outer_l3: headers.outer_l3.packet_length(), outer_encap: headers.outer_encap.packet_length(), @@ -819,7 +622,7 @@ impl Packet> { #[inline] /// Convert a packet's metadata into a set of instructions /// needed to serialize all its changes to the wire. - pub fn emit_spec(self) -> Result + pub fn emit_spec(mut self) -> Result where T::Chunk: ByteSliceMut, { @@ -829,6 +632,7 @@ impl Packet> { // extant fields we rewound past. // - Rewind up to+including that point in original // pkt space. + let l4_hash = self.l4_hash(); let state = self.state; let init_lens = state.meta.initial_lens.unwrap(); let headers = state.meta.headers; @@ -846,8 +650,6 @@ impl Packet> { // do this sort of thing. We are so, so far from that... 
let mut force_serialize = false; - use ingot::types::InlineHeader; - match headers.inner_ulp { Some(ulp) => { let l = ulp.packet_length(); @@ -858,24 +660,16 @@ impl Packet> { push_spec.inner.get_or_insert_with(Default::default); inner.ulp = Some(match ulp { - Ulp::Tcp(IngotHeader::Repr(t)) => UlpRepr::Tcp(*t), - Ulp::Tcp(IngotHeader::Raw(t)) => { - UlpRepr::Tcp((&t).into()) - } - Ulp::Udp(IngotHeader::Repr(t)) => UlpRepr::Udp(*t), - Ulp::Udp(IngotHeader::Raw(t)) => { - UlpRepr::Udp((&t).into()) - } - Ulp::IcmpV4(IngotHeader::Repr(t)) => { - UlpRepr::IcmpV4(*t) - } - Ulp::IcmpV4(IngotHeader::Raw(t)) => { + Ulp::Tcp(Header::Repr(t)) => UlpRepr::Tcp(*t), + Ulp::Tcp(Header::Raw(t)) => UlpRepr::Tcp((&t).into()), + Ulp::Udp(Header::Repr(t)) => UlpRepr::Udp(*t), + Ulp::Udp(Header::Raw(t)) => UlpRepr::Udp((&t).into()), + Ulp::IcmpV4(Header::Repr(t)) => UlpRepr::IcmpV4(*t), + Ulp::IcmpV4(Header::Raw(t)) => { UlpRepr::IcmpV4((&t).into()) } - Ulp::IcmpV6(IngotHeader::Repr(t)) => { - UlpRepr::IcmpV6(*t) - } - Ulp::IcmpV6(IngotHeader::Raw(t)) => { + Ulp::IcmpV6(Header::Repr(t)) => UlpRepr::IcmpV6(*t), + Ulp::IcmpV6(Header::Raw(t)) => { UlpRepr::IcmpV6((&t).into()) } }); @@ -901,15 +695,13 @@ impl Packet> { push_spec.inner.get_or_insert_with(Default::default); inner.l3 = Some(match l3 { - L3::Ipv4(IngotHeader::Repr(v4)) => L3Repr::Ipv4(*v4), - L3::Ipv4(IngotHeader::Raw(v4)) => { - L3Repr::Ipv4((&v4).into()) - } - L3::Ipv6(IngotHeader::Repr(v6)) => L3Repr::Ipv6(*v6), + L3::Ipv4(Header::Repr(v4)) => L3Repr::Ipv4(*v4), + L3::Ipv4(Header::Raw(v4)) => L3Repr::Ipv4((&v4).into()), + L3::Ipv6(Header::Repr(v6)) => L3Repr::Ipv6(*v6), // We can't actually do structural mods here today using OPTE, // but account for the possibiliry at least. - L3::Ipv6(IngotHeader::Raw(v6)) => { + L3::Ipv6(Header::Raw(v6)) => { L3Repr::Ipv6(v6.to_owned(None)?) 
} }); @@ -929,8 +721,8 @@ impl Packet> { if force_serialize { let inner = push_spec.inner.get_or_insert_with(Default::default); inner.eth = match headers.inner_eth { - IngotHeader::Repr(p) => *p, - IngotHeader::Raw(p) => (&p).into(), + Header::Repr(p) => *p, + Header::Raw(p) => (&p).into(), }; rewind += init_lens.inner_eth; } @@ -1022,11 +814,11 @@ impl Packet> { _ => {} } - Ok(OldEmitSpec { + Ok(EmitSpec { rewind: rewind as u16, - payload_len: payload_len as u16, - encapped_len: encapped_len as u16, - push_spec, + ulp_len: encapped_len as u32, + prepend: PushSpec::Slowpath(push_spec.into()), + l4_hash, }) } @@ -1163,18 +955,18 @@ impl Packet> { Ulp::Tcp(tcp) => { tcp.set_checksum(0); match tcp { - IngotHeader::Repr(tcp) => { + Header::Repr(tcp) => { let mut bytes = [0u8; 56]; tcp.emit_raw(&mut bytes[..]); csum.add_bytes(&bytes[..]); } - IngotHeader::Raw(tcp) => { + Header::Raw(tcp) => { csum.add_bytes(tcp.0.as_bytes()); match &tcp.1 { - IngotHeader::Repr(opts) => { + Header::Repr(opts) => { csum.add_bytes(&*opts); } - IngotHeader::Raw(opts) => { + Header::Raw(opts) => { csum.add_bytes(&*opts); } } @@ -1185,12 +977,12 @@ impl Packet> { Ulp::Udp(udp) => { udp.set_checksum(0); match udp { - IngotHeader::Repr(udp) => { + Header::Repr(udp) => { let mut bytes = [0u8; 8]; udp.emit_raw(&mut bytes[..]); csum.add_bytes(&bytes[..]); } - IngotHeader::Raw(udp) => { + Header::Raw(udp) => { csum.add_bytes(udp.0.as_bytes()); } } @@ -1285,18 +1077,18 @@ impl Packet> { Ulp::Tcp(tcp) => { tcp.set_checksum(0); match tcp { - IngotHeader::Repr(tcp) => { + Header::Repr(tcp) => { let mut bytes = [0u8; 56]; tcp.emit_raw(&mut bytes[..]); csum.add_bytes(&bytes[..]); } - IngotHeader::Raw(tcp) => { + Header::Raw(tcp) => { csum.add_bytes(tcp.0.as_bytes()); match &tcp.1 { - IngotHeader::Repr(opts) => { + Header::Repr(opts) => { csum.add_bytes(&*opts); } - IngotHeader::Raw(opts) => { + Header::Raw(opts) => { csum.add_bytes(&*opts); } } @@ -1307,12 +1099,12 @@ impl Packet> { Ulp::Udp(udp) => { 
udp.set_checksum(0); match udp { - IngotHeader::Repr(udp) => { + Header::Repr(udp) => { let mut bytes = [0u8; 8]; udp.emit_raw(&mut bytes[..]); csum.add_bytes(&bytes[..]); } - IngotHeader::Raw(udp) => { + Header::Raw(udp) => { csum.add_bytes(udp.0.as_bytes()); } } @@ -1399,7 +1191,7 @@ impl> PacketState for LiteParsed {} impl> LiteParsed {} -// XXX: Needed for now to account for not wanting to redesign +// These are needed for now to account for not wanting to redesign // ActionDescs to be generic over T (trait object safety rules, etc.), // in addition to needing to rework Hairpin actions. pub type MblkPacketData<'a> = PacketData>; @@ -1411,7 +1203,7 @@ pub trait BufferState { fn base_ptr(&self) -> uintptr_t; } -// TODO: don't really care about pushing 'inner' reprs today. +/// A set of headers to be emitted at the head of a packet. #[derive(Clone, Debug, Default)] pub struct OpteEmit { outer_eth: Option, @@ -1419,10 +1211,11 @@ pub struct OpteEmit { outer_encap: Option, // We can (but do not often) push/pop inner meta. - // Splitting minimises struct size in the general case. + // Splitting via Box minimises struct size in the general case. inner: Option>, } +/// Inner headers needing completely rewritten/emitted in a packet. #[derive(Clone, Debug, Default)] pub struct OpteInnerEmit { eth: Ethernet, @@ -1430,6 +1223,11 @@ pub struct OpteInnerEmit { ulp: Option, } +/// A specification of how a packet should be modified to finish processing, +/// after existing fields have been updated. +/// +/// This will add and/or remove several layers from the underlying `MsgBlk`, +/// and can be queried for routing specific info (access to new encap, l4 hash). #[derive(Clone, Debug)] pub struct EmitSpec { pub(crate) prepend: PushSpec, @@ -1445,12 +1243,15 @@ impl Default for EmitSpec { } impl EmitSpec { + /// Return the L4 hash of the inner flow, used for multipath selection. 
#[inline] #[must_use] pub fn l4_hash(&self) -> u32 { self.l4_hash } + /// Perform final structural transformations to a packet (removal of + /// existing headers, and copying in new/replacement headers). #[inline] #[must_use] pub fn apply(&self, mut pkt: MsgBlk) -> MsgBlk { @@ -1588,6 +1389,7 @@ impl EmitSpec { out } + /// Returns the Geneve VNI when this spec pushes Geneve encapsulation. #[inline] pub fn outer_encap_vni(&self) -> Option { match &self.prepend { @@ -1605,6 +1407,7 @@ impl EmitSpec { } } + /// Returns the outer IPv6 src/dst when this spec pushes Geneve encapsulation. #[inline] pub fn outer_ip6_addrs(&self) -> Option<(Ipv6Addr, Ipv6Addr)> { match &self.prepend { @@ -1623,21 +1426,19 @@ impl EmitSpec { } } +/// Specification of additional header layers to push at the head of a packet. #[derive(Clone, Debug)] pub enum PushSpec { + /// Bytes to prepend to packet which have been serialised ahead of time + /// and can be copied in one shot. Fastpath(Arc), + /// Full representations of each header to serialise and prepend ahead + /// of the current packet contents. Slowpath(Box), + /// No prepend. 
NoOp, } -#[derive(Clone, Debug)] -pub struct OldEmitSpec { - pub rewind: u16, - pub encapped_len: u16, - pub payload_len: u16, - pub push_spec: OpteEmit, -} - #[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Ord, PartialOrd, Default)] pub enum Memoised { #[default] @@ -1668,409 +1469,3 @@ impl Memoised { *self = Self::Known(val); } } - -impl QueryEcho for IcmpV4Packet { - #[inline] - fn echo_id(&self) -> Option { - match (self.code(), self.ty()) { - (0, 0) | (0, 8) => { - ValidIcmpEcho::parse(self.rest_of_hdr_ref().as_slice()) - .ok() - .map(|(v, ..)| v.id()) - } - _ => None, - } - } -} - -impl QueryEcho for IcmpV6Packet { - #[inline] - fn echo_id(&self) -> Option { - match (self.code(), self.ty()) { - (0, 128) | (0, 129) => { - ValidIcmpEcho::parse(&self.rest_of_hdr_ref()[..]) - .ok() - .map(|(v, ..)| v.id()) - } - _ => None, - } - } -} - -impl HeaderActionModify - for InlineHeader> -{ - #[inline] - fn run_modify( - &mut self, - mod_spec: &EtherMod, - ) -> Result<(), HeaderActionError> { - match self { - InlineHeader::Repr(a) => { - if let Some(src) = mod_spec.src { - a.set_source(src); - } - if let Some(dst) = mod_spec.dst { - a.set_destination(dst); - } - } - InlineHeader::Raw(a) => { - if let Some(src) = mod_spec.src { - a.set_source(src); - } - if let Some(dst) = mod_spec.dst { - a.set_destination(dst); - } - } - } - - Ok(()) - } -} - -impl HeaderActionModify for EthernetPacket { - #[inline] - fn run_modify( - &mut self, - mod_spec: &EtherMod, - ) -> Result<(), HeaderActionError> { - if let Some(src) = mod_spec.src { - self.set_source(src); - } - if let Some(dst) = mod_spec.dst { - self.set_destination(dst); - } - - Ok(()) - } -} - -impl HeaderActionModify - for InlineHeader> -{ - #[inline] - fn run_modify( - &mut self, - mod_spec: &IpMod, - ) -> Result<(), HeaderActionError> { - match mod_spec { - IpMod::Ip4(mods) => match self { - InlineHeader::Repr(L3Repr::Ipv4(v4)) => { - if let Some(src) = mods.src { - >::set_source(v4, src); - } - if let Some(dst) = 
mods.dst { - >::set_destination(v4, dst); - } - if let Some(p) = mods.proto { - >::set_protocol( - v4, - IpProtocol(u8::from(p)), - ); - } - } - InlineHeader::Raw(ValidL3::Ipv4(v4)) => { - if let Some(src) = mods.src { - v4.set_source(src); - } - if let Some(dst) = mods.dst { - v4.set_destination(dst); - } - if let Some(p) = mods.proto { - v4.set_protocol(IpProtocol(u8::from(p))); - } - } - _ => return Err(HeaderActionError::MissingHeader), - }, - IpMod::Ip6(mods) => match self { - InlineHeader::Repr(L3Repr::Ipv6(v6)) => { - if let Some(src) = mods.src { - >::set_source(v6, src); - } - if let Some(dst) = mods.dst { - >::set_destination(v6, dst); - } - if let Some(p) = mods.proto { - // TODO(kyle) - // NOTE: I know this is broken for V6EHs - >::set_next_header( - v6, - IpProtocol(u8::from(p)), - ); - } - } - InlineHeader::Raw(ValidL3::Ipv6(v6)) => { - if let Some(src) = mods.src { - v6.set_source(src); - } - if let Some(dst) = mods.dst { - v6.set_destination(dst); - } - if let Some(p) = mods.proto { - // TODO(kyle) - // NOTE: I know this is broken for V6EHs - v6.set_next_header(IpProtocol(u8::from(p))); - } - } - _ => return Err(HeaderActionError::MissingHeader), - }, - } - - Ok(()) - } -} - -impl HeaderActionModify for L3 { - #[inline] - fn run_modify( - &mut self, - mod_spec: &IpMod, - ) -> Result<(), HeaderActionError> { - match (self, mod_spec) { - (L3::Ipv4(v4), IpMod::Ip4(mods)) => { - if let Some(src) = mods.src { - v4.set_source(src); - } - if let Some(dst) = mods.dst { - v4.set_destination(dst); - } - if let Some(p) = mods.proto { - v4.set_protocol(IpProtocol(u8::from(p))); - } - Ok(()) - } - (L3::Ipv6(v6), IpMod::Ip6(mods)) => { - if let Some(src) = mods.src { - v6.set_source(src); - } - if let Some(dst) = mods.dst { - v6.set_destination(dst); - } - if let Some(p) = mods.proto { - // NOTE: I know this is broken for V6EHs - v6.set_next_header(IpProtocol(u8::from(p))); - } - Ok(()) - } - _ => Err(HeaderActionError::MissingHeader), - } - } -} - -impl 
HeaderActionModify for Ulp { - #[inline] - fn run_modify( - &mut self, - mod_spec: &UlpMetaModify, - ) -> Result<(), HeaderActionError> { - match self { - Ulp::Tcp(t) => { - if let Some(src) = mod_spec.generic.src_port { - t.set_source(src); - } - if let Some(dst) = mod_spec.generic.dst_port { - t.set_destination(dst); - } - if let Some(flags) = mod_spec.tcp_flags { - t.set_flags(TcpFlags::from_bits_retain(flags)); - } - } - Ulp::Udp(u) => { - if let Some(src) = mod_spec.generic.src_port { - u.set_source(src); - } - if let Some(dst) = mod_spec.generic.dst_port { - u.set_destination(dst); - } - } - Ulp::IcmpV4(i4) => { - if let Some(id) = mod_spec.icmp_id { - if i4.echo_id().is_some() { - let roh = i4.rest_of_hdr_mut(); - ValidIcmpEcho::parse(&mut roh[..]) - .expect( - "ICMP ROH is exactly as large as ValidIcmpEcho", - ) - .0 - .set_id(id); - } - } - } - Ulp::IcmpV6(i6) => { - if let Some(id) = mod_spec.icmp_id { - if i6.echo_id().is_some() { - let roh = i6.rest_of_hdr_mut(); - ValidIcmpEcho::parse(&mut roh[..]) - .expect( - "ICMP ROH is exactly as large as ValidIcmpEcho", - ) - .0 - .set_id(id); - } - } - } - } - - Ok(()) - } -} - -impl HeaderActionModify - for InlineHeader> -{ - #[inline] - fn run_modify( - &mut self, - mod_spec: &EncapMod, - ) -> Result<(), HeaderActionError> { - match (self, mod_spec) { - ( - InlineHeader::Repr(EncapMeta::Geneve(g)), - EncapMod::Geneve(mod_spec), - ) => { - if let Some(vni) = mod_spec.vni { - g.vni = vni; - } - } - ( - InlineHeader::Raw(ValidEncapMeta::Geneve(_, g)), - EncapMod::Geneve(mod_spec), - ) => { - if let Some(vni) = mod_spec.vni { - g.set_vni(vni); - } - } - } - - Ok(()) - } -} - -impl HasInnerCksum for InlineHeader> { - const HAS_CKSUM: bool = false; -} - -impl HasInnerCksum for InlineHeader> { - const HAS_CKSUM: bool = true; -} - -impl HasInnerCksum - for InlineHeader> -{ - const HAS_CKSUM: bool = false; -} - -impl HasInnerCksum for EthernetPacket { - const HAS_CKSUM: bool = false; -} - -impl HasInnerCksum for L3 { - 
const HAS_CKSUM: bool = true; -} - -impl HasInnerCksum for Ulp { - const HAS_CKSUM: bool = true; -} - -impl From - for ingot::types::Header> -{ - #[inline] - fn from(value: EtherMeta) -> Self { - ingot::types::Header::Repr( - Ethernet { - destination: value.dst, - source: value.src, - ethertype: Ethertype(u16::from(value.ether_type)), - } - .into(), - ) - } -} - -impl From - for InlineHeader> -{ - #[inline] - fn from(value: EtherMeta) -> Self { - InlineHeader::Repr( - Ethernet { - destination: value.dst, - source: value.src, - ethertype: Ethertype(u16::from(value.ether_type)), - } - .into(), - ) - } -} - -impl From - for ingot::types::Header> -{ - #[inline] - fn from(value: EncapMeta) -> Self { - ingot::types::Header::Repr(value.into()) - } -} - -impl From - for InlineHeader> -{ - #[inline] - fn from(value: EncapMeta) -> Self { - InlineHeader::Repr(value) - } -} - -impl PushAction>> - for EtherMeta -{ - #[inline] - fn push(&self) -> InlineHeader> { - InlineHeader::Repr(Ethernet { - destination: self.dst, - source: self.src, - ethertype: Ethertype(u16::from(self.ether_type)), - }) - } -} - -impl PushAction> for EtherMeta { - #[inline] - fn push(&self) -> EthernetPacket { - ingot::types::Header::Repr( - Ethernet { - destination: self.dst, - source: self.src, - ethertype: Ethertype(u16::from(self.ether_type)), - } - .into(), - ) - } -} - -impl PushAction> for IpPush { - fn push(&self) -> L3 { - match self { - IpPush::Ip4(v4) => L3::Ipv4( - Ipv4 { - protocol: IpProtocol(u8::from(v4.proto)), - source: v4.src, - destination: v4.dst, - flags: Ipv4Flags::DONT_FRAGMENT, - ..Default::default() - } - .into(), - ), - IpPush::Ip6(v6) => L3::Ipv6( - Ipv6 { - next_header: IpProtocol(u8::from(v6.proto)), - source: v6.src, - destination: v6.dst, - ..Default::default() - } - .into(), - ), - } - } -} diff --git a/lib/opte/src/engine/ip/mod.rs b/lib/opte/src/engine/ip/mod.rs index 0db2963e..d1cad90b 100644 --- a/lib/opte/src/engine/ip/mod.rs +++ b/lib/opte/src/engine/ip/mod.rs @@ 
-8,14 +8,21 @@ pub mod v4; pub mod v6; use super::checksum::Checksum; +use super::headers::HasInnerCksum; +use super::headers::HeaderActionError; +use super::headers::HeaderActionModify; +use super::headers::IpMod; +use super::headers::IpPush; +use super::headers::PushAction; use super::packet::ParseError; use ingot::choice; use ingot::ethernet::Ethertype; +use ingot::ip::IpProtocol; +use ingot::ip::Ipv4Flags; use ingot::types::ByteSlice; use ingot::types::Header; +use ingot::types::InlineHeader; use ingot::types::NextLayer; -use ingot::Ingot; -use opte_api::MacAddr; use v4::*; use v6::*; use zerocopy::ByteSliceMut; @@ -140,13 +147,141 @@ impl ValidL3 { } } -#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq, Ingot)] -#[ingot(impl_default)] -pub struct Ethernet { - #[ingot(is = "[u8; 6]")] - pub destination: MacAddr, - #[ingot(is = "[u8; 6]")] - pub source: MacAddr, - #[ingot(is = "u16be", next_layer)] - pub ethertype: Ethertype, +impl HeaderActionModify + for InlineHeader> +{ + #[inline] + fn run_modify( + &mut self, + mod_spec: &IpMod, + ) -> Result<(), HeaderActionError> { + match mod_spec { + IpMod::Ip4(mods) => match self { + InlineHeader::Repr(L3Repr::Ipv4(v4)) => { + if let Some(src) = mods.src { + v4.source = src; + } + if let Some(dst) = mods.dst { + v4.destination = dst; + } + if let Some(p) = mods.proto { + v4.protocol = IpProtocol(u8::from(p)); + } + } + InlineHeader::Raw(ValidL3::Ipv4(v4)) => { + if let Some(src) = mods.src { + v4.set_source(src); + } + if let Some(dst) = mods.dst { + v4.set_destination(dst); + } + if let Some(p) = mods.proto { + v4.set_protocol(IpProtocol(u8::from(p))); + } + } + _ => return Err(HeaderActionError::MissingHeader), + }, + IpMod::Ip6(mods) => match self { + InlineHeader::Repr(L3Repr::Ipv6(v6)) => { + if let Some(src) = mods.src { + v6.source = src; + } + if let Some(dst) = mods.dst { + v6.destination = dst; + } + if let Some(p) = mods.proto { + let ipp = IpProtocol(u8::from(p)); + + v6_set_next_header::<&mut [u8]>(ipp, 
v6)?; + } + } + InlineHeader::Raw(ValidL3::Ipv6(v6)) => { + if let Some(src) = mods.src { + v6.set_source(src); + } + if let Some(dst) = mods.dst { + v6.set_destination(dst); + } + if let Some(p) = mods.proto { + let ipp = IpProtocol(u8::from(p)); + v6_set_next_header(ipp, v6)?; + } + } + _ => return Err(HeaderActionError::MissingHeader), + }, + } + + Ok(()) + } +} + +impl HeaderActionModify for L3 { + #[inline] + fn run_modify( + &mut self, + mod_spec: &IpMod, + ) -> Result<(), HeaderActionError> { + match (self, mod_spec) { + (L3::Ipv4(v4), IpMod::Ip4(mods)) => { + if let Some(src) = mods.src { + v4.set_source(src); + } + if let Some(dst) = mods.dst { + v4.set_destination(dst); + } + if let Some(p) = mods.proto { + v4.set_protocol(IpProtocol(u8::from(p))); + } + Ok(()) + } + (L3::Ipv6(v6), IpMod::Ip6(mods)) => { + if let Some(src) = mods.src { + v6.set_source(src); + } + if let Some(dst) = mods.dst { + v6.set_destination(dst); + } + if let Some(p) = mods.proto { + let ipp = IpProtocol(u8::from(p)); + v6_set_next_header(ipp, v6)?; + } + Ok(()) + } + _ => Err(HeaderActionError::MissingHeader), + } + } +} + +impl HasInnerCksum for InlineHeader> { + const HAS_CKSUM: bool = true; +} + +impl HasInnerCksum for L3 { + const HAS_CKSUM: bool = true; +} + +impl PushAction> for IpPush { + fn push(&self) -> L3 { + match self { + IpPush::Ip4(v4) => L3::Ipv4( + Ipv4 { + protocol: IpProtocol(u8::from(v4.proto)), + source: v4.src, + destination: v4.dst, + flags: Ipv4Flags::DONT_FRAGMENT, + ..Default::default() + } + .into(), + ), + IpPush::Ip6(v6) => L3::Ipv6( + Ipv6 { + next_header: IpProtocol(u8::from(v6.proto)), + source: v6.src, + destination: v6.dst, + ..Default::default() + } + .into(), + ), + } + } } diff --git a/lib/opte/src/engine/ip/v6.rs b/lib/opte/src/engine/ip/v6.rs index cef00f3f..45ba42f1 100644 --- a/lib/opte/src/engine/ip/v6.rs +++ b/lib/opte/src/engine/ip/v6.rs @@ -4,6 +4,7 @@ // Copyright 2024 Oxide Computer Company +use 
crate::engine::headers::HeaderActionError; use crate::engine::packet::MismatchError; use crate::engine::packet::ParseError; use crate::engine::predicate::MatchExact; @@ -11,11 +12,18 @@ use crate::engine::predicate::MatchExactVal; use crate::engine::predicate::MatchPrefix; use crate::engine::predicate::MatchPrefixVal; use ingot::ip::Ecn; +use ingot::ip::ExtHdrClass; use ingot::ip::IpProtocol; +use ingot::ip::IpV6Ext6564Mut; +use ingot::ip::IpV6ExtFragmentMut; use ingot::ip::LowRentV6EhRepr; +use ingot::ip::ValidLowRentV6Eh; use ingot::types::primitives::*; use ingot::types::util::Repeated; +use ingot::types::FieldMut; +use ingot::types::Header; use ingot::types::HeaderLen; +use ingot::types::ParseChoice; use ingot::Ingot; pub use opte_api::Ipv6Addr; pub use opte_api::Ipv6Cidr; @@ -128,6 +136,70 @@ pub struct Ipv6Mod { pub proto: Option, } +pub fn v6_set_next_header( + ipp: IpProtocol, + v6: &mut (impl Ipv6Mut + Ipv6Ref), +) -> Result<(), HeaderActionError> { + let mut curr_ipp = v6.next_header(); + if matches!(curr_ipp.class(), ExtHdrClass::NotAnEh) { + v6.set_next_header(ipp); + return Ok(()); + } + + match v6.v6ext_mut() { + FieldMut::Repr(a) => match a.iter_mut().last() { + Some(LowRentV6EhRepr::IpV6ExtFragment(f)) => { + f.next_header = ipp; + } + Some(LowRentV6EhRepr::IpV6Ext6564(f)) => { + f.next_header = ipp; + } + None => { + v6.set_next_header(ipp); + } + }, + FieldMut::Raw(Header::Repr(a)) => match a.iter_mut().last() { + Some(LowRentV6EhRepr::IpV6ExtFragment(f)) => { + f.next_header = ipp; + } + Some(LowRentV6EhRepr::IpV6Ext6564(f)) => { + f.next_header = ipp; + } + None => { + v6.set_next_header(ipp); + } + }, + FieldMut::Raw(Header::Raw(a)) => { + // TODO: this, but more widely in ingot. + // making this generic over all Repeated in + // was... somewhat challenging. 
+ let mut buf = a.as_mut(); + + while !matches!(curr_ipp.class(), ExtHdrClass::NotAnEh) { + let (hdr, nh, rem) = + ValidLowRentV6Eh::parse_choice(buf, Some(curr_ipp)) + .map_err(|_| HeaderActionError::MalformedExtension)?; + let nh = nh.expect("V6EHs always have a next_header field"); + buf = rem; + curr_ipp = nh; + + if matches!(nh.class(), ExtHdrClass::NotAnEh) { + match hdr { + ValidLowRentV6Eh::IpV6ExtFragment(mut f) => { + f.set_next_header(nh); + } + ValidLowRentV6Eh::IpV6Ext6564(mut f) => { + f.set_next_header(nh); + } + } + } + } + } + } + + Ok(()) +} + #[cfg(test)] pub(crate) mod test { use super::*; diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index 146e1319..d005186a 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -42,10 +42,11 @@ pub mod ingot_packet; use crate::ddi::mblk::MsgBlk; use checksum::Checksum; use ingot::tcp::TcpRef; +use ingot::types::IntoBufPointer; +use ingot::types::Parsed as IngotParsed; use ingot::types::Read; use ingot_packet::FullParsed; use ingot_packet::OpteMeta; -use ingot_packet::OpteParsed2; use ingot_packet::Packet; pub use opte_api::Direction; use parse::ValidNoEncap; @@ -248,9 +249,9 @@ pub trait NetworkParser { fn parse_outbound<'a, T: Read + 'a>( &self, rdr: T, - ) -> Result>, ParseError> + ) -> Result, T>, ParseError> where - T::Chunk: ingot::types::IntoBufPointer<'a> + ByteSliceMut; + T::Chunk: IntoBufPointer<'a> + ByteSliceMut; /// Parse an inbound packet. 
/// @@ -259,9 +260,9 @@ pub trait NetworkParser { fn parse_inbound<'a, T: Read + 'a>( &self, rdr: T, - ) -> Result>, ParseError> + ) -> Result, T>, ParseError> where - T::Chunk: ingot::types::IntoBufPointer<'a> + ByteSliceMut; + T::Chunk: IntoBufPointer<'a> + ByteSliceMut; } /// Header formats which allow a flow ID to be read out, and which can be converted @@ -305,9 +306,9 @@ impl NetworkParser for GenericUlp { fn parse_inbound<'a, T: Read + 'a>( &self, rdr: T, - ) -> Result>, ParseError> + ) -> Result, T>, ParseError> where - T::Chunk: ingot::types::IntoBufPointer<'a> + ByteSliceMut, + T::Chunk: IntoBufPointer<'a> + ByteSliceMut, { Ok(ValidNoEncap::parse_read(rdr)?) } @@ -315,9 +316,9 @@ impl NetworkParser for GenericUlp { fn parse_outbound<'a, T: Read + 'a>( &self, rdr: T, - ) -> Result>, ParseError> + ) -> Result, T>, ParseError> where - T::Chunk: ingot::types::IntoBufPointer<'a> + ByteSliceMut, + T::Chunk: IntoBufPointer<'a> + ByteSliceMut, { Ok(ValidNoEncap::parse_read(rdr)?) } diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 357976bb..791cc2de 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -593,7 +593,6 @@ mod test { ); } - // TODO(kyle): equivalent for MsgBlk #[test] fn udp6_packet_with_padding() { let body = [1, 2, 3, 4]; diff --git a/lib/opte/src/engine/parse.rs b/lib/opte/src/engine/parse.rs index 44b5b571..032a3563 100644 --- a/lib/opte/src/engine/parse.rs +++ b/lib/opte/src/engine/parse.rs @@ -15,9 +15,16 @@ use super::ether::EthernetRef; use super::ether::ValidEthernet; use super::geneve::validate_geneve; use super::geneve::GENEVE_PORT; +use super::headers::HasInnerCksum; +use super::headers::HeaderActionError; +use super::headers::HeaderActionModify; use super::headers::IpMod; +use super::headers::UlpMetaModify; +use super::headers::ValidEncapMeta; +use super::icmp::IcmpEchoMut; +use super::icmp::QueryEcho; +use super::icmp::ValidIcmpEcho; use super::ingot_packet::OpteMeta; -use 
super::ingot_packet::ValidEncapMeta; use super::ip::v4::Ipv4Mut; use super::ip::v4::Ipv4Ref; use super::ip::v6::Ipv6Mut; @@ -52,6 +59,7 @@ use ingot::tcp::ValidTcp; use ingot::types::ByteSlice; use ingot::types::Header; use ingot::types::HeaderLen; +use ingot::types::HeaderParse; use ingot::types::InlineHeader; use ingot::types::NextLayer; use ingot::types::ParseControl; @@ -145,6 +153,14 @@ impl Ulp { _ => None, } } + + pub fn dst_port(&self) -> Option { + match self { + Ulp::Tcp(t) => Some(t.destination()), + Ulp::Udp(t) => Some(t.destination()), + _ => None, + } + } } #[derive(Parse)] @@ -762,6 +778,68 @@ impl ValidUlp { } } +impl HasInnerCksum for Ulp { + const HAS_CKSUM: bool = true; +} + +impl HeaderActionModify for Ulp { + #[inline] + fn run_modify( + &mut self, + mod_spec: &UlpMetaModify, + ) -> Result<(), HeaderActionError> { + match self { + Ulp::Tcp(t) => { + if let Some(src) = mod_spec.generic.src_port { + t.set_source(src); + } + if let Some(dst) = mod_spec.generic.dst_port { + t.set_destination(dst); + } + if let Some(flags) = mod_spec.tcp_flags { + t.set_flags(TcpFlags::from_bits_retain(flags)); + } + } + Ulp::Udp(u) => { + if let Some(src) = mod_spec.generic.src_port { + u.set_source(src); + } + if let Some(dst) = mod_spec.generic.dst_port { + u.set_destination(dst); + } + } + Ulp::IcmpV4(i4) => { + if let Some(id) = mod_spec.icmp_id { + if i4.echo_id().is_some() { + let roh = i4.rest_of_hdr_mut(); + ValidIcmpEcho::parse(&mut roh[..]) + .expect( + "ICMP ROH is exactly as large as ValidIcmpEcho", + ) + .0 + .set_id(id); + } + } + } + Ulp::IcmpV6(i6) => { + if let Some(id) = mod_spec.icmp_id { + if i6.echo_id().is_some() { + let roh = i6.rest_of_hdr_mut(); + ValidIcmpEcho::parse(&mut roh[..]) + .expect( + "ICMP ROH is exactly as large as ValidIcmpEcho", + ) + .0 + .set_id(id); + } + } + } + } + + Ok(()) + } +} + #[cfg(test)] mod test { use crate::engine::checksum::Checksum as OpteCsum; diff --git a/lib/opte/src/engine/port.rs 
b/lib/opte/src/engine/port.rs index 72654265..dc1b49e3 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -1506,20 +1506,10 @@ impl Port { Ok(ProcessResult::Drop { reason }) } InternalProcessResult::Hairpin(v) => Ok(ProcessResult::Hairpin(v)), - InternalProcessResult::Modified => { - let l4_hash = pkt.l4_hash(); - let emit_spec = - pkt.emit_spec().map_err(|_| ProcessError::BadEmitSpec)?; - - // TODO: remove EmitSpec and have above method just spit out the new - // variant. - Ok(ProcessResult::Modified(EmitSpec { - prepend: PushSpec::Slowpath(emit_spec.push_spec.into()), - l4_hash, - rewind: emit_spec.rewind, - ulp_len: emit_spec.encapped_len as u32, - })) - } + InternalProcessResult::Modified => pkt + .emit_spec() + .map_err(|_| ProcessError::BadEmitSpec) + .map(ProcessResult::Modified), }); self.port_process_return_probe( dir, diff --git a/lib/opte/src/engine/predicate.rs b/lib/opte/src/engine/predicate.rs index ca74ad7f..a9ea55f7 100644 --- a/lib/opte/src/engine/predicate.rs +++ b/lib/opte/src/engine/predicate.rs @@ -12,8 +12,6 @@ use super::ether::EtherType; use super::ether::EthernetRef; use super::icmp::v4::MessageType as IcmpMessageType; use super::icmp::v6::MessageType as Icmpv6MessageType; -use super::ingot_packet::ulp_dst_port; -use super::ingot_packet::ulp_src_port; use super::ingot_packet::MblkPacketData; use super::ip::v4::Ipv4Addr; use super::ip::v4::Ipv4Cidr; @@ -476,7 +474,7 @@ impl Predicate { }, Self::InnerSrcPort(list) => { - match meta.inner_ulp().map(ulp_src_port).flatten() { + match meta.inner_ulp().map(|v| v.src_port()).flatten() { // No ULP metadata or no source port (e.g. ICMPv6). None => return false, @@ -491,7 +489,7 @@ impl Predicate { } Self::InnerDstPort(list) => { - match meta.inner_ulp().map(ulp_dst_port).flatten() { + match meta.inner_ulp().map(|v| v.dst_port()).flatten() { // No ULP metadata or no destination port (e.g. ICMPv6). 
None => return false, diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index fb5a6784..7ff7ab71 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -490,15 +490,10 @@ impl HdrTransform { .act_on_option(&mut meta.headers.outer_encap) .map_err(Self::err_fn("outer encap"))?; - // If I set this up right, we can handle the above w/o panic on a - // dumb EtherDrop action... as Transform, _, _>>::act_on( &mut meta.headers.inner_eth, &self.inner_ether, ) - // meta.headers - // .inner_eth - // .act_on::(&self.inner_ether) .map_err(Self::err_fn("inner eth"))?; let l3_dirty = self @@ -525,6 +520,9 @@ impl HdrTransform { HeaderActionError::CantPop => { HdrTransformError::CantPop(header) } + HeaderActionError::MalformedExtension => { + HdrTransformError::MalformedExtension(header) + } } } } @@ -534,6 +532,7 @@ impl HdrTransform { pub enum HdrTransformError { MissingHeader(&'static str), CantPop(&'static str), + MalformedExtension(&'static str), } #[derive(Debug)] diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index f7c9e4bf..40dc3b98 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -21,7 +21,6 @@ use opte::engine::arp::ARP_HTYPE_ETHERNET; use opte::engine::ether::EthernetRef; use opte::engine::flow_table::FlowTable; use opte::engine::ingot_packet::FullParsed; -use opte::engine::ingot_packet::OpteParsed2; use opte::engine::ingot_packet::Packet; use opte::engine::ip::v4::Ipv4Addr; use opte::engine::packet::InnerFlowId; @@ -36,6 +35,7 @@ use opte::engine::NetworkImpl; use opte::engine::NetworkParser; use opte::ingot::ethernet::Ethertype; use opte::ingot::types::HeaderParse; +use opte::ingot::types::Parsed as IngotParsed; use opte::ingot::types::Read; use zerocopy::ByteSliceMut; @@ -129,7 +129,7 @@ impl NetworkParser for VpcParser { fn parse_outbound<'a, T: Read + 'a>( &self, rdr: T, - ) -> Result>, ParseError> + ) -> Result, T>, ParseError> where T::Chunk: 
opte::ingot::types::IntoBufPointer<'a> + ByteSliceMut, { @@ -140,7 +140,7 @@ impl NetworkParser for VpcParser { fn parse_inbound<'a, T: Read + 'a>( &self, rdr: T, - ) -> Result>, ParseError> + ) -> Result, T>, ParseError> where T::Chunk: opte::ingot::types::IntoBufPointer<'a> + ByteSliceMut, { From 7698a319f44a39bd0ca6a48917591d835f44e368 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 29 Oct 2024 15:15:19 +0000 Subject: [PATCH 074/115] Finally do away with ingot_packet.rs --- bench/benches/userland.rs | 2 +- lib/opte-test-utils/src/lib.rs | 4 +- lib/opte/src/ddi/mblk.rs | 6 +- lib/opte/src/engine/dhcp.rs | 2 +- lib/opte/src/engine/dhcpv6/protocol.rs | 4 +- lib/opte/src/engine/icmp/v4.rs | 2 +- lib/opte/src/engine/icmp/v6.rs | 2 +- lib/opte/src/engine/ingot_packet.rs | 1471 ---------------------- lib/opte/src/engine/layer.rs | 6 +- lib/opte/src/engine/mod.rs | 8 +- lib/opte/src/engine/nat.rs | 4 +- lib/opte/src/engine/packet.rs | 1457 ++++++++++++++++++++- lib/opte/src/engine/parse.rs | 2 +- lib/opte/src/engine/port.rs | 14 +- lib/opte/src/engine/predicate.rs | 2 +- lib/opte/src/engine/rule.rs | 8 +- lib/opte/src/engine/snat.rs | 4 +- lib/oxide-vpc/src/engine/gateway/mod.rs | 2 +- lib/oxide-vpc/src/engine/mod.rs | 4 +- lib/oxide-vpc/src/engine/overlay.rs | 2 +- lib/oxide-vpc/tests/fuzz_regression.rs | 2 +- lib/oxide-vpc/tests/integration_tests.rs | 4 +- xde/src/xde.rs | 2 +- 23 files changed, 1498 insertions(+), 1516 deletions(-) delete mode 100644 lib/opte/src/engine/ingot_packet.rs diff --git a/bench/benches/userland.rs b/bench/benches/userland.rs index f8dbd493..0b6f0dc1 100644 --- a/bench/benches/userland.rs +++ b/bench/benches/userland.rs @@ -10,7 +10,7 @@ use criterion::criterion_group; use criterion::criterion_main; use criterion::BenchmarkId; use criterion::Criterion; -use opte::engine::ingot_packet::Packet; +use opte::engine::packet::Packet; use opte_bench::alloc::*; use opte_bench::packet::BenchPacket; use opte_bench::packet::BenchPacketInstance; 
diff --git a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index b482d678..af806377 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -30,8 +30,6 @@ pub use opte::engine::geneve::GENEVE_OPT_CLASS_OXIDE; pub use opte::engine::geneve::GENEVE_PORT; pub use opte::engine::headers::IpAddr; pub use opte::engine::headers::IpCidr; -pub use opte::engine::ingot_packet::MblkLiteParsed; -pub use opte::engine::ingot_packet::Packet; pub use opte::engine::ip::v4::Ipv4; pub use opte::engine::ip::v4::Ipv4Addr; pub use opte::engine::ip::v4::Protocol; @@ -39,6 +37,8 @@ pub use opte::engine::ip::v6::Ipv6; pub use opte::engine::ip::v6::Ipv6Addr; pub use opte::engine::ip::L3Repr; pub use opte::engine::layer::DenyReason; +pub use opte::engine::packet::MblkLiteParsed; +pub use opte::engine::packet::Packet; pub use opte::engine::packet::ParseError; pub use opte::engine::port::meta::ActionMeta; pub use opte::engine::port::DropReason; diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index 2f1b9f6a..6f1abbed 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -4,7 +4,7 @@ // Copyright 2024 Oxide Computer Company -use crate::engine::ingot_packet::BufferState; +use crate::engine::packet::BufferState; use crate::engine::packet::SegAdjustError; use crate::engine::packet::WrapError; use crate::engine::packet::WriteError; @@ -45,7 +45,7 @@ struct MsgBlkChainInner { /// Network packets are provided by illumos as a linked list of linked lists, /// using the `b_next` and `b_prev` fields. /// -/// See the documentation for [`super::ingot_packet::Packet`] and/or [`MsgBlk`] for full context. +/// See the documentation for [`super::packet::Packet`] and/or [`MsgBlk`] for full context. 
// TODO: We might retool this type now that MsgBlk does not decompose // each mblk_t into individual segments (i.e., packets could be allocated // a lifetime via PhantomData based on whether we want to remove them from the chain or modify in place). @@ -929,7 +929,7 @@ fn mock_freeb(mp: *mut mblk_t) { #[cfg(test)] mod test { use super::*; - use crate::engine::ingot_packet::Packet; + use crate::engine::packet::Packet; use crate::engine::packet::ParseError; use crate::engine::GenericUlp; use ingot::types::ParseError as IngotParseError; diff --git a/lib/opte/src/engine/dhcp.rs b/lib/opte/src/engine/dhcp.rs index 233ec174..ae8af2ef 100644 --- a/lib/opte/src/engine/dhcp.rs +++ b/lib/opte/src/engine/dhcp.rs @@ -7,8 +7,8 @@ //! DHCP headers, data, and actions. use super::ether::Ethernet; -use super::ingot_packet::MblkPacketData; use super::ip::v4::*; +use super::packet::MblkPacketData; use super::predicate::DataPredicate; use super::predicate::EtherAddrMatch; use super::predicate::IpProtoMatch; diff --git a/lib/opte/src/engine/dhcpv6/protocol.rs b/lib/opte/src/engine/dhcpv6/protocol.rs index 05f0e093..9793f34e 100644 --- a/lib/opte/src/engine/dhcpv6/protocol.rs +++ b/lib/opte/src/engine/dhcpv6/protocol.rs @@ -23,9 +23,9 @@ use crate::engine::dhcpv6::ALL_SERVERS; use crate::engine::dhcpv6::CLIENT_PORT; use crate::engine::dhcpv6::SERVER_PORT; use crate::engine::ether::Ethernet; -use crate::engine::ingot_packet::MblkPacketData; use crate::engine::ip::v6::Ipv6; use crate::engine::ip::v6::Ipv6Ref; +use crate::engine::packet::MblkPacketData; use crate::engine::predicate::DataPredicate; use crate::engine::predicate::EtherAddrMatch; use crate::engine::predicate::IpProtoMatch; @@ -690,7 +690,7 @@ mod test { use super::OptionCode; use crate::ddi::mblk::MsgBlk; use crate::engine::dhcpv6::test_data; - use crate::engine::ingot_packet::Packet; + use crate::engine::packet::Packet; use crate::engine::port::meta::ActionMeta; use crate::engine::GenericUlp; diff --git 
a/lib/opte/src/engine/icmp/v4.rs b/lib/opte/src/engine/icmp/v4.rs index 43871d15..7563ea50 100644 --- a/lib/opte/src/engine/icmp/v4.rs +++ b/lib/opte/src/engine/icmp/v4.rs @@ -9,9 +9,9 @@ use super::*; use crate::ddi::mblk::MsgBlk; use crate::engine::ether::Ethernet; -use crate::engine::ingot_packet::MblkPacketData; use crate::engine::ip::v4::Ipv4; use crate::engine::ip::L3; +use crate::engine::packet::MblkPacketData; use crate::engine::predicate::Ipv4AddrMatch; use ingot::ethernet::Ethertype; use ingot::icmp::IcmpV4Packet; diff --git a/lib/opte/src/engine/icmp/v6.rs b/lib/opte/src/engine/icmp/v6.rs index 281d7e8b..918112f9 100644 --- a/lib/opte/src/engine/icmp/v6.rs +++ b/lib/opte/src/engine/icmp/v6.rs @@ -9,9 +9,9 @@ use super::*; use crate::ddi::mblk::MsgBlk; use crate::engine::ether::Ethernet; -use crate::engine::ingot_packet::MblkPacketData; use crate::engine::ip::v6::Ipv6; use crate::engine::ip::v6::Ipv6Ref; +use crate::engine::packet::MblkPacketData; use crate::engine::predicate::Ipv6AddrMatch; use alloc::string::String; use ingot::ethernet::Ethertype; diff --git a/lib/opte/src/engine/ingot_packet.rs b/lib/opte/src/engine/ingot_packet.rs deleted file mode 100644 index 180fa13a..00000000 --- a/lib/opte/src/engine/ingot_packet.rs +++ /dev/null @@ -1,1471 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
- -// Copyright 2024 Oxide Computer Company - -use super::checksum::Checksum; -use super::ether::Ethernet; -use super::ether::EthernetPacket; -use super::ether::ValidEthernet; -use super::headers::EncapMeta; -use super::headers::EncapPush; -use super::headers::IpPush; -use super::headers::SizeHoldingEncap; -use super::headers::ValidEncapMeta; -use super::ip::v4::Ipv4Packet; -use super::ip::v4::Ipv4Ref; -use super::ip::v6::Ipv6Packet; -use super::ip::v6::Ipv6Ref; -use super::ip::L3Repr; -use super::ip::L3; -use super::packet::AddrPair; -use super::packet::BodyTransform; -use super::packet::BodyTransformError; -use super::packet::InnerFlowId; -use super::packet::PacketState; -use super::packet::ParseError; -use super::packet::FLOW_ID_DEFAULT; -use super::parse::NoEncap; -use super::parse::Ulp; -use super::parse::UlpRepr; -use super::rule::CompiledEncap; -use super::rule::CompiledTransform; -use super::rule::HdrTransform; -use super::rule::HdrTransformError; -use super::LightweightMeta; -use super::NetworkParser; -use crate::ddi::mblk::MsgBlk; -use crate::ddi::mblk::MsgBlkIterMut; -use crate::ddi::mblk::MsgBlkNode; -use crate::engine::geneve::valid_geneve_has_oxide_external; -use crate::engine::geneve::GeneveMeta; -use alloc::boxed::Box; -use alloc::sync::Arc; -use alloc::vec::Vec; -use core::cell::Cell; -use core::hash::Hash; -use core::ops::Deref; -use core::ops::DerefMut; -use core::sync::atomic::AtomicPtr; -use illumos_sys_hdrs::uintptr_t; -use ingot::geneve::GeneveRef; -use ingot::icmp::IcmpV4Mut; -use ingot::icmp::IcmpV4Packet; -use ingot::icmp::IcmpV4Ref; -use ingot::icmp::IcmpV6Mut; -use ingot::icmp::IcmpV6Packet; -use ingot::icmp::IcmpV6Ref; -use ingot::tcp::TcpMut; -use ingot::tcp::TcpPacket; -use ingot::tcp::TcpRef; -use ingot::types::BoxedHeader; -use ingot::types::Emit; -use ingot::types::Header; -use ingot::types::HeaderLen; -use ingot::types::InlineHeader; -use ingot::types::IntoBufPointer; -use ingot::types::NextLayer; -use ingot::types::Parsed as 
IngotParsed; -use ingot::types::Read; -use ingot::types::ToOwnedPacket; -use ingot::udp::UdpMut; -use ingot::udp::UdpPacket; -use ingot::udp::UdpRef; -use opte_api::Direction; -use opte_api::Ipv6Addr; -use opte_api::Vni; -use zerocopy::ByteSlice; -use zerocopy::ByteSliceMut; -use zerocopy::IntoBytes; - -pub struct InitialLayerLens { - pub outer_eth: usize, - pub outer_l3: usize, - pub outer_encap: usize, - - pub inner_eth: usize, - pub inner_l3: usize, - pub inner_ulp: usize, -} - -impl InitialLayerLens { - #[inline] - pub fn hdr_len(&self) -> usize { - self.outer_eth - + self.outer_l3 - + self.outer_encap - + self.inner_eth - + self.inner_l3 - + self.inner_ulp - } -} - -/// Full metadata representation for a packet entering the standard ULP -/// path, or a full table walk over the slowpath. -pub struct OpteMeta { - pub outer_eth: Option>>, - pub outer_l3: Option>, - pub outer_encap: Option>>, - - pub inner_eth: EthernetPacket, - pub inner_l3: Option>, - pub inner_ulp: Option>, -} - -/// Helper for reusing access to all packet body segments. -/// -/// This is necessary because `MsgBlk`s in particular do not -/// allow us to walk backward within a packet -- if we need them, -/// then we need to save them out for all future uses. -/// The other part is that the majority of packets (ULP hits) -/// do not want to interact with body segments at all. -struct PktBodyWalker { - base: Cell, T)>>, - slice: AtomicPtr>, -} - -impl Drop for PktBodyWalker { - fn drop(&mut self) { - let ptr = self.slice.load(core::sync::atomic::Ordering::Relaxed); - if !ptr.is_null() { - // Reacquire and drop. - unsafe { - let _ = Box::from_raw(ptr); - } - } - } -} - -impl PktBodyWalker { - fn reify_body_segs(&self) - where - ::Chunk: ByteSliceMut, - { - if let Some((mut first, mut rest)) = self.base.take() { - // SAFETY: ByteSlice requires as part of its API - // that any implementors are stable, so we will always - // get the same view via deref. 
We are then consuming them - // into references which live exactly as long as their initial - // form. - // - // The next question is one of ownership. - // We know that these chunks are at least &[u8]s, they - // *will* be exclusive if ByteSliceMut is met (because they are - // sourced from an exclusive borrow on something which owns a [u8]). - // This allows us to cast to &mut later, but not here! - let mut to_hold = vec![]; - if let Some(ref mut chunk) = first { - let as_bytes = chunk.deref_mut(); - to_hold.push(unsafe { core::mem::transmute(as_bytes) }); - } - - // TODO(drop-safety): we need to give these chunks a longer life, too. - while let Ok(chunk) = rest.next_chunk() { - let as_bytes = chunk.deref(); - to_hold.push(unsafe { core::mem::transmute(as_bytes) }); - } - - let to_store = Box::into_raw(Box::new(to_hold.into_boxed_slice())); - - self.slice - .compare_exchange( - core::ptr::null_mut(), - to_store, - core::sync::atomic::Ordering::Relaxed, - core::sync::atomic::Ordering::Relaxed, - ) - .expect("unexpected concurrent access to body_seg memoiser"); - - // SAFETY: - // Replace contents to get correct drop behaviour on T. - // Currently the only ByteSlice impls are &[u8] and friends, - // but this may extend to e.g. Vec in future. 
- self.base.set(Some((first, rest))); - } - } - - fn body_segs(&self) -> &[&[u8]] - where - T::Chunk: ByteSliceMut, - { - let mut slice_ptr = - self.slice.load(core::sync::atomic::Ordering::Relaxed); - if slice_ptr.is_null() { - self.reify_body_segs(); - slice_ptr = self.slice.load(core::sync::atomic::Ordering::Relaxed); - } - assert!(!slice_ptr.is_null()); - - unsafe { - let a = (&*(*slice_ptr)) as *const _; - core::mem::transmute(a) - } - } - - fn body_segs_mut(&mut self) -> &mut [&mut [u8]] - where - T::Chunk: ByteSliceMut, - { - let mut slice_ptr = - self.slice.load(core::sync::atomic::Ordering::Relaxed); - if slice_ptr.is_null() { - self.reify_body_segs(); - slice_ptr = self.slice.load(core::sync::atomic::Ordering::Relaxed); - } - assert!(!slice_ptr.is_null()); - - // SAFETY: We have an exclusive reference, and the ByteSliceMut - // bound guarantees that this packet view was construced from - // an exclusive reference. In turn, we know that we are the only - // possible referent. - unsafe { - let a = (&mut *(*slice_ptr)) as *mut _; - core::mem::transmute(a) - } - } -} - -/// Packet state for the standard ULP path, or a full table walk over the slowpath. 
-pub struct PacketData { - pub(crate) headers: OpteMeta, - initial_lens: Option>, - body: PktBodyWalker, -} - -impl From> for OpteMeta { - #[inline] - fn from(value: NoEncap) -> Self { - OpteMeta { - outer_eth: None, - outer_l3: None, - outer_encap: None, - inner_eth: value.inner_eth, - inner_l3: value.inner_l3, - inner_ulp: value.inner_ulp, - } - } -} - -impl core::fmt::Debug for PacketData { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - f.write_str("PacketHeaders(..)") - } -} - -impl PacketData { - pub fn initial_lens(&self) -> Option<&InitialLayerLens> { - self.initial_lens.as_ref().map(|v| &**v) - } - - pub fn outer_ether( - &self, - ) -> Option<&InlineHeader>> { - self.headers.outer_eth.as_ref() - } - - pub fn outer_ip(&self) -> Option<&L3> { - self.headers.outer_l3.as_ref() - } - - /// Returns whether this packet is sourced from outside the rack, - /// in addition to its VNI. - pub fn outer_encap_geneve_vni_and_origin(&self) -> Option<(Vni, bool)> { - match &self.headers.outer_encap { - Some(InlineHeader::Repr(EncapMeta::Geneve(g))) => { - Some((g.vni, g.oxide_external_pkt)) - } - Some(InlineHeader::Raw(ValidEncapMeta::Geneve(_, g))) => { - Some((g.vni(), valid_geneve_has_oxide_external(&g))) - } - None => None, - } - } - - pub fn inner_ether(&self) -> &EthernetPacket { - &self.headers.inner_eth - } - - pub fn inner_l3(&self) -> Option<&L3> { - self.headers.inner_l3.as_ref() - } - - pub fn inner_ulp(&self) -> Option<&Ulp> { - self.headers.inner_ulp.as_ref() - } - - pub fn inner_ip4(&self) -> Option<&Ipv4Packet> { - self.inner_l3().and_then(|v| match v { - L3::Ipv4(v) => Some(v), - _ => None, - }) - } - - pub fn inner_ip6(&self) -> Option<&Ipv6Packet> { - self.inner_l3().and_then(|v| match v { - L3::Ipv6(v) => Some(v), - _ => None, - }) - } - - pub fn inner_icmp(&self) -> Option<&IcmpV4Packet> { - self.inner_ulp().and_then(|v| match v { - Ulp::IcmpV4(v) => Some(v), - _ => None, - }) - } - - pub fn inner_icmp6(&self) -> 
Option<&IcmpV6Packet> { - self.inner_ulp().and_then(|v| match v { - Ulp::IcmpV6(v) => Some(v), - _ => None, - }) - } - - pub fn inner_tcp(&self) -> Option<&TcpPacket> { - self.inner_ulp().and_then(|v| match v { - Ulp::Tcp(v) => Some(v), - _ => None, - }) - } - - pub fn inner_udp(&self) -> Option<&UdpPacket> { - self.inner_ulp().and_then(|v| match v { - Ulp::Udp(v) => Some(v), - _ => None, - }) - } - - pub fn is_inner_tcp(&self) -> bool { - matches!(self.inner_ulp(), Some(Ulp::Tcp(_))) - } - - pub fn body_segs(&self) -> &[&[u8]] - where - T::Chunk: ByteSliceMut, - { - self.body.body_segs() - } - - pub fn copy_remaining(&self) -> Vec - where - T::Chunk: ByteSliceMut, - { - let base = self.body_segs(); - let len = base.iter().map(|v| v.len()).sum(); - let mut out = Vec::with_capacity(len); - for el in base { - out.extend_from_slice(el); - } - out - } - - pub fn append_remaining(&self, buf: &mut Vec) - where - T::Chunk: ByteSliceMut, - { - let base = self.body_segs(); - let len = base.iter().map(|v| v.len()).sum(); - buf.reserve_exact(len); - for el in base { - buf.extend_from_slice(el); - } - } - - pub fn body_segs_mut(&mut self) -> &mut [&mut [u8]] - where - T::Chunk: ByteSliceMut, - { - self.body.body_segs_mut() - } - - /// Return whether the IP layer has a checksum both structurally - /// and that it is non-zero (i.e., not offloaded). - pub fn has_ip_csum(&self) -> bool { - match &self.headers.inner_l3 { - Some(L3::Ipv4(v4)) => v4.checksum() != 0, - Some(L3::Ipv6(_)) => false, - None => false, - } - } - - /// Return whether the ULP layer has a checksum both structurally - /// and that it is non-zero (i.e., not offloaded). 
- pub fn has_ulp_csum(&self) -> bool { - let csum = match &self.headers.inner_ulp { - Some(Ulp::Tcp(t)) => t.checksum(), - Some(Ulp::Udp(u)) => u.checksum(), - Some(Ulp::IcmpV4(i4)) => i4.checksum(), - Some(Ulp::IcmpV6(i6)) => i6.checksum(), - None => return false, - }; - - csum != 0 - } -} - -impl From<&PacketData> for InnerFlowId { - #[inline] - fn from(meta: &PacketData) -> Self { - let (proto, addrs) = match meta.inner_l3() { - Some(L3::Ipv4(pkt)) => ( - pkt.protocol().0, - AddrPair::V4 { src: pkt.source(), dst: pkt.destination() }, - ), - Some(L3::Ipv6(pkt)) => ( - pkt.next_layer().unwrap_or_default().0, - AddrPair::V6 { src: pkt.source(), dst: pkt.destination() }, - ), - None => (255, FLOW_ID_DEFAULT.addrs), - }; - - let (src_port, dst_port) = meta - .inner_ulp() - .map(|ulp| { - ( - ulp.true_src_port() - .or_else(|| ulp.pseudo_port()) - .unwrap_or(0), - ulp.true_dst_port() - .or_else(|| ulp.pseudo_port()) - .unwrap_or(0), - ) - }) - .unwrap_or((0, 0)); - - InnerFlowId { proto: proto.into(), addrs, src_port, dst_port } - } -} - -/// A network packet. -/// -/// A packet is made up of one or more segments. Any given header is -/// *always* contained in a single segment, i.e. a header never straddles -/// multiple segments. While it's preferable to have all headers in the -/// first segment, it *may* be the case that the headers span multiple -/// segments; but a *single* header type (e.g. the IP header) will *never* -/// straddle two segments. The payload, however, *may* span multiple segments. -/// -/// # illumos terminology -/// -/// In illumos there is no real notion of an mblk "packet" or -/// "segment": a packet is just a linked list of `mblk_t` values. -/// The "packet" is simply a pointer to the first `mblk_t` in the -/// list, which also happens to be the first "segment", and any -/// further segments are linked via `b_cont`. In the illumos -/// kernel code you'll *sometimes* find variables named `mp_head` -/// to indicate that it points to a packet. 
-/// -/// There is also the notion of a "chain" of packets. This is -/// represented by a list of `mblk_t` structure as well, but instead -/// of using `b_cont` the individual packets are linked via the -/// `b_next` field. In the illumos kernel code this this is often -/// referred to with the variable name `mp_chain`, but sometimes also -/// `mp_head` (or just `mp`). It's a bit ambiguous, and something you -/// kind of figure out as you work in the code more. Though part of me -/// would like to create some rust-like "new type pattern" in C to -/// disambiguate packets from packet chains across APIs so the -/// compiler can detect when your API is working against the wrong -/// contract (for example a function that expects a single packet but -/// is being fed a packet chain). -// -// TODO: In theory, this can be any `Read` type giving us `&mut [u8]`s, -// but in practice we are internally reliant on returning `MsgBlk`s in -// hairpin actions and the like. Fighting the battle of making this generic -// is a bridge too far for the `ingot` datapath rewrite. This might have -// value in future. 
-#[derive(Debug)] -pub struct Packet { - state: S, -} - -impl Packet> { - pub fn new(pkt: T) -> Self - where - Initialized: PacketState, - { - Self { state: Initialized { inner: pkt } } - } -} - -impl<'a, T: Read + BufferState + 'a> Packet> -where - T::Chunk: IntoBufPointer<'a> + ByteSliceMut, -{ - #[inline] - pub fn len(&self) -> usize { - self.state.inner.len() - } - - #[inline] - pub fn mblk_addr(&self) -> uintptr_t { - self.state.inner.base_ptr() - } - - #[inline] - pub fn parse_inbound( - self, - net: NP, - ) -> Result>>, ParseError> { - let len = self.len(); - let base_ptr = self.mblk_addr(); - let Packet { state: Initialized { inner } } = self; - - let meta = net.parse_inbound(inner)?; - meta.stack.validate(len)?; - - Ok(Packet { state: LiteParsed { meta, base_ptr, len } }) - } - - #[inline] - pub fn parse_outbound( - self, - net: NP, - ) -> Result>>, ParseError> { - let len = self.len(); - let base_ptr = self.mblk_addr(); - let Packet { state: Initialized { inner } } = self; - - let meta = net.parse_outbound(inner)?; - meta.stack.validate(len)?; - - Ok(Packet { state: LiteParsed { meta, base_ptr, len } }) - } -} - -impl<'a, T: Read + 'a, M: LightweightMeta> Packet> -where - T::Chunk: IntoBufPointer<'a>, -{ - #[inline] - pub fn to_full_meta(self) -> Packet> { - let Packet { state: LiteParsed { len, base_ptr, meta } } = self; - let IngotParsed { stack: headers, data, last_chunk } = meta; - - // TODO: we can probably not do this in some cases, but we - // don't have a way for headeractions to signal that they - // *may* change the fields we need in the slowpath. 
- let body_csum = headers.compute_body_csum(); - let flow = headers.flow(); - - let headers: OpteMeta<_> = headers.into(); - let initial_lens = Some( - InitialLayerLens { - outer_eth: headers.outer_eth.packet_length(), - outer_l3: headers.outer_l3.packet_length(), - outer_encap: headers.outer_encap.packet_length(), - inner_eth: headers.inner_eth.packet_length(), - inner_l3: headers.inner_l3.packet_length(), - inner_ulp: headers.inner_ulp.packet_length(), - } - .into(), - ); - let body = PktBodyWalker { - base: Some((last_chunk, data)).into(), - slice: Default::default(), - }; - let meta = Box::new(PacketData { headers, initial_lens, body }); - - Packet { - state: FullParsed { - meta, - flow, - body_csum, - base_ptr, - l4_hash: Memoised::Uninit, - body_modified: false, - len, - inner_csum_dirty: false, - }, - } - } - - #[inline] - pub fn meta(&self) -> &M { - &self.state.meta.stack - } - - #[inline] - pub fn meta_mut(&mut self) -> &mut M { - &mut self.state.meta.stack - } - - #[inline] - pub fn len(&self) -> usize { - self.state.len - } - - #[inline] - pub fn mblk_addr(&self) -> uintptr_t { - self.state.base_ptr - } - - #[inline] - pub fn flow(&self) -> InnerFlowId { - self.meta().flow() - } -} - -impl Packet> { - pub fn meta(&self) -> &PacketData { - &self.state.meta - } - - pub fn meta_mut(&mut self) -> &mut PacketData { - &mut self.state.meta - } - - pub fn checksums_dirty(&self) -> bool { - self.state.inner_csum_dirty - } - - #[inline] - /// Convert a packet's metadata into a set of instructions - /// needed to serialize all its changes to the wire. - pub fn emit_spec(mut self) -> Result - where - T::Chunk: ByteSliceMut, - { - // Roughly how this works: - // - Identify rightmost structural-changed field. - // - fill out owned versions into the push_spec of all - // extant fields we rewound past. - // - Rewind up to+including that point in original - // pkt space. 
- let l4_hash = self.l4_hash(); - let state = self.state; - let init_lens = state.meta.initial_lens.unwrap(); - let headers = state.meta.headers; - let payload_len = state.len - init_lens.hdr_len(); - let mut encapped_len = payload_len; - - let mut push_spec = OpteEmit::default(); - let mut rewind = 0; - - // structural change if: - // hdr_len is different. - // needs_emit is true (i.e., now on an owned repr). - - // Part of the initial design idea of ingot was the desire to automatically - // do this sort of thing. We are so, so far from that... - let mut force_serialize = false; - - match headers.inner_ulp { - Some(ulp) => { - let l = ulp.packet_length(); - encapped_len += l; - - if ulp.needs_emit() || l != init_lens.inner_ulp { - let inner = - push_spec.inner.get_or_insert_with(Default::default); - - inner.ulp = Some(match ulp { - Ulp::Tcp(Header::Repr(t)) => UlpRepr::Tcp(*t), - Ulp::Tcp(Header::Raw(t)) => UlpRepr::Tcp((&t).into()), - Ulp::Udp(Header::Repr(t)) => UlpRepr::Udp(*t), - Ulp::Udp(Header::Raw(t)) => UlpRepr::Udp((&t).into()), - Ulp::IcmpV4(Header::Repr(t)) => UlpRepr::IcmpV4(*t), - Ulp::IcmpV4(Header::Raw(t)) => { - UlpRepr::IcmpV4((&t).into()) - } - Ulp::IcmpV6(Header::Repr(t)) => UlpRepr::IcmpV6(*t), - Ulp::IcmpV6(Header::Raw(t)) => { - UlpRepr::IcmpV6((&t).into()) - } - }); - force_serialize = true; - rewind += init_lens.inner_ulp; - } - } - None if init_lens.inner_ulp != 0 => { - force_serialize = true; - rewind += init_lens.inner_ulp; - } - _ => {} - } - - match headers.inner_l3 { - Some(l3) => { - let l = l3.packet_length(); - encapped_len += l; - - if force_serialize || l3.needs_emit() || l != init_lens.inner_l3 - { - let inner = - push_spec.inner.get_or_insert_with(Default::default); - - inner.l3 = Some(match l3 { - L3::Ipv4(Header::Repr(v4)) => L3Repr::Ipv4(*v4), - L3::Ipv4(Header::Raw(v4)) => L3Repr::Ipv4((&v4).into()), - L3::Ipv6(Header::Repr(v6)) => L3Repr::Ipv6(*v6), - - // We can't actually do structural mods here today using OPTE, - // 
but account for the possibiliry at least. - L3::Ipv6(Header::Raw(v6)) => { - L3Repr::Ipv6(v6.to_owned(None)?) - } - }); - force_serialize = true; - rewind += init_lens.inner_l3; - } - } - None if init_lens.inner_l3 != 0 => { - force_serialize = true; - rewind += init_lens.inner_l3; - } - _ => {} - } - - // inner eth - encapped_len += headers.inner_eth.packet_length(); - if force_serialize { - let inner = push_spec.inner.get_or_insert_with(Default::default); - inner.eth = match headers.inner_eth { - Header::Repr(p) => *p, - Header::Raw(p) => (&p).into(), - }; - rewind += init_lens.inner_eth; - } - - match headers.outer_encap { - Some(encap) - if force_serialize - || encap.needs_emit() - || encap.packet_length() != init_lens.outer_encap => - { - push_spec.outer_encap = Some(match encap { - InlineHeader::Repr(o) => o, - InlineHeader::Raw(ValidEncapMeta::Geneve(u, g)) => { - EncapMeta::Geneve(GeneveMeta { - entropy: u.source(), - vni: g.vni(), - oxide_external_pkt: valid_geneve_has_oxide_external( - &g, - ), - }) - } - }); - - force_serialize = true; - rewind += init_lens.outer_encap; - } - None if init_lens.outer_encap != 0 => { - force_serialize = true; - rewind += init_lens.outer_encap; - } - _ => {} - } - - match headers.outer_l3 { - Some(l3) - if force_serialize - || l3.needs_emit() - || l3.packet_length() != init_lens.outer_l3 => - { - let encap_len = push_spec.outer_encap.packet_length(); - - push_spec.outer_ip = Some(match l3 { - L3::Ipv6(BoxedHeader::Repr(o)) => L3Repr::Ipv6(*o), - L3::Ipv4(BoxedHeader::Repr(o)) => L3Repr::Ipv4(*o), - L3::Ipv6(BoxedHeader::Raw(o)) => { - L3Repr::Ipv6((&o).to_owned(None)?) 
- } - L3::Ipv4(BoxedHeader::Raw(o)) => L3Repr::Ipv4((&o).into()), - }); - - let inner_sz = (encapped_len + encap_len) as u16; - - match &mut push_spec.outer_ip { - Some(L3Repr::Ipv4(v4)) => { - v4.total_len = (v4.ihl as u16) * 4 + inner_sz; - } - Some(L3Repr::Ipv6(v6)) => { - v6.payload_len = inner_sz; - } - _ => {} - } - - force_serialize = true; - rewind += init_lens.outer_l3; - } - None if init_lens.outer_l3 != 0 => { - force_serialize = true; - rewind += init_lens.outer_l3; - } - _ => {} - } - - match headers.outer_eth { - Some(eth) - if force_serialize - || eth.needs_emit() - || eth.packet_length() != init_lens.outer_eth => - { - push_spec.outer_eth = Some(match eth { - InlineHeader::Repr(o) => o, - InlineHeader::Raw(r) => (&r).into(), - }); - - rewind += init_lens.outer_eth; - } - None if init_lens.outer_eth != 0 => { - rewind += init_lens.outer_eth; - } - _ => {} - } - - Ok(EmitSpec { - rewind: rewind as u16, - ulp_len: encapped_len as u32, - prepend: PushSpec::Slowpath(push_spec.into()), - l4_hash, - }) - } - - pub fn len(&self) -> usize { - self.state.len - } - - #[inline] - pub fn flow(&self) -> &InnerFlowId { - &self.state.flow - } - - /// Run the [`HdrTransform`] against this packet. - #[inline] - pub fn hdr_transform( - &mut self, - xform: &HdrTransform, - ) -> Result<(), HdrTransformError> - where - T::Chunk: ByteSliceMut, - { - self.state.inner_csum_dirty |= xform.run(&mut self.state.meta)?; - - // Recomputing this is a little bit wasteful, since we're moving - // rebuilding a static repr from packet fields. This is a necessary - // part of slowpath use because layers are designed around intermediate - // flowkeys. - // - // We *could* elide this on non-compiled UFT transforms, but we do not - // need those today. - self.state.flow = InnerFlowId::from(self.meta()); - Ok(()) - } - - /// Run the [`BodyTransform`] against this packet. 
- pub fn body_transform( - &mut self, - dir: Direction, - xform: &dyn BodyTransform, - ) -> Result<(), BodyTransformError> - where - T::Chunk: ByteSliceMut, - { - // We set the flag now with the assumption that the transform - // could fail after modifying part of the body. In the future - // we could have something more sophisticated that only sets - // the flag if at least one byte was modified, but for now - // this does the job as nothing that needs top performance - // should make use of body transformations. - self.state.body_modified = true; - - match self.body_segs_mut() { - Some(mut body_segs) => xform.run(dir, &mut body_segs), - None => { - self.state.body_modified = false; - Err(BodyTransformError::NoPayload) - } - } - } - - #[inline] - pub fn body_segs(&self) -> Option<&[&[u8]]> - where - T::Chunk: ByteSliceMut, - { - let out = self.state.meta.body_segs(); - if out.is_empty() { - None - } else { - Some(out) - } - } - - #[inline] - pub fn body_segs_mut(&mut self) -> Option<&mut [&mut [u8]]> - where - T::Chunk: ByteSliceMut, - { - let out = self.state.meta.body_segs_mut(); - if out.is_empty() { - None - } else { - Some(out) - } - } - - #[inline] - pub fn mblk_addr(&self) -> uintptr_t { - self.state.base_ptr - } - - /// Compute ULP and IP header checksum from scratch. - /// - /// This should really only be used for testing, or in the case - /// where we have applied body transforms and know that any initial - /// body_csum cannot be valid. - pub fn compute_checksums(&mut self) - where - T::Chunk: ByteSliceMut, - { - let mut body_csum = Checksum::new(); - for seg in self.body_segs_mut().unwrap_or_default() { - body_csum.add_bytes(seg); - } - self.state.body_csum = Some(body_csum); - - if let Some(ulp) = &mut self.state.meta.headers.inner_ulp { - let mut csum = body_csum; - - // Unwrap: Can't have a ULP without an IP. - let ip = self.state.meta.headers.inner_l3.as_ref().unwrap(); - // Add pseudo header checksum. 
- let pseudo_csum = ip.pseudo_header(); - csum += pseudo_csum; - // Determine ULP slice and add its bytes to the - // checksum. - match ulp { - // ICMP4 requires the body_csum *without* - // the pseudoheader added back in. - Ulp::IcmpV4(i4) => { - let mut bytes = [0u8; 8]; - i4.set_checksum(0); - i4.emit_raw(&mut bytes[..]); - body_csum.add_bytes(&bytes[..]); - i4.set_checksum(body_csum.finalize_for_ingot()); - } - Ulp::IcmpV6(i6) => { - let mut bytes = [0u8; 8]; - i6.set_checksum(0); - i6.emit_raw(&mut bytes[..]); - csum.add_bytes(&bytes[..]); - i6.set_checksum(csum.finalize_for_ingot()); - } - Ulp::Tcp(tcp) => { - tcp.set_checksum(0); - match tcp { - Header::Repr(tcp) => { - let mut bytes = [0u8; 56]; - tcp.emit_raw(&mut bytes[..]); - csum.add_bytes(&bytes[..]); - } - Header::Raw(tcp) => { - csum.add_bytes(tcp.0.as_bytes()); - match &tcp.1 { - Header::Repr(opts) => { - csum.add_bytes(&*opts); - } - Header::Raw(opts) => { - csum.add_bytes(&*opts); - } - } - } - } - tcp.set_checksum(csum.finalize_for_ingot()); - } - Ulp::Udp(udp) => { - udp.set_checksum(0); - match udp { - Header::Repr(udp) => { - let mut bytes = [0u8; 8]; - udp.emit_raw(&mut bytes[..]); - csum.add_bytes(&bytes[..]); - } - Header::Raw(udp) => { - csum.add_bytes(udp.0.as_bytes()); - } - } - udp.set_checksum(csum.finalize_for_ingot()); - } - } - } - - // Compute and fill in the IPv4 header checksum. - if let Some(l3) = self.state.meta.headers.inner_l3.as_mut() { - l3.compute_checksum(); - } - } - - pub fn body_csum(&mut self) -> Option { - self.state.body_csum - } - - pub fn l4_hash(&mut self) -> u32 { - *self.state.l4_hash.get(|| { - let mut hasher = crc32fast::Hasher::new(); - self.state.flow.hash(&mut hasher); - hasher.finalize() - }) - } - - pub fn set_l4_hash(&mut self, hash: u32) { - self.state.l4_hash.set(hash); - } - - /// Perform an incremental checksum update for the ULP checksums - /// based on the stored body checksum. 
- /// - /// This avoids duplicating work already done by the client in the - /// case where checksums are **not** being offloaded to the hardware. - pub fn update_checksums(&mut self) - where - T::Chunk: ByteSliceMut, - { - // If we know that no transform touched a field which features in - // an inner transport cksum (L4/L3 src/dst, most realistically), - // and no body transform occurred then we can exit early. - if !self.checksums_dirty() && !self.state.body_modified { - return; - } - - // Flag to indicate if an IP header/ULP checksums were - // provided. If the checksum is zero, it's assumed heardware - // checksum offload is being used, and OPTE should not update - // the checksum. - let update_ip = self.state.meta.has_ip_csum(); - let update_ulp = self.state.meta.has_ulp_csum(); - - // We expect that any body transform will necessarily invalidate - // the body_csum. Recompute from scratch. - if self.state.body_modified && (update_ip || update_ulp) { - return self.compute_checksums(); - } - - // Start by reusing the known checksum of the body. - let mut body_csum = self.body_csum().unwrap_or_default(); - - // If a ULP exists, then compute and set its checksum. - if let (true, Some(ulp)) = - (update_ulp, &mut self.state.meta.headers.inner_ulp) - { - let mut csum = body_csum; - // Unwrap: Can't have a ULP without an IP. - let ip = self.state.meta.headers.inner_l3.as_ref().unwrap(); - // Add pseudo header checksum. - let pseudo_csum = ip.pseudo_header(); - csum += pseudo_csum; - // Determine ULP slice and add its bytes to the - // checksum. - match ulp { - // ICMP4 requires the body_csum *without* - // the pseudoheader added back in. 
- Ulp::IcmpV4(i4) => { - let mut bytes = [0u8; 8]; - i4.set_checksum(0); - i4.emit_raw(&mut bytes[..]); - body_csum.add_bytes(&bytes[..]); - i4.set_checksum(body_csum.finalize_for_ingot()); - } - Ulp::IcmpV6(i6) => { - let mut bytes = [0u8; 8]; - i6.set_checksum(0); - i6.emit_raw(&mut bytes[..]); - csum.add_bytes(&bytes[..]); - i6.set_checksum(csum.finalize_for_ingot()); - } - Ulp::Tcp(tcp) => { - tcp.set_checksum(0); - match tcp { - Header::Repr(tcp) => { - let mut bytes = [0u8; 56]; - tcp.emit_raw(&mut bytes[..]); - csum.add_bytes(&bytes[..]); - } - Header::Raw(tcp) => { - csum.add_bytes(tcp.0.as_bytes()); - match &tcp.1 { - Header::Repr(opts) => { - csum.add_bytes(&*opts); - } - Header::Raw(opts) => { - csum.add_bytes(&*opts); - } - } - } - } - tcp.set_checksum(csum.finalize_for_ingot()); - } - Ulp::Udp(udp) => { - udp.set_checksum(0); - match udp { - Header::Repr(udp) => { - let mut bytes = [0u8; 8]; - udp.emit_raw(&mut bytes[..]); - csum.add_bytes(&bytes[..]); - } - Header::Raw(udp) => { - csum.add_bytes(udp.0.as_bytes()); - } - } - udp.set_checksum(csum.finalize_for_ingot()); - } - } - } - - // Compute and fill in the IPv4 header checksum. - if let (true, Some(l3)) = - (update_ip, &mut self.state.meta.headers.inner_l3) - { - l3.compute_checksum(); - } - } -} - -/// The type state of a packet that has been initialized and allocated, but -/// about which nothing else is known besides the length. -#[derive(Debug)] -pub struct Initialized { - inner: T, -} - -impl PacketState for Initialized {} -impl PacketState for FullParsed {} - -/// Zerocopy view onto a parsed packet, accompanied by locally -/// computed state. -pub struct FullParsed { - /// Total length of packet, in bytes. This is equal to the sum of - /// the length of the _initialized_ window in all the segments - /// (`b_wptr - b_rptr`). - len: usize, - /// Base pointer of the contained T, used in dtrace SDTs and the like - /// for correlation and inspection of packet events. 
- base_ptr: uintptr_t, - /// Access to parsed packet headers and the packet body. - meta: Box>, - /// Current Flow ID of this packet, accountgin for any applied - /// transforms. - flow: InnerFlowId, - - /// The body's checksum. It is up to the `NetworkImpl::Parser` on - /// whether to populate this field or not. The reason for - /// populating this field is to avoid duplicate work if the client - /// has provided a ULP checksum. Rather than redoing the body - /// checksum calculation, we can use incremental checksum - /// techniques to stash the body's checksum for reuse when emitting - /// the new headers. - /// - /// However, if the client does not provide a checksum, presumably - /// because they are relying on checksum offload, this value should - /// be `None`. In such case, `emit_headers()` will perform no ULP - /// checksum update. - /// - /// This value may also be none if the packet has no notion of a - /// ULP checksum; e.g., ARP. - body_csum: Option, - /// L4 hash for this packet, computed from the flow ID. - l4_hash: Memoised, - /// Tracks whether any body transforms have been executed on this - /// packet. - body_modified: bool, - /// Tracks whether any transform has been applied to this packet - /// which would dirty the inner L3 and/or ULP header checksums. - inner_csum_dirty: bool, -} - -/// Minimum-size zerocopy view onto a parsed packet, sufficient for fast -/// packet transformation. -pub struct LiteParsed> { - /// Total length of packet, in bytes. This is equal to the sum of - /// the length of the _initialized_ window in all the segments - /// (`b_wptr - b_rptr`). - len: usize, - /// Base pointer of the contained T, used in dtrace SDTs and the like - /// for correlation and inspection of packet events. 
- base_ptr: uintptr_t, - meta: IngotParsed, -} - -impl> PacketState for LiteParsed {} - -impl> LiteParsed {} - -// These are needed for now to account for not wanting to redesign -// ActionDescs to be generic over T (trait object safety rules, etc.), -// in addition to needing to rework Hairpin actions. -pub type MblkPacketData<'a> = PacketData>; -pub type MblkFullParsed<'a> = FullParsed>; -pub type MblkLiteParsed<'a, M> = LiteParsed, M>; - -pub trait BufferState { - fn len(&self) -> usize; - fn base_ptr(&self) -> uintptr_t; -} - -/// A set of headers to be emitted at the head of a packet. -#[derive(Clone, Debug, Default)] -pub struct OpteEmit { - outer_eth: Option, - outer_ip: Option, - outer_encap: Option, - - // We can (but do not often) push/pop inner meta. - // Splitting via Box minimises struct size in the general case. - inner: Option>, -} - -/// Inner headers needing completely rewritten/emitted in a packet. -#[derive(Clone, Debug, Default)] -pub struct OpteInnerEmit { - eth: Ethernet, - l3: Option, - ulp: Option, -} - -/// A specification of how a packet should be modified to finish processing, -/// after existing fields have been updated. -/// -/// This will add and/or remove several layers from the underlying `MsgBlk`, -/// and can be queried for routing specific info (access to new encap, l4 hash). -#[derive(Clone, Debug)] -pub struct EmitSpec { - pub(crate) prepend: PushSpec, - pub(crate) l4_hash: u32, - pub(crate) rewind: u16, - pub(crate) ulp_len: u32, -} - -impl Default for EmitSpec { - fn default() -> Self { - Self { prepend: PushSpec::NoOp, l4_hash: 0, rewind: 0, ulp_len: 0 } - } -} - -impl EmitSpec { - /// Return the L4 hash of the inner flow, used for multipath selection. - #[inline] - #[must_use] - pub fn l4_hash(&self) -> u32 { - self.l4_hash - } - - /// Perform final structural transformations to a packet (removal of - /// existing headers, and copying in new/replacement headers). 
- #[inline] - #[must_use] - pub fn apply(&self, mut pkt: MsgBlk) -> MsgBlk { - // Rewind - { - let mut slots = heapless::Vec::<&mut MsgBlkNode, 6>::new(); - let mut to_rewind = self.rewind as usize; - - if to_rewind > 0 { - let mut reader = pkt.iter_mut(); - while to_rewind != 0 { - let this = reader.next(); - let Some(node) = this else { - break; - }; - - let has = node.len(); - let droppable = to_rewind.min(has); - node.drop_front_bytes(droppable) - .expect("droppable should be bounded above by len"); - to_rewind -= droppable; - - slots.push(node).unwrap(); - } - } - } - - // TODO: actually push in to existing slots we rewound past if needed, - // then run this step at the end. - // This is not really an issue in practice -- no packets should need - // to rewind *and* prepend new segments with how we're using OPTE today, - // much less so in the fastpath. - pkt.drop_empty_segments(); - - let out = match &self.prepend { - PushSpec::Fastpath(push_spec) => { - push_spec.encap.prepend(pkt, self.ulp_len as usize) - } - PushSpec::Slowpath(push_spec) => { - let mut needed_push = push_spec.outer_eth.packet_length() - + push_spec.outer_ip.packet_length() - + push_spec.outer_encap.packet_length(); - - if let Some(inner_new) = &push_spec.inner { - needed_push += inner_new.eth.packet_length() - + inner_new.l3.packet_length() - + inner_new.ulp.packet_length(); - } - - let needed_alloc = needed_push; - - let mut prepend = if needed_alloc > 0 { - let mut new_mblk = MsgBlk::new_ethernet(needed_alloc); - new_mblk.pop_all(); - Some(new_mblk) - } else { - None - }; - - if let Some(inner_new) = &push_spec.inner { - if let Some(inner_ulp) = &inner_new.ulp { - let target = if prepend.is_none() { - &mut pkt - } else { - prepend.as_mut().unwrap() - }; - - target.emit_front(inner_ulp).unwrap(); - } - - if let Some(inner_l3) = &inner_new.l3 { - let target = if prepend.is_none() { - &mut pkt - } else { - prepend.as_mut().unwrap() - }; - - target.emit_front(inner_l3).unwrap(); - } - - let 
target = if prepend.is_none() { - &mut pkt - } else { - prepend.as_mut().unwrap() - }; - - target.emit_front(&inner_new.eth).unwrap(); - } - - if let Some(outer_encap) = &push_spec.outer_encap { - let encap = SizeHoldingEncap { - encapped_len: self.ulp_len as u16, - meta: &outer_encap, - }; - - let target = if prepend.is_none() { - &mut pkt - } else { - prepend.as_mut().unwrap() - }; - - target.emit_front(&encap).unwrap(); - } - - if let Some(outer_ip) = &push_spec.outer_ip { - let target = if prepend.is_none() { - &mut pkt - } else { - prepend.as_mut().unwrap() - }; - - target.emit_front(outer_ip).unwrap(); - } - - if let Some(outer_eth) = &push_spec.outer_eth { - let target = if prepend.is_none() { - &mut pkt - } else { - prepend.as_mut().unwrap() - }; - - target.emit_front(outer_eth).unwrap(); - } - - if let Some(mut prepend) = prepend { - prepend.append(pkt); - prepend - } else { - pkt - } - } - PushSpec::NoOp => pkt, - }; - - out - } - - /// Returns the Geneve VNI when this spec pushes Geneve encapsulation. - #[inline] - pub fn outer_encap_vni(&self) -> Option { - match &self.prepend { - PushSpec::Fastpath(c) => match &c.encap { - CompiledEncap::Push { encap: EncapPush::Geneve(g), .. } => { - Some(g.vni) - } - _ => None, - }, - PushSpec::Slowpath(s) => match &s.outer_encap { - Some(EncapMeta::Geneve(g)) => Some(g.vni), - _ => None, - }, - PushSpec::NoOp => None, - } - } - - /// Returns the outer IPv6 src/dst when this spec pushes Geneve encapsulation. - #[inline] - pub fn outer_ip6_addrs(&self) -> Option<(Ipv6Addr, Ipv6Addr)> { - match &self.prepend { - PushSpec::Fastpath(c) => match &c.encap { - CompiledEncap::Push { ip: IpPush::Ip6(v6), .. } => { - Some((v6.src, v6.dst)) - } - _ => None, - }, - PushSpec::Slowpath(s) => match &s.outer_ip { - Some(L3Repr::Ipv6(v6)) => Some((v6.source, v6.destination)), - _ => None, - }, - PushSpec::NoOp => None, - } - } -} - -/// Specification of additional header layers to push at the head of a packet. 
-#[derive(Clone, Debug)] -pub enum PushSpec { - /// Bytes to prepend to packet which have been serialised ahead of time - /// and can be copied in one shot. - Fastpath(Arc), - /// Full representations of each header to serialise and prepend ahead - /// of the current packet contents. - Slowpath(Box), - /// No prepend. - NoOp, -} - -#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Ord, PartialOrd, Default)] -pub enum Memoised { - #[default] - Uninit, - Known(T), -} - -impl Memoised { - #[inline] - pub fn get(&mut self, or: impl FnOnce() -> T) -> &T { - if self.try_get().is_none() { - self.set(or()); - } - - self.try_get().unwrap() - } - - #[inline] - pub fn try_get(&self) -> Option<&T> { - match self { - Memoised::Uninit => None, - Memoised::Known(v) => Some(v), - } - } - - #[inline] - pub fn set(&mut self, val: T) { - *self = Self::Known(val); - } -} diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index 13e5732e..1a49b9f1 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -11,13 +11,13 @@ use super::flow_table::FlowEntry; use super::flow_table::FlowTable; use super::flow_table::FlowTableDump; use super::flow_table::FLOW_DEF_EXPIRE_SECS; -use super::ingot_packet::MblkFullParsed; -use super::ingot_packet::MblkPacketData; -use super::ingot_packet::Packet; use super::ioctl; use super::ioctl::ActionDescEntryDump; use super::packet::BodyTransformError; use super::packet::InnerFlowId; +use super::packet::MblkFullParsed; +use super::packet::MblkPacketData; +use super::packet::Packet; use super::packet::FLOW_ID_DEFAULT; use super::port::meta::ActionMeta; use super::port::Transforms; diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index d005186a..ac434abc 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -37,18 +37,16 @@ pub mod tcp_state; #[macro_use] pub mod udp; -pub mod ingot_packet; - use crate::ddi::mblk::MsgBlk; use checksum::Checksum; use ingot::tcp::TcpRef; use 
ingot::types::IntoBufPointer; use ingot::types::Parsed as IngotParsed; use ingot::types::Read; -use ingot_packet::FullParsed; -use ingot_packet::OpteMeta; -use ingot_packet::Packet; pub use opte_api::Direction; +use packet::FullParsed; +use packet::OpteMeta; +use packet::Packet; use parse::ValidNoEncap; use rule::CompiledTransform; use zerocopy::ByteSlice; diff --git a/lib/opte/src/engine/nat.rs b/lib/opte/src/engine/nat.rs index ec62edb7..1307f0a3 100644 --- a/lib/opte/src/engine/nat.rs +++ b/lib/opte/src/engine/nat.rs @@ -8,9 +8,9 @@ use super::headers::HeaderAction; use super::headers::IpMod; -use super::ingot_packet::MblkFullParsed; -use super::ingot_packet::Packet; use super::packet::InnerFlowId; +use super::packet::MblkFullParsed; +use super::packet::Packet; use super::port::meta::ActionMeta; use super::predicate::DataPredicate; use super::predicate::Predicate; diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 791cc2de..6c3e1ed2 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -11,25 +11,89 @@ //! * Add hardware offload information to [`Packet`]. //! 
+use super::checksum::Checksum; +use super::ether::Ethernet; +use super::ether::EthernetPacket; +use super::ether::ValidEthernet; +use super::headers::EncapMeta; +use super::headers::EncapPush; use super::headers::IpAddr; +use super::headers::IpPush; +use super::headers::SizeHoldingEncap; +use super::headers::ValidEncapMeta; use super::headers::AF_INET; use super::headers::AF_INET6; use super::ip::v4::Ipv4Addr; +use super::ip::v4::Ipv4Packet; +use super::ip::v4::Ipv4Ref; use super::ip::v4::Protocol; use super::ip::v6::Ipv6Addr; +use super::ip::v6::Ipv6Packet; +use super::ip::v6::Ipv6Ref; +use super::ip::L3Repr; +use super::ip::L3; +use super::parse::NoEncap; +use super::parse::Ulp; +use super::parse::UlpRepr; +use super::rule::CompiledEncap; +use super::rule::CompiledTransform; +use super::rule::HdrTransform; +use super::rule::HdrTransformError; use super::Direction; +use super::LightweightMeta; +use super::NetworkParser; use crate::d_error::DError; +use crate::ddi::mblk::MsgBlk; +use crate::ddi::mblk::MsgBlkIterMut; +use crate::ddi::mblk::MsgBlkNode; +use crate::engine::geneve::valid_geneve_has_oxide_external; +use crate::engine::geneve::GeneveMeta; +use alloc::boxed::Box; use alloc::string::String; +use alloc::sync::Arc; +use alloc::vec::Vec; +use core::cell::Cell; use core::ffi::CStr; use core::fmt; use core::fmt::Display; use core::hash::Hash; +use core::ops::Deref; +use core::ops::DerefMut; use core::result; +use core::sync::atomic::AtomicPtr; use crc32fast::Hasher; use dyn_clone::DynClone; +use illumos_sys_hdrs::uintptr_t; +use ingot::geneve::GeneveRef; +use ingot::icmp::IcmpV4Mut; +use ingot::icmp::IcmpV4Packet; +use ingot::icmp::IcmpV4Ref; +use ingot::icmp::IcmpV6Mut; +use ingot::icmp::IcmpV6Packet; +use ingot::icmp::IcmpV6Ref; +use ingot::tcp::TcpMut; +use ingot::tcp::TcpPacket; +use ingot::tcp::TcpRef; +use ingot::types::BoxedHeader; +use ingot::types::Emit; +use ingot::types::Header; +use ingot::types::HeaderLen; +use ingot::types::InlineHeader; +use 
ingot::types::IntoBufPointer; +use ingot::types::NextLayer; use ingot::types::PacketParseError; +use ingot::types::Parsed as IngotParsed; +use ingot::types::Read; +use ingot::types::ToOwnedPacket; +use ingot::udp::UdpMut; +use ingot::udp::UdpPacket; +use ingot::udp::UdpRef; +use opte_api::Vni; use serde::Deserialize; use serde::Serialize; +use zerocopy::ByteSlice; +use zerocopy::ByteSliceMut; +use zerocopy::IntoBytes; pub static FLOW_ID_DEFAULT: InnerFlowId = InnerFlowId { proto: 255, @@ -315,17 +379,1408 @@ pub enum WriteError { pub type WriteResult = result::Result; +pub struct InitialLayerLens { + pub outer_eth: usize, + pub outer_l3: usize, + pub outer_encap: usize, + + pub inner_eth: usize, + pub inner_l3: usize, + pub inner_ulp: usize, +} + +impl InitialLayerLens { + #[inline] + pub fn hdr_len(&self) -> usize { + self.outer_eth + + self.outer_l3 + + self.outer_encap + + self.inner_eth + + self.inner_l3 + + self.inner_ulp + } +} + +/// Full metadata representation for a packet entering the standard ULP +/// path, or a full table walk over the slowpath. +pub struct OpteMeta { + pub outer_eth: Option>>, + pub outer_l3: Option>, + pub outer_encap: Option>>, + + pub inner_eth: EthernetPacket, + pub inner_l3: Option>, + pub inner_ulp: Option>, +} + +/// Helper for reusing access to all packet body segments. +/// +/// This is necessary because `MsgBlk`s in particular do not +/// allow us to walk backward within a packet -- if we need them, +/// then we need to save them out for all future uses. +/// The other part is that the majority of packets (ULP hits) +/// do not want to interact with body segments at all. +struct PktBodyWalker { + base: Cell, T)>>, + slice: AtomicPtr>, +} + +impl Drop for PktBodyWalker { + fn drop(&mut self) { + let ptr = self.slice.load(core::sync::atomic::Ordering::Relaxed); + if !ptr.is_null() { + // Reacquire and drop. 
+ unsafe { + let _ = Box::from_raw(ptr); + } + } + } +} + +impl PktBodyWalker { + fn reify_body_segs(&self) + where + ::Chunk: ByteSliceMut, + { + if let Some((mut first, mut rest)) = self.base.take() { + // SAFETY: ByteSlice requires as part of its API + // that any implementors are stable, so we will always + // get the same view via deref. We are then consuming them + // into references which live exactly as long as their initial + // form. + // + // The next question is one of ownership. + // We know that these chunks are at least &[u8]s, they + // *will* be exclusive if ByteSliceMut is met (because they are + // sourced from an exclusive borrow on something which owns a [u8]). + // This allows us to cast to &mut later, but not here! + let mut to_hold = vec![]; + if let Some(ref mut chunk) = first { + let as_bytes = chunk.deref_mut(); + to_hold.push(unsafe { core::mem::transmute(as_bytes) }); + } + + // TODO(drop-safety): we need to give these chunks a longer life, too. + while let Ok(chunk) = rest.next_chunk() { + let as_bytes = chunk.deref(); + to_hold.push(unsafe { core::mem::transmute(as_bytes) }); + } + + let to_store = Box::into_raw(Box::new(to_hold.into_boxed_slice())); + + self.slice + .compare_exchange( + core::ptr::null_mut(), + to_store, + core::sync::atomic::Ordering::Relaxed, + core::sync::atomic::Ordering::Relaxed, + ) + .expect("unexpected concurrent access to body_seg memoiser"); + + // SAFETY: + // Replace contents to get correct drop behaviour on T. + // Currently the only ByteSlice impls are &[u8] and friends, + // but this may extend to e.g. Vec in future. 
+ self.base.set(Some((first, rest))); + } + } + + fn body_segs(&self) -> &[&[u8]] + where + T::Chunk: ByteSliceMut, + { + let mut slice_ptr = + self.slice.load(core::sync::atomic::Ordering::Relaxed); + if slice_ptr.is_null() { + self.reify_body_segs(); + slice_ptr = self.slice.load(core::sync::atomic::Ordering::Relaxed); + } + assert!(!slice_ptr.is_null()); + + unsafe { + let a = (&*(*slice_ptr)) as *const _; + core::mem::transmute(a) + } + } + + fn body_segs_mut(&mut self) -> &mut [&mut [u8]] + where + T::Chunk: ByteSliceMut, + { + let mut slice_ptr = + self.slice.load(core::sync::atomic::Ordering::Relaxed); + if slice_ptr.is_null() { + self.reify_body_segs(); + slice_ptr = self.slice.load(core::sync::atomic::Ordering::Relaxed); + } + assert!(!slice_ptr.is_null()); + + // SAFETY: We have an exclusive reference, and the ByteSliceMut + // bound guarantees that this packet view was constructed from + // an exclusive reference. In turn, we know that we are the only + // possible referent. + unsafe { + let a = (&mut *(*slice_ptr)) as *mut _; + core::mem::transmute(a) + } + } +} + +/// Packet state for the standard ULP path, or a full table walk over the slowpath.
+pub struct PacketData { + pub(crate) headers: OpteMeta, + initial_lens: Option>, + body: PktBodyWalker, +} + +impl From> for OpteMeta { + #[inline] + fn from(value: NoEncap) -> Self { + OpteMeta { + outer_eth: None, + outer_l3: None, + outer_encap: None, + inner_eth: value.inner_eth, + inner_l3: value.inner_l3, + inner_ulp: value.inner_ulp, + } + } +} + +impl core::fmt::Debug for PacketData { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.write_str("PacketHeaders(..)") + } +} + +impl PacketData { + pub fn initial_lens(&self) -> Option<&InitialLayerLens> { + self.initial_lens.as_ref().map(|v| &**v) + } + + pub fn outer_ether( + &self, + ) -> Option<&InlineHeader>> { + self.headers.outer_eth.as_ref() + } + + pub fn outer_ip(&self) -> Option<&L3> { + self.headers.outer_l3.as_ref() + } + + /// Returns whether this packet is sourced from outside the rack, + /// in addition to its VNI. + pub fn outer_encap_geneve_vni_and_origin(&self) -> Option<(Vni, bool)> { + match &self.headers.outer_encap { + Some(InlineHeader::Repr(EncapMeta::Geneve(g))) => { + Some((g.vni, g.oxide_external_pkt)) + } + Some(InlineHeader::Raw(ValidEncapMeta::Geneve(_, g))) => { + Some((g.vni(), valid_geneve_has_oxide_external(&g))) + } + None => None, + } + } + + pub fn inner_ether(&self) -> &EthernetPacket { + &self.headers.inner_eth + } + + pub fn inner_l3(&self) -> Option<&L3> { + self.headers.inner_l3.as_ref() + } + + pub fn inner_ulp(&self) -> Option<&Ulp> { + self.headers.inner_ulp.as_ref() + } + + pub fn inner_ip4(&self) -> Option<&Ipv4Packet> { + self.inner_l3().and_then(|v| match v { + L3::Ipv4(v) => Some(v), + _ => None, + }) + } + + pub fn inner_ip6(&self) -> Option<&Ipv6Packet> { + self.inner_l3().and_then(|v| match v { + L3::Ipv6(v) => Some(v), + _ => None, + }) + } + + pub fn inner_icmp(&self) -> Option<&IcmpV4Packet> { + self.inner_ulp().and_then(|v| match v { + Ulp::IcmpV4(v) => Some(v), + _ => None, + }) + } + + pub fn inner_icmp6(&self) -> 
Option<&IcmpV6Packet> { + self.inner_ulp().and_then(|v| match v { + Ulp::IcmpV6(v) => Some(v), + _ => None, + }) + } + + pub fn inner_tcp(&self) -> Option<&TcpPacket> { + self.inner_ulp().and_then(|v| match v { + Ulp::Tcp(v) => Some(v), + _ => None, + }) + } + + pub fn inner_udp(&self) -> Option<&UdpPacket> { + self.inner_ulp().and_then(|v| match v { + Ulp::Udp(v) => Some(v), + _ => None, + }) + } + + pub fn is_inner_tcp(&self) -> bool { + matches!(self.inner_ulp(), Some(Ulp::Tcp(_))) + } + + pub fn body_segs(&self) -> &[&[u8]] + where + T::Chunk: ByteSliceMut, + { + self.body.body_segs() + } + + pub fn copy_remaining(&self) -> Vec + where + T::Chunk: ByteSliceMut, + { + let base = self.body_segs(); + let len = base.iter().map(|v| v.len()).sum(); + let mut out = Vec::with_capacity(len); + for el in base { + out.extend_from_slice(el); + } + out + } + + pub fn append_remaining(&self, buf: &mut Vec) + where + T::Chunk: ByteSliceMut, + { + let base = self.body_segs(); + let len = base.iter().map(|v| v.len()).sum(); + buf.reserve_exact(len); + for el in base { + buf.extend_from_slice(el); + } + } + + pub fn body_segs_mut(&mut self) -> &mut [&mut [u8]] + where + T::Chunk: ByteSliceMut, + { + self.body.body_segs_mut() + } + + /// Return whether the IP layer has a checksum both structurally + /// and that it is non-zero (i.e., not offloaded). + pub fn has_ip_csum(&self) -> bool { + match &self.headers.inner_l3 { + Some(L3::Ipv4(v4)) => v4.checksum() != 0, + Some(L3::Ipv6(_)) => false, + None => false, + } + } + + /// Return whether the ULP layer has a checksum both structurally + /// and that it is non-zero (i.e., not offloaded). 
+ pub fn has_ulp_csum(&self) -> bool { + let csum = match &self.headers.inner_ulp { + Some(Ulp::Tcp(t)) => t.checksum(), + Some(Ulp::Udp(u)) => u.checksum(), + Some(Ulp::IcmpV4(i4)) => i4.checksum(), + Some(Ulp::IcmpV6(i6)) => i6.checksum(), + None => return false, + }; + + csum != 0 + } +} + +impl From<&PacketData> for InnerFlowId { + #[inline] + fn from(meta: &PacketData) -> Self { + let (proto, addrs) = match meta.inner_l3() { + Some(L3::Ipv4(pkt)) => ( + pkt.protocol().0, + AddrPair::V4 { src: pkt.source(), dst: pkt.destination() }, + ), + Some(L3::Ipv6(pkt)) => ( + pkt.next_layer().unwrap_or_default().0, + AddrPair::V6 { src: pkt.source(), dst: pkt.destination() }, + ), + None => (255, FLOW_ID_DEFAULT.addrs), + }; + + let (src_port, dst_port) = meta + .inner_ulp() + .map(|ulp| { + ( + ulp.true_src_port() + .or_else(|| ulp.pseudo_port()) + .unwrap_or(0), + ulp.true_dst_port() + .or_else(|| ulp.pseudo_port()) + .unwrap_or(0), + ) + }) + .unwrap_or((0, 0)); + + InnerFlowId { proto: proto.into(), addrs, src_port, dst_port } + } +} + +/// A network packet. +/// +/// A packet is made up of one or more segments. Any given header is +/// *always* contained in a single segment, i.e. a header never straddles +/// multiple segments. While it's preferable to have all headers in the +/// first segment, it *may* be the case that the headers span multiple +/// segments; but a *single* header type (e.g. the IP header) will *never* +/// straddle two segments. The payload, however, *may* span multiple segments. +/// +/// # illumos terminology +/// +/// In illumos there is no real notion of an mblk "packet" or +/// "segment": a packet is just a linked list of `mblk_t` values. +/// The "packet" is simply a pointer to the first `mblk_t` in the +/// list, which also happens to be the first "segment", and any +/// further segments are linked via `b_cont`. In the illumos +/// kernel code you'll *sometimes* find variables named `mp_head` +/// to indicate that it points to a packet. 
+/// +/// There is also the notion of a "chain" of packets. This is +/// represented by a list of `mblk_t` structures as well, but instead +/// of using `b_cont` the individual packets are linked via the +/// `b_next` field. In the illumos kernel code this is often +/// referred to with the variable name `mp_chain`, but sometimes also +/// `mp_head` (or just `mp`). It's a bit ambiguous, and something you +/// kind of figure out as you work in the code more. Though part of me +/// would like to create some rust-like "new type pattern" in C to +/// disambiguate packets from packet chains across APIs so the +/// compiler can detect when your API is working against the wrong +/// contract (for example a function that expects a single packet but +/// is being fed a packet chain). +// +// TODO: In theory, this can be any `Read` type giving us `&mut [u8]`s, +// but in practice we are internally reliant on returning `MsgBlk`s in +// hairpin actions and the like. Fighting the battle of making this generic +// is a bridge too far for the `ingot` datapath rewrite. This might have +// value in future.
+#[derive(Debug)] +pub struct Packet { + state: S, +} + +impl Packet> { + pub fn new(pkt: T) -> Self + where + Initialized: PacketState, + { + Self { state: Initialized { inner: pkt } } + } +} + +impl<'a, T: Read + BufferState + 'a> Packet> +where + T::Chunk: IntoBufPointer<'a> + ByteSliceMut, +{ + #[inline] + pub fn len(&self) -> usize { + self.state.inner.len() + } + + #[inline] + pub fn mblk_addr(&self) -> uintptr_t { + self.state.inner.base_ptr() + } + + #[inline] + pub fn parse_inbound( + self, + net: NP, + ) -> Result>>, ParseError> { + let len = self.len(); + let base_ptr = self.mblk_addr(); + let Packet { state: Initialized { inner } } = self; + + let meta = net.parse_inbound(inner)?; + meta.stack.validate(len)?; + + Ok(Packet { state: LiteParsed { meta, base_ptr, len } }) + } + + #[inline] + pub fn parse_outbound( + self, + net: NP, + ) -> Result>>, ParseError> { + let len = self.len(); + let base_ptr = self.mblk_addr(); + let Packet { state: Initialized { inner } } = self; + + let meta = net.parse_outbound(inner)?; + meta.stack.validate(len)?; + + Ok(Packet { state: LiteParsed { meta, base_ptr, len } }) + } +} + +impl<'a, T: Read + 'a, M: LightweightMeta> Packet> +where + T::Chunk: IntoBufPointer<'a>, +{ + #[inline] + pub fn to_full_meta(self) -> Packet> { + let Packet { state: LiteParsed { len, base_ptr, meta } } = self; + let IngotParsed { stack: headers, data, last_chunk } = meta; + + // TODO: we can probably not do this in some cases, but we + // don't have a way for headeractions to signal that they + // *may* change the fields we need in the slowpath. 
+ let body_csum = headers.compute_body_csum(); + let flow = headers.flow(); + + let headers: OpteMeta<_> = headers.into(); + let initial_lens = Some( + InitialLayerLens { + outer_eth: headers.outer_eth.packet_length(), + outer_l3: headers.outer_l3.packet_length(), + outer_encap: headers.outer_encap.packet_length(), + inner_eth: headers.inner_eth.packet_length(), + inner_l3: headers.inner_l3.packet_length(), + inner_ulp: headers.inner_ulp.packet_length(), + } + .into(), + ); + let body = PktBodyWalker { + base: Some((last_chunk, data)).into(), + slice: Default::default(), + }; + let meta = Box::new(PacketData { headers, initial_lens, body }); + + Packet { + state: FullParsed { + meta, + flow, + body_csum, + base_ptr, + l4_hash: Memoised::Uninit, + body_modified: false, + len, + inner_csum_dirty: false, + }, + } + } + + #[inline] + pub fn meta(&self) -> &M { + &self.state.meta.stack + } + + #[inline] + pub fn meta_mut(&mut self) -> &mut M { + &mut self.state.meta.stack + } + + #[inline] + pub fn len(&self) -> usize { + self.state.len + } + + #[inline] + pub fn mblk_addr(&self) -> uintptr_t { + self.state.base_ptr + } + + #[inline] + pub fn flow(&self) -> InnerFlowId { + self.meta().flow() + } +} + +impl Packet> { + pub fn meta(&self) -> &PacketData { + &self.state.meta + } + + pub fn meta_mut(&mut self) -> &mut PacketData { + &mut self.state.meta + } + + pub fn checksums_dirty(&self) -> bool { + self.state.inner_csum_dirty + } + + #[inline] + /// Convert a packet's metadata into a set of instructions + /// needed to serialize all its changes to the wire. + pub fn emit_spec(mut self) -> Result + where + T::Chunk: ByteSliceMut, + { + // Roughly how this works: + // - Identify rightmost structural-changed field. + // - fill out owned versions into the push_spec of all + // extant fields we rewound past. + // - Rewind up to+including that point in original + // pkt space. 
+ let l4_hash = self.l4_hash(); + let state = self.state; + let init_lens = state.meta.initial_lens.unwrap(); + let headers = state.meta.headers; + let payload_len = state.len - init_lens.hdr_len(); + let mut encapped_len = payload_len; + + let mut push_spec = OpteEmit::default(); + let mut rewind = 0; + + // structural change if: + // hdr_len is different. + // needs_emit is true (i.e., now on an owned repr). + + // Part of the initial design idea of ingot was the desire to automatically + // do this sort of thing. We are so, so far from that... + let mut force_serialize = false; + + match headers.inner_ulp { + Some(ulp) => { + let l = ulp.packet_length(); + encapped_len += l; + + if ulp.needs_emit() || l != init_lens.inner_ulp { + let inner = + push_spec.inner.get_or_insert_with(Default::default); + + inner.ulp = Some(match ulp { + Ulp::Tcp(Header::Repr(t)) => UlpRepr::Tcp(*t), + Ulp::Tcp(Header::Raw(t)) => UlpRepr::Tcp((&t).into()), + Ulp::Udp(Header::Repr(t)) => UlpRepr::Udp(*t), + Ulp::Udp(Header::Raw(t)) => UlpRepr::Udp((&t).into()), + Ulp::IcmpV4(Header::Repr(t)) => UlpRepr::IcmpV4(*t), + Ulp::IcmpV4(Header::Raw(t)) => { + UlpRepr::IcmpV4((&t).into()) + } + Ulp::IcmpV6(Header::Repr(t)) => UlpRepr::IcmpV6(*t), + Ulp::IcmpV6(Header::Raw(t)) => { + UlpRepr::IcmpV6((&t).into()) + } + }); + force_serialize = true; + rewind += init_lens.inner_ulp; + } + } + None if init_lens.inner_ulp != 0 => { + force_serialize = true; + rewind += init_lens.inner_ulp; + } + _ => {} + } + + match headers.inner_l3 { + Some(l3) => { + let l = l3.packet_length(); + encapped_len += l; + + if force_serialize || l3.needs_emit() || l != init_lens.inner_l3 + { + let inner = + push_spec.inner.get_or_insert_with(Default::default); + + inner.l3 = Some(match l3 { + L3::Ipv4(Header::Repr(v4)) => L3Repr::Ipv4(*v4), + L3::Ipv4(Header::Raw(v4)) => L3Repr::Ipv4((&v4).into()), + L3::Ipv6(Header::Repr(v6)) => L3Repr::Ipv6(*v6), + + // We can't actually do structural mods here today using OPTE, + // 
but account for the possibility at least. + L3::Ipv6(Header::Raw(v6)) => { + L3Repr::Ipv6(v6.to_owned(None)?) + } + }); + force_serialize = true; + rewind += init_lens.inner_l3; + } + } + None if init_lens.inner_l3 != 0 => { + force_serialize = true; + rewind += init_lens.inner_l3; + } + _ => {} + } + + // inner eth + encapped_len += headers.inner_eth.packet_length(); + if force_serialize { + let inner = push_spec.inner.get_or_insert_with(Default::default); + inner.eth = match headers.inner_eth { + Header::Repr(p) => *p, + Header::Raw(p) => (&p).into(), + }; + rewind += init_lens.inner_eth; + } + + match headers.outer_encap { + Some(encap) + if force_serialize + || encap.needs_emit() + || encap.packet_length() != init_lens.outer_encap => + { + push_spec.outer_encap = Some(match encap { + InlineHeader::Repr(o) => o, + InlineHeader::Raw(ValidEncapMeta::Geneve(u, g)) => { + EncapMeta::Geneve(GeneveMeta { + entropy: u.source(), + vni: g.vni(), + oxide_external_pkt: valid_geneve_has_oxide_external( + &g, + ), + }) + } + }); + + force_serialize = true; + rewind += init_lens.outer_encap; + } + None if init_lens.outer_encap != 0 => { + force_serialize = true; + rewind += init_lens.outer_encap; + } + _ => {} + } + + match headers.outer_l3 { + Some(l3) + if force_serialize + || l3.needs_emit() + || l3.packet_length() != init_lens.outer_l3 => + { + let encap_len = push_spec.outer_encap.packet_length(); + + push_spec.outer_ip = Some(match l3 { + L3::Ipv6(BoxedHeader::Repr(o)) => L3Repr::Ipv6(*o), + L3::Ipv4(BoxedHeader::Repr(o)) => L3Repr::Ipv4(*o), + L3::Ipv6(BoxedHeader::Raw(o)) => { + L3Repr::Ipv6((&o).to_owned(None)?)
+ } + L3::Ipv4(BoxedHeader::Raw(o)) => L3Repr::Ipv4((&o).into()), + }); + + let inner_sz = (encapped_len + encap_len) as u16; + + match &mut push_spec.outer_ip { + Some(L3Repr::Ipv4(v4)) => { + v4.total_len = (v4.ihl as u16) * 4 + inner_sz; + } + Some(L3Repr::Ipv6(v6)) => { + v6.payload_len = inner_sz; + } + _ => {} + } + + force_serialize = true; + rewind += init_lens.outer_l3; + } + None if init_lens.outer_l3 != 0 => { + force_serialize = true; + rewind += init_lens.outer_l3; + } + _ => {} + } + + match headers.outer_eth { + Some(eth) + if force_serialize + || eth.needs_emit() + || eth.packet_length() != init_lens.outer_eth => + { + push_spec.outer_eth = Some(match eth { + InlineHeader::Repr(o) => o, + InlineHeader::Raw(r) => (&r).into(), + }); + + rewind += init_lens.outer_eth; + } + None if init_lens.outer_eth != 0 => { + rewind += init_lens.outer_eth; + } + _ => {} + } + + Ok(EmitSpec { + rewind: rewind as u16, + ulp_len: encapped_len as u32, + prepend: PushSpec::Slowpath(push_spec.into()), + l4_hash, + }) + } + + pub fn len(&self) -> usize { + self.state.len + } + + #[inline] + pub fn flow(&self) -> &InnerFlowId { + &self.state.flow + } + + /// Run the [`HdrTransform`] against this packet. + #[inline] + pub fn hdr_transform( + &mut self, + xform: &HdrTransform, + ) -> Result<(), HdrTransformError> + where + T::Chunk: ByteSliceMut, + { + self.state.inner_csum_dirty |= xform.run(&mut self.state.meta)?; + + // Recomputing this is a little bit wasteful, since we're + // rebuilding a static repr from packet fields. This is a necessary + // part of slowpath use because layers are designed around intermediate + // flowkeys. + // + // We *could* elide this on non-compiled UFT transforms, but we do not + // need those today. + self.state.flow = InnerFlowId::from(self.meta()); + Ok(()) + } + + /// Run the [`BodyTransform`] against this packet.
+ pub fn body_transform( + &mut self, + dir: Direction, + xform: &dyn BodyTransform, + ) -> Result<(), BodyTransformError> + where + T::Chunk: ByteSliceMut, + { + // We set the flag now with the assumption that the transform + // could fail after modifying part of the body. In the future + // we could have something more sophisticated that only sets + // the flag if at least one byte was modified, but for now + // this does the job as nothing that needs top performance + // should make use of body transformations. + self.state.body_modified = true; + + match self.body_segs_mut() { + Some(mut body_segs) => xform.run(dir, &mut body_segs), + None => { + self.state.body_modified = false; + Err(BodyTransformError::NoPayload) + } + } + } + + #[inline] + pub fn body_segs(&self) -> Option<&[&[u8]]> + where + T::Chunk: ByteSliceMut, + { + let out = self.state.meta.body_segs(); + if out.is_empty() { + None + } else { + Some(out) + } + } + + #[inline] + pub fn body_segs_mut(&mut self) -> Option<&mut [&mut [u8]]> + where + T::Chunk: ByteSliceMut, + { + let out = self.state.meta.body_segs_mut(); + if out.is_empty() { + None + } else { + Some(out) + } + } + + #[inline] + pub fn mblk_addr(&self) -> uintptr_t { + self.state.base_ptr + } + + /// Compute ULP and IP header checksum from scratch. + /// + /// This should really only be used for testing, or in the case + /// where we have applied body transforms and know that any initial + /// body_csum cannot be valid. + pub fn compute_checksums(&mut self) + where + T::Chunk: ByteSliceMut, + { + let mut body_csum = Checksum::new(); + for seg in self.body_segs_mut().unwrap_or_default() { + body_csum.add_bytes(seg); + } + self.state.body_csum = Some(body_csum); + + if let Some(ulp) = &mut self.state.meta.headers.inner_ulp { + let mut csum = body_csum; + + // Unwrap: Can't have a ULP without an IP. + let ip = self.state.meta.headers.inner_l3.as_ref().unwrap(); + // Add pseudo header checksum. 
+ let pseudo_csum = ip.pseudo_header(); + csum += pseudo_csum; + // Determine ULP slice and add its bytes to the + // checksum. + match ulp { + // ICMP4 requires the body_csum *without* + // the pseudoheader added back in. + Ulp::IcmpV4(i4) => { + let mut bytes = [0u8; 8]; + i4.set_checksum(0); + i4.emit_raw(&mut bytes[..]); + body_csum.add_bytes(&bytes[..]); + i4.set_checksum(body_csum.finalize_for_ingot()); + } + Ulp::IcmpV6(i6) => { + let mut bytes = [0u8; 8]; + i6.set_checksum(0); + i6.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + i6.set_checksum(csum.finalize_for_ingot()); + } + Ulp::Tcp(tcp) => { + tcp.set_checksum(0); + match tcp { + Header::Repr(tcp) => { + let mut bytes = [0u8; 56]; + tcp.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + } + Header::Raw(tcp) => { + csum.add_bytes(tcp.0.as_bytes()); + match &tcp.1 { + Header::Repr(opts) => { + csum.add_bytes(&*opts); + } + Header::Raw(opts) => { + csum.add_bytes(&*opts); + } + } + } + } + tcp.set_checksum(csum.finalize_for_ingot()); + } + Ulp::Udp(udp) => { + udp.set_checksum(0); + match udp { + Header::Repr(udp) => { + let mut bytes = [0u8; 8]; + udp.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + } + Header::Raw(udp) => { + csum.add_bytes(udp.0.as_bytes()); + } + } + udp.set_checksum(csum.finalize_for_ingot()); + } + } + } + + // Compute and fill in the IPv4 header checksum. + if let Some(l3) = self.state.meta.headers.inner_l3.as_mut() { + l3.compute_checksum(); + } + } + + pub fn body_csum(&mut self) -> Option { + self.state.body_csum + } + + pub fn l4_hash(&mut self) -> u32 { + *self.state.l4_hash.get(|| { + let mut hasher = crc32fast::Hasher::new(); + self.state.flow.hash(&mut hasher); + hasher.finalize() + }) + } + + pub fn set_l4_hash(&mut self, hash: u32) { + self.state.l4_hash.set(hash); + } + + /// Perform an incremental checksum update for the ULP checksums + /// based on the stored body checksum. 
+ /// + /// This avoids duplicating work already done by the client in the + /// case where checksums are **not** being offloaded to the hardware. + pub fn update_checksums(&mut self) + where + T::Chunk: ByteSliceMut, + { + // If we know that no transform touched a field which features in + // an inner transport cksum (L4/L3 src/dst, most realistically), + // and no body transform occurred then we can exit early. + if !self.checksums_dirty() && !self.state.body_modified { + return; + } + + // Flag to indicate if an IP header/ULP checksums were + // provided. If the checksum is zero, it's assumed hardware + // checksum offload is being used, and OPTE should not update + // the checksum. + let update_ip = self.state.meta.has_ip_csum(); + let update_ulp = self.state.meta.has_ulp_csum(); + + // We expect that any body transform will necessarily invalidate + // the body_csum. Recompute from scratch. + if self.state.body_modified && (update_ip || update_ulp) { + return self.compute_checksums(); + } + + // Start by reusing the known checksum of the body. + let mut body_csum = self.body_csum().unwrap_or_default(); + + // If a ULP exists, then compute and set its checksum. + if let (true, Some(ulp)) = + (update_ulp, &mut self.state.meta.headers.inner_ulp) + { + let mut csum = body_csum; + // Unwrap: Can't have a ULP without an IP. + let ip = self.state.meta.headers.inner_l3.as_ref().unwrap(); + // Add pseudo header checksum. + let pseudo_csum = ip.pseudo_header(); + csum += pseudo_csum; + // Determine ULP slice and add its bytes to the + // checksum. + match ulp { + // ICMP4 requires the body_csum *without* + // the pseudoheader added back in.
+ Ulp::IcmpV4(i4) => { + let mut bytes = [0u8; 8]; + i4.set_checksum(0); + i4.emit_raw(&mut bytes[..]); + body_csum.add_bytes(&bytes[..]); + i4.set_checksum(body_csum.finalize_for_ingot()); + } + Ulp::IcmpV6(i6) => { + let mut bytes = [0u8; 8]; + i6.set_checksum(0); + i6.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + i6.set_checksum(csum.finalize_for_ingot()); + } + Ulp::Tcp(tcp) => { + tcp.set_checksum(0); + match tcp { + Header::Repr(tcp) => { + let mut bytes = [0u8; 56]; + tcp.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + } + Header::Raw(tcp) => { + csum.add_bytes(tcp.0.as_bytes()); + match &tcp.1 { + Header::Repr(opts) => { + csum.add_bytes(&*opts); + } + Header::Raw(opts) => { + csum.add_bytes(&*opts); + } + } + } + } + tcp.set_checksum(csum.finalize_for_ingot()); + } + Ulp::Udp(udp) => { + udp.set_checksum(0); + match udp { + Header::Repr(udp) => { + let mut bytes = [0u8; 8]; + udp.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + } + Header::Raw(udp) => { + csum.add_bytes(udp.0.as_bytes()); + } + } + udp.set_checksum(csum.finalize_for_ingot()); + } + } + } + + // Compute and fill in the IPv4 header checksum. + if let (true, Some(l3)) = + (update_ip, &mut self.state.meta.headers.inner_l3) + { + l3.compute_checksum(); + } + } +} + +/// The type state of a packet that has been initialized and allocated, but +/// about which nothing else is known besides the length. +#[derive(Debug)] +pub struct Initialized { + inner: T, +} + +impl PacketState for Initialized {} +impl PacketState for FullParsed {} + +/// Zerocopy view onto a parsed packet, accompanied by locally +/// computed state. +pub struct FullParsed { + /// Total length of packet, in bytes. This is equal to the sum of + /// the length of the _initialized_ window in all the segments + /// (`b_wptr - b_rptr`). + len: usize, + /// Base pointer of the contained T, used in dtrace SDTs and the like + /// for correlation and inspection of packet events. 
+ base_ptr: uintptr_t, + /// Access to parsed packet headers and the packet body. + meta: Box>, + /// Current Flow ID of this packet, accounting for any applied + /// transforms. + flow: InnerFlowId, + + /// The body's checksum. It is up to the `NetworkImpl::Parser` on + /// whether to populate this field or not. The reason for + /// populating this field is to avoid duplicate work if the client + /// has provided a ULP checksum. Rather than redoing the body + /// checksum calculation, we can use incremental checksum + /// techniques to stash the body's checksum for reuse when emitting + /// the new headers. + /// + /// However, if the client does not provide a checksum, presumably + /// because they are relying on checksum offload, this value should + /// be `None`. In such case, `emit_headers()` will perform no ULP + /// checksum update. + /// + /// This value may also be none if the packet has no notion of a + /// ULP checksum; e.g., ARP. + body_csum: Option, + /// L4 hash for this packet, computed from the flow ID. + l4_hash: Memoised, + /// Tracks whether any body transforms have been executed on this + /// packet. + body_modified: bool, + /// Tracks whether any transform has been applied to this packet + /// which would dirty the inner L3 and/or ULP header checksums. + inner_csum_dirty: bool, +} + +/// Minimum-size zerocopy view onto a parsed packet, sufficient for fast +/// packet transformation. +pub struct LiteParsed> { + /// Total length of packet, in bytes. This is equal to the sum of + /// the length of the _initialized_ window in all the segments + /// (`b_wptr - b_rptr`). + len: usize, + /// Base pointer of the contained T, used in dtrace SDTs and the like + /// for correlation and inspection of packet events.
+ base_ptr: uintptr_t, + meta: IngotParsed, +} + +impl> PacketState for LiteParsed {} + +impl> LiteParsed {} + +// These are needed for now to account for not wanting to redesign +// ActionDescs to be generic over T (trait object safety rules, etc.), +// in addition to needing to rework Hairpin actions. +pub type MblkPacketData<'a> = PacketData>; +pub type MblkFullParsed<'a> = FullParsed>; +pub type MblkLiteParsed<'a, M> = LiteParsed, M>; + +pub trait BufferState { + fn len(&self) -> usize; + fn base_ptr(&self) -> uintptr_t; +} + +/// A set of headers to be emitted at the head of a packet. +#[derive(Clone, Debug, Default)] +pub struct OpteEmit { + outer_eth: Option, + outer_ip: Option, + outer_encap: Option, + + // We can (but do not often) push/pop inner meta. + // Splitting via Box minimises struct size in the general case. + inner: Option>, +} + +/// Inner headers needing completely rewritten/emitted in a packet. +#[derive(Clone, Debug, Default)] +pub struct OpteInnerEmit { + eth: Ethernet, + l3: Option, + ulp: Option, +} + +/// A specification of how a packet should be modified to finish processing, +/// after existing fields have been updated. +/// +/// This will add and/or remove several layers from the underlying `MsgBlk`, +/// and can be queried for routing specific info (access to new encap, l4 hash). +#[derive(Clone, Debug)] +pub struct EmitSpec { + pub(crate) prepend: PushSpec, + pub(crate) l4_hash: u32, + pub(crate) rewind: u16, + pub(crate) ulp_len: u32, +} + +impl Default for EmitSpec { + fn default() -> Self { + Self { prepend: PushSpec::NoOp, l4_hash: 0, rewind: 0, ulp_len: 0 } + } +} + +impl EmitSpec { + /// Return the L4 hash of the inner flow, used for multipath selection. + #[inline] + #[must_use] + pub fn l4_hash(&self) -> u32 { + self.l4_hash + } + + /// Perform final structural transformations to a packet (removal of + /// existing headers, and copying in new/replacement headers). 
+ #[inline] + #[must_use] + pub fn apply(&self, mut pkt: MsgBlk) -> MsgBlk { + // Rewind + { + let mut slots = heapless::Vec::<&mut MsgBlkNode, 6>::new(); + let mut to_rewind = self.rewind as usize; + + if to_rewind > 0 { + let mut reader = pkt.iter_mut(); + while to_rewind != 0 { + let this = reader.next(); + let Some(node) = this else { + break; + }; + + let has = node.len(); + let droppable = to_rewind.min(has); + node.drop_front_bytes(droppable) + .expect("droppable should be bounded above by len"); + to_rewind -= droppable; + + slots.push(node).unwrap(); + } + } + } + + // TODO: actually push in to existing slots we rewound past if needed, + // then run this step at the end. + // This is not really an issue in practice -- no packets should need + // to rewind *and* prepend new segments with how we're using OPTE today, + // much less so in the fastpath. + pkt.drop_empty_segments(); + + let out = match &self.prepend { + PushSpec::Fastpath(push_spec) => { + push_spec.encap.prepend(pkt, self.ulp_len as usize) + } + PushSpec::Slowpath(push_spec) => { + let mut needed_push = push_spec.outer_eth.packet_length() + + push_spec.outer_ip.packet_length() + + push_spec.outer_encap.packet_length(); + + if let Some(inner_new) = &push_spec.inner { + needed_push += inner_new.eth.packet_length() + + inner_new.l3.packet_length() + + inner_new.ulp.packet_length(); + } + + let needed_alloc = needed_push; + + let mut prepend = if needed_alloc > 0 { + let mut new_mblk = MsgBlk::new_ethernet(needed_alloc); + new_mblk.pop_all(); + Some(new_mblk) + } else { + None + }; + + if let Some(inner_new) = &push_spec.inner { + if let Some(inner_ulp) = &inner_new.ulp { + let target = if prepend.is_none() { + &mut pkt + } else { + prepend.as_mut().unwrap() + }; + + target.emit_front(inner_ulp).unwrap(); + } + + if let Some(inner_l3) = &inner_new.l3 { + let target = if prepend.is_none() { + &mut pkt + } else { + prepend.as_mut().unwrap() + }; + + target.emit_front(inner_l3).unwrap(); + } + + let 
target = if prepend.is_none() { + &mut pkt + } else { + prepend.as_mut().unwrap() + }; + + target.emit_front(&inner_new.eth).unwrap(); + } + + if let Some(outer_encap) = &push_spec.outer_encap { + let encap = SizeHoldingEncap { + encapped_len: self.ulp_len as u16, + meta: &outer_encap, + }; + + let target = if prepend.is_none() { + &mut pkt + } else { + prepend.as_mut().unwrap() + }; + + target.emit_front(&encap).unwrap(); + } + + if let Some(outer_ip) = &push_spec.outer_ip { + let target = if prepend.is_none() { + &mut pkt + } else { + prepend.as_mut().unwrap() + }; + + target.emit_front(outer_ip).unwrap(); + } + + if let Some(outer_eth) = &push_spec.outer_eth { + let target = if prepend.is_none() { + &mut pkt + } else { + prepend.as_mut().unwrap() + }; + + target.emit_front(outer_eth).unwrap(); + } + + if let Some(mut prepend) = prepend { + prepend.append(pkt); + prepend + } else { + pkt + } + } + PushSpec::NoOp => pkt, + }; + + out + } + + /// Returns the Geneve VNI when this spec pushes Geneve encapsulation. + #[inline] + pub fn outer_encap_vni(&self) -> Option { + match &self.prepend { + PushSpec::Fastpath(c) => match &c.encap { + CompiledEncap::Push { encap: EncapPush::Geneve(g), .. } => { + Some(g.vni) + } + _ => None, + }, + PushSpec::Slowpath(s) => match &s.outer_encap { + Some(EncapMeta::Geneve(g)) => Some(g.vni), + _ => None, + }, + PushSpec::NoOp => None, + } + } + + /// Returns the outer IPv6 src/dst when this spec pushes Geneve encapsulation. + #[inline] + pub fn outer_ip6_addrs(&self) -> Option<(Ipv6Addr, Ipv6Addr)> { + match &self.prepend { + PushSpec::Fastpath(c) => match &c.encap { + CompiledEncap::Push { ip: IpPush::Ip6(v6), .. } => { + Some((v6.src, v6.dst)) + } + _ => None, + }, + PushSpec::Slowpath(s) => match &s.outer_ip { + Some(L3Repr::Ipv6(v6)) => Some((v6.source, v6.destination)), + _ => None, + }, + PushSpec::NoOp => None, + } + } +} + +/// Specification of additional header layers to push at the head of a packet. 
+#[derive(Clone, Debug)] +pub enum PushSpec { + /// Bytes to prepend to packet which have been serialised ahead of time + /// and can be copied in one shot. + Fastpath(Arc), + /// Full representations of each header to serialise and prepend ahead + /// of the current packet contents. + Slowpath(Box), + /// No prepend. + NoOp, +} + +#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Ord, PartialOrd, Default)] +pub enum Memoised { + #[default] + Uninit, + Known(T), +} + +impl Memoised { + #[inline] + pub fn get(&mut self, or: impl FnOnce() -> T) -> &T { + if self.try_get().is_none() { + self.set(or()); + } + + self.try_get().unwrap() + } + + #[inline] + pub fn try_get(&self) -> Option<&T> { + match self { + Memoised::Uninit => None, + Memoised::Known(v) => Some(v), + } + } + + #[inline] + pub fn set(&mut self, val: T) { + *self = Self::Known(val); + } +} + #[cfg(test)] mod test { use super::*; use crate::ddi::mblk::MsgBlk; use crate::engine::ether::Ethernet; use crate::engine::ether::EthernetRef; - use crate::engine::ingot_packet::Packet; use crate::engine::ip::v4::Ipv4; use crate::engine::ip::v4::Ipv4Ref; use crate::engine::ip::v6::Ipv6; use crate::engine::ip::v6::Ipv6Ref; + use crate::engine::packet::Packet; use crate::engine::GenericUlp; use ingot::ethernet::Ethertype; use ingot::ip::IpProtocol; diff --git a/lib/opte/src/engine/parse.rs b/lib/opte/src/engine/parse.rs index 032a3563..5a7683a4 100644 --- a/lib/opte/src/engine/parse.rs +++ b/lib/opte/src/engine/parse.rs @@ -24,7 +24,6 @@ use super::headers::ValidEncapMeta; use super::icmp::IcmpEchoMut; use super::icmp::QueryEcho; use super::icmp::ValidIcmpEcho; -use super::ingot_packet::OpteMeta; use super::ip::v4::Ipv4Mut; use super::ip::v4::Ipv4Ref; use super::ip::v6::Ipv6Mut; @@ -35,6 +34,7 @@ use super::ip::L3; use super::packet::AddrPair; use super::packet::InnerFlowId; use super::packet::MismatchError; +use super::packet::OpteMeta; use super::packet::ParseError; use super::packet::FLOW_ID_DEFAULT; use 
super::rule::CompiledTransform; diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index dc1b49e3..db82f444 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -18,11 +18,6 @@ use super::headers::EncapPush; use super::headers::HeaderAction; use super::headers::IpPush; use super::headers::UlpHeaderAction; -use super::ingot_packet::FullParsed; -use super::ingot_packet::LiteParsed; -use super::ingot_packet::MblkFullParsed; -use super::ingot_packet::MblkPacketData; -use super::ingot_packet::Packet; use super::ioctl; use super::ioctl::TcpFlowEntryDump; use super::ioctl::TcpFlowStateDump; @@ -38,7 +33,12 @@ use super::layer::LayerStatsSnap; use super::layer::RuleId; use super::packet::BodyTransform; use super::packet::BodyTransformError; +use super::packet::FullParsed; use super::packet::InnerFlowId; +use super::packet::LiteParsed; +use super::packet::MblkFullParsed; +use super::packet::MblkPacketData; +use super::packet::Packet; use super::packet::FLOW_ID_DEFAULT; use super::rule::Action; use super::rule::CompiledTransform; @@ -67,8 +67,8 @@ use crate::ddi::sync::KMutex; use crate::ddi::sync::KMutexType; use crate::ddi::time::Moment; use crate::engine::flow_table::ExpiryPolicy; -use crate::engine::ingot_packet::EmitSpec; -use crate::engine::ingot_packet::PushSpec; +use crate::engine::packet::EmitSpec; +use crate::engine::packet::PushSpec; use crate::engine::rule::CompiledEncap; use crate::ExecCtx; use alloc::boxed::Box; diff --git a/lib/opte/src/engine/predicate.rs b/lib/opte/src/engine/predicate.rs index a9ea55f7..f709e27c 100644 --- a/lib/opte/src/engine/predicate.rs +++ b/lib/opte/src/engine/predicate.rs @@ -12,7 +12,6 @@ use super::ether::EtherType; use super::ether::EthernetRef; use super::icmp::v4::MessageType as IcmpMessageType; use super::icmp::v6::MessageType as Icmpv6MessageType; -use super::ingot_packet::MblkPacketData; use super::ip::v4::Ipv4Addr; use super::ip::v4::Ipv4Cidr; use super::ip::v4::Ipv4Ref; @@ -21,6 
+20,7 @@ use super::ip::v6::Ipv6Addr; use super::ip::v6::Ipv6Cidr; use super::ip::v6::Ipv6Ref; use super::ip::L3; +use super::packet::MblkPacketData; use super::port::meta::ActionMeta; use alloc::boxed::Box; use alloc::string::String; diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 7ff7ab71..40fd1bc2 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -21,13 +21,13 @@ use super::headers::IpPush; use super::headers::Transform; use super::headers::UlpHeaderAction; use super::headers::UlpMetaModify; -use super::ingot_packet::MblkFullParsed; -use super::ingot_packet::MblkPacketData; -use super::ingot_packet::Packet; -use super::ingot_packet::PacketData; use super::ip::L3; use super::packet::BodyTransform; use super::packet::InnerFlowId; +use super::packet::MblkFullParsed; +use super::packet::MblkPacketData; +use super::packet::Packet; +use super::packet::PacketData; use super::port::meta::ActionMeta; use super::predicate::DataPredicate; use super::predicate::Predicate; diff --git a/lib/opte/src/engine/snat.rs b/lib/opte/src/engine/snat.rs index 9795d043..90f0ab77 100644 --- a/lib/opte/src/engine/snat.rs +++ b/lib/opte/src/engine/snat.rs @@ -11,9 +11,9 @@ use super::headers::IpMod; use super::headers::UlpGenericModify; use super::headers::UlpHeaderAction; use super::headers::UlpMetaModify; -use super::ingot_packet::MblkFullParsed; -use super::ingot_packet::Packet; use super::packet::InnerFlowId; +use super::packet::MblkFullParsed; +use super::packet::Packet; use super::port::meta::ActionMeta; use super::predicate::DataPredicate; use super::predicate::Predicate; diff --git a/lib/oxide-vpc/src/engine/gateway/mod.rs b/lib/oxide-vpc/src/engine/gateway/mod.rs index 4c508e78..0067deb3 100644 --- a/lib/oxide-vpc/src/engine/gateway/mod.rs +++ b/lib/oxide-vpc/src/engine/gateway/mod.rs @@ -56,11 +56,11 @@ use opte::api::Direction; use opte::api::OpteError; use opte::engine::ether::EtherMod; use 
opte::engine::headers::HeaderAction; -use opte::engine::ingot_packet::MblkPacketData; use opte::engine::layer::DefaultAction; use opte::engine::layer::Layer; use opte::engine::layer::LayerActions; use opte::engine::packet::InnerFlowId; +use opte::engine::packet::MblkPacketData; use opte::engine::port::meta::ActionMeta; use opte::engine::port::PortBuilder; use opte::engine::port::Pos; diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index 40dc3b98..8b270197 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -20,10 +20,10 @@ use opte::engine::arp::ValidArpEthIpv4; use opte::engine::arp::ARP_HTYPE_ETHERNET; use opte::engine::ether::EthernetRef; use opte::engine::flow_table::FlowTable; -use opte::engine::ingot_packet::FullParsed; -use opte::engine::ingot_packet::Packet; use opte::engine::ip::v4::Ipv4Addr; +use opte::engine::packet::FullParsed; use opte::engine::packet::InnerFlowId; +use opte::engine::packet::Packet; use opte::engine::packet::ParseError; use opte::engine::parse::ValidGeneveOverV6; use opte::engine::parse::ValidNoEncap; diff --git a/lib/oxide-vpc/src/engine/overlay.rs b/lib/oxide-vpc/src/engine/overlay.rs index bea86a66..7999111e 100644 --- a/lib/oxide-vpc/src/engine/overlay.rs +++ b/lib/oxide-vpc/src/engine/overlay.rs @@ -42,7 +42,6 @@ use opte::engine::headers::HeaderAction; use opte::engine::headers::IpAddr; use opte::engine::headers::IpCidr; use opte::engine::headers::IpPush; -use opte::engine::ingot_packet::MblkPacketData; use opte::engine::ip::v4::Protocol; use opte::engine::ip::v6::Ipv6Addr; use opte::engine::ip::v6::Ipv6Cidr; @@ -51,6 +50,7 @@ use opte::engine::layer::DefaultAction; use opte::engine::layer::Layer; use opte::engine::layer::LayerActions; use opte::engine::packet::InnerFlowId; +use opte::engine::packet::MblkPacketData; use opte::engine::port::meta::ActionMeta; use opte::engine::port::meta::ActionMetaValue; use opte::engine::port::PortBuilder; diff --git 
a/lib/oxide-vpc/tests/fuzz_regression.rs b/lib/oxide-vpc/tests/fuzz_regression.rs index ab52ee04..40cdd24d 100644 --- a/lib/oxide-vpc/tests/fuzz_regression.rs +++ b/lib/oxide-vpc/tests/fuzz_regression.rs @@ -10,7 +10,7 @@ //! of OPTE panic in the past, and ensure that it does not today. use opte::ddi::mblk::MsgBlk; -use opte::engine::ingot_packet::Packet; +use opte::engine::packet::Packet; use oxide_vpc::engine::VpcParser; use serde::Deserialize; use serde::Serialize; diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index 4e4636c4..0d52584d 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -28,8 +28,6 @@ use opte::engine::ether::Ethernet; use opte::engine::ether::EthernetRef; use opte::engine::flow_table::FLOW_DEF_EXPIRE_SECS; use opte::engine::geneve::Vni; -use opte::engine::ingot_packet::MblkFullParsed; -use opte::engine::ingot_packet::Packet; use opte::engine::ip::v4::Ipv4Addr; use opte::engine::ip::v4::Ipv4Ref; use opte::engine::ip::v6::Ipv6; @@ -37,7 +35,9 @@ use opte::engine::ip::v6::Ipv6Ref; use opte::engine::ip::ValidL3; use opte::engine::ip::L3; use opte::engine::packet::InnerFlowId; +use opte::engine::packet::MblkFullParsed; use opte::engine::packet::MismatchError; +use opte::engine::packet::Packet; use opte::engine::parse::ValidUlp; use opte::engine::port::ProcessError; use opte::engine::tcp::TcpState; diff --git a/xde/src/xde.rs b/xde/src/xde.rs index cd0e1dc1..8bd9d52c 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -64,10 +64,10 @@ use opte::ddi::time::Periodic; use opte::engine::ether::EthernetRef; use opte::engine::geneve::Vni; use opte::engine::headers::IpAddr; -use opte::engine::ingot_packet::Packet; use opte::engine::ioctl::{self as api}; use opte::engine::ip::v6::Ipv6Addr; use opte::engine::packet::InnerFlowId; +use opte::engine::packet::Packet; use opte::engine::packet::ParseError; use opte::engine::port::Port; use 
opte::engine::port::PortBuilder; From 852cfd46a8b49e6d9dd421704fb2e2a5724f9a91 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 29 Oct 2024 15:34:17 +0000 Subject: [PATCH 075/115] Licensing, docs fixes. --- crates/opte-api/src/ip.rs | 2 +- crates/opte-api/src/mac.rs | 2 +- lib/opte/src/ddi/mblk.rs | 7 +++++-- lib/opte/src/ddi/time.rs | 2 +- lib/opte/src/engine/checksum.rs | 2 +- lib/opte/src/engine/flow_table.rs | 2 +- lib/opte/src/engine/ip/v6.rs | 2 ++ lib/opte/src/engine/nat.rs | 2 +- lib/opte/src/engine/predicate.rs | 2 +- lib/opte/src/engine/rule.rs | 9 ++++----- lib/opte/src/engine/snat.rs | 2 +- lib/opte/src/engine/tcp_state.rs | 2 +- lib/opte/src/lib.rs | 2 +- lib/oxide-vpc/src/engine/gateway/dhcp.rs | 2 +- lib/oxide-vpc/src/engine/mod.rs | 2 +- lib/oxide-vpc/src/engine/overlay.rs | 2 +- lib/oxide-vpc/tests/firewall_tests.rs | 6 ++++++ 17 files changed, 30 insertions(+), 20 deletions(-) diff --git a/crates/opte-api/src/ip.rs b/crates/opte-api/src/ip.rs index b23b05f9..c41b1017 100644 --- a/crates/opte-api/src/ip.rs +++ b/crates/opte-api/src/ip.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company use super::mac::MacAddr; use crate::DomainName; diff --git a/crates/opte-api/src/mac.rs b/crates/opte-api/src/mac.rs index 36ec8c5e..48d70c93 100644 --- a/crates/opte-api/src/mac.rs +++ b/crates/opte-api/src/mac.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-// Copyright 2022 Oxide Computer Company +// Copyright 2024 Oxide Computer Company use alloc::str::FromStr; use alloc::string::String; diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index 6f1abbed..34ed278b 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -45,7 +45,8 @@ struct MsgBlkChainInner { /// Network packets are provided by illumos as a linked list of linked lists, /// using the `b_next` and `b_prev` fields. /// -/// See the documentation for [`super::packet::Packet`] and/or [`MsgBlk`] for full context. +/// See the documentation for [`crate::engine::packet::Packet`] and/or [`MsgBlk`] +/// for full context. // TODO: We might retool this type now that MsgBlk does not decompose // each mblk_t into individual segments (i.e., packets could be allocated // a lifetime via PhantomData based on whether we want to remove them from the chain or modify in place). @@ -191,6 +192,8 @@ impl Drop for MsgBlkChain { /// via [`Packet::wrap_mblk()`]. In reality this is typically holding /// an Ethernet _frame_, but we prefer to use the colloquial /// nomenclature of "packet". +/// +/// [`Packet::wrap_mblk()`]: crate::engine::packet::Packet::wrap_mblk #[derive(Debug)] pub struct MsgBlk { pub inner: NonNull, @@ -783,7 +786,7 @@ impl<'a> BufferState for MsgBlkIterMut<'a> { } /// For the `no_std`/illumos kernel environment, we want the `mblk_t` -/// drop to occur at the [`Packet`] level, where we can make use of +/// drop to occur at the packet level, where we can make use of /// `freemsg(9F)`. impl Drop for MsgBlk { fn drop(&mut self) { diff --git a/lib/opte/src/ddi/time.rs b/lib/opte/src/ddi/time.rs index 09734850..f470033c 100644 --- a/lib/opte/src/ddi/time.rs +++ b/lib/opte/src/ddi/time.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2022 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! 
Moments, periodics, etc. use core::ops::Add; diff --git a/lib/opte/src/engine/checksum.rs b/lib/opte/src/engine/checksum.rs index 45a1ff8f..fc8ce80b 100644 --- a/lib/opte/src/engine/checksum.rs +++ b/lib/opte/src/engine/checksum.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2022 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! Types for calculating the internet checksum. //! diff --git a/lib/opte/src/engine/flow_table.rs b/lib/opte/src/engine/flow_table.rs index a3809ed9..4e165894 100644 --- a/lib/opte/src/engine/flow_table.rs +++ b/lib/opte/src/engine/flow_table.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! The flow table implementation. //! diff --git a/lib/opte/src/engine/ip/v6.rs b/lib/opte/src/engine/ip/v6.rs index 45ba42f1..daafd6f2 100644 --- a/lib/opte/src/engine/ip/v6.rs +++ b/lib/opte/src/engine/ip/v6.rs @@ -4,6 +4,8 @@ // Copyright 2024 Oxide Computer Company +//! IPv6 Headers. + use crate::engine::headers::HeaderActionError; use crate::engine::packet::MismatchError; use crate::engine::packet::ParseError; diff --git a/lib/opte/src/engine/nat.rs b/lib/opte/src/engine/nat.rs index 1307f0a3..8f9c54c4 100644 --- a/lib/opte/src/engine/nat.rs +++ b/lib/opte/src/engine/nat.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! 1:1 NAT. diff --git a/lib/opte/src/engine/predicate.rs b/lib/opte/src/engine/predicate.rs index f709e27c..4241ec6f 100644 --- a/lib/opte/src/engine/predicate.rs +++ b/lib/opte/src/engine/predicate.rs @@ -2,7 +2,7 @@ // License, v. 
2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! Predicates used for `Rule` matching. diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 40fd1bc2..68553b4c 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -460,7 +460,7 @@ impl HdrTransform { } /// Run this header transformation against the passed in - /// [`PacketMeta`], mutating it in place. + /// [`PacketData`], mutating it in place. /// /// Returns whether the inner checksum needs recomputed. /// @@ -652,10 +652,9 @@ impl From for GenBtError { pub trait HairpinAction: Display { /// Generate a [`Packet`] to hairpin back to the source. The /// `meta` argument holds the packet metadata, including any - /// modifications made by previous layers up to this point. The - /// `rdr` argument provides a [`PacketReader`] against - /// [`Packet`], with its starting position set to the - /// beginning of the packet's payload. + /// modifications made by previous layers up to this point. + /// This also provides access to a reader over the packet body, + /// positioned after the parsed metadata. fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult; /// Return the predicates implicit to this action. diff --git a/lib/opte/src/engine/snat.rs b/lib/opte/src/engine/snat.rs index 90f0ab77..1d9675dc 100644 --- a/lib/opte/src/engine/snat.rs +++ b/lib/opte/src/engine/snat.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! Types for working with IP Source NAT, both IPv4 and IPv6. 
diff --git a/lib/opte/src/engine/tcp_state.rs b/lib/opte/src/engine/tcp_state.rs index 97446e56..207839cf 100644 --- a/lib/opte/src/engine/tcp_state.rs +++ b/lib/opte/src/engine/tcp_state.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2022 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! Basic TCP state machine. diff --git a/lib/opte/src/lib.rs b/lib/opte/src/lib.rs index e2f1a31c..a8e12e44 100644 --- a/lib/opte/src/lib.rs +++ b/lib/opte/src/lib.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company #![cfg_attr(not(feature = "std"), no_std)] #![allow(clippy::len_without_is_empty)] diff --git a/lib/oxide-vpc/src/engine/gateway/dhcp.rs b/lib/oxide-vpc/src/engine/gateway/dhcp.rs index 21c78373..d10698e6 100644 --- a/lib/oxide-vpc/src/engine/gateway/dhcp.rs +++ b/lib/oxide-vpc/src/engine/gateway/dhcp.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! The DHCP implementation of the Virtual Gateway. diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index 8b270197..696e24dc 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company pub mod firewall; pub mod gateway; diff --git a/lib/oxide-vpc/src/engine/overlay.rs b/lib/oxide-vpc/src/engine/overlay.rs index 7999111e..1255d52f 100644 --- a/lib/oxide-vpc/src/engine/overlay.rs +++ b/lib/oxide-vpc/src/engine/overlay.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! The Oxide Network VPC Overlay. //! diff --git a/lib/oxide-vpc/tests/firewall_tests.rs b/lib/oxide-vpc/tests/firewall_tests.rs index c7db7882..22a4fa98 100644 --- a/lib/oxide-vpc/tests/firewall_tests.rs +++ b/lib/oxide-vpc/tests/firewall_tests.rs @@ -1,3 +1,9 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2024 Oxide Computer Company + use opte::ddi::mblk::MsgBlk; use opte_test_utils as common; From a60f5981a4f1fcf3730d87eb1d6dd7c08a8d0140 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 29 Oct 2024 15:37:40 +0000 Subject: [PATCH 076/115] An import. 
--- Cargo.lock | 18 +++++++++--------- bench/src/kbench/remote.rs | 2 ++ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d82593a6..06ea71e8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -902,7 +902,7 @@ dependencies = [ "ingot-types", "macaddr", "serde", - "zerocopy 0.8.3", + "zerocopy 0.8.7", ] [[package]] @@ -925,7 +925,7 @@ source = "git+https://github.com/oxidecomputer/ingot.git?rev=3b38859ca143eaa1287 dependencies = [ "ingot-macros", "macaddr", - "zerocopy 0.8.3", + "zerocopy 0.8.7", ] [[package]] @@ -1288,7 +1288,7 @@ dependencies = [ "tabwriter", "usdt", "version_check", - "zerocopy 0.8.3", + "zerocopy 0.8.7", ] [[package]] @@ -1391,7 +1391,7 @@ dependencies = [ "tabwriter", "usdt", "uuid", - "zerocopy 0.8.3", + "zerocopy 0.8.7", ] [[package]] @@ -2760,11 +2760,11 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.3" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "199837a02c176ffe66ac6e3f6195ff49ed0ae9c0fc9c905970f924909812aba6" +checksum = "bb3da5f7220f919a6c7af7c856435a68ee1582fd7a77aa72936257d8335bd6f6" dependencies = [ - "zerocopy-derive 0.8.3", + "zerocopy-derive 0.8.7", ] [[package]] @@ -2780,9 +2780,9 @@ dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.8.3" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c76c8bc3d9d3594dabe11d4ffab6cd71cc2c3ce38526c6de5a0d81dd0039627" +checksum = "2e5f54f3cc93cd80745404626681b4b9fca9a867bad5a8424b618eb0db1ae6ea" dependencies = [ "proc-macro2", "quote", diff --git a/bench/src/kbench/remote.rs b/bench/src/kbench/remote.rs index 7e6b1965..2c811407 100644 --- a/bench/src/kbench/remote.rs +++ b/bench/src/kbench/remote.rs @@ -8,6 +8,8 @@ //! over physical links. 
use super::*; +#[cfg(target_os = "illumos")] +use std::collections::HashSet; use std::io::Read; use std::io::Write; use std::net::Ipv6Addr; From de099e8590101f96973347a448af62a95d7b8651 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 29 Oct 2024 15:38:46 +0000 Subject: [PATCH 077/115] Again. --- lib/opte/src/ddi/mblk.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index 34ed278b..f0c241fc 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -189,11 +189,9 @@ impl Drop for MsgBlkChain { /// The `no_std` implementation is used when running in-kernel. The /// main difference is the `mblk_t` and `dblk_t` structures are coming /// from viona (outbound/Tx) and mac (inbound/Rx), and we consume them -/// via [`Packet::wrap_mblk()`]. In reality this is typically holding +/// via [`MsgBlk::wrap_mblk()`]. In reality this is typically holding /// an Ethernet _frame_, but we prefer to use the colloquial /// nomenclature of "packet". -/// -/// [`Packet::wrap_mblk()`]: crate::engine::packet::Packet::wrap_mblk #[derive(Debug)] pub struct MsgBlk { pub inner: NonNull, From 3fcedc96776d077a878f9230ef2c6cd6712f975e Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 29 Oct 2024 18:01:00 +0000 Subject: [PATCH 078/115] Thanks, clippy. Thippy. 
--- bin/opteadm/src/bin/opteadm.rs | 16 ++----- crates/derror-macro/src/lib.rs | 7 +-- crates/illumos-sys-hdrs/src/lib.rs | 24 +++++----- crates/opte-api/src/dns.rs | 2 +- crates/opte-api/src/lib.rs | 12 +++-- lib/opte-test-utils/src/lib.rs | 1 - lib/opte/src/d_error.rs | 12 +++-- lib/opte/src/ddi/mblk.rs | 21 ++++----- lib/opte/src/ddi/sync.rs | 7 +-- lib/opte/src/ddi/time.rs | 2 +- lib/opte/src/engine/dhcp.rs | 2 +- lib/opte/src/engine/dhcpv6/mod.rs | 12 ++--- lib/opte/src/engine/ether.rs | 13 ++--- lib/opte/src/engine/geneve.rs | 4 +- lib/opte/src/engine/headers.rs | 10 ++-- lib/opte/src/engine/icmp/v6.rs | 4 +- lib/opte/src/engine/ip/mod.rs | 12 ++--- lib/opte/src/engine/ip/v4.rs | 4 +- lib/opte/src/engine/layer.rs | 14 +++--- lib/opte/src/engine/mod.rs | 11 ++--- lib/opte/src/engine/packet.rs | 76 +++++++++++++++++------------- lib/opte/src/engine/parse.rs | 20 ++++---- lib/opte/src/engine/port.rs | 44 ++++++++--------- lib/opte/src/engine/predicate.rs | 16 +++---- lib/opte/src/engine/rule.rs | 15 +++--- lib/opte/src/lib.rs | 5 +- lib/oxide-vpc/src/api.rs | 16 +++---- lib/oxide-vpc/src/engine/mod.rs | 4 +- xde/src/dls/mod.rs | 3 +- xde/src/xde.rs | 14 +++--- 30 files changed, 197 insertions(+), 206 deletions(-) diff --git a/bin/opteadm/src/bin/opteadm.rs b/bin/opteadm/src/bin/opteadm.rs index f9d4b120..577eca9c 100644 --- a/bin/opteadm/src/bin/opteadm.rs +++ b/bin/opteadm/src/bin/opteadm.rs @@ -813,12 +813,8 @@ fn main() -> anyhow::Result<()> { .context("failed to allow on inbound direction")?; hdl.allow_cidr(&port, prefix, Direction::Out).inspect_err( |e| { - hdl.remove_cidr(&port, prefix, Direction::In).expect( - &format!( - "FATAL: failed to rollback in-direction allow \ - of {prefix} after {e}" - ), - ); + hdl.remove_cidr(&port, prefix, Direction::In).unwrap_or_else(|_| panic!("FATAL: failed to rollback in-direction allow \ + of {prefix} after {e}")); }, )?; } @@ -843,12 +839,8 @@ fn main() -> anyhow::Result<()> { remove_cidr(Direction::In) 
.context("failed to deny on inbound direction")?; remove_cidr(Direction::Out).inspect_err(|e| { - hdl.allow_cidr(&port, prefix, Direction::In).expect( - &format!( - "FATAL: failed to rollback in-direction remove \ - of {prefix} after {e}" - ), - ); + hdl.allow_cidr(&port, prefix, Direction::In).unwrap_or_else(|_| panic!("FATAL: failed to rollback in-direction remove \ + of {prefix} after {e}")); })?; } } diff --git a/crates/derror-macro/src/lib.rs b/crates/derror-macro/src/lib.rs index 4b6a8ebf..e60bf2af 100644 --- a/crates/derror-macro/src/lib.rs +++ b/crates/derror-macro/src/lib.rs @@ -18,9 +18,10 @@ struct Args { } /// Generate a `DError` implementation given a tree-structured enum -/// where only leaf nodes hold additional data. This allows for deeply -/// nested enums to be more easily understood in dtrace probes without -/// calling `format!()`. +/// where only leaf nodes hold additional data. +/// +/// This allows for deeply nested enums to be more easily understood in +/// dtrace probes without calling `format!()`. /// /// This is intended for annotating error chains such as: /// ```ignore diff --git a/crates/illumos-sys-hdrs/src/lib.rs b/crates/illumos-sys-hdrs/src/lib.rs index 1f52a7f7..e85a0680 100644 --- a/crates/illumos-sys-hdrs/src/lib.rs +++ b/crates/illumos-sys-hdrs/src/lib.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2022 Oxide Computer Company +// Copyright 2024 Oxide Computer Company #![cfg_attr(feature = "kernel", feature(extern_types))] #![allow(non_camel_case_types)] #![no_std] @@ -316,6 +316,8 @@ pub type offset_t = c_longlong; pub type pid_t = c_int; pub type zoneid_t = id_t; +/// A standard boolean in illumos. +/// /// This is a commonly used illumos kernel type. Originally I was /// basing these C types on the cty crate. But really we should just /// define the illumos types directly. 
These would make up the base @@ -331,13 +333,13 @@ pub enum boolean_t { B_TRUE, } -/// The source for this structure makes use of the -/// `_LONG_LONG_{LTOH,HTOL}` ISA macros. My guess is this is needed -/// for 32-bit userland applications using `long long *` for things -/// like file/memory addresses (where we have a 32-bit pointer -/// pointing to a 64-bit value). The macro determines if the pointer -/// is to the high 32 bits or the low 32 bits. Currently, illumos -/// always sets `_LONG_LONG_HTOL`. +// The source for this structure makes use of the +// `_LONG_LONG_{LTOH,HTOL}` ISA macros. My guess is this is needed +// for 32-bit userland applications using `long long *` for things +// like file/memory addresses (where we have a 32-bit pointer +// pointing to a 64-bit value). The macro determines if the pointer +// is to the high 32 bits or the low 32 bits. Currently, illumos +// always sets `_LONG_LONG_HTOL`. #[repr(C)] pub union lloff_t { pub _f: offset_t, // full 64-bits @@ -355,9 +357,9 @@ pub struct upper_lower { // uts/common/sys/uio.h // ====================================================================== -/// This definition assumes applications are compiled with XPG4v2 -/// (`_XPG4_2`) or later support. If we want Rust drivers to have -/// maximum userland support we will want to also support pre-XPG4v2. +// This definition assumes applications are compiled with XPG4v2 +// (`_XPG4_2`) or later support. If we want Rust drivers to have +// maximum userland support we will want to also support pre-XPG4v2. #[repr(C)] pub struct iovec_t { pub iov_base: *mut c_void, diff --git a/crates/opte-api/src/dns.rs b/crates/opte-api/src/dns.rs index 823e44fd..ad4e0d06 100644 --- a/crates/opte-api/src/dns.rs +++ b/crates/opte-api/src/dns.rs @@ -28,7 +28,7 @@ use serde::Serialize; /// /// - The string form of the name may not exceed 253 octets /// - Each label (except for possibly the last, root label) must be between 1 -/// and 63 octets. +/// and 63 octets. 
/// /// # Details /// diff --git a/crates/opte-api/src/lib.rs b/crates/opte-api/src/lib.rs index 5ead29a8..21159a0a 100644 --- a/crates/opte-api/src/lib.rs +++ b/crates/opte-api/src/lib.rs @@ -39,11 +39,13 @@ pub use mac::*; pub use ndp::*; pub use ulp::*; -/// The overall version of the API. Anytime an API is added, removed, -/// or modified, this number should increment. Currently we attach no -/// semantic meaning to the number other than as a means to verify -/// that the user and kernel are compiled for the same API. A u64 is -/// used to give future wiggle room to play bit games if neeeded. +/// The overall version of the API. +/// +/// Anytime an API is added, removed, or modified, this number should +/// increment. Currently we attach no semantic meaning to the number +/// other than as a means to verify that the user and kernel are compiled +/// for the same API. A u64 is used to give future wiggle room to play bit +/// games if needed. /// /// We rely on CI and the check-api-version.sh script to verify that /// this number is incremented anytime the oxide-api code changes. diff --git a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index af806377..3e1d1e46 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -1000,7 +1000,6 @@ fn _encap( ) -> MsgBlk { let pkt = Packet::new(inner_pkt.iter_mut()); let base_len = pkt.len(); - drop(pkt); let mut outer_geneve = Geneve { vni: dst.vni, ..Default::default() }; diff --git a/lib/opte/src/d_error.rs b/lib/opte/src/d_error.rs index edbe61f4..a9a88941 100644 --- a/lib/opte/src/d_error.rs +++ b/lib/opte/src/d_error.rs @@ -28,9 +28,11 @@ pub trait DError { static EMPTY_STRING: &CStr = c""; -/// An string list designed to be passed to a DTrace handler, which contains -/// the names of all `enum` discriminators encountered when resolving an error -/// or other result-like enum, as well as the data from a leaf node. 
+/// A string list designed to be passed to a DTrace handler. +/// +/// This contains the names of all `enum` discriminators encountered when +/// resolving an error or other result-like enum, as well as the data from a +/// leaf node. /// /// This wrapper cannot contain a null c_string pointer, so all entries are /// safe to dereference from a DTrace script. Additionally, it has a fixed @@ -170,7 +172,7 @@ pub struct LabelBlockIter<'a, const L: usize> { inner: &'a LabelBlock, } -impl<'a, const L: usize> Iterator for LabelBlockIter<'a, L> { +impl Iterator for LabelBlockIter<'_, L> { type Item = &'static CStr; fn next(&mut self) -> Option { @@ -193,7 +195,7 @@ impl<'a, const L: usize> Iterator for LabelBlockIter<'a, L> { } } -impl<'a, const L: usize> ExactSizeIterator for LabelBlockIter<'a, L> { +impl ExactSizeIterator for LabelBlockIter<'_, L> { fn len(&self) -> usize { self.inner.len - self.pos } diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index f0c241fc..721d4c17 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -11,6 +11,7 @@ use crate::engine::packet::WriteError; #[cfg(any(feature = "std", test))] use alloc::boxed::Box; use alloc::vec::Vec; +use core::cmp::Ordering; use core::marker::PhantomData; use core::mem::ManuallyDrop; use core::mem::MaybeUninit; @@ -461,22 +462,20 @@ impl MsgBlk { /// Adjusts the write pointer for this MsgBlk, initialising any extra bytes to 0. pub fn resize(&mut self, new_len: usize) -> Result<(), WriteError> { let len = self.len(); - if new_len < len { - unsafe { + match new_len.cmp(&len) { + Ordering::Less => unsafe { let mut_inner = self.inner.as_mut(); mut_inner.b_wptr = mut_inner.b_wptr.sub(len - new_len); - } - Ok(()) - } else if new_len > len { - unsafe { + Ok(()) + }, + Ordering::Greater => unsafe { self.write_back(new_len - len, |v| { // MaybeUninit::fill is unstable. 
let n = v.len(); v.as_mut_ptr().write_bytes(0, n); }) - } - } else { - Ok(()) + }, + Ordering::Equal => Ok(()), } } @@ -703,7 +702,7 @@ pub struct MsgBlkIterMut<'a> { marker: PhantomData<&'a mut MsgBlk>, } -impl<'a> MsgBlkIterMut<'a> { +impl MsgBlkIterMut<'_> { pub fn next_iter(&self) -> MsgBlkIter { let curr = self .curr @@ -763,7 +762,7 @@ impl<'a> Read for MsgBlkIterMut<'a> { } } -impl<'a> BufferState for MsgBlkIterMut<'a> { +impl BufferState for MsgBlkIterMut<'_> { #[inline] fn len(&self) -> usize { let own_blk_len = self diff --git a/lib/opte/src/ddi/sync.rs b/lib/opte/src/ddi/sync.rs index 6050a738..ac040150 100644 --- a/lib/opte/src/ddi/sync.rs +++ b/lib/opte/src/ddi/sync.rs @@ -28,9 +28,10 @@ cfg_if! { use illumos_sys_hdrs::kmutex_type_t; use illumos_sys_hdrs::krw_type_t; -/// Exposes the illumos mutex(9F) API in a safe manner. We name it -/// `KMutex` (Kernel Mutex) on purpose. The API for a kernel mutex -/// isn't quite the same as a userland `Mutex`, and there's no reason +/// Exposes the illumos mutex(9F) API in a safe manner. +/// +/// We name it `KMutex` (Kernel Mutex) on purpose. The API for a kernel +/// mutex isn't quite the same as a userland `Mutex`, and there's no reason /// that we have to use that exact name. Using `KMutex` makes it /// obvious that we are using a mutex, but not the one that comes from /// std. 
diff --git a/lib/opte/src/ddi/time.rs b/lib/opte/src/ddi/time.rs index f470033c..6efdd2ee 100644 --- a/lib/opte/src/ddi/time.rs +++ b/lib/opte/src/ddi/time.rs @@ -79,7 +79,7 @@ impl Moment { if #[cfg(all(not(feature = "std"), not(test)))] { Self { inner: unsafe { ddi::gethrtime() } } } else { - let first_ts = *FIRST_TS.get_or_init(|| Instant::now()); + let first_ts = *FIRST_TS.get_or_init(Instant::now); Self { inner: Instant::now().saturating_duration_since(first_ts) } } } diff --git a/lib/opte/src/engine/dhcp.rs b/lib/opte/src/engine/dhcp.rs index ae8af2ef..69f22519 100644 --- a/lib/opte/src/engine/dhcp.rs +++ b/lib/opte/src/engine/dhcp.rs @@ -121,7 +121,7 @@ impl From for MessageType { struct MessageTypeVisitor; -impl<'de> Visitor<'de> for MessageTypeVisitor { +impl Visitor<'_> for MessageTypeVisitor { type Value = MessageType; fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { diff --git a/lib/opte/src/engine/dhcpv6/mod.rs b/lib/opte/src/engine/dhcpv6/mod.rs index 06e9864c..caacbb76 100644 --- a/lib/opte/src/engine/dhcpv6/mod.rs +++ b/lib/opte/src/engine/dhcpv6/mod.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! Core implementation of DHCPv6 protocol. //! @@ -54,10 +54,10 @@ //! - Option Request: A list of Option codes for requested options. //! - Elapsed Time: The duration a client has been trying to talk to the server. //! - Rapid Commit: An option that tells the server to commit data to a client, -//! without waiting for a second ACK sequence of messages. +//! without waiting for a second ACK sequence of messages. //! - DNS Servers: A list of IPv6 addresses for DNS servers the client can use. //! - SNTP Servers: A list of IPv6 addresses for SNTP servers the client can -//! use. +//! use. //! //! 
See the `options` module for more details on the encoding of these in a //! message. @@ -108,18 +108,18 @@ pub const CLIENT_PORT: u16 = 546; #[derive(Clone, Debug, PartialEq)] pub struct TransactionId<'a>(pub Cow<'a, [u8]>); -impl<'a> TransactionId<'a> { +impl TransactionId<'_> { pub const SIZE: usize = 3; } -impl<'a> Deref for TransactionId<'a> { +impl Deref for TransactionId<'_> { type Target = [u8]; fn deref(&self) -> &Self::Target { &self.0 } } -impl<'a> AsRef<[u8]> for TransactionId<'a> { +impl AsRef<[u8]> for TransactionId<'_> { fn as_ref(&self) -> &[u8] { &self.0 } diff --git a/lib/opte/src/engine/ether.rs b/lib/opte/src/engine/ether.rs index cbef3b24..76dbc285 100644 --- a/lib/opte/src/engine/ether.rs +++ b/lib/opte/src/engine/ether.rs @@ -321,14 +321,11 @@ impl From { #[inline] fn from(value: EtherMeta) -> Self { - InlineHeader::Repr( - Ethernet { - destination: value.dst, - source: value.src, - ethertype: Ethertype(u16::from(value.ether_type)), - } - .into(), - ) + InlineHeader::Repr(Ethernet { + destination: value.dst, + source: value.src, + ethertype: Ethertype(u16::from(value.ether_type)), + }) } } diff --git a/lib/opte/src/engine/geneve.rs b/lib/opte/src/engine/geneve.rs index 5dd5d004..1074b18a 100644 --- a/lib/opte/src/engine/geneve.rs +++ b/lib/opte/src/engine/geneve.rs @@ -189,9 +189,7 @@ impl GeneveOption { { Ok(Self::Oxide(OxideOption::External)) } - _ => { - Err(ParseError::UnrecognisedTunnelOpt { class: class, ty: ty }) - } + _ => Err(ParseError::UnrecognisedTunnelOpt { class, ty }), } } diff --git a/lib/opte/src/engine/headers.rs b/lib/opte/src/engine/headers.rs index 637f755a..81a0f3f3 100644 --- a/lib/opte/src/engine/headers.rs +++ b/lib/opte/src/engine/headers.rs @@ -288,12 +288,12 @@ pub struct SizeHoldingEncap<'a> { pub meta: &'a EncapMeta, } -unsafe impl<'a> ingot::types::EmitDoesNotRelyOnBufContents - for SizeHoldingEncap<'a> +unsafe impl ingot::types::EmitDoesNotRelyOnBufContents + for SizeHoldingEncap<'_> { } -impl<'a> HeaderLen for 
SizeHoldingEncap<'a> { +impl HeaderLen for SizeHoldingEncap<'_> { const MINIMUM_LENGTH: usize = EncapMeta::MINIMUM_LENGTH; #[inline] @@ -302,7 +302,7 @@ impl<'a> HeaderLen for SizeHoldingEncap<'a> { } } -impl<'a> Emit for SizeHoldingEncap<'a> { +impl Emit for SizeHoldingEncap<'_> { #[inline] fn emit_raw(&self, buf: V) -> usize { match self.meta { @@ -557,7 +557,7 @@ impl UlpHeaderAction { meta.run_modify(arg)?; Ok(true) } - None => return Err(HeaderActionError::MissingHeader), + None => Err(HeaderActionError::MissingHeader), }, } } diff --git a/lib/opte/src/engine/icmp/v6.rs b/lib/opte/src/engine/icmp/v6.rs index 918112f9..878e5ba3 100644 --- a/lib/opte/src/engine/icmp/v6.rs +++ b/lib/opte/src/engine/icmp/v6.rs @@ -430,7 +430,7 @@ fn validate_neighbor_solicitation( // NS is only allowed from the unspecified address if the destination is a // solicited-node multicast address. if metadata.source() == Ipv6Addr::ANY_ADDR - && !Ipv6Addr::from(metadata.destination()).is_solicited_node_multicast() + && !metadata.destination().is_solicited_node_multicast() { return Err(GenErr::Unexpected(String::from( "Received NS from UNSPEC, but destination is not the solicited \ @@ -588,7 +588,7 @@ impl HairpinAction for NeighborAdvertisement { // Build the NA, whose data depends on how we received the packet. If // `None` is returned, the NS is not destined for us, and will be // dropped. 
- let conv_ip = metadata.source().into(); + let conv_ip = metadata.source(); let (dst_ip, advert) = match construct_neighbor_advert(self, &target_addr, &conv_ip) { Some(data) => data, diff --git a/lib/opte/src/engine/ip/mod.rs b/lib/opte/src/engine/ip/mod.rs index d1cad90b..823c12e3 100644 --- a/lib/opte/src/engine/ip/mod.rs +++ b/lib/opte/src/engine/ip/mod.rs @@ -53,9 +53,9 @@ impl L3 { } L3::Ipv6(v6) => { let mut pseudo_hdr_bytes = [0u8; 40]; - pseudo_hdr_bytes[0..16].copy_from_slice(&v6.source().as_ref()); + pseudo_hdr_bytes[0..16].copy_from_slice(v6.source().as_ref()); pseudo_hdr_bytes[16..32] - .copy_from_slice(&v6.destination().as_ref()); + .copy_from_slice(v6.destination().as_ref()); pseudo_hdr_bytes[39] = v6.next_layer().unwrap_or_default().0; let ulp_len = v6.payload_len() as u32; pseudo_hdr_bytes[32..36] @@ -95,9 +95,9 @@ impl ValidL3 { } ValidL3::Ipv6(v6) => { let mut pseudo_hdr_bytes = [0u8; 40]; - pseudo_hdr_bytes[0..16].copy_from_slice(&v6.source().as_ref()); + pseudo_hdr_bytes[0..16].copy_from_slice(v6.source().as_ref()); pseudo_hdr_bytes[16..32] - .copy_from_slice(&v6.destination().as_ref()); + .copy_from_slice(v6.destination().as_ref()); pseudo_hdr_bytes[39] = v6.next_layer().unwrap_or_default().0; let ulp_len = v6.payload_len() as u32; pseudo_hdr_bytes[32..36] @@ -135,10 +135,10 @@ impl ValidL3 { csum.add_bytes(ip.0.as_bytes()); match &ip.1 { Header::Repr(opts) => { - csum.add_bytes(&*opts); + csum.add_bytes(opts); } Header::Raw(opts) => { - csum.add_bytes(&*opts); + csum.add_bytes(opts); } } diff --git a/lib/opte/src/engine/ip/v4.rs b/lib/opte/src/engine/ip/v4.rs index bff6f911..29e6b1b7 100644 --- a/lib/opte/src/engine/ip/v4.rs +++ b/lib/opte/src/engine/ip/v4.rs @@ -91,10 +91,10 @@ impl ValidIpv4 { match &self.1 { Header::Repr(opts) => { - csum.add_bytes(&*opts); + csum.add_bytes(opts); } Header::Raw(opts) => { - csum.add_bytes(&*opts); + csum.add_bytes(opts); } } diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index 
1a49b9f1..dc80b6c1 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -866,7 +866,7 @@ impl Layer { if let Some(body_segs) = pkt.body_segs() { if let Some(bt) = - desc.gen_bt(Direction::In, pkt.meta(), &body_segs)? + desc.gen_bt(Direction::In, pkt.meta(), body_segs)? { pkt.body_transform(Direction::In, &*bt)?; xforms.body.push(bt); @@ -1057,7 +1057,7 @@ impl Layer { ); if let Some(body_segs) = pkt.body_segs() { - if let Some(bt) = desc.gen_bt(In, pkt.meta(), &body_segs)? { + if let Some(bt) = desc.gen_bt(In, pkt.meta(), body_segs)? { pkt.body_transform(In, &*bt)?; xforms.body.push(bt); } @@ -1153,7 +1153,7 @@ impl Layer { if let Some(body_segs) = pkt.body_segs() { if let Some(bt) = - desc.gen_bt(Direction::Out, pkt.meta(), &body_segs)? + desc.gen_bt(Direction::Out, pkt.meta(), body_segs)? { pkt.body_transform(Direction::Out, &*bt)?; xforms.body.push(bt); @@ -1346,9 +1346,7 @@ impl Layer { ); if let Some(body_segs) = pkt.body_segs() { - if let Some(bt) = - desc.gen_bt(Out, pkt.meta(), &body_segs)? - { + if let Some(bt) = desc.gen_bt(Out, pkt.meta(), body_segs)? { pkt.body_transform(Out, &*bt)?; xforms.body.push(bt); } @@ -1575,7 +1573,7 @@ pub enum RuleRemoveErr { NotFound, } -impl<'a> RuleTable { +impl RuleTable { fn add(&mut self, rule: Rule) { match self.find_pos(&rule) { RulePlace::End => { @@ -1599,7 +1597,7 @@ impl<'a> RuleTable { dump } - fn find_match<'b>( + fn find_match( &mut self, ifid: &InnerFlowId, pmeta: &MblkPacketData, diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index ac434abc..c414703a 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -169,13 +169,13 @@ pub struct HdlPktError(pub &'static str); /// implementation does this is two ways. /// /// 1. It provides its own unique stack of [`layer::Layer`] -/// definitions; each made up of its unique set of [`rule::Rule`] & -/// [`rule::Action`] pairings. 
Furthermore, the actions themselves may -/// be built atop generic OPTE actions or may be provided in whole by -/// the network implementation. +/// definitions; each made up of its unique set of [`rule::Rule`] & +/// [`rule::Action`] pairings. Furthermore, the actions themselves may +/// be built atop generic OPTE actions or may be provided in whole by +/// the network implementation. /// /// 2. It uses this trait to provide hooks into the parsing of packets -/// as well as single packet processing (non-flow processing). +/// as well as single packet processing (non-flow processing). /// /// OPTE itself provides a general structure for parsing; limiting the /// possible parse graph to that of a typical L2 + L3 + L4 packet, @@ -225,7 +225,6 @@ pub trait NetworkImpl { uft_out: &FlowTable>, ) -> Result where - T: Read, T::Chunk: ByteSliceMut; /// Return the parser for this network implementation. diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 6c3e1ed2..eb7fcb55 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -234,7 +234,7 @@ pub trait PacketState {} /// ULP header, and continues to the last byte of the packet. This /// transformation is currently limited to only modifying bytes; it /// does not allow adding or removing bytes (e.g. to encrypt the body). -pub trait BodyTransform: fmt::Display + DynClone { +pub trait BodyTransform: fmt::Display + DynClone + Send + Sync { /// Execute the body transformation. The body segments include /// **only** body data, starting directly after the end of the ULP /// header. @@ -308,6 +308,8 @@ pub enum ParseError { impl ParseError { fn data(&self, data: &mut [u64]) { + // Allow due to possibility of future options. 
+ #[allow(clippy::single_match)] match self { ParseError::UnrecognisedTunnelOpt { class, ty } => { [data[0], data[1]] = [*class as u64, *ty as u64]; @@ -457,13 +459,19 @@ impl PktBodyWalker { let mut to_hold = vec![]; if let Some(ref mut chunk) = first { let as_bytes = chunk.deref_mut(); - to_hold.push(unsafe { core::mem::transmute(as_bytes) }); + to_hold.push(unsafe { + core::mem::transmute::<&mut [u8], (*mut u8, usize)>( + as_bytes, + ) + }); } // TODO(drop-safety): we need to give these chunks a longer life, too. while let Ok(chunk) = rest.next_chunk() { let as_bytes = chunk.deref(); - to_hold.push(unsafe { core::mem::transmute(as_bytes) }); + to_hold.push(unsafe { + core::mem::transmute::<&[u8], (*mut u8, usize)>(as_bytes) + }); } let to_store = Box::into_raw(Box::new(to_hold.into_boxed_slice())); @@ -499,7 +507,7 @@ impl PktBodyWalker { unsafe { let a = (&*(*slice_ptr)) as *const _; - core::mem::transmute(a) + &*(a as *const [&[u8]]) } } @@ -521,7 +529,7 @@ impl PktBodyWalker { // possible referent. 
unsafe { let a = (&mut *(*slice_ptr)) as *mut _; - core::mem::transmute(a) + &mut *(a as *mut [&mut [u8]]) } } } @@ -555,7 +563,7 @@ impl core::fmt::Debug for PacketData { impl PacketData { pub fn initial_lens(&self) -> Option<&InitialLayerLens> { - self.initial_lens.as_ref().map(|v| &**v) + self.initial_lens.as_deref() } pub fn outer_ether( @@ -576,7 +584,7 @@ impl PacketData { Some((g.vni, g.oxide_external_pkt)) } Some(InlineHeader::Raw(ValidEncapMeta::Geneve(_, g))) => { - Some((g.vni(), valid_geneve_has_oxide_external(&g))) + Some((g.vni(), valid_geneve_has_oxide_external(g))) } None => None, } @@ -733,7 +741,7 @@ impl From<&PacketData> for InnerFlowId { }) .unwrap_or((0, 0)); - InnerFlowId { proto: proto.into(), addrs, src_port, dst_port } + InnerFlowId { proto, addrs, src_port, dst_port } } } @@ -1068,7 +1076,7 @@ impl Packet> { L3::Ipv6(BoxedHeader::Repr(o)) => L3Repr::Ipv6(*o), L3::Ipv4(BoxedHeader::Repr(o)) => L3Repr::Ipv4(*o), L3::Ipv6(BoxedHeader::Raw(o)) => { - L3Repr::Ipv6((&o).to_owned(None)?) + L3Repr::Ipv6(o.to_owned(None)?) 
} L3::Ipv4(BoxedHeader::Raw(o)) => L3Repr::Ipv4((&o).into()), }); @@ -1171,7 +1179,7 @@ impl Packet> { self.state.body_modified = true; match self.body_segs_mut() { - Some(mut body_segs) => xform.run(dir, &mut body_segs), + Some(body_segs) => xform.run(dir, body_segs), None => { self.state.body_modified = false; Err(BodyTransformError::NoPayload) @@ -1264,10 +1272,10 @@ impl Packet> { csum.add_bytes(tcp.0.as_bytes()); match &tcp.1 { Header::Repr(opts) => { - csum.add_bytes(&*opts); + csum.add_bytes(opts); } Header::Raw(opts) => { - csum.add_bytes(&*opts); + csum.add_bytes(opts); } } } @@ -1386,10 +1394,10 @@ impl Packet> { csum.add_bytes(tcp.0.as_bytes()); match &tcp.1 { Header::Repr(opts) => { - csum.add_bytes(&*opts); + csum.add_bytes(opts); } Header::Raw(opts) => { - csum.add_bytes(&*opts); + csum.add_bytes(opts); } } } @@ -1613,64 +1621,64 @@ impl EmitSpec { if let Some(inner_new) = &push_spec.inner { if let Some(inner_ulp) = &inner_new.ulp { - let target = if prepend.is_none() { - &mut pkt + let target = if let Some(v) = prepend.as_mut() { + v } else { - prepend.as_mut().unwrap() + &mut pkt }; target.emit_front(inner_ulp).unwrap(); } if let Some(inner_l3) = &inner_new.l3 { - let target = if prepend.is_none() { - &mut pkt + let target = if let Some(v) = prepend.as_mut() { + v } else { - prepend.as_mut().unwrap() + &mut pkt }; target.emit_front(inner_l3).unwrap(); } - let target = if prepend.is_none() { - &mut pkt + let target = if let Some(v) = prepend.as_mut() { + v } else { - prepend.as_mut().unwrap() + &mut pkt }; - target.emit_front(&inner_new.eth).unwrap(); + target.emit_front(inner_new.eth).unwrap(); } if let Some(outer_encap) = &push_spec.outer_encap { let encap = SizeHoldingEncap { encapped_len: self.ulp_len as u16, - meta: &outer_encap, + meta: outer_encap, }; - let target = if prepend.is_none() { - &mut pkt + let target = if let Some(v) = prepend.as_mut() { + v } else { - prepend.as_mut().unwrap() + &mut pkt }; target.emit_front(&encap).unwrap(); } if 
let Some(outer_ip) = &push_spec.outer_ip { - let target = if prepend.is_none() { - &mut pkt + let target = if let Some(v) = prepend.as_mut() { + v } else { - prepend.as_mut().unwrap() + &mut pkt }; target.emit_front(outer_ip).unwrap(); } if let Some(outer_eth) = &push_spec.outer_eth { - let target = if prepend.is_none() { - &mut pkt + let target = if let Some(v) = prepend.as_mut() { + v } else { - prepend.as_mut().unwrap() + &mut pkt }; target.emit_front(outer_eth).unwrap(); diff --git a/lib/opte/src/engine/parse.rs b/lib/opte/src/engine/parse.rs index 5a7683a4..81a8b6f6 100644 --- a/lib/opte/src/engine/parse.rs +++ b/lib/opte/src/engine/parse.rs @@ -126,10 +126,10 @@ impl ValidUlp { body_csum.add_bytes(tcp.0.as_bytes()); match &tcp.1 { Header::Repr(opts) => { - body_csum.add_bytes(&*opts); + body_csum.add_bytes(opts); } Header::Raw(opts) => { - body_csum.add_bytes(&*opts); + body_csum.add_bytes(opts); } } tcp.set_checksum(body_csum.finalize_for_ingot()); @@ -241,7 +241,7 @@ impl LightweightMeta for ValidNoEncap { }) .unwrap_or((0, 0)); - InnerFlowId { proto: proto.into(), addrs, src_port, dst_port } + InnerFlowId { proto, addrs, src_port, dst_port } } #[inline] @@ -346,9 +346,7 @@ impl LightweightMeta for ValidNoEncap { _ => return None, }; - let Some(pseudo_csum) = pseudo_csum else { - return None; - }; + let pseudo_csum = pseudo_csum?; self.inner_ulp.as_ref().and_then(csum_minus_hdr).map(|mut v| { if use_pseudo { @@ -384,7 +382,7 @@ impl LightweightMeta for ValidNoEncap { #[inline] fn validate(&self, pkt_len: usize) -> Result<(), ParseError> { if let Some(l3) = &self.inner_l3 { - let rem_len = pkt_len - &(&self.inner_eth, l3).packet_length(); + let rem_len = pkt_len - (&self.inner_eth, l3).packet_length(); l3.validate(rem_len)?; if let Some(ulp) = &self.inner_ulp { let rem_len = rem_len - ulp.packet_length(); @@ -470,7 +468,7 @@ impl LightweightMeta for ValidGeneveOverV6 { .or_else(|| self.inner_ulp.pseudo_port()) .unwrap_or(0); - InnerFlowId { proto: 
proto.into(), addrs, src_port, dst_port } + InnerFlowId { proto, addrs, src_port, dst_port } } #[inline] @@ -570,9 +568,7 @@ impl LightweightMeta for ValidGeneveOverV6 { _ => return None, }; - let Some(pseudo_csum) = pseudo_csum else { - return None; - }; + let pseudo_csum = pseudo_csum?; csum_minus_hdr(&self.inner_ulp).map(|mut v| { if use_pseudo { @@ -616,7 +612,7 @@ impl LightweightMeta for ValidGeneveOverV6 { validate_geneve(&self.outer_encap)?; let rem_len = rem_len - - &(&self.outer_encap, &self.outer_eth, &self.inner_l3) + - (&self.outer_encap, &self.outer_eth, &self.inner_l3) .packet_length(); self.inner_l3.validate(rem_len)?; diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index db82f444..8e82efe0 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -142,18 +142,18 @@ impl From for ProcessError { /// The result of processing a packet. /// /// * Bypass: Let this packet bypass the system; do not process it at -/// all. XXX This is probably going away as its only use is for -/// punting on traffic I didn't want to deal with yet. +/// all. XXX This is probably going away as its only use is for +/// punting on traffic I didn't want to deal with yet. /// /// * Drop: The packet has been dropped, as determined by the rules -/// or because of resource exhaustion. Included is the reason for the -/// drop. +/// or because of resource exhaustion. Included is the reason for the +/// drop. /// /// * Modified: The packet has been modified based on its matching rules. /// /// * Hairpin: One of the layers has determined that it should reply -/// directly with a packet of its own. In this case the original -/// packet is dropped. +/// directly with a packet of its own. In this case the original +/// packet is dropped. #[derive(Debug, DError)] pub enum ProcessResult { Bypass, @@ -1253,8 +1253,7 @@ impl Port { // case wherein we need to reacquire the lock -- invalidation // by TCP state. 
let mut lock = Some(self.data.lock()); - let mut data = - lock.as_mut().expect("lock should be held on this codepath"); + let data = lock.as_mut().expect("lock should be held on this codepath"); // (1) Check for UFT and precompiled. let epoch = self.epoch(); @@ -1303,7 +1302,7 @@ impl Port { Direction::Out => (owned_pair.as_ref(), Some(&flow_before)), Direction::In => (Some(&flow_before), owned_pair.as_ref()), }; - self.uft_invalidate(&mut data, ufid_out, ufid_in, epoch); + self.uft_invalidate(data, ufid_out, ufid_in, epoch); FastPathDecision::Slow } @@ -1461,11 +1460,11 @@ impl Port { // (3) Full-table processing for the packet, then drop the lock. // Cksum updates are the only thing left undone. (_, Direction::In) => { - let mut data = lock + let data = lock .as_mut() .expect("lock should be held on this codepath"); let res = self.process_in_miss( - &mut data, + data, epoch, &mut pkt, &flow_before, @@ -1482,11 +1481,11 @@ impl Port { res } (_, Direction::Out) => { - let mut data = lock + let data = lock .as_mut() .expect("lock should be held on this codepath"); - let res = self - .process_out_miss(&mut data, epoch, &mut pkt, &mut ameta); + let res = + self.process_out_miss(data, epoch, &mut pkt, &mut ameta); // Prevent double-counting reprocessed modify entries. 
if !(reprocess && matches!(res, Ok(InternalProcessResult::Modified))) @@ -1768,7 +1767,7 @@ impl Transforms { continue; } HeaderAction::Modify(m) => { - still_permissable &= !inner_ether.replace(m).is_some(); + still_permissable &= inner_ether.replace(m).is_none(); } HeaderAction::Ignore => {} } @@ -1779,14 +1778,14 @@ impl Transforms { continue; } HeaderAction::Modify(m) => { - still_permissable &= !inner_ip.replace(m).is_some(); + still_permissable &= inner_ip.replace(m).is_none(); } HeaderAction::Ignore => {} } match &transform.inner_ulp { UlpHeaderAction::Modify(m) => { - still_permissable &= !inner_ulp.replace(m).is_some(); + still_permissable &= inner_ulp.replace(m).is_none(); } UlpHeaderAction::Ignore => {} } @@ -2252,8 +2251,7 @@ impl Port { &dir, pkt_len, ), - Ok(v) => Ok(v.into()), - Err(e) => Err(e), + v => v, } } @@ -2775,7 +2773,7 @@ enum TcpDirection<'a> { Out { ufid_out: &'a InnerFlowId }, } -impl<'a> TcpDirection<'a> { +impl TcpDirection<'_> { fn dir(&self) -> Direction { match self { Self::In { .. 
} => Direction::In, @@ -2838,8 +2836,7 @@ impl TcpFlowEntryState { bytes_out: 0, }, KMutexType::Spin, - ) - .into(), + ), } } @@ -2860,8 +2857,7 @@ impl TcpFlowEntryState { bytes_out, }, KMutexType::Spin, - ) - .into(), + ), } } diff --git a/lib/opte/src/engine/predicate.rs b/lib/opte/src/engine/predicate.rs index 4241ec6f..51440b97 100644 --- a/lib/opte/src/engine/predicate.rs +++ b/lib/opte/src/engine/predicate.rs @@ -421,7 +421,7 @@ impl Predicate { Self::InnerSrcIp4(list) => match meta.inner_ip4() { Some(v4) => { - let ip = v4.source().into(); + let ip = v4.source(); for m in list { if m.matches(ip) { return true; @@ -436,7 +436,7 @@ impl Predicate { Self::InnerDstIp4(list) => match meta.inner_ip4() { Some(v4) => { - let ip = v4.destination().into(); + let ip = v4.destination(); for m in list { if m.matches(ip) { return true; @@ -451,7 +451,7 @@ impl Predicate { Self::InnerSrcIp6(list) => match meta.inner_ip6() { Some(v6) => { - let ip = v6.source().into(); + let ip = v6.source(); for m in list { if m.matches(ip) { return true; @@ -463,7 +463,7 @@ impl Predicate { Self::InnerDstIp6(list) => match meta.inner_ip6() { Some(v6) => { - let ip = v6.destination().into(); + let ip = v6.destination(); for m in list { if m.matches(ip) { return true; @@ -474,7 +474,7 @@ impl Predicate { }, Self::InnerSrcPort(list) => { - match meta.inner_ulp().map(|v| v.src_port()).flatten() { + match meta.inner_ulp().and_then(|v| v.src_port()) { // No ULP metadata or no source port (e.g. ICMPv6). None => return false, @@ -489,7 +489,7 @@ impl Predicate { } Self::InnerDstPort(list) => { - match meta.inner_ulp().map(|v| v.dst_port()).flatten() { + match meta.inner_ulp().and_then(|v| v.dst_port()) { // No ULP metadata or no destination port (e.g. ICMPv6). None => return false, @@ -592,7 +592,7 @@ impl DataPredicate { // use `PacketMeta` to determine if there is a suitable payload to // be inspected. That is, if there is no metadata for a given // header, there is certainly no payload. 
- pub(crate) fn is_match<'a>(&self, meta: &MblkPacketData) -> bool { + pub(crate) fn is_match(&self, meta: &MblkPacketData) -> bool { match self { Self::Not(pred) => !pred.is_match(meta), @@ -650,7 +650,7 @@ impl DataPredicate { Self::Dhcpv6MsgType(mt) => { let body = meta.body_segs(); - if body.len() == 0 || body[0].len() == 0 { + if body.is_empty() || body[0].is_empty() { super::err!( "Failed to read DHCPv6 message type from packet" ); diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 68553b4c..0c7607bf 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -557,10 +557,10 @@ pub trait StatefulAction: Display { /// # Errors /// /// * [`GenDescError::ResourceExhausted`]: This action relies on a - /// dynamic resource which has been exhausted. + /// dynamic resource which has been exhausted. /// /// * [`GenDescError::Unexpected`]: This action encountered an - /// unexpected error while trying to generate a descriptor. + /// unexpected error while trying to generate a descriptor. fn gen_desc( &self, flow_id: &InnerFlowId, @@ -646,9 +646,10 @@ impl From for GenBtError { /// A hairpin action is one that generates a new packet based on the /// current inbound/outbound packet, and then "hairpins" that new -/// packet back to the source of the original packet. For example, you -/// could use this to hairpin an ARP Reply in response to a guest's -/// ARP request. +/// packet back to the source of the original packet. +/// +/// For example, you could use this to hairpin an ARP Reply in response +/// to a guest's ARP request. pub trait HairpinAction: Display { /// Generate a [`Packet`] to hairpin back to the source. 
The /// `meta` argument holds the packet metadata, including any @@ -931,8 +932,8 @@ impl Rule { } } -impl<'a> Rule { - pub fn is_match<'b>( +impl Rule { + pub fn is_match( &self, meta: &MblkPacketData, action_meta: &ActionMeta, diff --git a/lib/opte/src/lib.rs b/lib/opte/src/lib.rs index a8e12e44..42ec1668 100644 --- a/lib/opte/src/lib.rs +++ b/lib/opte/src/lib.rs @@ -192,8 +192,9 @@ mod opte_provider { // ================================================================ /// A logging provider provides the means to log messages to some -/// destination based on the context in which OPTE is running. For -/// example, in a unit test this could map to `println!`. In the +/// destination based on the context in which OPTE is running. +/// +/// For example, in a unit test this could map to `println!`. In the /// illumos kernel it would map to `cmn_err(9F)`. /// /// Logging levels are provided by [`LogLevel`]. These levels will map diff --git a/lib/oxide-vpc/src/api.rs b/lib/oxide-vpc/src/api.rs index d148df53..7f8464f5 100644 --- a/lib/oxide-vpc/src/api.rs +++ b/lib/oxide-vpc/src/api.rs @@ -347,10 +347,10 @@ impl From for GuestPhysAddr { /// * Drop: Packets matching this entry are dropped. /// /// * InternetGateway: Packets matching this entry are forwarded to -/// the internet. In the case of the Oxide Network the IG is not an -/// actual destination, but rather a configuration that determines how -/// we should NAT the flow. The address in the gateway is the source -/// address that is to be used. +/// the internet. In the case of the Oxide Network the IG is not an +/// actual destination, but rather a configuration that determines how +/// we should NAT the flow. The address in the gateway is the source +/// address that is to be used. /// /// * Ip: Packets matching this entry are forwarded to the specified IP. /// @@ -358,10 +358,10 @@ impl From for GuestPhysAddr { /// matches the destination IP type. 
/// /// * VpcSubnet: Packets matching this entry are forwarded to the -/// specified VPC Subnet. In the Oxide Network this is just an -/// abstraction, it's simply allowing one subnet to talk to another. -/// There is no separate VPC router process, the real routing is done -/// by the underlay. +/// specified VPC Subnet. In the Oxide Network this is just an +/// abstraction, it's simply allowing one subnet to talk to another. +/// There is no separate VPC router process, the real routing is done +/// by the underlay. #[derive(Clone, Debug, Copy, Deserialize, Serialize)] pub enum RouterTarget { Drop, diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index 696e24dc..6ddf0ebd 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -73,8 +73,8 @@ impl VpcNetwork { { let body = pkt .body_segs() - .and_then(|v| v.get(0)) - .ok_or_else(|| HdlPktError("outbound ARP (no body)"))?; + .and_then(|v| v.first()) + .ok_or(HdlPktError("outbound ARP (no body)"))?; let (arp, ..) 
= ValidArpEthIpv4::parse(*body) .map_err(|_| HdlPktError("outbound ARP (parse)"))?; diff --git a/xde/src/dls/mod.rs b/xde/src/dls/mod.rs index 32bf2482..5cc7aaa0 100644 --- a/xde/src/dls/mod.rs +++ b/xde/src/dls/mod.rs @@ -145,8 +145,7 @@ impl DlsLink { Ok(DlsStream { inner: Some(DlsStreamInner { dld_str }), link: mph.link_id(), - } - .into()) + }) } else { self.release(mph); Err(res) diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 8bd9d52c..787e3e65 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -895,7 +895,7 @@ fn clear_xde_underlay() -> Result { msg: "underlay not yet initialized".into(), }); } - if unsafe { xde_devs.read().len() } > 0 { + if unsafe { !xde_devs.read().is_empty() } { return Err(OpteError::System { errno: EBUSY, msg: "underlay in use by attached ports".into(), @@ -1208,7 +1208,7 @@ unsafe extern "C" fn xde_detach( _ => return DDI_FAILURE, } - if xde_devs.read().len() > 0 { + if !xde_devs.read().is_empty() { warn!("failed to detach: outstanding ports"); return DDI_FAILURE; } @@ -1398,9 +1398,9 @@ fn guest_loopback_probe( } #[no_mangle] -fn guest_loopback<'a>( +fn guest_loopback( src_dev: &XdeDev, - devs: &'a KRwLockReadGuard>>, + devs: &KRwLockReadGuard>>, mut pkt: MsgBlk, vni: Vni, ) { @@ -1415,7 +1415,7 @@ fn guest_loopback<'a>( Ok(pkt) => pkt, Err(e) => { opte::engine::dbg!("Loopback bad packet: {:?}", e); - bad_packet_parse_probe(None, Direction::In, mblk_addr, &e.into()); + bad_packet_parse_probe(None, Direction::In, mblk_addr, &e); return; } @@ -1553,7 +1553,7 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { Some(src_dev.port.name_cstr()), Direction::Out, mblk_addr, - &e.into(), + &e, ); return ptr::null_mut(); } @@ -1865,7 +1865,7 @@ unsafe fn xde_rx_one( // // We don't know the port yet, thus the None. 
opte::engine::dbg!("Tx bad packet: {:?}", e); - bad_packet_parse_probe(None, Direction::In, mblk_addr, &e.into()); + bad_packet_parse_probe(None, Direction::In, mblk_addr, &e); return; } From 54219daef78247007e36786ddb6a824b09df1ed4 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 29 Oct 2024 18:07:23 +0000 Subject: [PATCH 079/115] Clippy appeasement at last. --- lib/opte-test-utils/src/lib.rs | 7 +++++-- lib/opte/src/engine/packet.rs | 9 +++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index 3e1d1e46..243d6843 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -19,6 +19,7 @@ pub mod port_state; pub use opte::api::Direction::*; pub use opte::api::MacAddr; pub use opte::ddi::mblk::MsgBlk; +use opte::ddi::mblk::MsgBlkIterMut; pub use opte::engine::ether::EtherMeta; pub use opte::engine::ether::EtherType; pub use opte::engine::ether::Ethernet; @@ -37,6 +38,8 @@ pub use opte::engine::ip::v6::Ipv6; pub use opte::engine::ip::v6::Ipv6Addr; pub use opte::engine::ip::L3Repr; pub use opte::engine::layer::DenyReason; +use opte::engine::packet::LiteInPkt; +use opte::engine::packet::LiteOutPkt; pub use opte::engine::packet::MblkLiteParsed; pub use opte::engine::packet::Packet; pub use opte::engine::packet::ParseError; @@ -111,7 +114,7 @@ macro_rules! 
expect_modified { pub fn parse_inbound( pkt: &mut MsgBlk, parser: NP, -) -> Result>>, ParseError> { +) -> Result, NP>, ParseError> { let pkt = Packet::new(pkt.iter_mut()); pkt.parse_inbound(parser) } @@ -119,7 +122,7 @@ pub fn parse_inbound( pub fn parse_outbound( pkt: &mut MsgBlk, parser: NP, -) -> Result>>, ParseError> { +) -> Result, NP>, ParseError> { let pkt = Packet::new(pkt.iter_mut()); pkt.parse_outbound(parser) } diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index eb7fcb55..3e80c391 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -796,6 +796,11 @@ impl Packet> { } } +pub type LiteInPkt = + Packet::InMeta<::Chunk>>>; +pub type LiteOutPkt = + Packet::OutMeta<::Chunk>>>; + impl<'a, T: Read + BufferState + 'a> Packet> where T::Chunk: IntoBufPointer<'a> + ByteSliceMut, @@ -814,7 +819,7 @@ where pub fn parse_inbound( self, net: NP, - ) -> Result>>, ParseError> { + ) -> Result, ParseError> { let len = self.len(); let base_ptr = self.mblk_addr(); let Packet { state: Initialized { inner } } = self; @@ -829,7 +834,7 @@ where pub fn parse_outbound( self, net: NP, - ) -> Result>>, ParseError> { + ) -> Result, ParseError> { let len = self.len(); let base_ptr = self.mblk_addr(); let Packet { state: Initialized { inner } } = self; From f7e64e5a9d9b199d159dd5a84acad08a8aa3b411 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 30 Oct 2024 10:20:16 +0000 Subject: [PATCH 080/115] Fixup fuzzers. 
--- README.adoc | 3 +++ fuzz/fuzz_targets/parse-in.rs | 9 ++++----- fuzz/fuzz_targets/parse-out.rs | 9 ++++----- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/README.adoc b/README.adoc index 32b33698..47865d64 100644 --- a/README.adoc +++ b/README.adoc @@ -30,6 +30,9 @@ While the XDE kernel module runs only on Helios, our test suite and microbenchma | `cargo kbench` | N/A + +| N/A +| `cargo +nightly fuzz run parse-in`, `cargo +nightly fuzz run parse-out` |=== More detail on our benchmarks can be found in xref:bench/README.adoc[bench/README]. diff --git a/fuzz/fuzz_targets/parse-in.rs b/fuzz/fuzz_targets/parse-in.rs index 8c55afcd..eedba5f3 100644 --- a/fuzz/fuzz_targets/parse-in.rs +++ b/fuzz/fuzz_targets/parse-in.rs @@ -1,13 +1,12 @@ #![no_main] use libfuzzer_sys::fuzz_target; +use opte::ddi::mblk::MsgBlk; use opte::engine::packet::Packet; -use oxide_vpc::api::Direction; use oxide_vpc::engine::VpcParser; fuzz_target!(|data: &[u8]| { - let mut pkt = Packet::alloc_and_expand(data.len()); - let mut wtr = pkt.seg0_wtr(); - wtr.write(data).unwrap(); - pkt.parse(Direction::In, VpcParser {}); + let mut pkt_m = MsgBlk::copy(data); + let pkt = Packet::new(pkt_m.iter_mut()); + pkt.parse_inbound(VpcParser {}); }); diff --git a/fuzz/fuzz_targets/parse-out.rs b/fuzz/fuzz_targets/parse-out.rs index fbb43b2e..7a1601c8 100644 --- a/fuzz/fuzz_targets/parse-out.rs +++ b/fuzz/fuzz_targets/parse-out.rs @@ -1,13 +1,12 @@ #![no_main] use libfuzzer_sys::fuzz_target; +use opte::ddi::mblk::MsgBlk; use opte::engine::packet::Packet; -use oxide_vpc::api::Direction; use oxide_vpc::engine::VpcParser; fuzz_target!(|data: &[u8]| { - let mut pkt = Packet::alloc_and_expand(data.len()); - let mut wtr = pkt.seg0_wtr(); - wtr.write(data).unwrap(); - pkt.parse(Direction::Out, VpcParser {}); + let mut pkt_m = MsgBlk::copy(data); + let pkt = Packet::new(pkt_m.iter_mut()); + pkt.parse_outbound(VpcParser {}); }); From 22a0edef9ddd53fdd228403095abfacd57603c12 Mon Sep 17 00:00:00 2001 
From: Kyle Simpson Date: Wed, 30 Oct 2024 14:11:13 +0000 Subject: [PATCH 081/115] Self-review pt.1 --- lib/opte-test-utils/src/icmp.rs | 65 +++++--------------------- lib/opte/Cargo.toml | 5 +- lib/opte/src/ddi/mblk.rs | 6 +-- lib/opte/src/ddi/time.rs | 5 +- lib/opte/src/engine/arp.rs | 2 - lib/opte/src/engine/dhcp.rs | 1 - lib/opte/src/engine/dhcpv6/protocol.rs | 2 + lib/opte/src/engine/flow_table.rs | 7 +-- lib/opte/src/engine/headers.rs | 10 ++-- lib/opte/src/engine/icmp/v6.rs | 2 - 10 files changed, 33 insertions(+), 72 deletions(-) diff --git a/lib/opte-test-utils/src/icmp.rs b/lib/opte-test-utils/src/icmp.rs index 17039536..bb4d26aa 100644 --- a/lib/opte-test-utils/src/icmp.rs +++ b/lib/opte-test-utils/src/icmp.rs @@ -145,40 +145,20 @@ pub fn gen_icmp_echo( .into(); ip.compute_checksum(); - let total_len = eth.packet_length() + ip.packet_length() + icmp_bytes.len(); let mut segments = vec![]; match n_segments { 1 => { - let mut pkt = MsgBlk::new_ethernet(total_len); - pkt.emit_back(&(eth, ip)).unwrap(); - pkt.write_bytes_back(&icmp_bytes).unwrap(); - - return pkt; + return MsgBlk::new_ethernet_pkt((&eth, &ip, &icmp_bytes)); } 2 => { - let mut pkt = MsgBlk::new_ethernet(eth.packet_length()); - pkt.emit_back(eth).unwrap(); - segments.push(pkt); - - let t_len = ip.packet_length() + icmp.buffer_len(); - let mut pkt = MsgBlk::new(t_len); - pkt.emit_back(ip).unwrap(); - pkt.write_bytes_back(&icmp_bytes).unwrap(); - segments.push(pkt); + segments.push(MsgBlk::new_ethernet_pkt(eth)); + segments.push(MsgBlk::new_pkt((&ip, &icmp_bytes))); } 3 => { - let mut pkt = MsgBlk::new_ethernet(eth.packet_length()); - pkt.emit_back(eth).unwrap(); - segments.push(pkt); - - let mut pkt = MsgBlk::new(ip.packet_length()); - pkt.emit_back(ip).unwrap(); - segments.push(pkt); - - let mut pkt = MsgBlk::new(icmp.buffer_len()); - pkt.write_bytes_back(&icmp_bytes).unwrap(); - segments.push(pkt); + segments.push(MsgBlk::new_ethernet_pkt(eth)); + segments.push(MsgBlk::new_pkt(ip)); + 
segments.push(MsgBlk::new_pkt(&icmp_bytes)); } _ => { panic!("only 1 2 or 3 segments allowed") @@ -270,41 +250,20 @@ pub fn gen_icmpv6_echo( ..Default::default() }; - let total_len = - eth.packet_length() + ip.packet_length() + icmp.buffer_len(); let mut segments = vec![]; match n_segments { 1 => { - let mut pkt = MsgBlk::new_ethernet(total_len); - pkt.emit_back(&(eth, ip)).unwrap(); - pkt.write_bytes_back(&body_bytes).unwrap(); - - return pkt; + return MsgBlk::new_ethernet_pkt((&eth, &ip, &body_bytes)); } 2 => { - let mut pkt = MsgBlk::new_ethernet(eth.packet_length()); - pkt.emit_back(eth).unwrap(); - segments.push(pkt); - - let t_len = ip.packet_length() + icmp.buffer_len(); - let mut pkt = MsgBlk::new(t_len); - pkt.emit_back(ip).unwrap(); - pkt.write_bytes_back(&body_bytes).unwrap(); - segments.push(pkt); + segments.push(MsgBlk::new_ethernet_pkt(eth)); + segments.push(MsgBlk::new_pkt((&ip, &body_bytes))); } 3 => { - let mut pkt = MsgBlk::new_ethernet(eth.packet_length()); - pkt.emit_back(eth).unwrap(); - segments.push(pkt); - - let mut pkt = MsgBlk::new(ip.packet_length()); - pkt.emit_back(ip).unwrap(); - segments.push(pkt); - - let mut pkt = MsgBlk::new(icmp.buffer_len()); - pkt.write_bytes_back(&body_bytes).unwrap(); - segments.push(pkt); + segments.push(MsgBlk::new_ethernet_pkt(eth)); + segments.push(MsgBlk::new_pkt(ip)); + segments.push(MsgBlk::new_pkt(&body_bytes)); } _ => { panic!("only 1 2 or 3 segments allowed") diff --git a/lib/opte/Cargo.toml b/lib/opte/Cargo.toml index 88c04cf2..dcc77f47 100644 --- a/lib/opte/Cargo.toml +++ b/lib/opte/Cargo.toml @@ -7,7 +7,7 @@ license.workspace = true repository.workspace = true [features] -default = ["api", "std", "alloc"] +default = ["api", "std"] api = [] engine = ["api", "dep:crc32fast", "dep:derror-macro", "dep:heapless", "dep:itertools", "dep:zerocopy"] kernel = ["illumos-sys-hdrs/kernel"] @@ -21,9 +21,6 @@ std = ["dep:tabwriter", "opte-api/std"] test-help = [] usdt = ["std", "dep:usdt"] -# I have made a mistake 
in ingot. -alloc = [] - [dependencies] derror-macro = { workspace = true, optional = true } illumos-sys-hdrs.workspace = true diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index 721d4c17..5129f2e1 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -637,7 +637,7 @@ impl MsgBlk { /// # Errors /// /// * Return [`WrapError::NullPtr`] is `mp` is `NULL`. - /// * Return [`WrapError::Chain`] is `mp->b_next` or `mp->b_next` are set. + /// * Return [`WrapError::Chain`] is `mp->b_next` or `mp->b_prev` are set. pub unsafe fn wrap_mblk(ptr: *mut mblk_t) -> Result { let inner = NonNull::new(ptr).ok_or(WrapError::NullPtr)?; let inner_ref = inner.as_ref(); @@ -724,7 +724,7 @@ impl<'a> Iterator for MsgBlkIter<'a> { fn next(&mut self) -> Option { if let Some(ptr) = self.curr { self.curr = NonNull::new(unsafe { (*ptr.as_ptr()).b_cont }); - // SAFETY: MsgBlkNode is identical to mblk_t. + // SAFETY: MsgBlkNode has identical layout to mblk_t. unsafe { Some(&*(ptr.as_ptr() as *const MsgBlkNode)) } } else { None @@ -746,7 +746,7 @@ impl<'a> Iterator for MsgBlkIterMut<'a> { fn next(&mut self) -> Option { if let Some(ptr) = self.curr { self.curr = NonNull::new(unsafe { (*ptr.as_ptr()).b_cont }); - // SAFETY: MsgBlkNode is identical to mblk_t. + // SAFETY: MsgBlkNode has identical layout to mblk_t. unsafe { Some(&mut *(ptr.as_ptr() as *mut MsgBlkNode)) } } else { None diff --git a/lib/opte/src/ddi/time.rs b/lib/opte/src/ddi/time.rs index 6efdd2ee..f0067906 100644 --- a/lib/opte/src/ddi/time.rs +++ b/lib/opte/src/ddi/time.rs @@ -36,7 +36,10 @@ pub struct Moment { inner: ddi::hrtime_t, // This is a duration masquerading as an instant -- this - // allows us to and from raw ns counts when needed on std. + // allows us to move to and from raw ns counts when needed on std. + // + // Ultimately, this is a requirement for us to place `Moment`s into + // e.g. `AtomicU64`s for table design. 
#[cfg(any(feature = "std", test))] inner: Duration, } diff --git a/lib/opte/src/engine/arp.rs b/lib/opte/src/engine/arp.rs index 185b1f43..ab53cc73 100644 --- a/lib/opte/src/engine/arp.rs +++ b/lib/opte/src/engine/arp.rs @@ -101,8 +101,6 @@ pub struct ArpEthIpv4 { #[ingot(default = size_of::() as u8)] pub plen: u8, - // TODO: I think we need to make NetworkRepr fallible when - // reading. #[ingot(is = "u16be")] pub op: ArpOp, diff --git a/lib/opte/src/engine/dhcp.rs b/lib/opte/src/engine/dhcp.rs index 69f22519..4d3904ea 100644 --- a/lib/opte/src/engine/dhcp.rs +++ b/lib/opte/src/engine/dhcp.rs @@ -479,7 +479,6 @@ impl HairpinAction for DhcpAction { } fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { - // TODO: fold reader access into PacketHeaders2 let body = meta.copy_remaining(); let client_pkt = DhcpPacket::new_checked(&body)?; let client_dhcp = DhcpRepr::parse(&client_pkt)?; diff --git a/lib/opte/src/engine/dhcpv6/protocol.rs b/lib/opte/src/engine/dhcpv6/protocol.rs index 9793f34e..68ed575c 100644 --- a/lib/opte/src/engine/dhcpv6/protocol.rs +++ b/lib/opte/src/engine/dhcpv6/protocol.rs @@ -619,6 +619,8 @@ fn generate_packet<'a>( let ip = Ipv6 { source: Ipv6Addr::from_eui64(&action.server_mac), + // Safety: We're only here if the predicates match, one of which is + // IPv6. destination: meta.inner_ip6().unwrap().source(), next_header: IngotIpProto::UDP, payload_len: udp.length, diff --git a/lib/opte/src/engine/flow_table.rs b/lib/opte/src/engine/flow_table.rs index 4e165894..4b762270 100644 --- a/lib/opte/src/engine/flow_table.rs +++ b/lib/opte/src/engine/flow_table.rs @@ -89,7 +89,6 @@ pub struct FlowTable { impl FlowTable where - // S: Clone + fmt::Debug + Dump, S: fmt::Debug + Dump, { /// Add a new entry to the flow table. @@ -311,12 +310,14 @@ impl FlowEntry { self.hits.load(Ordering::Relaxed) } - /// Increments this flow's hit counter and + /// Increments this flow's hit counter and updates its timestamp to + /// the current instant. 
pub fn hit(&self) { self.hit_at(Moment::now()) } - /// Increments a flow's hit counter and sets th + /// Increments this flow's hit counter and updates its timestamp to + /// a given timestamp. /// /// This is used to minimise calls to `gethrtime` in fastpath /// operations. Callers *MUST* be certain that expiry logic for this flow diff --git a/lib/opte/src/engine/headers.rs b/lib/opte/src/engine/headers.rs index 81a0f3f3..36b6bab2 100644 --- a/lib/opte/src/engine/headers.rs +++ b/lib/opte/src/engine/headers.rs @@ -288,6 +288,8 @@ pub struct SizeHoldingEncap<'a> { pub meta: &'a EncapMeta, } +// SAFETY: All Emit writes are done via ingot-generated methods, +// and we don't read any element of `buf` in `SizeHoldingEncap::emit_raw`. unsafe impl ingot::types::EmitDoesNotRelyOnBufContents for SizeHoldingEncap<'_> { @@ -396,14 +398,16 @@ pub trait HasInnerCksum { const HAS_CKSUM: bool; } -/// Turn HeaderAction on its head a little bit: anyone can allow -/// themselves to take an action on certain params. +/// Transform a header layer using an OPTE action. pub trait Transform: HasInnerCksum where P: PushAction + fmt::Debug, M: fmt::Debug, { - /// Returns whether we will need a checksum recompute on the target field. + /// Modify/push/pop self, dependent on a given action. + /// + /// Returns whether we will need a checksum recompute on the target field + /// if it is still present. fn act_on( &mut self, action: &HeaderAction, diff --git a/lib/opte/src/engine/icmp/v6.rs b/lib/opte/src/engine/icmp/v6.rs index 878e5ba3..b32bcc0c 100644 --- a/lib/opte/src/engine/icmp/v6.rs +++ b/lib/opte/src/engine/icmp/v6.rs @@ -105,7 +105,6 @@ impl HairpinAction for Icmpv6EchoReply { } fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { - // TODO: fold reader access into PacketHeaders2 let Some(icmp6) = meta.inner_icmp6() else { // Getting here implies the predicate matched, but that the // extracted metadata indicates this isn't an ICMPv6 packet. 
That @@ -233,7 +232,6 @@ impl HairpinAction for RouterAdvertisement { } fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { - // TODO: fold reader access into PacketHeaders2 use smoltcp::time::Duration; use smoltcp::wire::NdiscRouterFlags; From 40c8840c727b7bdaa9ed45246b863b4baef1e614 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 30 Oct 2024 16:19:13 +0000 Subject: [PATCH 082/115] Self review pt.2 -- less copies on gateway ping --- lib/opte/src/engine/icmp/v4.rs | 95 +++++++++++++++++++---------- lib/opte/src/engine/icmp/v6.rs | 106 +++++++++++++++++++-------------- 2 files changed, 123 insertions(+), 78 deletions(-) diff --git a/lib/opte/src/engine/icmp/v4.rs b/lib/opte/src/engine/icmp/v4.rs index 7563ea50..bd830f51 100644 --- a/lib/opte/src/engine/icmp/v4.rs +++ b/lib/opte/src/engine/icmp/v4.rs @@ -8,22 +8,21 @@ use super::*; use crate::ddi::mblk::MsgBlk; +use crate::engine::checksum::HeaderChecksum; use crate::engine::ether::Ethernet; use crate::engine::ip::v4::Ipv4; -use crate::engine::ip::L3; use crate::engine::packet::MblkPacketData; use crate::engine::predicate::Ipv4AddrMatch; use ingot::ethernet::Ethertype; +use ingot::icmp::IcmpV4; use ingot::icmp::IcmpV4Packet; use ingot::icmp::IcmpV4Ref; use ingot::ip::IpProtocol; -use ingot::types::Emit; use ingot::types::HeaderLen; use ingot::types::HeaderParse; +use opte::engine::Checksum as OpteCsum; pub use opte_api::ip::IcmpEchoReply; use smoltcp::wire; -use smoltcp::wire::Icmpv4Packet; -use smoltcp::wire::Icmpv4Repr; impl HairpinAction for IcmpEchoReply { fn implicit_preds(&self) -> (Vec, Vec) { @@ -62,53 +61,70 @@ impl HairpinAction for IcmpEchoReply { ))); }; - // TODO: prealloc right size. 
- let mut body = icmp.emit_vec(); - meta.append_remaining(&mut body); + let ty = MessageType::from(icmp.ty()); - let src_pkt = Icmpv4Packet::new_checked(&body)?; - let src_icmp = Icmpv4Repr::parse(&src_pkt, &Csum::ignored())?; - - let (src_ident, src_seq_no, src_data) = match src_icmp { - Icmpv4Repr::EchoRequest { ident, seq_no, data } => { - (ident, seq_no, data) + let rest_of_hdr = match (ty, icmp.code()) { + (MessageType { inner: wire::Icmpv4Message::EchoRequest }, 0) => { + icmp.rest_of_hdr() } - - _ => { + (ty, code) => { // We should never hit this case because the predicate // should have verified that we are dealing with an // Echo Request. However, programming error could // cause this to happen -- let's not take any chances. return Err(GenErr::Unexpected(format!( "expected an ICMPv4 Echo Request, got {} {}", - src_pkt.msg_type(), - src_pkt.msg_code() + ty, code, ))); } }; - let reply = Icmpv4Repr::EchoReply { - ident: src_ident, - seq_no: src_seq_no, - data: src_data, + // Checksum update is minimal for a ping reply. + // May need to compute from scratch if offloading / request + // cksum is elided. + let mut csum = match icmp.checksum() { + 0 => { + let mut csum = OpteCsum::new(); + + for el in meta.body_segs().into_iter() { + csum.add_bytes(el); + } + + csum.add_bytes(icmp.rest_of_hdr_ref()); + + csum + } + valid => { + let mut csum = + OpteCsum::from(HeaderChecksum::wrap(valid.to_be_bytes())); + csum.sub_bytes(&[icmp.ty(), icmp.code()]); + csum + } }; - let reply_len = reply.buffer_len(); - let mut tmp = vec![0u8; reply_len]; - let mut icmp_reply = Icmpv4Packet::new_unchecked(&mut tmp); - let mut csum = Csum::ignored(); - csum.icmpv4 = Checksum::Tx; - reply.emit(&mut icmp_reply, &csum); + let ty = wire::Icmpv4Message::EchoReply.into(); + let code = 0; + csum.add_bytes(&[ty, code]); - let mut ip4: L3<&mut [u8]> = Ipv4 { + // Build the reply in place, and send it out. 
+ let body_len: usize = + meta.body_segs().into_iter().map(|v| v.len()).sum(); + + let icmp = IcmpV4 { + ty, + code, + checksum: csum.finalize_for_ingot(), + rest_of_hdr, + }; + + let mut ip4 = Ipv4 { source: self.echo_dst_ip, destination: self.echo_src_ip, protocol: IpProtocol::ICMP, - total_len: (Ipv4::MINIMUM_LENGTH + reply_len) as u16, + total_len: (Ipv4::MINIMUM_LENGTH + icmp.packet_length() + body_len) + as u16, ..Default::default() - } - .into(); - + }; ip4.compute_checksum(); let eth = Ethernet { @@ -117,7 +133,20 @@ impl HairpinAction for IcmpEchoReply { ethertype: Ethertype::IPV4, }; - Ok(AllowOrDeny::Allow(MsgBlk::new_ethernet_pkt((&eth, &ip4, &tmp)))) + let total_len = body_len + (&eth, &ip4, &icmp).packet_length(); + + let mut pkt_out = MsgBlk::new_ethernet(total_len); + pkt_out + .emit_back((&eth, &ip4, &icmp)) + .expect("Allocated space for pkt headers and body"); + + for el in meta.body_segs() { + pkt_out + .write_bytes_back(el) + .expect("allocated enough bytes for all body copy"); + } + + Ok(AllowOrDeny::Allow(pkt_out)) } } diff --git a/lib/opte/src/engine/icmp/v6.rs b/lib/opte/src/engine/icmp/v6.rs index b32bcc0c..40d9e917 100644 --- a/lib/opte/src/engine/icmp/v6.rs +++ b/lib/opte/src/engine/icmp/v6.rs @@ -8,6 +8,7 @@ use super::*; use crate::ddi::mblk::MsgBlk; +use crate::engine::checksum::HeaderChecksum; use crate::engine::ether::Ethernet; use crate::engine::ip::v6::Ipv6; use crate::engine::ip::v6::Ipv6Ref; @@ -15,11 +16,14 @@ use crate::engine::packet::MblkPacketData; use crate::engine::predicate::Ipv6AddrMatch; use alloc::string::String; use ingot::ethernet::Ethertype; +use ingot::icmp::IcmpV6; use ingot::icmp::IcmpV6Packet; use ingot::icmp::IcmpV6Ref; use ingot::ip::IpProtocol as IngotIpProto; use ingot::types::Emit; +use ingot::types::HeaderLen; use ingot::types::HeaderParse; +use opte::engine::Checksum as OpteCsum; pub use opte_api::ip::Icmpv6EchoReply; pub use opte_api::ip::Ipv6Addr; pub use opte_api::ip::Ipv6Cidr; @@ -116,68 +120,70 @@ impl 
HairpinAction for Icmpv6EchoReply { ))); }; - // Collect the src / dst IP addresses, which are needed to emit the - // resulting ICMPv6 echo reply. - let (src_ip, dst_ip) = if let Some(metadata) = meta.inner_ip6() { - ( - IpAddress::Ipv6(Ipv6Address(metadata.source().bytes())), - IpAddress::Ipv6(Ipv6Address(metadata.destination().bytes())), - ) - } else { - // We got the ICMPv6 metadata above but no IPv6 somehow? - return Err(GenErr::Unexpected(format!( - "Expected IPv6 packet metadata, but found: {:?}", - meta - ))); - }; + let ty = MessageType::from(icmp6.ty()); - // `Icmpv6Packet` requires the ICMPv6 header and not just the message payload. - // Given we successfully got the ICMPv6 metadata, rewinding here is fine. - let mut body = icmp6.emit_vec(); - meta.append_remaining(&mut body); - - let src_pkt = Icmpv6Packet::new_checked(&body)?; - let src_icmp = - Icmpv6Repr::parse(&src_ip, &dst_ip, &src_pkt, &Csum::ignored())?; - - let (src_ident, src_seq_no, src_data) = match src_icmp { - Icmpv6Repr::EchoRequest { ident, seq_no, data } => { - (ident, seq_no, data) + // We'll be recycling the sequence and identity. + let rest_of_hdr = match (ty, icmp6.code()) { + (MessageType { inner: Icmpv6Message::EchoRequest }, 0) => { + icmp6.rest_of_hdr() } - - _ => { + (ty, code) => { // We should never hit this case because the predicate // should have verified that we are dealing with an // Echo Request. However, programming error could // cause this to happen -- let's not take any chances. return Err(GenErr::Unexpected(format!( "expected an ICMPv6 Echo Request, got {} {}", - src_pkt.msg_type(), - src_pkt.msg_code() + ty, code, ))); } }; - let reply = Icmpv6Repr::EchoReply { - ident: src_ident, - seq_no: src_seq_no, - data: src_data, + // Checksum update is minimal for a ping reply. + // May need to compute from scratch if offloading / request + // cksum is elided. 
+ let mut csum = match icmp6.checksum() { + 0 => { + let mut csum = OpteCsum::new(); + + for el in meta.body_segs().into_iter() { + csum.add_bytes(el); + } + + csum.add_bytes(icmp6.rest_of_hdr_ref()); + + csum + } + valid => { + let mut csum = + OpteCsum::from(HeaderChecksum::wrap(valid.to_be_bytes())); + csum.sub_bytes(&[icmp6.ty(), icmp6.code()]); + csum + } }; - // TODO: less Vec + let ty = Icmpv6Message::EchoReply.into(); + let code = 0; + csum.add_bytes(&[ty, code]); - let reply_len = reply.buffer_len(); - let mut ulp_body = vec![0u8; reply_len]; - let mut icmp_reply = Icmpv6Packet::new_unchecked(&mut ulp_body); - let mut csum = Csum::ignored(); - csum.icmpv6 = Checksum::Tx; - reply.emit(&dst_ip, &src_ip, &mut icmp_reply, &csum); + // Build the reply in place, and send it out. + let body_len: usize = + meta.body_segs().into_iter().map(|v| v.len()).sum(); + let icmp = IcmpV6 { + ty, + code, + checksum: csum.finalize_for_ingot(), + rest_of_hdr, + }; + + // Note: an IP address swap does not require addition/removal from + // the internet checksum. 
let ip6 = Ipv6 { source: self.dst_ip, destination: self.src_ip, next_header: IngotIpProto::ICMP_V6, - payload_len: reply_len as u16, + payload_len: (icmp.packet_length() + body_len) as u16, ..Default::default() }; @@ -187,9 +193,19 @@ impl HairpinAction for Icmpv6EchoReply { ethertype: Ethertype::IPV6, }; - Ok(AllowOrDeny::Allow(MsgBlk::new_ethernet_pkt(( - &eth, &ip6, &ulp_body, - )))) + let total_len = body_len + (&eth, &ip6, &icmp).packet_length(); + let mut pkt_out = MsgBlk::new_ethernet(total_len); + pkt_out + .emit_back((&eth, &ip6, &icmp)) + .expect("Allocated space for pkt headers and body"); + + for el in meta.body_segs() { + pkt_out + .write_bytes_back(el) + .expect("allocated enough bytes for all body copy"); + } + + Ok(AllowOrDeny::Allow(pkt_out)) } } From f0070f9d587110bd5406f71c571e7ea461386798 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 30 Oct 2024 17:12:12 +0000 Subject: [PATCH 083/115] Self review pt.4 --- lib/opte/src/engine/icmp/v4.rs | 1 + lib/opte/src/engine/icmp/v6.rs | 1 + lib/opte/src/engine/ip/mod.rs | 6 +- lib/opte/src/engine/ip/v4.rs | 7 +- lib/opte/src/engine/ip/v6.rs | 7 +- lib/opte/src/engine/parse.rs | 249 ++++++--------------------------- lib/opte/src/engine/rule.rs | 112 +++++++++++++++ 7 files changed, 167 insertions(+), 216 deletions(-) diff --git a/lib/opte/src/engine/icmp/v4.rs b/lib/opte/src/engine/icmp/v4.rs index bd830f51..9a2f180f 100644 --- a/lib/opte/src/engine/icmp/v4.rs +++ b/lib/opte/src/engine/icmp/v4.rs @@ -63,6 +63,7 @@ impl HairpinAction for IcmpEchoReply { let ty = MessageType::from(icmp.ty()); + // We'll be recycling the sequence and identity. 
let rest_of_hdr = match (ty, icmp.code()) { (MessageType { inner: wire::Icmpv4Message::EchoRequest }, 0) => { icmp.rest_of_hdr() diff --git a/lib/opte/src/engine/icmp/v6.rs b/lib/opte/src/engine/icmp/v6.rs index 40d9e917..e5719d7c 100644 --- a/lib/opte/src/engine/icmp/v6.rs +++ b/lib/opte/src/engine/icmp/v6.rs @@ -364,6 +364,7 @@ impl HairpinAction for RouterAdvertisement { let ip6 = Ipv6 { source: *self.ip(), + // Safety: We match on this being Some(_) above, so unwrap is safe. destination: meta.inner_ip6().unwrap().source(), next_header: IngotIpProto::ICMP_V6, payload_len: reply_len as u16, diff --git a/lib/opte/src/engine/ip/mod.rs b/lib/opte/src/engine/ip/mod.rs index 823c12e3..adc55594 100644 --- a/lib/opte/src/engine/ip/mod.rs +++ b/lib/opte/src/engine/ip/mod.rs @@ -45,6 +45,7 @@ impl L3 { pseudo_hdr_bytes[0..4].copy_from_slice(v4.source().as_ref()); pseudo_hdr_bytes[4..8] .copy_from_slice(v4.destination().as_ref()); + // pseudo_hdr_bytes[8] reserved pseudo_hdr_bytes[9] = v4.protocol().0; let ulp_len = v4.total_len() - 4 * (v4.ihl() as u16); pseudo_hdr_bytes[10..].copy_from_slice(&ulp_len.to_be_bytes()); @@ -56,10 +57,11 @@ impl L3 { pseudo_hdr_bytes[0..16].copy_from_slice(v6.source().as_ref()); pseudo_hdr_bytes[16..32] .copy_from_slice(v6.destination().as_ref()); - pseudo_hdr_bytes[39] = v6.next_layer().unwrap_or_default().0; let ulp_len = v6.payload_len() as u32; pseudo_hdr_bytes[32..36] .copy_from_slice(&ulp_len.to_be_bytes()); + pseudo_hdr_bytes[39] = v6.next_layer().unwrap_or_default().0; + Checksum::compute(&pseudo_hdr_bytes) } } @@ -98,10 +100,10 @@ impl ValidL3 { pseudo_hdr_bytes[0..16].copy_from_slice(v6.source().as_ref()); pseudo_hdr_bytes[16..32] .copy_from_slice(v6.destination().as_ref()); - pseudo_hdr_bytes[39] = v6.next_layer().unwrap_or_default().0; let ulp_len = v6.payload_len() as u32; pseudo_hdr_bytes[32..36] .copy_from_slice(&ulp_len.to_be_bytes()); + pseudo_hdr_bytes[39] = v6.next_layer().unwrap_or_default().0; 
Checksum::compute(&pseudo_hdr_bytes) } diff --git a/lib/opte/src/engine/ip/v4.rs b/lib/opte/src/engine/ip/v4.rs index 29e6b1b7..f91d1aa0 100644 --- a/lib/opte/src/engine/ip/v4.rs +++ b/lib/opte/src/engine/ip/v4.rs @@ -125,10 +125,9 @@ impl ValidIpv4 { })); } - // Packets can have arbitrary zero-padding at the end so - // our length *could* be larger than the packet reports. - // Unlikely in practice as Encap headers push us past the 64B - // minimum packet size. + // Bail if our total len value is less than the IPv4 header + // itself requires. + // Note: IHL checks are baked into ingot. let expt_internal_len = (self.ihl() as usize) << 2; if (self.total_len() as usize) < expt_internal_len { return Err(ParseError::BadLength(MismatchError { diff --git a/lib/opte/src/engine/ip/v6.rs b/lib/opte/src/engine/ip/v6.rs index daafd6f2..ef69e853 100644 --- a/lib/opte/src/engine/ip/v6.rs +++ b/lib/opte/src/engine/ip/v6.rs @@ -172,9 +172,9 @@ pub fn v6_set_next_header( } }, FieldMut::Raw(Header::Raw(a)) => { - // TODO: this, but more widely in ingot. - // making this generic over all Repeated in - // was... somewhat challenging. + // This would be better done over all `Repeated` in ingot, + // however making mutable access generic in that case proved + // challenging. We can just do it manually for now. let mut buf = a.as_mut(); while !matches!(curr_ipp.class(), ExtHdrClass::NotAnEh) { @@ -185,6 +185,7 @@ pub fn v6_set_next_header( buf = rem; curr_ipp = nh; + // We're at the last EH -- now we can update the next header. 
if matches!(nh.class(), ExtHdrClass::NotAnEh) { match hdr { ValidLowRentV6Eh::IpV6ExtFragment(mut f) => { diff --git a/lib/opte/src/engine/parse.rs b/lib/opte/src/engine/parse.rs index 81a8b6f6..67623488 100644 --- a/lib/opte/src/engine/parse.rs +++ b/lib/opte/src/engine/parse.rs @@ -26,6 +26,7 @@ use super::icmp::QueryEcho; use super::icmp::ValidIcmpEcho; use super::ip::v4::Ipv4Mut; use super::ip::v4::Ipv4Ref; +use super::ip::v6::v6_set_next_header; use super::ip::v6::Ipv6Mut; use super::ip::v6::Ipv6Packet; use super::ip::v6::Ipv6Ref; @@ -196,6 +197,35 @@ fn exit_on_arp(eth: &ValidEthernet) -> ParseControl { } } +#[inline(always)] +fn flow_id( + l3: Option<&ValidL3>, + ulp: Option<&ValidUlp>, +) -> InnerFlowId { + let (proto, addrs) = match l3 { + Some(ValidL3::Ipv4(pkt)) => ( + pkt.protocol().0, + AddrPair::V4 { src: pkt.source(), dst: pkt.destination() }, + ), + Some(ValidL3::Ipv6(pkt)) => ( + pkt.next_layer().unwrap_or_default().0, + AddrPair::V6 { src: pkt.source(), dst: pkt.destination() }, + ), + None => (255, FLOW_ID_DEFAULT.addrs), + }; + + let (src_port, dst_port) = ulp + .map(|ulp| { + ( + ulp.true_src_port().or_else(|| ulp.pseudo_port()).unwrap_or(0), + ulp.true_dst_port().or_else(|| ulp.pseudo_port()).unwrap_or(0), + ) + }) + .unwrap_or((0, 0)); + + InnerFlowId { proto, addrs, src_port, dst_port } +} + #[derive(Parse)] pub struct NoEncap { #[ingot(control = exit_on_arp)] @@ -214,34 +244,7 @@ impl From> for OpteMeta { impl LightweightMeta for ValidNoEncap { #[inline] fn flow(&self) -> InnerFlowId { - let (proto, addrs) = match &self.inner_l3 { - Some(ValidL3::Ipv4(pkt)) => ( - pkt.protocol().0, - AddrPair::V4 { src: pkt.source(), dst: pkt.destination() }, - ), - Some(ValidL3::Ipv6(pkt)) => ( - pkt.next_layer().unwrap_or_default().0, - AddrPair::V6 { src: pkt.source(), dst: pkt.destination() }, - ), - None => (255, FLOW_ID_DEFAULT.addrs), - }; - - let (src_port, dst_port) = self - .inner_ulp - .as_ref() - .map(|ulp| { - ( - ulp.true_src_port() - 
.or_else(|| ulp.pseudo_port()) - .unwrap_or(0), - ulp.true_dst_port() - .or_else(|| ulp.pseudo_port()) - .unwrap_or(0), - ) - }) - .unwrap_or((0, 0)); - - InnerFlowId { proto, addrs, src_port, dst_port } + flow_id(self.inner_l3.as_ref(), self.inner_ulp.as_ref()) } #[inline] @@ -249,87 +252,15 @@ impl LightweightMeta for ValidNoEncap { where V: ByteSliceMut, { - // TODO: break out commonalities for this and geneve. - if let Some(ether_tx) = &transform.inner_ether { - if let Some(new_src) = &ether_tx.src { - self.inner_eth.set_source(*new_src); - } - if let Some(new_dst) = &ether_tx.dst { - self.inner_eth.set_destination(*new_dst); - } - } - match (&mut self.inner_l3, &transform.inner_ip) { - (Some(ValidL3::Ipv4(pkt)), Some(IpMod::Ip4(tx))) => { - if let Some(new_src) = &tx.src { - pkt.set_source(*new_src); - } - if let Some(new_dst) = &tx.dst { - pkt.set_destination(*new_dst); - } - if let Some(new_proto) = &tx.proto { - pkt.set_protocol(IpProtocol(u8::from(*new_proto))); - } - } - (Some(ValidL3::Ipv6(pkt)), Some(IpMod::Ip6(tx))) => { - if let Some(new_src) = &tx.src { - pkt.set_source(*new_src); - } - if let Some(new_dst) = &tx.dst { - pkt.set_destination(*new_dst); - } - if let Some(new_proto) = &tx.proto { - // TODO: wrong in the face of EHs... - // For now, we never use this on our dataplane. 
- pkt.set_next_header(IpProtocol(u8::from(*new_proto))); - } - } - _ => {} + transform.transform_ether(&mut self.inner_eth); + if let Some(l3) = self.inner_l3.as_mut() { + transform.transform_l3(l3); } - - match (&mut self.inner_ulp, &transform.inner_ulp) { - (Some(ValidUlp::Tcp(pkt)), Some(tx)) => { - if let Some(flags) = tx.tcp_flags { - pkt.set_flags(TcpFlags::from_bits_retain(flags)); - } - - if let Some(new_src) = &tx.generic.src_port { - pkt.set_source(*new_src); - } - - if let Some(new_dst) = &tx.generic.dst_port { - pkt.set_destination(*new_dst); - } - } - (Some(ValidUlp::Udp(pkt)), Some(tx)) => { - if let Some(new_src) = &tx.generic.src_port { - pkt.set_source(*new_src); - } - - if let Some(new_dst) = &tx.generic.dst_port { - pkt.set_destination(*new_dst); - } - } - (Some(ValidUlp::IcmpV4(pkt)), Some(tx)) - if pkt.ty() == 0 || pkt.ty() == 8 => - { - if let Some(new_id) = tx.icmp_id { - pkt.rest_of_hdr_mut()[..2] - .copy_from_slice(&new_id.to_be_bytes()) - } - } - (Some(ValidUlp::IcmpV6(pkt)), Some(tx)) - if pkt.ty() == 128 || pkt.ty() == 129 => - { - if let Some(new_id) = tx.icmp_id { - pkt.rest_of_hdr_mut()[..2] - .copy_from_slice(&new_id.to_be_bytes()) - } - } - _ => {} + if let Some(ulp) = self.inner_ulp.as_mut() { + transform.transform_ulp(ulp); } } - // FIXME: identical to Geneve. 
#[inline] fn compute_body_csum(&self) -> Option { let use_pseudo = if let Some(v) = &self.inner_ulp { @@ -445,30 +376,7 @@ impl From> for OpteMeta { impl LightweightMeta for ValidGeneveOverV6 { #[inline] fn flow(&self) -> InnerFlowId { - let (proto, addrs) = match &self.inner_l3 { - ValidL3::Ipv4(pkt) => ( - pkt.protocol().0, - AddrPair::V4 { src: pkt.source(), dst: pkt.destination() }, - ), - ValidL3::Ipv6(pkt) => ( - pkt.next_layer().unwrap_or_default().0, - AddrPair::V6 { src: pkt.source(), dst: pkt.destination() }, - ), - }; - - let src_port = self - .inner_ulp - .true_src_port() - .or_else(|| self.inner_ulp.pseudo_port()) - .unwrap_or(0); - - let dst_port = self - .inner_ulp - .true_dst_port() - .or_else(|| self.inner_ulp.pseudo_port()) - .unwrap_or(0); - - InnerFlowId { proto, addrs, src_port, dst_port } + flow_id(Some(&self.inner_l3), Some(&self.inner_ulp)) } #[inline] @@ -476,84 +384,9 @@ impl LightweightMeta for ValidGeneveOverV6 { where V: ByteSliceMut, { - // TODO: break out commonalities for this and geneve. - if let Some(ether_tx) = &transform.inner_ether { - if let Some(new_src) = &ether_tx.src { - self.inner_eth.set_source(*new_src); - } - if let Some(new_dst) = &ether_tx.dst { - self.inner_eth.set_destination(*new_dst); - } - } - match (&mut self.inner_l3, &transform.inner_ip) { - (ValidL3::Ipv4(pkt), Some(IpMod::Ip4(tx))) => { - if let Some(new_src) = &tx.src { - pkt.set_source(*new_src); - } - if let Some(new_dst) = &tx.dst { - pkt.set_destination(*new_dst); - } - if let Some(new_proto) = &tx.proto { - pkt.set_protocol(IpProtocol(u8::from(*new_proto))); - } - } - (ValidL3::Ipv6(pkt), Some(IpMod::Ip6(tx))) => { - if let Some(new_src) = &tx.src { - pkt.set_source(*new_src); - } - if let Some(new_dst) = &tx.dst { - pkt.set_destination(*new_dst); - } - if let Some(new_proto) = &tx.proto { - // TODO: wrong in the face of EHs... - // For now, we never use this on our dataplane. 
- pkt.set_next_header(IpProtocol(u8::from(*new_proto))); - } - } - _ => {} - } - - match (&mut self.inner_ulp, &transform.inner_ulp) { - (ValidUlp::Tcp(pkt), Some(tx)) => { - if let Some(flags) = tx.tcp_flags { - pkt.set_flags(TcpFlags::from_bits_retain(flags)); - } - - if let Some(new_src) = &tx.generic.src_port { - pkt.set_source(*new_src); - } - - if let Some(new_dst) = &tx.generic.dst_port { - pkt.set_destination(*new_dst); - } - } - (ValidUlp::Udp(pkt), Some(tx)) => { - if let Some(new_src) = &tx.generic.src_port { - pkt.set_source(*new_src); - } - - if let Some(new_dst) = &tx.generic.dst_port { - pkt.set_destination(*new_dst); - } - } - (ValidUlp::IcmpV4(pkt), Some(tx)) - if pkt.ty() == 0 || pkt.ty() == 8 => - { - if let Some(new_id) = tx.icmp_id { - pkt.rest_of_hdr_mut()[..2] - .copy_from_slice(&new_id.to_be_bytes()) - } - } - (ValidUlp::IcmpV6(pkt), Some(tx)) - if pkt.ty() == 128 || pkt.ty() == 129 => - { - if let Some(new_id) = tx.icmp_id { - pkt.rest_of_hdr_mut()[..2] - .copy_from_slice(&new_id.to_be_bytes()) - } - } - _ => {} - } + transform.transform_ether(&mut self.inner_eth); + transform.transform_l3(&mut self.inner_l3); + transform.transform_ulp(&mut self.inner_ulp); } #[inline] @@ -602,6 +435,7 @@ impl LightweightMeta for ValidGeneveOverV6 { #[inline] fn validate(&self, pkt_len: usize) -> Result<(), ParseError> { + // Outer layers. let rem_len = pkt_len - (&self.outer_eth, &self.outer_v6).packet_length(); self.outer_v6.validate(rem_len)?; @@ -611,6 +445,7 @@ impl LightweightMeta for ValidGeneveOverV6 { validate_geneve(&self.outer_encap)?; + // Inner layers. 
let rem_len = rem_len - (&self.outer_encap, &self.outer_eth, &self.inner_l3) .packet_length(); diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 0c7607bf..02396da7 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -9,6 +9,7 @@ use super::ether::EtherMeta; use super::ether::EtherMod; use super::ether::Ethernet; +use super::ether::EthernetMut; use super::ether::EthernetPacket; use super::ether::ValidEthernet; use super::flow_table::StateSummary; @@ -21,6 +22,10 @@ use super::headers::IpPush; use super::headers::Transform; use super::headers::UlpHeaderAction; use super::headers::UlpMetaModify; +use super::ip::v4::Ipv4Mut; +use super::ip::v6::v6_set_next_header; +use super::ip::v6::Ipv6Mut; +use super::ip::ValidL3; use super::ip::L3; use super::packet::BodyTransform; use super::packet::InnerFlowId; @@ -28,6 +33,7 @@ use super::packet::MblkFullParsed; use super::packet::MblkPacketData; use super::packet::Packet; use super::packet::PacketData; +use super::parse::ValidUlp; use super::port::meta::ActionMeta; use super::predicate::DataPredicate; use super::predicate::Predicate; @@ -44,8 +50,16 @@ use core::fmt::Debug; use core::fmt::Display; use illumos_sys_hdrs::c_char; use illumos_sys_hdrs::uintptr_t; +use ingot::icmp::IcmpV4Mut; +use ingot::icmp::IcmpV4Ref; +use ingot::icmp::IcmpV6Mut; +use ingot::icmp::IcmpV6Ref; +use ingot::ip::IpProtocol; +use ingot::tcp::TcpFlags; +use ingot::tcp::TcpMut; use ingot::types::InlineHeader; use ingot::types::Read; +use ingot::udp::UdpMut; use opte_api::Direction; use serde::Deserialize; use serde::Serialize; @@ -319,6 +333,104 @@ pub struct CompiledTransform { pub checksums_dirty: bool, } +impl CompiledTransform { + #[inline(always)] + pub fn transform_ether( + &self, + ether: &mut ValidEthernet, + ) { + if let Some(ether_tx) = &self.inner_ether { + if let Some(new_src) = &ether_tx.src { + ether.set_source(*new_src); + } + if let Some(new_dst) = &ether_tx.dst { + 
ether.set_destination(*new_dst); + } + } + } + + #[inline(always)] + pub fn transform_l3(&self, l3: &mut ValidL3) { + match (l3, &self.inner_ip) { + (ValidL3::Ipv4(pkt), Some(IpMod::Ip4(tx))) => { + if let Some(new_src) = &tx.src { + pkt.set_source(*new_src); + } + if let Some(new_dst) = &tx.dst { + pkt.set_destination(*new_dst); + } + if let Some(new_proto) = &tx.proto { + pkt.set_protocol(IpProtocol(u8::from(*new_proto))); + } + } + (ValidL3::Ipv6(pkt), Some(IpMod::Ip6(tx))) => { + if let Some(new_src) = &tx.src { + pkt.set_source(*new_src); + } + if let Some(new_dst) = &tx.dst { + pkt.set_destination(*new_dst); + } + if let Some(new_proto) = &tx.proto { + let ipp = IpProtocol(u8::from(*new_proto)); + + // `expect`ing is too risky, but we know we won't fail + // here for two reasons: + // * We just succeeded at parsing. + // * Compiled transforms cannot perform *structural* + // changes to packets (incl. push/pop/modify EHs). + let _ = v6_set_next_header(ipp, pkt); + } + } + _ => {} + } + } + + #[inline(always)] + pub fn transform_ulp(&self, ulp: &mut ValidUlp) { + match (ulp, &self.inner_ulp) { + (ValidUlp::Tcp(pkt), Some(tx)) => { + if let Some(flags) = tx.tcp_flags { + pkt.set_flags(TcpFlags::from_bits_retain(flags)); + } + + if let Some(new_src) = &tx.generic.src_port { + pkt.set_source(*new_src); + } + + if let Some(new_dst) = &tx.generic.dst_port { + pkt.set_destination(*new_dst); + } + } + (ValidUlp::Udp(pkt), Some(tx)) => { + if let Some(new_src) = &tx.generic.src_port { + pkt.set_source(*new_src); + } + + if let Some(new_dst) = &tx.generic.dst_port { + pkt.set_destination(*new_dst); + } + } + (ValidUlp::IcmpV4(pkt), Some(tx)) + if pkt.ty() == 0 || pkt.ty() == 8 => + { + if let Some(new_id) = tx.icmp_id { + pkt.rest_of_hdr_mut()[..2] + .copy_from_slice(&new_id.to_be_bytes()) + } + } + (ValidUlp::IcmpV6(pkt), Some(tx)) + if pkt.ty() == 128 || pkt.ty() == 129 => + { + if let Some(new_id) = tx.icmp_id { + pkt.rest_of_hdr_mut()[..2] + 
.copy_from_slice(&new_id.to_be_bytes()) + } + } + _ => {} + } + } +} + #[derive(Clone, Debug, Deserialize, Serialize)] pub enum CompiledEncap { Pop, From 603f9161b24ac4b8a88f892ab863ba0a553861ab Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 30 Oct 2024 17:18:43 +0000 Subject: [PATCH 084/115] SR Pt.5 --- lib/opte/src/engine/icmp/v4.rs | 15 ++++++++++++++ lib/opte/src/engine/icmp/v6.rs | 15 ++++++++++++++ lib/opte/src/engine/parse.rs | 37 ++++------------------------------ 3 files changed, 34 insertions(+), 33 deletions(-) diff --git a/lib/opte/src/engine/icmp/v4.rs b/lib/opte/src/engine/icmp/v4.rs index 9a2f180f..331d4b0d 100644 --- a/lib/opte/src/engine/icmp/v4.rs +++ b/lib/opte/src/engine/icmp/v4.rs @@ -17,6 +17,7 @@ use ingot::ethernet::Ethertype; use ingot::icmp::IcmpV4; use ingot::icmp::IcmpV4Packet; use ingot::icmp::IcmpV4Ref; +use ingot::icmp::ValidIcmpV4; use ingot::ip::IpProtocol; use ingot::types::HeaderLen; use ingot::types::HeaderParse; @@ -219,3 +220,17 @@ impl QueryEcho for IcmpV4Packet { } } } + +impl QueryEcho for ValidIcmpV4 { + #[inline] + fn echo_id(&self) -> Option { + match (self.code(), self.ty()) { + (0, 0) | (0, 8) => { + ValidIcmpEcho::parse(self.rest_of_hdr_ref().as_slice()) + .ok() + .map(|(v, ..)| v.id()) + } + _ => None, + } + } +} diff --git a/lib/opte/src/engine/icmp/v6.rs b/lib/opte/src/engine/icmp/v6.rs index e5719d7c..6182b2e8 100644 --- a/lib/opte/src/engine/icmp/v6.rs +++ b/lib/opte/src/engine/icmp/v6.rs @@ -19,6 +19,7 @@ use ingot::ethernet::Ethertype; use ingot::icmp::IcmpV6; use ingot::icmp::IcmpV6Packet; use ingot::icmp::IcmpV6Ref; +use ingot::icmp::ValidIcmpV6; use ingot::ip::IpProtocol as IngotIpProto; use ingot::types::Emit; use ingot::types::HeaderLen; @@ -664,3 +665,17 @@ impl QueryEcho for IcmpV6Packet { } } } + +impl QueryEcho for ValidIcmpV6 { + #[inline] + fn echo_id(&self) -> Option { + match (self.code(), self.ty()) { + (0, 128) | (0, 129) => { + ValidIcmpEcho::parse(&self.rest_of_hdr_ref()[..]) + .ok() + 
.map(|(v, ..)| v.id()) + } + _ => None, + } + } +} diff --git a/lib/opte/src/engine/parse.rs b/lib/opte/src/engine/parse.rs index 67623488..39ed81ff 100644 --- a/lib/opte/src/engine/parse.rs +++ b/lib/opte/src/engine/parse.rs @@ -9,7 +9,6 @@ use super::checksum::Checksum; use super::checksum::HeaderChecksum; -use super::ether::EthernetMut; use super::ether::EthernetPacket; use super::ether::EthernetRef; use super::ether::ValidEthernet; @@ -18,16 +17,12 @@ use super::geneve::GENEVE_PORT; use super::headers::HasInnerCksum; use super::headers::HeaderActionError; use super::headers::HeaderActionModify; -use super::headers::IpMod; use super::headers::UlpMetaModify; use super::headers::ValidEncapMeta; use super::icmp::IcmpEchoMut; use super::icmp::QueryEcho; use super::icmp::ValidIcmpEcho; -use super::ip::v4::Ipv4Mut; use super::ip::v4::Ipv4Ref; -use super::ip::v6::v6_set_next_header; -use super::ip::v6::Ipv6Mut; use super::ip::v6::Ipv6Packet; use super::ip::v6::Ipv6Ref; use super::ip::ValidL3; @@ -549,20 +544,8 @@ impl Ulp { #[inline] pub fn pseudo_port(&self) -> Option { match self { - Ulp::IcmpV4(pkt) - if pkt.code() == 0 && (pkt.ty() == 0 || pkt.ty() == 8) => - { - Some(u16::from_be_bytes( - pkt.rest_of_hdr()[..2].try_into().unwrap(), - )) - } - Ulp::IcmpV6(pkt) - if pkt.code() == 0 && (pkt.ty() == 128 || pkt.ty() == 129) => - { - Some(u16::from_be_bytes( - pkt.rest_of_hdr()[..2].try_into().unwrap(), - )) - } + Ulp::IcmpV4(pkt) => pkt.echo_id(), + Ulp::IcmpV6(pkt) => pkt.echo_id(), _ => None, } } @@ -590,20 +573,8 @@ impl ValidUlp { #[inline] pub fn pseudo_port(&self) -> Option { match self { - ValidUlp::IcmpV4(pkt) - if pkt.code() == 0 && (pkt.ty() == 0 || pkt.ty() == 8) => - { - Some(u16::from_be_bytes( - pkt.rest_of_hdr()[..2].try_into().unwrap(), - )) - } - ValidUlp::IcmpV6(pkt) - if pkt.code() == 0 && (pkt.ty() == 128 || pkt.ty() == 129) => - { - Some(u16::from_be_bytes( - pkt.rest_of_hdr()[..2].try_into().unwrap(), - )) - } + ValidUlp::IcmpV4(pkt) => 
pkt.echo_id(), + ValidUlp::IcmpV6(pkt) => pkt.echo_id(), _ => None, } } From 3dc3a59a37674d4c316a1769affe1235b041d4c6 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 30 Oct 2024 17:29:11 +0000 Subject: [PATCH 085/115] SR Pt.6 --- lib/opte/src/engine/ip/v6.rs | 32 ++++++++++++++++++++++++++++++++ lib/opte/src/engine/predicate.rs | 8 ++++++-- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/lib/opte/src/engine/ip/v6.rs b/lib/opte/src/engine/ip/v6.rs index ef69e853..d2baaf65 100644 --- a/lib/opte/src/engine/ip/v6.rs +++ b/lib/opte/src/engine/ip/v6.rs @@ -17,12 +17,15 @@ use ingot::ip::Ecn; use ingot::ip::ExtHdrClass; use ingot::ip::IpProtocol; use ingot::ip::IpV6Ext6564Mut; +use ingot::ip::IpV6Ext6564Ref; use ingot::ip::IpV6ExtFragmentMut; +use ingot::ip::IpV6ExtFragmentRef; use ingot::ip::LowRentV6EhRepr; use ingot::ip::ValidLowRentV6Eh; use ingot::types::primitives::*; use ingot::types::util::Repeated; use ingot::types::FieldMut; +use ingot::types::FieldRef; use ingot::types::Header; use ingot::types::HeaderLen; use ingot::types::ParseChoice; @@ -138,6 +141,7 @@ pub struct Ipv6Mod { pub proto: Option, } +#[inline] pub fn v6_set_next_header( ipp: IpProtocol, v6: &mut (impl Ipv6Mut + Ipv6Ref), @@ -203,6 +207,34 @@ pub fn v6_set_next_header( Ok(()) } +#[inline] +pub fn v6_get_next_header( + v6: &impl Ipv6Ref, +) -> Result { + let curr_ipp = v6.next_header(); + if matches!(curr_ipp.class(), ExtHdrClass::NotAnEh) { + return Ok(curr_ipp); + } + + Ok(match v6.v6ext_ref() { + FieldRef::Repr(a) => match a.iter().last() { + Some(LowRentV6EhRepr::IpV6ExtFragment(f)) => f.next_header, + Some(LowRentV6EhRepr::IpV6Ext6564(f)) => f.next_header, + None => curr_ipp, + }, + FieldRef::Raw(Header::Repr(a)) => match a.iter().last() { + Some(LowRentV6EhRepr::IpV6ExtFragment(f)) => f.next_header, + Some(LowRentV6EhRepr::IpV6Ext6564(f)) => f.next_header, + None => curr_ipp, + }, + FieldRef::Raw(Header::Raw(a)) => match a.iter(Some(curr_ipp)).last() { + 
Some(Ok(ValidLowRentV6Eh::IpV6ExtFragment(f))) => f.next_header(), + Some(Ok(ValidLowRentV6Eh::IpV6Ext6564(f))) => f.next_header(), + _ => curr_ipp, + }, + }) +} + #[cfg(test)] pub(crate) mod test { use super::*; diff --git a/lib/opte/src/engine/predicate.rs b/lib/opte/src/engine/predicate.rs index 51440b97..07a73252 100644 --- a/lib/opte/src/engine/predicate.rs +++ b/lib/opte/src/engine/predicate.rs @@ -16,6 +16,7 @@ use super::ip::v4::Ipv4Addr; use super::ip::v4::Ipv4Cidr; use super::ip::v4::Ipv4Ref; use super::ip::v4::Protocol; +use super::ip::v6::v6_get_next_header; use super::ip::v6::Ipv6Addr; use super::ip::v6::Ipv6Cidr; use super::ip::v6::Ipv6Ref; @@ -408,8 +409,11 @@ impl Predicate { } Some(L3::Ipv6(ipv6)) => { - // NOTE: I know this is bugged on EHs. - let proto = Protocol::from(ipv6.next_header().0); + let proto = Protocol::from( + v6_get_next_header(ipv6) + .unwrap_or_else(|_| ipv6.next_header()) + .0, + ); for m in list { if m.matches(proto) { From cda3564e2d9ba9ea45b56c8567b7a6c96e1af459 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 30 Oct 2024 17:40:21 +0000 Subject: [PATCH 086/115] SR pt.7 --- xde/src/xde.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 787e3e65..8c82b1b2 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -1409,7 +1409,10 @@ fn guest_loopback( let mblk_addr = pkt.mblk_addr(); let parsed_pkt = Packet::new(pkt.iter_mut()); - // TODO: Rework currently requires a reparse on loopback to account for UFT fastpath. + // Loopback now requires a reparse on loopback to account for UFT fastpath. + // When viona serves us larger packets, we needn't worry about allocing + // the encap on. + // We might be able to do better in the interim, but that costs us time. 
let parsed_pkt = match parsed_pkt.parse_inbound(VpcParser {}) { Ok(pkt) => pkt, @@ -1610,12 +1613,10 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { } }; - // what we WANT to do is pass in the parsed pkt, handle the - // emitspec in the same place, then send elsewhere. + let devs = unsafe { xde_devs.read() }; let l4_hash = emit_spec.l4_hash(); - let out_pkt = emit_spec.apply(pkt); if ip6_src == ip6_dst { @@ -1853,7 +1854,6 @@ unsafe fn xde_rx_one( // We must first parse the packet in order to determine where it // is to be delivered. let parser = VpcParser {}; - // let mblk_addr = parsed_pkt.mblk_addr(); let parsed_pkt = match parsed_pkt.parse_inbound(parser) { Ok(pkt) => pkt, Err(e) => { From 956779d47ed0b49bf30124f36469541e7b5e29dc Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 30 Oct 2024 18:30:49 +0000 Subject: [PATCH 087/115] .. --- xde/src/xde.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 8c82b1b2..ecb66b86 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -1613,7 +1613,6 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { } }; - let devs = unsafe { xde_devs.read() }; let l4_hash = emit_spec.l4_hash(); From 84d86517b81da776e74dd1bf8d931503a2cf8e7d Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 31 Oct 2024 11:21:20 +0000 Subject: [PATCH 088/115] Bump ingot. 
--- Cargo.lock | 6 +++--- Cargo.toml | 2 +- lib/opte/src/engine/ip/v6.rs | 13 ++++++------- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 06ea71e8..b71313ba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -895,7 +895,7 @@ dependencies = [ [[package]] name = "ingot" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=3b38859ca143eaa1287308359d0f1ddea07826fd#3b38859ca143eaa1287308359d0f1ddea07826fd" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=0b961bc1ff7355ceff1325959746fdc8bf661b87#0b961bc1ff7355ceff1325959746fdc8bf661b87" dependencies = [ "bitflags 2.6.0", "ingot-macros", @@ -908,7 +908,7 @@ dependencies = [ [[package]] name = "ingot-macros" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=3b38859ca143eaa1287308359d0f1ddea07826fd#3b38859ca143eaa1287308359d0f1ddea07826fd" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=0b961bc1ff7355ceff1325959746fdc8bf661b87#0b961bc1ff7355ceff1325959746fdc8bf661b87" dependencies = [ "darling", "itertools 0.13.0", @@ -921,7 +921,7 @@ dependencies = [ [[package]] name = "ingot-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=3b38859ca143eaa1287308359d0f1ddea07826fd#3b38859ca143eaa1287308359d0f1ddea07826fd" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=0b961bc1ff7355ceff1325959746fdc8bf661b87#0b961bc1ff7355ceff1325959746fdc8bf661b87" dependencies = [ "ingot-macros", "macaddr", diff --git a/Cargo.toml b/Cargo.toml index d385a481..17f7b313 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,7 +50,7 @@ ctor = "0.2" darling = "0.20" dyn-clone = "1.0" heapless = "0.8" -ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "3b38859ca143eaa1287308359d0f1ddea07826fd"} +ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "0b961bc1ff7355ceff1325959746fdc8bf661b87"} ipnetwork = { version = "0.20", default-features = false } itertools 
= { version = "0.13", default-features = false } libc = "0.2" diff --git a/lib/opte/src/engine/ip/v6.rs b/lib/opte/src/engine/ip/v6.rs index d2baaf65..a08beac6 100644 --- a/lib/opte/src/engine/ip/v6.rs +++ b/lib/opte/src/engine/ip/v6.rs @@ -14,7 +14,6 @@ use crate::engine::predicate::MatchExactVal; use crate::engine::predicate::MatchPrefix; use crate::engine::predicate::MatchPrefixVal; use ingot::ip::Ecn; -use ingot::ip::ExtHdrClass; use ingot::ip::IpProtocol; use ingot::ip::IpV6Ext6564Mut; use ingot::ip::IpV6Ext6564Ref; @@ -147,7 +146,7 @@ pub fn v6_set_next_header( v6: &mut (impl Ipv6Mut + Ipv6Ref), ) -> Result<(), HeaderActionError> { let mut curr_ipp = v6.next_header(); - if matches!(curr_ipp.class(), ExtHdrClass::NotAnEh) { + if curr_ipp.class().is_none() { v6.set_next_header(ipp); return Ok(()); } @@ -181,7 +180,7 @@ pub fn v6_set_next_header( // challenging. We can just do it manually for now. let mut buf = a.as_mut(); - while !matches!(curr_ipp.class(), ExtHdrClass::NotAnEh) { + while curr_ipp.class().is_some() { let (hdr, nh, rem) = ValidLowRentV6Eh::parse_choice(buf, Some(curr_ipp)) .map_err(|_| HeaderActionError::MalformedExtension)?; @@ -190,13 +189,13 @@ pub fn v6_set_next_header( curr_ipp = nh; // We're at the last EH -- now we can update the next header. 
- if matches!(nh.class(), ExtHdrClass::NotAnEh) { + if nh.class().is_none() { match hdr { ValidLowRentV6Eh::IpV6ExtFragment(mut f) => { - f.set_next_header(nh); + f.set_next_header(ipp); } ValidLowRentV6Eh::IpV6Ext6564(mut f) => { - f.set_next_header(nh); + f.set_next_header(ipp); } } } @@ -212,7 +211,7 @@ pub fn v6_get_next_header( v6: &impl Ipv6Ref, ) -> Result { let curr_ipp = v6.next_header(); - if matches!(curr_ipp.class(), ExtHdrClass::NotAnEh) { + if curr_ipp.class().is_none() { return Ok(curr_ipp); } From aac32dc0a621f07ef3fd18f17b815564f5e03759 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 31 Oct 2024 14:51:16 +0000 Subject: [PATCH 089/115] SR pt.8 --- lib/opte/src/engine/icmp/v4.rs | 5 +- lib/opte/src/engine/icmp/v6.rs | 5 +- lib/opte/src/engine/packet.rs | 2 +- lib/opte/src/engine/port.rs | 183 +++++++++++++++++---------------- 4 files changed, 98 insertions(+), 97 deletions(-) diff --git a/lib/opte/src/engine/icmp/v4.rs b/lib/opte/src/engine/icmp/v4.rs index 331d4b0d..1820c804 100644 --- a/lib/opte/src/engine/icmp/v4.rs +++ b/lib/opte/src/engine/icmp/v4.rs @@ -88,7 +88,7 @@ impl HairpinAction for IcmpEchoReply { 0 => { let mut csum = OpteCsum::new(); - for el in meta.body_segs().into_iter() { + for el in meta.body_segs().iter() { csum.add_bytes(el); } @@ -109,8 +109,7 @@ impl HairpinAction for IcmpEchoReply { csum.add_bytes(&[ty, code]); // Build the reply in place, and send it out. 
- let body_len: usize = - meta.body_segs().into_iter().map(|v| v.len()).sum(); + let body_len: usize = meta.body_segs().iter().map(|v| v.len()).sum(); let icmp = IcmpV4 { ty, diff --git a/lib/opte/src/engine/icmp/v6.rs b/lib/opte/src/engine/icmp/v6.rs index 6182b2e8..5e6383e7 100644 --- a/lib/opte/src/engine/icmp/v6.rs +++ b/lib/opte/src/engine/icmp/v6.rs @@ -147,7 +147,7 @@ impl HairpinAction for Icmpv6EchoReply { 0 => { let mut csum = OpteCsum::new(); - for el in meta.body_segs().into_iter() { + for el in meta.body_segs().iter() { csum.add_bytes(el); } @@ -168,8 +168,7 @@ impl HairpinAction for Icmpv6EchoReply { csum.add_bytes(&[ty, code]); // Build the reply in place, and send it out. - let body_len: usize = - meta.body_segs().into_iter().map(|v| v.len()).sum(); + let body_len: usize = meta.body_segs().iter().map(|v| v.len()).sum(); let icmp = IcmpV6 { ty, diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 3e80c391..0c1f324b 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -456,7 +456,7 @@ impl PktBodyWalker { // *will* be exclusive if ByteSliceMut is met (because they are // sourced from an exclusive borrow on something which owns a [u8]). // This allows us to cast to &mut later, but not here! - let mut to_hold = vec![]; + let mut to_hold = Vec::with_capacity(1); if let Some(ref mut chunk) = first { let as_bytes = chunk.deref_mut(); to_hold.push(unsafe { diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 8e82efe0..f7e24412 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -7,7 +7,6 @@ //! A virtual switch port. 
use self::meta::ActionMeta; -use super::ether::EtherMeta; use super::ether::Ethernet; use super::flow_table::Dump; use super::flow_table::FlowEntry; @@ -86,7 +85,9 @@ use core::str::FromStr; use core::sync::atomic::AtomicU64; use core::sync::atomic::Ordering::SeqCst; use illumos_sys_hdrs::uintptr_t; +use ingot::ethernet::Ethertype; use ingot::geneve::Geneve; +use ingot::ip::IpProtocol; use ingot::tcp::TcpRef; use ingot::types::Emit; use ingot::types::HeaderLen; @@ -521,7 +522,6 @@ pub enum DumpLayerError { } /// An entry in the Unified Flow Table. -// #[derive(Debug)] pub struct UftEntry { /// The flow ID for the other side. pair: KMutex>, @@ -1216,7 +1216,8 @@ impl Port { // TODO: might want to pass in a &mut to an enum // which can advance to (and hold) light->full-fat metadata. // My gutfeel is that there's a perf cost here -- this struct - // is pretty fat, but expressing the transform on a &mut also sucks. + // is pretty large, but expressing the transform on a &mut is also + // less than ideal. mut pkt: Packet, M>>, ) -> result::Result where @@ -1315,17 +1316,16 @@ impl Port { // forces a reprocess, but I believe this is a necessary evil to keep work // out of the portlock today. The correct fix is to AtomicU64 those stats, // which we'll need for later metrics too. - // However, accounting for this below is simple enough. + // However, fixing this up if we get it wrong is simple enough. let mut invalidated_tcp = None; let mut reprocess = false; match &decision { FastPathDecision::CompiledUft(entry) | FastPathDecision::Uft(entry) => { - // XXX: Ideally the Kstat should be holding AtomicU64s, then we get + // TODO: Ideally the Kstat should be holding AtomicU64s, then we get // out of the lock sooner. Note that we don't need to *apply* a given // set of transforms in order to know which stats we'll modify. - // Also, not an elegant hack! 
let dummy_res = Ok(InternalProcessResult::Modified); match dir { Direction::In => { @@ -1380,18 +1380,27 @@ impl Port { _ => {} } - // If we're in here, we took a faster-path. We know the lock is dropped. - // Reacquire the lock to remove the flow. + // reprocess => invalidated_tcp.is_some(); + debug_assert!(!reprocess || invalidated_tcp.is_some()); + + // We've determined we're actually starting a new TCP flow (e.g., SYN + // on any other state) from an existing UFT entry. + // We know the lock is dropped -- reacquire the lock to remove the flow. + // Elevate lock to full scope, if we are reprocessing as well. if let Some(entry) = invalidated_tcp { - let mut lock = self.data.lock(); + let mut local_lock = self.data.lock(); let flow_lock = entry.state().inner.lock(); let ufid_out = &flow_lock.outbound_ufid; let ufid_in = flow_lock.inbound_ufid.as_ref(); - self.uft_tcp_closed(&mut lock, ufid_out, ufid_in); + self.uft_tcp_closed(&mut local_lock, ufid_out, ufid_in); + + let _ = local_lock.tcp_flows.remove(ufid_out).unwrap(); - let _ = lock.tcp_flows.remove(ufid_out).unwrap(); + if reprocess { + lock = Some(local_lock); + } } if !reprocess { @@ -1437,8 +1446,6 @@ impl Port { ); return res; } - } else { - lock = Some(self.data.lock()); } // (2)/(3) Full-fat metadata is required. @@ -1447,7 +1454,7 @@ impl Port { let res = match (&decision, dir) { // (2) Apply retrieved transform. Lock is dropped. - // Store cached l4 hash. + // Reuse cached l4 hash. (FastPathDecision::Uft(entry), _) if !reprocess => { let l4_hash = entry.state().l4_hash; let tx = Arc::clone(&entry.state().xforms); @@ -1458,11 +1465,13 @@ impl Port { } // (3) Full-table processing for the packet, then drop the lock. - // Cksum updates are the only thing left undone. + // Cksum updates are left undone, so we perform those manually + // outside the port lock. 
(_, Direction::In) => { let data = lock .as_mut() .expect("lock should be held on this codepath"); + let res = self.process_in_miss( data, epoch, @@ -1470,6 +1479,7 @@ impl Port { &flow_before, &mut ameta, ); + // Prevent double-counting reprocessed modify entries. if !(reprocess && matches!(res, Ok(InternalProcessResult::Modified))) @@ -1477,6 +1487,7 @@ impl Port { Self::update_stats_in(&mut data.stats.vals, &res); } drop(lock); + pkt.update_checksums(); res } @@ -1484,8 +1495,10 @@ impl Port { let data = lock .as_mut() .expect("lock should be held on this codepath"); + let res = self.process_out_miss(data, epoch, &mut pkt, &mut ameta); + // Prevent double-counting reprocessed modify entries. if !(reprocess && matches!(res, Ok(InternalProcessResult::Modified))) @@ -1493,6 +1506,7 @@ impl Port { Self::update_stats_out(&mut data.stats.vals, &res); } drop(lock); + pkt.update_checksums(); res } @@ -1559,8 +1573,8 @@ impl Port { // future we could eliminate this window by passing a // reference to the epoch to `Layer::remove_rule()` // and let it perform the increment. - // XXX(kyle) Above comment misunderstands TOCTOU -- - // THE TABLE IS LOCKED. + // XXX(kyle) This is not a concern while we have the + // port lock in place. self.epoch.fetch_add(1, SeqCst); return Ok(()); } @@ -1655,12 +1669,6 @@ enum TcpMaybeClosed { NewState(TcpState, Arc>), } -pub enum ThinProcRes { - PushEncap(EtherMeta, IpPush, EncapPush), - PopEncap, - Na, -} - // This is a convenience wrapper for keeping the header and body // transformations under one structure, allowing them to be passes as // one argument. @@ -1689,7 +1697,9 @@ impl Transforms { where T::Chunk: ByteSliceMut, { - // TODO: prebake these into one transform? + // TODO: It should be possible to combine header transforms + // into a single operation per layer, particularly when + // they are disjoint like we do in the Compiled case. 
for ht in &self.hdr { pkt.hdr_transform(ht)?; } @@ -1721,67 +1731,64 @@ impl Transforms { continue; } - // TODO: refactor. - // All outer layers must be pushed (or popped/ignored) at the same // time for compilation. No modifications are permissable. - match transform.outer_ether { - HeaderAction::Push(p) => outer_ether = Some(p), - HeaderAction::Pop => { - outer_ether = None; + fn store_outer_push( + tx: &HeaderAction, + still_permissable: &mut bool, + slot: &mut Option

, + ) { + match tx { + HeaderAction::Push(p) => *slot = Some(*p), + HeaderAction::Pop => *slot = None, + HeaderAction::Modify(_) => *still_permissable = false, + HeaderAction::Ignore => {} } - HeaderAction::Modify(_) => { - still_permissable = false; - } - HeaderAction::Ignore => {} - } - - match transform.outer_ip { - HeaderAction::Push(p) => outer_ip = Some(p), - HeaderAction::Pop => { - outer_ip = None; - } - HeaderAction::Modify(_) => { - still_permissable = false; - } - HeaderAction::Ignore => {} - } - - match transform.outer_encap { - HeaderAction::Push(p) => outer_encap = Some(p), - HeaderAction::Pop => { - outer_encap = None; - } - HeaderAction::Modify(_) => { - still_permissable = false; - } - HeaderAction::Ignore => {} } + store_outer_push( + &transform.outer_ether, + &mut still_permissable, + &mut outer_ether, + ); + store_outer_push( + &transform.outer_ip, + &mut still_permissable, + &mut outer_ip, + ); + store_outer_push( + &transform.outer_encap, + &mut still_permissable, + &mut outer_encap, + ); // Allow up to one action per ULP field, which must be modify. // We can't yet combine sets of `Modify` actions, // but the Oxide dataplane does not use this in practice. 
- match &transform.inner_ether { - HeaderAction::Push(_) | HeaderAction::Pop => { - still_permissable = false; - continue; - } - HeaderAction::Modify(m) => { - still_permissable &= inner_ether.replace(m).is_none(); - } - HeaderAction::Ignore => {} - } - - match &transform.inner_ip { - HeaderAction::Push(_) | HeaderAction::Pop => { - still_permissable = false; - continue; - } - HeaderAction::Modify(m) => { - still_permissable &= inner_ip.replace(m).is_none(); + fn store_inner_mod<'a, P, M>( + tx: &'a HeaderAction, + still_permissable: &mut bool, + slot: &mut Option<&'a M>, + ) { + match tx { + HeaderAction::Push(_) | HeaderAction::Pop => { + *still_permissable = false; + } + HeaderAction::Modify(m) => { + *still_permissable &= slot.replace(m).is_none(); + } + HeaderAction::Ignore => {} } - HeaderAction::Ignore => {} } + store_inner_mod( + &transform.inner_ether, + &mut still_permissable, + &mut inner_ether, + ); + store_inner_mod( + &transform.inner_ip, + &mut still_permissable, + &mut inner_ip, + ); match &transform.inner_ulp { UlpHeaderAction::Modify(m) => { @@ -1806,32 +1813,26 @@ impl Transforms { }; let eth_repr = Ethernet { - destination: eth.dst.bytes().into(), - source: eth.src.bytes().into(), - ethertype: ingot::ethernet::Ethertype( - eth.ether_type.into(), - ), + destination: eth.dst, + source: eth.src, + ethertype: Ethertype(eth.ether_type.into()), }; let (ip_repr, l3_extra_bytes, ip_len_offset) = match ip { IpPush::Ip4(v4) => ( L3Repr::Ipv4(Ipv4 { - protocol: ingot::ip::IpProtocol( - v4.proto.into(), - ), + protocol: IpProtocol(v4.proto.into()), source: v4.src, destination: v4.dst, - total_len: 20, + total_len: Ipv4::MINIMUM_LENGTH as u16, ..Default::default() }), - 20, + Ipv4::MINIMUM_LENGTH, 2, ), IpPush::Ip6(v6) => ( L3Repr::Ipv6(Ipv6 { - next_header: ingot::ip::IpProtocol( - v6.proto.into(), - ), + next_header: IpProtocol(v6.proto.into()), source: v6.src, destination: v6.dst, payload_len: 0, @@ -1845,6 +1846,9 @@ impl Transforms { let encap_sz = 
encap_repr.packet_length(); let l3_len_offset = eth_repr.packet_length() + ip_len_offset; + + // UDP has a length field 4B into its header. + // in event of TCP, l4_len_offset is ignored. let l4_len_offset = eth_repr.packet_length() + ip_repr.packet_length() + 4; @@ -2469,7 +2473,6 @@ impl Port { // For outbound traffic the TCP flow table must be checked // _before_ processing take place. - // TODO: uncork let tcp_flow = if pkt.meta().is_inner_tcp() { match self.process_out_tcp_new( data, @@ -2525,7 +2528,7 @@ impl Port { let mut xforms = Transforms::new(); let flow_before = *pkt.flow(); let res = self.layers_process(data, Out, pkt, &mut xforms, ameta); - // XXXX: may be hashing the wrong thing. + let hte = UftEntry { pair: KMutex::new(None, KMutexType::Spin), xforms: xforms.compile(pkt.checksums_dirty()), From bd7641a8de7098cf2143c83927fa4b36011033a7 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 31 Oct 2024 16:09:42 +0000 Subject: [PATCH 090/115] Self-review pt.9 Will see if I can cleanup PktBodyWalker further. --- lib/opte/src/engine/mod.rs | 4 +- lib/opte/src/engine/packet.rs | 77 ++++++++++++++------------------- lib/opte/src/engine/port.rs | 5 ++- lib/oxide-vpc/src/engine/mod.rs | 9 ++-- 4 files changed, 42 insertions(+), 53 deletions(-) diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index c414703a..13b405f2 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -217,7 +217,7 @@ pub trait NetworkImpl { /// myriad of reasons. The error returned is for informational /// purposes, rather than having any obvious direct action to take /// in response. - fn handle_pkt( + fn handle_pkt<'a, T: Read + 'a>( &self, dir: Direction, pkt: &mut Packet>, @@ -225,7 +225,7 @@ pub trait NetworkImpl { uft_out: &FlowTable>, ) -> Result where - T::Chunk: ByteSliceMut; + T::Chunk: ByteSliceMut + IntoBufPointer<'a>; /// Return the parser for this network implementation. 
fn parser(&self) -> Self::Parser; diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 0c1f324b..442526dc 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -57,8 +57,6 @@ use core::ffi::CStr; use core::fmt; use core::fmt::Display; use core::hash::Hash; -use core::ops::Deref; -use core::ops::DerefMut; use core::result; use core::sync::atomic::AtomicPtr; use crc32fast::Hasher; @@ -425,6 +423,7 @@ pub struct OpteMeta { struct PktBodyWalker { base: Cell, T)>>, slice: AtomicPtr>, + // slice: AtomicPtr>, } impl Drop for PktBodyWalker { @@ -439,42 +438,42 @@ impl Drop for PktBodyWalker { } } -impl PktBodyWalker { - fn reify_body_segs(&self) - where - ::Chunk: ByteSliceMut, - { - if let Some((mut first, mut rest)) = self.base.take() { +impl<'a, T: Read + 'a> PktBodyWalker +where + ::Chunk: ByteSliceMut + IntoBufPointer<'a>, +{ + fn reify_body_segs(&self) { + if let Some((first, mut rest)) = self.base.take() { // SAFETY: ByteSlice requires as part of its API // that any implementors are stable, so we will always // get the same view via deref. We are then consuming them // into references which live exactly as long as their initial // form. // + // IntoBufPointer guarantees that what we are working with are, + // in actual fact, slices (or, at least, that any necessary behaviour + // on owned conversion into a slice [Drop, etc.] occurs). + // // The next question is one of ownership. // We know that these chunks are at least &[u8]s, they // *will* be exclusive if ByteSliceMut is met (because they are // sourced from an exclusive borrow on something which owns a [u8]). // This allows us to cast to &mut later, but not here! 
let mut to_hold = Vec::with_capacity(1); - if let Some(ref mut chunk) = first { - let as_bytes = chunk.deref_mut(); - to_hold.push(unsafe { - core::mem::transmute::<&mut [u8], (*mut u8, usize)>( - as_bytes, - ) - }); + if let Some(chunk) = first { + let len = chunk.len(); + let ptr = unsafe { chunk.into_buf_ptr() }; + to_hold.push((ptr, len)); } - // TODO(drop-safety): we need to give these chunks a longer life, too. while let Ok(chunk) = rest.next_chunk() { - let as_bytes = chunk.deref(); - to_hold.push(unsafe { - core::mem::transmute::<&[u8], (*mut u8, usize)>(as_bytes) - }); + let len = chunk.len(); + let ptr = unsafe { chunk.into_buf_ptr() }; + to_hold.push((ptr, len)); } let to_store = Box::into_raw(Box::new(to_hold.into_boxed_slice())); + // let to_store = Box::into_raw(Box::new(to_hold)); self.slice .compare_exchange( @@ -484,19 +483,10 @@ impl PktBodyWalker { core::sync::atomic::Ordering::Relaxed, ) .expect("unexpected concurrent access to body_seg memoiser"); - - // SAFETY: - // Replace contents to get correct drop behaviour on T. - // Currently the only ByteSlice impls are &[u8] and friends, - // but this may extend to e.g. Vec in future. 
- self.base.set(Some((first, rest))); } } - fn body_segs(&self) -> &[&[u8]] - where - T::Chunk: ByteSliceMut, - { + fn body_segs(&self) -> &[&[u8]] { let mut slice_ptr = self.slice.load(core::sync::atomic::Ordering::Relaxed); if slice_ptr.is_null() { @@ -511,10 +501,7 @@ impl PktBodyWalker { } } - fn body_segs_mut(&mut self) -> &mut [&mut [u8]] - where - T::Chunk: ByteSliceMut, - { + fn body_segs_mut(&mut self) -> &mut [&mut [u8]] { let mut slice_ptr = self.slice.load(core::sync::atomic::Ordering::Relaxed); if slice_ptr.is_null() { @@ -561,7 +548,7 @@ impl core::fmt::Debug for PacketData { } } -impl PacketData { +impl<'a, T: Read + 'a> PacketData { pub fn initial_lens(&self) -> Option<&InitialLayerLens> { self.initial_lens.as_deref() } @@ -650,14 +637,14 @@ impl PacketData { pub fn body_segs(&self) -> &[&[u8]] where - T::Chunk: ByteSliceMut, + T::Chunk: ByteSliceMut + IntoBufPointer<'a>, { self.body.body_segs() } pub fn copy_remaining(&self) -> Vec where - T::Chunk: ByteSliceMut, + T::Chunk: ByteSliceMut + IntoBufPointer<'a>, { let base = self.body_segs(); let len = base.iter().map(|v| v.len()).sum(); @@ -670,7 +657,7 @@ impl PacketData { pub fn append_remaining(&self, buf: &mut Vec) where - T::Chunk: ByteSliceMut, + T::Chunk: ByteSliceMut + IntoBufPointer<'a>, { let base = self.body_segs(); let len = base.iter().map(|v| v.len()).sum(); @@ -682,7 +669,7 @@ impl PacketData { pub fn body_segs_mut(&mut self) -> &mut [&mut [u8]] where - T::Chunk: ByteSliceMut, + T::Chunk: ByteSliceMut + IntoBufPointer<'a>, { self.body.body_segs_mut() } @@ -919,7 +906,7 @@ where } } -impl Packet> { +impl<'a, T: Read + 'a> Packet> { pub fn meta(&self) -> &PacketData { &self.state.meta } @@ -1173,7 +1160,7 @@ impl Packet> { xform: &dyn BodyTransform, ) -> Result<(), BodyTransformError> where - T::Chunk: ByteSliceMut, + T::Chunk: ByteSliceMut + IntoBufPointer<'a>, { // We set the flag now with the assumption that the transform // could fail after modifying part of the body. 
In the future @@ -1195,7 +1182,7 @@ impl Packet> { #[inline] pub fn body_segs(&self) -> Option<&[&[u8]]> where - T::Chunk: ByteSliceMut, + T::Chunk: ByteSliceMut + IntoBufPointer<'a>, { let out = self.state.meta.body_segs(); if out.is_empty() { @@ -1208,7 +1195,7 @@ impl Packet> { #[inline] pub fn body_segs_mut(&mut self) -> Option<&mut [&mut [u8]]> where - T::Chunk: ByteSliceMut, + T::Chunk: ByteSliceMut + IntoBufPointer<'a>, { let out = self.state.meta.body_segs_mut(); if out.is_empty() { @@ -1230,7 +1217,7 @@ impl Packet> { /// body_csum cannot be valid. pub fn compute_checksums(&mut self) where - T::Chunk: ByteSliceMut, + T::Chunk: ByteSliceMut + IntoBufPointer<'a>, { let mut body_csum = Checksum::new(); for seg in self.body_segs_mut().unwrap_or_default() { @@ -1333,7 +1320,7 @@ impl Packet> { /// case where checksums are **not** being offloaded to the hardware. pub fn update_checksums(&mut self) where - T::Chunk: ByteSliceMut, + T::Chunk: ByteSliceMut + IntoBufPointer<'a>, { // If we know that no transform touched a field which features in // an inner transport cksum (L4/L3 src/dst, most realistically), diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index f7e24412..37f97d45 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -91,6 +91,7 @@ use ingot::ip::IpProtocol; use ingot::tcp::TcpRef; use ingot::types::Emit; use ingot::types::HeaderLen; +use ingot::types::IntoBufPointer; use ingot::types::Read; use ingot::udp::Udp; use kstat_macro::KStatProvider; @@ -1689,13 +1690,13 @@ impl Transforms { } #[inline] - fn apply( + fn apply<'a, T: Read + 'a>( &self, pkt: &mut Packet>, dir: Direction, ) -> result::Result<(), ProcessError> where - T::Chunk: ByteSliceMut, + T::Chunk: ByteSliceMut + IntoBufPointer<'a>, { // TODO: It should be possible to combine header transforms // into a single operation per layer, particularly when diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index 
6ddf0ebd..d4ec2384 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -35,6 +35,7 @@ use opte::engine::NetworkImpl; use opte::engine::NetworkParser; use opte::ingot::ethernet::Ethertype; use opte::ingot::types::HeaderParse; +use opte::ingot::types::IntoBufPointer; use opte::ingot::types::Parsed as IngotParsed; use opte::ingot::types::Read; use zerocopy::ByteSliceMut; @@ -64,12 +65,12 @@ fn is_arp_req_for_tpa(tpa: Ipv4Addr, arp: &impl ArpEthIpv4Ref) -> bool { } impl VpcNetwork { - fn handle_arp_out( + fn handle_arp_out<'a, T: Read + 'a>( &self, pkt: &mut Packet>, ) -> Result where - T::Chunk: ByteSliceMut, + T::Chunk: ByteSliceMut + IntoBufPointer<'a>, { let body = pkt .body_segs() @@ -99,7 +100,7 @@ impl VpcNetwork { impl NetworkImpl for VpcNetwork { type Parser = VpcParser; - fn handle_pkt( + fn handle_pkt<'a, T: Read + 'a>( &self, dir: Direction, pkt: &mut Packet>, @@ -107,7 +108,7 @@ impl NetworkImpl for VpcNetwork { _uft_out: &FlowTable>, ) -> Result where - T::Chunk: ByteSliceMut, + T::Chunk: ByteSliceMut + IntoBufPointer<'a>, { match (dir, pkt.meta().inner_ether().ethertype()) { (Direction::Out, Ethertype::ARP) => self.handle_arp_out(pkt), From c3ebd68665445844897ee703e3b10d471575facf Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 31 Oct 2024 17:34:23 +0000 Subject: [PATCH 091/115] SR Pt.9 --- crates/derror-macro/src/lib.rs | 2 + lib/opte/src/engine/packet.rs | 94 +++++++++++++++++++--------------- 2 files changed, 56 insertions(+), 40 deletions(-) diff --git a/crates/derror-macro/src/lib.rs b/crates/derror-macro/src/lib.rs index e60bf2af..d2f3e21e 100644 --- a/crates/derror-macro/src/lib.rs +++ b/crates/derror-macro/src/lib.rs @@ -143,6 +143,7 @@ pub fn derive_derror( quote! 
{ impl DError for #ident { #[allow(non_upper_case_globals)] + #[inline] fn discriminant(&self) -> &'static ::core::ffi::CStr { use ::core::ffi::CStr; #( #cstr_decls )* @@ -151,6 +152,7 @@ pub fn derive_derror( } } + #[inline] fn child(&self) -> Option<&dyn DError> { match self { #( #child_arms )* diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 442526dc..2749bd35 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -318,20 +318,24 @@ impl ParseError { } impl DError for PacketParseError { + #[inline] fn discriminant(&self) -> &'static core::ffi::CStr { self.header().as_cstr() } + #[inline] fn child(&self) -> Option<&dyn DError> { Some(self.error()) } } impl DError for ingot::types::ParseError { + #[inline] fn discriminant(&self) -> &'static core::ffi::CStr { self.as_cstr() } + #[inline] fn child(&self) -> Option<&dyn DError> { None } @@ -379,6 +383,12 @@ pub enum WriteError { pub type WriteResult = result::Result; +/// The initial parsed length of every header in a packet. +/// +/// Used to track structural changes to any packet headers +/// which would require full serialisation of a header and +/// its prior layers. 
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] pub struct InitialLayerLens { pub outer_eth: usize, pub outer_l3: usize, @@ -493,7 +503,7 @@ where self.reify_body_segs(); slice_ptr = self.slice.load(core::sync::atomic::Ordering::Relaxed); } - assert!(!slice_ptr.is_null()); + debug_assert!(!slice_ptr.is_null()); unsafe { let a = (&*(*slice_ptr)) as *const _; @@ -508,7 +518,7 @@ where self.reify_body_segs(); slice_ptr = self.slice.load(core::sync::atomic::Ordering::Relaxed); } - assert!(!slice_ptr.is_null()); + debug_assert!(!slice_ptr.is_null()); // SAFETY: We have an exclusive reference, and the ByteSliceMut // bound guarantees that this packet view was construced from @@ -745,6 +755,8 @@ impl From<&PacketData> for InnerFlowId { /// /// In illumos there is no real notion of an mblk "packet" or /// "segment": a packet is just a linked list of `mblk_t` values. +/// This type indicates that an `mblk_t` chain is to be treated as +/// a network packet, as far as its bytes are concerned. /// The "packet" is simply a pointer to the first `mblk_t` in the /// list, which also happens to be the first "segment", and any /// further segments are linked via `b_cont`. In the illumos @@ -757,12 +769,10 @@ impl From<&PacketData> for InnerFlowId { /// `b_next` field. In the illumos kernel code this this is often /// referred to with the variable name `mp_chain`, but sometimes also /// `mp_head` (or just `mp`). It's a bit ambiguous, and something you -/// kind of figure out as you work in the code more. Though part of me -/// would like to create some rust-like "new type pattern" in C to -/// disambiguate packets from packet chains across APIs so the -/// compiler can detect when your API is working against the wrong -/// contract (for example a function that expects a single packet but -/// is being fed a packet chain). +/// kind of figure out as you work in the code more. In OPTE, we +/// disambiguate using the `MsgBlk` and `MsgBlkChain` types. 
The former +/// enforces that `b_next` and `b_prev` are disconnected. + // // TODO: In theory, this can be any `Read` type giving us `&mut [u8]`s, // but in practice we are internally reliant on returning `MsgBlk`s in @@ -843,7 +853,7 @@ where let IngotParsed { stack: headers, data, last_chunk } = meta; // TODO: we can probably not do this in some cases, but we - // don't have a way for headeractions to signal that they + // don't have a way for `HeaderAction`s to signal that they // *may* change the fields we need in the slowpath. let body_csum = headers.compute_body_csum(); let flow = headers.flow(); @@ -922,20 +932,20 @@ impl<'a, T: Read + 'a> Packet> { #[inline] /// Convert a packet's metadata into a set of instructions /// needed to serialize all its changes to the wire. - pub fn emit_spec(mut self) -> Result + pub fn emit_spec(&mut self) -> Result where T::Chunk: ByteSliceMut, { // Roughly how this works: // - Identify rightmost structural-changed field. // - fill out owned versions into the push_spec of all - // extant fields we rewound past. + // present fields we rewound past. // - Rewind up to+including that point in original // pkt space. let l4_hash = self.l4_hash(); - let state = self.state; - let init_lens = state.meta.initial_lens.unwrap(); - let headers = state.meta.headers; + let state = &self.state; + let init_lens = state.meta.initial_lens.as_ref().unwrap(); + let headers = &state.meta.headers; let payload_len = state.len - init_lens.hdr_len(); let mut encapped_len = payload_len; @@ -950,7 +960,7 @@ impl<'a, T: Read + 'a> Packet> { // do this sort of thing. We are so, so far from that... 
let mut force_serialize = false; - match headers.inner_ulp { + match &headers.inner_ulp { Some(ulp) => { let l = ulp.packet_length(); encapped_len += l; @@ -960,17 +970,21 @@ impl<'a, T: Read + 'a> Packet> { push_spec.inner.get_or_insert_with(Default::default); inner.ulp = Some(match ulp { - Ulp::Tcp(Header::Repr(t)) => UlpRepr::Tcp(*t), - Ulp::Tcp(Header::Raw(t)) => UlpRepr::Tcp((&t).into()), - Ulp::Udp(Header::Repr(t)) => UlpRepr::Udp(*t), - Ulp::Udp(Header::Raw(t)) => UlpRepr::Udp((&t).into()), - Ulp::IcmpV4(Header::Repr(t)) => UlpRepr::IcmpV4(*t), + Ulp::Tcp(Header::Repr(t)) => UlpRepr::Tcp(*t.clone()), + Ulp::Tcp(Header::Raw(t)) => UlpRepr::Tcp(t.into()), + Ulp::Udp(Header::Repr(t)) => UlpRepr::Udp(*t.clone()), + Ulp::Udp(Header::Raw(t)) => UlpRepr::Udp(t.into()), + Ulp::IcmpV4(Header::Repr(t)) => { + UlpRepr::IcmpV4(*t.clone()) + } Ulp::IcmpV4(Header::Raw(t)) => { - UlpRepr::IcmpV4((&t).into()) + UlpRepr::IcmpV4(t.into()) + } + Ulp::IcmpV6(Header::Repr(t)) => { + UlpRepr::IcmpV6(*t.clone()) } - Ulp::IcmpV6(Header::Repr(t)) => UlpRepr::IcmpV6(*t), Ulp::IcmpV6(Header::Raw(t)) => { - UlpRepr::IcmpV6((&t).into()) + UlpRepr::IcmpV6(t.into()) } }); force_serialize = true; @@ -984,7 +998,7 @@ impl<'a, T: Read + 'a> Packet> { _ => {} } - match headers.inner_l3 { + match &headers.inner_l3 { Some(l3) => { let l = l3.packet_length(); encapped_len += l; @@ -995,12 +1009,12 @@ impl<'a, T: Read + 'a> Packet> { push_spec.inner.get_or_insert_with(Default::default); inner.l3 = Some(match l3 { - L3::Ipv4(Header::Repr(v4)) => L3Repr::Ipv4(*v4), - L3::Ipv4(Header::Raw(v4)) => L3Repr::Ipv4((&v4).into()), - L3::Ipv6(Header::Repr(v6)) => L3Repr::Ipv6(*v6), + L3::Ipv4(Header::Repr(v4)) => L3Repr::Ipv4(*v4.clone()), + L3::Ipv4(Header::Raw(v4)) => L3Repr::Ipv4(v4.into()), + L3::Ipv6(Header::Repr(v6)) => L3Repr::Ipv6(*v6.clone()), // We can't actually do structural mods here today using OPTE, - // but account for the possibiliry at least. + // but account for the possibility at least. 
L3::Ipv6(Header::Raw(v6)) => { L3Repr::Ipv6(v6.to_owned(None)?) } @@ -1020,21 +1034,21 @@ impl<'a, T: Read + 'a> Packet> { encapped_len += headers.inner_eth.packet_length(); if force_serialize { let inner = push_spec.inner.get_or_insert_with(Default::default); - inner.eth = match headers.inner_eth { - Header::Repr(p) => *p, - Header::Raw(p) => (&p).into(), + inner.eth = match &headers.inner_eth { + Header::Repr(p) => **p, + Header::Raw(p) => p.into(), }; rewind += init_lens.inner_eth; } - match headers.outer_encap { + match &headers.outer_encap { Some(encap) if force_serialize || encap.needs_emit() || encap.packet_length() != init_lens.outer_encap => { push_spec.outer_encap = Some(match encap { - InlineHeader::Repr(o) => o, + InlineHeader::Repr(o) => *o, InlineHeader::Raw(ValidEncapMeta::Geneve(u, g)) => { EncapMeta::Geneve(GeneveMeta { entropy: u.source(), @@ -1056,7 +1070,7 @@ impl<'a, T: Read + 'a> Packet> { _ => {} } - match headers.outer_l3 { + match &headers.outer_l3 { Some(l3) if force_serialize || l3.needs_emit() @@ -1065,12 +1079,12 @@ impl<'a, T: Read + 'a> Packet> { let encap_len = push_spec.outer_encap.packet_length(); push_spec.outer_ip = Some(match l3 { - L3::Ipv6(BoxedHeader::Repr(o)) => L3Repr::Ipv6(*o), - L3::Ipv4(BoxedHeader::Repr(o)) => L3Repr::Ipv4(*o), + L3::Ipv6(BoxedHeader::Repr(o)) => L3Repr::Ipv6(*o.clone()), + L3::Ipv4(BoxedHeader::Repr(o)) => L3Repr::Ipv4(*o.clone()), L3::Ipv6(BoxedHeader::Raw(o)) => { L3Repr::Ipv6(o.to_owned(None)?) 
} - L3::Ipv4(BoxedHeader::Raw(o)) => L3Repr::Ipv4((&o).into()), + L3::Ipv4(BoxedHeader::Raw(o)) => L3Repr::Ipv4(o.into()), }); let inner_sz = (encapped_len + encap_len) as u16; @@ -1095,15 +1109,15 @@ impl<'a, T: Read + 'a> Packet> { _ => {} } - match headers.outer_eth { + match &headers.outer_eth { Some(eth) if force_serialize || eth.needs_emit() || eth.packet_length() != init_lens.outer_eth => { push_spec.outer_eth = Some(match eth { - InlineHeader::Repr(o) => o, - InlineHeader::Raw(r) => (&r).into(), + InlineHeader::Repr(o) => *o, + InlineHeader::Raw(r) => r.into(), }); rewind += init_lens.outer_eth; From 532899465bb88c41361b4008372eb2d1ddeda339 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 31 Oct 2024 18:06:00 +0000 Subject: [PATCH 092/115] Indicate fast/slowpath in port-process-return Pretty helpful for showing off operation. --- dtrace/common.h | 8 ++++++++ dtrace/opte-port-process.d | 13 +++++++------ lib/opte/src/engine/port.rs | 16 +++++++++++----- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/dtrace/common.h b/dtrace/common.h index bb642d03..710b6301 100644 --- a/dtrace/common.h +++ b/dtrace/common.h @@ -56,5 +56,13 @@ */ #define DIR_STR(dir) ((dir) == 1 ? "IN" : "OUT") +/* + * Packet processing path. + * 1 = UFT Compiled/Fast + * 2 = UFT Hit/Medium + * 3 = UFT Miss/Slow + */ +#define PATH_STR(path) ((path) == 1 ? "FAST" : ((path) == 2 ? 
"MED" : "SLOW")) + #define EL_DELIMIT "->" #define EL_FMT "->%s" diff --git a/dtrace/opte-port-process.d b/dtrace/opte-port-process.d index 93b7cbc7..fa0e6546 100644 --- a/dtrace/opte-port-process.d +++ b/dtrace/opte-port-process.d @@ -6,12 +6,12 @@ #include "common.h" #include "protos.d" -#define HDR_FMT "%-12s %-3s %-8s %-43s %-43s %-5s %s\n" -#define LINE_FMT "%-12s %-3s %-8u %-43s %-43s %-5u %s\n" +#define HDR_FMT "%-12s %-3s %-8s %-43s %-43s %-5s %s %s\n" +#define LINE_FMT "%-12s %-3s %-8u %-43s %-43s %-5u %s %s\n" BEGIN { printf(HDR_FMT, "NAME", "DIR", "EPOCH", "FLOW BEFORE", "FLOW AFTER", - "LEN", "RESULT"); + "LEN", "RESULT", "PATH"); num = 0; } @@ -27,10 +27,11 @@ port-process-return { this->msgs = (derror_sdt_arg_t*) arg7; this->msg_len = this->msgs->len; this->res = stringof(""); + this->path = PATH_STR(arg8); if (num >= 10) { printf(HDR_FMT, "NAME", "DIR", "EPOCH", "FLOW BEFORE", - "FLOW AFTER", "LEN", "RESULT"); + "FLOW AFTER", "LEN", "RESULT", "PATH"); num = 0; } @@ -58,7 +59,7 @@ port-process-return /this->af == AF_INET/ { FLOW_FMT(this->s_before, this->flow_before); FLOW_FMT(this->s_after, this->flow_after); printf(LINE_FMT, this->name, this->dir, this->epoch, this->s_before, - this->s_after, msgsize(this->mp), this->res); + this->s_after, msgsize(this->mp), this->res, this->path); num++; } @@ -66,7 +67,7 @@ port-process-return /this->af == AF_INET6/ { FLOW_FMT6(this->s_before, this->flow_before); FLOW_FMT6(this->s_after, this->flow_after); printf(LINE_FMT, this->name, this->dir, this->epoch, this->s_before, - this->s_after, msgsize(this->mp), this->res); + this->s_after, msgsize(this->mp), this->res, this->path); num++; } diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 37f97d45..94ae9c9a 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -1444,6 +1444,7 @@ impl Port { epoch, mblk_addr, &res, + 1, ); return res; } @@ -1453,7 +1454,7 @@ impl Port { let mut pkt = pkt.to_full_meta(); let mut ameta 
= ActionMeta::new(); - let res = match (&decision, dir) { + let (res, path) = match (&decision, dir) { // (2) Apply retrieved transform. Lock is dropped. // Reuse cached l4 hash. (FastPathDecision::Uft(entry), _) if !reprocess => { @@ -1462,7 +1463,7 @@ impl Port { pkt.set_l4_hash(l4_hash); tx.apply(&mut pkt, dir)?; - Ok(InternalProcessResult::Modified) + (Ok(InternalProcessResult::Modified), 2) } // (3) Full-table processing for the packet, then drop the lock. @@ -1490,7 +1491,7 @@ impl Port { drop(lock); pkt.update_checksums(); - res + (res, 3) } (_, Direction::Out) => { let data = lock @@ -1509,7 +1510,7 @@ impl Port { drop(lock); pkt.update_checksums(); - res + (res, 3) } }; @@ -1532,6 +1533,7 @@ impl Port { epoch, mblk_addr, &res, + path, ); res } @@ -2000,6 +2002,7 @@ impl Port { epoch: u64, mblk_addr: uintptr_t, res: &result::Result, + path: u64, ) { cfg_if! { if #[cfg(all(not(feature = "std"), not(test)))] { @@ -2048,6 +2051,7 @@ impl Port { mblk_addr, hp_pkt_ptr, eb.as_ptr(), + path as uintptr_t, ); } } else if #[cfg(feature = "usdt")] { @@ -2057,6 +2061,7 @@ impl Port { Ok(v) => format!("{:?}", v), Err(e) => format!("ERROR: {:?}", e), }; + let _ = path; crate::opte_provider::port__process__return!( || ( @@ -2068,7 +2073,7 @@ impl Port { ) ); } else { - let (..) = (dir, flow_before, flow_after, epoch, mblk_addr, res); + let (..) = (dir, flow_before, flow_after, epoch, mblk_addr, res, path); } } } @@ -3001,6 +3006,7 @@ extern "C" { pkt: uintptr_t, hp_pkt: uintptr_t, err_b: *const LabelBlock<2>, + path: uintptr_t, ); pub fn __dtrace_probe_tcp__err( dir: uintptr_t, From 9fa48560bd5833da9df3e29273d14fbe7d0ef92f Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 31 Oct 2024 18:39:17 +0000 Subject: [PATCH 093/115] Inline to prevent port-process-return from dinging us. 
--- lib/opte/src/engine/port.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 94ae9c9a..34bde01e 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -1211,6 +1211,7 @@ impl Port { /// # States /// /// This command is valid only for [`PortState::Running`]. + #[inline(always)] pub fn process<'a, M>( &self, dir: Direction, @@ -1993,7 +1994,7 @@ impl Port { } } - #[inline] + #[inline(always)] fn port_process_return_probe( &self, dir: Direction, From d682ea0c3e5891d807fabc4b8594deca6b90f426 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 1 Nov 2024 12:52:30 +0000 Subject: [PATCH 094/115] SR Pt.9 --- lib/opte/src/engine/packet.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 2749bd35..006b3a8b 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -432,8 +432,7 @@ pub struct OpteMeta { /// do not want to interact with body segments at all. struct PktBodyWalker { base: Cell, T)>>, - slice: AtomicPtr>, - // slice: AtomicPtr>, + slice: AtomicPtr>, } impl Drop for PktBodyWalker { @@ -482,8 +481,7 @@ where to_hold.push((ptr, len)); } - let to_store = Box::into_raw(Box::new(to_hold.into_boxed_slice())); - // let to_store = Box::into_raw(Box::new(to_hold)); + let to_store = Box::into_raw(Box::new(to_hold)); self.slice .compare_exchange( @@ -493,6 +491,12 @@ where core::sync::atomic::Ordering::Relaxed, ) .expect("unexpected concurrent access to body_seg memoiser"); + + // While today the only T we're operating on are IterMuts bound + // to the lifetime of an actual packet (via &mut), there's a chance + // in future that dropping the iterator could invalidate the byte + // slices we're holding onto. Hang onto `rest` to prevent this. 
+ self.base.set(Some((None, rest))); } } From cdf1d598d6f0ae13b2c7bba79478367dc0dcdada Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 1 Nov 2024 14:19:25 +0000 Subject: [PATCH 095/115] Remove vestigial Packet. --- bench/benches/userland.rs | 24 +++++-- lib/opte-test-utils/src/lib.rs | 20 +++--- lib/opte/src/ddi/mblk.rs | 4 +- lib/opte/src/engine/dhcpv6/protocol.rs | 3 +- lib/opte/src/engine/layer.rs | 6 +- lib/opte/src/engine/nat.rs | 6 +- lib/opte/src/engine/packet.rs | 83 ++++++------------------ lib/opte/src/engine/port.rs | 1 + lib/opte/src/engine/rule.rs | 3 +- lib/opte/src/engine/snat.rs | 6 +- lib/oxide-vpc/tests/fuzz_regression.rs | 6 +- lib/oxide-vpc/tests/integration_tests.rs | 6 +- xde/src/xde.rs | 9 +-- 13 files changed, 66 insertions(+), 111 deletions(-) diff --git a/bench/benches/userland.rs b/bench/benches/userland.rs index 0b6f0dc1..cb036b7d 100644 --- a/bench/benches/userland.rs +++ b/bench/benches/userland.rs @@ -83,23 +83,35 @@ pub fn test_parse( match parser { ParserKind::Generic => { |(mut in_pkt, direction): TestCase| { - let pkt = black_box(Packet::new(in_pkt.iter_mut())); black_box(match direction { - In => pkt.parse_inbound(GenericUlp {}), - Out => pkt.parse_outbound(GenericUlp {}), + In => pkt.parse_inbound( + in_pkt.iter_mut(), + GenericUlp {}, + ), + Out => pkt.parse_outbound( + in_pkt.iter_mut(), + GenericUlp {}, + ), }) .unwrap(); } } ParserKind::OxideVpc => { |(mut in_pkt, direction): TestCase| { - let pkt = black_box(Packet::new(in_pkt.iter_mut())); black_box(match direction { In => { - pkt.parse_inbound(VpcParser {}).unwrap(); + pkt.parse_inbound( + in_pkt.iter_mut(), + VpcParser {}, + ) + .unwrap(); } Out => { - pkt.parse_outbound(VpcParser {}).unwrap(); + pkt.parse_outbound( + in_pkt.iter_mut(), + VpcParser {}, + ) + .unwrap(); } }); } diff --git a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index 243d6843..18be3bef 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ 
-19,7 +19,7 @@ pub mod port_state; pub use opte::api::Direction::*; pub use opte::api::MacAddr; pub use opte::ddi::mblk::MsgBlk; -use opte::ddi::mblk::MsgBlkIterMut; +pub use opte::ddi::mblk::MsgBlkIterMut; pub use opte::engine::ether::EtherMeta; pub use opte::engine::ether::EtherType; pub use opte::engine::ether::Ethernet; @@ -38,8 +38,8 @@ pub use opte::engine::ip::v6::Ipv6; pub use opte::engine::ip::v6::Ipv6Addr; pub use opte::engine::ip::L3Repr; pub use opte::engine::layer::DenyReason; -use opte::engine::packet::LiteInPkt; -use opte::engine::packet::LiteOutPkt; +pub use opte::engine::packet::LiteInPkt; +pub use opte::engine::packet::LiteOutPkt; pub use opte::engine::packet::MblkLiteParsed; pub use opte::engine::packet::Packet; pub use opte::engine::packet::ParseError; @@ -115,16 +115,14 @@ pub fn parse_inbound( pkt: &mut MsgBlk, parser: NP, ) -> Result, NP>, ParseError> { - let pkt = Packet::new(pkt.iter_mut()); - pkt.parse_inbound(parser) + Packet::parse_inbound(pkt.iter_mut(), parser) } pub fn parse_outbound( pkt: &mut MsgBlk, parser: NP, ) -> Result, NP>, ParseError> { - let pkt = Packet::new(pkt.iter_mut()); - pkt.parse_outbound(parser) + Packet::parse_outbound(pkt.iter_mut(), parser) } // It's imperative that this list stays in sync with the layers that @@ -483,8 +481,7 @@ pub fn ulp_pkt< ) -> MsgBlk { let mut pkt = MsgBlk::new_ethernet_pkt((eth, ip, ulp, body)); - let view = Packet::new(pkt.iter_mut()); - let view = view.parse_outbound(GenericUlp {}).unwrap(); + let view = Packet::parse_outbound(pkt.iter_mut(), GenericUlp {}).unwrap(); let mut view = view.to_full_meta(); view.compute_checksums(); drop(view); @@ -996,13 +993,12 @@ pub fn encap(inner_pkt: MsgBlk, src: TestIpPhys, dst: TestIpPhys) -> MsgBlk { /// Encapsulate a guest packet. 
#[must_use] fn _encap( - mut inner_pkt: MsgBlk, + inner_pkt: MsgBlk, src: TestIpPhys, dst: TestIpPhys, external_snat: bool, ) -> MsgBlk { - let pkt = Packet::new(inner_pkt.iter_mut()); - let base_len = pkt.len(); + let base_len = inner_pkt.byte_len(); let mut outer_geneve = Geneve { vni: dst.vni, ..Default::default() }; diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index 5129f2e1..ffa21184 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -941,7 +941,7 @@ mod test { assert_eq!(pkt.seg_len(), 1); assert_eq!(pkt.tail_capacity(), 16); - let res = Packet::new(pkt.iter_mut()).parse_outbound(GenericUlp {}); + let res = Packet::parse_outbound(pkt.iter_mut(), GenericUlp {}); match res { Err(ParseError::IngotError(err)) => { assert_eq!(err.header().as_str(), "inner_eth"); @@ -956,7 +956,7 @@ mod test { assert_eq!(pkt2.len(), 0); assert_eq!(pkt2.seg_len(), 1); assert_eq!(pkt2.tail_capacity(), 16); - let res = Packet::new(pkt.iter_mut()).parse_outbound(GenericUlp {}); + let res = Packet::parse_outbound(pkt.iter_mut(), GenericUlp {}); match res { Err(ParseError::IngotError(err)) => { assert_eq!(err.header().as_str(), "inner_eth"); diff --git a/lib/opte/src/engine/dhcpv6/protocol.rs b/lib/opte/src/engine/dhcpv6/protocol.rs index 68ed575c..c1c575f0 100644 --- a/lib/opte/src/engine/dhcpv6/protocol.rs +++ b/lib/opte/src/engine/dhcpv6/protocol.rs @@ -724,8 +724,7 @@ mod test { #[test] fn test_predicates_match_snooped_solicit_message() { let mut pkt = MsgBlk::copy(test_data::TEST_SOLICIT_PACKET); - let pkt = Packet::new(pkt.iter_mut()) - .parse_outbound(GenericUlp {}) + let pkt = Packet::parse_outbound(pkt.iter_mut(), GenericUlp {}) .unwrap() .to_full_meta(); let pmeta = pkt.meta(); diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index dc80b6c1..95f2c803 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -1872,9 +1872,9 @@ mod test { }, )); - let pkt_view = 
Packet::new(test_pkt.iter_mut()); - let pmeta = - pkt_view.parse_outbound(GenericUlp {}).unwrap().to_full_meta(); + let pmeta = Packet::parse_outbound(test_pkt.iter_mut(), GenericUlp {}) + .unwrap() + .to_full_meta(); // The pkt/rdr aren't actually used in this case. let ameta = ActionMeta::new(); diff --git a/lib/opte/src/engine/nat.rs b/lib/opte/src/engine/nat.rs index 8f9c54c4..5e0e0e4e 100644 --- a/lib/opte/src/engine/nat.rs +++ b/lib/opte/src/engine/nat.rs @@ -278,8 +278,7 @@ mod test { }; let mut pkt_m = MsgBlk::new_ethernet_pkt((ð, &ip4, &tcp, &body)); - let mut pkt = Packet::new(pkt_m.iter_mut()) - .parse_outbound(GenericUlp {}) + let mut pkt = Packet::parse_outbound(pkt_m.iter_mut(), GenericUlp {}) .unwrap() .to_full_meta(); @@ -347,8 +346,7 @@ mod test { }; let mut pkt_m = MsgBlk::new_ethernet_pkt((ð, &ip4, &tcp, &body)); - let mut pkt = Packet::new(pkt_m.iter_mut()) - .parse_inbound(GenericUlp {}) + let mut pkt = Packet::parse_inbound(pkt_m.iter_mut(), GenericUlp {}) .unwrap() .to_full_meta(); diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 006b3a8b..9d685303 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -776,8 +776,6 @@ impl From<&PacketData> for InnerFlowId { /// kind of figure out as you work in the code more. In OPTE, we /// disambiguate using the `MsgBlk` and `MsgBlkChain` types. The former /// enforces that `b_next` and `b_prev` are disconnected. - -// // TODO: In theory, this can be any `Read` type giving us `&mut [u8]`s, // but in practice we are internally reliant on returning `MsgBlk`s in // hairpin actions and the like. 
Fighting the battle of making this generic @@ -788,69 +786,44 @@ pub struct Packet { state: S, } -impl Packet> { - pub fn new(pkt: T) -> Self - where - Initialized: PacketState, - { - Self { state: Initialized { inner: pkt } } - } -} - pub type LiteInPkt = Packet::InMeta<::Chunk>>>; pub type LiteOutPkt = Packet::OutMeta<::Chunk>>>; -impl<'a, T: Read + BufferState + 'a> Packet> +impl<'a, T: Read + BufferState + 'a, M: LightweightMeta> + Packet> where - T::Chunk: IntoBufPointer<'a> + ByteSliceMut, + T::Chunk: ByteSliceMut + IntoBufPointer<'a>, { #[inline] - pub fn len(&self) -> usize { - self.state.inner.len() - } - - #[inline] - pub fn mblk_addr(&self) -> uintptr_t { - self.state.inner.base_ptr() - } - - #[inline] - pub fn parse_inbound( - self, + pub fn parse_inbound = M>>( + pkt: T, net: NP, ) -> Result, ParseError> { - let len = self.len(); - let base_ptr = self.mblk_addr(); - let Packet { state: Initialized { inner } } = self; + let len = pkt.len(); + let base_ptr = pkt.base_ptr(); - let meta = net.parse_inbound(inner)?; + let meta = net.parse_inbound(pkt)?; meta.stack.validate(len)?; Ok(Packet { state: LiteParsed { meta, base_ptr, len } }) } #[inline] - pub fn parse_outbound( - self, + pub fn parse_outbound = M>>( + pkt: T, net: NP, ) -> Result, ParseError> { - let len = self.len(); - let base_ptr = self.mblk_addr(); - let Packet { state: Initialized { inner } } = self; + let len = pkt.len(); + let base_ptr = pkt.base_ptr(); - let meta = net.parse_outbound(inner)?; + let meta = net.parse_outbound(pkt)?; meta.stack.validate(len)?; Ok(Packet { state: LiteParsed { meta, base_ptr, len } }) } -} -impl<'a, T: Read + 'a, M: LightweightMeta> Packet> -where - T::Chunk: IntoBufPointer<'a>, -{ #[inline] pub fn to_full_meta(self) -> Packet> { let Packet { state: LiteParsed { len, base_ptr, meta } } = self; @@ -1058,7 +1031,7 @@ impl<'a, T: Read + 'a> Packet> { entropy: u.source(), vni: g.vni(), oxide_external_pkt: valid_geneve_has_oxide_external( - &g, + g, ), }) } @@ 
-1440,14 +1413,7 @@ impl<'a, T: Read + 'a> Packet> { } } -/// The type state of a packet that has been initialized and allocated, but -/// about which nothing else is known besides the length. -#[derive(Debug)] -pub struct Initialized { - inner: T, -} - -impl PacketState for Initialized {} +impl> PacketState for LiteParsed {} impl PacketState for FullParsed {} /// Zerocopy view onto a parsed packet, accompanied by locally @@ -1505,8 +1471,6 @@ pub struct LiteParsed> { meta: IngotParsed, } -impl> PacketState for LiteParsed {} - impl> LiteParsed {} // These are needed for now to account for not wanting to redesign @@ -1856,8 +1820,7 @@ mod test { #[test] fn read_single_segment() { let mut pkt = tcp_pkt(&[]); - let parsed = Packet::new(pkt.iter_mut()) - .parse_outbound(GenericUlp {}) + let parsed = Packet::parse_outbound(pkt.iter_mut(), GenericUlp {}) .unwrap() .to_full_meta(); @@ -1907,8 +1870,7 @@ mod test { mp1.append(mp2); - let pkt = Packet::new(mp1.iter_mut()) - .parse_outbound(GenericUlp {}) + let pkt = Packet::parse_outbound(mp1.iter_mut(), GenericUlp {}) .unwrap() .to_full_meta(); @@ -1945,7 +1907,7 @@ mod test { assert_eq!(st1.byte_len(), base.len()); assert!(matches!( - Packet::new(st1.iter_mut()).parse_outbound(GenericUlp {}), + Packet::parse_outbound(st1.iter_mut(), GenericUlp {}), Err(ParseError::IngotError(_)) )); } @@ -1998,8 +1960,7 @@ mod test { let mut pkt = MsgBlk::new_ethernet_pkt((eth, ip6, ext_hdrs, tcp)); - let pkt = Packet::new(pkt.iter_mut()) - .parse_outbound(GenericUlp {}) + let pkt = Packet::parse_outbound(pkt.iter_mut(), GenericUlp {}) .unwrap() .to_full_meta(); @@ -2048,8 +2009,7 @@ mod test { assert_eq!(pkt.byte_len(), MINIMUM_ETH_FRAME_SZ - FRAME_CHECK_SEQ_SZ); // Generate the metadata by parsing the packet - let parsed = Packet::new(pkt.iter_mut()) - .parse_inbound(GenericUlp {}) + let parsed = Packet::parse_inbound(pkt.iter_mut(), GenericUlp {}) .unwrap() .to_full_meta(); @@ -2108,8 +2068,7 @@ mod test { // Generate the metadata by 
parsing the packet. // This should not fail even though there are more bytes in // the initialised area ofthe mblk chain than the packet expects. - let pkt = Packet::new(pkt.iter_mut()) - .parse_inbound(GenericUlp {}) + let pkt = Packet::parse_inbound(pkt.iter_mut(), GenericUlp {}) .unwrap() .to_full_meta(); diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 34bde01e..2a667e35 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -1994,6 +1994,7 @@ impl Port { } } + #[allow(clippy::too_many_arguments)] #[inline(always)] fn port_process_return_probe( &self, diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 02396da7..8bd9f55d 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -1146,8 +1146,7 @@ fn rule_matching() { let eth = Ethernet { ethertype: Ethertype::IPV4, ..Default::default() }; let mut pkt_m = MsgBlk::new_ethernet_pkt((ð, &ip4, &tcp)); - let mut pkt = Packet::new(pkt_m.iter_mut()) - .parse_outbound(GenericUlp {}) + let mut pkt = Packet::parse_outbound(pkt_m.iter_mut(), GenericUlp {}) .unwrap() .to_full_meta(); pkt.compute_checksums(); diff --git a/lib/opte/src/engine/snat.rs b/lib/opte/src/engine/snat.rs index 1d9675dc..2b0a8ede 100644 --- a/lib/opte/src/engine/snat.rs +++ b/lib/opte/src/engine/snat.rs @@ -541,8 +541,7 @@ mod test { }; let mut pkt_m = MsgBlk::new_ethernet_pkt((ð, &ip4, &tcp, &body)); - let mut pkt = Packet::new(pkt_m.iter_mut()) - .parse_outbound(GenericUlp {}) + let mut pkt = Packet::parse_outbound(pkt_m.iter_mut(), GenericUlp {}) .unwrap() .to_full_meta(); pkt.compute_checksums(); @@ -609,8 +608,7 @@ mod test { }; let mut pkt_m = MsgBlk::new_ethernet_pkt((ð, &ip4, &tcp, &body)); - let mut pkt = Packet::new(pkt_m.iter_mut()) - .parse_inbound(GenericUlp {}) + let mut pkt = Packet::parse_inbound(pkt_m.iter_mut(), GenericUlp {}) .unwrap() .to_full_meta(); pkt.compute_checksums(); diff --git a/lib/oxide-vpc/tests/fuzz_regression.rs 
b/lib/oxide-vpc/tests/fuzz_regression.rs index 40cdd24d..2f454b78 100644 --- a/lib/oxide-vpc/tests/fuzz_regression.rs +++ b/lib/oxide-vpc/tests/fuzz_regression.rs @@ -111,8 +111,7 @@ fn run_tests( fn parse_in_regression() { run_tests("parse_in", |data| { let mut msg = MsgBlk::copy(data); - let parsed = Packet::new(msg.iter_mut()); - let _ = parsed.parse_inbound(VpcParser {}); + let _ = Packet::parse_inbound(msg.iter_mut(), VpcParser {}); }); } @@ -120,7 +119,6 @@ fn parse_in_regression() { fn parse_out_regression() { run_tests("parse_out", |data| { let mut msg = MsgBlk::copy(data); - let parsed = Packet::new(msg.iter_mut()); - let _ = parsed.parse_outbound(VpcParser {}); + let _ = Packet::parse_outbound(msg.iter_mut(), VpcParser {}); }); } diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index 0d52584d..34eaa1bf 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -1850,10 +1850,8 @@ fn bad_ip_len() { let udp = Udp { source: 68, destination: 67, ..Default::default() }; let mut pkt_m = MsgBlk::new_ethernet_pkt((eth, ip, udp)); - let res = Packet::new(pkt_m.iter_mut()) - .parse_outbound(VpcParser {}) - .err() - .unwrap(); + let res = + Packet::parse_outbound(pkt_m.iter_mut(), VpcParser {}).err().unwrap(); assert_eq!( res, ParseError::BadLength(MismatchError { diff --git a/xde/src/xde.rs b/xde/src/xde.rs index ecb66b86..14dc5493 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -1407,14 +1407,13 @@ fn guest_loopback( use Direction::*; let mblk_addr = pkt.mblk_addr(); - let parsed_pkt = Packet::new(pkt.iter_mut()); // Loopback now requires a reparse on loopback to account for UFT fastpath. // When viona serves us larger packets, we needn't worry about allocing // the encap on. // We might be able to do better in the interim, but that costs us time. 
- let parsed_pkt = match parsed_pkt.parse_inbound(VpcParser {}) { + let parsed_pkt = match Packet::parse_inbound(pkt.iter_mut(), VpcParser {}) { Ok(pkt) => pkt, Err(e) => { opte::engine::dbg!("Loopback bad packet: {:?}", e); @@ -1542,8 +1541,7 @@ unsafe extern "C" fn xde_mc_tx( unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { let parser = src_dev.port.network().parser(); let mblk_addr = pkt.mblk_addr(); - let parsed_pkt = Packet::new(pkt.iter_mut()); - let parsed_pkt = match parsed_pkt.parse_outbound(parser) { + let parsed_pkt = match Packet::parse_outbound(pkt.iter_mut(), parser) { Ok(pkt) => pkt, Err(e) => { // TODO Add bad packet stat. @@ -1848,12 +1846,11 @@ unsafe fn xde_rx_one( mut pkt: MsgBlk, ) { let mblk_addr = pkt.mblk_addr(); - let parsed_pkt = Packet::new(pkt.iter_mut()); // We must first parse the packet in order to determine where it // is to be delivered. let parser = VpcParser {}; - let parsed_pkt = match parsed_pkt.parse_inbound(parser) { + let parsed_pkt = match Packet::parse_inbound(pkt.iter_mut(), parser) { Ok(pkt) => pkt, Err(e) => { // TODO Add bad packet stat. From 5e290a39f101c9bf7eb320980b0bdf1ae4315ecd Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 1 Nov 2024 14:21:53 +0000 Subject: [PATCH 096/115] Whoops. 
--- bench/benches/userland.rs | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/bench/benches/userland.rs b/bench/benches/userland.rs index cb036b7d..59de9a42 100644 --- a/bench/benches/userland.rs +++ b/bench/benches/userland.rs @@ -84,11 +84,11 @@ pub fn test_parse( ParserKind::Generic => { |(mut in_pkt, direction): TestCase| { black_box(match direction { - In => pkt.parse_inbound( + In => Packet::parse_inbound( in_pkt.iter_mut(), GenericUlp {}, ), - Out => pkt.parse_outbound( + Out => Packet::parse_outbound( in_pkt.iter_mut(), GenericUlp {}, ), @@ -100,14 +100,14 @@ pub fn test_parse( |(mut in_pkt, direction): TestCase| { black_box(match direction { In => { - pkt.parse_inbound( + Packet::parse_inbound( in_pkt.iter_mut(), VpcParser {}, ) .unwrap(); } Out => { - pkt.parse_outbound( + Packet::parse_outbound( in_pkt.iter_mut(), VpcParser {}, ) @@ -164,16 +164,21 @@ pub fn test_handle( // packet is now a view over the generated pkt. 
|(mut pkt_m, dir): TestCase| match parser { ParserKind::Generic => { - let pkt = Packet::new(pkt_m.iter_mut()); let res = match dir { In => { - let pkt = - pkt.parse_inbound(GenericUlp {}).unwrap(); + let pkt = Packet::parse_inbound( + pkt_m.iter_mut(), + GenericUlp {}, + ) + .unwrap(); port.port.process(dir, black_box(pkt)).unwrap() } Out => { - let pkt = - pkt.parse_outbound(GenericUlp {}).unwrap(); + let pkt = Packet::parse_outbound( + pkt_m.iter_mut(), + GenericUlp {}, + ) + .unwrap(); port.port.process(dir, black_box(pkt)).unwrap() } }; @@ -183,16 +188,21 @@ pub fn test_handle( } } ParserKind::OxideVpc => { - let pkt = Packet::new(pkt_m.iter_mut()); let res = match dir { In => { - let pkt = - pkt.parse_inbound(VpcParser {}).unwrap(); + let pkt = Packet::parse_inbound( + pkt_m.iter_mut(), + VpcParser {}, + ) + .unwrap(); port.port.process(dir, black_box(pkt)).unwrap() } Out => { - let pkt = - pkt.parse_outbound(VpcParser {}).unwrap(); + let pkt = Packet::parse_outbound( + pkt_m.iter_mut(), + VpcParser {}, + ) + .unwrap(); port.port.process(dir, black_box(pkt)).unwrap() } }; From 9393ad64d6ad53894c9fa3e02b0e1ea4e71b5b52 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 1 Nov 2024 17:22:26 +0000 Subject: [PATCH 097/115] Update position on UFT compilation in the architecture doc --- lib/opte/README.adoc | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/lib/opte/README.adoc b/lib/opte/README.adoc index 8c680485..3bf6fe79 100644 --- a/lib/opte/README.adoc +++ b/lib/opte/README.adoc @@ -357,7 +357,14 @@ which it is currently processing. The packet (`opte::engine::Packet`) abstraction forms a single view into the the underlying `mblk_t *` chain that makes up the underlying packet and its data. It attempts to hide the complexity of dealing -with mblk chains directly. +with mblk chains directly. Packets represent a set of byteslices cast +into semantically useful header types, and allow read/write access to +their fields. 
The `Packet` type is also responsible for computing any +changes which must be fully serialised back into the `mblk_t` chain once +OPTE has completed its processing. + +It is possible in future to support underlying buffer types other than +`mblk_t`s, but today all packets must be `mblk_t`s. === Layer Flow Table @@ -506,10 +513,9 @@ report a warning to the user)? Furthermore, you could give the engine the smarts to determine when there is a contradiction and report some kind of error. You could also effect a sort of "last write wins" for some sequences of transformations: e.g., two modifications on the same -header. In any event, OPTE has not implemented any sort of "compiling" -of header transformations at this time. It simply builds a list, -assuming its sequence of transformations are sane, and stores said -list in the UFT. +header. OPTE implements a limited form of compilation of disjoint +transforms, and falls back to a full list when more than one transform +is applied to any one header. === SDT probes From a4648b4aba1eab7cefca6ae406389a30628259e6 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 4 Nov 2024 12:05:16 +0000 Subject: [PATCH 098/115] Mark parse errors in mod-level kstat. 
--- Cargo.lock | 6 +++--- Cargo.toml | 2 +- crates/kstat-macro/src/lib.rs | 2 +- lib/opte/src/ddi/kstat.rs | 2 ++ lib/opte/src/engine/layer.rs | 2 -- lib/opte/src/engine/port.rs | 1 - xde/src/lib.rs | 1 + xde/src/xde.rs | 25 +++++++++++++++++++++---- 8 files changed, 29 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b71313ba..63be4510 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -895,7 +895,7 @@ dependencies = [ [[package]] name = "ingot" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=0b961bc1ff7355ceff1325959746fdc8bf661b87#0b961bc1ff7355ceff1325959746fdc8bf661b87" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=1cd34849387da4b0d63e0a3990889d8c04df537b#1cd34849387da4b0d63e0a3990889d8c04df537b" dependencies = [ "bitflags 2.6.0", "ingot-macros", @@ -908,7 +908,7 @@ dependencies = [ [[package]] name = "ingot-macros" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=0b961bc1ff7355ceff1325959746fdc8bf661b87#0b961bc1ff7355ceff1325959746fdc8bf661b87" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=1cd34849387da4b0d63e0a3990889d8c04df537b#1cd34849387da4b0d63e0a3990889d8c04df537b" dependencies = [ "darling", "itertools 0.13.0", @@ -921,7 +921,7 @@ dependencies = [ [[package]] name = "ingot-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=0b961bc1ff7355ceff1325959746fdc8bf661b87#0b961bc1ff7355ceff1325959746fdc8bf661b87" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=1cd34849387da4b0d63e0a3990889d8c04df537b#1cd34849387da4b0d63e0a3990889d8c04df537b" dependencies = [ "ingot-macros", "macaddr", diff --git a/Cargo.toml b/Cargo.toml index 17f7b313..8f8d55cb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,7 +50,7 @@ ctor = "0.2" darling = "0.20" dyn-clone = "1.0" heapless = "0.8" -ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "0b961bc1ff7355ceff1325959746fdc8bf661b87"} +ingot = { git = 
"https://github.com/oxidecomputer/ingot.git", rev = "1cd34849387da4b0d63e0a3990889d8c04df537b"} ipnetwork = { version = "0.20", default-features = false } itertools = { version = "0.13", default-features = false } libc = "0.2" diff --git a/crates/kstat-macro/src/lib.rs b/crates/kstat-macro/src/lib.rs index 93628fdb..fc689c48 100644 --- a/crates/kstat-macro/src/lib.rs +++ b/crates/kstat-macro/src/lib.rs @@ -83,7 +83,7 @@ pub fn derive_kstat_provider(input: TokenStream) -> TokenStream { fn init( &mut self - ) -> core::result::Result<(), kstat::Error> { + ) -> core::result::Result<(), ::opte::ddi::kstat::Error> { #( self.#fields_ident.init(stringify!(#fields_ident))?; )* Ok(()) } diff --git a/lib/opte/src/ddi/kstat.rs b/lib/opte/src/ddi/kstat.rs index edbaa8a1..249cd927 100644 --- a/lib/opte/src/ddi/kstat.rs +++ b/lib/opte/src/ddi/kstat.rs @@ -12,6 +12,8 @@ use alloc::string::String; use core::fmt; use core::fmt::Display; +pub use kstat_macro::KStatProvider; + cfg_if! { if #[cfg(all(not(feature = "std"), not(test)))] { use alloc::ffi::CString; diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index 95f2c803..69fda942 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -33,7 +33,6 @@ use super::rule::Rule; use crate::d_error::DError; #[cfg(all(not(feature = "std"), not(test)))] use crate::d_error::LabelBlock; -use crate::ddi::kstat; use crate::ddi::kstat::KStatNamed; use crate::ddi::kstat::KStatProvider; use crate::ddi::kstat::KStatU64; @@ -53,7 +52,6 @@ use core::num::NonZeroU32; use core::result; use illumos_sys_hdrs::c_char; use illumos_sys_hdrs::uintptr_t; -use kstat_macro::KStatProvider; use opte_api::Direction; #[derive(Debug)] diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 2a667e35..6a9c8947 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -94,7 +94,6 @@ use ingot::types::HeaderLen; use ingot::types::IntoBufPointer; use ingot::types::Read; use 
ingot::udp::Udp; -use kstat_macro::KStatProvider; use opte_api::Direction; use opte_api::MacAddr; use opte_api::OpteError; diff --git a/xde/src/lib.rs b/xde/src/lib.rs index f562ecc3..4c960f82 100644 --- a/xde/src/lib.rs +++ b/xde/src/lib.rs @@ -45,6 +45,7 @@ pub mod ip; pub mod mac; pub mod route; pub mod secpolicy; +pub mod stats; pub mod sys; pub mod xde; diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 14dc5493..d35bb37e 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -26,6 +26,7 @@ use crate::route::Route; use crate::route::RouteCache; use crate::route::RouteKey; use crate::secpolicy; +use crate::stats::XdeStats; use crate::sys; use crate::warn; use alloc::boxed::Box; @@ -52,6 +53,8 @@ use opte::api::OpteError; use opte::api::SetXdeUnderlayReq; use opte::api::XDE_IOC_OPTE_CMD; use opte::d_error::LabelBlock; +use opte::ddi::kstat::KStatNamed; +use opte::ddi::kstat::KStatProvider; use opte::ddi::mblk::MsgBlk; use opte::ddi::mblk::MsgBlkChain; use opte::ddi::sync::KMutex; @@ -223,6 +226,7 @@ struct XdeState { vpc_map: Arc, v2b: Arc, underlay: KMutex>, + stats: KMutex>, } struct UnderlayState { @@ -250,10 +254,22 @@ impl XdeState { ectx, vpc_map: Arc::new(overlay::VpcMappings::new()), v2b: Arc::new(overlay::Virt2Boundary::new()), + stats: KMutex::new( + KStatNamed::new("xde", "xde", XdeStats::new()) + .expect("Name is well-constructed (len, no NUL bytes)"), + KMutexType::Driver, + ), } } } +fn stat_parse_error(dir: Direction, err: &ParseError) { + let xde = get_xde_state(); + let mut stats = xde.stats.lock(); + + stats.vals.parse_error(dir, err); +} + #[repr(C)] pub struct XdeDev { devname: String, @@ -1416,6 +1432,7 @@ fn guest_loopback( let parsed_pkt = match Packet::parse_inbound(pkt.iter_mut(), VpcParser {}) { Ok(pkt) => pkt, Err(e) => { + stat_parse_error(Direction::In, &e); opte::engine::dbg!("Loopback bad packet: {:?}", e); bad_packet_parse_probe(None, Direction::In, mblk_addr, &e); @@ -1544,8 +1561,8 @@ unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut 
pkt: MsgBlk) -> *mut mblk_t { let parsed_pkt = match Packet::parse_outbound(pkt.iter_mut(), parser) { Ok(pkt) => pkt, Err(e) => { - // TODO Add bad packet stat. - // + stat_parse_error(Direction::Out, &e); + // NOTE: We are using the individual mblk_t as read only // here to get the pointer value so that the DTrace consumer // can examine the packet on failure. @@ -1853,8 +1870,8 @@ unsafe fn xde_rx_one( let parsed_pkt = match Packet::parse_inbound(pkt.iter_mut(), parser) { Ok(pkt) => pkt, Err(e) => { - // TODO Add bad packet stat. - // + stat_parse_error(Direction::In, &e); + // NOTE: We are using the individual mblk_t as read only // here to get the pointer value so that the DTrace consumer // can examine the packet on failure. From 230e3c7af9763f443ebd7b3f46cc7001ac6346d2 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 4 Nov 2024 14:42:35 +0000 Subject: [PATCH 099/115] Fuzzers, accidentally a file. --- fuzz/fuzz_targets/parse-in.rs | 3 +- fuzz/fuzz_targets/parse-out.rs | 3 +- xde/src/stats.rs | 101 +++++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 4 deletions(-) create mode 100644 xde/src/stats.rs diff --git a/fuzz/fuzz_targets/parse-in.rs b/fuzz/fuzz_targets/parse-in.rs index eedba5f3..c9796faf 100644 --- a/fuzz/fuzz_targets/parse-in.rs +++ b/fuzz/fuzz_targets/parse-in.rs @@ -7,6 +7,5 @@ use oxide_vpc::engine::VpcParser; fuzz_target!(|data: &[u8]| { let mut pkt_m = MsgBlk::copy(data); - let pkt = Packet::new(pkt_m.iter_mut()); - pkt.parse_inbound(VpcParser {}); + let _ = Packet::parse_inbound(pkt_m.iter_mut(), VpcParser {}); }); diff --git a/fuzz/fuzz_targets/parse-out.rs b/fuzz/fuzz_targets/parse-out.rs index 7a1601c8..7806ad47 100644 --- a/fuzz/fuzz_targets/parse-out.rs +++ b/fuzz/fuzz_targets/parse-out.rs @@ -7,6 +7,5 @@ use oxide_vpc::engine::VpcParser; fuzz_target!(|data: &[u8]| { let mut pkt_m = MsgBlk::copy(data); - let pkt = Packet::new(pkt_m.iter_mut()); - pkt.parse_outbound(VpcParser {}); + let _ = 
Packet::parse_outbound(pkt_m.iter_mut(), VpcParser {}); }); diff --git a/xde/src/stats.rs b/xde/src/stats.rs new file mode 100644 index 00000000..cc02dc5d --- /dev/null +++ b/xde/src/stats.rs @@ -0,0 +1,101 @@ +use opte::api::Direction; +use opte::ddi::kstat::KStatProvider; +use opte::ddi::kstat::KStatU64; +use opte::engine::packet::ParseError; +use opte::ingot::types::ParseError as IngotError; + +/// Top-level KStats for XDE. +#[derive(KStatProvider)] +pub struct XdeStats { + /// The number of inbound packets dropped as explicitly + /// rejected during parsing. + in_drop_reject: KStatU64, + /// The number of inbound packets dropped with an unexpected + /// protocol number. + in_drop_unwanted_proto: KStatU64, + /// The number of inbound packets dropped for having + /// insufficient bytes to read the standard set of headers. + in_drop_truncated: KStatU64, + /// The number of inbound packets dropped due to a header being + /// split across `mblk_t` boundaries. + in_drop_straddled: KStatU64, + /// The number of inbound packets dropped due to having an illegal + /// value in a mandatory/critical field. + in_drop_illegal_val: KStatU64, + /// The number of inbound packets dropped due to reporting more + /// bytes than the packet contains. + in_drop_bad_len: KStatU64, + /// The number of inbound packets dropped due to the presence of + /// unrecognised critical options. + in_drop_bad_tun_opt: KStatU64, + /// The number of inbound packets dropped for other reasons, including + /// parser programming errors. + in_drop_misc: KStatU64, + + /// The number of outbound packets dropped as explicitly + /// rejected during parsing. + out_drop_reject: KStatU64, + /// The number of outbound packets dropped with an unexpected + /// protocol number. + out_drop_unwanted_proto: KStatU64, + /// The number of outbound packets dropped for having + /// insufficient bytes to read the standard set of headers. 
+ out_drop_truncated: KStatU64, + /// The number of outbound packets dropped due to a header being + /// split across `mblk_t` boundaries. + out_drop_straddled: KStatU64, + /// The number of outbound packets dropped due to having an illegal + /// value in a mandatory/critical field. + out_drop_illegal_val: KStatU64, + /// The number of outbound packets dropped due to reporting more + /// bytes than the packet contains. + out_drop_bad_len: KStatU64, + /// The number of outbound packets dropped for other reasons, including + /// parser programming errors. + out_drop_misc: KStatU64, + // NOTE: tun_opt is not relevant to outbound packets -- no encapsulation + // is in use. +} + +impl XdeStats { + pub fn parse_error(&mut self, dir: Direction, err: &ParseError) { + use Direction::*; + match (dir, err) { + (In, ParseError::IngotError(e)) => match e.error() { + IngotError::Unwanted => self.in_drop_unwanted_proto += 1, + IngotError::TooSmall | IngotError::NoRemainingChunks => { + self.in_drop_truncated += 1 + } + IngotError::StraddledHeader => self.in_drop_straddled += 1, + IngotError::Reject => self.in_drop_reject += 1, + IngotError::IllegalValue => self.in_drop_illegal_val += 1, + IngotError::NeedsHint | IngotError::CannotAccept => { + self.in_drop_misc += 1 + } + }, + (In, ParseError::IllegalValue(_)) => self.in_drop_illegal_val += 1, + (In, ParseError::BadLength(_)) => self.in_drop_bad_len += 1, + (In, ParseError::UnrecognisedTunnelOpt { .. 
}) => { + self.in_drop_bad_tun_opt += 1 + } + + (Out, ParseError::IngotError(e)) => match e.error() { + IngotError::Unwanted => self.out_drop_unwanted_proto += 1, + IngotError::TooSmall | IngotError::NoRemainingChunks => { + self.out_drop_truncated += 1 + } + IngotError::StraddledHeader => self.out_drop_straddled += 1, + IngotError::Reject => self.out_drop_reject += 1, + IngotError::IllegalValue => self.out_drop_illegal_val += 1, + IngotError::NeedsHint | IngotError::CannotAccept => { + self.out_drop_misc += 1 + } + }, + (Out, ParseError::IllegalValue(_)) => { + self.out_drop_illegal_val += 1 + } + (Out, ParseError::BadLength(_)) => self.out_drop_bad_len += 1, + (Out, _) => self.out_drop_misc += 1, + } + } +} From 5afd63a0a9430c1f0a48a710680c5b1389c27fa6 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 4 Nov 2024 15:02:15 +0000 Subject: [PATCH 100/115] Missed doctests. --- lib/opte/src/ddi/kstat.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/opte/src/ddi/kstat.rs b/lib/opte/src/ddi/kstat.rs index 249cd927..a6587356 100644 --- a/lib/opte/src/ddi/kstat.rs +++ b/lib/opte/src/ddi/kstat.rs @@ -42,7 +42,6 @@ cfg_if! { /// /// ``` /// use opte::ddi::kstat::{self, KStatProvider, KStatU64}; -/// use kstat_macro::KStatProvider; /// /// #[derive(KStatProvider)] /// struct SomeStats { @@ -86,7 +85,6 @@ pub trait KStatProvider { /// /// ``` /// use opte::ddi::kstat::{self, KStatNamed, KStatProvider, KStatU64}; -/// use kstat_macro::KStatProvider; /// /// #[derive(KStatProvider)] /// pub struct StatProvider { From d8ab6668290251074b149b1c44232d839837937b Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 7 Nov 2024 10:53:55 +0000 Subject: [PATCH 101/115] Review feedback -- de-`pub` inner, de-`inner`. 
--- lib/opte/src/ddi/mblk.rs | 76 ++++++++++++++++++---------------------- lib/opte/src/ddi/time.rs | 5 ++- 2 files changed, 37 insertions(+), 44 deletions(-) diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index ffa21184..c2471a9d 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -55,14 +55,12 @@ struct MsgBlkChainInner { // we remove and re-add the mblks to work on them. // We might want also want to return either a chain/mblk_t in an enum, but // practically XDE will always assume it has a chain from MAC. -pub struct MsgBlkChain { - inner: Option, -} +pub struct MsgBlkChain(Option); impl MsgBlkChain { /// Create an empty packet chain. pub fn empty() -> Self { - Self { inner: None } + Self(None) } /// Convert an mblk_t packet chain into a safe source of `MsgBlk`s. @@ -82,13 +80,13 @@ impl MsgBlkChain { tail = next_ptr; } - Ok(Self { inner: Some(MsgBlkChainInner { head, tail }) }) + Ok(Self(Some(MsgBlkChainInner { head, tail }))) } /// Removes the next packet from the top of the chain and returns /// it, taking ownership. pub fn pop_front(&mut self) -> Option { - if let Some(ref mut list) = &mut self.inner { + if let Some(ref mut list) = &mut self.0 { unsafe { let curr_b = list.head; let curr = curr_b.as_ptr(); @@ -106,10 +104,10 @@ impl MsgBlkChain { if let Some(next) = next { list.head = next; } else { - self.inner = None; + self.0 = None; } - Some(MsgBlk { inner: curr_b }) + Some(MsgBlk(curr_b)) } } else { None @@ -134,7 +132,7 @@ impl MsgBlkChain { assert!((*pkt.as_ptr()).b_next.is_null()); } - if let Some(ref mut list) = &mut self.inner { + if let Some(ref mut list) = &mut self.0 { let pkt_p = pkt.as_ptr(); let tail_p = list.tail.as_ptr(); unsafe { @@ -144,7 +142,7 @@ impl MsgBlkChain { } list.tail = pkt; } else { - self.inner = Some(MsgBlkChainInner { head: pkt, tail: pkt }); + self.0 = Some(MsgBlkChainInner { head: pkt, tail: pkt }); } } @@ -152,7 +150,7 @@ impl MsgBlkChain { /// consume `self`. 
The caller of this function now owns the /// `mblk_t` segment chain. pub fn unwrap_mblk(mut self) -> Option> { - self.inner.take().map(|v| v.head) + self.0.take().map(|v| v.head) } } @@ -166,7 +164,7 @@ impl Drop for MsgBlkChain { // Safety: This is safe as long as the original // `mblk_t` came from a call to `allocb(9F)` (or // similar API). - if let Some(list) = &self.inner { + if let Some(list) = &self.0 { unsafe { ddi::freemsgchain(list.head.as_ptr()) }; } } else { @@ -194,16 +192,14 @@ impl Drop for MsgBlkChain { /// an Ethernet _frame_, but we prefer to use the colloquial /// nomenclature of "packet". #[derive(Debug)] -pub struct MsgBlk { - pub inner: NonNull, -} +pub struct MsgBlk(NonNull); impl Deref for MsgBlk { type Target = [u8]; fn deref(&self) -> &Self::Target { unsafe { - let self_ref = self.inner.as_ref(); + let self_ref = self.0.as_ref(); let rptr = self_ref.b_rptr; let len = self_ref.b_wptr.offset_from(rptr) as usize; slice::from_raw_parts(rptr, len) @@ -214,7 +210,7 @@ impl Deref for MsgBlk { impl DerefMut for MsgBlk { fn deref_mut(&mut self) -> &mut Self::Target { unsafe { - let self_ref = self.inner.as_mut(); + let self_ref = self.0.as_mut(); let rptr = self_ref.b_rptr; let len = self_ref.b_wptr.offset_from(rptr) as usize; slice::from_raw_parts_mut(rptr, len) @@ -284,7 +280,7 @@ impl MsgBlk { let inner = NonNull::new(allocb(len)) .expect("somehow failed to get an mblk..."); - Self { inner } + Self(inner) } /// Allocates a new [`MsgBlk`] of size `buf.len()`, copying its @@ -307,7 +303,7 @@ impl MsgBlk { /// read pointer in the current datablock. pub fn head_capacity(&self) -> usize { unsafe { - let inner = self.inner.as_ref(); + let inner = self.0.as_ref(); inner.b_rptr.offset_from((*inner.b_datap).db_base) as usize } @@ -317,7 +313,7 @@ impl MsgBlk { /// write pointer in the current datablock. 
pub fn tail_capacity(&self) -> usize { unsafe { - let inner = self.inner.as_ref(); + let inner = self.0.as_ref(); (*inner.b_datap).db_lim.offset_from(inner.b_wptr) as usize } @@ -377,7 +373,7 @@ impl MsgBlk { let mut out = Self::new(head_len + body_len); // SAFETY: alloc is contiguous and always larger than head_len. - let mut_out = unsafe { out.inner.as_mut() }; + let mut_out = unsafe { out.0.as_mut() }; mut_out.b_rptr = unsafe { mut_out.b_rptr.add(head_len) }; mut_out.b_wptr = mut_out.b_rptr; @@ -397,7 +393,7 @@ impl MsgBlk { n_bytes: usize, f: impl FnOnce(&mut [MaybeUninit]), ) -> Result<(), WriteError> { - let mut_out = unsafe { self.inner.as_mut() }; + let mut_out = unsafe { self.0.as_mut() }; let avail_bytes = unsafe { (*mut_out.b_datap).db_lim.offset_from(mut_out.b_wptr) }; @@ -435,7 +431,7 @@ impl MsgBlk { n_bytes: usize, f: impl FnOnce(&mut [MaybeUninit]), ) -> Result<(), WriteError> { - let mut_out = unsafe { self.inner.as_mut() }; + let mut_out = unsafe { self.0.as_mut() }; let avail_bytes = unsafe { mut_out.b_rptr.offset_from((*mut_out.b_datap).db_base) }; @@ -464,7 +460,7 @@ impl MsgBlk { let len = self.len(); match new_len.cmp(&len) { Ordering::Less => unsafe { - let mut_inner = self.inner.as_mut(); + let mut_inner = self.0.as_mut(); mut_inner.b_wptr = mut_inner.b_wptr.sub(len - new_len); Ok(()) }, @@ -572,7 +568,7 @@ impl MsgBlk { pub fn append(&mut self, other: Self) { // Find the last element in the pkt chain // i.e., whose b_cont is null. - let mut curr = self.inner.as_ptr(); + let mut curr = self.0.as_ptr(); while unsafe { !(*curr).b_cont.is_null() } { curr = unsafe { (*curr).b_cont }; } @@ -585,21 +581,19 @@ impl MsgBlk { /// Drop all bytes and move the cursor to the very back of the dblk. 
pub fn pop_all(&mut self) { unsafe { - (*self.inner.as_ptr()).b_rptr = - (*(*self.inner.as_ptr()).b_datap).db_lim; - (*self.inner.as_ptr()).b_wptr = - (*(*self.inner.as_ptr()).b_datap).db_lim; + (*self.0.as_ptr()).b_rptr = (*(*self.0.as_ptr()).b_datap).db_lim; + (*self.0.as_ptr()).b_wptr = (*(*self.0.as_ptr()).b_datap).db_lim; } } /// Returns a shared cursor over all segments in this `MsgBlk`. pub fn iter(&self) -> MsgBlkIter { - MsgBlkIter { curr: Some(self.inner), marker: PhantomData } + MsgBlkIter { curr: Some(self.0), marker: PhantomData } } /// Returns a mutable cursor over all segments in this `MsgBlk`. pub fn iter_mut(&mut self) -> MsgBlkIterMut { - MsgBlkIterMut { curr: Some(self.inner), marker: PhantomData } + MsgBlkIterMut { curr: Some(self.0), marker: PhantomData } } /// Return the pointer address of the underlying mblk_t. @@ -608,14 +602,14 @@ impl MsgBlk { /// DTrace so that the mblk can be inspected (read only) in probe /// context. pub fn mblk_addr(&self) -> uintptr_t { - self.inner.as_ptr() as uintptr_t + self.0.as_ptr() as uintptr_t } /// Return the head of the underlying `mblk_t` segment chain and /// consume `self`. The caller of this function now owns the /// `mblk_t` segment chain. pub fn unwrap_mblk(self) -> NonNull { - let ptr_out = self.inner; + let ptr_out = self.0; _ = ManuallyDrop::new(self); ptr_out } @@ -643,7 +637,7 @@ impl MsgBlk { let inner_ref = inner.as_ref(); if inner_ref.b_next.is_null() && inner_ref.b_prev.is_null() { - Ok(Self { inner }) + Ok(Self(inner)) } else { Err(WrapError::Chain) } @@ -668,7 +662,7 @@ impl MsgBlk { // sized blocks. This is not a generally expected thing and has // caused NIC hardware to stop working. // Stripping these out where possible is necessary. 
- let mut head = self.inner; + let mut head = self.0; let mut neighbour = unsafe { (*head.as_ptr()).b_cont }; while !neighbour.is_null() @@ -686,7 +680,7 @@ impl MsgBlk { } } - self.inner = head; + self.0 = head; } } @@ -795,9 +789,9 @@ impl Drop for MsgBlk { // Safety: This is safe as long as the original // `mblk_t` came from a call to `allocb(9F)` (or // similar API). - unsafe { ddi::freemsg(self.inner.as_ptr()) }; + unsafe { ddi::freemsg(self.0.as_ptr()) }; } else { - mock_freemsg(self.inner.as_ptr()); + mock_freemsg(self.0.as_ptr()); } } } @@ -1097,7 +1091,7 @@ mod test { let els = create_linked_mblks(3); let chain = unsafe { MsgBlkChain::new(els[0]) }.unwrap(); - let chain_inner = chain.inner.as_ref().unwrap(); + let chain_inner = chain.0.as_ref().unwrap(); assert_eq!(chain_inner.head.as_ptr(), els[0]); assert_eq!(chain_inner.tail.as_ptr(), els[2]); } @@ -1116,7 +1110,7 @@ mod test { } // Chain head/tail ptrs are correct - let chain_inner = chain.inner.as_ref().unwrap(); + let chain_inner = chain.0.as_ref().unwrap(); assert_eq!(chain_inner.head.as_ptr(), els[1]); assert_eq!(chain_inner.tail.as_ptr(), els[2]); unsafe { @@ -1136,7 +1130,7 @@ mod test { chain.append(pkt); // Chain head/tail ptrs are correct - let chain_inner = chain.inner.as_ref().unwrap(); + let chain_inner = chain.0.as_ref().unwrap(); assert_eq!(chain_inner.head.as_ptr(), els[0]); assert_eq!(chain_inner.tail.as_ptr(), new_el); diff --git a/lib/opte/src/ddi/time.rs b/lib/opte/src/ddi/time.rs index f0067906..1732f389 100644 --- a/lib/opte/src/ddi/time.rs +++ b/lib/opte/src/ddi/time.rs @@ -26,9 +26,6 @@ pub const NANOS: u64 = 1_000_000_000; /// The conversion from nanoseconds to milliseconds. pub const NANOS_TO_MILLIS: u64 = 1_000_000; -#[cfg(any(feature = "std", test))] -static FIRST_TS: OnceLock = OnceLock::new(); - /// A moment in time. 
#[derive(Clone, Copy, Debug)] pub struct Moment { @@ -82,6 +79,8 @@ impl Moment { if #[cfg(all(not(feature = "std"), not(test)))] { Self { inner: unsafe { ddi::gethrtime() } } } else { + static FIRST_TS: OnceLock = OnceLock::new(); + let first_ts = *FIRST_TS.get_or_init(Instant::now); Self { inner: Instant::now().saturating_duration_since(first_ts) } } From 470ed8a73245b260d2c83fc4adf3bf9b9d7bb9b6 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 7 Nov 2024 11:23:16 +0000 Subject: [PATCH 102/115] Document the UDP zero-checksum rationale. --- lib/opte/src/engine/headers.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/lib/opte/src/engine/headers.rs b/lib/opte/src/engine/headers.rs index 36b6bab2..83fa6326 100644 --- a/lib/opte/src/engine/headers.rs +++ b/lib/opte/src/engine/headers.rs @@ -336,6 +336,19 @@ impl Emit for SizeHoldingEncap<'_> { let length = self.encapped_len + (Udp::MINIMUM_LENGTH + geneve.packet_length()) as u16; + // It's worth noting that we have a zero UDP checksum here, + // which holds true even if we're sending out over IPv6. + // Ordinarily IPv6 requires a full checksum compute for UDP, + // however RFCs 6935 & 6936 make an optional exception for + // tunnelled transports (e.g., Geneve) over UDP/v6. + // Generally OPTE is covered on validity of this: + // * We preserve cksums on inner messages, so their headers and + // payloads are *always* valid. + // * OPTE ports will only accept inbound packets with correct + // Ethernet dest, next headers, L3 dest, and VNI. + // Misdelivery on the basis of IPv6 (or other) corruption + // will lead to a drop. + // This is also reflected in RFC 8200 §8.1 (IPv6 2017). ( Udp { source: g.entropy, From 5f53d2c438db0175315f2c953acc0cf4bba96488 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 7 Nov 2024 12:01:45 +0000 Subject: [PATCH 103/115] Review feedback: document mblk refcnt assumptions. 
--- lib/opte/src/ddi/mblk.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index c2471a9d..8cec7257 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -267,7 +267,9 @@ impl MsgBlk { /// Allocate a new [`MsgBlk`] containing a data buffer of `len` /// bytes. /// - /// The returned packet consists of exactly one segment. + /// The returned packet consists of exactly one segment, and the + /// underlying `dblk_t` will have only a single referent making + /// mutable access safe. /// /// In the kernel environment this uses `allocb(9F)` and /// `freemsg(9F)` under the hood. @@ -628,6 +630,15 @@ impl MsgBlk { /// `allocb(9F)` or provided by some kernel API which itself used /// one of the DDI/DKI APIs to allocate it. /// + /// Users *must* be certain that, for any `mblk_t` in the `b_cont` chain, + /// any underlying `dblk_t`s have only a single referent (this chain) if + /// they are going to read (or &mut) the backing byteslice. This is a + /// possibility for, e.g., packets served by `viona` whose mblks after + /// the initial header pullup will point directly into guest memory (!!!). + /// We do not currently have an API for conditionally handing out slices + /// and performing pullup on the fly based on refcnt -- potentially untrusted + /// mblk uses (e.g. read/write of body segs) *must* perform a manual pullup. + /// /// # Errors /// /// * Return [`WrapError::NullPtr`] is `mp` is `NULL`. 
From 2e800b33e5d4f54497d75d3de1c654e730f35cdf Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 7 Nov 2024 12:52:36 +0000 Subject: [PATCH 104/115] =?UTF-8?q?Ingot=20repo=20is=20opened=20now=20?= =?UTF-8?q?=F0=9F=8E=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .cargo/config.toml | 3 --- .github/buildomat/jobs/bench.sh | 3 --- .github/buildomat/jobs/opte-api.sh | 3 --- .github/buildomat/jobs/opte-ioctl.sh | 3 --- .github/buildomat/jobs/opte.sh | 3 --- .github/buildomat/jobs/opteadm.sh | 3 --- .github/buildomat/jobs/oxide-vpc.sh | 3 --- .github/buildomat/jobs/p5p.sh | 3 --- .github/buildomat/jobs/test.sh | 3 --- .github/buildomat/jobs/xde.sh | 3 --- Cargo.lock | 6 ++--- Cargo.toml | 2 +- lib/opte/src/ddi/mblk.rs | 38 +++++++++++++++++++++++++--- lib/opte/src/engine/packet.rs | 10 ++++---- 14 files changed, 44 insertions(+), 42 deletions(-) diff --git a/.cargo/config.toml b/.cargo/config.toml index 7a9ef735..f37bce65 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -5,6 +5,3 @@ kbench = "bench --package opte-bench --bench xde --" [env] CARGO_WORKSPACE_DIR = { value = "", relative = true } - -[net] -git-fetch-with-cli = true diff --git a/.github/buildomat/jobs/bench.sh b/.github/buildomat/jobs/bench.sh index 5456d9cd..458c8bd0 100644 --- a/.github/buildomat/jobs/bench.sh +++ b/.github/buildomat/jobs/bench.sh @@ -7,9 +7,6 @@ #: output_rules = [ #: "=/work/bench-results.tgz", #: ] -#: access_repos = [ -#: "oxidecomputer/ingot", -#: ] #: #: [[publish]] #: series = "benchmark" diff --git a/.github/buildomat/jobs/opte-api.sh b/.github/buildomat/jobs/opte-api.sh index 9628a2cc..a4f10c00 100755 --- a/.github/buildomat/jobs/opte-api.sh +++ b/.github/buildomat/jobs/opte-api.sh @@ -5,9 +5,6 @@ #: target = "helios-2.0" #: rust_toolchain = "nightly-2024-10-12" #: output_rules = [] -#: access_repos = [ -#: "oxidecomputer/ingot", -#: ] #: set -o errexit diff --git a/.github/buildomat/jobs/opte-ioctl.sh 
b/.github/buildomat/jobs/opte-ioctl.sh index 2e6315a3..edb4ac74 100755 --- a/.github/buildomat/jobs/opte-ioctl.sh +++ b/.github/buildomat/jobs/opte-ioctl.sh @@ -5,9 +5,6 @@ #: target = "helios-2.0" #: rust_toolchain = "nightly-2024-10-12" #: output_rules = [] -#: access_repos = [ -#: "oxidecomputer/ingot", -#: ] #: set -o errexit diff --git a/.github/buildomat/jobs/opte.sh b/.github/buildomat/jobs/opte.sh index 635802d2..742cfdba 100755 --- a/.github/buildomat/jobs/opte.sh +++ b/.github/buildomat/jobs/opte.sh @@ -5,9 +5,6 @@ #: target = "helios-2.0" #: rust_toolchain = "nightly-2024-10-12" #: output_rules = [] -#: access_repos = [ -#: "oxidecomputer/ingot", -#: ] #: set -o errexit diff --git a/.github/buildomat/jobs/opteadm.sh b/.github/buildomat/jobs/opteadm.sh index 457a9771..1a299642 100755 --- a/.github/buildomat/jobs/opteadm.sh +++ b/.github/buildomat/jobs/opteadm.sh @@ -10,9 +10,6 @@ #: "=/work/release/opteadm", #: "=/work/release/opteadm.release.sha256", #: ] -#: access_repos = [ -#: "oxidecomputer/ingot", -#: ] #: #: [[publish]] #: series = "release" diff --git a/.github/buildomat/jobs/oxide-vpc.sh b/.github/buildomat/jobs/oxide-vpc.sh index e919bd93..edfbf0f9 100755 --- a/.github/buildomat/jobs/oxide-vpc.sh +++ b/.github/buildomat/jobs/oxide-vpc.sh @@ -5,9 +5,6 @@ #: target = "helios-2.0" #: rust_toolchain = "nightly-2024-10-12" #: output_rules = [] -#: access_repos = [ -#: "oxidecomputer/ingot", -#: ] #: set -o errexit diff --git a/.github/buildomat/jobs/p5p.sh b/.github/buildomat/jobs/p5p.sh index af2ec788..524f9495 100755 --- a/.github/buildomat/jobs/p5p.sh +++ b/.github/buildomat/jobs/p5p.sh @@ -8,9 +8,6 @@ #: "=/out/opte.p5p", #: "=/out/opte.p5p.sha256", #: ] -#: access_repos = [ -#: "oxidecomputer/ingot", -#: ] #: #: [[publish]] #: series = "repo" diff --git a/.github/buildomat/jobs/test.sh b/.github/buildomat/jobs/test.sh index f678ca2f..00262f91 100755 --- a/.github/buildomat/jobs/test.sh +++ b/.github/buildomat/jobs/test.sh @@ -7,9 +7,6 @@ #: 
output_rules = [ #: "/work/*.log", #: ] -#: access_repos = [ -#: "oxidecomputer/ingot", -#: ] #: #: [dependencies.xde] #: job = "opte-xde" diff --git a/.github/buildomat/jobs/xde.sh b/.github/buildomat/jobs/xde.sh index 4f12ae5f..973a3048 100755 --- a/.github/buildomat/jobs/xde.sh +++ b/.github/buildomat/jobs/xde.sh @@ -16,9 +16,6 @@ #: "=/work/test/loopback", #: "=/work/xde.conf", #: ] -#: access_repos = [ -#: "oxidecomputer/ingot", -#: ] #: #: [[publish]] #: series = "module" diff --git a/Cargo.lock b/Cargo.lock index 63be4510..5385c356 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -895,7 +895,7 @@ dependencies = [ [[package]] name = "ingot" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=1cd34849387da4b0d63e0a3990889d8c04df537b#1cd34849387da4b0d63e0a3990889d8c04df537b" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=a45c21cec49020316c3a04651ce80841bea224d7#a45c21cec49020316c3a04651ce80841bea224d7" dependencies = [ "bitflags 2.6.0", "ingot-macros", @@ -908,7 +908,7 @@ dependencies = [ [[package]] name = "ingot-macros" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=1cd34849387da4b0d63e0a3990889d8c04df537b#1cd34849387da4b0d63e0a3990889d8c04df537b" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=a45c21cec49020316c3a04651ce80841bea224d7#a45c21cec49020316c3a04651ce80841bea224d7" dependencies = [ "darling", "itertools 0.13.0", @@ -921,7 +921,7 @@ dependencies = [ [[package]] name = "ingot-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=1cd34849387da4b0d63e0a3990889d8c04df537b#1cd34849387da4b0d63e0a3990889d8c04df537b" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=a45c21cec49020316c3a04651ce80841bea224d7#a45c21cec49020316c3a04651ce80841bea224d7" dependencies = [ "ingot-macros", "macaddr", diff --git a/Cargo.toml b/Cargo.toml index 8f8d55cb..45891729 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,7 +50,7 @@ ctor = "0.2" darling 
= "0.20" dyn-clone = "1.0" heapless = "0.8" -ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "1cd34849387da4b0d63e0a3990889d8c04df537b"} +ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "a45c21cec49020316c3a04651ce80841bea224d7"} ipnetwork = { version = "0.20", default-features = false } itertools = { version = "0.13", default-features = false } libc = "0.2" diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index 8cec7257..fa3da3b0 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -360,10 +360,9 @@ impl MsgBlk { self.iter().map(|el| el.len()).sum() } - /// Return the number of initialised bytes in this `MsgBlk` in - /// the head segment. + /// Return the number of segments in this `MsgBlk`. pub fn seg_len(&self) -> usize { - self.iter().count() + self.iter().len() } /// Allocate a new [`MsgBlk`] containing a data buffer of size @@ -723,6 +722,17 @@ impl MsgBlkIterMut<'_> { } } +/// Counts the number of segments in an `mblk_t` from `head`, linked +/// via `b_cont`. 
+unsafe fn count_mblk_chain(mut head: Option>) -> usize { + let mut count = 0; + while let Some(valid_head) = head { + count += 1; + head = NonNull::new((*valid_head.as_ptr()).b_cont); + } + count +} + impl<'a> Iterator for MsgBlkIter<'a> { type Item = &'a MsgBlkNode; @@ -735,14 +745,25 @@ impl<'a> Iterator for MsgBlkIter<'a> { None } } + + fn size_hint(&self) -> (usize, Option) { + let len = unsafe { count_mblk_chain(self.curr) }; + (len, Some(len)) + } } +impl<'a> ExactSizeIterator for MsgBlkIter<'a> {} + impl<'a> Read for MsgBlkIter<'a> { type Chunk = &'a [u8]; fn next_chunk(&mut self) -> ingot::types::ParseResult { self.next().ok_or(IngotParseErr::TooSmall).map(|v| v.as_ref()) } + + fn chunks_len(&self) -> usize { + ExactSizeIterator::len(self) + } } impl<'a> Iterator for MsgBlkIterMut<'a> { @@ -757,14 +778,25 @@ impl<'a> Iterator for MsgBlkIterMut<'a> { None } } + + fn size_hint(&self) -> (usize, Option) { + let len = unsafe { count_mblk_chain(self.curr) }; + (len, Some(len)) + } } +impl<'a> ExactSizeIterator for MsgBlkIterMut<'a> {} + impl<'a> Read for MsgBlkIterMut<'a> { type Chunk = &'a mut [u8]; fn next_chunk(&mut self) -> ingot::types::ParseResult { self.next().ok_or(IngotParseErr::TooSmall).map(|v| v.as_mut()) } + + fn chunks_len(&self) -> usize { + ExactSizeIterator::len(self) + } } impl BufferState for MsgBlkIterMut<'_> { diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 9d685303..6d936b96 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -805,7 +805,7 @@ where let base_ptr = pkt.base_ptr(); let meta = net.parse_inbound(pkt)?; - meta.stack.validate(len)?; + meta.headers.validate(len)?; Ok(Packet { state: LiteParsed { meta, base_ptr, len } }) } @@ -819,7 +819,7 @@ where let base_ptr = pkt.base_ptr(); let meta = net.parse_outbound(pkt)?; - meta.stack.validate(len)?; + meta.headers.validate(len)?; Ok(Packet { state: LiteParsed { meta, base_ptr, len } }) } @@ -827,7 +827,7 @@ where #[inline] 
pub fn to_full_meta(self) -> Packet> { let Packet { state: LiteParsed { len, base_ptr, meta } } = self; - let IngotParsed { stack: headers, data, last_chunk } = meta; + let IngotParsed { headers, data, last_chunk } = meta; // TODO: we can probably not do this in some cases, but we // don't have a way for `HeaderAction`s to signal that they @@ -869,12 +869,12 @@ where #[inline] pub fn meta(&self) -> &M { - &self.state.meta.stack + &self.state.meta.headers } #[inline] pub fn meta_mut(&mut self) -> &mut M { - &mut self.state.meta.stack + &mut self.state.meta.headers } #[inline] From 72a116a59b57b2e39efbe1bd78fde6ea617a40a9 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 8 Nov 2024 11:31:20 +0000 Subject: [PATCH 105/115] Review feedback: pullup packet bodies beyond header MsgBlks Necessary to safely handle cases where, e.g., viona has pulled up part of the packet for headers, but anything after this cutoff is guest memory (thus, unsafe to construct a `&[u8]` or `&mut [u8]` over). This also ensures that any time we count the bytes in a MsgBlk b_cont chain, we do so exclusively using rptr and wptr (rather than constructing a slice). One piece left TODO is making sure that body transforms on such packets are properly handled. 
--- lib/opte-test-utils/src/icmp.rs | 18 ++ lib/opte/src/ddi/mblk.rs | 81 +++++- lib/opte/src/engine/icmp/v4.rs | 17 +- lib/opte/src/engine/icmp/v6.rs | 14 +- lib/opte/src/engine/layer.rs | 8 +- lib/opte/src/engine/mod.rs | 3 +- lib/opte/src/engine/packet.rs | 347 ++++++++++++++--------- lib/opte/src/engine/port.rs | 6 +- lib/opte/src/engine/predicate.rs | 16 +- lib/opte/src/engine/rule.rs | 5 +- lib/oxide-vpc/src/engine/mod.rs | 12 +- lib/oxide-vpc/tests/integration_tests.rs | 40 +++ 12 files changed, 364 insertions(+), 203 deletions(-) diff --git a/lib/opte-test-utils/src/icmp.rs b/lib/opte-test-utils/src/icmp.rs index bb4d26aa..ad839efe 100644 --- a/lib/opte-test-utils/src/icmp.rs +++ b/lib/opte-test-utils/src/icmp.rs @@ -160,6 +160,15 @@ pub fn gen_icmp_echo( segments.push(MsgBlk::new_pkt(ip)); segments.push(MsgBlk::new_pkt(&icmp_bytes)); } + 4 => { + // Used to test pullup behaviour around longer mblks + // which still have pkt bodies in guest memory. + assert!(icmp_bytes.len() > 8); + segments.push(MsgBlk::new_ethernet_pkt(eth)); + segments.push(MsgBlk::new_pkt(ip)); + segments.push(MsgBlk::new_pkt(&icmp_bytes[..8])); + segments.push(MsgBlk::new_pkt(&icmp_bytes[8..])); + } _ => { panic!("only 1 2 or 3 segments allowed") } @@ -265,6 +274,15 @@ pub fn gen_icmpv6_echo( segments.push(MsgBlk::new_pkt(ip)); segments.push(MsgBlk::new_pkt(&body_bytes)); } + 4 => { + // Used to test pullup behaviour around longer mblks + // which still have pkt bodies in guest memory. 
+ assert!(body_bytes.len() > 8); + segments.push(MsgBlk::new_ethernet_pkt(eth)); + segments.push(MsgBlk::new_pkt(ip)); + segments.push(MsgBlk::new_pkt(&body_bytes[..8])); + segments.push(MsgBlk::new_pkt(&body_bytes[8..])); + } _ => { panic!("only 1 2 or 3 segments allowed") } diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index fa3da3b0..02b4d2b2 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -5,6 +5,7 @@ // Copyright 2024 Oxide Computer Company use crate::engine::packet::BufferState; +use crate::engine::packet::Pullup; use crate::engine::packet::SegAdjustError; use crate::engine::packet::WrapError; use crate::engine::packet::WriteError; @@ -357,7 +358,7 @@ impl MsgBlk { /// Return the number of initialised bytes in this `MsgBlk` over /// all linked segments. pub fn byte_len(&self) -> usize { - self.iter().map(|el| el.len()).sum() + unsafe { count_mblk_bytes(Some(self.0)) } } /// Return the number of segments in this `MsgBlk`. @@ -722,6 +723,55 @@ impl MsgBlkIterMut<'_> { } } +impl Pullup for MsgBlkIterMut<'_> { + fn pullup(&self, prepend: Option<&[u8]>) -> MsgBlk { + let prepend = prepend.unwrap_or_default(); + let bytes_in_self = BufferState::len(self); + let needed_alloc = prepend.len() + bytes_in_self; + let mut new_seg = MsgBlk::new(needed_alloc); + + new_seg + .write_bytes_back(prepend) + .expect("allocated enough bytes for prepend and self"); + + if bytes_in_self != 0 { + // SAFETY: We need to make use of ptr::copy for a pullup + // because we cannot guarantee a dblk refcnt of 1 -- thus + // using Deref<[u8]> for these segments is not safe. 
+ unsafe { + new_seg + .write_back(bytes_in_self, |mut buf| { + let mut curr = self.curr; + while let Some(valid_curr) = curr { + let valid_curr = valid_curr.as_ref(); + let src = valid_curr.b_rptr; + let seg_len = usize::try_from( + valid_curr.b_wptr.offset_from(src), + ) + .expect("invalid mblk -- slice end before start"); + + // SAFETY: MaybeUninit has identical layout to u8, + // &[T] can be cast down to &T (discarding len). + let dst: *mut u8 = + core::mem::transmute(buf.as_mut_ptr()); + + dst.copy_from_nonoverlapping( + valid_curr.b_rptr, + seg_len, + ); + + curr = NonNull::new(valid_curr.b_cont); + buf = buf.split_at_mut(seg_len).1; + } + }) + .expect("allocated enough bytes for prepend and self"); + } + } + + new_seg + } +} + /// Counts the number of segments in an `mblk_t` from `head`, linked /// via `b_cont`. unsafe fn count_mblk_chain(mut head: Option>) -> usize { @@ -733,6 +783,21 @@ unsafe fn count_mblk_chain(mut head: Option>) -> usize { count } +/// Counts the number of bytes in an `mblk_t` from `head`, linked +/// via `b_cont`. +/// +/// This is used to avoid constructing a &[] over slices which may/may not +/// have a higher refcnt.
+unsafe fn count_mblk_bytes(mut head: Option>) -> usize { + let mut count = 0; + while let Some(valid_head) = head { + let headref = valid_head.as_ref(); + count += headref.b_wptr.offset_from(headref.b_rptr).max(0) as usize; + head = NonNull::new((*valid_head.as_ptr()).b_cont); + } + count +} + impl<'a> Iterator for MsgBlkIter<'a> { type Item = &'a MsgBlkNode; @@ -752,7 +817,7 @@ impl<'a> Iterator for MsgBlkIter<'a> { } } -impl<'a> ExactSizeIterator for MsgBlkIter<'a> {} +impl ExactSizeIterator for MsgBlkIter<'_> {} impl<'a> Read for MsgBlkIter<'a> { type Chunk = &'a [u8]; @@ -785,7 +850,7 @@ impl<'a> Iterator for MsgBlkIterMut<'a> { } } -impl<'a> ExactSizeIterator for MsgBlkIterMut<'a> {} +impl ExactSizeIterator for MsgBlkIterMut<'_> {} impl<'a> Read for MsgBlkIterMut<'a> { type Chunk = &'a mut [u8]; @@ -802,15 +867,7 @@ impl<'a> Read for MsgBlkIterMut<'a> { impl BufferState for MsgBlkIterMut<'_> { #[inline] fn len(&self) -> usize { - let own_blk_len = self - .curr - .map(|v| unsafe { - let v = v.as_ref(); - v.b_wptr.offset_from(v.b_rptr) as usize - }) - .unwrap_or_default(); - - own_blk_len + self.next_iter().map(|v| v.len()).sum::() + unsafe { count_mblk_bytes(self.curr) } } #[inline] diff --git a/lib/opte/src/engine/icmp/v4.rs b/lib/opte/src/engine/icmp/v4.rs index 1820c804..98bc812e 100644 --- a/lib/opte/src/engine/icmp/v4.rs +++ b/lib/opte/src/engine/icmp/v4.rs @@ -87,13 +87,8 @@ impl HairpinAction for IcmpEchoReply { let mut csum = match icmp.checksum() { 0 => { let mut csum = OpteCsum::new(); - - for el in meta.body_segs().iter() { - csum.add_bytes(el); - } - + csum.add_bytes(meta.body()); csum.add_bytes(icmp.rest_of_hdr_ref()); - csum } valid => { @@ -109,7 +104,7 @@ impl HairpinAction for IcmpEchoReply { csum.add_bytes(&[ty, code]); // Build the reply in place, and send it out. 
- let body_len: usize = meta.body_segs().iter().map(|v| v.len()).sum(); + let body_len: usize = meta.body().len(); let icmp = IcmpV4 { ty, @@ -138,15 +133,9 @@ impl HairpinAction for IcmpEchoReply { let mut pkt_out = MsgBlk::new_ethernet(total_len); pkt_out - .emit_back((&eth, &ip4, &icmp)) + .emit_back((&eth, &ip4, &icmp, meta.body())) .expect("Allocated space for pkt headers and body"); - for el in meta.body_segs() { - pkt_out - .write_bytes_back(el) - .expect("allocated enough bytes for all body copy"); - } - Ok(AllowOrDeny::Allow(pkt_out)) } } diff --git a/lib/opte/src/engine/icmp/v6.rs b/lib/opte/src/engine/icmp/v6.rs index 5e6383e7..c1234d6c 100644 --- a/lib/opte/src/engine/icmp/v6.rs +++ b/lib/opte/src/engine/icmp/v6.rs @@ -147,9 +147,7 @@ impl HairpinAction for Icmpv6EchoReply { 0 => { let mut csum = OpteCsum::new(); - for el in meta.body_segs().iter() { - csum.add_bytes(el); - } + csum.add_bytes(meta.body()); csum.add_bytes(icmp6.rest_of_hdr_ref()); @@ -168,7 +166,7 @@ impl HairpinAction for Icmpv6EchoReply { csum.add_bytes(&[ty, code]); // Build the reply in place, and send it out.
- let body_len: usize = meta.body_segs().iter().map(|v| v.len()).sum(); + let body_len: usize = meta.body().len(); let icmp = IcmpV6 { ty, @@ -196,15 +194,9 @@ impl HairpinAction for Icmpv6EchoReply { let total_len = body_len + (&eth, &ip6, &icmp).packet_length(); let mut pkt_out = MsgBlk::new_ethernet(total_len); pkt_out - .emit_back((&eth, &ip6, &icmp)) + .emit_back((&eth, &ip6, &icmp, meta.body())) .expect("Allocated space for pkt headers and body"); - for el in meta.body_segs() { - pkt_out - .write_bytes_back(el) - .expect("allocated enough bytes for all body copy"); - } - Ok(AllowOrDeny::Allow(pkt_out)) } } diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index 69fda942..45b67557 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -862,7 +862,7 @@ impl Layer { pkt.flow(), ); - if let Some(body_segs) = pkt.body_segs() { + if let Some(body_segs) = pkt.body() { if let Some(bt) = desc.gen_bt(Direction::In, pkt.meta(), body_segs)? { @@ -1054,7 +1054,7 @@ impl Layer { pkt.flow(), ); - if let Some(body_segs) = pkt.body_segs() { + if let Some(body_segs) = pkt.body() { if let Some(bt) = desc.gen_bt(In, pkt.meta(), body_segs)? { pkt.body_transform(In, &*bt)?; xforms.body.push(bt); @@ -1149,7 +1149,7 @@ impl Layer { pkt.flow(), ); - if let Some(body_segs) = pkt.body_segs() { + if let Some(body_segs) = pkt.body() { if let Some(bt) = desc.gen_bt(Direction::Out, pkt.meta(), body_segs)? { @@ -1343,7 +1343,7 @@ impl Layer { pkt.flow(), ); - if let Some(body_segs) = pkt.body_segs() { + if let Some(body_segs) = pkt.body() { if let Some(bt) = desc.gen_bt(Out, pkt.meta(), body_segs)?
{ pkt.body_transform(Out, &*bt)?; xforms.body.push(bt); diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index 13b405f2..fc53c0e0 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -47,6 +47,7 @@ pub use opte_api::Direction; use packet::FullParsed; use packet::OpteMeta; use packet::Packet; +use packet::Pullup; use parse::ValidNoEncap; use rule::CompiledTransform; use zerocopy::ByteSlice; @@ -217,7 +218,7 @@ pub trait NetworkImpl { /// myriad of reasons. The error returned is for informational /// purposes, rather than having any obvious direct action to take /// in response. - fn handle_pkt<'a, T: Read + 'a>( + fn handle_pkt<'a, T: Read + Pullup + 'a>( &self, dir: Direction, pkt: &mut Packet>, diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 6d936b96..05d9f76d 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -57,10 +57,13 @@ use core::ffi::CStr; use core::fmt; use core::fmt::Display; use core::hash::Hash; +use core::ptr::NonNull; use core::result; use core::sync::atomic::AtomicPtr; +use core::sync::atomic::Ordering; use crc32fast::Hasher; use dyn_clone::DynClone; +use illumos_sys_hdrs::mblk_t; use illumos_sys_hdrs::uintptr_t; use ingot::geneve::GeneveRef; use ingot::icmp::IcmpV4Mut; @@ -246,7 +249,7 @@ pub trait BodyTransform: fmt::Display + DynClone + Send + Sync { fn run( &self, dir: Direction, - body_segs: &mut [&mut [u8]], + body: &mut [u8], ) -> Result<(), BodyTransformError>; } @@ -423,120 +426,172 @@ pub struct OpteMeta { pub inner_ulp: Option>, } -/// Helper for reusing access to all packet body segments. +/// Helper for conditionally pulling up a packet when required, +/// to provide safe read/write access to the packet body. /// -/// This is necessary because `MsgBlk`s in particular do not -/// allow us to walk backward within a packet -- if we need them, -/// then we need to save them out for all future uses. 
-/// The other part is that the majority of packets (ULP hits) -/// do not want to interact with body segments at all. -struct PktBodyWalker { - base: Cell, T)>>, - slice: AtomicPtr>, +/// This is necessary because we must account for a condition +/// where MsgBlks containing packet headers are fully owned by +/// the host OS, but the packet body points to guest memory. +/// In this case, it is unsafe to take either a `&[]` or `&mut[]` +/// against the underlying packet contents, as the guest may +/// modify them. [`MsgBlk::wrap_mblk`] details this condition. +/// +/// The current disposition is that if we have any non-header +/// segments, then we're pulling the remainder of the packet up. +/// In theory we could check ref counts to determine whether we +/// can in fact serve a `&[&[u8]]`, but no fastpath packets need +/// this capability so it's wasted effort on that front. +struct PktBodyWalker { + last_chunk: Option, + remainder: T, + // The use of atomics/interior mutability here is primarily to + // allow us to work under &self for `body()`, dynamically filling + // out the pulled up mblk as needed. + state: Cell, + // TODO: It would be nice to separate this from MsgBlk. + // `T::Owned` in future? + msg_blk: AtomicPtr, } -impl Drop for PktBodyWalker { - fn drop(&mut self) { - let ptr = self.slice.load(core::sync::atomic::Ordering::Relaxed); - if !ptr.is_null() { - // Reacquire and drop. 
- unsafe { - let _ = Box::from_raw(ptr); - } +#[derive(Copy, Clone, Debug)] +enum BodySegState { + NoPullup, + NeedsPullup, + PulledUp, +} + +impl PktBodyWalker { + fn new(last_chunk: Option, remainder: T) -> Self { + let state = if remainder.is_empty() { + BodySegState::NoPullup + } else { + BodySegState::NeedsPullup + } + .into(); + + Self { + last_chunk, + remainder, + state, + msg_blk: core::ptr::null_mut::().into(), } } } -impl<'a, T: Read + 'a> PktBodyWalker -where - ::Chunk: ByteSliceMut + IntoBufPointer<'a>, -{ - fn reify_body_segs(&self) { - if let Some((first, mut rest)) = self.base.take() { - // SAFETY: ByteSlice requires as part of its API - // that any implementors are stable, so we will always - // get the same view via deref. We are then consuming them - // into references which live exactly as long as their initial - // form. - // - // IntoBufPointer guarantees that what we are working with are, - // in actual fact, slices (or, at least, that any necessary behaviour - // on owned conversion into a slice [Drop, etc.] occurs). - // - // The next question is one of ownership. - // We know that these chunks are at least &[u8]s, they - // *will* be exclusive if ByteSliceMut is met (because they are - // sourced from an exclusive borrow on something which owns a [u8]). - // This allows us to cast to &mut later, but not here! 
- let mut to_hold = Vec::with_capacity(1); - if let Some(chunk) = first { - let len = chunk.len(); - let ptr = unsafe { chunk.into_buf_ptr() }; - to_hold.push((ptr, len)); - } +impl PktBodyWalker { + #[inline(always)] + fn prepare(&self) { + let BodySegState::NeedsPullup = self.state.clone().into_inner() else { + return; + }; - while let Ok(chunk) = rest.next_chunk() { - let len = chunk.len(); - let ptr = unsafe { chunk.into_buf_ptr() }; - to_hold.push((ptr, len)); - } + let prepend_slice = self.last_chunk.as_ref().map(|v| &v[..]); + let mblk = self.remainder.pullup(prepend_slice); - let to_store = Box::into_raw(Box::new(to_hold)); + let mblk_ptr = mblk.unwrap_mblk(); - self.slice - .compare_exchange( - core::ptr::null_mut(), - to_store, - core::sync::atomic::Ordering::Relaxed, - core::sync::atomic::Ordering::Relaxed, - ) - .expect("unexpected concurrent access to body_seg memoiser"); + self.msg_blk + .compare_exchange( + core::ptr::null_mut(), + mblk_ptr.as_ptr(), + Ordering::Relaxed, + Ordering::Relaxed, + ) + .expect("invariant violated: tried to double-prepare mblk"); + self.state.set(BodySegState::PulledUp); + } - // While today the only T we're operating on are IterMuts bound - // to the lifetime of an actual packet (via &mut), there's a chance - // in future that dropping the iterator could invalidate the byte - // slices we're holding onto. Hang onto `rest` to prevent this. - self.base.set(Some((None, rest))); + fn body(&self) -> &[u8] { + self.prepare(); + + match self.state.clone().into_inner() { + BodySegState::NoPullup => { + self.last_chunk.as_ref().map(|v| &v[..]).unwrap_or_default() + } + BodySegState::NeedsPullup => unreachable!(), + BodySegState::PulledUp => { + let ptr = NonNull::new(self.msg_blk.load(Ordering::Relaxed)) + .expect("invariant violated: PulledUp with nullptr"); + + // SAFETY: MsgBlk(NonNull) has identical layout to + // NonNull, and the inner mblk lives as long as self. 
+ // Since ownership is unaffected, the &[u8] derived from msg_blk + // is valid for the same lifetime as &self. + unsafe { + let mblk_ref = + core::mem::transmute::<&NonNull, &MsgBlk>(&ptr); + + core::mem::transmute::<&[u8], &[u8]>(&mblk_ref[..]) + } + } } } - fn body_segs(&self) -> &[&[u8]] { - let mut slice_ptr = - self.slice.load(core::sync::atomic::Ordering::Relaxed); - if slice_ptr.is_null() { - self.reify_body_segs(); - slice_ptr = self.slice.load(core::sync::atomic::Ordering::Relaxed); - } - debug_assert!(!slice_ptr.is_null()); + fn body_mut(&mut self) -> &mut [u8] + where + T::Chunk: ByteSliceMut, + { + self.prepare(); - unsafe { - let a = (&*(*slice_ptr)) as *const _; - &*(a as *const [&[u8]]) + match self.state.clone().into_inner() { + BodySegState::NoPullup => { + self.last_chunk.as_mut().map(|v| &mut v[..]).unwrap_or_default() + } + BodySegState::NeedsPullup => unreachable!(), + BodySegState::PulledUp => { + let mut ptr = + NonNull::new(self.msg_blk.load(Ordering::Relaxed)) + .expect("invariant violated: PulledUp with nullptr"); + + // SAFETY: MsgBlk(NonNull) has identical layout to + // NonNull, and the inner mblk lives as long as self. + // Since ownership is unaffected, the &mut [u8] derived from msg_blk + // is valid for the same lifetime as &mut self. 
+ unsafe { + let mblk_ref = core::mem::transmute::< + &mut NonNull, + &mut MsgBlk, + >(&mut ptr); + + core::mem::transmute::<&mut [u8], &mut [u8]>( + &mut mblk_ref[..], + ) + } + } } } - fn body_segs_mut(&mut self) -> &mut [&mut [u8]] { - let mut slice_ptr = - self.slice.load(core::sync::atomic::Ordering::Relaxed); - if slice_ptr.is_null() { - self.reify_body_segs(); - slice_ptr = self.slice.load(core::sync::atomic::Ordering::Relaxed); + fn extract_mblk(&mut self) -> Option { + let state = self.state.get_mut(); + + if !matches!(state, BodySegState::PulledUp) { + return None; } - debug_assert!(!slice_ptr.is_null()); - // SAFETY: We have an exclusive reference, and the ByteSliceMut - // bound guarantees that this packet view was construced from - // an exclusive reference. In turn, we know that we are the only - // possible referent. + // If we were pulled up, a later prepare will need to pullup again. + *state = BodySegState::NeedsPullup; + + let ptr = self.msg_blk.load(Ordering::Relaxed); + + // SAFETY: this mblk was created by using the MsgBlk::new api. + // PulledUp asserts its value is non-null. unsafe { - let a = (&mut *(*slice_ptr)) as *mut _; - &mut *(a as *mut [&mut [u8]]) + Some( + MsgBlk::wrap_mblk(ptr) + .expect("invariant violated: PulledUp with nullptr"), + ) } } } +impl Drop for PktBodyWalker { + fn drop(&mut self) { + self.extract_mblk(); + } +} + /// Packet state for the standard ULP path, or a full table walk over the slowpath. 
-pub struct PacketData { +pub struct PacketData { pub(crate) headers: OpteMeta, initial_lens: Option>, body: PktBodyWalker, @@ -556,13 +611,13 @@ impl From> for OpteMeta { } } -impl core::fmt::Debug for PacketData { +impl core::fmt::Debug for PacketData { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.write_str("PacketHeaders(..)") } } -impl<'a, T: Read + 'a> PacketData { +impl PacketData { pub fn initial_lens(&self) -> Option<&InitialLayerLens> { self.initial_lens.as_deref() } @@ -649,43 +704,46 @@ impl<'a, T: Read + 'a> PacketData { matches!(self.inner_ulp(), Some(Ulp::Tcp(_))) } - pub fn body_segs(&self) -> &[&[u8]] + pub fn prep_body(&mut self) where - T::Chunk: ByteSliceMut + IntoBufPointer<'a>, + T::Chunk: ByteSliceMut, + T: Pullup, { - self.body.body_segs() + self.body.prepare() + } + + pub fn body(&self) -> &[u8] + where + T::Chunk: ByteSliceMut, + T: Pullup, + { + self.body.body() } pub fn copy_remaining(&self) -> Vec where - T::Chunk: ByteSliceMut + IntoBufPointer<'a>, + T::Chunk: ByteSliceMut, + T: Pullup, { - let base = self.body_segs(); - let len = base.iter().map(|v| v.len()).sum(); - let mut out = Vec::with_capacity(len); - for el in base { - out.extend_from_slice(el); - } - out + let base = self.body(); + base.to_vec() } pub fn append_remaining(&self, buf: &mut Vec) where - T::Chunk: ByteSliceMut + IntoBufPointer<'a>, + T::Chunk: ByteSliceMut, + T: Pullup, { - let base = self.body_segs(); - let len = base.iter().map(|v| v.len()).sum(); - buf.reserve_exact(len); - for el in base { - buf.extend_from_slice(el); - } + let base = self.body(); + buf.extend_from_slice(base); } - pub fn body_segs_mut(&mut self) -> &mut [&mut [u8]] + pub fn body_mut(&mut self) -> &mut [u8] where - T::Chunk: ByteSliceMut + IntoBufPointer<'a>, + T::Chunk: ByteSliceMut, + T: Pullup, { - self.body.body_segs_mut() + self.body.body_mut() } /// Return whether the IP layer has a checksum both structurally @@ -713,7 +771,7 @@ impl<'a, T: Read + 'a> 
PacketData { } } -impl From<&PacketData> for InnerFlowId { +impl From<&PacketData> for InnerFlowId { #[inline] fn from(meta: &PacketData) -> Self { let (proto, addrs) = match meta.inner_l3() { @@ -791,7 +849,7 @@ pub type LiteInPkt = pub type LiteOutPkt = Packet::OutMeta<::Chunk>>>; -impl<'a, T: Read + BufferState + 'a, M: LightweightMeta> +impl<'a, T: Read + BufferState + Pullup + 'a, M: LightweightMeta> Packet> where T::Chunk: ByteSliceMut + IntoBufPointer<'a>, @@ -847,10 +905,7 @@ where } .into(), ); - let body = PktBodyWalker { - base: Some((last_chunk, data)).into(), - slice: Default::default(), - }; + let body = PktBodyWalker::new(last_chunk, data); let meta = Box::new(PacketData { headers, initial_lens, body }); Packet { @@ -893,7 +948,7 @@ where } } -impl<'a, T: Read + 'a> Packet> { +impl Packet> { pub fn meta(&self) -> &PacketData { &self.state.meta } @@ -1151,7 +1206,8 @@ impl<'a, T: Read + 'a> Packet> { xform: &dyn BodyTransform, ) -> Result<(), BodyTransformError> where - T::Chunk: ByteSliceMut + IntoBufPointer<'a>, + T::Chunk: ByteSliceMut, + T: Pullup, { // We set the flag now with the assumption that the transform // could fail after modifying part of the body. In the future @@ -1160,8 +1216,14 @@ impl<'a, T: Read + 'a> Packet> { // this does the job as nothing that needs top performance // should make use of body transformations. self.state.body_modified = true; + self.state.meta.body.prepare(); + + // TODO TODO TODO + // We need to put the pulled up body into the EmitSpec. + // Not using it today but we NEED to get it right. 
+ // TODO TODO TODO - match self.body_segs_mut() { + match self.body_mut() { Some(body_segs) => xform.run(dir, body_segs), None => { self.state.body_modified = false; @@ -1171,11 +1233,12 @@ impl<'a, T: Read + 'a> Packet> { } #[inline] - pub fn body_segs(&self) -> Option<&[&[u8]]> + pub fn body(&self) -> Option<&[u8]> where - T::Chunk: ByteSliceMut + IntoBufPointer<'a>, + T::Chunk: ByteSliceMut, + T: Pullup, { - let out = self.state.meta.body_segs(); + let out = self.state.meta.body(); if out.is_empty() { None } else { @@ -1184,11 +1247,12 @@ impl<'a, T: Read + 'a> Packet> { } #[inline] - pub fn body_segs_mut(&mut self) -> Option<&mut [&mut [u8]]> + pub fn body_mut(&mut self) -> Option<&mut [u8]> where - T::Chunk: ByteSliceMut + IntoBufPointer<'a>, + T::Chunk: ByteSliceMut, + T: Pullup, { - let out = self.state.meta.body_segs_mut(); + let out = self.state.meta.body_mut(); if out.is_empty() { None } else { @@ -1208,12 +1272,11 @@ impl<'a, T: Read + 'a> Packet> { /// body_csum cannot be valid. pub fn compute_checksums(&mut self) where - T::Chunk: ByteSliceMut + IntoBufPointer<'a>, + T::Chunk: ByteSliceMut, + T: Pullup, { let mut body_csum = Checksum::new(); - for seg in self.body_segs_mut().unwrap_or_default() { - body_csum.add_bytes(seg); - } + body_csum.add_bytes(self.body().unwrap_or_default()); self.state.body_csum = Some(body_csum); if let Some(ulp) = &mut self.state.meta.headers.inner_ulp { @@ -1311,7 +1374,8 @@ impl<'a, T: Read + 'a> Packet> { /// case where checksums are **not** being offloaded to the hardware. 
pub fn update_checksums(&mut self) where - T::Chunk: ByteSliceMut + IntoBufPointer<'a>, + T::Chunk: ByteSliceMut, + T: Pullup, { // If we know that no transform touched a field which features in // an inner transport cksum (L4/L3 src/dst, most realistically), @@ -1413,12 +1477,15 @@ impl<'a, T: Read + 'a> Packet> { } } -impl> PacketState for LiteParsed {} -impl PacketState for FullParsed {} +impl> PacketState + for LiteParsed +{ +} +impl PacketState for FullParsed {} /// Zerocopy view onto a parsed packet, accompanied by locally /// computed state. -pub struct FullParsed { +pub struct FullParsed { /// Total length of packet, in bytes. This is equal to the sum of /// the length of the _initialized_ window in all the segments /// (`b_wptr - b_rptr`). @@ -1460,7 +1527,7 @@ pub struct FullParsed { /// Minimum-size zerocopy view onto a parsed packet, sufficient for fast /// packet transformation. -pub struct LiteParsed> { +pub struct LiteParsed> { /// Total length of packet, in bytes. This is equal to the sum of /// the length of the _initialized_ window in all the segments /// (`b_wptr - b_rptr`). @@ -1471,7 +1538,7 @@ pub struct LiteParsed> { meta: IngotParsed, } -impl> LiteParsed {} +impl> LiteParsed {} // These are needed for now to account for not wanting to redesign // ActionDescs to be generic over T (trait object safety rules, etc.), @@ -1485,6 +1552,12 @@ pub trait BufferState { fn base_ptr(&self) -> uintptr_t; } +pub trait Pullup { + /// Pulls all remaining segments of a packet into a new + /// `Self` containing a single buffer. + fn pullup(&self, prepend: Option<&[u8]>) -> MsgBlk; +} + /// A set of headers to be emitted at the head of a packet. 
#[derive(Clone, Debug, Default)] pub struct OpteEmit { diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index 6a9c8947..94fe776a 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -38,6 +38,7 @@ use super::packet::LiteParsed; use super::packet::MblkFullParsed; use super::packet::MblkPacketData; use super::packet::Packet; +use super::packet::Pullup; use super::packet::FLOW_ID_DEFAULT; use super::rule::Action; use super::rule::CompiledTransform; @@ -91,7 +92,6 @@ use ingot::ip::IpProtocol; use ingot::tcp::TcpRef; use ingot::types::Emit; use ingot::types::HeaderLen; -use ingot::types::IntoBufPointer; use ingot::types::Read; use ingot::udp::Udp; use opte_api::Direction; @@ -1692,13 +1692,13 @@ impl Transforms { } #[inline] - fn apply<'a, T: Read + 'a>( + fn apply<'a, T: Read + Pullup + 'a>( &self, pkt: &mut Packet>, dir: Direction, ) -> result::Result<(), ProcessError> where - T::Chunk: ByteSliceMut + IntoBufPointer<'a>, + T::Chunk: ByteSliceMut, { // TODO: It should be possible to combine header transforms // into a single operation per layer, particularly when diff --git a/lib/opte/src/engine/predicate.rs b/lib/opte/src/engine/predicate.rs index 07a73252..141d5b4c 100644 --- a/lib/opte/src/engine/predicate.rs +++ b/lib/opte/src/engine/predicate.rs @@ -601,15 +601,7 @@ impl DataPredicate { Self::Not(pred) => !pred.is_match(meta), Self::DhcpMsgType(mt) => { - // Not sure that I like that this is a complete clone/parse... 
- let body; - - let bytes = if meta.body_segs().len() > 1 { - body = meta.copy_remaining(); - &body - } else { - meta.body_segs()[0] - }; + let bytes = meta.body(); let pkt = match DhcpPacket::new_checked(&bytes) { Ok(v) => v, @@ -653,14 +645,14 @@ impl DataPredicate { } Self::Dhcpv6MsgType(mt) => { - let body = meta.body_segs(); - if body.is_empty() || body[0].is_empty() { + let body = meta.body(); + if body.is_empty() { super::err!( "Failed to read DHCPv6 message type from packet" ); false } else { - mt.is_match(&body[0][0].into()) + mt.is_match(&body[0].into()) } } } diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 8bd9f55d..e4fa9936 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -33,6 +33,7 @@ use super::packet::MblkFullParsed; use super::packet::MblkPacketData; use super::packet::Packet; use super::packet::PacketData; +use super::packet::Pullup; use super::parse::ValidUlp; use super::port::meta::ActionMeta; use super::predicate::DataPredicate; @@ -174,7 +175,7 @@ pub trait ActionDesc { &self, _dir: Direction, _meta: &MblkPacketData, - _payload_segs: &[&[u8]], + _payload_seg: &[u8], ) -> Result>, GenBtError> { Ok(None) } @@ -581,7 +582,7 @@ impl HdrTransform { /// If there is an [`HeaderAction::Modify`], but no metadata is /// present for that particular header, then a /// [`HdrTransformError::MissingHeader`] is returned. 
- pub fn run( + pub fn run( &self, meta: &mut PacketData, ) -> Result diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index d4ec2384..d3ae3c4a 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -25,6 +25,7 @@ use opte::engine::packet::FullParsed; use opte::engine::packet::InnerFlowId; use opte::engine::packet::Packet; use opte::engine::packet::ParseError; +use opte::engine::packet::Pullup; use opte::engine::parse::ValidGeneveOverV6; use opte::engine::parse::ValidNoEncap; use opte::engine::port::UftEntry; @@ -65,19 +66,16 @@ fn is_arp_req_for_tpa(tpa: Ipv4Addr, arp: &impl ArpEthIpv4Ref) -> bool { } impl VpcNetwork { - fn handle_arp_out<'a, T: Read + 'a>( + fn handle_arp_out<'a, T: Read + Pullup + 'a>( &self, pkt: &mut Packet>, ) -> Result where T::Chunk: ByteSliceMut + IntoBufPointer<'a>, { - let body = pkt - .body_segs() - .and_then(|v| v.first()) - .ok_or(HdlPktError("outbound ARP (no body)"))?; + let body = pkt.body().ok_or(HdlPktError("outbound ARP (no body)"))?; - let (arp, ..) = ValidArpEthIpv4::parse(*body) + let (arp, ..) = ValidArpEthIpv4::parse(body) .map_err(|_| HdlPktError("outbound ARP (parse)"))?; if !arp.values_valid() { @@ -100,7 +98,7 @@ impl VpcNetwork { impl NetworkImpl for VpcNetwork { type Parser = VpcParser; - fn handle_pkt<'a, T: Read + 'a>( + fn handle_pkt<'a, T: Read + Pullup + 'a>( &self, dir: Direction, pkt: &mut Packet>, diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index 34eaa1bf..f796408a 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -440,6 +440,46 @@ fn gateway_icmp4_ping() { } } +// Verify that guest packet bodies are correctly pulled up if they run +// past the same segment(s) containing the rest of the headers. 
+#[test] +fn packet_body_pullup() { + let g1_cfg = g1_cfg(); + let mut g1 = oxide_net_setup("g1_port", &g1_cfg, None, None); + g1.port.start(); + set!(g1, "port_state=running"); + let ident = 7; + let seq_no = 777; + let data = c"...did Sephiroth do this?"; + + // ================================================================ + // Generate an ICMP Echo Request from G1 to Virtual GW + // ================================================================ + let mut pkt1_m = gen_icmp_echo_req( + g1_cfg.guest_mac, + g1_cfg.gateway_mac, + g1_cfg.ipv4_cfg().unwrap().private_ip.into(), + g1_cfg.ipv4_cfg().unwrap().gateway_ip.into(), + ident, + seq_no, + data.to_bytes_with_nul(), + // Instruct the packet builder to split the body 8 bytes in. + 4, + ); + + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + let hp = match res { + Ok(Hairpin(hp)) => hp, + _ => panic!("expected Hairpin, got {:?}", res), + }; + + // Verify that the contents are correctly replicated. + let (_hdrs, new_body) = + hp.split_at(hp.len() - data.to_bytes_with_nul().len()); + assert_eq!(new_body, data.to_bytes_with_nul()); +} + // Try to send a TCP packet from one guest to another; but in this // case the guest has not route to the other guest, resulting in the // packet being dropped. From 69e844c0c0fc848085bf4b5f12bbe9d35ce6de5b Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 8 Nov 2024 12:55:46 +0000 Subject: [PATCH 106/115] Fixup body transform + pullup interaction. 
--- lib/opte/src/ddi/mblk.rs | 37 ++++++++++++++++++++++++++++++++ lib/opte/src/engine/packet.rs | 40 ++++++++++++++++++++++------------- 2 files changed, 62 insertions(+), 15 deletions(-) diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index 02b4d2b2..dd8d06d5 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -366,6 +366,43 @@ impl MsgBlk { self.iter().len() } + /// Truncates an `MsgBlk` chain, dropping any elements such that + /// it contains at most `len` bytes. + pub fn truncate_chain(&mut self, len: usize) { + let mut seen = 0; + let mut curr = Some(self.0); + let mut old_tail = ptr::null_mut(); + + while let Some(mut valid_curr) = curr.take() { + let valid_curr = unsafe { valid_curr.as_mut() }; + + let seg_len = usize::try_from(unsafe { + valid_curr.b_wptr.offset_from(valid_curr.b_rptr) + }) + .expect("operating on packet with end before start"); + + let seen_til_now = seen; + seen += seg_len; + + if seen <= len { + let to_keep = len.saturating_sub(seen_til_now); + + // SAFETY: this will only reduce the read window of this slice, + // so derived byteslices will remain in capacity. + valid_curr.b_wptr = unsafe { valid_curr.b_rptr.add(to_keep) }; + + core::mem::swap(&mut valid_curr.b_cont, &mut old_tail); + } else { + curr = NonNull::new(valid_curr.b_cont); + } + } + + // SAFETY: we have exclusive ownership of this element + // via self, and we have just disconnected it from the chain. + // This method also handles the nullptr case on our behalf. + drop(unsafe { Self::wrap_mblk(old_tail) }) + } + /// Allocate a new [`MsgBlk`] containing a data buffer of size /// `head_len + body_len`. /// diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 05d9f76d..f09cd70d 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -975,7 +975,7 @@ impl Packet> { // - Rewind up to+including that point in original // pkt space. 
let l4_hash = self.l4_hash(); - let state = &self.state; + let state = &mut self.state; let init_lens = state.meta.initial_lens.as_ref().unwrap(); let headers = &state.meta.headers; let payload_len = state.len - init_lens.hdr_len(); @@ -984,6 +984,10 @@ impl Packet> { let mut push_spec = OpteEmit::default(); let mut rewind = 0; + if state.body_modified { + push_spec.replace_body = state.meta.body.extract_mblk(); + } + // structural change if: // hdr_len is different. // needs_emit is true (i.e., now on an owned repr). @@ -1218,11 +1222,6 @@ impl Packet> { self.state.body_modified = true; self.state.meta.body.prepare(); - // TODO TODO TODO - // We need to put the pulled up body into the EmitSpec. - // Not using it today but we NEED to get it right. - // TODO TODO TODO - match self.body_mut() { Some(body_segs) => xform.run(dir, body_segs), None => { @@ -1558,8 +1557,9 @@ pub trait Pullup { fn pullup(&self, prepend: Option<&[u8]>) -> MsgBlk; } -/// A set of headers to be emitted at the head of a packet. -#[derive(Clone, Debug, Default)] +/// A set of headers to be emitted at the head of a packet, and +/// possibly a replacement body as required in the slowpath. +#[derive(Debug, Default)] pub struct OpteEmit { outer_eth: Option, outer_ip: Option, @@ -1568,6 +1568,11 @@ pub struct OpteEmit { // We can (but do not often) push/pop inner meta. // Splitting via Box minimises struct size in the general case. inner: Option>, + + // In some cases, applying body transforms requires a packet pullup, + // which the body transforms will then be applied to. If there is a + // modified body, it must be taken from here. + replace_body: Option, } /// Inner headers needing completely rewritten/emitted in a packet. @@ -1583,7 +1588,7 @@ pub struct OpteInnerEmit { /// /// This will add and/or remove several layers from the underlying `MsgBlk`, /// and can be queried for routing specific info (access to new encap, l4 hash). 
-#[derive(Clone, Debug)] +#[derive(Debug)] pub struct EmitSpec { pub(crate) prepend: PushSpec, pub(crate) l4_hash: u32, @@ -1609,7 +1614,7 @@ impl EmitSpec { /// existing headers, and copying in new/replacement headers). #[inline] #[must_use] - pub fn apply(&self, mut pkt: MsgBlk) -> MsgBlk { + pub fn apply(self, mut pkt: MsgBlk) -> MsgBlk { // Rewind { let mut slots = heapless::Vec::<&mut MsgBlkNode, 6>::new(); @@ -1641,7 +1646,7 @@ impl EmitSpec { // much less so in the fastpath. pkt.drop_empty_segments(); - let out = match &self.prepend { + match self.prepend { PushSpec::Fastpath(push_spec) => { push_spec.encap.prepend(pkt, self.ulp_len as usize) } @@ -1656,6 +1661,13 @@ impl EmitSpec { + inner_new.ulp.packet_length(); } + if let Some(replace_body) = push_spec.replace_body { + pkt.truncate_chain( + self.ulp_len as usize - replace_body.byte_len(), + ); + pkt.append(replace_body); + } + let needed_alloc = needed_push; let mut prepend = if needed_alloc > 0 { @@ -1739,9 +1751,7 @@ impl EmitSpec { } } PushSpec::NoOp => pkt, - }; - - out + } } /// Returns the Geneve VNI when this spec pushes Geneve encapsulation. @@ -1782,7 +1792,7 @@ impl EmitSpec { } /// Specification of additional header layers to push at the head of a packet. -#[derive(Clone, Debug)] +#[derive(Debug)] pub enum PushSpec { /// Bytes to prepend to packet which have been serialised ahead of time /// and can be copied in one shot. From ac4464228a2184add64863fb4b4edfb3b751a158 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 8 Nov 2024 13:14:39 +0000 Subject: [PATCH 107/115] Fix chain truncation. 
--- lib/opte/src/ddi/mblk.rs | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index dd8d06d5..366a3d41 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -384,7 +384,7 @@ impl MsgBlk { let seen_til_now = seen; seen += seg_len; - if seen <= len { + if seen >= len { let to_keep = len.saturating_sub(seen_til_now); // SAFETY: this will only reduce the read window of this slice, @@ -1138,6 +1138,26 @@ mod test { assert_eq!(segs.next().map(|v| &v[..]).unwrap(), &[0x5, 0x6]); } + #[test] + fn truncate() { + let mut p1 = MsgBlk::copy(&[0, 1, 2, 3]); + p1.append(MsgBlk::copy(&[4, 5, 6, 7])); + p1.append(MsgBlk::copy(&[8, 9, 10, 11])); + + assert_eq!(p1.seg_len(), 3); + assert_eq!(p1.byte_len(), 12); + + // Assert drop of followup segments. + p1.truncate_chain(7); + assert_eq!(p1.seg_len(), 2); + assert_eq!(p1.byte_len(), 7); + let mut iter = p1.iter(); + let el1 = iter.next().unwrap(); + let el2 = iter.next().unwrap(); + assert_eq!(&el1[..], &[0, 1, 2, 3]); + assert_eq!(&el2[..], &[4, 5, 6]); + } + // Verify uninitialized packet. #[test] fn uninitialized_packet() { From b386de8c57ec15e1d71b24b8057b84e14d9aa6e7 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 8 Nov 2024 13:36:44 +0000 Subject: [PATCH 108/115] Inline fastpath/lightweight parsing paths Seems to more reliably push us up to >=3.0Gbps, primarily be eliding the fat `memcpy`s needed to move some of the metadata structs out (>128B). 
--- lib/opte/src/engine/packet.rs | 4 ++-- lib/oxide-vpc/src/engine/mod.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index f09cd70d..3269c0da 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -854,7 +854,7 @@ impl<'a, T: Read + BufferState + Pullup + 'a, M: LightweightMeta> where T::Chunk: ByteSliceMut + IntoBufPointer<'a>, { - #[inline] + #[inline(always)] pub fn parse_inbound = M>>( pkt: T, net: NP, @@ -868,7 +868,7 @@ where Ok(Packet { state: LiteParsed { meta, base_ptr, len } }) } - #[inline] + #[inline(always)] pub fn parse_outbound = M>>( pkt: T, net: NP, diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index d3ae3c4a..94ef64cf 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -124,7 +124,7 @@ impl NetworkParser for VpcParser { type InMeta = ValidGeneveOverV6; type OutMeta = ValidNoEncap; - #[inline] + #[inline(always)] fn parse_outbound<'a, T: Read + 'a>( &self, rdr: T, @@ -135,7 +135,7 @@ impl NetworkParser for VpcParser { Ok(ValidNoEncap::parse_read(rdr)?) } - #[inline] + #[inline(always)] fn parse_inbound<'a, T: Read + 'a>( &self, rdr: T, From 27ecc8d7a3be805373dc3bedce8d51a54e98a26a Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 11 Nov 2024 16:05:18 +0000 Subject: [PATCH 109/115] Review feedback. 
--- lib/opte/src/ddi/mblk.rs | 105 ++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 50 deletions(-) diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index 366a3d41..e1bd01f9 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -219,51 +219,6 @@ impl DerefMut for MsgBlk { } } -#[derive(Debug)] -pub struct MsgBlkNode(mblk_t); - -impl Deref for MsgBlkNode { - type Target = [u8]; - - fn deref(&self) -> &Self::Target { - unsafe { - let rptr = self.0.b_rptr; - let len = self.0.b_wptr.offset_from(rptr) as usize; - slice::from_raw_parts(rptr, len) - } - } -} - -impl DerefMut for MsgBlkNode { - fn deref_mut(&mut self) -> &mut Self::Target { - unsafe { - let rptr = self.0.b_rptr; - let len = self.0.b_wptr.offset_from(rptr) as usize; - slice::from_raw_parts_mut(rptr, len) - } - } -} - -impl MsgBlkNode { - /// Shrink the writable/readable area by shifting the `b_rptr` by - /// `len`; effectively removing bytes from the start of the packet. - /// - /// # Errors - /// - /// `SegAdjustError::StartPastEnd`: Shifting the read pointer by - /// `len` would move `b_rptr` past `b_wptr`. - pub fn drop_front_bytes(&mut self, n: usize) -> Result<(), SegAdjustError> { - unsafe { - if self.0.b_wptr.offset_from(self.0.b_rptr) < n as isize { - return Err(SegAdjustError::StartPastEnd); - } - self.0.b_rptr = self.0.b_rptr.add(n); - } - - Ok(()) - } -} - impl MsgBlk { /// Allocate a new [`MsgBlk`] containing a data buffer of `len` /// bytes. @@ -694,7 +649,8 @@ impl MsgBlk { /// Copy out all bytes within this mblk and its successors /// to a single contiguous buffer. pub fn copy_all(&self) -> Vec { - let mut out = vec![]; + let len = self.byte_len(); + let mut out = Vec::with_capacity(len); for node in self.iter() { out.extend_from_slice(node) @@ -732,6 +688,56 @@ impl MsgBlk { } } +/// An interior node of an [`MsgBlk`]'s chain, accessed via iterator. 
+/// +/// This supports a reduced set of operations compared to [`MsgBlk`], +/// primarily to allow (mutable) access to the inner bytes while preventing +/// iterator invalidation. +#[derive(Debug)] +pub struct MsgBlkNode(mblk_t); + +impl Deref for MsgBlkNode { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + unsafe { + let rptr = self.0.b_rptr; + let len = self.0.b_wptr.offset_from(rptr) as usize; + slice::from_raw_parts(rptr, len) + } + } +} + +impl DerefMut for MsgBlkNode { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { + let rptr = self.0.b_rptr; + let len = self.0.b_wptr.offset_from(rptr) as usize; + slice::from_raw_parts_mut(rptr, len) + } + } +} + +impl MsgBlkNode { + /// Shrink the writable/readable area by shifting the `b_rptr` by + /// `len`; effectively removing bytes from the start of the packet. + /// + /// # Errors + /// + /// `SegAdjustError::StartPastEnd`: Shifting the read pointer by + /// `len` would move `b_rptr` past `b_wptr`. + pub fn drop_front_bytes(&mut self, n: usize) -> Result<(), SegAdjustError> { + unsafe { + if self.0.b_wptr.offset_from(self.0.b_rptr) < n as isize { + return Err(SegAdjustError::StartPastEnd); + } + self.0.b_rptr = self.0.b_rptr.add(n); + } + + Ok(()) + } +} + #[derive(Debug)] pub struct MsgBlkIter<'a> { curr: Option>, @@ -787,10 +793,9 @@ impl Pullup for MsgBlkIterMut<'_> { ) .expect("invalid mblk -- slice end before start"); - // SAFETY: MaybeUninit has identical layout to u8, - // &[T] can be cast down to &T (discarding len). - let dst: *mut u8 = - core::mem::transmute(buf.as_mut_ptr()); + // Safety: slice contains exactly bytes_in_self bytes (!= 0). + // Cast replicates `MaybeUninit::slice_as_mut_ptr` (unstable). 
+ let dst = buf.as_mut_ptr() as *mut u8; dst.copy_from_nonoverlapping( valid_curr.b_rptr, From d7c8b21eff1d4ffbe96ad271881f0adfad7a728e Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 11 Nov 2024 17:27:27 +0000 Subject: [PATCH 110/115] Review feedback: copy `db_struioun` on pullup and chain trim --- crates/illumos-sys-hdrs/src/lib.rs | 4 +-- lib/opte/src/ddi/mblk.rs | 44 ++++++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/crates/illumos-sys-hdrs/src/lib.rs b/crates/illumos-sys-hdrs/src/lib.rs index e85a0680..12bb8d1d 100644 --- a/crates/illumos-sys-hdrs/src/lib.rs +++ b/crates/illumos-sys-hdrs/src/lib.rs @@ -271,7 +271,7 @@ pub struct mblk_t { pub b_cont: *mut mblk_t, pub b_rptr: *mut c_uchar, pub b_wptr: *mut c_uchar, - pub b_datap: *const dblk_t, + pub b_datap: *mut dblk_t, pub b_band: c_uchar, pub b_tag: c_uchar, pub b_flag: c_ushort, @@ -290,7 +290,7 @@ impl Default for mblk_t { b_cont: ptr::null_mut(), b_rptr: ptr::null_mut(), b_wptr: ptr::null_mut(), - b_datap: ptr::null(), + b_datap: ptr::null_mut(), b_band: 0, b_tag: 0, b_flag: 0, diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index e1bd01f9..4d0e48a6 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -200,9 +200,8 @@ impl Deref for MsgBlk { fn deref(&self) -> &Self::Target { unsafe { - let self_ref = self.0.as_ref(); - let rptr = self_ref.b_rptr; - let len = self_ref.b_wptr.offset_from(rptr) as usize; + let rptr = (*self_ref).b_rptr; + let len = (*self_ref).b_wptr.offset_from(rptr) as usize; slice::from_raw_parts(rptr, len) } } @@ -211,9 +210,8 @@ impl Deref for MsgBlk { impl DerefMut for MsgBlk { fn deref_mut(&mut self) -> &mut Self::Target { unsafe { - let self_ref = self.0.as_mut(); - let rptr = self_ref.b_rptr; - let len = self_ref.b_wptr.offset_from(rptr) as usize; + let rptr = (*self_ref).b_rptr; + let len = (*self_ref).b_wptr.offset_from(rptr) as usize; slice::from_raw_parts_mut(rptr, len) } } @@ -669,6 +667,8 @@ 
impl MsgBlk { let mut head = self.0; let mut neighbour = unsafe { (*head.as_ptr()).b_cont }; + let offload_info = unsafe { offload_info(head) }; + while !neighbour.is_null() && unsafe { (*head.as_ptr()).b_rptr == (*head.as_ptr()).b_wptr } { @@ -684,6 +684,13 @@ impl MsgBlk { } } + // Carry over offload flags and MSS information. + // SAFETY: db_struioun contains no payload-specific offsets, + // only flags pertaining to *required* offloads and the path MTU/MSS. + unsafe { + set_offload_info(head, offload_info); + } + self.0 = head; } } @@ -777,6 +784,8 @@ impl Pullup for MsgBlkIterMut<'_> { .write_bytes_back(prepend) .expect("allocated enough bytes for prepend and self"); + let offload_info = self.curr.map(|v| unsafe { offload_info(v) }); + if bytes_in_self != 0 { // SAFETY: We need to make use of ptr::copy for a pullup // because we cannot guarantee a dblk refcnt of 1 -- thus @@ -810,6 +819,15 @@ impl Pullup for MsgBlkIterMut<'_> { } } + // Carry over offload flags and MSS information. + // SAFETY: db_struioun contains no payload-specific offsets, + // only flags pertaining to *required* offloads and the path MTU/MSS. + if let Some(info) = offload_info { + unsafe { + set_offload_info(new_seg.0, info); + } + } + new_seg } } @@ -840,6 +858,20 @@ unsafe fn count_mblk_bytes(mut head: Option>) -> usize { count } +/// Copy out the opaque representation of offload flags and sizes +/// associated with this packet. +unsafe fn offload_info(head: NonNull) -> u64 { + unsafe { (*(*head.as_ptr()).b_datap).db_struioun } +} + +/// Set the opaque representation of offload flags and sizes +/// associated with this packet. +unsafe fn set_offload_info(head: NonNull, info: u64) { + unsafe { + (*(*head.as_ptr()).b_datap).db_struioun = info; + } +} + impl<'a> Iterator for MsgBlkIter<'a> { type Item = &'a MsgBlkNode; From 33137dd0dfc3ae5cf79a728611058c08f7b8220c Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 11 Nov 2024 18:02:04 +0000 Subject: [PATCH 111/115] All ptrs. 
--- lib/opte/src/ddi/mblk.rs | 94 ++++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 41 deletions(-) diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index 4d0e48a6..047c5a56 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -200,8 +200,9 @@ impl Deref for MsgBlk { fn deref(&self) -> &Self::Target { unsafe { - let rptr = (*self_ref).b_rptr; - let len = (*self_ref).b_wptr.offset_from(rptr) as usize; + let self_ptr = self.0.as_ptr(); + let rptr = (*self_ptr).b_rptr; + let len = (*self_ptr).b_wptr.offset_from(rptr) as usize; slice::from_raw_parts(rptr, len) } } @@ -210,8 +211,9 @@ impl Deref for MsgBlk { impl DerefMut for MsgBlk { fn deref_mut(&mut self) -> &mut Self::Target { unsafe { - let rptr = (*self_ref).b_rptr; - let len = (*self_ref).b_wptr.offset_from(rptr) as usize; + let self_ptr = self.0.as_ptr(); + let rptr = (*self_ptr).b_rptr; + let len = (*self_ptr).b_wptr.offset_from(rptr) as usize; slice::from_raw_parts_mut(rptr, len) } } @@ -259,9 +261,9 @@ impl MsgBlk { /// read pointer in the current datablock. pub fn head_capacity(&self) -> usize { unsafe { - let inner = self.0.as_ref(); + let inner = self.0.as_ptr(); - inner.b_rptr.offset_from((*inner.b_datap).db_base) as usize + (*inner).b_rptr.offset_from((*(*inner).b_datap).db_base) as usize } } @@ -269,9 +271,9 @@ impl MsgBlk { /// write pointer in the current datablock. 
pub fn tail_capacity(&self) -> usize { unsafe { - let inner = self.0.as_ref(); + let inner = self.0.as_ptr(); - (*inner.b_datap).db_lim.offset_from(inner.b_wptr) as usize + (*(*inner).b_datap).db_lim.offset_from((*inner).b_wptr) as usize } } @@ -327,10 +329,10 @@ impl MsgBlk { let mut old_tail = ptr::null_mut(); while let Some(mut valid_curr) = curr.take() { - let valid_curr = unsafe { valid_curr.as_mut() }; + let valid_curr = valid_curr.as_ptr(); let seg_len = usize::try_from(unsafe { - valid_curr.b_wptr.offset_from(valid_curr.b_rptr) + (*valid_curr).b_wptr.offset_from((*valid_curr).b_rptr) }) .expect("operating on packet with end before start"); @@ -342,11 +344,16 @@ impl MsgBlk { // SAFETY: this will only reduce the read window of this slice, // so derived byteslices will remain in capacity. - valid_curr.b_wptr = unsafe { valid_curr.b_rptr.add(to_keep) }; + unsafe { + (*valid_curr).b_wptr = (*valid_curr).b_rptr.add(to_keep); - core::mem::swap(&mut valid_curr.b_cont, &mut old_tail); + core::ptr::swap( + &raw mut (*valid_curr).b_cont, + &raw mut old_tail, + ); + } } else { - curr = NonNull::new(valid_curr.b_cont); + curr = NonNull::new(unsafe {(*valid_curr).b_cont}); } } @@ -362,12 +369,14 @@ impl MsgBlk { /// The read/write pointer is set to have `head_len` bytes of /// headroom and `body_len` bytes of capacity at the back. pub fn new_with_headroom(head_len: usize, body_len: usize) -> Self { - let mut out = Self::new(head_len + body_len); + let out = Self::new(head_len + body_len); // SAFETY: alloc is contiguous and always larger than head_len. 
- let mut_out = unsafe { out.0.as_mut() }; - mut_out.b_rptr = unsafe { mut_out.b_rptr.add(head_len) }; - mut_out.b_wptr = mut_out.b_rptr; + let mut_out = out.0.as_ptr(); + unsafe { + (*mut_out).b_rptr = (*mut_out).b_rptr.add(head_len); + (*mut_out).b_wptr = (*mut_out).b_rptr; + } out } @@ -385,9 +394,9 @@ impl MsgBlk { n_bytes: usize, f: impl FnOnce(&mut [MaybeUninit]), ) -> Result<(), WriteError> { - let mut_out = unsafe { self.0.as_mut() }; + let mut_out = self.0.as_ptr(); let avail_bytes = - unsafe { (*mut_out.b_datap).db_lim.offset_from(mut_out.b_wptr) }; + unsafe { (*(*mut_out).b_datap).db_lim.offset_from((*mut_out).b_wptr) }; if avail_bytes < 0 || (avail_bytes as usize) < n_bytes { return Err(WriteError::NotEnoughBytes { @@ -398,14 +407,16 @@ impl MsgBlk { let in_slice = unsafe { slice::from_raw_parts_mut( - mut_out.b_wptr as *mut MaybeUninit, + (*mut_out).b_wptr as *mut MaybeUninit, n_bytes, ) }; f(in_slice); - mut_out.b_wptr = unsafe { mut_out.b_wptr.add(n_bytes) }; + unsafe { + (*mut_out).b_wptr = (*mut_out).b_wptr.add(n_bytes); + } Ok(()) } @@ -423,9 +434,9 @@ impl MsgBlk { n_bytes: usize, f: impl FnOnce(&mut [MaybeUninit]), ) -> Result<(), WriteError> { - let mut_out = unsafe { self.0.as_mut() }; + let mut_out = self.0.as_ptr(); let avail_bytes = - unsafe { mut_out.b_rptr.offset_from((*mut_out.b_datap).db_base) }; + unsafe { (*mut_out).b_rptr.offset_from((*(*mut_out).b_datap).db_base) }; if avail_bytes < 0 || (avail_bytes as usize) < n_bytes { return Err(WriteError::NotEnoughBytes { @@ -434,7 +445,7 @@ impl MsgBlk { }); } - let new_head = unsafe { mut_out.b_rptr.sub(n_bytes) }; + let new_head = unsafe { (*mut_out).b_rptr.sub(n_bytes) }; let in_slice = unsafe { slice::from_raw_parts_mut(new_head as *mut MaybeUninit, n_bytes) @@ -442,7 +453,7 @@ impl MsgBlk { f(in_slice); - mut_out.b_rptr = new_head; + (*mut_out).b_rptr = new_head; Ok(()) } @@ -452,8 +463,8 @@ impl MsgBlk { let len = self.len(); match new_len.cmp(&len) { Ordering::Less => unsafe { - let 
mut_inner = self.0.as_mut(); - mut_inner.b_wptr = mut_inner.b_wptr.sub(len - new_len); + let mut_inner = self.0.as_ptr(); + (*mut_inner).b_wptr = (*mut_inner).b_wptr.sub(len - new_len); Ok(()) }, Ordering::Greater => unsafe { @@ -572,9 +583,10 @@ impl MsgBlk { /// Drop all bytes and move the cursor to the very back of the dblk. pub fn pop_all(&mut self) { + let mut_out = self.0.as_ptr(); unsafe { - (*self.0.as_ptr()).b_rptr = (*(*self.0.as_ptr()).b_datap).db_lim; - (*self.0.as_ptr()).b_wptr = (*(*self.0.as_ptr()).b_datap).db_lim; + (*mut_out).b_rptr = (*(*mut_out).b_datap).db_lim; + (*mut_out).b_wptr = (*(*mut_out).b_datap).db_lim; } } @@ -635,9 +647,9 @@ impl MsgBlk { /// * Return [`WrapError::Chain`] is `mp->b_next` or `mp->b_prev` are set. pub unsafe fn wrap_mblk(ptr: *mut mblk_t) -> Result { let inner = NonNull::new(ptr).ok_or(WrapError::NullPtr)?; - let inner_ref = inner.as_ref(); + let inner_ref = inner.as_ptr(); - if inner_ref.b_next.is_null() && inner_ref.b_prev.is_null() { + if (*inner_ref).b_next.is_null() && (*inner_ref).b_prev.is_null() { Ok(Self(inner)) } else { Err(WrapError::Chain) @@ -761,14 +773,14 @@ impl MsgBlkIterMut<'_> { pub fn next_iter(&self) -> MsgBlkIter { let curr = self .curr - .and_then(|ptr| NonNull::new(unsafe { ptr.as_ref() }.b_cont)); + .and_then(|ptr| NonNull::new(unsafe { (*ptr.as_ptr()).b_cont })); MsgBlkIter { curr, marker: PhantomData } } pub fn next_iter_mut(&mut self) -> MsgBlkIterMut { let curr = self .curr - .and_then(|ptr| NonNull::new(unsafe { ptr.as_ref() }.b_cont)); + .and_then(|ptr| NonNull::new(unsafe { (*ptr.as_ptr()).b_cont })); MsgBlkIterMut { curr, marker: PhantomData } } } @@ -795,10 +807,10 @@ impl Pullup for MsgBlkIterMut<'_> { .write_back(bytes_in_self, |mut buf| { let mut curr = self.curr; while let Some(valid_curr) = curr { - let valid_curr = valid_curr.as_ref(); - let src = valid_curr.b_rptr; + let valid_curr = valid_curr.as_ptr(); + let src = (*valid_curr).b_rptr; let seg_len = usize::try_from( - 
valid_curr.b_wptr.offset_from(src), + (*valid_curr).b_wptr.offset_from(src), ) .expect("invalid mblk -- slice end before start"); @@ -807,11 +819,11 @@ impl Pullup for MsgBlkIterMut<'_> { let dst = buf.as_mut_ptr() as *mut u8; dst.copy_from_nonoverlapping( - valid_curr.b_rptr, + (*valid_curr).b_rptr, seg_len, ); - curr = NonNull::new(valid_curr.b_cont); + curr = NonNull::new((*valid_curr).b_cont); buf = buf.split_at_mut(seg_len).1; } }) @@ -851,9 +863,9 @@ unsafe fn count_mblk_chain(mut head: Option>) -> usize { unsafe fn count_mblk_bytes(mut head: Option>) -> usize { let mut count = 0; while let Some(valid_head) = head { - let headref = valid_head.as_ref(); - count += headref.b_wptr.offset_from(headref.b_rptr).max(0) as usize; - head = NonNull::new((*valid_head.as_ptr()).b_cont); + let headref = valid_head.as_ptr(); + count += (*headref).b_wptr.offset_from((*headref).b_rptr).max(0) as usize; + head = NonNull::new((*headref).b_cont); } count } From 569db382eaf9acc280c4e84ac8ef319ada90606b Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 11 Nov 2024 19:43:23 +0000 Subject: [PATCH 112/115] Fmt. 
--- lib/opte/src/ddi/mblk.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index 047c5a56..7fa9a857 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -353,7 +353,7 @@ impl MsgBlk { ); } } else { - curr = NonNull::new(unsafe {(*valid_curr).b_cont}); + curr = NonNull::new(unsafe { (*valid_curr).b_cont }); } } @@ -395,8 +395,9 @@ impl MsgBlk { f: impl FnOnce(&mut [MaybeUninit]), ) -> Result<(), WriteError> { let mut_out = self.0.as_ptr(); - let avail_bytes = - unsafe { (*(*mut_out).b_datap).db_lim.offset_from((*mut_out).b_wptr) }; + let avail_bytes = unsafe { + (*(*mut_out).b_datap).db_lim.offset_from((*mut_out).b_wptr) + }; if avail_bytes < 0 || (avail_bytes as usize) < n_bytes { return Err(WriteError::NotEnoughBytes { @@ -435,8 +436,9 @@ impl MsgBlk { f: impl FnOnce(&mut [MaybeUninit]), ) -> Result<(), WriteError> { let mut_out = self.0.as_ptr(); - let avail_bytes = - unsafe { (*mut_out).b_rptr.offset_from((*(*mut_out).b_datap).db_base) }; + let avail_bytes = unsafe { + (*mut_out).b_rptr.offset_from((*(*mut_out).b_datap).db_base) + }; if avail_bytes < 0 || (avail_bytes as usize) < n_bytes { return Err(WriteError::NotEnoughBytes { @@ -864,7 +866,8 @@ unsafe fn count_mblk_bytes(mut head: Option>) -> usize { let mut count = 0; while let Some(valid_head) = head { let headref = valid_head.as_ptr(); - count += (*headref).b_wptr.offset_from((*headref).b_rptr).max(0) as usize; + count += + (*headref).b_wptr.offset_from((*headref).b_rptr).max(0) as usize; head = NonNull::new((*headref).b_cont); } count From 6ac41b869a5379df07cd8f4024849b20c514487a Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 19 Nov 2024 17:48:43 +0000 Subject: [PATCH 113/115] Bump to merged ingot, one nit. 
--- Cargo.lock | 6 +++--- Cargo.toml | 2 +- lib/opte/src/ddi/mblk.rs | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5385c356..5a7e2d6e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -895,7 +895,7 @@ dependencies = [ [[package]] name = "ingot" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=a45c21cec49020316c3a04651ce80841bea224d7#a45c21cec49020316c3a04651ce80841bea224d7" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=d4667db28b0a2246dcf5a36e4ceef34f4ead8d2f#d4667db28b0a2246dcf5a36e4ceef34f4ead8d2f" dependencies = [ "bitflags 2.6.0", "ingot-macros", @@ -908,7 +908,7 @@ dependencies = [ [[package]] name = "ingot-macros" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=a45c21cec49020316c3a04651ce80841bea224d7#a45c21cec49020316c3a04651ce80841bea224d7" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=d4667db28b0a2246dcf5a36e4ceef34f4ead8d2f#d4667db28b0a2246dcf5a36e4ceef34f4ead8d2f" dependencies = [ "darling", "itertools 0.13.0", @@ -921,7 +921,7 @@ dependencies = [ [[package]] name = "ingot-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/ingot.git?rev=a45c21cec49020316c3a04651ce80841bea224d7#a45c21cec49020316c3a04651ce80841bea224d7" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=d4667db28b0a2246dcf5a36e4ceef34f4ead8d2f#d4667db28b0a2246dcf5a36e4ceef34f4ead8d2f" dependencies = [ "ingot-macros", "macaddr", diff --git a/Cargo.toml b/Cargo.toml index 45891729..c1abc032 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,7 +50,7 @@ ctor = "0.2" darling = "0.20" dyn-clone = "1.0" heapless = "0.8" -ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "a45c21cec49020316c3a04651ce80841bea224d7"} +ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "d4667db28b0a2246dcf5a36e4ceef34f4ead8d2f"} ipnetwork = { version = "0.20", default-features = false } itertools = { version = 
"0.13", default-features = false } libc = "0.2" diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index 7fa9a857..f3d41c27 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -328,7 +328,7 @@ impl MsgBlk { let mut curr = Some(self.0); let mut old_tail = ptr::null_mut(); - while let Some(mut valid_curr) = curr.take() { + while let Some(valid_curr) = curr.take() { let valid_curr = valid_curr.as_ptr(); let seg_len = usize::try_from(unsafe { From 540e396ab55386bcd138b1dfc6a5b1ddfeb9d499 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 19 Nov 2024 18:29:40 +0000 Subject: [PATCH 114/115] Bump nightly compiler, fix clippy. Needed to adjust our target, one of the old keys is no longer accepted: ``` warning: target json file contains unused fields: is-builtin ``` --- .github/buildomat/jobs/opte-api.sh | 4 ++-- .github/buildomat/jobs/opte-ioctl.sh | 4 ++-- .github/buildomat/jobs/opte.sh | 8 ++++---- .github/buildomat/jobs/opteadm.sh | 4 ++-- .github/buildomat/jobs/oxide-vpc.sh | 8 ++++---- .github/buildomat/jobs/p5p.sh | 2 +- .github/buildomat/jobs/xde.sh | 6 +++--- lib/opte/src/ddi/mblk.rs | 2 +- lib/opte/src/engine/mod.rs | 8 ++++++-- xde/rust-toolchain.toml | 2 +- xde/x86_64-unknown-unknown.json | 1 - 11 files changed, 26 insertions(+), 23 deletions(-) diff --git a/.github/buildomat/jobs/opte-api.sh b/.github/buildomat/jobs/opte-api.sh index a4f10c00..c835cc19 100755 --- a/.github/buildomat/jobs/opte-api.sh +++ b/.github/buildomat/jobs/opte-api.sh @@ -3,7 +3,7 @@ #: name = "opte-api" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "nightly-2024-10-12" +#: rust_toolchain = "nightly-2024-11-18" #: output_rules = [] #: @@ -24,7 +24,7 @@ header "check API_VERSION" ./check-api-version.sh header "check style" -ptime -m cargo +nightly-2024-10-12 fmt -- --check +ptime -m cargo +nightly-2024-11-18 fmt -- --check header "analyze std" ptime -m cargo clippy --all-targets diff --git a/.github/buildomat/jobs/opte-ioctl.sh 
b/.github/buildomat/jobs/opte-ioctl.sh index edb4ac74..f67e22a2 100755 --- a/.github/buildomat/jobs/opte-ioctl.sh +++ b/.github/buildomat/jobs/opte-ioctl.sh @@ -3,7 +3,7 @@ #: name = "opte-ioctl" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "nightly-2024-10-12" +#: rust_toolchain = "nightly-2024-11-18" #: output_rules = [] #: @@ -21,7 +21,7 @@ rustc --version cd lib/opte-ioctl header "check style" -ptime -m cargo +nightly-2024-10-12 fmt -- --check +ptime -m cargo +nightly-2024-11-18 fmt -- --check header "analyze" ptime -m cargo clippy --all-targets diff --git a/.github/buildomat/jobs/opte.sh b/.github/buildomat/jobs/opte.sh index 742cfdba..8b7747dd 100755 --- a/.github/buildomat/jobs/opte.sh +++ b/.github/buildomat/jobs/opte.sh @@ -3,7 +3,7 @@ #: name = "opte" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "nightly-2024-10-12" +#: rust_toolchain = "nightly-2024-11-18" #: output_rules = [] #: @@ -21,7 +21,7 @@ rustc --version cd lib/opte header "check style" -ptime -m cargo +nightly-2024-10-12 fmt -- --check +ptime -m cargo +nightly-2024-11-18 fmt -- --check header "check docs" # @@ -30,13 +30,13 @@ header "check docs" # # Use nightly which is needed for the `kernel` feature. 
RUSTDOCFLAGS="-D warnings" ptime -m \ - cargo +nightly-2024-10-12 doc --no-default-features --features=api,std,engine,kernel + cargo +nightly-2024-11-18 doc --no-default-features --features=api,std,engine,kernel header "analyze std + api" ptime -m cargo clippy --all-targets header "analyze no_std + engine + kernel" -ptime -m cargo +nightly-2024-10-12 clippy --no-default-features --features engine,kernel +ptime -m cargo +nightly-2024-11-18 clippy --no-default-features --features engine,kernel header "test" ptime -m cargo test diff --git a/.github/buildomat/jobs/opteadm.sh b/.github/buildomat/jobs/opteadm.sh index 1a299642..d0b69784 100755 --- a/.github/buildomat/jobs/opteadm.sh +++ b/.github/buildomat/jobs/opteadm.sh @@ -3,7 +3,7 @@ #: name = "opteadm" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "nightly-2024-10-12" +#: rust_toolchain = "nightly-2024-11-18" #: output_rules = [ #: "=/work/debug/opteadm", #: "=/work/debug/opteadm.debug.sha256", @@ -30,7 +30,7 @@ rustc --version pushd bin/opteadm header "check style" -ptime -m cargo +nightly-2024-10-12 fmt -- --check +ptime -m cargo +nightly-2024-11-18 fmt -- --check header "analyze" ptime -m cargo clippy --all-targets diff --git a/.github/buildomat/jobs/oxide-vpc.sh b/.github/buildomat/jobs/oxide-vpc.sh index edfbf0f9..436013a7 100755 --- a/.github/buildomat/jobs/oxide-vpc.sh +++ b/.github/buildomat/jobs/oxide-vpc.sh @@ -3,7 +3,7 @@ #: name = "oxide-vpc" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "nightly-2024-10-12" +#: rust_toolchain = "nightly-2024-11-18" #: output_rules = [] #: @@ -21,7 +21,7 @@ rustc --version cd lib/oxide-vpc header "check style" -ptime -m cargo +nightly-2024-10-12 fmt -- --check +ptime -m cargo +nightly-2024-11-18 fmt -- --check header "check docs" # @@ -30,13 +30,13 @@ header "check docs" # # Use nightly which is needed for the `kernel` feature. 
RUSTDOCFLAGS="-D warnings" ptime -m \ - cargo +nightly-2024-10-12 doc --no-default-features --features=api,std,engine,kernel + cargo +nightly-2024-11-18 doc --no-default-features --features=api,std,engine,kernel header "analyze std + api + usdt" ptime -m cargo clippy --features usdt --all-targets header "analyze no_std + engine + kernel" -ptime -m cargo +nightly-2024-10-12 clippy --no-default-features --features engine,kernel +ptime -m cargo +nightly-2024-11-18 clippy --no-default-features --features engine,kernel header "test" ptime -m cargo test diff --git a/.github/buildomat/jobs/p5p.sh b/.github/buildomat/jobs/p5p.sh index 524f9495..c6eb6f61 100755 --- a/.github/buildomat/jobs/p5p.sh +++ b/.github/buildomat/jobs/p5p.sh @@ -3,7 +3,7 @@ #: name = "opte-p5p" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "nightly-2024-10-12" +#: rust_toolchain = "nightly-2024-11-18" #: output_rules = [ #: "=/out/opte.p5p", #: "=/out/opte.p5p.sha256", diff --git a/.github/buildomat/jobs/xde.sh b/.github/buildomat/jobs/xde.sh index 973a3048..faaebdf2 100755 --- a/.github/buildomat/jobs/xde.sh +++ b/.github/buildomat/jobs/xde.sh @@ -3,7 +3,7 @@ #: name = "opte-xde" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "nightly-2024-10-12" +#: rust_toolchain = "nightly-2024-11-18" #: output_rules = [ #: "=/work/debug/xde.dbg", #: "=/work/debug/xde.dbg.sha256", @@ -75,7 +75,7 @@ pushd xde cp xde.conf /work/xde.conf header "check style" -ptime -m cargo +nightly-2024-10-12 fmt -p xde -p xde-link -- --check +ptime -m cargo +nightly-2024-11-18 fmt -p xde -p xde-link -- --check header "analyze" ptime -m cargo clippy -- \ @@ -123,7 +123,7 @@ sha256sum $REL_TGT/xde_link.so > $REL_TGT/xde_link.so.sha256 header "build xde integration tests" pushd xde-tests -cargo +nightly-2024-10-12 fmt -- --check +cargo +nightly-2024-11-18 fmt -- --check cargo clippy --all-targets cargo build --test loopback loopback_test=$( diff --git a/lib/opte/src/ddi/mblk.rs 
b/lib/opte/src/ddi/mblk.rs index f3d41c27..13cacfe9 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -1097,7 +1097,7 @@ fn mock_freeb(mp: *mut mblk_t) { // * Modify `limit`. unsafe { let bmblk = Box::from_raw(mp); - let bdblk = Box::from_raw(bmblk.b_datap as *mut dblk_t); + let bdblk = Box::from_raw(bmblk.b_datap); let buffer = Vec::from_raw_parts( bdblk.db_base, bmblk.b_wptr.offset_from(bmblk.b_rptr) as usize, diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index fc53c0e0..217cee37 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -112,20 +112,24 @@ cfg_if! { #[macro_export] macro_rules! err_macro { ($s:tt) => { + { + let out_str = format!(concat!($s, "\0")); unsafe { - let out_str = format!(concat!($s, "\0")); // Unwrap safety: we just concat'd a NUL. let cstr = ::core::ffi::CStr::from_bytes_with_nul(out_str.as_bytes()).unwrap(); ::illumos_sys_hdrs::cmn_err(::illumos_sys_hdrs::CE_WARN, cstr.as_ptr()); } + } }; ($s:tt, $($arg:tt)*) => { + { + let out_str = format!(concat!($s, "\0"), $($arg)*); unsafe { - let out_str = format!(concat!($s, "\0"), $($arg)*); // Unwrap safety: we just concat'd a NUL. 
let cstr = ::core::ffi::CStr::from_bytes_with_nul(out_str.as_bytes()).unwrap(); ::illumos_sys_hdrs::cmn_err(::illumos_sys_hdrs::CE_WARN, cstr.as_ptr()); } + } }; } } diff --git a/xde/rust-toolchain.toml b/xde/rust-toolchain.toml index 5b5cdf89..6965878b 100644 --- a/xde/rust-toolchain.toml +++ b/xde/rust-toolchain.toml @@ -1,5 +1,5 @@ [toolchain] -channel = "nightly-2024-10-12" +channel = "nightly-2024-11-18" target = "x86_64-unknown-illumos" components = [ "clippy", "rustfmt", "rust-src" ] profile = "minimal" diff --git a/xde/x86_64-unknown-unknown.json b/xde/x86_64-unknown-unknown.json index d8c5ead2..c96cd9d0 100644 --- a/xde/x86_64-unknown-unknown.json +++ b/xde/x86_64-unknown-unknown.json @@ -10,7 +10,6 @@ "executables": true, "features": "-mmx,-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,+soft-float", "has-rpath": true, - "is-builtin": false, "is-like-solaris": true, "limit-rdylib-exports": false, "linker": "ld", From 98017555e072c15fa32448319ec38607314dcf3a Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 19 Nov 2024 18:45:23 +0000 Subject: [PATCH 115/115] Pick up open Renovate PRs. 
--- Cargo.lock | 116 ++++++++++++++++++++++++++--------------------------- 1 file changed, 58 insertions(+), 58 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5a7e2d6e..d1e17a91 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -83,9 +83,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.89" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6" +checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" [[package]] name = "arbitrary" @@ -264,9 +264,9 @@ checksum = "b0fc239e0f6cb375d2402d48afb92f76f5404fd1df208a41930ec81eda078bea" [[package]] name = "clap" -version = "4.5.18" +version = "4.5.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0956a43b323ac1afaffc053ed5c4b7c1f1800bacd1683c353aabbb752515dd3" +checksum = "fb3b4b9e5a7c7514dfa52869339ee98b3156b0bfb4e8a77c4ff4babb64b1604f" dependencies = [ "clap_builder", "clap_derive", @@ -274,9 +274,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.18" +version = "4.5.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d72166dd41634086d5803a47eb71ae740e61d84709c36f3c34110173db3961b" +checksum = "b17a95aa67cc7b5ebd32aa5370189aa0d79069ef1c64ce893bd30fb24bff20ec" dependencies = [ "anstream", "anstyle", @@ -294,7 +294,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -455,7 +455,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f" dependencies = [ "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -479,7 +479,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -490,7 +490,7 @@ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ "darling_core", 
"quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -513,7 +513,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -541,7 +541,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -677,7 +677,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -742,7 +742,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -902,7 +902,7 @@ dependencies = [ "ingot-types", "macaddr", "serde", - "zerocopy 0.8.7", + "zerocopy 0.8.10", ] [[package]] @@ -915,7 +915,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -925,7 +925,7 @@ source = "git+https://github.com/oxidecomputer/ingot.git?rev=d4667db28b0a2246dcf dependencies = [ "ingot-macros", "macaddr", - "zerocopy 0.8.7", + "zerocopy 0.8.10", ] [[package]] @@ -1010,7 +1010,7 @@ name = "kstat-macro" version = "0.1.0" dependencies = [ "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -1021,9 +1021,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.158" +version = "0.2.164" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" +checksum = "433bfe06b8c75da9b2e3fbea6e5329ff87748f0b144ef75306e674c3f6f7c13f" [[package]] name = "libdlpi-sys" @@ -1288,7 +1288,7 @@ dependencies = [ "tabwriter", "usdt", "version_check", - "zerocopy 0.8.7", + "zerocopy 0.8.10", ] [[package]] @@ -1391,7 +1391,7 @@ dependencies = [ "tabwriter", "usdt", "uuid", - "zerocopy 0.8.7", + "zerocopy 0.8.10", ] [[package]] @@ -1471,7 +1471,7 @@ dependencies = [ "pest_meta", "proc-macro2", 
"quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -1611,9 +1611,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.86" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" dependencies = [ "unicode-ident", ] @@ -1826,7 +1826,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -1852,7 +1852,7 @@ checksum = "7f81c2fde025af7e69b1d1420531c8a8811ca898919db177141a85313b1cb932" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -1866,22 +1866,22 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.210" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.210" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -1892,14 +1892,14 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] name = "serde_json" -version = "1.0.128" +version = "1.0.133" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" +checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" dependencies = [ 
"itoa", "memchr", @@ -1925,7 +1925,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -2086,9 +2086,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.77" +version = "2.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" dependencies = [ "proc-macro2", "quote", @@ -2123,12 +2123,12 @@ dependencies = [ [[package]] name = "terminal_size" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7" +checksum = "4f599bd7ca042cfdf8f4512b277c02ba102247820f9d9d4a9f521f496751a6ef" dependencies = [ "rustix", - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] @@ -2148,7 +2148,7 @@ checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -2238,7 +2238,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -2305,7 +2305,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -2367,7 +2367,7 @@ dependencies = [ "proc-macro2", "quote", "serde_tokenstream", - "syn 2.0.77", + "syn 2.0.87", "usdt-impl", ] @@ -2385,7 +2385,7 @@ dependencies = [ "quote", "serde", "serde_json", - "syn 2.0.77", + "syn 2.0.87", "thiserror", "thread-id", "version_check", @@ -2401,7 +2401,7 @@ dependencies = [ "proc-macro2", "quote", "serde_tokenstream", - "syn 2.0.77", + "syn 2.0.87", "usdt-impl", ] @@ -2413,9 +2413,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" 
[[package]] name = "uuid" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" +checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" dependencies = [ "serde", ] @@ -2464,7 +2464,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", "wasm-bindgen-shared", ] @@ -2486,7 +2486,7 @@ checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -2760,11 +2760,11 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.7" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb3da5f7220f919a6c7af7c856435a68ee1582fd7a77aa72936257d8335bd6f6" +checksum = "a13a42ed30c63171d820889b2981318736915150575b8d2d6dbee7edd68336ca" dependencies = [ - "zerocopy-derive 0.8.7", + "zerocopy-derive 0.8.10", ] [[package]] @@ -2775,18 +2775,18 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] name = "zerocopy-derive" -version = "0.8.7" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e5f54f3cc93cd80745404626681b4b9fca9a867bad5a8424b618eb0db1ae6ea" +checksum = "593e7c96176495043fcb9e87cf7659f4d18679b5bab6b92bdef359c76a7795dd" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]]