diff --git a/.github/buildomat/jobs/opte-api.sh b/.github/buildomat/jobs/opte-api.sh index eb4d0a7b..c835cc19 100755 --- a/.github/buildomat/jobs/opte-api.sh +++ b/.github/buildomat/jobs/opte-api.sh @@ -3,7 +3,7 @@ #: name = "opte-api" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "nightly-2024-05-12" +#: rust_toolchain = "nightly-2024-11-18" #: output_rules = [] #: @@ -24,7 +24,7 @@ header "check API_VERSION" ./check-api-version.sh header "check style" -ptime -m cargo +nightly-2024-05-12 fmt -- --check +ptime -m cargo +nightly-2024-11-18 fmt -- --check header "analyze std" ptime -m cargo clippy --all-targets diff --git a/.github/buildomat/jobs/opte-ioctl.sh b/.github/buildomat/jobs/opte-ioctl.sh index fdc61df0..f67e22a2 100755 --- a/.github/buildomat/jobs/opte-ioctl.sh +++ b/.github/buildomat/jobs/opte-ioctl.sh @@ -3,7 +3,7 @@ #: name = "opte-ioctl" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "nightly-2024-05-12" +#: rust_toolchain = "nightly-2024-11-18" #: output_rules = [] #: @@ -21,7 +21,7 @@ rustc --version cd lib/opte-ioctl header "check style" -ptime -m cargo +nightly-2024-05-12 fmt -- --check +ptime -m cargo +nightly-2024-11-18 fmt -- --check header "analyze" ptime -m cargo clippy --all-targets diff --git a/.github/buildomat/jobs/opte.sh b/.github/buildomat/jobs/opte.sh index a04d14a5..8b7747dd 100755 --- a/.github/buildomat/jobs/opte.sh +++ b/.github/buildomat/jobs/opte.sh @@ -3,7 +3,7 @@ #: name = "opte" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "nightly-2024-05-12" +#: rust_toolchain = "nightly-2024-11-18" #: output_rules = [] #: @@ -21,7 +21,7 @@ rustc --version cd lib/opte header "check style" -ptime -m cargo +nightly-2024-05-12 fmt -- --check +ptime -m cargo +nightly-2024-11-18 fmt -- --check header "check docs" # @@ -30,13 +30,13 @@ header "check docs" # # Use nightly which is needed for the `kernel` feature. 
RUSTDOCFLAGS="-D warnings" ptime -m \ - cargo +nightly-2024-05-12 doc --no-default-features --features=api,std,engine,kernel + cargo +nightly-2024-11-18 doc --no-default-features --features=api,std,engine,kernel header "analyze std + api" ptime -m cargo clippy --all-targets header "analyze no_std + engine + kernel" -ptime -m cargo +nightly-2024-05-12 clippy --no-default-features --features engine,kernel +ptime -m cargo +nightly-2024-11-18 clippy --no-default-features --features engine,kernel header "test" ptime -m cargo test diff --git a/.github/buildomat/jobs/opteadm.sh b/.github/buildomat/jobs/opteadm.sh index 3e0cd56f..d0b69784 100755 --- a/.github/buildomat/jobs/opteadm.sh +++ b/.github/buildomat/jobs/opteadm.sh @@ -3,7 +3,7 @@ #: name = "opteadm" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "nightly-2024-05-12" +#: rust_toolchain = "nightly-2024-11-18" #: output_rules = [ #: "=/work/debug/opteadm", #: "=/work/debug/opteadm.debug.sha256", @@ -30,7 +30,7 @@ rustc --version pushd bin/opteadm header "check style" -ptime -m cargo +nightly-2024-05-12 fmt -- --check +ptime -m cargo +nightly-2024-11-18 fmt -- --check header "analyze" ptime -m cargo clippy --all-targets diff --git a/.github/buildomat/jobs/oxide-vpc.sh b/.github/buildomat/jobs/oxide-vpc.sh index 65e97ab9..436013a7 100755 --- a/.github/buildomat/jobs/oxide-vpc.sh +++ b/.github/buildomat/jobs/oxide-vpc.sh @@ -3,7 +3,7 @@ #: name = "oxide-vpc" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "nightly-2024-05-12" +#: rust_toolchain = "nightly-2024-11-18" #: output_rules = [] #: @@ -21,7 +21,7 @@ rustc --version cd lib/oxide-vpc header "check style" -ptime -m cargo +nightly-2024-05-12 fmt -- --check +ptime -m cargo +nightly-2024-11-18 fmt -- --check header "check docs" # @@ -30,13 +30,13 @@ header "check docs" # # Use nightly which is needed for the `kernel` feature. 
RUSTDOCFLAGS="-D warnings" ptime -m \ - cargo +nightly-2024-05-12 doc --no-default-features --features=api,std,engine,kernel + cargo +nightly-2024-11-18 doc --no-default-features --features=api,std,engine,kernel header "analyze std + api + usdt" ptime -m cargo clippy --features usdt --all-targets header "analyze no_std + engine + kernel" -ptime -m cargo +nightly-2024-05-12 clippy --no-default-features --features engine,kernel +ptime -m cargo +nightly-2024-11-18 clippy --no-default-features --features engine,kernel header "test" ptime -m cargo test diff --git a/.github/buildomat/jobs/p5p.sh b/.github/buildomat/jobs/p5p.sh index 1d51caff..c6eb6f61 100755 --- a/.github/buildomat/jobs/p5p.sh +++ b/.github/buildomat/jobs/p5p.sh @@ -3,7 +3,7 @@ #: name = "opte-p5p" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "nightly-2024-05-12" +#: rust_toolchain = "nightly-2024-11-18" #: output_rules = [ #: "=/out/opte.p5p", #: "=/out/opte.p5p.sha256", diff --git a/.github/buildomat/jobs/xde.sh b/.github/buildomat/jobs/xde.sh index 750d7535..faaebdf2 100755 --- a/.github/buildomat/jobs/xde.sh +++ b/.github/buildomat/jobs/xde.sh @@ -3,7 +3,7 @@ #: name = "opte-xde" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "nightly-2024-05-12" +#: rust_toolchain = "nightly-2024-11-18" #: output_rules = [ #: "=/work/debug/xde.dbg", #: "=/work/debug/xde.dbg.sha256", @@ -75,7 +75,7 @@ pushd xde cp xde.conf /work/xde.conf header "check style" -ptime -m cargo +nightly-2024-05-12 fmt -p xde -p xde-link -- --check +ptime -m cargo +nightly-2024-11-18 fmt -p xde -p xde-link -- --check header "analyze" ptime -m cargo clippy -- \ @@ -123,7 +123,7 @@ sha256sum $REL_TGT/xde_link.so > $REL_TGT/xde_link.so.sha256 header "build xde integration tests" pushd xde-tests -cargo +nightly-2024-05-12 fmt -- --check +cargo +nightly-2024-11-18 fmt -- --check cargo clippy --all-targets cargo build --test loopback loopback_test=$( diff --git a/Cargo.lock b/Cargo.lock index 
273d40ec..d1e17a91 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -83,9 +83,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.89" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6" +checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" [[package]] name = "arbitrary" @@ -264,9 +264,9 @@ checksum = "b0fc239e0f6cb375d2402d48afb92f76f5404fd1df208a41930ec81eda078bea" [[package]] name = "clap" -version = "4.5.18" +version = "4.5.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0956a43b323ac1afaffc053ed5c4b7c1f1800bacd1683c353aabbb752515dd3" +checksum = "fb3b4b9e5a7c7514dfa52869339ee98b3156b0bfb4e8a77c4ff4babb64b1604f" dependencies = [ "clap_builder", "clap_derive", @@ -274,9 +274,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.18" +version = "4.5.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d72166dd41634086d5803a47eb71ae740e61d84709c36f3c34110173db3961b" +checksum = "b17a95aa67cc7b5ebd32aa5370189aa0d79069ef1c64ce893bd30fb24bff20ec" dependencies = [ "anstream", "anstyle", @@ -294,7 +294,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -455,7 +455,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f" dependencies = [ "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -479,7 +479,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -490,7 +490,7 @@ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ "darling_core", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -513,7 +513,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] 
[[package]] @@ -541,7 +541,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -599,7 +599,7 @@ dependencies = [ "serde", "serde_json", "thiserror", - "zerocopy", + "zerocopy 0.7.35", ] [[package]] @@ -677,7 +677,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -742,7 +742,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -892,6 +892,42 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "ingot" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=d4667db28b0a2246dcf5a36e4ceef34f4ead8d2f#d4667db28b0a2246dcf5a36e4ceef34f4ead8d2f" +dependencies = [ + "bitflags 2.6.0", + "ingot-macros", + "ingot-types", + "macaddr", + "serde", + "zerocopy 0.8.10", +] + +[[package]] +name = "ingot-macros" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=d4667db28b0a2246dcf5a36e4ceef34f4ead8d2f#d4667db28b0a2246dcf5a36e4ceef34f4ead8d2f" +dependencies = [ + "darling", + "itertools 0.13.0", + "proc-macro2", + "quote", + "regex", + "syn 2.0.87", +] + +[[package]] +name = "ingot-types" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/ingot.git?rev=d4667db28b0a2246dcf5a36e4ceef34f4ead8d2f#d4667db28b0a2246dcf5a36e4ceef34f4ead8d2f" +dependencies = [ + "ingot-macros", + "macaddr", + "zerocopy 0.8.10", +] + [[package]] name = "ipnetwork" version = "0.20.0" @@ -974,7 +1010,7 @@ name = "kstat-macro" version = "0.1.0" dependencies = [ "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -985,9 +1021,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.158" +version = "0.2.164" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" +checksum = "433bfe06b8c75da9b2e3fbea6e5329ff87748f0b144ef75306e674c3f6f7c13f" [[package]] name = "libdlpi-sys" @@ -1081,6 +1117,12 @@ version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +[[package]] +name = "macaddr" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baee0bbc17ce759db233beb01648088061bf678383130602a298e6998eedb2d8" + [[package]] name = "managed" version = "0.8.0" @@ -1228,12 +1270,14 @@ checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" name = "opte" version = "0.1.0" dependencies = [ + "bitflags 2.6.0", "cfg-if", "crc32fast", "derror-macro", "dyn-clone", "heapless", "illumos-sys-hdrs", + "ingot", "itertools 0.13.0", "kstat-macro", "opte", @@ -1244,7 +1288,7 @@ dependencies = [ "tabwriter", "usdt", "version_check", - "zerocopy", + "zerocopy 0.8.10", ] [[package]] @@ -1252,6 +1296,7 @@ name = "opte-api" version = "0.1.0" dependencies = [ "illumos-sys-hdrs", + "ingot", "ipnetwork", "postcard", "serde", @@ -1346,7 +1391,7 @@ dependencies = [ "tabwriter", "usdt", "uuid", - "zerocopy", + "zerocopy 0.8.10", ] [[package]] @@ -1426,7 +1471,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -1515,7 +1560,7 @@ version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" dependencies = [ - "zerocopy", + "zerocopy 0.7.35", ] [[package]] @@ -1566,9 +1611,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.86" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +checksum = 
"f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" dependencies = [ "unicode-ident", ] @@ -1781,7 +1826,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -1807,7 +1852,7 @@ checksum = "7f81c2fde025af7e69b1d1420531c8a8811ca898919db177141a85313b1cb932" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -1821,22 +1866,22 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.210" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.210" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -1847,14 +1892,14 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] name = "serde_json" -version = "1.0.128" +version = "1.0.133" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" +checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" dependencies = [ "itoa", "memchr", @@ -1880,7 +1925,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -2041,9 +2086,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.77" +version = "2.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" dependencies = [ "proc-macro2", "quote", @@ -2078,12 +2123,12 @@ dependencies = [ [[package]] name = "terminal_size" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7" +checksum = "4f599bd7ca042cfdf8f4512b277c02ba102247820f9d9d4a9f521f496751a6ef" dependencies = [ "rustix", - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] @@ -2103,7 +2148,7 @@ checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -2193,7 +2238,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -2260,7 +2305,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", ] [[package]] @@ -2322,7 +2367,7 @@ dependencies = [ "proc-macro2", "quote", "serde_tokenstream", - "syn 2.0.77", + "syn 2.0.87", "usdt-impl", ] @@ -2340,7 +2385,7 @@ dependencies = [ "quote", "serde", "serde_json", - "syn 2.0.77", + "syn 2.0.87", "thiserror", "thread-id", "version_check", @@ -2356,7 +2401,7 @@ dependencies = [ "proc-macro2", "quote", "serde_tokenstream", - "syn 2.0.77", + "syn 2.0.87", "usdt-impl", ] @@ -2368,9 +2413,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" +checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" dependencies = [ "serde", ] 
@@ -2419,7 +2464,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", "wasm-bindgen-shared", ] @@ -2441,7 +2486,7 @@ checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -2666,6 +2711,7 @@ dependencies = [ "bitflags 2.6.0", "crc32fast", "illumos-sys-hdrs", + "ingot", "opte", "oxide-vpc", "postcard", @@ -2709,7 +2755,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ "byteorder", - "zerocopy-derive", + "zerocopy-derive 0.7.35", +] + +[[package]] +name = "zerocopy" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a13a42ed30c63171d820889b2981318736915150575b8d2d6dbee7edd68336ca" +dependencies = [ + "zerocopy-derive 0.8.10", ] [[package]] @@ -2720,7 +2775,18 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.87", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "593e7c96176495043fcb9e87cf7659f4d18679b5bab6b92bdef359c76a7795dd" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 992a077c..c1abc032 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,6 +50,7 @@ ctor = "0.2" darling = "0.20" dyn-clone = "1.0" heapless = "0.8" +ingot = { git = "https://github.com/oxidecomputer/ingot.git", rev = "d4667db28b0a2246dcf5a36e4ceef34f4ead8d2f"} ipnetwork = { version = "0.20", default-features = false } itertools = { version = "0.13", default-features = false } libc = "0.2" @@ -75,10 +76,11 @@ toml = "0.8" uuid = { version = "1.0", default-features = false, features = ["serde"]} usdt = 
"0.5" version_check = "0.9" -zerocopy = { version = "0.7", features = ["derive"] } +zerocopy = { version = "0.8", features = ["derive"] } zone = { git = "https://github.com/oxidecomputer/zone" } ztest = { git = "https://github.com/oxidecomputer/falcon", branch = "main" } poptrie = { git = "https://github.com/oxidecomputer/poptrie", branch = "multipath" } [profile.release] debug = 2 +lto = true diff --git a/README.adoc b/README.adoc index 32b33698..47865d64 100644 --- a/README.adoc +++ b/README.adoc @@ -30,6 +30,9 @@ While the XDE kernel module runs only on Helios, our test suite and microbenchma | `cargo kbench` | N/A + +| N/A +| `cargo +nightly fuzz run parse-in`, `cargo +nightly fuzz run parse-out` |=== More detail on our benchmarks can be found in xref:bench/README.adoc[bench/README]. diff --git a/bench/benches/userland.rs b/bench/benches/userland.rs index 4014291f..59de9a42 100644 --- a/bench/benches/userland.rs +++ b/bench/benches/userland.rs @@ -10,6 +10,7 @@ use criterion::criterion_group; use criterion::criterion_main; use criterion::BenchmarkId; use criterion::Criterion; +use opte::engine::packet::Packet; use opte_bench::alloc::*; use opte_bench::packet::BenchPacket; use opte_bench::packet::BenchPacketInstance; @@ -80,14 +81,43 @@ pub fn test_parse( || inp.generate(), // match *outside* the closure to prevent its selection from being timed. 
match parser { - ParserKind::Generic => |(in_pkt, direction): TestCase| { - in_pkt.parse(direction, GenericUlp {}) - }, - ParserKind::OxideVpc => |(in_pkt, direction): TestCase| { - in_pkt.parse(direction, VpcParser {}) - }, + ParserKind::Generic => { + |(mut in_pkt, direction): TestCase| { + black_box(match direction { + In => Packet::parse_inbound( + in_pkt.iter_mut(), + GenericUlp {}, + ), + Out => Packet::parse_outbound( + in_pkt.iter_mut(), + GenericUlp {}, + ), + }) + .unwrap(); + } + } + ParserKind::OxideVpc => { + |(mut in_pkt, direction): TestCase| { + black_box(match direction { + In => { + Packet::parse_inbound( + in_pkt.iter_mut(), + VpcParser {}, + ) + .unwrap(); + } + Out => { + Packet::parse_outbound( + in_pkt.iter_mut(), + VpcParser {}, + ) + .unwrap(); + } + }); + } + } }, - criterion::BatchSize::PerIteration, + criterion::BatchSize::LargeInput, ) }, ); @@ -117,6 +147,7 @@ pub fn test_handle( M::label() )); + let parser = case.parse_with(); c.bench_with_input( BenchmarkId::from_parameter(case.instance_name()), &case, @@ -124,32 +155,64 @@ pub fn test_handle( b.iter_batched( || { let (init_pkt, dir) = case.generate(); - let parsed_pkt = match case.parse_with() { - ParserKind::Generic => { - init_pkt.parse(dir, GenericUlp {}).unwrap() - } - ParserKind::OxideVpc => { - init_pkt.parse(dir, VpcParser {}).unwrap() - } - }; - case.pre_handle(&port); - (parsed_pkt, dir) + (init_pkt, dir) }, - |(mut pkt, dir)| { - assert!(!matches!( - port.port - .process( - dir, - black_box(&mut pkt), - ActionMeta::new(), - ) - .unwrap(), - ProcessResult::Drop { .. } - )) + // Can't seem to match outside here -- must be missing something. + // Sadly, we can't elide parsing here as the + // packet is now a view over the generated pkt. 
+ |(mut pkt_m, dir): TestCase| match parser { + ParserKind::Generic => { + let res = match dir { + In => { + let pkt = Packet::parse_inbound( + pkt_m.iter_mut(), + GenericUlp {}, + ) + .unwrap(); + port.port.process(dir, black_box(pkt)).unwrap() + } + Out => { + let pkt = Packet::parse_outbound( + pkt_m.iter_mut(), + GenericUlp {}, + ) + .unwrap(); + port.port.process(dir, black_box(pkt)).unwrap() + } + }; + assert!(!matches!(res, ProcessResult::Drop { .. })); + if let Modified(spec) = res { + black_box(spec.apply(pkt_m)); + } + } + ParserKind::OxideVpc => { + let res = match dir { + In => { + let pkt = Packet::parse_inbound( + pkt_m.iter_mut(), + VpcParser {}, + ) + .unwrap(); + port.port.process(dir, black_box(pkt)).unwrap() + } + Out => { + let pkt = Packet::parse_outbound( + pkt_m.iter_mut(), + VpcParser {}, + ) + .unwrap(); + port.port.process(dir, black_box(pkt)).unwrap() + } + }; + assert!(!matches!(res, ProcessResult::Drop { .. })); + if let Modified(spec) = res { + black_box(spec.apply(pkt_m)); + } + } }, - criterion::BatchSize::PerIteration, + criterion::BatchSize::LargeInput, ) }, ); diff --git a/bench/src/kbench/remote.rs b/bench/src/kbench/remote.rs index 022f776d..2c811407 100644 --- a/bench/src/kbench/remote.rs +++ b/bench/src/kbench/remote.rs @@ -8,6 +8,7 @@ //! over physical links. 
use super::*; +#[cfg(target_os = "illumos")] use std::collections::HashSet; use std::io::Read; use std::io::Write; @@ -26,6 +27,7 @@ pub struct Routes { pub underlay: Ipv6Addr, } +#[cfg_attr(not(target_os = "illumos"), allow(unused))] pub fn server_session( mut stream: TcpStream, route: Arc, diff --git a/bench/src/packet.rs b/bench/src/packet.rs index 97cef04e..c0f7f76b 100644 --- a/bench/src/packet.rs +++ b/bench/src/packet.rs @@ -4,23 +4,31 @@ // Copyright 2024 Oxide Computer Company +use opte::ddi::mblk::MsgBlk; use opte::engine::dhcpv6::MessageType; -use opte::engine::packet::Initialized; -use opte::engine::packet::Packet; +use opte::engine::ether::Ethernet; +use opte::engine::ip::v4::Ipv4; +use opte::engine::ip::v6::Ipv6; +use opte::engine::ip::L3Repr; +use opte::engine::parse::UlpRepr; use opte::engine::Direction; +use opte::ingot::tcp::Tcp; +use opte::ingot::tcp::TcpFlags; +use opte::ingot::types::HeaderLen; +use opte::ingot::udp::Udp; use opte_test_utils::dhcp::dhcpv6_with_reasonable_defaults; -use opte_test_utils::dhcp::packet_from_client_dhcpv4_message_unparsed; -use opte_test_utils::dhcp::packet_from_client_dhcpv6_message_unparsed; +use opte_test_utils::dhcp::packet_from_client_dhcpv4_message; +use opte_test_utils::dhcp::packet_from_client_dhcpv6_message; use opte_test_utils::dhcp::DhcpRepr; -use opte_test_utils::icmp::gen_icmp_echo_unparsed; -use opte_test_utils::icmp::gen_icmpv6_echo_unparsed; -use opte_test_utils::icmp::generate_ndisc_unparsed; +use opte_test_utils::icmp::gen_icmp_echo; +use opte_test_utils::icmp::gen_icmpv6_echo; +use opte_test_utils::icmp::generate_ndisc; use opte_test_utils::icmp::NdiscRepr; use opte_test_utils::icmp::RawHardwareAddress; use opte_test_utils::overlay::BOUNDARY_SERVICES_VNI; use opte_test_utils::*; -pub type TestCase = (Packet, Direction); +pub type TestCase = (MsgBlk, Direction); pub enum ParserKind { Generic, @@ -42,7 +50,7 @@ pub trait BenchPacketInstance { fn instance_name(&self) -> String; /// Generate a single 
test packet. - fn generate(&self) -> (Packet, Direction); + fn generate(&self) -> (MsgBlk, Direction); /// Create a custom port for this benchmark instance. fn create_port(&self) -> Option { @@ -150,12 +158,12 @@ impl BenchPacketInstance for UlpProcessInstance { // flowkey. This will also set up our UFT entry. let self_but_out = Self { direction: Direction::Out, ..self.clone() }; - let (pkt, dir) = self_but_out.generate(); - let mut pkt = pkt.parse(dir, VpcParser {}).unwrap(); + let (mut pkt_m, dir) = self_but_out.generate(); + let pkt = parse_outbound(&mut pkt_m, VpcParser {}).unwrap(); if self.fast_path { if let ProcessResult::Drop { reason } = - port.port.process(dir, &mut pkt, ActionMeta::new()).unwrap() + port.port.process(dir, pkt).unwrap() { panic!("failed to pass in pkt: {reason:?}"); }; @@ -165,6 +173,9 @@ impl BenchPacketInstance for UlpProcessInstance { port.port.clear_lft(layer).unwrap(); } } + + // Note: don't need to finish processing the packet + // -- the op we care about is just establishing the UFT state. 
} fn instance_name(&self) -> String { @@ -174,8 +185,8 @@ impl BenchPacketInstance for UlpProcessInstance { ) } - fn generate(&self) -> (Packet, Direction) { - let (my_ip, my_guest_ip, partner_ip, ether_type): ( + fn generate(&self) -> (MsgBlk, Direction) { + let (my_ip, my_guest_ip, partner_ip, ethertype): ( IpAddr, IpAddr, IpAddr, @@ -185,13 +196,13 @@ impl BenchPacketInstance for UlpProcessInstance { self.cfg.ipv4().external_ips.ephemeral_ip.unwrap().into(), self.cfg.ipv4().private_ip.into(), "93.184.216.34".parse().unwrap(), - EtherType::Ipv4, + Ethertype::IPV4, ), IpVariant::V6 => ( self.cfg.ipv6().external_ips.ephemeral_ip.unwrap().into(), self.cfg.ipv6().private_ip.into(), "2606:2800:220:1:248:1893:25c8:1946".parse().unwrap(), - EtherType::Ipv6, + Ethertype::IPV6, ), }; let (src_mac, dst_mac) = match self.direction { @@ -202,57 +213,55 @@ impl BenchPacketInstance for UlpProcessInstance { Direction::Out => (my_guest_ip, partner_ip, 10010, 80), Direction::In => (partner_ip, my_ip, 80, 10010), }; - let eth = EtherMeta { dst: dst_mac, src: src_mac, ether_type }; + let eth = Ethernet { destination: dst_mac, source: src_mac, ethertype }; let body = vec![0u8; self.body_len]; - let (ulp, next_hdr): (UlpMeta, _) = match self.proto { + let (ulp, next_header) = match self.proto { ProtoVariant::Tcp => ( - TcpMeta { - src: src_port, - dst: dst_port, + UlpRepr::Tcp(Tcp { + source: src_port, + destination: dst_port, flags: TcpFlags::ACK, - seq: 1234, - ack: 3456, + sequence: 1234, + acknowledgement: 3456, window_size: 1, - csum: [0; 2], - options_bytes: None, - options_len: 0, - } - .into(), - IpProtocol::Tcp, + ..Default::default() + }), + IngotIpProto::TCP, ), ProtoVariant::Udp => ( - UdpMeta { - src: src_port, - dst: dst_port, - len: (UdpHdr::SIZE + body.len()) as u16, - csum: [0; 2], - } - .into(), - IpProtocol::Udp, + UlpRepr::Udp(Udp { + source: src_port, + destination: dst_port, + length: (Udp::MINIMUM_LENGTH + body.len()) as u16, + ..Default::default() + }), + 
IngotIpProto::UDP, ), }; - let proto = Protocol::from(next_hdr); - let ip: IpMeta = match (src_ip, dst_ip) { - (IpAddr::Ip4(src), IpAddr::Ip4(dst)) => Ipv4Meta { - src, - dst, - proto, - total_len: (Ipv4Hdr::BASE_SIZE + ulp.hdr_len() + body.len()) - as u16, - ..Ipv4Meta::default() + let protocol = next_header; + let ip = match (src_ip, dst_ip) { + (IpAddr::Ip4(source), IpAddr::Ip4(destination)) => { + L3Repr::Ipv4(Ipv4 { + source, + destination, + protocol, + total_len: (Ipv4::MINIMUM_LENGTH + + (&ulp, &body).packet_length()) + as u16, + ..Default::default() + }) } - .into(), - (IpAddr::Ip6(src), IpAddr::Ip6(dst)) => Ipv6Meta { - src, - dst, - next_hdr, - proto, - pay_len: (ulp.hdr_len() + body.len()) as u16, - ..Ipv6Meta::default() + (IpAddr::Ip6(source), IpAddr::Ip6(destination)) => { + L3Repr::Ipv6(Ipv6 { + source, + destination, + next_header, + payload_len: (&ulp, &body).packet_length() as u16, + ..Default::default() + }) } - .into(), _ => unreachable!(), }; @@ -276,14 +285,7 @@ impl BenchPacketInstance for UlpProcessInstance { } }; - let buf = out_pkt.all_bytes(); - - let len = buf.len(); - let mut pkt = Packet::alloc_and_expand(len); - let mut wtr = pkt.seg0_wtr(); - wtr.slice_mut(len).unwrap().copy_from_slice(&buf[..]); - - (pkt, self.direction) + (out_pkt, self.direction) } fn create_port(&self) -> Option { @@ -359,7 +361,7 @@ impl BenchPacketInstance for Dhcp4Instance { format!("{self:?}") } - fn generate(&self) -> (Packet, Direction) { + fn generate(&self) -> (MsgBlk, Direction) { let cfg = g1_cfg(); let message_type = match self { Dhcp4Instance::Discover => dhcp::DhcpMessageType::Discover, @@ -396,10 +398,7 @@ impl BenchPacketInstance for Dhcp4Instance { additional_options: &[], }; - ( - packet_from_client_dhcpv4_message_unparsed(&cfg, &repr), - Direction::Out, - ) + (packet_from_client_dhcpv4_message(&cfg, &repr), Direction::Out) } } @@ -429,7 +428,7 @@ impl BenchPacketInstance for Dhcp6Instance { format!("{self:?}") } - fn generate(&self) -> (Packet, 
Direction) { + fn generate(&self) -> (MsgBlk, Direction) { let cfg = g1_cfg(); let class = match self { Dhcp6Instance::Solicit => MessageType::Solicit, @@ -437,10 +436,7 @@ impl BenchPacketInstance for Dhcp6Instance { }; let repr = dhcpv6_with_reasonable_defaults(class, false, &cfg); - ( - packet_from_client_dhcpv6_message_unparsed(&cfg, &repr), - Direction::Out, - ) + (packet_from_client_dhcpv6_message(&cfg, &repr), Direction::Out) } } @@ -464,13 +460,13 @@ impl BenchPacketInstance for Icmp4 { "EchoRequest".into() } - fn generate(&self) -> (Packet, Direction) { + fn generate(&self) -> (MsgBlk, Direction) { let cfg = g1_cfg(); let ident = 7; let seq_no = 777; let data = b"reunion\0"; - let pkt = gen_icmp_echo_unparsed( + let pkt = gen_icmp_echo( icmp::IcmpEchoType::Req, cfg.guest_mac, cfg.gateway_mac, @@ -517,14 +513,14 @@ impl BenchPacketInstance for Icmp6Instance { format!("{self:?}") } - fn generate(&self) -> (Packet, Direction) { + fn generate(&self) -> (MsgBlk, Direction) { let cfg = g1_cfg(); let ident = 7; let seq_no = 777; let data = b"reunion\0"; let pkt = match self { - Icmp6Instance::EchoRequest => gen_icmpv6_echo_unparsed( + Icmp6Instance::EchoRequest => gen_icmpv6_echo( icmp::IcmpEchoType::Req, cfg.guest_mac, cfg.gateway_mac, @@ -542,7 +538,7 @@ impl BenchPacketInstance for Icmp6Instance { &cfg.guest_mac, )), }; - generate_ndisc_unparsed( + generate_ndisc( solicit, cfg.guest_mac, cfg.gateway_mac, @@ -558,7 +554,7 @@ impl BenchPacketInstance for Icmp6Instance { }; let dst_ip = Ipv6Addr::ALL_ROUTERS; - generate_ndisc_unparsed( + generate_ndisc( solicit, src_mac, // Must be destined for the All-Routers IPv6 address, and the corresponding diff --git a/bin/opteadm/src/bin/opteadm.rs b/bin/opteadm/src/bin/opteadm.rs index f9d4b120..577eca9c 100644 --- a/bin/opteadm/src/bin/opteadm.rs +++ b/bin/opteadm/src/bin/opteadm.rs @@ -813,12 +813,8 @@ fn main() -> anyhow::Result<()> { .context("failed to allow on inbound direction")?; hdl.allow_cidr(&port, prefix, 
Direction::Out).inspect_err( |e| { - hdl.remove_cidr(&port, prefix, Direction::In).expect( - &format!( - "FATAL: failed to rollback in-direction allow \ - of {prefix} after {e}" - ), - ); + hdl.remove_cidr(&port, prefix, Direction::In).unwrap_or_else(|_| panic!("FATAL: failed to rollback in-direction allow \ + of {prefix} after {e}")); }, )?; } @@ -843,12 +839,8 @@ fn main() -> anyhow::Result<()> { remove_cidr(Direction::In) .context("failed to deny on inbound direction")?; remove_cidr(Direction::Out).inspect_err(|e| { - hdl.allow_cidr(&port, prefix, Direction::In).expect( - &format!( - "FATAL: failed to rollback in-direction remove \ - of {prefix} after {e}" - ), - ); + hdl.allow_cidr(&port, prefix, Direction::In).unwrap_or_else(|_| panic!("FATAL: failed to rollback in-direction remove \ + of {prefix} after {e}")); })?; } } diff --git a/crates/derror-macro/src/lib.rs b/crates/derror-macro/src/lib.rs index 4b6a8ebf..d2f3e21e 100644 --- a/crates/derror-macro/src/lib.rs +++ b/crates/derror-macro/src/lib.rs @@ -18,9 +18,10 @@ struct Args { } /// Generate a `DError` implementation given a tree-structured enum -/// where only leaf nodes hold additional data. This allows for deeply -/// nested enums to be more easily understood in dtrace probes without -/// calling `format!()`. +/// where only leaf nodes hold additional data. +/// +/// This allows for deeply nested enums to be more easily understood in +/// dtrace probes without calling `format!()`. /// /// This is intended for annotating error chains such as: /// ```ignore @@ -142,6 +143,7 @@ pub fn derive_derror( quote! 
{ impl DError for #ident { #[allow(non_upper_case_globals)] + #[inline] fn discriminant(&self) -> &'static ::core::ffi::CStr { use ::core::ffi::CStr; #( #cstr_decls )* @@ -150,6 +152,7 @@ pub fn derive_derror( } } + #[inline] fn child(&self) -> Option<&dyn DError> { match self { #( #child_arms )* diff --git a/crates/illumos-sys-hdrs/src/lib.rs b/crates/illumos-sys-hdrs/src/lib.rs index 1f52a7f7..12bb8d1d 100644 --- a/crates/illumos-sys-hdrs/src/lib.rs +++ b/crates/illumos-sys-hdrs/src/lib.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2022 Oxide Computer Company +// Copyright 2024 Oxide Computer Company #![cfg_attr(feature = "kernel", feature(extern_types))] #![allow(non_camel_case_types)] #![no_std] @@ -271,7 +271,7 @@ pub struct mblk_t { pub b_cont: *mut mblk_t, pub b_rptr: *mut c_uchar, pub b_wptr: *mut c_uchar, - pub b_datap: *const dblk_t, + pub b_datap: *mut dblk_t, pub b_band: c_uchar, pub b_tag: c_uchar, pub b_flag: c_ushort, @@ -290,7 +290,7 @@ impl Default for mblk_t { b_cont: ptr::null_mut(), b_rptr: ptr::null_mut(), b_wptr: ptr::null_mut(), - b_datap: ptr::null(), + b_datap: ptr::null_mut(), b_band: 0, b_tag: 0, b_flag: 0, @@ -316,6 +316,8 @@ pub type offset_t = c_longlong; pub type pid_t = c_int; pub type zoneid_t = id_t; +/// A standard boolean in illumos. +/// /// This is a commonly used illumos kernel type. Originally I was /// basing these C types on the cty crate. But really we should just /// define the illumos types directly. These would make up the base @@ -331,13 +333,13 @@ pub enum boolean_t { B_TRUE, } -/// The source for this structure makes use of the -/// `_LONG_LONG_{LTOH,HTOL}` ISA macros. My guess is this is needed -/// for 32-bit userland applications using `long long *` for things -/// like file/memory addresses (where we have a 32-bit pointer -/// pointing to a 64-bit value). 
The macro determines if the pointer -/// is to the high 32 bits or the low 32 bits. Currently, illumos -/// always sets `_LONG_LONG_HTOL`. +// The source for this structure makes use of the +// `_LONG_LONG_{LTOH,HTOL}` ISA macros. My guess is this is needed +// for 32-bit userland applications using `long long *` for things +// like file/memory addresses (where we have a 32-bit pointer +// pointing to a 64-bit value). The macro determines if the pointer +// is to the high 32 bits or the low 32 bits. Currently, illumos +// always sets `_LONG_LONG_HTOL`. #[repr(C)] pub union lloff_t { pub _f: offset_t, // full 64-bits @@ -355,9 +357,9 @@ pub struct upper_lower { // uts/common/sys/uio.h // ====================================================================== -/// This definition assumes applications are compiled with XPG4v2 -/// (`_XPG4_2`) or later support. If we want Rust drivers to have -/// maximum userland support we will want to also support pre-XPG4v2. +// This definition assumes applications are compiled with XPG4v2 +// (`_XPG4_2`) or later support. If we want Rust drivers to have +// maximum userland support we will want to also support pre-XPG4v2. 
#[repr(C)] pub struct iovec_t { pub iov_base: *mut c_void, diff --git a/crates/kstat-macro/src/lib.rs b/crates/kstat-macro/src/lib.rs index 93628fdb..fc689c48 100644 --- a/crates/kstat-macro/src/lib.rs +++ b/crates/kstat-macro/src/lib.rs @@ -83,7 +83,7 @@ pub fn derive_kstat_provider(input: TokenStream) -> TokenStream { fn init( &mut self - ) -> core::result::Result<(), kstat::Error> { + ) -> core::result::Result<(), ::opte::ddi::kstat::Error> { #( self.#fields_ident.init(stringify!(#fields_ident))?; )* Ok(()) } diff --git a/crates/opte-api/Cargo.toml b/crates/opte-api/Cargo.toml index daed612a..7c4d2e60 100644 --- a/crates/opte-api/Cargo.toml +++ b/crates/opte-api/Cargo.toml @@ -13,6 +13,7 @@ std = ["ipnetwork"] [dependencies] illumos-sys-hdrs.workspace = true +ingot.workspace = true ipnetwork = { workspace = true, optional = true } postcard.workspace = true serde.workspace = true diff --git a/crates/opte-api/src/dns.rs b/crates/opte-api/src/dns.rs index 823e44fd..ad4e0d06 100644 --- a/crates/opte-api/src/dns.rs +++ b/crates/opte-api/src/dns.rs @@ -28,7 +28,7 @@ use serde::Serialize; /// /// - The string form of the name may not exceed 253 octets /// - Each label (except for possibly the last, root label) must be between 1 -/// and 63 octets. +/// and 63 octets. /// /// # Details /// diff --git a/crates/opte-api/src/encap.rs b/crates/opte-api/src/encap.rs index d772b76e..dbe25ef3 100644 --- a/crates/opte-api/src/encap.rs +++ b/crates/opte-api/src/encap.rs @@ -2,95 +2,9 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company -use alloc::string::String; -use alloc::string::ToString; -use core::fmt; -use core::fmt::Debug; -use core::fmt::Display; -use core::str::FromStr; -use serde::Deserialize; -use serde::Serialize; - -/// A Geneve Virtual Network Identifier (VNI). 
-#[derive( - Clone, Copy, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize, -)] -pub struct Vni { - // A VNI is 24-bit. By storing it this way we don't have to check - // the value on the opte-core side to know if it's a valid VNI, we - // just decode the bytes. - // - // The bytes are in network order. - inner: [u8; 3], -} - -impl Default for Vni { - fn default() -> Self { - Vni::new(0u32).unwrap() - } -} - -impl From<Vni> for u32 { - fn from(vni: Vni) -> u32 { - let bytes = vni.inner; - u32::from_be_bytes([0, bytes[0], bytes[1], bytes[2]]) - } -} - -impl FromStr for Vni { - type Err = String; - - fn from_str(val: &str) -> Result<Self, Self::Err> { - let n = val.parse::<u32>().map_err(|e| e.to_string())?; - Self::new(n) - } -} - -impl Display for Vni { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", u32::from(*self)) - } -} - -// There's no reason to view the VNI as its raw array, so just present -// it in a human-friendly manner. -impl Debug for Vni { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Vni {{ inner: {} }}", self) - } -} - -const VNI_MAX: u32 = 0x00_FF_FF_FF; - -impl Vni { - pub fn as_u32(&self) -> u32 { - u32::from_be_bytes([0, self.inner[0], self.inner[1], self.inner[2]]) - } - - /// Return the bytes that represent this VNI. The bytes are in - /// network order. - pub fn bytes(&self) -> [u8; 3] { - self.inner - } - - /// Attempt to create a new VNI from any value which can be - /// converted to a `u32`. - /// - /// # Errors - /// - /// Returns an error when the value exceeds the 24-bit maximum.
- pub fn new<N: Into<u32>>(val: N) -> Result<Self, String> { - let val = val.into(); - if val > VNI_MAX { - return Err(format!("VNI value exceeds maximum: {}", val)); - } - - let be_bytes = val.to_be_bytes(); - Ok(Vni { inner: [be_bytes[1], be_bytes[2], be_bytes[3]] }) - } -} +pub use ingot::geneve::Vni; #[cfg(test)] mod test { @@ -100,7 +14,7 @@ mod test { fn good_vni() { assert!(Vni::new(0u32).is_ok()); assert!(Vni::new(11u8).is_ok()); - assert!(Vni::new(VNI_MAX).is_ok()); + assert!(Vni::new((1u32 << 24) - 1).is_ok()); } #[test] @@ -112,7 +26,7 @@ mod test { #[test] fn vni_round_trip() { let vni = Vni::new(7777u32).unwrap(); - assert_eq!([0x00, 0x1E, 0x61], vni.inner); + assert_eq!([0x00, 0x1E, 0x61], vni.bytes()); assert_eq!(7777, u32::from(vni)); } } diff --git a/crates/opte-api/src/ip.rs b/crates/opte-api/src/ip.rs index bfdcf689..c41b1017 100644 --- a/crates/opte-api/src/ip.rs +++ b/crates/opte-api/src/ip.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/.
-// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company use super::mac::MacAddr; use crate::DomainName; @@ -15,6 +15,7 @@ use core::fmt::Display; use core::ops::Deref; use core::result; use core::str::FromStr; +use ingot::types::NetworkRepr; use serde::Deserialize; use serde::Serialize; @@ -432,15 +433,13 @@ impl Ipv4Addr { } } -#[cfg(any(feature = "std", test))] -impl From<std::net::Ipv4Addr> for Ipv4Addr { - fn from(ip4: std::net::Ipv4Addr) -> Self { +impl From<core::net::Ipv4Addr> for Ipv4Addr { + fn from(ip4: core::net::Ipv4Addr) -> Self { Self { inner: ip4.octets() } } } -#[cfg(any(feature = "std", test))] -impl From<Ipv4Addr> for std::net::Ipv4Addr { +impl From<Ipv4Addr> for core::net::Ipv4Addr { fn from(ip4: Ipv4Addr) -> Self { Self::from(ip4.inner) } @@ -713,15 +712,13 @@ impl fmt::Display for Ipv6Addr { } } -#[cfg(any(feature = "std", test))] -impl From<std::net::Ipv6Addr> for Ipv6Addr { - fn from(ip6: std::net::Ipv6Addr) -> Self { +impl From<core::net::Ipv6Addr> for Ipv6Addr { + fn from(ip6: core::net::Ipv6Addr) -> Self { Self { inner: ip6.octets() } } } -#[cfg(any(feature = "std", test))] -impl From<Ipv6Addr> for std::net::Ipv6Addr { +impl From<Ipv6Addr> for core::net::Ipv6Addr { fn from(ip6: Ipv6Addr) -> Self { Self::from(ip6.inner) } @@ -1208,6 +1205,26 @@ impl From for ipnetwork::Ipv6Network { } } +impl NetworkRepr<[u8; 4]> for Ipv4Addr { + fn to_network(self) -> [u8; 4] { + self.inner + } + + fn from_network(val: [u8; 4]) -> Self { + Self { inner: val } + } +} + +impl NetworkRepr<[u8; 16]> for Ipv6Addr { + fn to_network(self) -> [u8; 16] { + self.inner + } + + fn from_network(val: [u8; 16]) -> Self { + Self { inner: val } + } +} + #[cfg(test)] mod test { use super::*; diff --git a/crates/opte-api/src/lib.rs b/crates/opte-api/src/lib.rs index 9298dbfe..21159a0a 100644 --- a/crates/opte-api/src/lib.rs +++ b/crates/opte-api/src/lib.rs @@ -39,15 +39,17 @@ pub use mac::*; pub use ndp::*; pub use ulp::*; -/// The overall version of the API. Anytime an API is added, removed, -/// or modified, this number should increment.
Currently we attach no -/// semantic meaning to the number other than as a means to verify -/// that the user and kernel are compiled for the same API. A u64 is -/// used to give future wiggle room to play bit games if neeeded. +/// The overall version of the API. +/// +/// Anytime an API is added, removed, or modified, this number should +/// increment. Currently we attach no semantic meaning to the number +/// other than as a means to verify that the user and kernel are compiled +/// for the same API. A u64 is used to give future wiggle room to play bit +/// games if needed. /// /// We rely on CI and the check-api-version.sh script to verify that /// this number is incremented anytime the oxide-api code changes. -pub const API_VERSION: u64 = 33; +pub const API_VERSION: u64 = 34; /// Major version of the OPTE package. pub const MAJOR_VERSION: u64 = 0; diff --git a/crates/opte-api/src/mac.rs b/crates/opte-api/src/mac.rs index 8a133f4b..48d70c93 100644 --- a/crates/opte-api/src/mac.rs +++ b/crates/opte-api/src/mac.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2022 Oxide Computer Company +// Copyright 2024 Oxide Computer Company use alloc::str::FromStr; use alloc::string::String; @@ -11,17 +11,37 @@ use core::fmt; use core::fmt::Debug; use core::fmt::Display; use core::ops::Deref; +use ingot::types::NetworkRepr; use serde::Deserialize; use serde::Serialize; /// A MAC address. 
#[derive( - Clone, Copy, Default, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize, + Clone, + Copy, + Default, + Deserialize, + Eq, + Ord, + PartialEq, + PartialOrd, + Serialize, + Hash, )] pub struct MacAddr { inner: [u8; 6], } +impl NetworkRepr<[u8; 6]> for MacAddr { + fn to_network(self) -> [u8; 6] { + self.inner + } + + fn from_network(val: [u8; 6]) -> Self { + Self { inner: val } + } +} + impl MacAddr { pub const BROADCAST: Self = Self { inner: [0xFF; 6] }; pub const ZERO: Self = Self { inner: [0x00; 6] }; diff --git a/dtrace/common.h b/dtrace/common.h index bb642d03..710b6301 100644 --- a/dtrace/common.h +++ b/dtrace/common.h @@ -56,5 +56,13 @@ */ #define DIR_STR(dir) ((dir) == 1 ? "IN" : "OUT") +/* + * Packet processing path. + * 1 = UFT Compiled/Fast + * 2 = UFT Hit/Medium + * 3 = UFT Miss/Slow + */ +#define PATH_STR(path) ((path) == 1 ? "FAST" : ((path) == 2 ? "MED" : "SLOW")) + #define EL_DELIMIT "->" #define EL_FMT "->%s" diff --git a/dtrace/opte-port-process.d b/dtrace/opte-port-process.d index 93b7cbc7..fa0e6546 100644 --- a/dtrace/opte-port-process.d +++ b/dtrace/opte-port-process.d @@ -6,12 +6,12 @@ #include "common.h" #include "protos.d" -#define HDR_FMT "%-12s %-3s %-8s %-43s %-43s %-5s %s\n" -#define LINE_FMT "%-12s %-3s %-8u %-43s %-43s %-5u %s\n" +#define HDR_FMT "%-12s %-3s %-8s %-43s %-43s %-5s %s %s\n" +#define LINE_FMT "%-12s %-3s %-8u %-43s %-43s %-5u %s %s\n" BEGIN { printf(HDR_FMT, "NAME", "DIR", "EPOCH", "FLOW BEFORE", "FLOW AFTER", - "LEN", "RESULT"); + "LEN", "RESULT", "PATH"); num = 0; } @@ -27,10 +27,11 @@ port-process-return { this->msgs = (derror_sdt_arg_t*) arg7; this->msg_len = this->msgs->len; this->res = stringof(""); + this->path = PATH_STR(arg8); if (num >= 10) { printf(HDR_FMT, "NAME", "DIR", "EPOCH", "FLOW BEFORE", - "FLOW AFTER", "LEN", "RESULT"); + "FLOW AFTER", "LEN", "RESULT", "PATH"); num = 0; } @@ -58,7 +59,7 @@ port-process-return /this->af == AF_INET/ { FLOW_FMT(this->s_before, this->flow_before); 
FLOW_FMT(this->s_after, this->flow_after); printf(LINE_FMT, this->name, this->dir, this->epoch, this->s_before, - this->s_after, msgsize(this->mp), this->res); + this->s_after, msgsize(this->mp), this->res, this->path); num++; } @@ -66,7 +67,7 @@ port-process-return /this->af == AF_INET6/ { FLOW_FMT6(this->s_before, this->flow_before); FLOW_FMT6(this->s_after, this->flow_after); printf(LINE_FMT, this->name, this->dir, this->epoch, this->s_before, - this->s_after, msgsize(this->mp), this->res); + this->s_after, msgsize(this->mp), this->res, this->path); num++; } diff --git a/fuzz/fuzz_targets/parse-in.rs b/fuzz/fuzz_targets/parse-in.rs index 8c55afcd..c9796faf 100644 --- a/fuzz/fuzz_targets/parse-in.rs +++ b/fuzz/fuzz_targets/parse-in.rs @@ -1,13 +1,11 @@ #![no_main] use libfuzzer_sys::fuzz_target; +use opte::ddi::mblk::MsgBlk; use opte::engine::packet::Packet; -use oxide_vpc::api::Direction; use oxide_vpc::engine::VpcParser; fuzz_target!(|data: &[u8]| { - let mut pkt = Packet::alloc_and_expand(data.len()); - let mut wtr = pkt.seg0_wtr(); - wtr.write(data).unwrap(); - pkt.parse(Direction::In, VpcParser {}); + let mut pkt_m = MsgBlk::copy(data); + let _ = Packet::parse_inbound(pkt_m.iter_mut(), VpcParser {}); }); diff --git a/fuzz/fuzz_targets/parse-out.rs b/fuzz/fuzz_targets/parse-out.rs index fbb43b2e..7806ad47 100644 --- a/fuzz/fuzz_targets/parse-out.rs +++ b/fuzz/fuzz_targets/parse-out.rs @@ -1,13 +1,11 @@ #![no_main] use libfuzzer_sys::fuzz_target; +use opte::ddi::mblk::MsgBlk; use opte::engine::packet::Packet; -use oxide_vpc::api::Direction; use oxide_vpc::engine::VpcParser; fuzz_target!(|data: &[u8]| { - let mut pkt = Packet::alloc_and_expand(data.len()); - let mut wtr = pkt.seg0_wtr(); - wtr.write(data).unwrap(); - pkt.parse(Direction::Out, VpcParser {}); + let mut pkt_m = MsgBlk::copy(data); + let _ = Packet::parse_outbound(pkt_m.iter_mut(), VpcParser {}); }); diff --git a/lib/opte-test-utils/src/dhcp.rs b/lib/opte-test-utils/src/dhcp.rs index 
02a2fc02..84ca5ce2 100644 --- a/lib/opte-test-utils/src/dhcp.rs +++ b/lib/opte-test-utils/src/dhcp.rs @@ -8,121 +8,106 @@ use super::*; use dhcpv6::protocol::MessageType; +use opte::ddi::mblk::MsgBlk; +use opte::engine::dhcp::DHCP_CLIENT_PORT; +use opte::engine::dhcp::DHCP_SERVER_PORT; use opte::engine::dhcpv6; +use opte::engine::ether::Ethernet; +use opte::engine::ip::v4::Ipv4; +use opte::engine::ip::v6::Ipv6; +use opte::ingot::ethernet::Ethertype; +use opte::ingot::ip::IpProtocol; +use opte::ingot::udp::Udp; pub use smoltcp::wire::DhcpMessageType; pub use smoltcp::wire::DhcpPacket; pub use smoltcp::wire::DhcpRepr; // Build a packet from a DHCPv4 message, from a client to server. -pub fn packet_from_client_dhcpv4_message_unparsed( +pub fn packet_from_client_dhcpv4_message( cfg: &VpcCfg, msg: &DhcpRepr, -) -> Packet { - let eth = EtherMeta { - dst: MacAddr::BROADCAST, - src: cfg.guest_mac, - ether_type: EtherType::Ipv4, +) -> MsgBlk { + let eth = Ethernet { + destination: MacAddr::BROADCAST, + source: cfg.guest_mac, + ethertype: Ethertype::IPV4, }; - let ip = Ipv4Meta { - src: Ipv4Addr::ANY_ADDR, - dst: Ipv4Addr::LOCAL_BCAST, - proto: Protocol::UDP, - total_len: (msg.buffer_len() + UdpHdr::SIZE + Ipv4Hdr::BASE_SIZE) - as u16, - + let ip = Ipv4 { + source: Ipv4Addr::ANY_ADDR, + destination: Ipv4Addr::LOCAL_BCAST, + protocol: IpProtocol::UDP, + total_len: (msg.buffer_len() + + Udp::MINIMUM_LENGTH + + Ipv4::MINIMUM_LENGTH) as u16, ..Default::default() }; - let udp = UdpMeta { - src: 68, - dst: 67, - len: (UdpHdr::SIZE + msg.buffer_len()) as u16, + let udp = Udp { + source: DHCP_CLIENT_PORT, + destination: DHCP_SERVER_PORT, + length: (Udp::MINIMUM_LENGTH + msg.buffer_len()) as u16, ..Default::default() }; - let reply_len = - msg.buffer_len() + UdpHdr::SIZE + Ipv6Hdr::BASE_SIZE + EtherHdr::SIZE; - let mut pkt = Packet::alloc_and_expand(reply_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - 
ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); + let headers = (eth, ip, udp); + let total_len = msg.buffer_len() + headers.packet_length(); - let mut msg_buf = vec![0; msg.buffer_len()]; - let mut dhcp_pkt = DhcpPacket::new_checked(&mut msg_buf).unwrap(); + let mut pkt = MsgBlk::new_ethernet(total_len); + pkt.emit_back(&headers).unwrap(); + let dhcp_off = pkt.len(); + pkt.resize(total_len).unwrap(); + let mut dhcp_pkt = DhcpPacket::new_checked(&mut pkt[dhcp_off..]).unwrap(); msg.emit(&mut dhcp_pkt).unwrap(); - wtr.write(&msg_buf).unwrap(); + pkt } // Build a packet from a DHCPv6 message, from a client to server. -pub fn packet_from_client_dhcpv6_message_unparsed( +pub fn packet_from_client_dhcpv6_message( cfg: &VpcCfg, msg: &dhcpv6::protocol::Message<'_>, -) -> Packet { - let eth = EtherMeta { - dst: dhcpv6::ALL_RELAYS_AND_SERVERS.multicast_mac().unwrap(), - src: cfg.guest_mac, - ether_type: EtherType::Ipv6, +) -> MsgBlk { + let eth = Ethernet { + destination: dhcpv6::ALL_RELAYS_AND_SERVERS.multicast_mac().unwrap(), + source: cfg.guest_mac, + ethertype: Ethertype::IPV6, }; - let ip = Ipv6Meta { - src: Ipv6Addr::from_eui64(&cfg.guest_mac), - dst: dhcpv6::ALL_RELAYS_AND_SERVERS, - proto: Protocol::UDP, - next_hdr: IpProtocol::Udp, - pay_len: (msg.buffer_len() + UdpHdr::SIZE) as u16, + let ip = Ipv6 { + source: Ipv6Addr::from_eui64(&cfg.guest_mac), + destination: dhcpv6::ALL_RELAYS_AND_SERVERS, + next_header: IpProtocol::UDP, + payload_len: (msg.buffer_len() + Udp::MINIMUM_LENGTH) as u16, ..Default::default() }; - let udp = UdpMeta { - src: dhcpv6::CLIENT_PORT, - dst: dhcpv6::SERVER_PORT, - len: (UdpHdr::SIZE + msg.buffer_len()) as u16, + let udp = Udp { + source: dhcpv6::CLIENT_PORT, + destination: dhcpv6::SERVER_PORT, + length: ip.payload_len, ..Default::default() }; - write_dhcpv6_packet_unparsed(eth, ip, udp, msg) + write_dhcpv6_packet(eth, ip, udp, msg) } -pub fn packet_from_client_dhcpv6_message( - cfg: 
&VpcCfg, +pub fn write_dhcpv6_packet( + eth: Ethernet, + ip: Ipv6, + udp: Udp, msg: &dhcpv6::protocol::Message<'_>, -) -> Packet { - packet_from_client_dhcpv6_message_unparsed(cfg, msg) - .parse(Out, GenericUlp {}) - .unwrap() -} +) -> MsgBlk { + let headers = (eth, ip, udp); + let total_len = msg.buffer_len() + headers.packet_length(); -pub fn write_dhcpv6_packet_unparsed( - eth: EtherMeta, - ip: Ipv6Meta, - udp: UdpMeta, - msg: &dhcpv6::protocol::Message<'_>, -) -> Packet { - let reply_len = - msg.buffer_len() + UdpHdr::SIZE + Ipv6Hdr::BASE_SIZE + EtherHdr::SIZE; - let mut pkt = Packet::alloc_and_expand(reply_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); - let mut msg_buf = vec![0; msg.buffer_len()]; - msg.copy_into(&mut msg_buf).unwrap(); - wtr.write(&msg_buf).unwrap(); - pkt -} + let mut pkt = MsgBlk::new_ethernet(total_len); + pkt.emit_back(&headers).unwrap(); + let dhcp_off = pkt.len(); + pkt.resize(total_len).unwrap(); + msg.copy_into(&mut pkt[dhcp_off..]).unwrap(); -pub fn write_dhcpv6_packet( - eth: EtherMeta, - ip: Ipv6Meta, - udp: UdpMeta, - msg: &dhcpv6::protocol::Message<'_>, -) -> Packet { - write_dhcpv6_packet_unparsed(eth, ip, udp, msg) - .parse(Out, GenericUlp {}) - .unwrap() + pkt } pub fn dhcpv6_with_reasonable_defaults( diff --git a/lib/opte-test-utils/src/icmp.rs b/lib/opte-test-utils/src/icmp.rs index 2af1c6c1..ad839efe 100644 --- a/lib/opte-test-utils/src/icmp.rs +++ b/lib/opte-test-utils/src/icmp.rs @@ -7,19 +7,20 @@ //! Routines for ICMP testing. 
use opte::api::*; -use opte::engine::ether::*; -use opte::engine::ip4::*; -use opte::engine::ip6::*; -use opte::engine::packet::*; -use opte::engine::Direction::*; -use oxide_vpc::engine::VpcParser; +use opte::ddi::mblk::MsgBlk; +use opte::engine::ether::Ethernet; +use opte::engine::ip::v4::Ipv4; +use opte::engine::ip::v6::Ipv6; +use opte::engine::ip::L3; +use opte::ingot::ethernet::Ethertype; +use opte::ingot::ip::IpProtocol as IngotIpProto; +use opte::ingot::types::HeaderLen; use smoltcp::phy::ChecksumCapabilities as CsumCapab; use smoltcp::wire::Icmpv4Packet; use smoltcp::wire::Icmpv4Repr; use smoltcp::wire::Icmpv6Packet; use smoltcp::wire::Icmpv6Repr; use smoltcp::wire::IpAddress; -use smoltcp::wire::IpProtocol; use smoltcp::wire::Ipv6Address; use smoltcp::wire::NdiscNeighborFlags; pub use smoltcp::wire::NdiscRepr; @@ -40,7 +41,7 @@ pub fn gen_icmp_echo_req( seq_no: u16, data: &[u8], segments: usize, -) -> Packet { +) -> MsgBlk { match (ip_src, ip_dst) { (IpAddr::Ip4(src), IpAddr::Ip4(dst)) => gen_icmpv4_echo_req( eth_src, eth_dst, src, dst, ident, seq_no, data, segments, @@ -62,7 +63,7 @@ pub fn gen_icmpv4_echo_req( seq_no: u16, data: &[u8], segments: usize, -) -> Packet { +) -> MsgBlk { let etype = IcmpEchoType::Req; gen_icmp_echo( etype, eth_src, eth_dst, ip_src, ip_dst, ident, seq_no, data, segments, @@ -79,7 +80,7 @@ pub fn gen_icmp_echo_reply( seq_no: u16, data: &[u8], segments: usize, -) -> Packet { +) -> MsgBlk { match (ip_src, ip_dst) { (IpAddr::Ip4(src), IpAddr::Ip4(dst)) => gen_icmpv4_echo_reply( eth_src, eth_dst, src, dst, ident, seq_no, data, segments, @@ -101,7 +102,7 @@ pub fn gen_icmpv4_echo_reply( seq_no: u16, data: &[u8], segments: usize, -) -> Packet { +) -> MsgBlk { let etype = IcmpEchoType::Reply; gen_icmp_echo( etype, eth_src, eth_dst, ip_src, ip_dst, ident, seq_no, data, segments, @@ -118,8 +119,8 @@ pub fn gen_icmp_echo( ident: u16, seq_no: u16, data: &[u8], - segments: usize, -) -> Packet { + n_segments: usize, +) -> MsgBlk { let icmp = 
match etype { IcmpEchoType::Req => Icmpv4Repr::EchoRequest { ident, seq_no, data }, IcmpEchoType::Reply => Icmpv4Repr::EchoReply { ident, seq_no, data }, @@ -128,123 +129,59 @@ pub fn gen_icmp_echo( let mut icmp_pkt = Icmpv4Packet::new_unchecked(&mut icmp_bytes); icmp.emit(&mut icmp_pkt, &Default::default()); - let mut ip4 = Ipv4Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::ICMP, - total_len: (Ipv4Hdr::BASE_SIZE + icmp.buffer_len()) as u16, - ..Default::default() + let eth = Ethernet { + destination: eth_dst, + source: eth_src, + ethertype: Ethertype::IPV4, }; - ip4.compute_hdr_csum(); - let eth = - &EtherMeta { dst: eth_dst, src: eth_src, ether_type: EtherType::Ipv4 }; - let total_len = EtherHdr::SIZE + ip4.hdr_len() + icmp.buffer_len(); + let mut ip: L3<&mut [u8]> = Ipv4 { + source: ip_src, + destination: ip_dst, + protocol: IngotIpProto::ICMP, + total_len: (icmp.buffer_len() + Ipv4::MINIMUM_LENGTH) as u16, + ..Default::default() + } + .into(); + ip.compute_checksum(); + + let mut segments = vec![]; - match segments { + match n_segments { 1 => { - let mut pkt = Packet::alloc_and_expand(total_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - wtr.write(&icmp_bytes).unwrap(); - pkt.parse(Out, VpcParser::new()).unwrap() + return MsgBlk::new_ethernet_pkt((&eth, &ip, &icmp_bytes)); } 2 => { - let mut pkt = Packet::alloc_and_expand(EtherHdr::SIZE); - let mut wtr = pkt.seg_wtr(0); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - let mut wtr = - pkt.add_seg(ip4.hdr_len() + icmp_bytes.len()).unwrap(); - ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - wtr.write(&icmp_bytes).unwrap(); - pkt.parse(Out, VpcParser::new()).unwrap() + segments.push(MsgBlk::new_ethernet_pkt(eth)); + segments.push(MsgBlk::new_pkt((&ip, &icmp_bytes))); } 3 => { - let mut pkt = Packet::alloc_and_expand(EtherHdr::SIZE); - let mut wtr = pkt.seg_wtr(0); -
eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - let mut wtr = pkt.add_seg(ip4.hdr_len()).unwrap(); - ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - let mut wtr = pkt.add_seg(icmp_bytes.len()).unwrap(); - wtr.write(&icmp_bytes).unwrap(); - pkt.parse(Out, VpcParser::new()).unwrap() + segments.push(MsgBlk::new_ethernet_pkt(eth)); + segments.push(MsgBlk::new_pkt(ip)); + segments.push(MsgBlk::new_pkt(&icmp_bytes)); + } + 4 => { + // Used to test pullup behaviour around longer mblks + // which still have pkt bodies in guest memory. + assert!(icmp_bytes.len() > 8); + segments.push(MsgBlk::new_ethernet_pkt(eth)); + segments.push(MsgBlk::new_pkt(ip)); + segments.push(MsgBlk::new_pkt(&icmp_bytes[..8])); + segments.push(MsgBlk::new_pkt(&icmp_bytes[8..])); } _ => { panic!("only 1 2 or 3 segments allowed") } } -} -#[allow(clippy::too_many_arguments)] -pub fn gen_icmp_echo_unparsed( - etype: IcmpEchoType, - eth_src: MacAddr, - eth_dst: MacAddr, - ip_src: Ipv4Addr, - ip_dst: Ipv4Addr, - ident: u16, - seq_no: u16, - data: &[u8], - segments: usize, -) -> Packet { - let icmp = match etype { - IcmpEchoType::Req => Icmpv4Repr::EchoRequest { ident, seq_no, data }, - IcmpEchoType::Reply => Icmpv4Repr::EchoReply { ident, seq_no, data }, - }; - let mut icmp_bytes = vec![0u8; icmp.buffer_len()]; - let mut icmp_pkt = Icmpv4Packet::new_unchecked(&mut icmp_bytes); - icmp.emit(&mut icmp_pkt, &Default::default()); - - let mut ip4 = Ipv4Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::ICMP, - total_len: (Ipv4Hdr::BASE_SIZE + icmp.buffer_len()) as u16, - ..Default::default() - }; - ip4.compute_hdr_csum(); - let eth = - &EtherMeta { dst: eth_dst, src: eth_src, ether_type: EtherType::Ipv4 }; - - let total_len = EtherHdr::SIZE + ip4.hdr_len() + icmp.buffer_len(); + while segments.len() > 1 { + let chain = segments.pop().unwrap(); + let new_el = segments.last_mut().unwrap(); - match segments { - 1 => { - let mut pkt = Packet::alloc_and_expand(total_len); - let mut wtr = pkt.seg0_wtr(); 
- eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - wtr.write(&icmp_bytes).unwrap(); - - pkt - } - 2 => { - let mut pkt = Packet::alloc_and_expand(EtherHdr::SIZE); - let mut wtr = pkt.seg_wtr(0); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - let mut wtr = - pkt.add_seg(ip4.hdr_len() + icmp_bytes.len()).unwrap(); - ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - wtr.write(&icmp_bytes).unwrap(); - - pkt - } - 3 => { - let mut pkt = Packet::alloc_and_expand(EtherHdr::SIZE); - let mut wtr = pkt.seg_wtr(0); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - let mut wtr = pkt.add_seg(ip4.hdr_len()).unwrap(); - ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - let mut wtr = pkt.add_seg(icmp_bytes.len()).unwrap(); - wtr.write(&icmp_bytes).unwrap(); - - pkt - } - _ => { - panic!("only 1 2 or 3 segments allowed") - } + new_el.append(chain); } + + segments.pop().unwrap() } #[allow(clippy::too_many_arguments)] @@ -257,7 +194,7 @@ pub fn gen_icmpv6_echo_req( seq_no: u16, data: &[u8], segments: usize, -) -> Packet { +) -> MsgBlk { let etype = IcmpEchoType::Req; gen_icmpv6_echo( etype, eth_src, eth_dst, ip_src, ip_dst, ident, seq_no, data, segments, @@ -274,7 +211,7 @@ pub fn gen_icmpv6_echo_reply( seq_no: u16, data: &[u8], segments: usize, -) -> Packet { +) -> MsgBlk { let etype = IcmpEchoType::Reply; gen_icmpv6_echo( etype, eth_src, eth_dst, ip_src, ip_dst, ident, seq_no, data, segments, @@ -291,27 +228,8 @@ pub fn gen_icmpv6_echo( ident: u16, seq_no: u16, data: &[u8], - segments: usize, -) -> Packet { - gen_icmpv6_echo_unparsed( - etype, eth_src, eth_dst, ip_src, ip_dst, ident, seq_no, data, segments, - ) - .parse(Out, VpcParser::new()) - .unwrap() -} - -#[allow(clippy::too_many_arguments)] -pub fn gen_icmpv6_echo_unparsed( - etype: IcmpEchoType, - eth_src: MacAddr, - eth_dst: MacAddr, - ip_src: Ipv6Addr, - ip_dst: Ipv6Addr, - ident: u16, - seq_no: u16, - data: &[u8], - segments: usize, -) -> Packet { + 
n_segments: usize, +) -> MsgBlk { let icmp = match etype { IcmpEchoType::Req => Icmpv6Repr::EchoRequest { ident, seq_no, data }, IcmpEchoType::Reply => Icmpv6Repr::EchoReply { ident, seq_no, data }, @@ -325,88 +243,94 @@ pub fn gen_icmpv6_echo_unparsed( &mut req_pkt, &Default::default(), ); - let ip6 = Ipv6Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::ICMPv6, - next_hdr: IpProtocol::Icmpv6, + + let eth = Ethernet { + destination: eth_dst, + source: eth_src, + ethertype: Ethertype::IPV6, + }; + + let ip = Ipv6 { + source: ip_src, + destination: ip_dst, + next_header: IngotIpProto::ICMP_V6, + payload_len: icmp.buffer_len() as u16, hop_limit: 64, - pay_len: icmp.buffer_len() as u16, ..Default::default() }; - let eth = - &EtherMeta { dst: eth_dst, src: eth_src, ether_type: EtherType::Ipv6 }; - let total_len = EtherHdr::SIZE + ip6.hdr_len() + icmp.buffer_len(); + let mut segments = vec![]; - match segments { + match n_segments { 1 => { - let mut pkt = Packet::alloc_and_expand(total_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip6.emit(wtr.slice_mut(ip6.hdr_len()).unwrap()); - wtr.write(&body_bytes).unwrap(); - pkt + return MsgBlk::new_ethernet_pkt((&eth, &ip, &body_bytes)); } 2 => { - let mut pkt = Packet::alloc_and_expand(EtherHdr::SIZE); - let mut wtr = pkt.seg_wtr(0); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - let mut wtr = - pkt.add_seg(ip6.hdr_len() + body_bytes.len()).unwrap(); - ip6.emit(wtr.slice_mut(ip6.hdr_len()).unwrap()); - wtr.write(&body_bytes).unwrap(); - pkt + segments.push(MsgBlk::new_ethernet_pkt(eth)); + segments.push(MsgBlk::new_pkt((&ip, &body_bytes))); } 3 => { - let mut pkt = Packet::alloc_and_expand(EtherHdr::SIZE); - let mut wtr = pkt.seg_wtr(0); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - let mut wtr = pkt.add_seg(ip6.hdr_len()).unwrap(); - ip6.emit(wtr.slice_mut(ip6.hdr_len()).unwrap()); - let mut wtr = pkt.add_seg(body_bytes.len()).unwrap(); -
wtr.write(&body_bytes).unwrap(); - pkt + segments.push(MsgBlk::new_ethernet_pkt(eth)); + segments.push(MsgBlk::new_pkt(ip)); + segments.push(MsgBlk::new_pkt(&body_bytes)); + } + 4 => { + // Used to test pullup behaviour around longer mblks + // which still have pkt bodies in guest memory. + assert!(body_bytes.len() > 8); + segments.push(MsgBlk::new_ethernet_pkt(eth)); + segments.push(MsgBlk::new_pkt(ip)); + segments.push(MsgBlk::new_pkt(&body_bytes[..8])); + segments.push(MsgBlk::new_pkt(&body_bytes[8..])); } _ => { panic!("only 1 2 or 3 segments allowed") } } -} -/// Generate an NDP packet given an inner `repr`. -pub fn generate_ndisc( - repr: NdiscRepr, - src_mac: MacAddr, - dst_mac: MacAddr, - src_ip: Ipv6Addr, - dst_ip: Ipv6Addr, - with_checksum: bool, -) -> Packet { - generate_ndisc_unparsed( - repr, - src_mac, - dst_mac, - src_ip, - dst_ip, - with_checksum, - ) - .parse(Out, VpcParser::new()) - .unwrap() + while segments.len() > 1 { + let chain = segments.pop().unwrap(); + let new_el = segments.last_mut().unwrap(); + + new_el.append(chain); + } + + segments.pop().unwrap() } /// Generate an NDP packet given an inner `repr`. 
-pub fn generate_ndisc_unparsed( +pub fn generate_ndisc( repr: NdiscRepr, src_mac: MacAddr, dst_mac: MacAddr, src_ip: Ipv6Addr, dst_ip: Ipv6Addr, with_checksum: bool, -) -> Packet { +) -> MsgBlk { let req = Icmpv6Repr::Ndisc(repr); - let mut body = vec![0u8; req.buffer_len()]; - let mut req_pkt = Icmpv6Packet::new_unchecked(&mut body); + let eth = Ethernet { + destination: dst_mac, + source: src_mac, + ethertype: Ethertype::IPV6, + }; + + let ip = Ipv6 { + source: src_ip, + destination: dst_ip, + next_header: IngotIpProto::ICMP_V6, + payload_len: req.buffer_len() as u16, + hop_limit: 255, + ..Default::default() + }; + + let headers = (eth, ip); + let total_len = req.buffer_len() + headers.packet_length(); + let mut pkt = MsgBlk::new_ethernet(total_len); + pkt.emit_back(&headers).unwrap(); + let ndisc_off = pkt.len(); + pkt.resize(total_len).unwrap(); + + let mut req_pkt = Icmpv6Packet::new_unchecked(&mut pkt[ndisc_off..]); let mut csum = CsumCapab::ignored(); if with_checksum { csum.icmpv6 = smoltcp::phy::Checksum::Tx; @@ -417,24 +341,7 @@ pub fn generate_ndisc_unparsed( &mut req_pkt, &csum, ); - let ip6 = Ipv6Meta { - src: src_ip, - dst: dst_ip, - proto: Protocol::ICMPv6, - next_hdr: IpProtocol::Icmpv6, - hop_limit: 255, - pay_len: req.buffer_len() as u16, - ..Default::default() - }; - let eth = - EtherMeta { dst: dst_mac, src: src_mac, ether_type: EtherType::Ipv6 }; - - let total_len = EtherHdr::SIZE + ip6.hdr_len() + req.buffer_len(); - let mut pkt = Packet::alloc_and_expand(total_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip6.emit(wtr.slice_mut(ip6.hdr_len()).unwrap()); - wtr.write(&body).unwrap(); + pkt } @@ -443,7 +350,7 @@ pub fn generate_ndisc_unparsed( // The source MAC is used to generate the source IPv6 address, using the EUI-64 // transform. The resulting packet has a multicast MAC address, and the // All-Routers destination IPv6 address. 
-pub fn gen_router_solicitation(src_mac: &MacAddr) -> Packet { +pub fn gen_router_solicitation(src_mac: &MacAddr) -> MsgBlk { let solicit = NdiscRepr::RouterSolicit { lladdr: Some(RawHardwareAddress::from_bytes(src_mac)), }; @@ -466,7 +373,7 @@ pub fn gen_router_solicitation(src_mac: &MacAddr) -> Packet { pub fn generate_neighbor_solicitation( info: &SolicitInfo, with_checksum: bool, -) -> Packet { +) -> MsgBlk { let solicit = NdiscRepr::NeighborSolicit { target_addr: Ipv6Address::from(info.target_addr), lladdr: info.lladdr.map(|x| RawHardwareAddress::from_bytes(&x)), @@ -513,7 +420,7 @@ impl std::fmt::Display for SolicitInfo { pub fn generate_neighbor_advertisement( info: &AdvertInfo, with_checksum: bool, -) -> Packet { +) -> MsgBlk { let advert = NdiscRepr::NeighborAdvert { flags: info.flags, target_addr: info.target_addr.into(), diff --git a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index 480b7c7e..18be3bef 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -18,43 +18,50 @@ pub mod port_state; // Let's make our lives easier and pub use a bunch of stuff. 
pub use opte::api::Direction::*; pub use opte::api::MacAddr; -pub use opte::engine::ether::EtherHdr; +pub use opte::ddi::mblk::MsgBlk; +pub use opte::ddi::mblk::MsgBlkIterMut; pub use opte::engine::ether::EtherMeta; pub use opte::engine::ether::EtherType; -pub use opte::engine::geneve::GeneveHdr; +pub use opte::engine::ether::Ethernet; pub use opte::engine::geneve::GeneveMeta; pub use opte::engine::geneve::GeneveOption; pub use opte::engine::geneve::OxideOption; pub use opte::engine::geneve::Vni; +pub use opte::engine::geneve::GENEVE_OPT_CLASS_OXIDE; +pub use opte::engine::geneve::GENEVE_PORT; pub use opte::engine::headers::IpAddr; pub use opte::engine::headers::IpCidr; -pub use opte::engine::headers::IpMeta; -pub use opte::engine::headers::UlpMeta; -pub use opte::engine::ip4::Ipv4Addr; -pub use opte::engine::ip4::Ipv4Hdr; -pub use opte::engine::ip4::Ipv4Meta; -pub use opte::engine::ip4::Protocol; -pub use opte::engine::ip6::Ipv6Addr; -pub use opte::engine::ip6::Ipv6Hdr; -pub use opte::engine::ip6::Ipv6Meta; +pub use opte::engine::ip::v4::Ipv4; +pub use opte::engine::ip::v4::Ipv4Addr; +pub use opte::engine::ip::v4::Protocol; +pub use opte::engine::ip::v6::Ipv6; +pub use opte::engine::ip::v6::Ipv6Addr; +pub use opte::engine::ip::L3Repr; pub use opte::engine::layer::DenyReason; -pub use opte::engine::packet::BodyInfo; -pub use opte::engine::packet::HdrOffset; -pub use opte::engine::packet::Initialized; +pub use opte::engine::packet::LiteInPkt; +pub use opte::engine::packet::LiteOutPkt; +pub use opte::engine::packet::MblkLiteParsed; pub use opte::engine::packet::Packet; -pub use opte::engine::packet::Parsed; +pub use opte::engine::packet::ParseError; pub use opte::engine::port::meta::ActionMeta; pub use opte::engine::port::DropReason; pub use opte::engine::port::Port; pub use opte::engine::port::PortBuilder; pub use opte::engine::port::ProcessResult; pub use opte::engine::port::ProcessResult::*; -pub use opte::engine::tcp::TcpFlags; -pub use opte::engine::tcp::TcpHdr; 
-pub use opte::engine::tcp::TcpMeta; -pub use opte::engine::udp::UdpHdr; -pub use opte::engine::udp::UdpMeta; pub use opte::engine::GenericUlp; +pub use opte::engine::NetworkParser; +pub use opte::ingot::ethernet::Ethertype; +pub use opte::ingot::geneve::Geneve; +pub use opte::ingot::geneve::GeneveOpt; +pub use opte::ingot::geneve::GeneveOptionType; +pub use opte::ingot::ip::IpProtocol as IngotIpProto; +pub use opte::ingot::tcp::Tcp; +pub use opte::ingot::tcp::TcpFlags as IngotTcpFlags; +pub use opte::ingot::types::Emit; +pub use opte::ingot::types::EmitDoesNotRelyOnBufContents; +pub use opte::ingot::types::HeaderLen; +pub use opte::ingot::udp::Udp; pub use opte::ExecCtx; pub use oxide_vpc::api::AddFwRuleReq; pub use oxide_vpc::api::DhcpCfg; @@ -78,8 +85,8 @@ pub use oxide_vpc::engine::overlay; pub use oxide_vpc::engine::overlay::Virt2Boundary; pub use oxide_vpc::engine::overlay::Virt2Phys; pub use oxide_vpc::engine::overlay::VpcMappings; -use oxide_vpc::engine::overlay::BOUNDARY_SERVICES_VNI; -use oxide_vpc::engine::overlay::TUNNEL_ENDPOINT_MAC; +pub use oxide_vpc::engine::overlay::BOUNDARY_SERVICES_VNI; +pub use oxide_vpc::engine::overlay::TUNNEL_ENDPOINT_MAC; pub use oxide_vpc::engine::router; pub use oxide_vpc::engine::VpcNetwork; pub use oxide_vpc::engine::VpcParser; @@ -88,6 +95,36 @@ pub use smoltcp::wire::IpProtocol; pub use std::num::NonZeroU32; pub use std::sync::Arc; +/// Expects that a packet result is modified, and applies that modification. +#[macro_export] +macro_rules! 
expect_modified { + ($res:ident, $pkt:ident) => { + assert!( + matches!($res, Ok(Modified(_))), + "expected Modified, got {:?}", + $res + ); + #[allow(unused_assignments)] + if let Ok(Modified(spec)) = $res { + $pkt = spec.apply($pkt); + } + }; +} + +pub fn parse_inbound( + pkt: &mut MsgBlk, + parser: NP, +) -> Result, NP>, ParseError> { + Packet::parse_inbound(pkt.iter_mut(), parser) +} + +pub fn parse_outbound( + pkt: &mut MsgBlk, + parser: NP, +) -> Result, NP>, ParseError> { + Packet::parse_outbound(pkt.iter_mut(), parser) +} + // It's imperative that this list stays in sync with the layers that // makeup the VPC implementation. We verify this in the `check_layers` // test. @@ -433,109 +470,62 @@ fn set_default_fw_rules(pav: &mut PortAndVps, cfg: &VpcCfg) { update!(pav, ["set:epoch=3", "set:firewall.rules.in=3"]); } -fn verify_ulp_pkt_offsets( - pkt: &Packet, - ip: IpMeta, - ulp: UlpMeta, - body_len: usize, -) { - let mut pos = 0; - let off = pkt.hdr_offsets(); - assert_eq!( - off.inner.ether, - HdrOffset { - pkt_pos: pos, - seg_idx: 0, - seg_pos: pos, - hdr_len: EtherHdr::SIZE - }, - ); - pos += EtherHdr::SIZE; - assert_eq!( - off.inner.ip.unwrap(), - HdrOffset { - pkt_pos: pos, - seg_idx: 0, - seg_pos: pos, - hdr_len: ip.hdr_len() - }, - ); - pos += ip.hdr_len(); - assert_eq!( - off.inner.ulp.unwrap(), - HdrOffset { - pkt_pos: pos, - seg_idx: 0, - seg_pos: pos, - hdr_len: ulp.hdr_len() - }, - ); - pos += ulp.hdr_len(); - assert_eq!( - pkt.body_info(), - BodyInfo { - pkt_offset: pos, - seg_index: 0, - seg_offset: pos, - len: body_len - }, - ); -} - -pub fn ulp_pkt, U: Into>( - eth: EtherMeta, +pub fn ulp_pkt< + I: Emit + EmitDoesNotRelyOnBufContents, + U: Emit + EmitDoesNotRelyOnBufContents, +>( + eth: Ethernet, ip: I, ulp: U, body: &[u8], -) -> Packet { - let ip = ip.into(); - let ulp = ulp.into(); - let total_len = EtherHdr::SIZE + ip.hdr_len() + ulp.hdr_len() + body.len(); - let mut pkt = Packet::alloc_and_expand(total_len); - let mut wtr = pkt.seg0_wtr(); 
- eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - ulp.emit(wtr.slice_mut(ulp.hdr_len()).unwrap()); - wtr.write(body).unwrap(); - let mut pkt = pkt.parse(Out, GenericUlp {}).unwrap(); - pkt.compute_checksums(); - assert!(pkt.body_csum().is_some()); - verify_ulp_pkt_offsets(&pkt, ip, ulp, body.len()); +) -> MsgBlk { + let mut pkt = MsgBlk::new_ethernet_pkt((eth, ip, ulp, body)); + + let view = Packet::parse_outbound(pkt.iter_mut(), GenericUlp {}).unwrap(); + let mut view = view.to_full_meta(); + view.compute_checksums(); + drop(view); + + // Note: we don't need to create and act on an EmitSpec here + // because we haven't meaningfully transformed the packet. + // (processed, introduced new layers, altered options/EHs) + pkt } // Generate a packet representing the start of a TCP handshake for a // telnet session from src to dst. -pub fn tcp_telnet_syn(src: &VpcCfg, dst: &VpcCfg) -> Packet { - let body = vec![]; - let tcp = TcpMeta { - src: 7865, - dst: 23, - flags: TcpFlags::SYN, - seq: 4224936861, - ack: 0, +pub fn tcp_telnet_syn(src: &VpcCfg, dst: &VpcCfg) -> MsgBlk { + let body: &[u8] = &[]; + let tcp = Tcp { + source: 7865, + destination: 23, + flags: IngotTcpFlags::SYN, + sequence: 4224936861, + acknowledgement: 0, ..Default::default() }; - let ip4 = Ipv4Meta { - src: src.ipv4_cfg().unwrap().private_ip, - dst: dst.ipv4_cfg().unwrap().private_ip, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let ip4 = Ipv4 { + source: src.ipv4_cfg().unwrap().private_ip, + destination: dst.ipv4_cfg().unwrap().private_ip, + protocol: IngotIpProto::TCP, + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length() + body.len()) + as u16, ..Default::default() }; - let eth = EtherMeta { - ether_type: EtherType::Ipv4, - src: src.guest_mac, - dst: src.gateway_mac, + let eth = Ethernet { + destination: src.gateway_mac, + source: src.guest_mac, + ethertype: Ethertype::IPV4, }; - ulp_pkt(eth, 
ip4, tcp, &body) + ulp_pkt(eth, ip4, tcp, &[]) } pub const HTTP_SYN_OPTS_LEN: usize = 20; // Generate a packet representing the start of a TCP handshake for an // HTTP request from src to dst. -pub fn http_syn(src: &VpcCfg, dst: &VpcCfg) -> Packet { +pub fn http_syn(src: &VpcCfg, dst: &VpcCfg) -> MsgBlk { http_syn2( src.guest_mac, src.ipv4_cfg().unwrap().private_ip, @@ -551,7 +541,7 @@ pub fn http_syn2( ip_src: impl Into, eth_dst: MacAddr, ip_dst: impl Into, -) -> Packet { +) -> MsgBlk { http_syn3(eth_src, ip_src, eth_dst, ip_dst, 44490, 80) } @@ -562,11 +552,10 @@ pub fn http_syn3( ip_dst: impl Into, sport: u16, dport: u16, -) -> Packet { +) -> MsgBlk { let body = vec![]; - let mut options = [0x00; TcpHdr::MAX_OPTION_SIZE]; #[rustfmt::skip] - let bytes = [ + let options = vec![ // MSS 0x02, 0x04, 0x05, 0xb4, // SACK @@ -578,57 +567,54 @@ pub fn http_syn3( // Window Scale 0x03, 0x03, 0x01, ]; - options[0..bytes.len()].copy_from_slice(&bytes); - let options_len = bytes.len(); - - let tcp = TcpMeta { - src: sport, - dst: dport, - flags: TcpFlags::SYN, - seq: 2382112979, - ack: 0, + + let tcp = Tcp { + source: sport, + destination: dport, + sequence: 2382112979, + acknowledgement: 0, + flags: IngotTcpFlags::SYN, window_size: 64240, - options_bytes: Some(options), - options_len, - csum: [0; 2], + options, + ..Default::default() }; - let (ether_type, ip): (_, IpMeta) = match (ip_src.into(), ip_dst.into()) { - (IpAddr::Ip4(src), IpAddr::Ip4(dst)) => ( - EtherType::Ipv4, - Ipv4Meta { - src, - dst, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) - as u16, - ttl: 64, - ident: 2662, + + let (ethertype, ip) = match (ip_src.into(), ip_dst.into()) { + (IpAddr::Ip4(source), IpAddr::Ip4(destination)) => ( + Ethertype::IPV4, + L3Repr::Ipv4(Ipv4 { + total_len: (Ipv4::MINIMUM_LENGTH + + tcp.packet_length() + + body.len()) as u16, + identification: 2662, + hop_limit: 64, + protocol: IngotIpProto::TCP, + source, + destination, 
..Default::default() - } - .into(), + }), ), - (IpAddr::Ip6(src), IpAddr::Ip6(dst)) => ( - EtherType::Ipv6, - Ipv6Meta { - src, - dst, - proto: Protocol::TCP, - next_hdr: IpProtocol::Tcp, - pay_len: (tcp.hdr_len() + body.len()) as u16, + (IpAddr::Ip6(source), IpAddr::Ip6(destination)) => ( + Ethertype::IPV6, + L3Repr::Ipv6(Ipv6 { + payload_len: (tcp.packet_length() + body.len()) as u16, + next_header: IngotIpProto::TCP, + hop_limit: 64, + source, + destination, ..Default::default() - } - .into(), + }), ), _ => panic!("source and destination must be the same IP version"), }; // Any packet from the guest is always addressed to the gateway. - let eth = EtherMeta { ether_type, src: eth_src, dst: eth_dst }; + let eth = Ethernet { destination: eth_dst, source: eth_src, ethertype }; ulp_pkt(eth, ip, tcp, &body) } // Generate a packet representing the SYN+ACK reply to `http_tcp_syn()`, // from g1 to g2. -pub fn http_syn_ack(src: &VpcCfg, dst: &VpcCfg) -> Packet { +pub fn http_syn_ack(src: &VpcCfg, dst: &VpcCfg) -> MsgBlk { http_syn_ack2( src.guest_mac, src.ipv4().private_ip, @@ -646,46 +632,46 @@ pub fn http_syn_ack2( eth_dst: MacAddr, ip_dst: impl Into, dport: u16, -) -> Packet { +) -> MsgBlk { let body = vec![]; - let tcp = TcpMeta { - src: 80, - dst: dport, - flags: TcpFlags::SYN | TcpFlags::ACK, - seq: 44161351, - ack: 2382112980, + let tcp = Tcp { + source: 80, + destination: dport, + sequence: 44161351, + acknowledgement: 2382112980, + flags: IngotTcpFlags::SYN | IngotTcpFlags::ACK, ..Default::default() }; - let (ether_type, ip): (_, IpMeta) = match (ip_src.into(), ip_dst.into()) { - (IpAddr::Ip4(src), IpAddr::Ip4(dst)) => ( - EtherType::Ipv4, - Ipv4Meta { - src, - dst, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) - as u16, - ttl: 64, - ident: 2662, + let (ethertype, ip) = match (ip_src.into(), ip_dst.into()) { + (IpAddr::Ip4(source), IpAddr::Ip4(destination)) => ( + Ethertype::IPV4, + L3Repr::Ipv4(Ipv4 { + total_len: 
(Ipv4::MINIMUM_LENGTH + + tcp.packet_length() + + body.len()) as u16, + identification: 2662, + hop_limit: 64, + protocol: IngotIpProto::TCP, + source, + destination, ..Default::default() - } - .into(), + }), ), - (IpAddr::Ip6(src), IpAddr::Ip6(dst)) => ( - EtherType::Ipv6, - Ipv6Meta { - src, - dst, - proto: Protocol::TCP, - next_hdr: IpProtocol::Tcp, - pay_len: (tcp.hdr_len() + body.len()) as u16, + (IpAddr::Ip6(source), IpAddr::Ip6(destination)) => ( + Ethertype::IPV6, + L3Repr::Ipv6(Ipv6 { + payload_len: (tcp.packet_length() + body.len()) as u16, + next_header: IngotIpProto::TCP, + hop_limit: 64, + source, + destination, ..Default::default() - } - .into(), + }), ), _ => panic!("source and destination must be the same IP version"), }; - let eth = EtherMeta { ether_type, src: eth_src, dst: eth_dst }; + + let eth = Ethernet { destination: eth_dst, source: eth_src, ethertype }; ulp_pkt(eth, ip, tcp, &body) } @@ -694,25 +680,29 @@ pub fn http_ack2( ip_src: Ipv4Addr, eth_dst: MacAddr, ip_dst: Ipv4Addr, -) -> Packet { +) -> MsgBlk { let body = vec![]; - let tcp = TcpMeta { - src: 44490, - dst: 80, - flags: TcpFlags::ACK, - seq: 2382112980, - ack: 44161352, + let tcp = Tcp { + source: 44490, + destination: 80, + sequence: 2382112980, + acknowledgement: 44161352, + flags: IngotTcpFlags::ACK, ..Default::default() }; - let ip4 = Ipv4Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let ip4 = Ipv4 { + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length() + body.len()) + as u16, + protocol: IngotIpProto::TCP, + source: ip_src, + destination: ip_dst, ..Default::default() }; - let eth = - EtherMeta { ether_type: EtherType::Ipv4, src: eth_src, dst: eth_dst }; + let eth = Ethernet { + destination: eth_dst, + source: eth_src, + ethertype: Ethertype::IPV4, + }; ulp_pkt(eth, ip4, tcp, &body) } @@ -721,27 +711,31 @@ pub fn http_get2( ip_src: Ipv4Addr, eth_dst: MacAddr, ip_dst: Ipv4Addr, -) -> 
Packet { +) -> MsgBlk { // The details of the HTTP body are irrelevant to our testing. You // only need know it's 18 characters for the purposes of seq/ack. - let body = "GET / HTTP/1.1\r\n\r\n".as_bytes(); - let tcp = TcpMeta { - src: 44490, - dst: 80, - flags: TcpFlags::PSH | TcpFlags::ACK, - seq: 2382112980, - ack: 44161352, + let body = b"GET / HTTP/1.1\r\n\r\n"; + let tcp = Tcp { + source: 44490, + destination: 80, + sequence: 2382112980, + acknowledgement: 44161352, + flags: IngotTcpFlags::PSH | IngotTcpFlags::ACK, ..Default::default() }; - let ip4 = Ipv4Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let ip4 = Ipv4 { + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length() + body.len()) + as u16, + protocol: IngotIpProto::TCP, + source: ip_src, + destination: ip_dst, ..Default::default() }; - let eth = - EtherMeta { ether_type: EtherType::Ipv4, src: eth_src, dst: eth_dst }; + let eth = Ethernet { + destination: eth_dst, + source: eth_src, + ethertype: Ethertype::IPV4, + }; ulp_pkt(eth, ip4, tcp, body) } @@ -751,25 +745,29 @@ pub fn http_get_ack2( eth_dst: MacAddr, ip_dst: Ipv4Addr, dst_port: u16, -) -> Packet { +) -> MsgBlk { let body = vec![]; - let tcp = TcpMeta { - src: 80, - dst: dst_port, - flags: TcpFlags::ACK, - seq: 44161353, - ack: 2382112998, + let tcp = Tcp { + source: 80, + destination: dst_port, + sequence: 44161353, + acknowledgement: 2382112998, + flags: IngotTcpFlags::ACK, ..Default::default() }; - let ip4 = Ipv4Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let ip4 = Ipv4 { + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length() + body.len()) + as u16, + protocol: IngotIpProto::TCP, + source: ip_src, + destination: ip_dst, ..Default::default() }; - let eth = - EtherMeta { ether_type: EtherType::Ipv4, src: eth_src, dst: eth_dst }; + let eth = Ethernet { + destination: 
eth_dst, + source: eth_src, + ethertype: Ethertype::IPV4, + }; ulp_pkt(eth, ip4, tcp, &body) } @@ -779,27 +777,31 @@ pub fn http_301_reply2( eth_dst: MacAddr, ip_dst: Ipv4Addr, dst_port: u16, -) -> Packet { +) -> MsgBlk { // The details of the HTTP body are irrelevant to our testing. You // only need know it's 34 characters for the purposes of seq/ack. let body = "HTTP/1.1 301 Moved Permanently\r\n\r\n".as_bytes(); - let tcp = TcpMeta { - src: 80, - dst: dst_port, - flags: TcpFlags::PSH | TcpFlags::ACK, - seq: 44161353, - ack: 2382112998, + let tcp = Tcp { + source: 80, + destination: dst_port, + sequence: 44161353, + acknowledgement: 2382112998, + flags: IngotTcpFlags::PSH | IngotTcpFlags::ACK, ..Default::default() }; - let ip4 = Ipv4Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let ip4 = Ipv4 { + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length() + body.len()) + as u16, + protocol: IngotIpProto::TCP, + source: ip_src, + destination: ip_dst, ..Default::default() }; - let eth = - EtherMeta { ether_type: EtherType::Ipv4, src: eth_src, dst: eth_dst }; + let eth = Ethernet { + destination: eth_dst, + source: eth_src, + ethertype: Ethertype::IPV4, + }; ulp_pkt(eth, ip4, tcp, body) } @@ -808,25 +810,29 @@ pub fn http_301_ack2( ip_src: Ipv4Addr, eth_dst: MacAddr, ip_dst: Ipv4Addr, -) -> Packet { +) -> MsgBlk { let body = vec![]; - let tcp = TcpMeta { - src: 44490, - dst: 80, - flags: TcpFlags::ACK, - seq: 2382112998, - ack: 44161353 + 34, + let tcp = Tcp { + source: 44490, + destination: 80, + sequence: 2382112998, + acknowledgement: 44161353 + 34, + flags: IngotTcpFlags::ACK, ..Default::default() }; - let ip4 = Ipv4Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let ip4 = Ipv4 { + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length() + body.len()) + as u16, + protocol: IngotIpProto::TCP, + 
source: ip_src, + destination: ip_dst, ..Default::default() }; - let eth = - EtherMeta { ether_type: EtherType::Ipv4, src: eth_src, dst: eth_dst }; + let eth = Ethernet { + destination: eth_dst, + source: eth_src, + ethertype: Ethertype::IPV4, + }; ulp_pkt(eth, ip4, tcp, &body) } @@ -835,25 +841,29 @@ pub fn http_guest_fin2( ip_src: Ipv4Addr, eth_dst: MacAddr, ip_dst: Ipv4Addr, -) -> Packet { +) -> MsgBlk { let body = vec![]; - let tcp = TcpMeta { - src: 44490, - dst: 80, - flags: TcpFlags::ACK | TcpFlags::FIN, - seq: 2382112998, - ack: 44161353 + 34, + let tcp = Tcp { + source: 44490, + destination: 80, + sequence: 2382112998, + acknowledgement: 44161353 + 34, + flags: IngotTcpFlags::ACK | IngotTcpFlags::FIN, ..Default::default() }; - let ip4 = Ipv4Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let ip4 = Ipv4 { + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length() + body.len()) + as u16, + protocol: IngotIpProto::TCP, + source: ip_src, + destination: ip_dst, ..Default::default() }; - let eth = - EtherMeta { ether_type: EtherType::Ipv4, src: eth_src, dst: eth_dst }; + let eth = Ethernet { + destination: eth_dst, + source: eth_src, + ethertype: Ethertype::IPV4, + }; ulp_pkt(eth, ip4, tcp, &body) } @@ -863,26 +873,30 @@ pub fn http_server_ack_fin2( eth_dst: MacAddr, ip_dst: Ipv4Addr, dst_port: u16, -) -> Packet { +) -> MsgBlk { let body = vec![]; - let tcp = TcpMeta { - src: 80, - dst: dst_port, - flags: TcpFlags::ACK, - seq: 44161353 + 34, + let tcp = Tcp { + source: 80, + destination: dst_port, + sequence: 44161353 + 34, // We are ACKing the FIN, which counts as 1 byte. 
- ack: 2382112998 + 1, + acknowledgement: 2382112998 + 1, + flags: IngotTcpFlags::ACK, ..Default::default() }; - let ip4 = Ipv4Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let ip4 = Ipv4 { + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length() + body.len()) + as u16, + protocol: IngotIpProto::TCP, + source: ip_src, + destination: ip_dst, ..Default::default() }; - let eth = - EtherMeta { ether_type: EtherType::Ipv4, src: eth_src, dst: eth_dst }; + let eth = Ethernet { + destination: eth_dst, + source: eth_src, + ethertype: Ethertype::IPV4, + }; ulp_pkt(eth, ip4, tcp, &body) } @@ -892,25 +906,29 @@ pub fn http_server_fin2( eth_dst: MacAddr, ip_dst: Ipv4Addr, dst_port: u16, -) -> Packet { +) -> MsgBlk { let body = vec![]; - let tcp = TcpMeta { - src: 80, - dst: dst_port, - flags: TcpFlags::ACK | TcpFlags::FIN, - seq: 44161353 + 34, - ack: 2382112998 + 1, + let tcp = Tcp { + source: 80, + destination: dst_port, + sequence: 44161353 + 34, + acknowledgement: 2382112998 + 1, + flags: IngotTcpFlags::ACK | IngotTcpFlags::FIN, ..Default::default() }; - let ip4 = Ipv4Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let ip4 = Ipv4 { + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length() + body.len()) + as u16, + protocol: IngotIpProto::TCP, + source: ip_src, + destination: ip_dst, ..Default::default() }; - let eth = - EtherMeta { ether_type: EtherType::Ipv4, src: eth_src, dst: eth_dst }; + let eth = Ethernet { + destination: eth_dst, + source: eth_src, + ethertype: Ethertype::IPV4, + }; ulp_pkt(eth, ip4, tcp, &body) } @@ -919,26 +937,30 @@ pub fn http_guest_ack_fin2( ip_src: Ipv4Addr, eth_dst: MacAddr, ip_dst: Ipv4Addr, -) -> Packet { +) -> MsgBlk { let body = vec![]; - let tcp = TcpMeta { - src: 44490, - dst: 80, - flags: TcpFlags::ACK, - seq: 2382112998, - // We are ACKing the FIN, which counts as 1 
bytes. - ack: 44161353 + 34 + 1, + let tcp = Tcp { + source: 44490, + destination: 80, + sequence: 2382112998, + // We are ACKing the FIN, which counts as 1 byte. + acknowledgement: 44161353 + 34 + 1, + flags: IngotTcpFlags::ACK, ..Default::default() }; - let ip4 = Ipv4Meta { - src: ip_src, - dst: ip_dst, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let ip4 = Ipv4 { + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length() + body.len()) + as u16, + protocol: IngotIpProto::TCP, + source: ip_src, + destination: ip_dst, ..Default::default() }; - let eth = - EtherMeta { ether_type: EtherType::Ipv4, src: eth_src, dst: eth_dst }; + let eth = Ethernet { + destination: eth_dst, + source: eth_src, + ethertype: Ethertype::IPV4, + }; ulp_pkt(eth, ip4, tcp, &body) } @@ -955,144 +977,73 @@ pub struct TestIpPhys { /// the rack. #[must_use] pub fn encap_external( - inner_pkt: Packet, + inner_pkt: MsgBlk, src: TestIpPhys, dst: TestIpPhys, -) -> Packet { +) -> MsgBlk { _encap(inner_pkt, src, dst, true) } /// Encapsulate a guest packet. #[must_use] -pub fn encap( - inner_pkt: Packet, - src: TestIpPhys, - dst: TestIpPhys, -) -> Packet { +pub fn encap(inner_pkt: MsgBlk, src: TestIpPhys, dst: TestIpPhys) -> MsgBlk { _encap(inner_pkt, src, dst, false) } /// Encapsulate a guest packet. 
#[must_use] fn _encap( - inner_pkt: Packet, + inner_pkt: MsgBlk, src: TestIpPhys, dst: TestIpPhys, external_snat: bool, -) -> Packet { - let old_pkt = inner_pkt.all_bytes(); - - let inner_ip_len = inner_pkt.hdr_offsets().inner.ip.map(|off| off.hdr_len); +) -> MsgBlk { + let base_len = inner_pkt.byte_len(); - let inner_ulp_len = - inner_pkt.hdr_offsets().inner.ulp.map(|off| off.hdr_len); + let mut outer_geneve = Geneve { vni: dst.vni, ..Default::default() }; - let inner_len = inner_pkt.len(); + if external_snat { + let external_tag = GeneveOpt { + class: GENEVE_OPT_CLASS_OXIDE, + option_type: GeneveOptionType(OxideOption::External.opt_type()), + ..Default::default() + }; - let opt_len = if external_snat { - GeneveOption::Oxide(OxideOption::External).len() - } else { - 0 - }; + outer_geneve.opt_len += (external_tag.packet_length() >> 2) as u8; + outer_geneve.options.push(external_tag); + } - let geneve = GeneveMeta { - entropy: 99, - vni: dst.vni, - oxide_external_pkt: external_snat, + let outer_udp = Udp { + source: 99, + destination: GENEVE_PORT, + length: (base_len + Udp::MINIMUM_LENGTH + outer_geneve.packet_length()) + as u16, + ..Default::default() }; - let pay_len: u16 = (inner_len + geneve.hdr_len()).try_into().unwrap(); - assert_eq!( - pay_len as usize, - inner_len + UdpHdr::SIZE + GeneveHdr::BASE_SIZE + opt_len - ); - - let ip = Ipv6Meta { - src: src.ip, - dst: dst.ip, - pay_len, - proto: Protocol::UDP, - next_hdr: IpProtocol::Udp, + let outer_ip = Ipv6 { + source: src.ip, + destination: dst.ip, + next_header: IngotIpProto::UDP, + payload_len: outer_udp.length, ..Default::default() }; - let eth = - EtherMeta { ether_type: EtherType::Ipv6, src: src.mac, dst: dst.mac }; - - let total_len = EtherHdr::SIZE + usize::from(ip.total_len()); - let mut pkt = Packet::alloc_and_expand(total_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - geneve.emit(pay_len, 
wtr.slice_mut(geneve.hdr_len()).unwrap()); - wtr.write(&old_pkt).unwrap(); - let pkt = pkt.parse(In, VpcParser::new()).unwrap(); - let off = pkt.hdr_offsets(); - let mut pos = 0; - - assert_eq!( - off.outer.ether.unwrap(), - HdrOffset { - pkt_pos: pos, - seg_idx: 0, - seg_pos: pos, - hdr_len: eth.hdr_len() - }, - ); - pos += eth.hdr_len(); - - assert_eq!( - off.outer.ip.unwrap(), - HdrOffset { - pkt_pos: pos, - seg_idx: 0, - seg_pos: pos, - hdr_len: ip.hdr_len() - }, - ); - pos += ip.hdr_len(); - - assert_eq!( - off.outer.encap.unwrap(), - HdrOffset { - pkt_pos: pos, - seg_idx: 0, - seg_pos: pos, - hdr_len: geneve.hdr_len() - }, - ); - pos += geneve.hdr_len(); - - assert_eq!( - off.inner.ether, - HdrOffset { - pkt_pos: pos, - seg_idx: 0, - seg_pos: pos, - hdr_len: EtherHdr::SIZE - }, - ); - pos += EtherHdr::SIZE; - - if let Some(hdr_len) = inner_ip_len { - assert_eq!( - off.inner.ip.unwrap(), - HdrOffset { pkt_pos: pos, seg_idx: 0, seg_pos: pos, hdr_len }, - ); - pos += hdr_len; - } - - if let Some(hdr_len) = inner_ulp_len { - assert_eq!( - off.inner.ulp.unwrap(), - HdrOffset { pkt_pos: pos, seg_idx: 0, seg_pos: pos, hdr_len }, - ); - } + let outer_eth = Ethernet { + destination: dst.mac, + source: src.mac, + ethertype: Ethertype::IPV6, + }; - let new_pkt = pkt.all_bytes(); - assert_eq!(&new_pkt[new_pkt.len() - old_pkt.len()..], &old_pkt); + let mut encap_pkt = MsgBlk::new_ethernet_pkt(&( + outer_eth, + outer_ip, + outer_udp, + outer_geneve, + )); + encap_pkt.append(inner_pkt); - pkt + encap_pkt } /// Like `assert!`, except you also pass in the `PortAndVps` so that diff --git a/lib/opte-test-utils/src/pcap.rs b/lib/opte-test-utils/src/pcap.rs index b6267e77..6244ddbd 100644 --- a/lib/opte-test-utils/src/pcap.rs +++ b/lib/opte-test-utils/src/pcap.rs @@ -6,7 +6,7 @@ //! Routines for building packet capture files. 
-use opte::engine::packet::*; +use opte::ddi::mblk::MsgBlk; use pcap_parser::pcap; use pcap_parser::pcap::LegacyPcapBlock; use pcap_parser::pcap::PcapHeader; @@ -62,8 +62,8 @@ impl PcapBuilder { } /// Add a packet to the capture. - pub fn add_pkt(&mut self, pkt: &Packet) { - let pkt_bytes = pkt.get_rdr().copy_remaining(); + pub fn add_pkt(&mut self, pkt: &MsgBlk) { + let pkt_bytes = pkt.copy_all(); let mut block = LegacyPcapBlock { ts_sec: 7777, ts_usec: 7777, diff --git a/lib/opte/Cargo.toml b/lib/opte/Cargo.toml index 0eb965b3..dcc77f47 100644 --- a/lib/opte/Cargo.toml +++ b/lib/opte/Cargo.toml @@ -27,6 +27,9 @@ illumos-sys-hdrs.workspace = true kstat-macro.workspace = true opte-api.workspace = true +ingot.workspace = true + +bitflags.workspace = true cfg-if.workspace = true crc32fast = { workspace = true, optional = true } dyn-clone.workspace = true diff --git a/lib/opte/README.adoc b/lib/opte/README.adoc index 8c680485..3bf6fe79 100644 --- a/lib/opte/README.adoc +++ b/lib/opte/README.adoc @@ -357,7 +357,14 @@ which it is currently processing. The packet (`opte::engine::Packet`) abstraction forms a single view into the the underlying `mblk_t *` chain that makes up the underlying packet and its data. It attempts to hide the complexity of dealing -with mblk chains directly. +with mblk chains directly. Packets represent a set of byteslices cast +into semantically useful header types, and allow read/write access to +their fields. The `Packet` type is also responsible for computing any +changes which must be fully serialised back into the `mblk_t` chain once +OPTE has completed its processing. + +It is possible in future to support underlying buffer types other than +`mblk_t`s, but today all packets must be `mblk_t`s. === Layer Flow Table @@ -506,10 +513,9 @@ report a warning to the user)? Furthermore, you could give the engine the smarts to determine when there is a contradiction and report some kind of error.
You could also effect a sort of "last write wins" for some sequences of transformations: e.g., two modifications on the same -header. In any event, OPTE has not implemented any sort of "compiling" -of header transformations at this time. It simply builds a list, -assuming its sequence of transformations are sane, and stores said -list in the UFT. +header. OPTE implements a limited form of compilation of disjoint +transforms, and falls back to a full list when more than one transform +is applied to any one header. === SDT probes diff --git a/lib/opte/src/d_error.rs b/lib/opte/src/d_error.rs index edbe61f4..a9a88941 100644 --- a/lib/opte/src/d_error.rs +++ b/lib/opte/src/d_error.rs @@ -28,9 +28,11 @@ pub trait DError { static EMPTY_STRING: &CStr = c""; -/// An string list designed to be passed to a DTrace handler, which contains -/// the names of all `enum` discriminators encountered when resolving an error -/// or other result-like enum, as well as the data from a leaf node. +/// A string list designed to be passed to a DTrace handler. +/// +/// This contains the names of all `enum` discriminators encountered when +/// resolving an error or other result-like enum, as well as the data from a +/// leaf node. /// /// This wrapper cannot contain a null c_string pointer, so all entries are /// safe to dereference from a DTrace script. 
Additionally, it has a fixed @@ -170,7 +172,7 @@ pub struct LabelBlockIter<'a, const L: usize> { inner: &'a LabelBlock, } -impl<'a, const L: usize> Iterator for LabelBlockIter<'a, L> { +impl Iterator for LabelBlockIter<'_, L> { type Item = &'static CStr; fn next(&mut self) -> Option { @@ -193,7 +195,7 @@ impl<'a, const L: usize> Iterator for LabelBlockIter<'a, L> { } } -impl<'a, const L: usize> ExactSizeIterator for LabelBlockIter<'a, L> { +impl ExactSizeIterator for LabelBlockIter<'_, L> { fn len(&self) -> usize { self.inner.len - self.pos } diff --git a/lib/opte/src/ddi/kstat.rs b/lib/opte/src/ddi/kstat.rs index edbaa8a1..a6587356 100644 --- a/lib/opte/src/ddi/kstat.rs +++ b/lib/opte/src/ddi/kstat.rs @@ -12,6 +12,8 @@ use alloc::string::String; use core::fmt; use core::fmt::Display; +pub use kstat_macro::KStatProvider; + cfg_if! { if #[cfg(all(not(feature = "std"), not(test)))] { use alloc::ffi::CString; @@ -40,7 +42,6 @@ cfg_if! { /// /// ``` /// use opte::ddi::kstat::{self, KStatProvider, KStatU64}; -/// use kstat_macro::KStatProvider; /// /// #[derive(KStatProvider)] /// struct SomeStats { @@ -84,7 +85,6 @@ pub trait KStatProvider { /// /// ``` /// use opte::ddi::kstat::{self, KStatNamed, KStatProvider, KStatU64}; -/// use kstat_macro::KStatProvider; /// /// #[derive(KStatProvider)] /// pub struct StatProvider { diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs new file mode 100644 index 00000000..13cacfe9 --- /dev/null +++ b/lib/opte/src/ddi/mblk.rs @@ -0,0 +1,1368 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +// Copyright 2024 Oxide Computer Company + +use crate::engine::packet::BufferState; +use crate::engine::packet::Pullup; +use crate::engine::packet::SegAdjustError; +use crate::engine::packet::WrapError; +use crate::engine::packet::WriteError; +#[cfg(any(feature = "std", test))] +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::cmp::Ordering; +use core::marker::PhantomData; +use core::mem::ManuallyDrop; +use core::mem::MaybeUninit; +use core::ops::Deref; +use core::ops::DerefMut; +use core::ptr; +use core::ptr::NonNull; +use core::slice; +#[cfg(all(not(feature = "std"), not(test)))] +use illumos_sys_hdrs as ddi; +#[cfg(any(feature = "std", test))] +use illumos_sys_hdrs::c_uchar; +#[cfg(any(feature = "std", test))] +use illumos_sys_hdrs::dblk_t; +use illumos_sys_hdrs::mblk_t; +use illumos_sys_hdrs::uintptr_t; +use ingot::types::Emit; +use ingot::types::EmitDoesNotRelyOnBufContents; +use ingot::types::ParseError as IngotParseErr; +use ingot::types::Read; + +pub static MBLK_MAX_SIZE: usize = u16::MAX as usize; + +/// The head and tail of an mblk_t list. +struct MsgBlkChainInner { + head: NonNull, + tail: NonNull, +} + +/// A chain of illumos MsgBlk/`mblk_t` buffers. +/// +/// Network packets are provided by illumos as a linked list of linked lists, +/// using the `b_next` and `b_prev` fields. +/// +/// See the documentation for [`crate::engine::packet::Packet`] and/or [`MsgBlk`] +/// for full context. +// TODO: We might retool this type now that MsgBlk does not decompose +// each mblk_t into individual segments (i.e., packets could be allocated +// a lifetime via PhantomData based on whether we want to remove them from the chain or modify in place). +// Today's code is all equivalent to always using 'static, because +// we remove and re-add the mblks to work on them. +// We might also want to return either a chain/mblk_t in an enum, but +// practically XDE will always assume it has a chain from MAC. 
+pub struct MsgBlkChain(Option); + +impl MsgBlkChain { + /// Create an empty packet chain. + pub fn empty() -> Self { + Self(None) + } + + /// Convert an mblk_t packet chain into a safe source of `MsgBlk`s. + /// + /// # Safety + /// The `mp` pointer must point to an `mblk_t` allocated by + /// `allocb(9F)` or provided by some kernel API which itself used + /// one of the DDI/DKI APIs to allocate it. + /// Packets must form a valid linked list (no loops). + /// The original mblk_t pointer must not be used again. + pub unsafe fn new(mp: *mut mblk_t) -> Result { + let head = NonNull::new(mp).ok_or(WrapError::NullPtr)?; + + // Walk the chain to find the tail, and support faster append. + let mut tail = head; + while let Some(next_ptr) = NonNull::new((*tail.as_ptr()).b_next) { + tail = next_ptr; + } + + Ok(Self(Some(MsgBlkChainInner { head, tail }))) + } + + /// Removes the next packet from the top of the chain and returns + /// it, taking ownership. + pub fn pop_front(&mut self) -> Option { + if let Some(ref mut list) = &mut self.0 { + unsafe { + let curr_b = list.head; + let curr = curr_b.as_ptr(); + let next = NonNull::new((*curr).b_next); + + // Break the forward link on the packet we have access to, + // and the backward link on the next element if possible. + if let Some(next) = next { + (*next.as_ptr()).b_prev = ptr::null_mut(); + } + (*curr).b_next = ptr::null_mut(); + + // Update the current head. If the next element is null, + // we're now empty. + if let Some(next) = next { + list.head = next; + } else { + self.0 = None; + } + + Some(MsgBlk(curr_b)) + } + } else { + None + } + } + + /// Adds an owned `MsgBlk` to the end of this chain. + /// + /// Internally, this unwraps the `MsgBlk` back into an mblk_t, + /// before placing it at the tail. + pub fn append(&mut self, packet: MsgBlk) { + // Unwrap safety: a valid Packet implies a non-null mblk_t. + // Jamming `NonNull` into PacketSeg/Packet might take some + // work just to avoid this unwrap. 
+ let pkt = packet.unwrap_mblk(); + + // We're guaranteeing today that a 'static Packet has + // no neighbours and is not part of a chain. + // This simplifies tail updates in both cases (no chain walk). + unsafe { + assert!((*pkt.as_ptr()).b_prev.is_null()); + assert!((*pkt.as_ptr()).b_next.is_null()); + } + + if let Some(ref mut list) = &mut self.0 { + let pkt_p = pkt.as_ptr(); + let tail_p = list.tail.as_ptr(); + unsafe { + (*tail_p).b_next = pkt_p; + (*pkt_p).b_prev = tail_p; + // pkt_p->b_next is already null. + } + list.tail = pkt; + } else { + self.0 = Some(MsgBlkChainInner { head: pkt, tail: pkt }); + } + } + + /// Return the head of the underlying `mblk_t` packet chain and + /// consume `self`. The caller of this function now owns the + /// `mblk_t` segment chain. + pub fn unwrap_mblk(mut self) -> Option> { + self.0.take().map(|v| v.head) + } +} + +impl Drop for MsgBlkChain { + fn drop(&mut self) { + // This is a minor variation on MsgBlk's logic. illumos + // contains helper functions from STREAMS to just drop a whole + // chain. + cfg_if! { + if #[cfg(all(not(feature = "std"), not(test)))] { + // Safety: This is safe as long as the original + // `mblk_t` came from a call to `allocb(9F)` (or + // similar API). + if let Some(list) = &self.0 { + unsafe { ddi::freemsgchain(list.head.as_ptr()) }; + } + } else { + while let Some(pkt) = self.pop_front() { + drop(pkt); + } + } + } + } +} + +/// An individual illumos `mblk_t` -- a single bytestream +/// comprised of a linked list of data segments. +/// +/// To facilitate testing the OPTE core, [`MsgBlk`] is an abstraction for +/// manipulating network packets in both a `std` and `no_std` environment. +/// The first is useful for writing tests against the OPTE core engine and +/// executing them in userland, without the need for standing up a full-blown +/// virtual machine. +/// +/// The `no_std` implementation is used when running in-kernel. 
The +/// main difference is the `mblk_t` and `dblk_t` structures are coming +/// from viona (outbound/Tx) and mac (inbound/Rx), and we consume them +/// via [`MsgBlk::wrap_mblk()`]. In reality this is typically holding +/// an Ethernet _frame_, but we prefer to use the colloquial +/// nomenclature of "packet". +#[derive(Debug)] +pub struct MsgBlk(NonNull); + +impl Deref for MsgBlk { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + unsafe { + let self_ptr = self.0.as_ptr(); + let rptr = (*self_ptr).b_rptr; + let len = (*self_ptr).b_wptr.offset_from(rptr) as usize; + slice::from_raw_parts(rptr, len) + } + } +} + +impl DerefMut for MsgBlk { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { + let self_ptr = self.0.as_ptr(); + let rptr = (*self_ptr).b_rptr; + let len = (*self_ptr).b_wptr.offset_from(rptr) as usize; + slice::from_raw_parts_mut(rptr, len) + } + } +} + +impl MsgBlk { + /// Allocate a new [`MsgBlk`] containing a data buffer of `len` + /// bytes. + /// + /// The returned packet consists of exactly one segment, and the + /// underlying `dblk_t` will have only a single referent making + /// mutable access safe. + /// + /// In the kernel environment this uses `allocb(9F)` and + /// `freemsg(9F)` under the hood. + /// + /// In the `std` environment this uses a mock implementation of + /// `allocb(9F)` and `freeb(9F)`, which contains enough scaffolding + /// to satisfy OPTE's use of the underlying `mblk_t` and `dblk_t` + /// structures. + pub fn new(len: usize) -> Self { + let inner = NonNull::new(allocb(len)) + .expect("somehow failed to get an mblk..."); + + Self(inner) + } + + /// Allocates a new [`MsgBlk`] of size `buf.len()`, copying its + /// contents. + pub fn copy(buf: impl AsRef<[u8]>) -> Self { + let mut out = Self::new(buf.as_ref().len()); + // Unwrap safety -- just allocated length of input buffer. + out.write_bytes_back(buf).unwrap(); + out + } + + /// Creates a new [`MsgBlk`] using a given set of packet headers. 
+ pub fn new_pkt(emit: impl Emit + EmitDoesNotRelyOnBufContents) -> Self { + let mut pkt = Self::new(emit.packet_length()); + pkt.emit_back(emit).unwrap(); + pkt + } + + /// Returns the number of bytes available for writing ahead of the + /// read pointer in the current datablock. + pub fn head_capacity(&self) -> usize { + unsafe { + let inner = self.0.as_ptr(); + + (*inner).b_rptr.offset_from((*(*inner).b_datap).db_base) as usize + } + } + + /// Returns the number of bytes available for writing after the + /// write pointer in the current datablock. + pub fn tail_capacity(&self) -> usize { + unsafe { + let inner = self.0.as_ptr(); + + (*(*inner).b_datap).db_lim.offset_from((*inner).b_wptr) as usize + } + } + + /// Returns the number of bytes allocated in all datablocks in + /// this message. + pub fn all_segs_capacity(&self) -> usize { + self.iter() + .map(|v| unsafe { + let tail = (*v.0.b_datap).db_lim; + let head = (*v.0.b_datap).db_base; + + tail.offset_from(head) as usize + }) + .sum() + } + + /// Creates a new [`MsgBlk`] containing a data buffer of `len` + /// bytes with 2B of headroom/alignment. + /// + /// This sets up 4B alignment on all post-ethernet headers. + pub fn new_ethernet(len: usize) -> Self { + Self::new_with_headroom(2, len) + } + + /// Creates a new [`MsgBlk`] using a given set of packet headers + /// with 2B of headroom/alignment. + /// + /// This sets up 4B alignment on all post-ethernet headers. + pub fn new_ethernet_pkt( + emit: impl Emit + EmitDoesNotRelyOnBufContents, + ) -> Self { + let mut pkt = Self::new_ethernet(emit.packet_length()); + pkt.emit_back(emit).unwrap(); + pkt + } + + /// Return the number of initialised bytes in this `MsgBlk` over + /// all linked segments. + pub fn byte_len(&self) -> usize { + unsafe { count_mblk_bytes(Some(self.0)) } + } + + /// Return the number of segments in this `MsgBlk`. 
+ pub fn seg_len(&self) -> usize { + self.iter().len() + } + + /// Truncates an `MsgBlk` chain, dropping any elements such that + /// it contains at most `len` bytes. + pub fn truncate_chain(&mut self, len: usize) { + let mut seen = 0; + let mut curr = Some(self.0); + let mut old_tail = ptr::null_mut(); + + while let Some(valid_curr) = curr.take() { + let valid_curr = valid_curr.as_ptr(); + + let seg_len = usize::try_from(unsafe { + (*valid_curr).b_wptr.offset_from((*valid_curr).b_rptr) + }) + .expect("operating on packet with end before start"); + + let seen_til_now = seen; + seen += seg_len; + + if seen >= len { + let to_keep = len.saturating_sub(seen_til_now); + + // SAFETY: this will only reduce the read window of this slice, + // so derived byteslices will remain in capacity. + unsafe { + (*valid_curr).b_wptr = (*valid_curr).b_rptr.add(to_keep); + + core::ptr::swap( + &raw mut (*valid_curr).b_cont, + &raw mut old_tail, + ); + } + } else { + curr = NonNull::new(unsafe { (*valid_curr).b_cont }); + } + } + + // SAFETY: we have exclusive ownership of this element + // via self, and we have just disconnected it from the chain. + // This method also handles the nullptr case on our behalf. + drop(unsafe { Self::wrap_mblk(old_tail) }) + } + + /// Allocate a new [`MsgBlk`] containing a data buffer of size + /// `head_len + body_len`. + /// + /// The read/write pointer is set to have `head_len` bytes of + /// headroom and `body_len` bytes of capacity at the back. + pub fn new_with_headroom(head_len: usize, body_len: usize) -> Self { + let out = Self::new(head_len + body_len); + + // SAFETY: alloc is contiguous and always larger than head_len. + let mut_out = out.0.as_ptr(); + unsafe { + (*mut_out).b_rptr = (*mut_out).b_rptr.add(head_len); + (*mut_out).b_wptr = (*mut_out).b_rptr; + } + + out + } + + /// Provides a slice of length `n_bytes` at the back of an [`MsgBlk`] + /// (if capacity exists) to be initialised, before increasing `len` + /// by `n_bytes`. 
+ /// + /// # Safety + /// Users must write a value to every element of the `MaybeUninit` + /// buffer at least once in the `MsgBlk` lifecycle -- all `n_bytes` + /// are assumed to be initialised. + pub unsafe fn write_back( + &mut self, + n_bytes: usize, + f: impl FnOnce(&mut [MaybeUninit]), + ) -> Result<(), WriteError> { + let mut_out = self.0.as_ptr(); + let avail_bytes = unsafe { + (*(*mut_out).b_datap).db_lim.offset_from((*mut_out).b_wptr) + }; + + if avail_bytes < 0 || (avail_bytes as usize) < n_bytes { + return Err(WriteError::NotEnoughBytes { + available: avail_bytes.max(0) as usize, + needed: n_bytes, + }); + } + + let in_slice = unsafe { + slice::from_raw_parts_mut( + (*mut_out).b_wptr as *mut MaybeUninit, + n_bytes, + ) + }; + + f(in_slice); + + unsafe { + (*mut_out).b_wptr = (*mut_out).b_wptr.add(n_bytes); + } + + Ok(()) + } + + /// Provides a slice of length `n_bytes` at the front of an [`MsgBlk`] + /// (if capacity exists) to be initialised, before increasing `len` + /// by `n_bytes`. + /// + /// # Safety + /// Users must write a value to every element of the `MaybeUninit` + /// buffer at least once in the `MsgBlk` lifecycle -- all `n_bytes` + /// are assumed to be initialised. + pub unsafe fn write_front( + &mut self, + n_bytes: usize, + f: impl FnOnce(&mut [MaybeUninit]), + ) -> Result<(), WriteError> { + let mut_out = self.0.as_ptr(); + let avail_bytes = unsafe { + (*mut_out).b_rptr.offset_from((*(*mut_out).b_datap).db_base) + }; + + if avail_bytes < 0 || (avail_bytes as usize) < n_bytes { + return Err(WriteError::NotEnoughBytes { + available: avail_bytes.max(0) as usize, + needed: n_bytes, + }); + } + + let new_head = unsafe { (*mut_out).b_rptr.sub(n_bytes) }; + + let in_slice = unsafe { + slice::from_raw_parts_mut(new_head as *mut MaybeUninit, n_bytes) + }; + + f(in_slice); + + (*mut_out).b_rptr = new_head; + + Ok(()) + } + + /// Adjusts the write pointer for this MsgBlk, initialising any extra bytes to 0. 
+ pub fn resize(&mut self, new_len: usize) -> Result<(), WriteError> { + let len = self.len(); + match new_len.cmp(&len) { + Ordering::Less => unsafe { + let mut_inner = self.0.as_ptr(); + (*mut_inner).b_wptr = (*mut_inner).b_wptr.sub(len - new_len); + Ok(()) + }, + Ordering::Greater => unsafe { + self.write_back(new_len - len, |v| { + // MaybeUninit::fill is unstable. + let n = v.len(); + v.as_mut_ptr().write_bytes(0, n); + }) + }, + Ordering::Equal => Ok(()), + } + } + + /// Moves the read pointer for this MsgBlk back by `n` bytes, initialising the new bytes to 0. + pub fn expand_front(&mut self, n: usize) -> Result<(), SegAdjustError> { + unsafe { + self.write_front(n, |v| { + // MaybeUninit::fill is unstable. + let n = v.len(); + v.as_mut_ptr().write_bytes(0, n); + }) + .map_err(|_| SegAdjustError::StartBeforeBase) + } + } + + /// Shrink the writable/readable area by shifting the `b_rptr` by + /// `n`; effectively removing bytes from the start of the packet. + /// + /// # Errors + /// + /// `SegAdjustError::StartPastEnd`: Shifting the read pointer by + /// `n` would move `b_rptr` past `b_wptr`. + pub fn drop_front_bytes(&mut self, n: usize) -> Result<(), SegAdjustError> { + let node = self + .iter_mut() + .next() + .expect("There will always be a front element by definition"); + + node.drop_front_bytes(n) + } + + /// Emits an `ingot` packet after any bytes present in this mblk. + pub fn emit_back( + &mut self, + pkt: impl Emit + EmitDoesNotRelyOnBufContents, + ) -> Result<(), WriteError> { + unsafe { + self.write_back(pkt.packet_length(), |v| { + // Unwrap safety: write will return an Error if + // unsuccessful. + pkt.emit_uninit(v).unwrap(); + }) + } + } + + /// Emits an `ingot` packet before any bytes present in this mblk. 
+ pub fn emit_front( + &mut self, + pkt: impl Emit + EmitDoesNotRelyOnBufContents, + ) -> Result<(), WriteError> { + unsafe { + self.write_front(pkt.packet_length(), |v| { + pkt.emit_uninit(v).unwrap(); + }) + } + } + + /// Copies a byte slice into the region after any bytes present in this mblk. + pub fn write_bytes_back( + &mut self, + bytes: impl AsRef<[u8]>, + ) -> Result<(), WriteError> { + let bytes = bytes.as_ref(); + unsafe { + self.write_back(bytes.len(), |v| { + // feat(maybe_uninit_write_slice) -> copy_from_slice + // is unstable. + let uninit_src: &[MaybeUninit] = + core::mem::transmute(bytes); + v.copy_from_slice(uninit_src); + }) + } + } + + /// Copies a byte slice into the region before any bytes present in this mblk. + pub fn write_bytes_front( + &mut self, + bytes: impl AsRef<[u8]>, + ) -> Result<(), WriteError> { + let bytes = bytes.as_ref(); + unsafe { + self.write_front(bytes.len(), |v| { + // feat(maybe_uninit_write_slice) -> copy_from_slice + // is unstable. + let uninit_src: &[MaybeUninit] = + core::mem::transmute(bytes); + v.copy_from_slice(uninit_src); + }) + } + } + + /// Places another `MsgBlk` at the end of this packet's + /// b_cont chain. + pub fn append(&mut self, other: Self) { + // Find the last element in the pkt chain + // i.e., whose b_cont is null. + let mut curr = self.0.as_ptr(); + while unsafe { !(*curr).b_cont.is_null() } { + curr = unsafe { (*curr).b_cont }; + } + + unsafe { + (*curr).b_cont = other.unwrap_mblk().as_ptr(); + } + } + + /// Drop all bytes and move the cursor to the very back of the dblk. + pub fn pop_all(&mut self) { + let mut_out = self.0.as_ptr(); + unsafe { + (*mut_out).b_rptr = (*(*mut_out).b_datap).db_lim; + (*mut_out).b_wptr = (*(*mut_out).b_datap).db_lim; + } + } + + /// Returns a shared cursor over all segments in this `MsgBlk`. + pub fn iter(&self) -> MsgBlkIter { + MsgBlkIter { curr: Some(self.0), marker: PhantomData } + } + + /// Returns a mutable cursor over all segments in this `MsgBlk`. 
+ pub fn iter_mut(&mut self) -> MsgBlkIterMut { + MsgBlkIterMut { curr: Some(self.0), marker: PhantomData } + } + + /// Return the pointer address of the underlying mblk_t. + /// + /// NOTE: This is purely to allow passing the pointer value up to + /// DTrace so that the mblk can be inspected (read only) in probe + /// context. + pub fn mblk_addr(&self) -> uintptr_t { + self.0.as_ptr() as uintptr_t + } + + /// Return the head of the underlying `mblk_t` segment chain and + /// consume `self`. The caller of this function now owns the + /// `mblk_t` segment chain. + pub fn unwrap_mblk(self) -> NonNull { + let ptr_out = self.0; + _ = ManuallyDrop::new(self); + ptr_out + } + + /// Wrap the `mblk_t` packet in a [`MsgBlk`], taking ownership of + /// the `mblk_t` packet as a result. An `mblk_t` packet consists + /// of one or more `mblk_t` segments chained together via + /// `b_cont`. When the [`MsgBlk`] is dropped, the + /// underlying `mblk_t` segment chain is freed. If you wish to + /// pass on ownership you must call the [`MsgBlk::unwrap_mblk()`] + /// function. + /// + /// # Safety + /// + /// The `mp` pointer must point to an `mblk_t` allocated by + /// `allocb(9F)` or provided by some kernel API which itself used + /// one of the DDI/DKI APIs to allocate it. + /// + /// Users *must* be certain that, for any `mblk_t` in the `b_cont` chain, + /// any underlying `dblk_t`s have only a single referent (this chain) if + /// they are going to read (or &mut) the backing byteslice. This is a + /// possibility for, e.g., packets served by `viona` whose mblks after + /// the initial header pullup will point directly into guest memory (!!!). + /// We do not currently have an API for conditionally handing out slices + /// and performing pullup on the fly based on refcnt -- potentially untrusted + /// mblk uses (e.g. read/write of body segs) *must* perform a manual pullup. + /// + /// # Errors + /// + /// * Return [`WrapError::NullPtr`] if `mp` is `NULL`. 
+ /// * Return [`WrapError::Chain`] if `mp->b_next` or `mp->b_prev` are set. + pub unsafe fn wrap_mblk(ptr: *mut mblk_t) -> Result { + let inner = NonNull::new(ptr).ok_or(WrapError::NullPtr)?; + let inner_ref = inner.as_ptr(); + + if (*inner_ref).b_next.is_null() && (*inner_ref).b_prev.is_null() { + Ok(Self(inner)) + } else { + Err(WrapError::Chain) + } + } + + /// Copy out all bytes within this mblk and its successors + /// to a single contiguous buffer. + pub fn copy_all(&self) -> Vec { + let len = self.byte_len(); + let mut out = Vec::with_capacity(len); + + for node in self.iter() { + out.extend_from_slice(node) + } + + out + } + + /// Drops all empty mblks from the start of this chain where possible + /// (i.e., any empty mblk is followed by another mblk). + pub fn drop_empty_segments(&mut self) { + // We should not be creating message block continuations to zero + // sized blocks. This is not a generally expected thing and has + // caused NIC hardware to stop working. + // Stripping these out where possible is necessary. + let mut head = self.0; + let mut neighbour = unsafe { (*head.as_ptr()).b_cont }; + + let offload_info = unsafe { offload_info(head) }; + + while !neighbour.is_null() + && unsafe { (*head.as_ptr()).b_rptr == (*head.as_ptr()).b_wptr } + { + // Replace head with neighbour. + // Disconnect head from neighbour, and drop head. + unsafe { + (*head.as_ptr()).b_cont = ptr::null_mut(); + drop(MsgBlk::wrap_mblk(head.as_ptr())); + + // SAFETY: we know neighbour is non_null. + head = NonNull::new_unchecked(neighbour); + neighbour = (*head.as_ptr()).b_cont + } + } + + // Carry over offload flags and MSS information. + // SAFETY: db_struioun contains no payload-specific offsets, + // only flags pertaining to *required* offloads and the path MTU/MSS. + unsafe { + set_offload_info(head, offload_info); + } + + self.0 = head; + } +} + +/// An interior node of an [`MsgBlk`]'s chain, accessed via iterator. 
+/// +/// This supports a reduced set of operations compared to [`MsgBlk`], +/// primarily to allow (mutable) access to the inner bytes while preventing +/// iterator invalidation. +#[derive(Debug)] +pub struct MsgBlkNode(mblk_t); + +impl Deref for MsgBlkNode { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + unsafe { + let rptr = self.0.b_rptr; + let len = self.0.b_wptr.offset_from(rptr) as usize; + slice::from_raw_parts(rptr, len) + } + } +} + +impl DerefMut for MsgBlkNode { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { + let rptr = self.0.b_rptr; + let len = self.0.b_wptr.offset_from(rptr) as usize; + slice::from_raw_parts_mut(rptr, len) + } + } +} + +impl MsgBlkNode { + /// Shrink the writable/readable area by shifting the `b_rptr` by + /// `len`; effectively removing bytes from the start of the packet. + /// + /// # Errors + /// + /// `SegAdjustError::StartPastEnd`: Shifting the read pointer by + /// `len` would move `b_rptr` past `b_wptr`. + pub fn drop_front_bytes(&mut self, n: usize) -> Result<(), SegAdjustError> { + unsafe { + if self.0.b_wptr.offset_from(self.0.b_rptr) < n as isize { + return Err(SegAdjustError::StartPastEnd); + } + self.0.b_rptr = self.0.b_rptr.add(n); + } + + Ok(()) + } +} + +#[derive(Debug)] +pub struct MsgBlkIter<'a> { + curr: Option>, + marker: PhantomData<&'a MsgBlk>, +} + +#[derive(Debug)] +pub struct MsgBlkIterMut<'a> { + curr: Option>, + marker: PhantomData<&'a mut MsgBlk>, +} + +impl MsgBlkIterMut<'_> { + pub fn next_iter(&self) -> MsgBlkIter { + let curr = self + .curr + .and_then(|ptr| NonNull::new(unsafe { (*ptr.as_ptr()).b_cont })); + MsgBlkIter { curr, marker: PhantomData } + } + + pub fn next_iter_mut(&mut self) -> MsgBlkIterMut { + let curr = self + .curr + .and_then(|ptr| NonNull::new(unsafe { (*ptr.as_ptr()).b_cont })); + MsgBlkIterMut { curr, marker: PhantomData } + } +} + +impl Pullup for MsgBlkIterMut<'_> { + fn pullup(&self, prepend: Option<&[u8]>) -> MsgBlk { + let prepend = 
prepend.unwrap_or_default(); + let bytes_in_self = BufferState::len(self); + let needed_alloc = prepend.len() + bytes_in_self; + let mut new_seg = MsgBlk::new(needed_alloc); + + new_seg + .write_bytes_back(prepend) + .expect("allocated enough bytes for prepend and self"); + + let offload_info = self.curr.map(|v| unsafe { offload_info(v) }); + + if bytes_in_self != 0 { + // SAFETY: We need to make use of ptr::copy for a pullup + // because we cannot guarantee a dblk refcnt of 1 -- thus + // using Deref<[u8]> for these segments is not safe. + unsafe { + new_seg + .write_back(bytes_in_self, |mut buf| { + let mut curr = self.curr; + while let Some(valid_curr) = curr { + let valid_curr = valid_curr.as_ptr(); + let src = (*valid_curr).b_rptr; + let seg_len = usize::try_from( + (*valid_curr).b_wptr.offset_from(src), + ) + .expect("invalid mblk -- slice end before start"); + + // Safety: slice contains exactly bytes_in_self bytes (!= 0). + // Cast replicates `MaybeUninit::slice_as_mut_ptr` (unstable). + let dst = buf.as_mut_ptr() as *mut u8; + + dst.copy_from_nonoverlapping( + (*valid_curr).b_rptr, + seg_len, + ); + + curr = NonNull::new((*valid_curr).b_cont); + buf = buf.split_at_mut(seg_len).1; + } + }) + .expect("allocated enough bytes for prepend and self"); + } + } + + // Carry over offload flags and MSS information. + // SAFETY: db_struioun contains no payload-specific offsets, + // only flags pertaining to *required* offloads and the path MTU/MSS. + if let Some(info) = offload_info { + unsafe { + set_offload_info(new_seg.0, info); + } + } + + new_seg + } +} + +/// Counts the number of segments in an `mblk_t` from `head`, linked +/// via `b_cont`. +unsafe fn count_mblk_chain(mut head: Option>) -> usize { + let mut count = 0; + while let Some(valid_head) = head { + count += 1; + head = NonNull::new((*valid_head.as_ptr()).b_cont); + } + count +} + +/// Counts the number of bytes in an `mblk_t` from `head`, linked +/// via `b_cont`. 
+/// +/// This is used to avoid constructing a &[] over slices which may/may not +/// have a higher refcnt. +unsafe fn count_mblk_bytes(mut head: Option>) -> usize { + let mut count = 0; + while let Some(valid_head) = head { + let headref = valid_head.as_ptr(); + count += + (*headref).b_wptr.offset_from((*headref).b_rptr).max(0) as usize; + head = NonNull::new((*headref).b_cont); + } + count +} + +/// Copy out the opaque representation of offload flags and sizes +/// associated with this packet. +unsafe fn offload_info(head: NonNull) -> u64 { + unsafe { (*(*head.as_ptr()).b_datap).db_struioun } +} + +/// Set the opaque representation of offload flags and sizes +/// associated with this packet. +unsafe fn set_offload_info(head: NonNull, info: u64) { + unsafe { + (*(*head.as_ptr()).b_datap).db_struioun = info; + } +} + +impl<'a> Iterator for MsgBlkIter<'a> { + type Item = &'a MsgBlkNode; + + fn next(&mut self) -> Option { + if let Some(ptr) = self.curr { + self.curr = NonNull::new(unsafe { (*ptr.as_ptr()).b_cont }); + // SAFETY: MsgBlkNode has identical layout to mblk_t. + unsafe { Some(&*(ptr.as_ptr() as *const MsgBlkNode)) } + } else { + None + } + } + + fn size_hint(&self) -> (usize, Option) { + let len = unsafe { count_mblk_chain(self.curr) }; + (len, Some(len)) + } +} + +impl ExactSizeIterator for MsgBlkIter<'_> {} + +impl<'a> Read for MsgBlkIter<'a> { + type Chunk = &'a [u8]; + + fn next_chunk(&mut self) -> ingot::types::ParseResult { + self.next().ok_or(IngotParseErr::TooSmall).map(|v| v.as_ref()) + } + + fn chunks_len(&self) -> usize { + ExactSizeIterator::len(self) + } +} + +impl<'a> Iterator for MsgBlkIterMut<'a> { + type Item = &'a mut MsgBlkNode; + + fn next(&mut self) -> Option { + if let Some(ptr) = self.curr { + self.curr = NonNull::new(unsafe { (*ptr.as_ptr()).b_cont }); + // SAFETY: MsgBlkNode has identical layout to mblk_t. 
+ unsafe { Some(&mut *(ptr.as_ptr() as *mut MsgBlkNode)) } + } else { + None + } + } + + fn size_hint(&self) -> (usize, Option) { + let len = unsafe { count_mblk_chain(self.curr) }; + (len, Some(len)) + } +} + +impl ExactSizeIterator for MsgBlkIterMut<'_> {} + +impl<'a> Read for MsgBlkIterMut<'a> { + type Chunk = &'a mut [u8]; + + fn next_chunk(&mut self) -> ingot::types::ParseResult { + self.next().ok_or(IngotParseErr::TooSmall).map(|v| v.as_mut()) + } + + fn chunks_len(&self) -> usize { + ExactSizeIterator::len(self) + } +} + +impl BufferState for MsgBlkIterMut<'_> { + #[inline] + fn len(&self) -> usize { + unsafe { count_mblk_bytes(self.curr) } + } + + #[inline] + fn base_ptr(&self) -> uintptr_t { + self.curr.map(|v| v.as_ptr() as uintptr_t).unwrap_or(0) + } +} + +/// For the `no_std`/illumos kernel environment, we want the `mblk_t` +/// drop to occur at the packet level, where we can make use of +/// `freemsg(9F)`. +impl Drop for MsgBlk { + fn drop(&mut self) { + // Drop the segment chain if there is one. Consumers of MsgBlk + // will never own a packet with no segments. + // This guarantees that we only free the segment chain once. + cfg_if! { + if #[cfg(all(not(feature = "std"), not(test)))] { + // Safety: This is safe as long as the original + // `mblk_t` came from a call to `allocb(9F)` (or + // similar API). + unsafe { ddi::freemsg(self.0.as_ptr()) }; + } else { + mock_freemsg(self.0.as_ptr()); + } + } + } +} + +/// The common entry into an `allocb(9F)` implementation that works in +/// both std and `no_std` environments. +/// +/// NOTE: We do not emulate the priority argument as it is not +/// relevant to OPTE's implementation. In the case of `no_std`, we +/// always pass a priority value of `0` to `allocb(9F)`. +pub fn allocb(size: usize) -> *mut mblk_t { + assert!(size <= MBLK_MAX_SIZE); + + #[cfg(any(feature = "std", test))] + return mock_allocb(size); + + // Safety: allocb(9F) should be safe for any size equal to or + // less than MBLK_MAX_SIZE. 
+ #[cfg(all(not(feature = "std"), not(test)))] + unsafe { + ddi::allocb(size, 0) + } +} + +#[cfg(any(feature = "std", test))] +pub fn mock_allocb(size: usize) -> *mut mblk_t { + // If the requested size is 0 we mimic allocb(9F) and allocate 16 + // bytes. See `uts/common/io/stream.c`. + let size = if size == 0 { 16 } else { size }; + let buf = Vec::with_capacity(size); + mock_desballoc(buf) +} + +#[cfg(any(feature = "std", test))] +pub fn mock_desballoc(buf: Vec) -> *mut mblk_t { + let mut buf = std::mem::ManuallyDrop::new(buf); + let ptr = buf.as_mut_ptr(); + let len = buf.len(); + let avail = buf.capacity(); + + // For the purposes of mocking in std the only fields that + // matter here are the ones relating to the data buffer: + // db_base and db_lim. + let dblk = Box::new(dblk_t { + db_frtnp: ptr::null(), + db_base: ptr, + // Safety: We rely on the Vec implementation to give us + // the correct value for avail. + db_lim: unsafe { ptr.add(avail) }, + db_ref: 0, + db_type: 0, + db_flags: 0, + db_struioflag: 0, + db_cpid: 0, + db_cache: ptr::null(), + db_mblk: ptr::null(), + db_free: ptr::null(), + db_lastfree: ptr::null(), + db_cksumstart: 0, + db_cksumend: 0, + db_cksumstuff: 0, + db_struioun: 0, + db_fthdr: ptr::null(), + db_credp: ptr::null(), + }); + + let dbp = Box::into_raw(dblk); + + // For the purposes of mocking in std the only fields that + // matter are b_rptr and b_wptr. However, in the future we + // will probably want to mock segments packets via b_cont and + // packet chains via b_next. + let mblk = Box::new(mblk_t { + b_next: ptr::null_mut(), + b_prev: ptr::null_mut(), + b_cont: ptr::null_mut(), + // Safety: We know dbp is valid because we just created it. + b_rptr: unsafe { (*dbp).db_base as *mut c_uchar }, + b_wptr: unsafe { (*dbp).db_base.add(len) as *mut c_uchar }, + b_datap: dbp, + b_band: 0, + b_tag: 0, + b_flag: 0, + b_queue: ptr::null(), + }); + + let mp = Box::into_raw(mblk); + // Safety: We know dbp is valid because we just created it. 
+ unsafe { (*dbp).db_mblk = mp as *const mblk_t }; + + mp +} + +// The std equivalent to `freemsg(9F)`. +#[cfg(any(feature = "std", test))] +pub(crate) fn mock_freemsg(mut mp: *mut mblk_t) { + while !mp.is_null() { + let cont = unsafe { (*mp).b_cont }; + mock_freeb(mp); + mp = cont; + } +} + +// The std equivalent to `freeb(9F)`. +#[cfg(any(feature = "std", test))] +fn mock_freeb(mp: *mut mblk_t) { + // Safety: All of these were created safely in `mock_alloc()`. + // As long as the other methods don't do any of the following, + // this is safe: + // + // * Modify the `mp`/`dblk` pointers. + // * Increase `len` beyond `limit`. + // * Modify `limit`. + unsafe { + let bmblk = Box::from_raw(mp); + let bdblk = Box::from_raw(bmblk.b_datap); + let buffer = Vec::from_raw_parts( + bdblk.db_base, + bmblk.b_wptr.offset_from(bmblk.b_rptr) as usize, + bdblk.db_lim.offset_from(bdblk.db_base) as usize, + ); + drop(buffer); + drop(bdblk); + drop(bmblk); + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::engine::packet::Packet; + use crate::engine::packet::ParseError; + use crate::engine::GenericUlp; + use ingot::types::ParseError as IngotParseError; + + #[test] + fn zero_byte_packet() { + let mut pkt = MsgBlk::new(0); + assert_eq!(pkt.len(), 0); + assert_eq!(pkt.seg_len(), 1); + assert_eq!(pkt.tail_capacity(), 16); + + let res = Packet::parse_outbound(pkt.iter_mut(), GenericUlp {}); + match res { + Err(ParseError::IngotError(err)) => { + assert_eq!(err.header().as_str(), "inner_eth"); + assert_eq!(err.error(), &IngotParseError::TooSmall); + } + + Err(e) => panic!("expected read error, got: {:?}", e), + _ => panic!("expected failure, accidentally succeeded at parsing"), + } + + let pkt2 = MsgBlk::copy(&[]); + assert_eq!(pkt2.len(), 0); + assert_eq!(pkt2.seg_len(), 1); + assert_eq!(pkt2.tail_capacity(), 16); + let res = Packet::parse_outbound(pkt.iter_mut(), GenericUlp {}); + match res { + Err(ParseError::IngotError(err)) => { + assert_eq!(err.header().as_str(), 
"inner_eth"); + assert_eq!(err.error(), &IngotParseError::TooSmall); + } + + Err(e) => panic!("expected read error, got: {:?}", e), + _ => panic!("expected failure, accidentally succeeded at parsing"), + } + } + + #[test] + fn wrap() { + let mut buf1 = Vec::with_capacity(20); + let mut buf2 = Vec::with_capacity(2); + buf1.extend_from_slice(&[0x1, 0x2, 0x3, 0x4]); + buf2.extend_from_slice(&[0x5, 0x6]); + let mp1 = mock_desballoc(buf1); + let mp2 = mock_desballoc(buf2); + + unsafe { + (*mp1).b_cont = mp2; + } + + let pkt = unsafe { MsgBlk::wrap_mblk(mp1).unwrap() }; + assert_eq!(pkt.seg_len(), 2); + assert_eq!(pkt.all_segs_capacity(), 22); + assert_eq!(pkt.byte_len(), 6); + } + + #[test] + fn read_seg() { + let buf1 = vec![0x1, 0x2, 0x3, 0x4]; + let buf2 = vec![0x5, 0x6]; + let mp1 = mock_desballoc(buf1); + let mp2 = mock_desballoc(buf2); + + unsafe { + (*mp1).b_cont = mp2; + } + + let pkt = unsafe { MsgBlk::wrap_mblk(mp1).unwrap() }; + assert_eq!(pkt.byte_len(), 6); + assert_eq!(pkt.seg_len(), 2); + + let mut segs = pkt.iter(); + assert_eq!(segs.next().map(|v| &v[..]).unwrap(), &[0x1, 0x2, 0x3, 0x4]); + assert_eq!(segs.next().map(|v| &v[..]).unwrap(), &[0x5, 0x6]); + } + + #[test] + fn truncate() { + let mut p1 = MsgBlk::copy(&[0, 1, 2, 3]); + p1.append(MsgBlk::copy(&[4, 5, 6, 7])); + p1.append(MsgBlk::copy(&[8, 9, 10, 11])); + + assert_eq!(p1.seg_len(), 3); + assert_eq!(p1.byte_len(), 12); + + // Assert drop of followup segments. + p1.truncate_chain(7); + assert_eq!(p1.seg_len(), 2); + assert_eq!(p1.byte_len(), 7); + let mut iter = p1.iter(); + let el1 = iter.next().unwrap(); + let el2 = iter.next().unwrap(); + assert_eq!(&el1[..], &[0, 1, 2, 3]); + assert_eq!(&el2[..], &[4, 5, 6]); + } + + // Verify uninitialized packet. 
+ #[test] + fn uninitialized_packet() { + let pkt = MsgBlk::new(200); + assert_eq!(pkt.len(), 0); + assert_eq!(pkt.seg_len(), 1); + assert_eq!(pkt.tail_capacity(), 200); + } + + #[test] + fn expand_and_shrink() { + let mut seg = MsgBlk::new(18); + assert_eq!(seg.len(), 0); + seg.resize(18).unwrap(); + assert_eq!(seg.len(), 18); + seg.drop_front_bytes(4).unwrap(); + assert_eq!(seg.len(), 14); + seg.expand_front(4).unwrap(); + assert_eq!(seg.len(), 18); + + assert!(seg.resize(20).is_err()); + assert!(seg.drop_front_bytes(20).is_err()); + assert!(seg.expand_front(4).is_err()); + } + + #[test] + fn prefix_len() { + let mut seg = MsgBlk::new(18); + assert_eq!(seg.head_capacity(), 0); + seg.resize(18).unwrap(); + assert_eq!(seg.head_capacity(), 0); + seg.drop_front_bytes(4).unwrap(); + assert_eq!(seg.head_capacity(), 4); + seg.expand_front(4).unwrap(); + assert_eq!(seg.head_capacity(), 0); + } + + // Verify that we do not panic when we get long chains of mblks linked by + // `b_cont`. This is a regression test for + // https://github.com/oxidecomputer/opte/issues/335 + #[test] + fn test_long_packet_continuation() { + const N_SEGMENTS: usize = 8; + let mut blocks: Vec<*mut mblk_t> = Vec::with_capacity(N_SEGMENTS); + for i in 0..N_SEGMENTS { + let mp = allocb(32); + + // Link previous block to this one. + if i > 0 { + let prev = blocks[i - 1]; + unsafe { + (*prev).b_cont = mp; + } + } + blocks.push(mp); + } + + // Wrap the first mblk in a Packet, and check that we still have a + // reference to everything. 
+ let packet = unsafe { MsgBlk::wrap_mblk(blocks[0]) } + .expect("Failed to wrap mblk chain with many segments"); + + assert_eq!(packet.seg_len(), N_SEGMENTS); + for (seg, mblk) in packet.iter().zip(blocks) { + assert_eq!(core::ptr::addr_of!(seg.0) as *mut _, mblk); + } + } + + fn create_linked_mblks(n: usize) -> Vec<*mut mblk_t> { + let mut els = vec![]; + for _ in 0..n { + els.push(allocb(8)); + } + + // connect the elements in a chain + for (lhs, rhs) in els.iter().zip(els[1..].iter()) { + unsafe { + (**lhs).b_next = *rhs; + (**rhs).b_prev = *lhs; + } + } + + els + } + + #[test] + fn chain_has_correct_ends() { + let els = create_linked_mblks(3); + + let chain = unsafe { MsgBlkChain::new(els[0]) }.unwrap(); + let chain_inner = chain.0.as_ref().unwrap(); + assert_eq!(chain_inner.head.as_ptr(), els[0]); + assert_eq!(chain_inner.tail.as_ptr(), els[2]); + } + + #[test] + fn chain_breaks_links() { + let els = create_linked_mblks(3); + + let mut chain = unsafe { MsgBlkChain::new(els[0]) }.unwrap(); + + let p0 = chain.pop_front().unwrap(); + assert_eq!(p0.mblk_addr(), els[0] as uintptr_t); + unsafe { + assert!((*els[0]).b_prev.is_null()); + assert!((*els[0]).b_next.is_null()); + } + + // Chain head/tail ptrs are correct + let chain_inner = chain.0.as_ref().unwrap(); + assert_eq!(chain_inner.head.as_ptr(), els[1]); + assert_eq!(chain_inner.tail.as_ptr(), els[2]); + unsafe { + assert!((*els[1]).b_prev.is_null()); + assert!((*els[2]).b_next.is_null()); + } + } + + #[test] + fn chain_append_links() { + let els = create_linked_mblks(3); + let new_el = allocb(8); + + let mut chain = unsafe { MsgBlkChain::new(els[0]) }.unwrap(); + let pkt = unsafe { MsgBlk::wrap_mblk(new_el) }.unwrap(); + + chain.append(pkt); + + // Chain head/tail ptrs are correct + let chain_inner = chain.0.as_ref().unwrap(); + assert_eq!(chain_inner.head.as_ptr(), els[0]); + assert_eq!(chain_inner.tail.as_ptr(), new_el); + + // Last el has been linked to the new pkt, and it has a valid + // backward link. 
+ unsafe { + assert_eq!((*new_el).b_prev, els[2]); + assert!((*new_el).b_next.is_null()); + assert_eq!((*els[2]).b_next, new_el); + } + } + + #[test] + fn chain_drain_complete() { + let els = create_linked_mblks(64); + + let mut chain = unsafe { MsgBlkChain::new(els[0]) }.unwrap(); + + for i in 0..els.len() { + let pkt = chain.pop_front().unwrap(); + assert_eq!(pkt.mblk_addr(), els[i] as uintptr_t); + } + + assert!(chain.pop_front().is_none()); + } +} diff --git a/lib/opte/src/ddi/mod.rs b/lib/opte/src/ddi/mod.rs index f2de7cca..79f9c257 100644 --- a/lib/opte/src/ddi/mod.rs +++ b/lib/opte/src/ddi/mod.rs @@ -2,10 +2,11 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! Various abstractions for using the illumos DDI/DKI. pub mod kstat; +pub mod mblk; pub mod sync; pub mod time; diff --git a/lib/opte/src/ddi/sync.rs b/lib/opte/src/ddi/sync.rs index 6050a738..ac040150 100644 --- a/lib/opte/src/ddi/sync.rs +++ b/lib/opte/src/ddi/sync.rs @@ -28,9 +28,10 @@ cfg_if! { use illumos_sys_hdrs::kmutex_type_t; use illumos_sys_hdrs::krw_type_t; -/// Exposes the illumos mutex(9F) API in a safe manner. We name it -/// `KMutex` (Kernel Mutex) on purpose. The API for a kernel mutex -/// isn't quite the same as a userland `Mutex`, and there's no reason +/// Exposes the illumos mutex(9F) API in a safe manner. +/// +/// We name it `KMutex` (Kernel Mutex) on purpose. The API for a kernel +/// mutex isn't quite the same as a userland `Mutex`, and there's no reason /// that we have to use that exact name. Using `KMutex` makes it /// obvious that we are using a mutex, but not the one that comes from /// std. diff --git a/lib/opte/src/ddi/time.rs b/lib/opte/src/ddi/time.rs index f552ba73..1732f389 100644 --- a/lib/opte/src/ddi/time.rs +++ b/lib/opte/src/ddi/time.rs @@ -2,7 +2,7 @@ // License, v. 2.0. 
If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2022 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! Moments, periodics, etc. use core::ops::Add; @@ -15,6 +15,7 @@ cfg_if! { use illumos_sys_hdrs as ddi; } else { use std::time::Instant; + use std::sync::OnceLock; } } @@ -31,8 +32,13 @@ pub struct Moment { #[cfg(all(not(feature = "std"), not(test)))] inner: ddi::hrtime_t, + // This is a duration masquerading as an instant -- this + // allows us to move to and from raw ns counts when needed on std. + // + // Ultimately, this is a requirement for us to place `Moment`s into + // e.g. `AtomicU64`s for table design. #[cfg(any(feature = "std", test))] - inner: Instant, + inner: Duration, } impl Add for Moment { @@ -62,7 +68,7 @@ impl Moment { if #[cfg(all(not(feature = "std"), not(test)))] { (self.inner as u64).saturating_sub(earlier.inner as u64) / NANOS_TO_MILLIS } else { - let delta = self.inner.saturating_duration_since(earlier.inner); + let delta = self.inner.saturating_sub(earlier.inner); delta.as_secs() * MILLIS + delta.subsec_millis() as u64 } } @@ -73,20 +79,38 @@ impl Moment { if #[cfg(all(not(feature = "std"), not(test)))] { Self { inner: unsafe { ddi::gethrtime() } } } else { - Self { inner: Instant::now() } + static FIRST_TS: OnceLock<Instant> = OnceLock::new(); + + let first_ts = *FIRST_TS.get_or_init(Instant::now); + Self { inner: Instant::now().saturating_duration_since(first_ts) } } } } - /// Return the underlying timestamp for debugging purposes - /// if supported on the current platform. - #[allow(dead_code)] - pub(crate) fn raw_millis(&self) -> Option<u64> { + /// Return the underlying timestamp for atomic storage or debugging, converted + /// to milliseconds. + pub(crate) fn raw_millis(&self) -> u64 { + self.raw() / NANOS_TO_MILLIS + } + + /// Return the underlying timestamp for atomic storage or debugging. + pub(crate) fn raw(&self) -> u64 { + cfg_if! 
{ + if #[cfg(all(not(feature = "std"), not(test)))] { + self.inner as u64 + } else { + // Conversion here is truncating. + self.inner.as_nanos() as u64 + } + } + } + + pub(crate) fn from_raw_nanos(raw: u64) -> Self { cfg_if! { if #[cfg(all(not(feature = "std"), not(test)))] { - Some(self.inner as u64 / NANOS_TO_MILLIS) + Self { inner: raw as ddi::hrtime_t } } else { - None + Self { inner: Duration::from_nanos(raw) } } } } diff --git a/lib/opte/src/engine/arp.rs b/lib/opte/src/engine/arp.rs index e9ba7956..ab53cc73 100644 --- a/lib/opte/src/engine/arp.rs +++ b/lib/opte/src/engine/arp.rs @@ -6,96 +6,65 @@ //! ARP headers and data. -use super::ether::EtherHdr; -use super::ether::EtherMeta; -use super::ether::EtherType; -use super::headers::RawHeader; -use super::packet::Initialized; -use super::packet::Packet; -use super::packet::PacketReadMut; -use super::packet::ReadErr; -use crate::d_error::DError; +use super::ether::Ethernet; +use crate::ddi::mblk::MsgBlk; use core::fmt; use core::fmt::Display; +use ingot::ethernet::Ethertype; +use ingot::types::primitives::u16be; +use ingot::types::NetworkRepr; +use ingot::Ingot; use opte_api::Ipv4Addr; use opte_api::MacAddr; use serde::Deserialize; use serde::Serialize; -use zerocopy::AsBytes; -use zerocopy::FromBytes; -use zerocopy::FromZeroes; -use zerocopy::Ref; -use zerocopy::Unaligned; +use zerocopy::ByteSlice; pub const ARP_HTYPE_ETHERNET: u16 = 1; -#[repr(u16)] #[derive( - Clone, Copy, Debug, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize, + Clone, + Copy, + Debug, + Deserialize, + Eq, + Ord, + PartialEq, + PartialOrd, + Serialize, + Hash, )] -pub enum ArpOp { - Request = 1, - Reply = 2, -} +pub struct ArpOp(u16); impl ArpOp { - pub fn to_be_bytes(self) -> [u8; 2] { - match self { - ArpOp::Request => 1u16.to_be_bytes(), - ArpOp::Reply => 2u16.to_be_bytes(), - } - } + pub const REQUEST: Self = Self(1); + pub const REPLY: Self = Self(2); } -impl TryFrom for ArpOp { - type Error = ArpHdrError; - - fn try_from(val: 
u16) -> Result { - match val { - 1 => Ok(ArpOp::Request), - 2 => Ok(ArpOp::Reply), - _ => Err(Self::Error::BadOp { op: val }), - } +impl Default for ArpOp { + fn default() -> Self { + Self::REQUEST } } impl Display for ArpOp { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let s = match self { - ArpOp::Request => "Request", - ArpOp::Reply => "Reply", + let s = match *self { + ArpOp::REQUEST => "Request", + ArpOp::REPLY => "Reply", + _ => "Unknown", }; write!(f, "{}", s) } } -#[derive(Clone, Copy, Debug, DError, Eq, PartialEq)] -#[derror(leaf_data = ArpHdrError::derror_data)] -pub enum ArpHdrError { - BadOp { op: u16 }, - ReadError(ReadErr), - UnexpectedProtoLen { plen: u8 }, - UnexpectedProtoType { ptype: u16 }, - UnexpectedHwLen { hlen: u8 }, - UnexpectedHwType { htype: u16 }, -} - -impl ArpHdrError { - fn derror_data(&self, data: &mut [u64]) { - data[0] = match self { - Self::BadOp { op } => *op as u64, - Self::UnexpectedProtoLen { plen } => *plen as u64, - Self::UnexpectedProtoType { ptype } => *ptype as u64, - Self::UnexpectedHwLen { hlen } => *hlen as u64, - Self::UnexpectedHwType { htype } => *htype as u64, - _ => 0, - }; +impl NetworkRepr> for ArpOp { + fn to_network(self) -> zerocopy::U16 { + self.0.into() } -} -impl From for ArpHdrError { - fn from(error: ReadErr) -> Self { - Self::ReadError(error) + fn from_network(val: zerocopy::U16) -> Self { + Self(val.into()) } } @@ -105,146 +74,53 @@ pub fn gen_arp_reply( spa: Ipv4Addr, tha: MacAddr, tpa: Ipv4Addr, -) -> Packet { - let len = EtherHdr::SIZE + ArpEthIpv4Raw::SIZE; - let mut pkt = Packet::alloc_and_expand(len); - let mut wtr = pkt.seg0_wtr(); - - let eth = EtherMeta { dst: tha, src: sha, ether_type: EtherType::Arp }; - - let arp = ArpEthIpv4 { - htype: ARP_HTYPE_ETHERNET, - ptype: u16::from(EtherType::Ipv4), - hlen: 6, - plen: 4, - op: ArpOp::Reply, - sha, - spa, - tha, - tpa, - }; - - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - arp.emit(wtr.slice_mut(ArpEthIpv4::SIZE).unwrap()); - pkt 
+) -> MsgBlk { + MsgBlk::new_ethernet_pkt(( + Ethernet { destination: tha, source: sha, ethertype: Ethertype::ARP }, + ArpEthIpv4 { + op: ArpOp::REPLY, + sha, + spa, + tha, + tpa, + ..Default::default() + }, + )) } -#[derive(Clone, Copy, Debug)] +/// An ARP packet containing Ethernet (MAC) to IPv4 address mappings. +#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq, Ingot)] +#[ingot(impl_default)] pub struct ArpEthIpv4 { - pub htype: u16, - pub ptype: u16, + #[ingot(default = ARP_HTYPE_ETHERNET)] + pub htype: u16be, + #[ingot(default = Ethertype::IPV4, is = "u16be")] + pub ptype: Ethertype, + #[ingot(default = size_of::<MacAddr>() as u8)] pub hlen: u8, + #[ingot(default = size_of::<Ipv4Addr>() as u8)] pub plen: u8, + + #[ingot(is = "u16be")] pub op: ArpOp, + + #[ingot(is = "[u8; 6]")] pub sha: MacAddr, + #[ingot(is = "[u8; 4]")] pub spa: Ipv4Addr, + + #[ingot(is = "[u8; 6]")] pub tha: MacAddr, + #[ingot(is = "[u8; 4]")] pub tpa: Ipv4Addr, } -impl ArpEthIpv4 { - pub const SIZE: usize = ArpEthIpv4Raw::SIZE; - - pub fn emit(&self, dst: &mut [u8]) { - debug_assert_eq!(dst.len(), ArpEthIpv4Raw::SIZE); - let mut raw = ArpEthIpv4Raw::new_mut(dst).unwrap(); - raw.write(ArpEthIpv4Raw::from(self)); - } - - pub fn parse<'a, 'b, R>(rdr: &'b mut R) -> Result<Self, ArpHdrError> - where - R: PacketReadMut<'a>, - { - let src = rdr.slice_mut(ArpEthIpv4Raw::SIZE)?; - Self::try_from(&ArpEthIpv4Raw::new_mut(src)?) - } -} - -impl TryFrom<&Ref<&mut [u8], ArpEthIpv4Raw>> for ArpEthIpv4 { - type Error = ArpHdrError; - - // NOTE: This only accepts IPv4/Ethernet ARP. 
- fn try_from( - raw: &Ref<&mut [u8], ArpEthIpv4Raw>, - ) -> Result { - let htype = u16::from_be_bytes(raw.htype); - - if htype != ARP_HTYPE_ETHERNET { - return Err(Self::Error::UnexpectedHwType { htype }); - } - - let hlen = raw.hlen; - - if hlen != 6 { - return Err(Self::Error::UnexpectedHwLen { hlen }); - } - - let ptype = u16::from_be_bytes(raw.ptype); - - if ptype != super::ether::ETHER_TYPE_IPV4 { - return Err(Self::Error::UnexpectedProtoType { ptype }); - } - - let plen = raw.plen; - - if plen != 4 { - return Err(Self::Error::UnexpectedProtoLen { plen }); - } - - let op = ArpOp::try_from(u16::from_be_bytes(raw.op))?; - - Ok(Self { - htype, - ptype, - hlen, - plen, - op, - sha: MacAddr::from(raw.sha), - spa: Ipv4Addr::from(u32::from_be_bytes(raw.spa)), - tha: MacAddr::from(raw.tha), - tpa: Ipv4Addr::from(u32::from_be_bytes(raw.tpa)), - }) - } -} - -impl From<&ArpEthIpv4> for ArpEthIpv4Raw { - fn from(arp: &ArpEthIpv4) -> Self { - Self { - htype: arp.htype.to_be_bytes(), - ptype: arp.ptype.to_be_bytes(), - hlen: arp.hlen, - plen: arp.plen, - op: arp.op.to_be_bytes(), - sha: arp.sha.bytes(), - spa: arp.spa.bytes(), - tha: arp.tha.bytes(), - tpa: arp.tpa.bytes(), - } - } -} - -#[repr(C)] -#[derive(AsBytes, Clone, Debug, FromBytes, FromZeroes, Unaligned)] -pub struct ArpEthIpv4Raw { - pub htype: [u8; 2], - pub ptype: [u8; 2], - pub hlen: u8, - pub plen: u8, - pub op: [u8; 2], - pub sha: [u8; 6], - pub spa: [u8; 4], - pub tha: [u8; 6], - pub tpa: [u8; 4], -} - -impl<'a> RawHeader<'a> for ArpEthIpv4Raw { - #[inline] - fn new_mut(src: &mut [u8]) -> Result, ReadErr> { - debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::new(src) { - Some(hdr) => hdr, - None => return Err(ReadErr::BadLayout), - }; - Ok(hdr) +impl ValidArpEthIpv4 { + pub fn values_valid(&self) -> bool { + self.htype() == ARP_HTYPE_ETHERNET + && self.ptype() == Ethertype::IPV4 + && self.hlen() == (size_of::() as u8) + && self.plen() == (size_of::() as u8) + && (self.op() == ArpOp::REQUEST 
|| self.op() == ArpOp::REPLY) } } diff --git a/lib/opte/src/engine/checksum.rs b/lib/opte/src/engine/checksum.rs index 59154e54..fc8ce80b 100644 --- a/lib/opte/src/engine/checksum.rs +++ b/lib/opte/src/engine/checksum.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2022 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! Types for calculating the internet checksum. //! @@ -121,6 +121,11 @@ pub struct Checksum { } impl Checksum { + /// Creates a new checksum counter. + pub fn new() -> Self { + Self::from(0) + } + /// Update the sum based by adding the contents of `bytes`. /// /// This is useful for incrementally updating an existing checksum @@ -152,6 +157,14 @@ impl Checksum { (self.inner & 0xFFFF) as u16 } + + /// Calls [`Self::finalize`], and returns the one's complement value + /// of the checksum for storage as a `u16be`. + pub fn finalize_for_ingot(&mut self) -> u16 { + let out = self.finalize(); + + (!out).to_be() + } } impl From for Checksum { diff --git a/lib/opte/src/engine/dhcp.rs b/lib/opte/src/engine/dhcp.rs index 68e7003d..4d3904ea 100644 --- a/lib/opte/src/engine/dhcp.rs +++ b/lib/opte/src/engine/dhcp.rs @@ -2,23 +2,13 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! DHCP headers, data, and actions. 
-use super::checksum::HeaderChecksum; -use super::ether::EtherHdr; -use super::ether::EtherMeta; -use super::ether::EtherType; -use super::ip4::Ipv4Addr; -use super::ip4::Ipv4Hdr; -use super::ip4::Ipv4Meta; -use super::ip4::Protocol; -use super::ip6::UlpCsumOpt; -use super::packet::Packet; -use super::packet::PacketMeta; -use super::packet::PacketRead; -use super::packet::PacketReader; +use super::ether::Ethernet; +use super::ip::v4::*; +use super::packet::MblkPacketData; use super::predicate::DataPredicate; use super::predicate::EtherAddrMatch; use super::predicate::IpProtoMatch; @@ -28,13 +18,16 @@ use super::predicate::Predicate; use super::rule::AllowOrDeny; use super::rule::GenPacketResult; use super::rule::HairpinAction; -use super::udp::UdpHdr; -use super::udp::UdpMeta; +use crate::ddi::mblk::MsgBlk; use alloc::string::ToString; use alloc::vec::Vec; use core::fmt; use core::fmt::Display; use heapless::Vec as HeaplessVec; +use ingot::ethernet::Ethertype; +use ingot::ip::IpProtocol; +use ingot::types::HeaderLen; +use ingot::udp::Udp; use opte_api::DhcpCfg; use opte_api::DhcpReplyType; use opte_api::DomainName; @@ -53,6 +46,9 @@ use smoltcp::wire::DhcpRepr; use smoltcp::wire::Ipv4Address; use smoltcp::wire::DHCP_MAX_DNS_SERVER_COUNT; +pub const DHCP_SERVER_PORT: u16 = 67; +pub const DHCP_CLIENT_PORT: u16 = 68; + /// The DHCP message type. 
/// /// Why define our own wrapper type when smoltcp already provides this @@ -125,7 +121,7 @@ impl From for MessageType { struct MessageTypeVisitor; -impl<'de> Visitor<'de> for MessageTypeVisitor { +impl Visitor<'_> for MessageTypeVisitor { type Value = MessageType; fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { @@ -461,8 +457,8 @@ impl HairpinAction for DhcpAction { Ipv4Addr::LOCAL_BCAST, )]), Predicate::InnerIpProto(vec![IpProtoMatch::Exact(Protocol::UDP)]), - Predicate::InnerDstPort(vec![PortMatch::Exact(67)]), - Predicate::InnerSrcPort(vec![PortMatch::Exact(68)]), + Predicate::InnerDstPort(vec![PortMatch::Exact(DHCP_SERVER_PORT)]), + Predicate::InnerSrcPort(vec![PortMatch::Exact(DHCP_CLIENT_PORT)]), ]; let data_preds = match self.reply_type { @@ -482,12 +478,8 @@ impl HairpinAction for DhcpAction { (hdr_preds, data_preds) } - fn gen_packet( - &self, - _meta: &PacketMeta, - rdr: &mut PacketReader, - ) -> GenPacketResult { - let body = rdr.copy_remaining(); + fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { + let body = meta.copy_remaining(); let client_pkt = DhcpPacket::new_checked(&body)?; let client_dhcp = DhcpRepr::parse(&client_pkt)?; let mt = MessageType::from(self.reply_type); @@ -571,21 +563,10 @@ impl HairpinAction for DhcpAction { let reply_len = reply.buffer_len(); - // XXX This is temporary until I can add interface to Packet - // to initialize a zero'd mblk of N bytes and then get a - // direct mutable reference to the PacketSeg. - // - // We provide exactly the number of bytes needed guaranteeing - // that emit() should not fail. 
- let mut tmp = vec![0u8; reply_len]; - let mut dhcp = DhcpPacket::new_unchecked(&mut tmp); - reply.emit(&mut dhcp).unwrap(); - - let mut udp = UdpMeta { - src: 67, - dst: 68, - len: (UdpHdr::SIZE + tmp.len()) as u16, - ..Default::default() + let eth_dst = if client_dhcp.broadcast { + MacAddr::BROADCAST + } else { + self.client_mac }; let ip_dst = if client_dhcp.broadcast { @@ -594,41 +575,40 @@ impl HairpinAction for DhcpAction { self.client_ip }; - let mut ip = Ipv4Meta { - src: self.gw_ip, - dst: ip_dst, - proto: Protocol::UDP, - total_len: Ipv4Hdr::BASE_SIZE as u16 + udp.len, + let udp = Udp { + source: DHCP_SERVER_PORT, + destination: DHCP_CLIENT_PORT, + length: (Udp::MINIMUM_LENGTH + reply_len) as u16, ..Default::default() }; - ip.compute_hdr_csum(); - let eth_dst = if client_dhcp.broadcast { - MacAddr::BROADCAST - } else { - self.client_mac + let mut ip = Ipv4 { + source: self.gw_ip, + destination: ip_dst, + protocol: IpProtocol::UDP, + total_len: Ipv4::MINIMUM_LENGTH as u16 + udp.length, + ..Default::default() }; + ip.compute_checksum(); - let eth = EtherMeta { - dst: eth_dst, - src: self.gw_mac, - ether_type: EtherType::Ipv4, + let eth = Ethernet { + destination: eth_dst, + source: self.gw_mac, + ethertype: Ethertype::IPV4, }; - // XXX: Would be preferable to write in here directly rather than - // allocing tmp. 
- let total_len = - EtherHdr::SIZE + Ipv4Hdr::BASE_SIZE + UdpHdr::SIZE + tmp.len(); - let mut pkt = Packet::alloc_and_expand(total_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - let mut udp_buf = [0u8; UdpHdr::SIZE]; - udp.emit(&mut udp_buf); - let csum = ip.compute_ulp_csum(UlpCsumOpt::Full, &udp_buf, &tmp); - udp.csum = HeaderChecksum::from(csum).bytes(); - udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); - wtr.write(&tmp).unwrap(); + let ingot_layers = (&eth, &ip, &udp); + let total_sz = ingot_layers.packet_length() + reply_len; + let mut pkt = MsgBlk::new_ethernet(total_sz); + pkt.emit_back(ingot_layers) + .expect("MsgBlk should have enough bytes by construction"); + let l = pkt.len(); + pkt.resize(total_sz) + .expect("MsgBlk should have enough bytes by construction"); + + let mut dhcp = DhcpPacket::new_unchecked(&mut pkt[l..]); + reply.emit(&mut dhcp).unwrap(); + Ok(AllowOrDeny::Allow(pkt)) } } @@ -636,8 +616,8 @@ impl HairpinAction for DhcpAction { #[cfg(test)] mod test { use super::*; - use crate::engine::ip4::Ipv4Addr; - use crate::engine::ip4::Ipv4Cidr; + use crate::engine::ip::v4::Ipv4Addr; + use crate::engine::ip::v4::Ipv4Cidr; fn test_option_emit(opt: impl DhcpOption, truth: Vec) { let buf = gen_dhcp_from_option(opt); diff --git a/lib/opte/src/engine/dhcpv6/mod.rs b/lib/opte/src/engine/dhcpv6/mod.rs index 06e9864c..caacbb76 100644 --- a/lib/opte/src/engine/dhcpv6/mod.rs +++ b/lib/opte/src/engine/dhcpv6/mod.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! Core implementation of DHCPv6 protocol. //! @@ -54,10 +54,10 @@ //! - Option Request: A list of Option codes for requested options. //! - Elapsed Time: The duration a client has been trying to talk to the server. //! 
- Rapid Commit: An option that tells the server to commit data to a client, -//! without waiting for a second ACK sequence of messages. +//! without waiting for a second ACK sequence of messages. //! - DNS Servers: A list of IPv6 addresses for DNS servers the client can use. //! - SNTP Servers: A list of IPv6 addresses for SNTP servers the client can -//! use. +//! use. //! //! See the `options` module for more details on the encoding of these in a //! message. @@ -108,18 +108,18 @@ pub const CLIENT_PORT: u16 = 546; #[derive(Clone, Debug, PartialEq)] pub struct TransactionId<'a>(pub Cow<'a, [u8]>); -impl<'a> TransactionId<'a> { +impl TransactionId<'_> { pub const SIZE: usize = 3; } -impl<'a> Deref for TransactionId<'a> { +impl Deref for TransactionId<'_> { type Target = [u8]; fn deref(&self) -> &Self::Target { &self.0 } } -impl<'a> AsRef<[u8]> for TransactionId<'a> { +impl AsRef<[u8]> for TransactionId<'_> { fn as_ref(&self) -> &[u8] { &self.0 } diff --git a/lib/opte/src/engine/dhcpv6/protocol.rs b/lib/opte/src/engine/dhcpv6/protocol.rs index 09dbf608..c1c575f0 100644 --- a/lib/opte/src/engine/dhcpv6/protocol.rs +++ b/lib/opte/src/engine/dhcpv6/protocol.rs @@ -2,13 +2,13 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! Implementation of the main message types for DHCPv6. 
use super::Dhcpv6Action; use super::TransactionId; -use crate::engine::checksum::HeaderChecksum; +use crate::ddi::mblk::MsgBlk; use crate::engine::dhcpv6::options::Code as OptionCode; use crate::engine::dhcpv6::options::IaAddr; use crate::engine::dhcpv6::options::IaNa; @@ -22,16 +22,10 @@ use crate::engine::dhcpv6::ALL_RELAYS_AND_SERVERS; use crate::engine::dhcpv6::ALL_SERVERS; use crate::engine::dhcpv6::CLIENT_PORT; use crate::engine::dhcpv6::SERVER_PORT; -use crate::engine::ether::EtherHdr; -use crate::engine::ether::EtherMeta; -use crate::engine::ether::EtherType; -use crate::engine::ip6::Ipv6Hdr; -use crate::engine::ip6::Ipv6Meta; -use crate::engine::ip6::UlpCsumOpt; -use crate::engine::packet::Packet; -use crate::engine::packet::PacketMeta; -use crate::engine::packet::PacketRead; -use crate::engine::packet::PacketReader; +use crate::engine::ether::Ethernet; +use crate::engine::ip::v6::Ipv6; +use crate::engine::ip::v6::Ipv6Ref; +use crate::engine::packet::MblkPacketData; use crate::engine::predicate::DataPredicate; use crate::engine::predicate::EtherAddrMatch; use crate::engine::predicate::IpProtoMatch; @@ -41,19 +35,20 @@ use crate::engine::predicate::Predicate; use crate::engine::rule::AllowOrDeny; use crate::engine::rule::GenPacketResult; use crate::engine::rule::HairpinAction; -use crate::engine::udp::UdpHdr; -use crate::engine::udp::UdpMeta; use alloc::borrow::Cow; use alloc::vec::Vec; use core::fmt; use core::ops::Range; +use ingot::ethernet::Ethertype; +use ingot::ip::IpProtocol as IngotIpProto; +use ingot::types::HeaderLen; +use ingot::udp::Udp; use opte_api::Ipv6Addr; use opte_api::Ipv6Cidr; use opte_api::MacAddr; use opte_api::Protocol; use serde::Deserialize; use serde::Serialize; -use smoltcp::wire::IpProtocol; #[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)] pub enum MessageType { @@ -590,7 +585,7 @@ fn process_confirm_message<'a>( // Process a DHCPv6 message from the a client. 
fn process_client_message<'a>( action: &'a Dhcpv6Action, - _meta: &'a PacketMeta, + _meta: &'a MblkPacketData, client_msg: &'a Message<'a>, ) -> Option> { match client_msg.typ { @@ -612,55 +607,44 @@ fn process_client_message<'a>( // the request and the actual DHCPv6 message to send out. fn generate_packet<'a>( action: &Dhcpv6Action, - meta: &PacketMeta, + meta: &MblkPacketData, msg: &'a Message<'a>, ) -> GenPacketResult { - let eth = EtherMeta { - dst: action.client_mac, - src: action.server_mac, - ether_type: EtherType::Ipv6, + let udp = Udp { + source: SERVER_PORT, + destination: CLIENT_PORT, + length: (Udp::MINIMUM_LENGTH + msg.buffer_len()) as u16, + ..Default::default() }; - let ip = Ipv6Meta { - src: Ipv6Addr::from_eui64(&action.server_mac), + let ip = Ipv6 { + source: Ipv6Addr::from_eui64(&action.server_mac), // Safety: We're only here if the predicates match, one of which is // IPv6. - dst: meta.inner_ip6().unwrap().src, - proto: Protocol::UDP, - next_hdr: IpProtocol::Udp, - pay_len: (UdpHdr::SIZE + msg.buffer_len()) as u16, + destination: meta.inner_ip6().unwrap().source(), + next_header: IngotIpProto::UDP, + payload_len: udp.length, ..Default::default() }; - let mut udp = UdpMeta { - src: SERVER_PORT, - dst: CLIENT_PORT, - len: (UdpHdr::SIZE + msg.buffer_len()) as u16, - ..Default::default() + let eth = Ethernet { + destination: action.client_mac, + source: action.server_mac, + ethertype: Ethertype::IPV6, }; // Allocate a segment into which we'll write the packet. - let reply_len = - msg.buffer_len() + UdpHdr::SIZE + Ipv6Hdr::BASE_SIZE + EtherHdr::SIZE; - let mut pkt = Packet::alloc_and_expand(reply_len); - let mut wtr = pkt.seg0_wtr(); - - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - - // Create the buffer to contain the DHCP message so that we may - // compute the UDP checksum. - let mut msg_buf = vec![0; msg.buffer_len()]; - msg.copy_into(&mut msg_buf).unwrap(); - - // Compute the UDP checksum. 
Write the UDP header and DHCP message - // to the segment. - let mut udp_buf = [0u8; UdpHdr::SIZE]; - udp.emit(&mut udp_buf); - let csum = ip.compute_ulp_csum(UlpCsumOpt::Full, &udp_buf, &msg_buf); - udp.csum = HeaderChecksum::from(csum).bytes(); - udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); - wtr.write(&msg_buf).unwrap(); + let ingot_layers = (&eth, &ip, &udp); + let total_sz = ingot_layers.packet_length() + msg.buffer_len(); + + let mut pkt = MsgBlk::new_ethernet(total_sz); + pkt.emit_back(ingot_layers) + .expect("MsgBlk should have enough bytes by construction"); + let l = pkt.len(); + pkt.resize(total_sz) + .expect("MsgBlk should have enough bytes by construction"); + msg.copy_into(&mut pkt[l..]); + Ok(AllowOrDeny::Allow(pkt)) } @@ -683,12 +667,8 @@ impl HairpinAction for Dhcpv6Action { // Rather than put this logic into DataPredicates, we just parse the packet // here and reply accordingly. So the `Dhcpv6Action` is really a full // server, to the extent we emulate one. - fn gen_packet( - &self, - meta: &PacketMeta, - rdr: &mut PacketReader, - ) -> GenPacketResult { - let body = rdr.copy_remaining(); + fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { + let body = meta.copy_remaining(); if let Some(client_msg) = Message::from_bytes(&body) { if let Some(reply) = process_client_message(self, meta, &client_msg) { @@ -710,11 +690,11 @@ mod test { use super::Message; use super::MessageType; use super::OptionCode; - use super::Packet; + use crate::ddi::mblk::MsgBlk; use crate::engine::dhcpv6::test_data; + use crate::engine::packet::Packet; use crate::engine::port::meta::ActionMeta; use crate::engine::GenericUlp; - use opte_api::Direction::*; // Test that we correctly parse out the entire Solicit message from a // snooped packet. 
@@ -743,9 +723,10 @@ mod test { #[test] fn test_predicates_match_snooped_solicit_message() { - let pkt = Packet::copy(test_data::TEST_SOLICIT_PACKET) - .parse(Out, GenericUlp {}) - .unwrap(); + let mut pkt = MsgBlk::copy(test_data::TEST_SOLICIT_PACKET); + let pkt = Packet::parse_outbound(pkt.iter_mut(), GenericUlp {}) + .unwrap() + .to_full_meta(); let pmeta = pkt.meta(); let ameta = ActionMeta::new(); let client_mac = diff --git a/lib/opte/src/engine/ether.rs b/lib/opte/src/engine/ether.rs index 05927fec..76dbc285 100644 --- a/lib/opte/src/engine/ether.rs +++ b/lib/opte/src/engine/ether.rs @@ -6,12 +6,11 @@ //! Ethernet frames. +use super::headers::HasInnerCksum; +use super::headers::HeaderActionError; +use super::headers::HeaderActionModify; use super::headers::ModifyAction; use super::headers::PushAction; -use super::headers::RawHeader; -use super::packet::PacketReadMut; -use super::packet::ReadErr; -use crate::d_error::DError; use alloc::string::String; use alloc::vec::Vec; use core::fmt; @@ -19,14 +18,16 @@ use core::fmt::Debug; use core::fmt::Display; use core::result; use core::str::FromStr; +use ingot::ethernet::Ethertype; +use ingot::types::Header; +use ingot::types::HeaderLen; +use ingot::types::InlineHeader; +use ingot::Ingot; use opte_api::MacAddr; use serde::Deserialize; use serde::Serialize; -use zerocopy::AsBytes; -use zerocopy::FromBytes; -use zerocopy::FromZeroes; -use zerocopy::Ref; -use zerocopy::Unaligned; +use zerocopy::ByteSlice; +use zerocopy::ByteSliceMut; pub const ETHER_TYPE_ETHER: u16 = 0x6558; pub const ETHER_TYPE_IPV4: u16 = 0x0800; @@ -35,6 +36,17 @@ pub const ETHER_TYPE_IPV6: u16 = 0x86DD; pub const ETHER_ADDR_LEN: usize = 6; +#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq, Ingot)] +#[ingot(impl_default)] +pub struct Ethernet { + #[ingot(is = "[u8; 6]")] + pub destination: MacAddr, + #[ingot(is = "[u8; 6]")] + pub source: MacAddr, + #[ingot(is = "u16be", next_layer)] + pub ethertype: Ethertype, +} + #[repr(u16)] #[derive( Clone, 
Copy, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize, @@ -209,16 +221,6 @@ impl PushAction for EtherMeta { } } -impl<'a> From<&EtherHdr<'a>> for EtherMeta { - fn from(eth: &EtherHdr) -> Self { - EtherMeta { - src: eth.src(), - dst: eth.dst(), - ether_type: eth.ether_type(), - } - } -} - #[derive(Clone, Debug, Default, Deserialize, Serialize)] pub struct EtherMod { pub src: Option, @@ -238,152 +240,139 @@ impl ModifyAction for EtherMod { } impl EtherMeta { - #[inline] - pub fn emit(&self, dst: &mut [u8]) { - debug_assert_eq!(dst.len(), EtherHdrRaw::SIZE); - let mut raw = EtherHdrRaw::new_mut(dst).unwrap(); - raw.write(EtherHdrRaw::from(self)); - } - #[inline] pub fn hdr_len(&self) -> usize { - EtherHdr::SIZE + Ethernet::MINIMUM_LENGTH } } -#[derive(Debug)] -pub struct EtherHdr<'a> { - bytes: Ref<&'a mut [u8], EtherHdrRaw>, -} - -impl<'a> EtherHdr<'a> { - // For the moment, this type is for non-VLAN ethernet headers - // only. - pub const SIZE: usize = EtherHdrRaw::SIZE; - - pub fn as_bytes(&self) -> &[u8] { - self.bytes.bytes() - } - - pub fn ether_type(&self) -> EtherType { - EtherType::from(u16::from_be_bytes(self.bytes.ether_type)) - } - - pub fn hdr_len(&self) -> usize { - Self::SIZE - } - - pub fn src(&self) -> MacAddr { - MacAddr::from(self.bytes.src) - } - - pub fn dst(&self) -> MacAddr { - MacAddr::from(self.bytes.dst) - } - - pub fn set_dst(&mut self, dst: MacAddr) { - self.bytes.dst = dst.bytes(); - } +impl HeaderActionModify for EthernetPacket { + #[inline] + fn run_modify( + &mut self, + mod_spec: &EtherMod, + ) -> Result<(), HeaderActionError> { + if let Some(src) = mod_spec.src { + self.set_source(src); + } + if let Some(dst) = mod_spec.dst { + self.set_destination(dst); + } - pub fn parse<'b, R>(rdr: &'b mut R) -> Result - where - R: PacketReadMut<'a>, - { - let src = rdr.slice_mut(EtherHdrRaw::SIZE)?; - Ok(Self { bytes: EtherHdrRaw::new_mut(src)? 
}) + Ok(()) } } -#[derive(Clone, Copy, Eq, PartialEq, DError)] -#[derror(leaf_data = EtherHdrError::derror_data)] -pub enum EtherHdrError { - ReadError(ReadErr), - UnsupportedEtherType { ether_type: u16 }, -} - -impl EtherHdrError { - fn derror_data(&self, data: &mut [u64]) { - if let Self::UnsupportedEtherType { ether_type } = self { - data[0] = *ether_type as u64; +impl HeaderActionModify + for InlineHeader> +{ + #[inline] + fn run_modify( + &mut self, + mod_spec: &EtherMod, + ) -> Result<(), HeaderActionError> { + match self { + InlineHeader::Repr(a) => { + if let Some(src) = mod_spec.src { + a.set_source(src); + } + if let Some(dst) = mod_spec.dst { + a.set_destination(dst); + } + } + InlineHeader::Raw(a) => { + if let Some(src) = mod_spec.src { + a.set_source(src); + } + if let Some(dst) = mod_spec.dst { + a.set_destination(dst); + } + } } + + Ok(()) } } -impl From for EtherHdrError { - fn from(error: ReadErr) -> Self { - EtherHdrError::ReadError(error) - } +impl HasInnerCksum for InlineHeader> { + const HAS_CKSUM: bool = false; } -impl Display for EtherHdrError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Self::UnsupportedEtherType { ether_type } => { - write!(f, "Unsupported Ether Type: 0x{:04X}", ether_type) - } +impl HasInnerCksum for EthernetPacket { + const HAS_CKSUM: bool = false; +} - Self::ReadError(error) => { - write!(f, "read error: {:?}", error) +impl From for Header> { + #[inline] + fn from(value: EtherMeta) -> Self { + Header::Repr( + Ethernet { + destination: value.dst, + source: value.src, + ethertype: Ethertype(u16::from(value.ether_type)), } - } + .into(), + ) } } -impl Debug for EtherHdrError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self) +impl From + for InlineHeader> +{ + #[inline] + fn from(value: EtherMeta) -> Self { + InlineHeader::Repr(Ethernet { + destination: value.dst, + source: value.src, + ethertype: Ethertype(u16::from(value.ether_type)), + }) } } -impl 
From<&EtherMeta> for EtherHdrRaw { - fn from(meta: &EtherMeta) -> Self { - Self { - dst: meta.dst.bytes(), - src: meta.src.bytes(), - ether_type: u16::from(meta.ether_type).to_be_bytes(), - } +impl PushAction>> + for EtherMeta +{ + #[inline] + fn push(&self) -> InlineHeader> { + InlineHeader::Repr(Ethernet { + destination: self.dst, + source: self.src, + ethertype: Ethertype(u16::from(self.ether_type)), + }) } } -/// Note: For now we keep this unaligned to be safe. -#[repr(C)] -#[derive(Clone, Debug, Default, FromBytes, AsBytes, FromZeroes, Unaligned)] -pub struct EtherHdrRaw { - pub dst: [u8; 6], - pub src: [u8; 6], - pub ether_type: [u8; 2], -} - -impl<'a> RawHeader<'a> for EtherHdrRaw { +impl PushAction> for EtherMeta { #[inline] - fn new_mut(src: &mut [u8]) -> Result, ReadErr> { - debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::new(src) { - Some(hdr) => hdr, - None => return Err(ReadErr::BadLayout), - }; - Ok(hdr) + fn push(&self) -> EthernetPacket { + Header::Repr( + Ethernet { + destination: self.dst, + source: self.src, + ethertype: Ethertype(u16::from(self.ether_type)), + } + .into(), + ) } } #[cfg(test)] mod test { use super::*; - use crate::engine::packet::Packet; + use ingot::types::Emit; + use ingot::types::HeaderParse; #[test] fn emit() { - let eth = EtherMeta { - dst: MacAddr::from([0xA8, 0x40, 0x25, 0xFF, 0x77, 0x77]), - src: MacAddr::from([0xA8, 0x40, 0x25, 0xFA, 0xFA, 0x37]), - ether_type: EtherType::Ipv4, + let eth = Ethernet { + destination: MacAddr::from([0xA8, 0x40, 0x25, 0xFF, 0x77, 0x77]), + source: MacAddr::from([0xA8, 0x40, 0x25, 0xFA, 0xFA, 0x37]), + ethertype: Ethertype::IPV4, }; // Verify bytes are written and segment length is correct. 
- let mut pkt = Packet::alloc_and_expand(14); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - assert_eq!(pkt.len(), 14); + let out = eth.emit_vec(); + assert_eq!(out.len(), 14); #[rustfmt::skip] let expected_bytes = vec![ // destination @@ -393,11 +382,9 @@ mod test { // ether type 0x08, 0x00, ]; - assert_eq!(&expected_bytes, pkt.seg_bytes(0)); + assert_eq!(expected_bytes, out); // Verify error when the mblk is not large enough. - let mut pkt = Packet::alloc_and_expand(10); - let mut wtr = pkt.seg0_wtr(); - assert!(wtr.slice_mut(EtherHdr::SIZE).is_err()); + assert!(ValidEthernet::parse(&[0; 10][..]).is_err()); } } diff --git a/lib/opte/src/engine/flow_table.rs b/lib/opte/src/engine/flow_table.rs index 4bd53d27..4b762270 100644 --- a/lib/opte/src/engine/flow_table.rs +++ b/lib/opte/src/engine/flow_table.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! The flow table implementation. //! @@ -16,9 +16,13 @@ use alloc::boxed::Box; use alloc::collections::BTreeMap; use alloc::ffi::CString; use alloc::string::String; +use alloc::sync::Arc; use alloc::vec::Vec; use core::fmt; use core::num::NonZeroU32; +use core::sync::atomic::AtomicBool; +use core::sync::atomic::AtomicU64; +use core::sync::atomic::Ordering; #[cfg(all(not(feature = "std"), not(test)))] use illumos_sys_hdrs::uintptr_t; use opte_api::OpteError; @@ -80,12 +84,12 @@ pub struct FlowTable { name_c: CString, limit: NonZeroU32, policy: Box>, - map: BTreeMap>, + map: BTreeMap>>, } impl FlowTable where - S: Clone + fmt::Debug + Dump, + S: fmt::Debug + Dump, { /// Add a new entry to the flow table. 
/// @@ -101,16 +105,39 @@ where } let entry = FlowEntry::new(state); - self.map.insert(flow_id, entry); + self.map.insert(flow_id, entry.into()); Ok(()) } + /// Add a new entry to the flow table, returning a shared reference to + /// the entry. + /// + /// # Errors + /// + /// If the table is at max capacity, an error is returned and no + /// modification is made to the table. + /// + /// If an entry already exists for this flow, it is overwritten. + pub fn add_and_return( + &mut self, + flow_id: InnerFlowId, + state: S, + ) -> Result>> { + if self.map.len() == self.limit.get() as usize { + return Err(OpteError::MaxCapacity(self.limit.get() as u64)); + } + + let entry = Arc::new(FlowEntry::new(state)); + self.map.insert(flow_id, entry.clone()); + Ok(entry) + } + /// Add a new entry to the flow table while eliding the capacity check. /// /// This is meant for table implementations that enforce their own limit. pub fn add_unchecked(&mut self, flow_id: InnerFlowId, state: S) { let entry = FlowEntry::new(state); - self.map.insert(flow_id, entry); + self.map.insert(flow_id, entry.into()); } // Clear all entries from the flow table. @@ -145,8 +172,8 @@ where port_c, name_c, flowid, - Some(entry.last_hit), - Some(now), + Some(entry.last_hit.load(Ordering::Relaxed)), + Some(now.raw_millis()), ); expired.push(f(entry.state())); return false; @@ -165,19 +192,10 @@ where /// Get a reference to the flow entry for a given flow, if one /// exists. - pub fn get(&mut self, flow_id: &InnerFlowId) -> Option<&FlowEntry> { + pub fn get(&self, flow_id: &InnerFlowId) -> Option<&Arc>> { self.map.get(flow_id) } - /// Get a mutable reference to the flow entry for a given flow, if - /// one exists. - pub fn get_mut( - &mut self, - flow_id: &InnerFlowId, - ) -> Option<&mut FlowEntry> { - self.map.get_mut(flow_id) - } - /// Mark all flow table entries as requiring revalidation after a /// reset or removal of rules.
/// @@ -185,8 +203,8 @@ where /// will occupy flowtable space until they are denied or expire. As such /// this method should be used only when the original state (`S`) *must* /// be preserved to ensure correctness. - pub fn mark_dirty(&mut self) { - self.map.values_mut().for_each(|v| v.dirty = true); + pub fn mark_dirty(&self) { + self.map.values().for_each(|v| v.set_dirty()); } pub fn new( @@ -211,7 +229,7 @@ where self.map.len() as u32 } - pub fn remove(&mut self, flow: &InnerFlowId) -> Option> { + pub fn remove(&mut self, flow: &InnerFlowId) -> Option>> { self.map.remove(flow) } } @@ -221,8 +239,8 @@ fn flow_expired_probe( port: &CString, name: &CString, flowid: &InnerFlowId, - last_hit: Option, - now: Option, + last_hit: Option, + now: Option, ) { cfg_if! { if #[cfg(all(not(feature = "std"), not(test)))] { @@ -231,8 +249,8 @@ fn flow_expired_probe( port.as_ptr() as uintptr_t, name.as_ptr() as uintptr_t, flowid, - last_hit.and_then(|m| m.raw_millis()).unwrap_or_default() as usize, - now.and_then(|m| m.raw_millis()).unwrap_or_default() as usize, + last_hit.unwrap_or_default() as usize, + now.unwrap_or_default() as usize, ); } } else if #[cfg(feature = "usdt")] { @@ -240,7 +258,7 @@ fn flow_expired_probe( let port_s = port.to_str().unwrap(); let name_s = name.to_str().unwrap(); crate::opte_provider::flow__expired!( - || (port_s, name_s, flowid.to_string(), 0, 0) + || (port_s, name_s, flowid.to_string(), last_hit.unwrap_or_default(), now.unwrap_or_default()) ); } else { let (_, _, _) = (port, name, flowid); @@ -257,24 +275,27 @@ pub trait Dump { } /// The FlowEntry holds any arbitrary state type `S`. -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct FlowEntry { state: S, /// Number of times this flow has been matched. - hits: u64, + hits: AtomicU64, /// This tracks the last time the flow was matched. - last_hit: Moment, + /// + /// These are raw u64s sourced from a `Moment`, which tracks time + /// in nanoseconds. 
+ last_hit: AtomicU64, /// Records whether this flow predates a rule change, and /// must rerun rule processing before `state` can be used. - dirty: bool, + dirty: AtomicBool, } impl FlowEntry { fn dump(&self) -> S::DumpVal { - self.state.dump(self.hits) + self.state.dump(self.hits.load(Ordering::Relaxed)) } pub fn state_mut(&mut self) -> &mut S { @@ -286,32 +307,56 @@ impl FlowEntry { } pub fn hits(&self) -> u64 { - self.hits + self.hits.load(Ordering::Relaxed) } - pub fn hit(&mut self) { - self.hits += 1; - self.last_hit = Moment::now(); + /// Increments this flow's hit counter and updates its timestamp to + /// the current instant. + pub fn hit(&self) { + self.hit_at(Moment::now()) + } + + /// Increments this flow's hit counter and updates its timestamp to + /// a given timestamp. + /// + /// This is used to minimise calls to `gethrtime` in fastpath + /// operations. Callers *MUST* be certain that expiry logic for this flow + /// entry uses saturating comparisons, particularly if timestamps are + /// sourced before grabbing a lock / processing a packet / any other + /// long-running operation. 
**This is doubly true if you are not holding + /// the port lock.** + pub(crate) fn hit_at(&self, now: Moment) { + self.hits.fetch_add(1, Ordering::Relaxed); + self.last_hit.store(now.raw(), Ordering::Relaxed); } pub fn is_dirty(&self) -> bool { - self.dirty + self.dirty.load(Ordering::Relaxed) } - pub fn mark_clean(&mut self) { - self.dirty = false + pub fn set_dirty(&self) { + self.dirty.store(true, Ordering::Relaxed) } - pub fn last_hit(&self) -> &Moment { - &self.last_hit + pub fn mark_clean(&self) { + self.dirty.store(false, Ordering::Relaxed) + } + + pub fn last_hit(&self) -> Moment { + Moment::from_raw_nanos(self.last_hit.load(Ordering::Relaxed)) } fn is_expired(&self, now: Moment, ttl: Ttl) -> bool { - ttl.is_expired(self.last_hit, now) + ttl.is_expired(self.last_hit(), now) } fn new(state: S) -> Self { - FlowEntry { state, hits: 0, last_hit: Moment::now(), dirty: false } + FlowEntry { + state, + hits: 0.into(), + last_hit: Moment::now().raw().into(), + dirty: false.into(), + } } } @@ -347,7 +392,7 @@ impl Dump for () { #[cfg(test)] mod test { use super::*; - use crate::engine::ip4::Protocol; + use crate::engine::ip::v4::Protocol; use crate::engine::packet::AddrPair; use crate::engine::packet::FLOW_ID_DEFAULT; use core::time::Duration; diff --git a/lib/opte/src/engine/geneve.rs b/lib/opte/src/engine/geneve.rs index 28a88d13..1074b18a 100644 --- a/lib/opte/src/engine/geneve.rs +++ b/lib/opte/src/engine/geneve.rs @@ -8,24 +8,23 @@ //! //! 
RFC 8926 Geneve: Generic Network Virtualization Encapsulation -use super::ether::ETHER_TYPE_ETHER; use super::headers::ModifyAction; use super::headers::PushAction; -use super::headers::RawHeader; -use super::packet::PacketReadMut; -use super::packet::ReadErr; -use super::udp::UdpHdr; -use super::udp::UdpMeta; -use crate::d_error::DError; -use core::mem; +use super::packet::MismatchError; +use super::packet::ParseError; +use ingot::geneve::Geneve; +use ingot::geneve::GeneveFlags; +use ingot::geneve::GeneveOpt; +use ingot::geneve::GeneveOptRef; +use ingot::geneve::GeneveRef; +use ingot::geneve::ValidGeneve; +use ingot::types::Header; +use ingot::types::HeaderLen; +use ingot::udp::Udp; pub use opte_api::Vni; use serde::Deserialize; use serde::Serialize; -use zerocopy::AsBytes; -use zerocopy::FromBytes; -use zerocopy::FromZeroes; -use zerocopy::Ref; -use zerocopy::Unaligned; +use zerocopy::ByteSlice; pub const GENEVE_VSN: u8 = 0; pub const GENEVE_VER_MASK: u8 = 0xC0; @@ -40,6 +39,66 @@ pub const GENEVE_OPT_RESERVED_SHIFT: u8 = 5; pub const GENEVE_OPT_RESERVED_MASK: u8 = (1 << GENEVE_OPT_RESERVED_SHIFT) - 1; pub const GENEVE_OPT_CLASS_OXIDE: u16 = 0x0129; +#[inline] +pub fn validate_geneve( + pkt: &ValidGeneve, +) -> Result<(), ParseError> { + if pkt.version() != 0 { + return Err(ParseError::IllegalValue(MismatchError { + location: c"Geneve.version", + expected: 0, + actual: pkt.version() as u64, + })); + } + + if pkt.flags().contains(GeneveFlags::CRITICAL_OPTS) { + match pkt.options_ref() { + ingot::types::FieldRef::Repr(g) => { + for opt in g.iter() { + if !opt.option_type.is_critical() { + continue; + } + + GeneveOption::from_code_and_ty( + opt.class, + opt.option_type.0, + )?; + } + } + ingot::types::FieldRef::Raw(Header::Repr(g)) => { + for opt in g.iter() { + if !opt.option_type.is_critical() { + continue; + } + + GeneveOption::from_code_and_ty( + opt.class, + opt.option_type.0, + )?; + } + } + ingot::types::FieldRef::Raw(Header::Raw(g)) => { + for opt in 
g.iter(None) { + let Ok(opt) = opt else { + break; + }; + + if !opt.option_type().is_critical() { + continue; + } + + GeneveOption::from_code_and_ty( + opt.class(), + opt.option_type().0, + )?; + } + } + } + } + + Ok(()) +} + #[derive(Clone, Copy, Debug, Default, Eq, Ord, PartialEq, PartialOrd)] pub struct GeneveMeta { pub entropy: u16, @@ -76,7 +135,7 @@ impl PushAction for GenevePush { #[derive(Clone, Debug, Deserialize, Serialize)] pub struct GeneveMod { - vni: Option, + pub vni: Option, } impl ModifyAction for GeneveMod { @@ -88,50 +147,17 @@ impl ModifyAction for GeneveMod { } impl GeneveMeta { - /// Emit only the inner Geneve header. - #[inline] - pub fn emit_inner(&self, dst: &mut [u8]) { - debug_assert_eq!(dst.len(), self.hdr_len_inner()); - let (base, remainder) = dst.split_at_mut(GeneveHdrRaw::SIZE); - let mut raw = GeneveHdrRaw::new_mut(base).unwrap(); - raw.write(GeneveHdrRaw::from(self)); - - raw.ver_opt_len = if self.oxide_external_pkt { - GeneveOption::Oxide(OxideOption::External).emit(remainder) as u8 - } else { - raw.ver_opt_len - }; - } - - /// Emit a full Geneve encapsulation for an inner packet, including - /// UDP. - /// - /// `total_len` should be precomputed as `self.hdr_len() + body.len()`. - #[inline] - pub fn emit(&self, total_len: u16, dst: &mut [u8]) { - let (udp_buf, geneve_buf) = dst.split_at_mut(UdpHdr::SIZE); - let udp = UdpMeta { - src: self.entropy, - dst: GENEVE_PORT, - len: total_len, - csum: [0; 2], - }; - udp.emit(udp_buf); - - self.emit_inner(geneve_buf); - } - /// Return the length of headers needed to fully Geneve-encapsulate /// a packet, including UDP. #[inline] pub fn hdr_len(&self) -> usize { - UdpHdr::SIZE + self.hdr_len_inner() + Udp::MINIMUM_LENGTH + self.hdr_len_inner() } /// Return the length of only the Geneve header. 
#[inline] pub fn hdr_len_inner(&self) -> usize { - GeneveHdr::BASE_SIZE + self.options_len() + Geneve::MINIMUM_LENGTH + self.options_len() } /// Return the required length (in bytes) needed to store @@ -139,188 +165,13 @@ impl GeneveMeta { pub fn options_len(&self) -> usize { // XXX: This is very special-cased just to enable testing. if self.oxide_external_pkt { - GeneveOptHdrRaw::SIZE + GeneveOpt::MINIMUM_LENGTH } else { 0 } } } -impl<'a> From<(&UdpHdr<'a>, &GeneveHdr<'a>)> for GeneveMeta { - fn from((udp, geneve): (&UdpHdr<'a>, &GeneveHdr<'a>)) -> Self { - let mut out = Self::from(geneve); - out.entropy = udp.src_port(); - out - } -} - -impl<'a> From<&GeneveHdr<'a>> for GeneveMeta { - fn from(geneve: &GeneveHdr<'a>) -> Self { - let mut out = - Self { vni: geneve.vni(), entropy: 0, ..Default::default() }; - - if let Some(ref opts) = geneve.opts { - // XXX: Prevent duplication by making Meta generation fallible - // in same way as Parsing? - // Unwrap safety: Invalid options will have been caught in - // GeneveHdr::parse. - GeneveOption::parse_all(opts, Some(&mut out)).unwrap(); - } - - out - } -} - -pub struct GeneveHdr<'a> { - /// Main body of the Geneve Header. - bytes: Ref<&'a mut [u8], GeneveHdrRaw>, - /// Byte slice occupied by Geneve options. - opts: Option<&'a mut [u8]>, -} - -impl<'a> GeneveHdr<'a> { - pub const BASE_SIZE: usize = mem::size_of::(); - - /// Return the header length, in bytes. - pub fn hdr_len(&self) -> usize { - usize::from(self.bytes.options_len() * 4) + Self::BASE_SIZE - } - - pub fn parse<'b, R>(rdr: &'b mut R) -> Result - where - R: PacketReadMut<'a>, - { - let src = rdr.slice_mut(GeneveHdrRaw::SIZE)?; - let bytes = GeneveHdrRaw::new_mut(src)?; - let opt_len = bytes.options_len_bytes().into(); - let opts = if opt_len != 0 { - let opts_body = rdr.slice_mut(opt_len)?; - - // Check for malformed options. - // XXX: Can we use this to elide some checks when building GeneveMeta? 
- // Otherwise, currently repeated to filter packets at parse time. - GeneveOption::parse_all(opts_body, None)?; - - Some(opts_body) - } else { - None - }; - - Ok(Self { bytes, opts }) - } - - /// Return the VNI. - pub fn vni(&self) -> Vni { - // Unwrap: We know it's legit because we are making sure the - // MSB is zero. - Vni::new(u32::from_be_bytes([ - 0, - self.bytes.vni[0], - self.bytes.vni[1], - self.bytes.vni[2], - ])) - .unwrap() - } -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq, DError)] -#[derror(leaf_data = GeneveHdrError::derror_data)] -pub enum GeneveHdrError { - BadDstPort { dst_port: u16 }, - BadLength { len: u16 }, - BadVersion { vsn: u8 }, - BadVni { vni: u32 }, - ReadError(ReadErr), - UnexpectedProtocol { protocol: u16 }, - UnknownCriticalOption { class: u16, opt_type: u8 }, -} - -impl From for GeneveHdrError { - fn from(error: ReadErr) -> Self { - GeneveHdrError::ReadError(error) - } -} - -impl GeneveHdrError { - fn derror_data(&self, data: &mut [u64]) { - [data[0], data[1]] = match self { - Self::BadDstPort { dst_port } => [*dst_port as u64, 0], - Self::BadLength { len } => [*len as u64, 0], - Self::BadVersion { vsn } => [*vsn as u64, 0], - Self::BadVni { vni } => [*vni as u64, 0], - Self::UnexpectedProtocol { protocol } => [*protocol as u64, 0], - Self::UnknownCriticalOption { class, opt_type } => { - [*class as u64, *opt_type as u64] - } - _ => [0, 0], - } - } -} - -/// Note: For now we keep this unaligned to be safe. -#[repr(C)] -#[derive(Clone, Debug, FromBytes, AsBytes, FromZeroes, Unaligned)] -pub struct GeneveHdrRaw { - ver_opt_len: u8, - flags: u8, - proto: [u8; 2], - vni: [u8; 3], - reserved: u8, -} - -impl GeneveHdrRaw { - /// Return the length of the Geneve options in 4-byte units. - pub fn options_len(&self) -> u8 { - self.ver_opt_len & GENEVE_OPT_LEN_MASK - } - - /// Return the length of the Geneve options in bytes. 
- pub fn options_len_bytes(&self) -> u8 { - self.options_len() << GENEVE_OPT_LEN_SCALE_SHIFT - } - - pub fn version(&self) -> u8 { - (self.ver_opt_len & GENEVE_VER_MASK) >> GENEVE_VER_SHIFT - } -} - -impl<'a> RawHeader<'a> for GeneveHdrRaw { - #[inline] - fn new_mut(src: &mut [u8]) -> Result, ReadErr> { - debug_assert_eq!(src.len(), mem::size_of::()); - let hdr = match Ref::new(src) { - Some(hdr) => hdr, - None => return Err(ReadErr::BadLayout), - }; - Ok(hdr) - } -} - -impl Default for GeneveHdrRaw { - fn default() -> Self { - Self { - ver_opt_len: 0x0, - flags: 0x0, - proto: ETHER_TYPE_ETHER.to_be_bytes(), - vni: [0x0; 3], - reserved: 0, - } - } -} - -impl From<&GeneveMeta> for GeneveHdrRaw { - fn from(meta: &GeneveMeta) -> Self { - Self { - ver_opt_len: (meta.options_len() >> GENEVE_OPT_LEN_SCALE_SHIFT) - as u8, - flags: 0x0, - proto: ETHER_TYPE_ETHER.to_be_bytes(), - vni: meta.vni.bytes(), - reserved: 0, - } - } -} - /// Parsed form of an individual Geneve option TLV. /// /// These are grouped by the vendor `class`es understood by OPTE. @@ -330,60 +181,16 @@ pub enum GeneveOption { } impl GeneveOption { - /// Parse and check validity for all options attached to a Geneve - /// header, recording known extensions in a [`GeneveMeta`] if - /// given. 
- pub fn parse_all( - mut src: &[u8], - mut meta: Option<&mut GeneveMeta>, - ) -> Result<(), GeneveHdrError> { - while !src.is_empty() { - let option = GeneveOption::parse(&mut src)?; - if let Some(ref mut meta) = meta { - #[allow(clippy::single_match)] - match option { - Some(GeneveOption::Oxide(OxideOption::External)) => { - meta.oxide_external_pkt = true - } - _ => {} - } + #[inline] + pub fn from_code_and_ty(class: u16, ty: u8) -> Result { + match (class, ty) { + (GENEVE_OPT_CLASS_OXIDE, v) + if OxideOption::External.opt_type() == v => + { + Ok(Self::Oxide(OxideOption::External)) } + _ => Err(ParseError::UnrecognisedTunnelOpt { class, ty }), } - - Ok(()) - } - - /// Parse an individual Geneve option from a byte slice, advancing the - /// read location. - pub fn parse(src: &mut &[u8]) -> Result, GeneveHdrError> { - let (head, tail) = src.split_at(GeneveOptHdrRaw::SIZE); - let opt_header = GeneveOptHdrRaw::new(head)?; - let needed_bytes = opt_header.options_len_bytes() as usize; - if tail.len() < needed_bytes { - return Err(GeneveHdrError::BadLength { len: needed_bytes as u16 }); - } - - let class = u16::from_be_bytes(opt_header.option_class); - let opt_type = opt_header.option_type(); - - // We don't yet have any options which need body parsing. - // This will skip over them regardless. - let (_body, tail) = tail.split_at(needed_bytes); - *src = tail; - - // XXX: Break this out into a trait/impls to handle more cleanly. - Ok(match (class, opt_header.option_type()) { - (GENEVE_OPT_CLASS_OXIDE, 0) => { - Some(GeneveOption::Oxide(OxideOption::External)) - } - _ if opt_header.is_critical() => { - return Err(GeneveHdrError::UnknownCriticalOption { - class, - opt_type, - }) - } - _ => None, - }) } /// Return the wire-length of this option in bytes, including headers. @@ -392,24 +199,6 @@ impl GeneveOption { GeneveOption::Oxide(o) => o.len(), } } - - /// Emit an option, returning the number of 4-byte chunks written. 
- pub fn emit(&self, dst: &mut [u8]) -> usize { - let mut raw = GeneveOptHdrRaw::new_mut(dst).unwrap(); - - let (class, opt_type, len) = match self { - Self::Oxide(o) => ( - GENEVE_OPT_CLASS_OXIDE, - o.opt_type(), - o.len() >> GENEVE_OPT_LEN_SCALE_SHIFT, - ), - }; - raw.option_class = class.to_be_bytes(); - raw.crit_type = opt_type; - raw.reserved_len = len as u8; - - len + 1 - } } /// Geneve options defined by Oxide, [`GENEVE_OPT_CLASS_OXIDE`]. @@ -430,75 +219,104 @@ impl OxideOption { } /// Return the option type number. - pub fn opt_type(&self) -> u8 { + pub const fn opt_type(&self) -> u8 { match self { OxideOption::External => 0, } } } -/// Field layout for a single Geneve option. -/// -/// Note: Unaligned on the same rationale as [`GeneveHdrRaw`]. -#[repr(C)] -#[derive(Clone, Debug, FromBytes, AsBytes, FromZeroes, Unaligned)] -pub struct GeneveOptHdrRaw { - option_class: [u8; 2], - crit_type: u8, - reserved_len: u8, -} - -impl GeneveOptHdrRaw { - /// Indicates whether this option is critical, and MUST be dropped - /// if not understood by a tunnel endpoint. - pub fn is_critical(&self) -> bool { - (self.crit_type >> GENEVE_OPT_CRIT_SHIFT) != 0 +// We probably want a more general way to retrieve all facts we care about +// from the geneve options -- we only have the one today, however. +#[inline] +pub fn geneve_has_oxide_external(pkt: &Geneve) -> bool { + let mut out = false; + for opt in pkt.options.iter() { + out = matches!( + GeneveOption::from_code_and_ty(opt.class, opt.option_type.0,), + Ok(GeneveOption::Oxide(OxideOption::External)) + ); + if out { + break; + } } - /// Return the type of this header. - pub fn option_type(&self) -> u8 { - self.crit_type & GENEVE_OPT_TYPE_MASK - } + out +} - /// Return the length of this Geneve option's body in 4-byte units. 
- pub fn options_len(&self) -> u8 { - self.reserved_len & GENEVE_OPT_RESERVED_MASK +#[inline] +pub fn valid_geneve_has_oxide_external( + pkt: &ValidGeneve, +) -> bool { + let mut out = false; + + match pkt.options_ref() { + ingot::types::FieldRef::Repr(g) => { + for opt in g.iter() { + out = matches!( + GeneveOption::from_code_and_ty( + opt.class, + opt.option_type.0, + ), + Ok(GeneveOption::Oxide(OxideOption::External)) + ); + if out { + break; + } + } + } + ingot::types::FieldRef::Raw(Header::Repr(g)) => { + for opt in g.iter() { + out = matches!( + GeneveOption::from_code_and_ty( + opt.class, + opt.option_type.0, + ), + Ok(GeneveOption::Oxide(OxideOption::External)) + ); + if out { + break; + } + } + } + ingot::types::FieldRef::Raw(Header::Raw(g)) => { + for opt in g.iter(None) { + let Ok(opt) = opt else { + break; + }; + + out = matches!( + GeneveOption::from_code_and_ty( + opt.class(), + opt.option_type().0, + ), + Ok(GeneveOption::Oxide(OxideOption::External)) + ); + if out { + break; + } + } + } } - /// Return the length of the Geneve options in bytes. 
- pub fn options_len_bytes(&self) -> u8 { - self.options_len() << GENEVE_OPT_LEN_SCALE_SHIFT - } + out } -impl<'a> RawHeader<'a> for GeneveOptHdrRaw { - #[inline] - fn new_mut(src: &mut [u8]) -> Result, ReadErr> { - debug_assert_eq!(src.len(), mem::size_of::()); - let hdr = match Ref::new(src) { - Some(hdr) => hdr, - None => return Err(ReadErr::BadLayout), - }; - Ok(hdr) - } - - #[inline] - fn new(src: &[u8]) -> Result, ReadErr> { - debug_assert_eq!(src.len(), mem::size_of::()); - let hdr = match Ref::new(src) { - Some(hdr) => hdr, - None => return Err(ReadErr::BadLayout), - }; - Ok(hdr) - } +#[inline(always)] +pub fn geneve_opt_is_oxide_external( + opt: &impl GeneveOptRef, +) -> bool { + opt.class() == GENEVE_OPT_CLASS_OXIDE + && opt.option_type().0 == OxideOption::External.opt_type() } #[cfg(test)] mod test { - use core::matches; - use super::*; - use crate::engine::packet::Packet; + use crate::engine::headers::EncapMeta; + use ingot::types::Emit; + use ingot::types::HeaderParse; + use ingot::udp::ValidUdp; #[test] fn emit_no_opts() { @@ -510,13 +328,9 @@ mod test { }; let len = geneve.hdr_len(); - let mut pkt = Packet::alloc_and_expand(len); - let mut wtr = pkt.seg0_wtr(); - geneve.emit( - geneve.hdr_len().try_into().unwrap(), - wtr.slice_mut(len).unwrap(), - ); - assert_eq!(len, pkt.len()); + let emitted = EncapMeta::Geneve(geneve).to_vec(); + assert_eq!(len, emitted.len()); + #[rustfmt::skip] let expected_bytes = vec![ // source @@ -536,7 +350,7 @@ mod test { // vni + reserved 0x00, 0x04, 0xD2, 0x00 ]; - assert_eq!(&expected_bytes, pkt.seg_bytes(0)); + assert_eq!(expected_bytes, emitted); } #[test] @@ -548,13 +362,9 @@ mod test { }; let len = geneve.hdr_len(); - let mut pkt = Packet::alloc_and_expand(len); - let mut wtr = pkt.seg0_wtr(); - geneve.emit( - geneve.hdr_len().try_into().unwrap(), - wtr.slice_mut(len).unwrap(), - ); - assert_eq!(len, pkt.len()); + let emitted = EncapMeta::Geneve(geneve).to_vec(); + assert_eq!(len, emitted.len()); + #[rustfmt::skip] 
let expected_bytes = vec![ // source @@ -581,7 +391,7 @@ mod test { // rsvd + len 0x00, ]; - assert_eq!(&expected_bytes, pkt.seg_bytes(0)); + assert_eq!(&expected_bytes, &emitted[..]); } #[test] @@ -613,64 +423,20 @@ mod test { // rsvd + len 0x00, ]; - let mut pkt = Packet::copy(&buf); - let mut reader = pkt.get_rdr_mut(); - let udp = UdpHdr::parse(&mut reader).unwrap(); - let header = GeneveHdr::parse(&mut reader).unwrap(); - - // Previously, the `Ipv6Meta::total_len` method double-counted the - // extension header length. Assert we don't do that here. - let meta = GeneveMeta::from((&udp, &header)); - assert_eq!( - meta.entropy, - u16::from_be_bytes(buf[0..2].try_into().unwrap()) - ); - assert!(meta.oxide_external_pkt); - } - #[test] - fn bad_opt_len_fails() { - // Create a packet with one extension header. - #[rustfmt::skip] - let buf = vec![ - // source - 0x1E, 0x61, - // dest - 0x17, 0xC1, - // length - 0x00, 0x14, - // csum - 0x00, 0x00, - // ver + BAD opt len - 0x01, - // flags - 0x00, - // proto - 0x65, 0x58, - // vni + reserved - 0x00, 0x04, 0xD2, 0x00, + let (.., rem) = ValidUdp::parse(&buf[..]).unwrap(); + let (geneve, ..) = ValidGeneve::parse(rem).unwrap(); - // option class - 0x01, 0x29, - // crt + type - 0x01, - // rsvd + len - 0x01, - // body - 0x00, 0x00, 0x00, 0x00 - ]; - let mut pkt = Packet::copy(&buf); - let mut reader = pkt.get_rdr_mut(); - UdpHdr::parse(&mut reader).unwrap(); - assert!(matches!( - GeneveHdr::parse(&mut reader), - Err(GeneveHdrError::BadLength { .. }), - )); + validate_geneve(&geneve).unwrap(); + + assert!(valid_geneve_has_oxide_external(&geneve)); } #[test] fn unknown_crit_option_fails() { - // Create a packet with one extension header. + // Create a packet with one extension header with the critical + // flag set. + // We do not understand this extension, so must drop the packet. 
#[rustfmt::skip] let buf = vec![ // source @@ -697,21 +463,22 @@ mod test { // rsvd + len 0x00, ]; - let mut pkt = Packet::copy(&buf); - let mut reader = pkt.get_rdr_mut(); - UdpHdr::parse(&mut reader).unwrap(); + + let (_udp, _, rem) = ValidUdp::parse(&buf[..]).unwrap(); + let (geneve, ..) = ValidGeneve::parse(rem).unwrap(); + assert!(matches!( - GeneveHdr::parse(&mut reader), - Err(GeneveHdrError::UnknownCriticalOption { - class: 0xff_ff, - opt_type: 0 - }), + validate_geneve(&geneve), + Err(ParseError::UnrecognisedTunnelOpt { class: 0xffff, ty: 0x80 }), )); } #[test] fn parse_multi_opt() { - // Create a packet with one extension header. + // Create a packet with three extension headers. + // None are critical, so validation must still succeed. + // We should also be able to extract info on the options we *do* + // care about. #[rustfmt::skip] let buf = vec![ // source @@ -756,18 +523,11 @@ mod test { // body 0x00, 0x00, 0x00, 0x00, ]; - let mut pkt = Packet::copy(&buf); - let mut reader = pkt.get_rdr_mut(); - let udp = UdpHdr::parse(&mut reader).unwrap(); - let header = GeneveHdr::parse(&mut reader).unwrap(); - - // Previously, the `Ipv6Meta::total_len` method double-counted the - // extension header length. Assert we don't do that here. - let meta = GeneveMeta::from((&udp, &header)); - assert_eq!( - meta.entropy, - u16::from_be_bytes(buf[0..2].try_into().unwrap()) - ); - assert!(meta.oxide_external_pkt); + + let (.., rem) = ValidUdp::parse(&buf[..]).unwrap(); + let (geneve, ..) = ValidGeneve::parse(rem).unwrap(); + + validate_geneve(&geneve).unwrap(); + assert!(valid_geneve_has_oxide_external(&geneve)); } } diff --git a/lib/opte/src/engine/headers.rs b/lib/opte/src/engine/headers.rs index 646c0217..83fa6326 100644 --- a/lib/opte/src/engine/headers.rs +++ b/lib/opte/src/engine/headers.rs @@ -2,69 +2,50 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-// Copyright 2022 Oxide Computer Company +// Copyright 2024 Oxide Computer Company -//! Header metadata combinations for IP, ULP, and Encap. +//! Header metadata modifications for IP, ULP, and Encap. -use super::checksum::Checksum; -use super::geneve::GeneveHdr; use super::geneve::GeneveMeta; use super::geneve::GeneveMod; use super::geneve::GenevePush; -use super::icmp::IcmpHdr; -use super::icmp::Icmpv4Meta; -use super::icmp::Icmpv6Meta; -use super::ip4::Ipv4Hdr; -use super::ip4::Ipv4Meta; -use super::ip4::Ipv4Mod; -use super::ip4::Ipv4Push; -use super::ip6::Ipv6Hdr; -use super::ip6::Ipv6Meta; -use super::ip6::Ipv6Mod; -use super::ip6::Ipv6Push; -use super::packet::ReadErr; -use super::tcp::TcpHdr; -use super::tcp::TcpMeta; +use super::geneve::OxideOption; +use super::geneve::GENEVE_OPT_CLASS_OXIDE; +use super::geneve::GENEVE_PORT; +use super::ip::v4::Ipv4Mod; +use super::ip::v4::Ipv4Push; +use super::ip::v6::Ipv6Mod; +use super::ip::v6::Ipv6Push; use super::tcp::TcpMod; use super::tcp::TcpPush; -use super::udp::UdpHdr; -use super::udp::UdpMeta; use super::udp::UdpMod; use super::udp::UdpPush; -use crate::engine::icmp::QueryEcho; use core::fmt; +use ingot::ethernet::Ethertype; +use ingot::geneve::Geneve; +use ingot::geneve::GeneveMut; +use ingot::geneve::GeneveOpt; +use ingot::geneve::GeneveOptionType; +use ingot::geneve::ValidGeneve; +use ingot::types::util::Repeated; +use ingot::types::Emit; +use ingot::types::Header; +use ingot::types::HeaderLen; +use ingot::types::InlineHeader; +use ingot::udp::Udp; +use ingot::udp::ValidUdp; pub use opte_api::IpAddr; pub use opte_api::IpCidr; pub use opte_api::Protocol; pub use opte_api::Vni; use serde::Deserialize; use serde::Serialize; -use zerocopy::Ref; +use zerocopy::ByteSlice; +use zerocopy::ByteSliceMut; pub const AF_INET: i32 = 2; pub const AF_INET6: i32 = 26; -/// A raw header. -/// -/// A raw header is the most basic and raw representation of a given -/// header type. 
A raw header value preserves the bytes as they are, -/// in network order. A raw header undergoes no validation of header -/// fields. A raw header represents only the base header, eschewing -/// any options or extensions. -pub trait RawHeader<'a>: Sized { - const SIZE: usize = core::mem::size_of::(); - - /// Create a mutable, zerocopy version of the raw header from the - /// src. - fn new_mut(src: &mut [u8]) -> Result, ReadErr>; - - /// Create an immutable, zerocopy version of the raw header from the - /// src. - fn new(_src: &[u8]) -> Result, ReadErr> { - Err(ReadErr::NotImplemented) - } -} - pub trait PushAction { fn push(&self) -> HdrM; } @@ -75,137 +56,12 @@ pub trait ModifyAction { fn modify(&self, meta: &mut HdrM); } -#[derive(Clone, Copy, Debug)] -pub enum IpType { - Ipv4, - Ipv6, -} - -#[derive(Debug)] -pub enum IpHdr<'a> { - Ip4(Ipv4Hdr<'a>), - Ip6(Ipv6Hdr<'a>), -} - -impl<'a> IpHdr<'a> { - pub fn pseudo_csum(&self) -> Checksum { - match self { - Self::Ip4(ip4) => ip4.pseudo_csum(), - Self::Ip6(ip6) => ip6.pseudo_csum(), - } - } -} - -impl<'a> From> for IpHdr<'a> { - fn from(ip4: Ipv4Hdr<'a>) -> Self { - Self::Ip4(ip4) - } -} - -impl<'a> From> for IpHdr<'a> { - fn from(ip6: Ipv6Hdr<'a>) -> Self { - Self::Ip6(ip6) - } -} - -#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd, Copy)] -pub enum IpMeta { - Ip4(Ipv4Meta), - Ip6(Ipv6Meta), -} - -impl IpMeta { - /// Return the checksum value. - pub fn csum(&self) -> [u8; 2] { - match self { - Self::Ip4(ip4) => ip4.csum, - // IPv6 has no checksum. - Self::Ip6(_) => [0; 2], - } - } - - pub fn has_csum(&self) -> bool { - match self { - Self::Ip4(ip4) => ip4.csum != [0; 2], - // IPv6 has no checksum. 
- Self::Ip6(_) => false, - } - } - - pub fn emit(&self, dst: &mut [u8]) { - match self { - Self::Ip4(ip4) => ip4.emit(dst), - Self::Ip6(ip6) => ip6.emit(dst), - } - } - - pub fn hdr_len(&self) -> usize { - match self { - Self::Ip4(ip4) => ip4.hdr_len(), - Self::Ip6(ip6) => ip6.hdr_len(), - } - } - - /// Get the [`Ipv4Meta`], if this is IPv4. - pub fn ip4(&self) -> Option<&Ipv4Meta> { - match self { - Self::Ip4(meta) => Some(meta), - _ => None, - } - } - - /// Get the [`Ipv6Meta`], if this is IPv6. - pub fn ip6(&self) -> Option<&Ipv6Meta> { - match self { - Self::Ip6(meta) => Some(meta), - _ => None, - } - } - - /// Get the [`Protocol`]. - pub fn proto(&self) -> Protocol { - match self { - Self::Ip4(meta) => meta.proto, - Self::Ip6(meta) => meta.proto, - } - } - - pub fn pseudo_csum(&self) -> Checksum { - match self { - Self::Ip4(ip4) => ip4.pseudo_csum(), - Self::Ip6(ip6) => ip6.pseudo_csum(), - } - } -} - -impl From for IpMeta { - fn from(ip4: Ipv4Meta) -> Self { - IpMeta::Ip4(ip4) - } -} - -impl From for IpMeta { - fn from(ip6: Ipv6Meta) -> Self { - IpMeta::Ip6(ip6) - } -} - #[derive(Clone, Copy, Debug, Deserialize, Serialize)] pub enum IpPush { Ip4(Ipv4Push), Ip6(Ipv6Push), } -impl PushAction for IpPush { - fn push(&self) -> IpMeta { - match self { - Self::Ip4(spec) => IpMeta::from(spec.push()), - - Self::Ip6(spec) => IpMeta::from(spec.push()), - } - } -} - impl From for IpPush { fn from(ip4: Ipv4Push) -> Self { Self::Ip4(ip4) @@ -248,27 +104,6 @@ impl IpMod { } } -impl ModifyAction for IpMod { - fn modify(&self, meta: &mut IpMeta) { - match (self, meta) { - (IpMod::Ip4(spec), IpMeta::Ip4(meta)) => { - spec.modify(meta); - } - - (IpMod::Ip6(spec), IpMeta::Ip6(meta)) => { - spec.modify(meta); - } - - (meta, spec) => { - panic!( - "Different IP versions for meta and spec: {:?} {:?}", - meta, spec - ); - } - } - } -} - impl From for IpMod { fn from(ip4: Ipv4Mod) -> Self { Self::Ip4(ip4) @@ -281,16 +116,6 @@ impl From for IpMod { } } -pub enum EncapHdr<'a> { - 
Geneve(GeneveHdr<'a>), -} - -impl<'a> From> for EncapHdr<'a> { - fn from(hdr: GeneveHdr<'a>) -> Self { - Self::Geneve(hdr) - } -} - #[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)] pub enum EncapMeta { Geneve(GeneveMeta), @@ -346,145 +171,202 @@ impl EncapMeta { } } -#[derive(Debug)] -pub enum UlpHdr<'a> { - Icmpv4(IcmpHdr<'a>), - Icmpv6(IcmpHdr<'a>), - Tcp(TcpHdr<'a>), - Udp(UdpHdr<'a>), -} - -impl<'a> UlpHdr<'a> { - pub fn csum_minus_hdr(&self) -> Option { - match self { - Self::Icmpv4(icmp) | Self::Icmpv6(icmp) => icmp.csum_minus_hdr(), - Self::Tcp(tcp) => tcp.csum_minus_hdr(), - Self::Udp(udp) => udp.csum_minus_hdr(), - } - } - - pub fn hdr_len(&self) -> usize { - match self { - Self::Icmpv4(icmp) | Self::Icmpv6(icmp) => icmp.hdr_len(), - Self::Tcp(tcp) => tcp.hdr_len(), - Self::Udp(udp) => udp.hdr_len(), - } - } - - pub fn set_pay_len(&mut self, len: usize) { - match self { - // Nothing to do for ICMP(v6) or TCP which determine payload len - // from IP header. - Self::Icmpv4(_) | Self::Icmpv6(_) => (), - Self::Tcp(_tcp) => (), - Self::Udp(udp) => udp.set_pay_len(len as u16), +impl HeaderActionModify + for InlineHeader> +{ + #[inline] + fn run_modify( + &mut self, + mod_spec: &EncapMod, + ) -> Result<(), HeaderActionError> { + match (self, mod_spec) { + ( + InlineHeader::Repr(EncapMeta::Geneve(g)), + EncapMod::Geneve(mod_spec), + ) => { + if let Some(vni) = mod_spec.vni { + g.vni = vni; + } + } + ( + InlineHeader::Raw(ValidEncapMeta::Geneve(_, g)), + EncapMod::Geneve(mod_spec), + ) => { + if let Some(vni) = mod_spec.vni { + g.set_vni(vni); + } + } } - } - pub fn set_total_len(&mut self, len: usize) { - match self { - // Nothing to do for ICMP(v6) or TCP which determine payload len - // from IP header. 
- Self::Icmpv4(_) | Self::Icmpv6(_) => (), - Self::Tcp(_tcp) => (), - Self::Udp(udp) => udp.set_len(len as u16), - } + Ok(()) } +} - pub fn udp(&self) -> Option<&UdpHdr> { - match self { - Self::Udp(udp) => Some(udp), - _ => None, - } - } +impl HasInnerCksum + for InlineHeader> +{ + const HAS_CKSUM: bool = false; } -impl<'a> From> for UlpHdr<'a> { - fn from(tcp: TcpHdr<'a>) -> Self { - UlpHdr::Tcp(tcp) +impl From for Header> { + #[inline] + fn from(value: EncapMeta) -> Self { + Header::Repr(value.into()) } } -impl<'a> From> for UlpHdr<'a> { - fn from(udp: UdpHdr<'a>) -> Self { - Self::Udp(udp) +impl From + for InlineHeader> +{ + #[inline] + fn from(value: EncapMeta) -> Self { + InlineHeader::Repr(value) } } -#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)] -pub enum UlpMeta { - Icmpv4(Icmpv4Meta), - Icmpv6(Icmpv6Meta), - Tcp(TcpMeta), - Udp(UdpMeta), +pub enum ValidEncapMeta { + Geneve(ValidUdp, ValidGeneve), } -impl UlpMeta { - /// Return the checksum value. - pub fn csum(&self) -> [u8; 2] { - match self { - Self::Icmpv4(icmp) => icmp.csum, - Self::Icmpv6(icmp6) => icmp6.csum, - Self::Tcp(tcp) => tcp.csum, - Self::Udp(udp) => udp.csum, - } - } - - pub fn has_csum(&self) -> bool { - self.csum() != [0; 2] +impl Emit for EncapMeta { + #[inline] + fn emit_raw(&self, buf: V) -> usize { + SizeHoldingEncap { encapped_len: 0, meta: self }.emit_raw(buf) } - pub fn is_pseudoheader_in_csum(&self) -> bool { - !matches!(self, Self::Icmpv4(_)) + #[inline] + fn needs_emit(&self) -> bool { + true } +} - /// Return the destination port, if any. 
- pub fn dst_port(&self) -> Option { +impl Emit for ValidEncapMeta { + #[inline] + fn emit_raw(&self, buf: V) -> usize { match self { - Self::Icmpv4(_) => None, - Self::Icmpv6(_) => None, - Self::Tcp(tcp) => Some(tcp.dst), - Self::Udp(udp) => Some(udp.dst), + ValidEncapMeta::Geneve(u, g) => (u, g).emit_raw(buf), } } - pub fn hdr_len(&self) -> usize { + #[inline] + fn needs_emit(&self) -> bool { match self { - Self::Icmpv4(icmp) => icmp.hdr_len(), - Self::Icmpv6(icmp6) => icmp6.hdr_len(), - Self::Tcp(tcp) => tcp.hdr_len(), - Self::Udp(udp) => udp.hdr_len(), + ValidEncapMeta::Geneve(u, g) => u.needs_emit() && g.needs_emit(), } } +} + +impl HeaderLen for EncapMeta { + const MINIMUM_LENGTH: usize = Udp::MINIMUM_LENGTH + Geneve::MINIMUM_LENGTH; - /// Return a pseudo port used to differentiate flows if the - /// ULP does not include source/dest ports. - pub fn pseudo_port(&self) -> Option { + #[inline] + fn packet_length(&self) -> usize { match self { - Self::Icmpv4(icmp) => icmp.echo_id(), - Self::Icmpv6(icmp6) => icmp6.echo_id(), - _ => None, + EncapMeta::Geneve(g) => { + Self::MINIMUM_LENGTH + + g.oxide_external_pkt.then_some(4).unwrap_or_default() + } } } +} + +impl HeaderLen for ValidEncapMeta { + const MINIMUM_LENGTH: usize = Udp::MINIMUM_LENGTH + Geneve::MINIMUM_LENGTH; - /// Return the source port, if any. 
- pub fn src_port(&self) -> Option { + #[inline] + fn packet_length(&self) -> usize { match self { - Self::Icmpv4(_) => None, - Self::Icmpv6(_) => None, - Self::Tcp(tcp) => Some(tcp.src), - Self::Udp(udp) => Some(udp.src), + ValidEncapMeta::Geneve(u, g) => { + u.packet_length() + g.packet_length() + } } } +} - pub fn emit(&self, dst: &mut [u8]) { - match self { - Self::Icmpv4(icmp) => icmp.emit(dst), - Self::Icmpv6(icmp6) => icmp6.emit(dst), - Self::Tcp(tcp) => tcp.emit(dst), - Self::Udp(udp) => udp.emit(dst), +pub struct SizeHoldingEncap<'a> { + pub encapped_len: u16, + pub meta: &'a EncapMeta, +} + +// SAFETY: All Emit writes are done via ingot-generated methods, +// and we don't read any element of `buf` in `SizeHoldingEncap::emit_raw`. +unsafe impl ingot::types::EmitDoesNotRelyOnBufContents + for SizeHoldingEncap<'_> +{ +} + +impl HeaderLen for SizeHoldingEncap<'_> { + const MINIMUM_LENGTH: usize = EncapMeta::MINIMUM_LENGTH; + + #[inline] + fn packet_length(&self) -> usize { + self.meta.packet_length() + } +} + +impl Emit for SizeHoldingEncap<'_> { + #[inline] + fn emit_raw(&self, buf: V) -> usize { + match self.meta { + EncapMeta::Geneve(g) => { + let mut opts = vec![]; + + if g.oxide_external_pkt { + opts.push(GeneveOpt { + class: GENEVE_OPT_CLASS_OXIDE, + option_type: GeneveOptionType( + OxideOption::External.opt_type(), + ), + ..Default::default() + }); + } + + let options = Repeated::new(opts); + let opt_len_unscaled = options.packet_length(); + let opt_len = (opt_len_unscaled >> 2) as u8; + + let geneve = Geneve { + protocol_type: Ethertype::ETHERNET, + vni: g.vni, + opt_len, + options, + ..Default::default() + }; + + let length = self.encapped_len + + (Udp::MINIMUM_LENGTH + geneve.packet_length()) as u16; + + // It's worth noting that we have a zero UDP checksum here, + // which holds true even if we're sending out over IPv6. 
+ // Ordinarily IPv6 requires a full checksum compute for UDP, + // however RFCs 6935 & 6936 make an optional exception for + // tunnelled transports (e.g., Geneve) over UDP/v6. + // Generally OPTE is covered on validity of this: + // * We preserve cksums on inner messages, so their headers and + // payloads are *always* valid. + // * OPTE ports will only accept inbound packets with correct + // Ethernet dest, next headers, L3 dest, and VNI. + // Misdelivery on the basis of IPv6 (or other) corruption + // will lead to a drop. + // This is also reflected in RFC 8200 §8.1 (IPv6 2017). + ( + Udp { + source: g.entropy, + destination: GENEVE_PORT, + length, + ..Default::default() + }, + &geneve, + ) + .emit_raw(buf) + } } } + + #[inline] + fn needs_emit(&self) -> bool { + true + } } #[derive( @@ -495,16 +377,6 @@ pub enum UlpPush { Udp(UdpPush), } -impl PushAction for UlpPush { - fn push(&self) -> UlpMeta { - match self { - Self::Tcp(tcp) => UlpMeta::from(tcp.push()), - - Self::Udp(udp) => UlpMeta::from(udp.push()), - } - } -} - impl From for UlpPush { fn from(tcp: TcpPush) -> Self { UlpPush::Tcp(tcp) @@ -523,24 +395,6 @@ pub enum UlpMod { Udp(UdpMod), } -impl ModifyAction for UlpMod { - fn modify(&self, meta: &mut UlpMeta) { - match (self, meta) { - (Self::Tcp(spec), UlpMeta::Tcp(meta)) => { - spec.modify(meta); - } - - (Self::Udp(spec), UlpMeta::Udp(meta)) => { - spec.modify(meta); - } - - (spec, meta) => { - panic!("differeing ULP meta and spec: {:?} {:?}", meta, spec); - } - } - } -} - impl From for UlpMod { fn from(tcp: TcpMod) -> Self { UlpMod::Tcp(tcp) @@ -553,81 +407,81 @@ impl From for UlpMod { } } -impl From for UlpMeta { - fn from(icmp: Icmpv4Meta) -> Self { - UlpMeta::Icmpv4(icmp) - } -} - -impl From for UlpMeta { - fn from(icmp6: Icmpv6Meta) -> Self { - UlpMeta::Icmpv6(icmp6) - } +pub trait HasInnerCksum { + const HAS_CKSUM: bool; } -impl From for UlpMeta { - fn from(tcp: TcpMeta) -> Self { - UlpMeta::Tcp(tcp) - } -} - -impl From for UlpMeta { - fn 
from(udp: UdpMeta) -> Self { - UlpMeta::Udp(udp) - } +/// Transform a header layer using an OPTE action. +pub trait Transform: HasInnerCksum +where + P: PushAction + fmt::Debug, + M: fmt::Debug, +{ + /// Modify/push/pop self, dependent on a given action. + /// + /// Returns whether we will need a checksum recompute on the target field + /// if it is still present. + fn act_on( + &mut self, + action: &HeaderAction, + ) -> Result; } -impl<'a> From<&UlpHdr<'a>> for UlpMeta { - fn from(ulp: &UlpHdr) -> Self { - match ulp { - UlpHdr::Icmpv4(icmp) => UlpMeta::Icmpv4(Icmpv4Meta::from(icmp)), - UlpHdr::Icmpv6(icmp6) => UlpMeta::Icmpv6(Icmpv6Meta::from(icmp6)), - UlpHdr::Tcp(tcp) => UlpMeta::Tcp(TcpMeta::from(tcp)), - UlpHdr::Udp(udp) => UlpMeta::Udp(UdpMeta::from(udp)), - } - } +impl HasInnerCksum for Option { + const HAS_CKSUM: bool = T::HAS_CKSUM; } -impl HeaderActionModify for UlpMeta { - fn run_modify(&mut self, spec: &UlpMetaModify) { - match self { - UlpMeta::Icmpv4(icmp_meta) => icmp_meta.run_modify(spec), - UlpMeta::Icmpv6(icmp6_meta) => icmp6_meta.run_modify(spec), - UlpMeta::Tcp(tcp_meta) => tcp_meta.run_modify(spec), - UlpMeta::Udp(udp_meta) => udp_meta.run_modify(spec), +impl Transform for X +where + P: PushAction + fmt::Debug, + M: fmt::Debug, + X: HeaderActionModify + From + HasInnerCksum, +{ + #[inline] + fn act_on( + &mut self, + action: &HeaderAction, + ) -> Result { + match action { + HeaderAction::Ignore => Ok(false), + HeaderAction::Push(p) => { + *self = p.push().into(); + Ok(Self::HAS_CKSUM) + } + HeaderAction::Pop => Err(HeaderActionError::CantPop), + HeaderAction::Modify(m) => { + self.run_modify(m)?; + Ok(Self::HAS_CKSUM) + } } } } /// The action to take for a particular header transposition. 
-#[derive(Clone, Debug, Default, Deserialize, Serialize)] -pub enum HeaderAction -where - P: PushAction + fmt::Debug, - M: ModifyAction + fmt::Debug, -{ - Push(P, core::marker::PhantomData), +#[derive(Copy, Clone, Debug, Default, Deserialize, Serialize)] +pub enum HeaderAction { + Push(P), Pop, - Modify(M, core::marker::PhantomData), + Modify(M), #[default] Ignore, } -impl HeaderAction -where - P: PushAction + fmt::Debug, - M: ModifyAction + fmt::Debug, -{ - pub fn run(&self, meta: &mut Option) -> Result<(), HeaderActionError> { +impl HeaderAction { + pub fn run(&self, meta: &mut Option) -> Result<(), HeaderActionError> + where + P: PushAction + fmt::Debug, + M: ModifyAction + fmt::Debug, + { match self { Self::Ignore => (), - Self::Modify(action, _) => match meta { + Self::Modify(action) => match meta { Some(meta) => action.modify(meta), None => return Err(HeaderActionError::MissingHeader), }, - Self::Push(action, _) => { + Self::Push(action) => { meta.replace(action.push()); } @@ -640,19 +494,46 @@ where Ok(()) } + + pub fn act_on_option( + &self, + target: &mut Option, + ) -> Result + where + P: PushAction + fmt::Debug, + M: fmt::Debug, + X: Transform + From, + X: HeaderActionModify + HasInnerCksum, + { + match (self, target) { + (HeaderAction::Ignore, _) => Ok(false), + (HeaderAction::Push(p), a) => { + *a = Some(p.push().into()); + Ok(X::HAS_CKSUM) + } + (HeaderAction::Pop, a) => { + *a = None; + Ok(X::HAS_CKSUM) + } + (a @ HeaderAction::Modify(..), Some(h)) => h.act_on(a), + (_, None) => Err(HeaderActionError::MissingHeader), + } + } } #[derive(Clone, Debug)] pub enum HeaderActionError { MissingHeader, + CantPop, + MalformedExtension, } pub trait ModifyActionArg {} /// A header type that allows itself to be modified via a /// [`ModifyActionArg`] specification. 
-pub trait HeaderActionModify { - fn run_modify(&mut self, mod_spec: &M); +pub trait HeaderActionModify { + fn run_modify(&mut self, mod_spec: &M) -> Result<(), HeaderActionError>; } #[derive(Clone, Debug, Default, Deserialize, Serialize)] @@ -679,18 +560,22 @@ pub enum UlpHeaderAction { } impl UlpHeaderAction { - pub fn run

<P>(&self, meta: &mut Option<P>
) -> Result<(), HeaderActionError> + pub fn run<P>
( + &self, + meta: &mut Option<P>

, + ) -> Result where P: HeaderActionModify, { match self { - Self::Ignore => (), + Self::Ignore => Ok(false), Self::Modify(arg) => match meta { - Some(meta) => meta.run_modify(arg), - None => return Err(HeaderActionError::MissingHeader), + Some(meta) => { + meta.run_modify(arg)?; + Ok(true) + } + None => Err(HeaderActionError::MissingHeader), }, } - - Ok(()) } } diff --git a/lib/opte/src/engine/icmp/mod.rs b/lib/opte/src/engine/icmp/mod.rs index 1170ce74..c44e6fd5 100644 --- a/lib/opte/src/engine/icmp/mod.rs +++ b/lib/opte/src/engine/icmp/mod.rs @@ -9,21 +9,6 @@ pub mod v4; pub mod v6; -use super::checksum::Checksum as OpteCsum; -use super::checksum::HeaderChecksum; -use super::headers::RawHeader; -use super::packet::PacketReadMut; -use super::packet::ReadErr; -use crate::d_error::DError; -use crate::engine::ether::EtherHdr; -use crate::engine::ether::EtherMeta; -use crate::engine::ether::EtherType; -use crate::engine::headers::HeaderActionModify; -use crate::engine::headers::UlpMetaModify; -use crate::engine::packet::Packet; -use crate::engine::packet::PacketMeta; -use crate::engine::packet::PacketRead; -use crate::engine::packet::PacketReader; use crate::engine::predicate::DataPredicate; use crate::engine::predicate::EtherAddrMatch; use crate::engine::predicate::IpProtoMatch; @@ -35,66 +20,14 @@ use crate::engine::rule::HairpinAction; use alloc::vec::Vec; use core::fmt; use core::fmt::Display; +use ingot::types::primitives::u16be; +use ingot::Ingot; pub use opte_api::ip::Protocol; use serde::Deserialize; use serde::Serialize; use smoltcp::phy::Checksum; use smoltcp::phy::ChecksumCapabilities as Csum; -pub use v4::Icmpv4Meta; -pub use v6::Icmpv6Meta; -use zerocopy::AsBytes; -use zerocopy::FromBytes; -use zerocopy::FromZeroes; -use zerocopy::Ref; -use zerocopy::Unaligned; - -#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)] -pub struct IcmpMeta { - pub msg_type: T, - pub msg_code: u8, - pub csum: [u8; 2], - pub rest_of_header: [u8; 4], -} - -impl + 
Copy> IcmpMeta { - // This assumes the dst is large enough. - #[inline] - pub fn emit(&self, dst: &mut [u8]) { - debug_assert!(dst.len() >= IcmpHdr::SIZE); - dst[0] = self.msg_type.into(); - dst[1] = self.msg_code; - dst[2..4].copy_from_slice(&self.csum); - dst[4..8].copy_from_slice(&self.rest_of_header); - } - - #[inline] - pub fn hdr_len(&self) -> usize { - IcmpHdr::SIZE - } - - #[inline] - pub fn body_echo(&self) -> Ref<&[u8], IcmpEchoRaw> { - // Panic safety: Size *must* be 4B by construction. - IcmpEchoRaw::new(&self.rest_of_header[..]).unwrap() - } - - #[inline] - pub fn body_echo_mut(&mut self) -> Ref<&mut [u8], IcmpEchoRaw> { - // Panic safety: Size *must* be 4B by construction. - IcmpEchoRaw::new_mut(&mut self.rest_of_header[..]).unwrap() - } -} - -impl<'a, T: From> From<&IcmpHdr<'a>> for IcmpMeta { - fn from(hdr: &IcmpHdr<'a>) -> Self { - Self { - msg_type: hdr.base.msg_type.into(), - msg_code: hdr.base.msg_code, - csum: hdr.base.csum, - rest_of_header: hdr.base.rest_of_header, - } - } -} +use zerocopy::ByteSlice; /// Shared methods for handling ICMPv4/v6 Echo fields. pub trait QueryEcho { @@ -104,133 +37,10 @@ pub trait QueryEcho { fn echo_id(&self) -> Option; } -// This covers both v4/v6 ICMP Echo rewriting for SNAT compatibility. 
-impl + Copy> HeaderActionModify for IcmpMeta -where - IcmpMeta: QueryEcho, -{ - fn run_modify(&mut self, spec: &UlpMetaModify) { - let Some(new_id) = spec.icmp_id else { - return; - }; - - if self.echo_id().is_none() { - return; - } - - let mut echo_data = self.body_echo_mut(); - echo_data.id = new_id.to_be_bytes(); - } -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq, DError)] -pub enum IcmpHdrError { - ReadError(ReadErr), -} - -impl From for IcmpHdrError { - fn from(error: ReadErr) -> Self { - IcmpHdrError::ReadError(error) - } -} - -#[derive(Debug)] -pub struct IcmpHdr<'a> { - base: Ref<&'a mut [u8], IcmpHdrRaw>, -} - -impl<'a> IcmpHdr<'a> { - pub const SIZE: usize = IcmpHdrRaw::SIZE; - - /// Offset to the start of the ICMP(v6) checksum field. - pub const CSUM_BEGIN_OFFSET: usize = 2; - - /// Offset to the end of the ICMP(v6) checksum field. - pub const CSUM_END_OFFSET: usize = 4; - - pub fn csum_minus_hdr(&self) -> Option { - if self.base.csum != [0; 2] { - let mut csum = OpteCsum::from(HeaderChecksum::wrap(self.base.csum)); - let bytes = self.base.bytes(); - csum.sub_bytes(&bytes[..Self::CSUM_BEGIN_OFFSET]); - csum.sub_bytes(&bytes[Self::CSUM_END_OFFSET..]); - Some(csum) - } else { - None - } - } - - /// Return the header length, in bytes. - pub fn hdr_len(&self) -> usize { - Self::SIZE - } - - pub fn parse<'b>( - rdr: &'b mut impl PacketReadMut<'a>, - ) -> Result { - let src = rdr.slice_mut(IcmpHdr::SIZE)?; - Ok(Self { base: IcmpHdrRaw::new_mut(src)? }) - } -} - -/// Note: For now we keep this unaligned to be safe. -#[repr(C)] -#[derive(Clone, Debug, FromBytes, AsBytes, FromZeroes, Unaligned)] -pub struct IcmpHdrRaw { - pub msg_type: u8, - pub msg_code: u8, - pub csum: [u8; 2], - pub rest_of_header: [u8; 4], -} - -impl IcmpHdrRaw { - /// An ICMP(v6) header is always 8 bytes. 
- pub const SIZE: usize = core::mem::size_of::(); -} - -impl<'a> RawHeader<'a> for IcmpHdrRaw { - #[inline] - fn new_mut(src: &mut [u8]) -> Result, ReadErr> { - debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::new(src) { - Some(hdr) => hdr, - None => return Err(ReadErr::BadLayout), - }; - Ok(hdr) - } -} - /// Internal structure of an ICMP(v6) Echo(Reply)'s rest_of_header. -#[repr(C)] -#[derive(Clone, Debug, FromBytes, AsBytes, FromZeroes, Unaligned)] -pub struct IcmpEchoRaw { - pub id: [u8; 2], - pub sequence: [u8; 2], -} - -impl IcmpEchoRaw { - /// Echo-specific fields are always 4 bytes. - pub const SIZE: usize = core::mem::size_of::(); -} - -impl<'a> RawHeader<'a> for IcmpEchoRaw { - #[inline] - fn new_mut(src: &mut [u8]) -> Result, ReadErr> { - debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::new(src) { - Some(hdr) => hdr, - None => return Err(ReadErr::BadLayout), - }; - Ok(hdr) - } - - #[inline] - fn new(src: &[u8]) -> Result, ReadErr> { - debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::new(src) { - Some(hdr) => hdr, - None => return Err(ReadErr::BadLayout), - }; - Ok(hdr) - } +#[derive(Clone, Debug, Eq, Hash, PartialEq, Ingot)] +#[ingot(impl_default)] +pub struct IcmpEcho { + pub id: u16be, + pub sequence: u16be, } diff --git a/lib/opte/src/engine/icmp/v4.rs b/lib/opte/src/engine/icmp/v4.rs index 273bfe11..98bc812e 100644 --- a/lib/opte/src/engine/icmp/v4.rs +++ b/lib/opte/src/engine/icmp/v4.rs @@ -2,37 +2,28 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! ICMPv4 headers and processing. 
use super::*; -use crate::engine::ip4::Ipv4Hdr; -use crate::engine::ip4::Ipv4Meta; +use crate::ddi::mblk::MsgBlk; +use crate::engine::checksum::HeaderChecksum; +use crate::engine::ether::Ethernet; +use crate::engine::ip::v4::Ipv4; +use crate::engine::packet::MblkPacketData; use crate::engine::predicate::Ipv4AddrMatch; +use ingot::ethernet::Ethertype; +use ingot::icmp::IcmpV4; +use ingot::icmp::IcmpV4Packet; +use ingot::icmp::IcmpV4Ref; +use ingot::icmp::ValidIcmpV4; +use ingot::ip::IpProtocol; +use ingot::types::HeaderLen; +use ingot::types::HeaderParse; +use opte::engine::Checksum as OpteCsum; pub use opte_api::ip::IcmpEchoReply; use smoltcp::wire; -use smoltcp::wire::Icmpv4Message; -use smoltcp::wire::Icmpv4Packet; -use smoltcp::wire::Icmpv4Repr; - -pub type Icmpv4Meta = IcmpMeta; - -impl QueryEcho for Icmpv4Meta { - /// Extract an ID from the body of an ICMPv4 packet to use as a - /// pseudo port for flow differentiation. - /// - /// This method returns `None` for any non-echo packets. - #[inline] - fn echo_id(&self) -> Option { - match self.msg_type.inner { - Icmpv4Message::EchoRequest | Icmpv4Message::EchoReply => { - Some(u16::from_be_bytes(self.body_echo().id)) - } - _ => None, - } - } -} impl HairpinAction for IcmpEchoReply { fn implicit_preds(&self) -> (Vec, Vec) { @@ -59,11 +50,7 @@ impl HairpinAction for IcmpEchoReply { (hdr_preds, data_preds) } - fn gen_packet( - &self, - meta: &PacketMeta, - rdr: &mut PacketReader, - ) -> GenPacketResult { + fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { let Some(icmp) = meta.inner_icmp() else { // Getting here implies the predicate matched, but that the // extracted metadata indicates this isn't an ICMP packet. That @@ -75,66 +62,81 @@ impl HairpinAction for IcmpEchoReply { ))); }; - // `Icmpv4Packet` requires the ICMPv4 header and not just the message payload. - // Given we successfully got the ICMPv4 metadata, rewinding here is fine. 
- rdr.seek_back(icmp.hdr_len())?; - let body = rdr.copy_remaining(); - let src_pkt = Icmpv4Packet::new_checked(&body)?; - let src_icmp = Icmpv4Repr::parse(&src_pkt, &Csum::ignored())?; + let ty = MessageType::from(icmp.ty()); - let (src_ident, src_seq_no, src_data) = match src_icmp { - Icmpv4Repr::EchoRequest { ident, seq_no, data } => { - (ident, seq_no, data) + // We'll be recycling the sequence and identity. + let rest_of_hdr = match (ty, icmp.code()) { + (MessageType { inner: wire::Icmpv4Message::EchoRequest }, 0) => { + icmp.rest_of_hdr() } - - _ => { + (ty, code) => { // We should never hit this case because the predicate // should have verified that we are dealing with an // Echo Request. However, programming error could // cause this to happen -- let's not take any chances. return Err(GenErr::Unexpected(format!( "expected an ICMPv4 Echo Request, got {} {}", - src_pkt.msg_type(), - src_pkt.msg_code() + ty, code, ))); } }; - let reply = Icmpv4Repr::EchoReply { - ident: src_ident, - seq_no: src_seq_no, - data: src_data, + // Checksum update is minimal for a ping reply. + // May need to compute from scratch if offloading / request + // cksum is elided. + let mut csum = match icmp.checksum() { + 0 => { + let mut csum = OpteCsum::new(); + csum.add_bytes(meta.body()); + csum.add_bytes(icmp.rest_of_hdr_ref()); + csum + } + valid => { + let mut csum = + OpteCsum::from(HeaderChecksum::wrap(valid.to_be_bytes())); + csum.sub_bytes(&[icmp.ty(), icmp.code()]); + csum + } + }; + + let ty = wire::Icmpv4Message::EchoReply.into(); + let code = 0; + csum.add_bytes(&[ty, code]); + + // Build the reply in place, and send it out. 
+ let body_len: usize = meta.body().len(); + + let icmp = IcmpV4 { + ty, + code, + checksum: csum.finalize_for_ingot(), + rest_of_hdr, }; - let reply_len = reply.buffer_len(); - let mut tmp = vec![0u8; reply_len]; - let mut icmp_reply = Icmpv4Packet::new_unchecked(&mut tmp); - let mut csum = Csum::ignored(); - csum.icmpv4 = Checksum::Tx; - reply.emit(&mut icmp_reply, &csum); - - let mut ip4 = Ipv4Meta { - src: self.echo_dst_ip, - dst: self.echo_src_ip, - proto: Protocol::ICMP, - total_len: (Ipv4Hdr::BASE_SIZE + reply_len) as u16, + let mut ip4 = Ipv4 { + source: self.echo_dst_ip, + destination: self.echo_src_ip, + protocol: IpProtocol::ICMP, + total_len: (Ipv4::MINIMUM_LENGTH + icmp.packet_length() + body_len) + as u16, ..Default::default() }; - ip4.compute_hdr_csum(); + ip4.compute_checksum(); - let eth = EtherMeta { - dst: self.echo_src_mac, - src: self.echo_dst_mac, - ether_type: EtherType::Ipv4, + let eth = Ethernet { + destination: self.echo_src_mac, + source: self.echo_dst_mac, + ethertype: Ethertype::IPV4, }; - let total_len = EtherHdr::SIZE + Ipv4Hdr::BASE_SIZE + reply_len; - let mut pkt = Packet::alloc_and_expand(total_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - wtr.write(&tmp).unwrap(); - Ok(AllowOrDeny::Allow(pkt)) + let total_len = body_len + (ð, &ip4, &icmp).packet_length(); + + let mut pkt_out = MsgBlk::new_ethernet(total_len); + pkt_out + .emit_back((ð, &ip4, &icmp, meta.body())) + .expect("Allocated space for pkt headers and body"); + + Ok(AllowOrDeny::Allow(pkt_out)) } } @@ -193,37 +195,30 @@ impl Display for MessageType { } } -#[cfg(test)] -mod test { - use crate::engine::checksum::Checksum as OpteCsum; - use crate::engine::headers::RawHeader; - use crate::engine::icmp::IcmpHdr; - use crate::engine::icmp::IcmpHdrRaw; - use smoltcp::wire::Icmpv4Packet; - use smoltcp::wire::Icmpv4Repr; - - use super::*; - - #[test] - fn icmp4_body_csum_equals_body() { 
- let data = b"reunion\0"; - let mut body_csum = OpteCsum::default(); - body_csum.add_bytes(data); - - let mut cksum_cfg = Csum::ignored(); - cksum_cfg.icmpv4 = Checksum::Both; - - let test_pkt = Icmpv4Repr::EchoRequest { ident: 7, seq_no: 7777, data }; - let mut out = vec![0u8; test_pkt.buffer_len()]; - let mut packet = Icmpv4Packet::new_unchecked(&mut out); - test_pkt.emit(&mut packet, &cksum_cfg); - - let src = &mut out[..IcmpHdr::SIZE]; - let icmp = IcmpHdr { base: IcmpHdrRaw::new_mut(src).unwrap() }; - - assert_eq!( - Some(body_csum.finalize()), - icmp.csum_minus_hdr().map(|mut v| v.finalize()) - ); +impl QueryEcho for IcmpV4Packet { + #[inline] + fn echo_id(&self) -> Option { + match (self.code(), self.ty()) { + (0, 0) | (0, 8) => { + ValidIcmpEcho::parse(self.rest_of_hdr_ref().as_slice()) + .ok() + .map(|(v, ..)| v.id()) + } + _ => None, + } + } +} + +impl QueryEcho for ValidIcmpV4 { + #[inline] + fn echo_id(&self) -> Option { + match (self.code(), self.ty()) { + (0, 0) | (0, 8) => { + ValidIcmpEcho::parse(self.rest_of_hdr_ref().as_slice()) + .ok() + .map(|(v, ..)| v.id()) + } + _ => None, + } } } diff --git a/lib/opte/src/engine/icmp/v6.rs b/lib/opte/src/engine/icmp/v6.rs index f3d48cda..c1234d6c 100644 --- a/lib/opte/src/engine/icmp/v6.rs +++ b/lib/opte/src/engine/icmp/v6.rs @@ -2,15 +2,29 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! ICMPv6 headers and processing. 
use super::*; -use crate::engine::ip6::Ipv6Hdr; -use crate::engine::ip6::Ipv6Meta; +use crate::ddi::mblk::MsgBlk; +use crate::engine::checksum::HeaderChecksum; +use crate::engine::ether::Ethernet; +use crate::engine::ip::v6::Ipv6; +use crate::engine::ip::v6::Ipv6Ref; +use crate::engine::packet::MblkPacketData; use crate::engine::predicate::Ipv6AddrMatch; use alloc::string::String; +use ingot::ethernet::Ethertype; +use ingot::icmp::IcmpV6; +use ingot::icmp::IcmpV6Packet; +use ingot::icmp::IcmpV6Ref; +use ingot::icmp::ValidIcmpV6; +use ingot::ip::IpProtocol as IngotIpProto; +use ingot::types::Emit; +use ingot::types::HeaderLen; +use ingot::types::HeaderParse; +use opte::engine::Checksum as OpteCsum; pub use opte_api::ip::Icmpv6EchoReply; pub use opte_api::ip::Ipv6Addr; pub use opte_api::ip::Ipv6Cidr; @@ -22,30 +36,11 @@ use smoltcp::wire::Icmpv6Message; use smoltcp::wire::Icmpv6Packet; use smoltcp::wire::Icmpv6Repr; use smoltcp::wire::IpAddress; -use smoltcp::wire::IpProtocol; use smoltcp::wire::Ipv6Address; use smoltcp::wire::NdiscNeighborFlags; use smoltcp::wire::NdiscRepr; use smoltcp::wire::RawHardwareAddress; -pub type Icmpv6Meta = IcmpMeta; - -impl QueryEcho for Icmpv6Meta { - /// Extract an ID from the body of an ICMPv6 packet to use as a - /// pseudo port for flow differentiation. - /// - /// This method returns `None` for any non-echo packets. 
- #[inline] - fn echo_id(&self) -> Option { - match self.msg_type.inner { - Icmpv6Message::EchoRequest | Icmpv6Message::EchoReply => { - Some(u16::from_be_bytes(self.body_echo().id)) - } - _ => None, - } - } -} - /// An ICMPv6 message type #[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)] #[serde(from = "u8", into = "u8")] @@ -114,11 +109,7 @@ impl HairpinAction for Icmpv6EchoReply { (hdr_preds, data_preds) } - fn gen_packet( - &self, - meta: &PacketMeta, - rdr: &mut PacketReader, - ) -> GenPacketResult { + fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { let Some(icmp6) = meta.inner_icmp6() else { // Getting here implies the predicate matched, but that the // extracted metadata indicates this isn't an ICMPv6 packet. That @@ -130,85 +121,83 @@ impl HairpinAction for Icmpv6EchoReply { ))); }; - // Collect the src / dst IP addresses, which are needed to emit the - // resulting ICMPv6 echo reply. - let (src_ip, dst_ip) = if let Some(metadata) = meta.inner_ip6() { - ( - IpAddress::Ipv6(Ipv6Address(metadata.src.bytes())), - IpAddress::Ipv6(Ipv6Address(metadata.dst.bytes())), - ) - } else { - // We got the ICMPv6 metadata above but no IPv6 somehow? - return Err(GenErr::Unexpected(format!( - "Expected IPv6 packet metadata, but found: {:?}", - meta - ))); - }; + let ty = MessageType::from(icmp6.ty()); - // `Icmpv6Packet` requires the ICMPv6 header and not just the message payload. - // Given we successfully got the ICMPv6 metadata, rewinding here is fine. - rdr.seek_back(icmp6.hdr_len())?; - - let body = rdr.copy_remaining(); - let src_pkt = Icmpv6Packet::new_checked(&body)?; - let src_icmp = - Icmpv6Repr::parse(&src_ip, &dst_ip, &src_pkt, &Csum::ignored())?; - - let (src_ident, src_seq_no, src_data) = match src_icmp { - Icmpv6Repr::EchoRequest { ident, seq_no, data } => { - (ident, seq_no, data) + // We'll be recycling the sequence and identity. 
+ let rest_of_hdr = match (ty, icmp6.code()) { + (MessageType { inner: Icmpv6Message::EchoRequest }, 0) => { + icmp6.rest_of_hdr() } - - _ => { + (ty, code) => { // We should never hit this case because the predicate // should have verified that we are dealing with an // Echo Request. However, programming error could // cause this to happen -- let's not take any chances. return Err(GenErr::Unexpected(format!( "expected an ICMPv6 Echo Request, got {} {}", - src_pkt.msg_type(), - src_pkt.msg_code() + ty, code, ))); } }; - let reply = Icmpv6Repr::EchoReply { - ident: src_ident, - seq_no: src_seq_no, - data: src_data, + // Checksum update is minimal for a ping reply. + // May need to compute from scratch if offloading / request + // cksum is elided. + let mut csum = match icmp6.checksum() { + 0 => { + let mut csum = OpteCsum::new(); + + csum.add_bytes(meta.body()); + + csum.add_bytes(icmp6.rest_of_hdr_ref()); + + csum + } + valid => { + let mut csum = + OpteCsum::from(HeaderChecksum::wrap(valid.to_be_bytes())); + csum.sub_bytes(&[icmp6.ty(), icmp6.code()]); + csum + } }; - let reply_len = reply.buffer_len(); - let mut ulp_body = vec![0u8; reply_len]; - let mut icmp_reply = Icmpv6Packet::new_unchecked(&mut ulp_body); - let mut csum = Csum::ignored(); - csum.icmpv6 = Checksum::Tx; - reply.emit(&dst_ip, &src_ip, &mut icmp_reply, &csum); - - let ip = Ipv6Meta { - src: self.dst_ip, - dst: self.src_ip, - proto: Protocol::ICMPv6, - next_hdr: IpProtocol::Icmpv6, - // There are no extension headers. The ULP is the only - // content. - pay_len: reply_len as u16, + let ty = Icmpv6Message::EchoReply.into(); + let code = 0; + csum.add_bytes(&[ty, code]); + + // Build the reply in place, and send it out. + let body_len: usize = meta.body().len(); + + let icmp = IcmpV6 { + ty, + code, + checksum: csum.finalize_for_ingot(), + rest_of_hdr, + }; + + // Note: an IP address swap does not require addition/removal from + // the internet checksum. 
+ let ip6 = Ipv6 { + source: self.dst_ip, + destination: self.src_ip, + next_header: IngotIpProto::ICMP_V6, + payload_len: (icmp.packet_length() + body_len) as u16, ..Default::default() }; - let eth = EtherMeta { - ether_type: EtherType::Ipv6, - dst: self.src_mac, - src: self.dst_mac, + let eth = Ethernet { + destination: self.src_mac, + source: self.dst_mac, + ethertype: Ethertype::IPV6, }; - let total_len = EtherHdr::SIZE + Ipv6Hdr::BASE_SIZE + reply_len; - let mut pkt = Packet::alloc_and_expand(total_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - wtr.write(&ulp_body).unwrap(); - Ok(AllowOrDeny::Allow(pkt)) + let total_len = body_len + (ð, &ip6, &icmp).packet_length(); + let mut pkt_out = MsgBlk::new_ethernet(total_len); + pkt_out + .emit_back((ð, &ip6, &icmp, meta.body())) + .expect("Allocated space for pkt headers and body"); + + Ok(AllowOrDeny::Allow(pkt_out)) } } @@ -250,11 +239,7 @@ impl HairpinAction for RouterAdvertisement { (hdr_preds, data_preds) } - fn gen_packet( - &self, - meta: &PacketMeta, - rdr: &mut PacketReader, - ) -> GenPacketResult { + fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { use smoltcp::time::Duration; use smoltcp::wire::NdiscRouterFlags; @@ -278,14 +263,14 @@ impl HairpinAction for RouterAdvertisement { meta ))); }; - let src_ip = IpAddress::Ipv6(Ipv6Address(ip6.src.bytes())); - let dst_ip = IpAddress::Ipv6(Ipv6Address(ip6.dst.bytes())); + let src_ip = IpAddress::Ipv6(Ipv6Address(ip6.source().bytes())); + let dst_ip = IpAddress::Ipv6(Ipv6Address(ip6.destination().bytes())); // `Icmpv6Packet` requires the ICMPv6 header and not just the message payload. // Given we successfully got the ICMPv6 metadata, rewinding here is fine. 
- rdr.seek_back(icmp6.hdr_len())?; + let mut body = icmp6.emit_vec(); + meta.append_remaining(&mut body); - let body = rdr.copy_remaining(); let src_pkt = Icmpv6Packet::new_checked(&body)?; let mut csum = Csum::ignored(); csum.icmpv6 = Checksum::Rx; @@ -322,10 +307,10 @@ impl HairpinAction for RouterAdvertisement { // and thus _not_ UNSPEC, so we skip that checking here. // // This leaves the hop limit as the only validity check. - if ip6.hop_limit != 255 { + if ip6.hop_limit() != 255 { return Err(GenErr::Unexpected(format!( "Received RS with invalid hop limit ({}).", - ip6.hop_limit + ip6.hop_limit() ))); } @@ -369,35 +354,27 @@ impl HairpinAction for RouterAdvertisement { &csum, ); - let ip = Ipv6Meta { - src: *self.ip(), + let ip6 = Ipv6 { + source: *self.ip(), // Safety: We match on this being Some(_) above, so unwrap is safe. - dst: meta.inner_ip6().unwrap().src, - proto: Protocol::ICMPv6, - next_hdr: IpProtocol::Icmpv6, + destination: meta.inner_ip6().unwrap().source(), + next_header: IngotIpProto::ICMP_V6, + payload_len: reply_len as u16, + // RFC 4861 6.1.2 requires that the hop limit be 255 in an RA. hop_limit: 255, - // There are no extension headers; the ULP is the only - // content. - pay_len: reply_len as u16, ..Default::default() }; - // The Ethernet frame should come from OPTE's virtual gateway MAC, and - // be destined for the client which sent us the packet. 
- let eth = EtherMeta { - ether_type: EtherType::Ipv6, - dst: self.src_mac, - src: self.mac, + let eth = Ethernet { + destination: self.src_mac, + source: self.mac, + ethertype: Ethertype::IPV6, }; - let total_len = EtherHdr::SIZE + Ipv6Hdr::BASE_SIZE + reply_len; - let mut pkt = Packet::alloc_and_expand(total_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - wtr.write(&ulp_body).unwrap(); - Ok(AllowOrDeny::Allow(pkt)) + Ok(AllowOrDeny::Allow(MsgBlk::new_ethernet_pkt(( + ð, &ip6, &ulp_body, + )))) } } @@ -407,15 +384,14 @@ impl HairpinAction for RouterAdvertisement { // the validations performed. // // Return the target address from the Neighbor Solicitation. -fn validate_neighbor_solicitation( - rdr: &mut PacketReader, - metadata: &Ipv6Meta, +fn validate_neighbor_solicitation( + rdr: &[u8], + metadata: &impl Ipv6Ref, ) -> Result { // First, check if this is in fact a NS message. - let smol_src = IpAddress::Ipv6(metadata.src.into()); - let smol_dst = IpAddress::Ipv6(metadata.dst.into()); - let body = rdr.copy_remaining(); - let src_pkt = Icmpv6Packet::new_checked(&body)?; + let smol_src = IpAddress::Ipv6(Ipv6Address(metadata.source().bytes())); + let smol_dst = IpAddress::Ipv6(Ipv6Address(metadata.destination().bytes())); + let src_pkt = Icmpv6Packet::new_checked(rdr)?; let mut csum = Csum::ignored(); csum.icmpv6 = Checksum::Rx; let icmp = Icmpv6Repr::parse(&smol_src, &smol_dst, &src_pkt, &csum)?; @@ -426,10 +402,10 @@ fn validate_neighbor_solicitation( // - ICMP length is at least 24 octets // - Any included options have a non-zero length - if metadata.hop_limit != 255 { + if metadata.hop_limit() != 255 { return Err(GenErr::Unexpected(format!( "Received NS with invalid hop limit ({}).", - metadata.hop_limit + metadata.hop_limit() ))); } @@ -460,8 +436,8 @@ fn validate_neighbor_solicitation( // NS is only allowed from the unspecified address if the destination is a // 
solicited-node multicast address. - if metadata.src == Ipv6Addr::ANY_ADDR - && !metadata.dst.is_solicited_node_multicast() + if metadata.source() == Ipv6Addr::ANY_ADDR + && !metadata.destination().is_solicited_node_multicast() { return Err(GenErr::Unexpected(String::from( "Received NS from UNSPEC, but destination is not the solicited \ @@ -470,7 +446,7 @@ fn validate_neighbor_solicitation( } // Cannot contain Link-Layer address option if from the unspecified address. - if metadata.src == Ipv6Addr::ANY_ADDR && has_ll_option { + if metadata.source() == Ipv6Addr::ANY_ADDR && has_ll_option { return Err(GenErr::Unexpected(String::from( "Received NS from UNSPEC, but message contains the \ Link-Layer Address option.", @@ -586,11 +562,7 @@ impl HairpinAction for NeighborAdvertisement { (hdr_preds, data_preds) } - fn gen_packet( - &self, - meta: &PacketMeta, - rdr: &mut PacketReader, - ) -> GenPacketResult { + fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { let Some(icmp6) = meta.inner_icmp6() else { // Getting here implies the predicate matched, but that the // extracted metadata indicates this isn't an ICMPv6 packet. That @@ -613,23 +585,22 @@ impl HairpinAction for NeighborAdvertisement { // `Icmpv6Packet` requires the ICMPv6 header and not just the message payload. // Given we successfully got the ICMPv6 metadata, rewinding here is fine. - rdr.seek_back(icmp6.hdr_len())?; + let mut body = icmp6.emit_vec(); + meta.append_remaining(&mut body); // Validate the ICMPv6 packet is actually a Neighbor Solicitation, and // that its data is appopriate. - let target_addr = validate_neighbor_solicitation(rdr, metadata)?; + let target_addr = validate_neighbor_solicitation(&body, metadata)?; // Build the NA, whose data depends on how we received the packet. If // `None` is returned, the NS is not destined for us, and will be // dropped. 
- let (dst_ip, advert) = match construct_neighbor_advert( - self, - &target_addr, - &metadata.src, - ) { - Some(data) => data, - None => return Ok(AllowOrDeny::Deny), - }; + let conv_ip = metadata.source(); + let (dst_ip, advert) = + match construct_neighbor_advert(self, &target_addr, &conv_ip) { + Some(data) => data, + None => return Ok(AllowOrDeny::Deny), + }; // Construct the actual bytes of the reply packet, and return it. let reply = Icmpv6Repr::Ndisc(advert); @@ -645,37 +616,57 @@ impl HairpinAction for NeighborAdvertisement { &csum, ); - let ip = Ipv6Meta { - src: *self.ip(), - dst: dst_ip, - proto: Protocol::ICMPv6, - next_hdr: IpProtocol::Icmpv6, + // While the frame must always be sent from the gateway, who the frame + // is addressed to depends on whether we should multicast the packet. + let dst_mac = dst_ip.multicast_mac().unwrap_or(self.src_mac); + + let ip6 = Ipv6 { + source: *self.ip(), + destination: dst_ip, + next_header: IngotIpProto::ICMP_V6, + payload_len: reply_len as u16, + // RFC 4861 7.1.2 requires that the hop limit be 255 in an NA. hop_limit: 255, - // There are no extension headers; the ULP is the only - // content. - pay_len: reply_len as u16, ..Default::default() }; - // While the frame must always be sent from the gateway, who the frame - // is addressed to depends on whether we should multicast the packet. - let dst_mac = dst_ip.multicast_mac().unwrap_or(self.src_mac); - - // The Ethernet frame should come from OPTE's virtual gateway MAC, and - // be destined for the client which sent us the packet. 
- let eth = EtherMeta { - ether_type: EtherType::Ipv6, - dst: dst_mac, - src: self.mac, + let eth = Ethernet { + destination: dst_mac, + source: self.mac, + ethertype: Ethertype::IPV6, }; - let len = EtherHdr::SIZE + Ipv6Hdr::BASE_SIZE + reply_len; - let mut pkt = Packet::alloc_and_expand(len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - wtr.write(&ulp_body).unwrap(); - Ok(AllowOrDeny::Allow(pkt)) + Ok(AllowOrDeny::Allow(MsgBlk::new_ethernet_pkt(( + ð, &ip6, &ulp_body, + )))) + } +} + +impl QueryEcho for IcmpV6Packet { + #[inline] + fn echo_id(&self) -> Option { + match (self.code(), self.ty()) { + (0, 128) | (0, 129) => { + ValidIcmpEcho::parse(&self.rest_of_hdr_ref()[..]) + .ok() + .map(|(v, ..)| v.id()) + } + _ => None, + } + } +} + +impl QueryEcho for ValidIcmpV6 { + #[inline] + fn echo_id(&self) -> Option { + match (self.code(), self.ty()) { + (0, 128) | (0, 129) => { + ValidIcmpEcho::parse(&self.rest_of_hdr_ref()[..]) + .ok() + .map(|(v, ..)| v.id()) + } + _ => None, + } } } diff --git a/lib/opte/src/engine/ip/mod.rs b/lib/opte/src/engine/ip/mod.rs new file mode 100644 index 00000000..adc55594 --- /dev/null +++ b/lib/opte/src/engine/ip/mod.rs @@ -0,0 +1,289 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +// Copyright 2024 Oxide Computer Company + +pub mod v4; +pub mod v6; + +use super::checksum::Checksum; +use super::headers::HasInnerCksum; +use super::headers::HeaderActionError; +use super::headers::HeaderActionModify; +use super::headers::IpMod; +use super::headers::IpPush; +use super::headers::PushAction; +use super::packet::ParseError; +use ingot::choice; +use ingot::ethernet::Ethertype; +use ingot::ip::IpProtocol; +use ingot::ip::Ipv4Flags; +use ingot::types::ByteSlice; +use ingot::types::Header; +use ingot::types::InlineHeader; +use ingot::types::NextLayer; +use v4::*; +use v6::*; +use zerocopy::ByteSliceMut; +use zerocopy::IntoBytes; + +// Redefine Ethernet and v4/v6 because we have our own, internal, +// address types already. + +#[choice(on = Ethertype)] +pub enum L3 { + Ipv4 = Ethertype::IPV4, + Ipv6 = Ethertype::IPV6, +} + +impl L3 { + pub fn pseudo_header(&self) -> Checksum { + match self { + L3::Ipv4(v4) => { + let mut pseudo_hdr_bytes = [0u8; 12]; + pseudo_hdr_bytes[0..4].copy_from_slice(v4.source().as_ref()); + pseudo_hdr_bytes[4..8] + .copy_from_slice(v4.destination().as_ref()); + // pseudo_hdr_bytes[8] reserved + pseudo_hdr_bytes[9] = v4.protocol().0; + let ulp_len = v4.total_len() - 4 * (v4.ihl() as u16); + pseudo_hdr_bytes[10..].copy_from_slice(&ulp_len.to_be_bytes()); + + Checksum::compute(&pseudo_hdr_bytes) + } + L3::Ipv6(v6) => { + let mut pseudo_hdr_bytes = [0u8; 40]; + pseudo_hdr_bytes[0..16].copy_from_slice(v6.source().as_ref()); + pseudo_hdr_bytes[16..32] + .copy_from_slice(v6.destination().as_ref()); + let ulp_len = v6.payload_len() as u32; + pseudo_hdr_bytes[32..36] + .copy_from_slice(&ulp_len.to_be_bytes()); + pseudo_hdr_bytes[39] = v6.next_layer().unwrap_or_default().0; + + Checksum::compute(&pseudo_hdr_bytes) + } + } + } +} + +impl L3 { + #[inline] + pub fn compute_checksum(&mut self) { + if let L3::Ipv4(ip) = self { + match ip { + Header::Repr(ip) => ip.compute_checksum(), + Header::Raw(ip) => ip.compute_checksum(), + } + } + } +} 
+ +impl ValidL3 { + pub fn pseudo_header(&self) -> Checksum { + match self { + ValidL3::Ipv4(v4) => { + let mut pseudo_hdr_bytes = [0u8; 12]; + pseudo_hdr_bytes[0..4].copy_from_slice(v4.source().as_ref()); + pseudo_hdr_bytes[4..8] + .copy_from_slice(v4.destination().as_ref()); + // pseudo_hdr_bytes[8] reserved + pseudo_hdr_bytes[9] = v4.protocol().0; + let ulp_len = v4.total_len() - 4 * (v4.ihl() as u16); + pseudo_hdr_bytes[10..].copy_from_slice(&ulp_len.to_be_bytes()); + + Checksum::compute(&pseudo_hdr_bytes) + } + ValidL3::Ipv6(v6) => { + let mut pseudo_hdr_bytes = [0u8; 40]; + pseudo_hdr_bytes[0..16].copy_from_slice(v6.source().as_ref()); + pseudo_hdr_bytes[16..32] + .copy_from_slice(v6.destination().as_ref()); + let ulp_len = v6.payload_len() as u32; + pseudo_hdr_bytes[32..36] + .copy_from_slice(&ulp_len.to_be_bytes()); + pseudo_hdr_bytes[39] = v6.next_layer().unwrap_or_default().0; + + Checksum::compute(&pseudo_hdr_bytes) + } + } + } + + pub fn csum(&self) -> [u8; 2] { + match self { + ValidL3::Ipv4(i4) => i4.checksum(), + ValidL3::Ipv6(_) => 0, + } + .to_be_bytes() + } + + #[inline] + pub fn validate(&self, bytes_after: usize) -> Result<(), ParseError> { + match self { + ValidL3::Ipv4(i4) => i4.validate(bytes_after), + ValidL3::Ipv6(i6) => i6.validate(bytes_after), + } + } +} + +impl ValidL3 { + #[inline] + pub fn compute_checksum(&mut self) { + if let ValidL3::Ipv4(ip) = self { + ip.set_checksum(0); + + let mut csum = Checksum::new(); + csum.add_bytes(ip.0.as_bytes()); + match &ip.1 { + Header::Repr(opts) => { + csum.add_bytes(opts); + } + Header::Raw(opts) => { + csum.add_bytes(opts); + } + } + + ip.set_checksum(csum.finalize_for_ingot()); + } + } +} + +impl HeaderActionModify + for InlineHeader> +{ + #[inline] + fn run_modify( + &mut self, + mod_spec: &IpMod, + ) -> Result<(), HeaderActionError> { + match mod_spec { + IpMod::Ip4(mods) => match self { + InlineHeader::Repr(L3Repr::Ipv4(v4)) => { + if let Some(src) = mods.src { + v4.source = src; + } + if let 
Some(dst) = mods.dst { + v4.destination = dst; + } + if let Some(p) = mods.proto { + v4.protocol = IpProtocol(u8::from(p)); + } + } + InlineHeader::Raw(ValidL3::Ipv4(v4)) => { + if let Some(src) = mods.src { + v4.set_source(src); + } + if let Some(dst) = mods.dst { + v4.set_destination(dst); + } + if let Some(p) = mods.proto { + v4.set_protocol(IpProtocol(u8::from(p))); + } + } + _ => return Err(HeaderActionError::MissingHeader), + }, + IpMod::Ip6(mods) => match self { + InlineHeader::Repr(L3Repr::Ipv6(v6)) => { + if let Some(src) = mods.src { + v6.source = src; + } + if let Some(dst) = mods.dst { + v6.destination = dst; + } + if let Some(p) = mods.proto { + let ipp = IpProtocol(u8::from(p)); + + v6_set_next_header::<&mut [u8]>(ipp, v6)?; + } + } + InlineHeader::Raw(ValidL3::Ipv6(v6)) => { + if let Some(src) = mods.src { + v6.set_source(src); + } + if let Some(dst) = mods.dst { + v6.set_destination(dst); + } + if let Some(p) = mods.proto { + let ipp = IpProtocol(u8::from(p)); + v6_set_next_header(ipp, v6)?; + } + } + _ => return Err(HeaderActionError::MissingHeader), + }, + } + + Ok(()) + } +} + +impl HeaderActionModify for L3 { + #[inline] + fn run_modify( + &mut self, + mod_spec: &IpMod, + ) -> Result<(), HeaderActionError> { + match (self, mod_spec) { + (L3::Ipv4(v4), IpMod::Ip4(mods)) => { + if let Some(src) = mods.src { + v4.set_source(src); + } + if let Some(dst) = mods.dst { + v4.set_destination(dst); + } + if let Some(p) = mods.proto { + v4.set_protocol(IpProtocol(u8::from(p))); + } + Ok(()) + } + (L3::Ipv6(v6), IpMod::Ip6(mods)) => { + if let Some(src) = mods.src { + v6.set_source(src); + } + if let Some(dst) = mods.dst { + v6.set_destination(dst); + } + if let Some(p) = mods.proto { + let ipp = IpProtocol(u8::from(p)); + v6_set_next_header(ipp, v6)?; + } + Ok(()) + } + _ => Err(HeaderActionError::MissingHeader), + } + } +} + +impl HasInnerCksum for InlineHeader> { + const HAS_CKSUM: bool = true; +} + +impl HasInnerCksum for L3 { + const HAS_CKSUM: bool = 
true; +} + +impl PushAction> for IpPush { + fn push(&self) -> L3 { + match self { + IpPush::Ip4(v4) => L3::Ipv4( + Ipv4 { + protocol: IpProtocol(u8::from(v4.proto)), + source: v4.src, + destination: v4.dst, + flags: Ipv4Flags::DONT_FRAGMENT, + ..Default::default() + } + .into(), + ), + IpPush::Ip6(v6) => L3::Ipv6( + Ipv6 { + next_header: IpProtocol(u8::from(v6.proto)), + source: v6.src, + destination: v6.dst, + ..Default::default() + } + .into(), + ), + } + } +} diff --git a/lib/opte/src/engine/ip/v4.rs b/lib/opte/src/engine/ip/v4.rs new file mode 100644 index 00000000..f91d1aa0 --- /dev/null +++ b/lib/opte/src/engine/ip/v4.rs @@ -0,0 +1,273 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2024 Oxide Computer Company + +//! IPv4 headers. + +use crate::engine::checksum::Checksum; +use crate::engine::packet::MismatchError; +use crate::engine::packet::ParseError; +use crate::engine::predicate::MatchExact; +use crate::engine::predicate::MatchExactVal; +use crate::engine::predicate::MatchPrefix; +use crate::engine::predicate::MatchPrefixVal; +use crate::engine::predicate::MatchRangeVal; +use ingot::ip::Ecn; +use ingot::ip::IpProtocol; +use ingot::ip::Ipv4Flags; +use ingot::types::primitives::*; +use ingot::types::Emit; +use ingot::types::Header; +use ingot::types::HeaderLen; +use ingot::types::Vec; +use ingot::Ingot; +pub use opte_api::Ipv4Addr; +pub use opte_api::Ipv4Cidr; +pub use opte_api::Ipv4PrefixLen; +pub use opte_api::Protocol; +use serde::Deserialize; +use serde::Serialize; +use zerocopy::ByteSlice; +use zerocopy::ByteSliceMut; +use zerocopy::IntoBytes; + +#[derive(Clone, Debug, Eq, Hash, PartialEq, Ingot)] +#[ingot(impl_default)] +pub struct Ipv4 { + #[ingot(default = 4)] + pub version: u4, + #[ingot(default = 5)] + pub ihl: u4, + pub dscp: u6, + #[ingot(is = "u2")] + pub ecn: Ecn, + pub 
total_len: u16be, + + pub identification: u16be, + #[ingot(is = "u3")] + pub flags: Ipv4Flags, + pub fragment_offset: u13be, + + #[ingot(default = 128)] + pub hop_limit: u8, + #[ingot(is = "u8", next_layer)] + pub protocol: IpProtocol, + pub checksum: u16be, + + #[ingot(is = "[u8; 4]", default = Ipv4Addr::ANY_ADDR)] + pub source: Ipv4Addr, + #[ingot(is = "[u8; 4]", default = Ipv4Addr::ANY_ADDR)] + pub destination: Ipv4Addr, + + #[ingot(var_len = "(ihl * 4).saturating_sub(20)")] + pub options: Vec, +} + +impl Ipv4 { + #[inline] + pub fn compute_checksum(&mut self) { + self.checksum = 0; + + let mut csum = Checksum::new(); + + let mut bytes = [0u8; 56]; + self.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + + self.checksum = csum.finalize_for_ingot(); + } +} + +impl ValidIpv4 { + #[inline] + pub fn compute_checksum(&mut self) { + self.set_checksum(0); + + let mut csum = Checksum::new(); + + csum.add_bytes(self.0.as_bytes()); + + match &self.1 { + Header::Repr(opts) => { + csum.add_bytes(opts); + } + Header::Raw(opts) => { + csum.add_bytes(opts); + } + } + + self.set_checksum(csum.finalize_for_ingot()); + } +} + +impl ValidIpv4 { + #[inline] + pub fn validate(&self, bytes_after: usize) -> Result<(), ParseError> { + let v = self.version(); + if self.version() != 4 { + return Err(ParseError::IllegalValue(MismatchError { + location: c"Ipv4.version", + expected: 4, + actual: v as u64, + })); + } + + let own_len = self.packet_length(); + let ihl = self.ihl(); + let expt_ihl = (own_len >> 2) as u8; + if expt_ihl != ihl { + return Err(ParseError::IllegalValue(MismatchError { + location: c"Ipv4.ihl", + expected: expt_ihl as u64, + actual: ihl as u64, + })); + } + + // Bail if our total len value is less than the IPv4 header + // itself requires. + // Note: IHL checks are baked into ingot. 
+ let expt_internal_len = (self.ihl() as usize) << 2; + if (self.total_len() as usize) < expt_internal_len { + return Err(ParseError::BadLength(MismatchError { + location: c"Ipv4.total_len(min)", + expected: expt_internal_len as u64, + actual: self.total_len() as u64, + })); + } + + // Packets can have arbitrary zero-padding at the end so + // our length *could* be larger than the packet reports. + // Unlikely in practice as Encap headers push us past the 64B + // minimum packet size. + let expt_total_len = bytes_after + own_len; + if expt_total_len < self.total_len() as usize { + return Err(ParseError::BadLength(MismatchError { + location: c"Ipv4.total_len", + expected: expt_total_len as u64, + actual: self.total_len() as u64, + })); + } + + Ok(()) + } +} + +impl MatchPrefixVal for Ipv4Cidr {} +impl MatchExactVal for Ipv4Addr {} +impl MatchRangeVal for Ipv4Addr {} + +impl MatchExact for Ipv4Addr { + fn match_exact(&self, val: &Ipv4Addr) -> bool { + *self == *val + } +} + +impl MatchPrefix for Ipv4Addr { + fn match_prefix(&self, prefix: &Ipv4Cidr) -> bool { + prefix.is_member(*self) + } +} + +impl MatchExactVal for Protocol {} + +impl MatchExact for Protocol { + fn match_exact(&self, val: &Protocol) -> bool { + *self == *val + } +} + +#[derive( + Clone, Copy, Debug, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize, +)] +pub struct Ipv4Push { + pub src: Ipv4Addr, + pub dst: Ipv4Addr, + pub proto: Protocol, +} + +#[derive(Clone, Debug, Default, Deserialize, Serialize)] +pub struct Ipv4Mod { + pub src: Option, + pub dst: Option, + pub proto: Option, +} + +#[cfg(test)] +mod test { + use super::*; + + use ingot::types::HeaderLen; + + pub const DEF_ROUTE: &str = "0.0.0.0/0"; + + #[test] + fn match_check() { + let ip = "192.168.2.11".parse::().unwrap(); + assert!(ip.match_exact(&ip)); + assert!(ip.match_prefix(&"192.168.2.0/24".parse::().unwrap())); + } + + #[test] + fn cidr_match() { + let ip1 = "192.168.2.22".parse::().unwrap(); + let cidr1 = 
"192.168.2.0/24".parse().unwrap(); + assert!(ip1.match_prefix(&cidr1)); + + let ip2 = "10.7.7.7".parse::().unwrap(); + let cidr2 = "10.0.0.0/8".parse().unwrap(); + assert!(ip2.match_prefix(&cidr2)); + + let ip3 = "52.10.128.69".parse::().unwrap(); + let cidr3 = DEF_ROUTE.parse().unwrap(); + assert!(ip3.match_prefix(&cidr3)); + } + + #[test] + fn emit() { + let ip = Ipv4 { + source: Ipv4Addr::from([10, 0, 0, 54]), + destination: Ipv4Addr::from([52, 10, 128, 69]), + protocol: IpProtocol::TCP, + flags: Ipv4Flags::DONT_FRAGMENT, + hop_limit: 64, + identification: 2662, + ihl: 5, + total_len: 60, + + ..Default::default() + }; + + let len = ip.packet_length(); + assert_eq!(len, 20); + + let bytes = ip.emit_vec(); + assert_eq!(len, bytes.len()); + + #[rustfmt::skip] + let expected_bytes = vec![ + // version + IHL + 0x45, + // DSCP + ECN + 0x00, + // total length + 0x00, 0x3C, + // ident + 0x0A, 0x66, + // flags + frag offset + 0x40, 0x00, + // TTL + 0x40, + // protocol + 0x06, + // checksum + 0x00, 0x00, + // source + 0x0A, 0x00, 0x00, 0x36, + // dest + 0x34, 0x0A, 0x80, 0x45, + ]; + assert_eq!(&expected_bytes, &bytes); + } +} diff --git a/lib/opte/src/engine/ip/v6.rs b/lib/opte/src/engine/ip/v6.rs new file mode 100644 index 00000000..a08beac6 --- /dev/null +++ b/lib/opte/src/engine/ip/v6.rs @@ -0,0 +1,597 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2024 Oxide Computer Company + +//! IPv6 Headers. 
+ +use crate::engine::headers::HeaderActionError; +use crate::engine::packet::MismatchError; +use crate::engine::packet::ParseError; +use crate::engine::predicate::MatchExact; +use crate::engine::predicate::MatchExactVal; +use crate::engine::predicate::MatchPrefix; +use crate::engine::predicate::MatchPrefixVal; +use ingot::ip::Ecn; +use ingot::ip::IpProtocol; +use ingot::ip::IpV6Ext6564Mut; +use ingot::ip::IpV6Ext6564Ref; +use ingot::ip::IpV6ExtFragmentMut; +use ingot::ip::IpV6ExtFragmentRef; +use ingot::ip::LowRentV6EhRepr; +use ingot::ip::ValidLowRentV6Eh; +use ingot::types::primitives::*; +use ingot::types::util::Repeated; +use ingot::types::FieldMut; +use ingot::types::FieldRef; +use ingot::types::Header; +use ingot::types::HeaderLen; +use ingot::types::ParseChoice; +use ingot::Ingot; +pub use opte_api::Ipv6Addr; +pub use opte_api::Ipv6Cidr; +use opte_api::Protocol; +use serde::Deserialize; +use serde::Serialize; +use zerocopy::ByteSlice; +use zerocopy::ByteSliceMut; + +pub const DDM_HEADER_ID: u8 = 0xFE; + +#[derive(Debug, Clone, Ingot, Eq, PartialEq)] +#[ingot(impl_default)] +pub struct Ipv6 { + #[ingot(default = "6")] + pub version: u4, + pub dscp: u6, + #[ingot(is = "u2")] + pub ecn: Ecn, + pub flow_label: u20be, + + pub payload_len: u16be, + #[ingot(is = "u8", next_layer)] + pub next_header: IpProtocol, + #[ingot(default = 128)] + pub hop_limit: u8, + + #[ingot(is = "[u8; 16]", default = Ipv6Addr::ANY_ADDR)] + pub source: Ipv6Addr, + #[ingot(is = "[u8; 16]", default = Ipv6Addr::ANY_ADDR)] + pub destination: Ipv6Addr, + + #[ingot(subparse(on_next_layer))] + pub v6ext: Repeated, +} + +impl MatchExactVal for Ipv6Addr {} +impl MatchPrefixVal for Ipv6Cidr {} + +impl MatchExact for Ipv6Addr { + fn match_exact(&self, val: &Ipv6Addr) -> bool { + *self == *val + } +} + +impl MatchPrefix for Ipv6Addr { + fn match_prefix(&self, prefix: &Ipv6Cidr) -> bool { + prefix.is_member(*self) + } +} + +impl ValidIpv6 { + #[inline] + pub fn validate(&self, bytes_after: usize) -> 
Result<(), ParseError> { + let v = self.version(); + if self.version() != 6 { + return Err(ParseError::IllegalValue(MismatchError { + location: c"Ipv6.version", + expected: 6, + actual: v as u64, + })); + } + + // Packets can have arbitrary zero-padding at the end so + // our length *could* be larger than the packet reports. + // Unlikely in practice as Encap headers push us past the 64B + // minimum packet size. + let ex_len = bytes_after + self.1.packet_length(); + let pll = self.payload_len(); + if ex_len < (self.payload_len() as usize) { + return Err(ParseError::BadLength(MismatchError { + location: c"Ipv6.payload_len", + expected: ex_len as u64, + actual: pll as u64, + })); + } + + Ok(()) + } + + pub fn ulp_len(&self) -> usize { + self.payload_len() as usize - self.1.packet_length() + } + + pub fn set_ulp_len(&mut self, len: usize) + where + V: ByteSliceMut, + { + self.set_payload_len((self.1.packet_length() + len) as u16) + } + + pub fn ext_len(&self) -> usize { + self.1.packet_length() + } +} + +#[derive( + Clone, Copy, Debug, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize, +)] +pub struct Ipv6Push { + pub src: Ipv6Addr, + pub dst: Ipv6Addr, + pub proto: Protocol, +} + +#[derive(Clone, Debug, Default, Deserialize, Serialize)] +pub struct Ipv6Mod { + pub src: Option, + pub dst: Option, + pub proto: Option, +} + +#[inline] +pub fn v6_set_next_header( + ipp: IpProtocol, + v6: &mut (impl Ipv6Mut + Ipv6Ref), +) -> Result<(), HeaderActionError> { + let mut curr_ipp = v6.next_header(); + if curr_ipp.class().is_none() { + v6.set_next_header(ipp); + return Ok(()); + } + + match v6.v6ext_mut() { + FieldMut::Repr(a) => match a.iter_mut().last() { + Some(LowRentV6EhRepr::IpV6ExtFragment(f)) => { + f.next_header = ipp; + } + Some(LowRentV6EhRepr::IpV6Ext6564(f)) => { + f.next_header = ipp; + } + None => { + v6.set_next_header(ipp); + } + }, + FieldMut::Raw(Header::Repr(a)) => match a.iter_mut().last() { + Some(LowRentV6EhRepr::IpV6ExtFragment(f)) => { + 
f.next_header = ipp; + } + Some(LowRentV6EhRepr::IpV6Ext6564(f)) => { + f.next_header = ipp; + } + None => { + v6.set_next_header(ipp); + } + }, + FieldMut::Raw(Header::Raw(a)) => { + // This would be better done over all `Repeated` in ingot, + // however making mutable access generic in that case proved + // challenging. We can just do it manually for now. + let mut buf = a.as_mut(); + + while curr_ipp.class().is_some() { + let (hdr, nh, rem) = + ValidLowRentV6Eh::parse_choice(buf, Some(curr_ipp)) + .map_err(|_| HeaderActionError::MalformedExtension)?; + let nh = nh.expect("V6EHs always have a next_header field"); + buf = rem; + curr_ipp = nh; + + // We're at the last EH -- now we can update the next header. + if nh.class().is_none() { + match hdr { + ValidLowRentV6Eh::IpV6ExtFragment(mut f) => { + f.set_next_header(ipp); + } + ValidLowRentV6Eh::IpV6Ext6564(mut f) => { + f.set_next_header(ipp); + } + } + } + } + } + } + + Ok(()) +} + +#[inline] +pub fn v6_get_next_header( + v6: &impl Ipv6Ref, +) -> Result { + let curr_ipp = v6.next_header(); + if curr_ipp.class().is_none() { + return Ok(curr_ipp); + } + + Ok(match v6.v6ext_ref() { + FieldRef::Repr(a) => match a.iter().last() { + Some(LowRentV6EhRepr::IpV6ExtFragment(f)) => f.next_header, + Some(LowRentV6EhRepr::IpV6Ext6564(f)) => f.next_header, + None => curr_ipp, + }, + FieldRef::Raw(Header::Repr(a)) => match a.iter().last() { + Some(LowRentV6EhRepr::IpV6ExtFragment(f)) => f.next_header, + Some(LowRentV6EhRepr::IpV6Ext6564(f)) => f.next_header, + None => curr_ipp, + }, + FieldRef::Raw(Header::Raw(a)) => match a.iter(Some(curr_ipp)).last() { + Some(Ok(ValidLowRentV6Eh::IpV6ExtFragment(f))) => f.next_header(), + Some(Ok(ValidLowRentV6Eh::IpV6Ext6564(f))) => f.next_header(), + _ => curr_ipp, + }, + }) +} + +#[cfg(test)] +pub(crate) mod test { + use super::*; + use ingot::ip::IpProtocol as IngotIpProtocol; + use ingot::types::Accessor; + use ingot::types::Emit; + use ingot::types::Header; + use 
ingot::types::HeaderParse; + use itertools::Itertools; + use smoltcp::wire::IpProtocol; + use smoltcp::wire::Ipv6Address; + use smoltcp::wire::Ipv6ExtHeader; + use smoltcp::wire::Ipv6FragmentHeader; + use smoltcp::wire::Ipv6FragmentRepr; + use smoltcp::wire::Ipv6HopByHopHeader; + use smoltcp::wire::Ipv6HopByHopRepr; + use smoltcp::wire::Ipv6OptionRepr; + use smoltcp::wire::Ipv6Packet; + use smoltcp::wire::Ipv6Repr; + use smoltcp::wire::Ipv6RoutingHeader; + use smoltcp::wire::Ipv6RoutingRepr; + use std::vec::Vec; + + // Test packet size and payload length + const BUFFER_LEN: usize = 512; + const PAYLOAD_LEN: usize = 512 - Ipv6::MINIMUM_LENGTH; + pub(crate) const SUPPORTED_EXTENSIONS: [IpProtocol; 4] = [ + IpProtocol::HopByHop, + IpProtocol::Ipv6Route, + IpProtocol::Ipv6Frag, + IpProtocol::Unknown(DDM_HEADER_ID), + ]; + + #[test] + fn from_pairs() { + let ip6 = super::Ipv6Addr::from([ + 0x2601, 0x0284, 0x4100, 0xE240, 0x0000, 0x0000, 0xC0A8, 0x01F5, + ]); + + assert_eq!( + ip6.bytes(), + [ + 0x26, 0x01, 0x02, 0x84, 0x41, 0x00, 0xE2, 0x40, 0x00, 0x00, + 0x00, 0x00, 0xC0, 0xA8, 0x01, 0xF5 + ] + ); + } + + fn base_header() -> Ipv6Repr { + Ipv6Repr { + src_addr: Ipv6Address::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + dst_addr: Ipv6Address::new(0xfd00, 0, 0, 0, 0, 0, 0, 2), + next_header: IpProtocol::Tcp, + payload_len: PAYLOAD_LEN, + hop_limit: 6, + } + } + + fn hop_by_hop_header() -> Ipv6HopByHopRepr<'static> { + // in 8-octet units, not including the first + const OPTION_LEN: usize = 1; + // SmolTCP limits us to 2 max HBH options in its repr. + // Pad to the next multiple of 8, then one more 8-octet unit. + // - Ext header takes 2B + // - PadN(n) takes 2B, then n bytes. 
+ // => 4 + fill + const LEN: usize = 4 + OPTION_LEN * 8; + static OPTIONS: [Ipv6OptionRepr; 1] = + [Ipv6OptionRepr::PadN(LEN as u8); 1]; + Ipv6HopByHopRepr { + options: heapless::Vec::from_slice(&OPTIONS).unwrap(), + } + } + + fn route_header() -> Ipv6RoutingRepr<'static> { + // In 8-octet units, not including the first, i.e., this just needs the + // home address, 128 bits. + let segments_left = 1; + let home_address = Ipv6Address::new(0xfd00, 0, 0, 0, 0, 0, 0, 1); + Ipv6RoutingRepr::Type2 { segments_left, home_address } + } + + fn fragment_header() -> Ipv6FragmentRepr { + Ipv6FragmentRepr { frag_offset: 128, more_frags: false, ident: 0x17 } + } + + // Generate a test packet. + // + // This creates a base IPv6 header, and any extension headers with protocols + // defined by `extensions`. There is always a base header, and the ULP is + // always defined to be TCP. `extensions` can be empty. + // + // This returns the byte array of the packet, plus the size of the entire + // header, including extensions. + pub(crate) fn generate_test_packet( + extensions: &[IpProtocol], + ) -> (Vec, usize) { + // Create a chain of headers, starting with the base. Emit them into + // byte arrays, to test parsing. + let mut data = vec![0; BUFFER_LEN]; + let mut header_start = 0; + let mut next_header_pos = 6; + let mut header_end = Ipv6::MINIMUM_LENGTH; + let mut buf = &mut data[header_start..]; + + // The base header. The payload length is always the same, but the base + // protocol may be updated. + let base = base_header(); + let mut packet = Ipv6Packet::new_checked(&mut buf).unwrap(); + base.emit(&mut packet); + + if extensions.is_empty() { + // No extensions at all, just base header with a TCP ULP + return (buf.to_vec(), Ipv6::MINIMUM_LENGTH); + } + + for extension in extensions { + // First, update the _previous_ next_header with the type of this + // extension header. They form a linked-list. 
We do this first, so + // that in the case of the first extension header, we're rewriting + // the `next_header` value in the base header. + buf[next_header_pos] = u8::from(*extension); + + // For every extension header, the `next_header` is the first octet. + // That is, the base header is the only one where it's a different + // position. + next_header_pos = 0; + + // Grab the remaining packet buffer, from the end of the previous + // header. This is where we'll start inserting the current extension + // header. + buf = &mut data[header_end..]; + + // Insert the bytes of each extension header, returning the number + // of octets written. + // + // For each extension header, we need to build the top level ExtHeader + // and set length manually: this is (inner_len / 8) := the number of + // 8-byte blocks FOLLOWING the first. + use IpProtocol::*; + let mut ext_packet = Ipv6ExtHeader::new_checked(&mut buf).unwrap(); + ext_packet.set_next_header(IpProtocol::Tcp); + // Temporarily set high enough to give us enough bytes to emit into. + // XXX: propose a joint emit + set_len for smoltcp. + ext_packet.set_header_len(3); + let len = 2 + match extension { + HopByHop => { + let hbh = hop_by_hop_header(); + let mut hbh_packet = Ipv6HopByHopHeader::new_checked( + ext_packet.payload_mut(), + ) + .unwrap(); + hbh.emit(&mut hbh_packet); + hbh.buffer_len() + } + Ipv6Frag => { + let frag = fragment_header(); + let mut frag_packet = Ipv6FragmentHeader::new_checked( + ext_packet.payload_mut(), + ) + .unwrap(); + fragment_header().emit(&mut frag_packet); + frag.buffer_len() + } + Ipv6Route => { + let route = route_header(); + let mut route_packet = Ipv6RoutingHeader::new_checked( + ext_packet.payload_mut(), + ) + .unwrap(); + route.emit(&mut route_packet); + route.buffer_len() + } + Unknown(x) if x == &DDM_HEADER_ID => { + // TODO: actually build DDM ID + Timestamp values here. + // for now we just emit an empty header here. 
+ 14 + } + _ => unimplemented!( + "Extension header {:#?} unsupported", + extension + ), + }; + ext_packet.set_header_len(match extension { + Ipv6Frag => 0, + _ => u8::try_from((len - 8) / 8).unwrap(), + }); + + // Move the position markers to the new header. + header_start = header_end; + header_end += len; + } + + // Set the last header to point to the ULP + data[header_start] = u8::from(IpProtocol::Tcp); + + (data, header_end) + } + + // Test every permuation of the supported extension headers, verifying the + // computed lengths of: + // + // - Payload length + // - ULP length + // - Extension header length + // - Full header length + #[test] + fn test_extension_header_lengths_ok() { + for n_extensions in 0..SUPPORTED_EXTENSIONS.len() { + for extensions in + SUPPORTED_EXTENSIONS.into_iter().permutations(n_extensions) + { + let (buf, pos) = generate_test_packet(extensions.as_slice()); + let (header, ..) = ValidIpv6::parse(&buf[..]).unwrap(); + assert_all_lengths_ok(&header, pos); + } + } + } + + fn assert_all_lengths_ok( + header: &ValidIpv6, + header_end: usize, + ) { + assert_eq!( + header.packet_length() as usize, + header_end, + "Header length does not include all extension headers" + ); + assert_eq!( + header.payload_len() as usize, + PAYLOAD_LEN, + "Payload length does not include all extension headers", + ); + assert_eq!( + header.1.packet_length(), + header_end - Ipv6::MINIMUM_LENGTH, + "Extension header size is incorrect", + ); + assert_eq!( + header.ulp_len(), + PAYLOAD_LEN - header.ext_len(), + "ULP length is not correct" + ); + } + + #[test] + fn test_ipv6_addr_match_exact() { + let addr: Ipv6Addr = "fd00::1".parse().unwrap(); + assert!(addr.match_exact(&addr)); + assert!(!addr.match_exact(&("fd00::2".parse().unwrap()))); + } + + #[test] + fn test_ipv6_cidr_match_prefix() { + let cidr: Ipv6Cidr = "fd00::1/16".parse().unwrap(); + let addr: Ipv6Addr = "fd00::1".parse().unwrap(); + assert!(addr.match_prefix(&cidr)); + + let addr: Ipv6Addr = 
"fd00::2".parse().unwrap(); + assert!(addr.match_prefix(&cidr)); + + let addr: Ipv6Addr = "fd01::1".parse().unwrap(); + assert!(!addr.match_prefix(&cidr)); + + let addr: Ipv6Addr = "fd01::2".parse().unwrap(); + assert!(!addr.match_prefix(&cidr)); + } + + #[test] + fn emit() { + let ip = Ipv6 { + source: Ipv6Addr::from_const([ + 0xFE80, 0x0000, 0x0000, 0x0000, 0xBAF8, 0x53FF, 0xFEAF, 0x537D, + ]), + destination: Ipv6Addr::from_const([ + 0xFE80, 0x000, 0x0000, 0x0000, 0x56BE, 0xF7FF, 0xFE0B, 0x09EC, + ]), + next_header: IngotIpProtocol::ICMP_V6, + hop_limit: 255, + payload_len: 32, + ..Default::default() + }; + + let len = ip.packet_length(); + let pkt = ip.emit_vec(); + assert_eq!(len, pkt.len()); + + #[rustfmt::skip] + let expected_bytes = [ + // version + class + label + 0x60, 0x00, 0x00, 0x00, + // payload len + 0x00, 0x20, + // next header + hop limit + 0x3A, 0xFF, + // source address + 0xFE, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xBA, 0xF8, 0x53, 0xFF, 0xFE, 0xAF, 0x53, 0x7D, + // dest address + 0xFE, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x56, 0xBE, 0xF7, 0xFF, 0xFE, 0x0B, 0x09, 0xEC, + ]; + assert_eq!(&expected_bytes, &pkt[..]); + } + + #[test] + fn test_set_total_len() { + // Create a packet with one extension header. + let (mut buf, _) = generate_test_packet(&[IpProtocol::Ipv6Frag]); + let (mut header, ..) = ValidIpv6::parse(&mut buf[..]).unwrap(); + + // Set the total length to 128. + // + // The Payload Length field contains the length of both the extension + // headers and the actual ULP. Because we have the Fragmentation header, + // which is a fixed 8-octet thing, this should result in a Payload + // Length of 128 - Ipv6Hdr::BASE_SIZE = 78. 
+ const NEW_SIZE: usize = 128; + header.set_ulp_len(NEW_SIZE); + assert_eq!(header.ulp_len(), NEW_SIZE); + assert_eq!(header.packet_length(), Ipv6::MINIMUM_LENGTH + 8); + assert_eq!(header.payload_len() as usize, NEW_SIZE + 8); + } + + #[test] + fn bad_ipv6_version_caught() { + // This packet was produced due to prior sidecar testing, + // and put 4B between Eth and IPv6. This should fail to + // parse 0x00 as a v6 version. + #[rustfmt::skip] + let buf: &[u8] = &[ + // Garbage + 0x00, 0xc8, 0x08, 0x00, + // IPv6 + 0x60, 0x00, 0x00, 0x00, 0x02, 0x27, 0x11, 0xfe, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xfd, 0x00, 0x11, 0x22, 0x33, 0x44, 0x01, 0x11, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x17, 0xc1, 0x17, 0xc1, + 0x02, 0x27, 0xcf, 0x4e, 0x01, 0x00, 0x65, 0x58, 0x00, 0x00, 0x64, + 0x00, 0x01, 0x29, 0x00, 0x00, 0xa8, 0x40, 0x25, 0xff, 0xe8, 0x5f, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x81, 0x00, 0x45, 0x00, 0x02, + 0x05, 0xe0, 0x80, 0x40, 0x00, 0x37, 0x06, 0x1a, 0x9f, 0xc6, 0xd3, + 0x7a, 0x40, 0x2d, 0x9a, 0xd8, 0x25, 0xa1, 0x22, 0x01, 0xbb, 0xad, + 0x22, 0x51, 0x93, 0xa5, 0xf8, 0x01, 0x58, 0x80, 0x18, 0x01, 0x26, + 0x02, 0x24, 0x00, 0x00, 0x01, 0x01, 0x08, 0x0a, 0x48, 0xd7, 0x9a, + 0x23, 0x04, 0x31, 0x9f, 0x43, 0x14, 0x03, 0x03, 0x00, 0x01, 0x01, + 0x17, 0x03, 0x03, 0x00, 0x45, 0xf6, 0xcd, 0xe2, 0xc1, 0xe5, 0xa0, + 0x65, 0xa7, 0xfe, 0x29, 0xa8, 0xa2, 0xb0, 0x57, 0x91, 0x7e, 0xac, + 0xc8, 0x34, 0xdd, 0x6b, 0xfa, 0x21, + ]; + + // Parsing this one will fail -- next header is hop-by-hop, which is + // an RFC6564 header -- we don't have (0xc1 * 8) bytes here!! + assert!(ValidIpv6::parse(&buf[..]).is_err()); + + // We can construct this manually via ingot... 
+ let (v6, _rem) = Accessor::read_from_prefix(&buf[..]).unwrap(); + let ip = ValidIpv6(v6, Header::Repr(Default::default())); + assert!(ip.validate(120).is_err()); + } +} diff --git a/lib/opte/src/engine/ip4.rs b/lib/opte/src/engine/ip4.rs deleted file mode 100644 index 889e9910..00000000 --- a/lib/opte/src/engine/ip4.rs +++ /dev/null @@ -1,615 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -// Copyright 2024 Oxide Computer Company - -//! IPv4 headers. - -use super::checksum::Checksum; -use super::checksum::HeaderChecksum; -use super::headers::ModifyAction; -use super::headers::PushAction; -use super::headers::RawHeader; -use super::packet::PacketReadMut; -use super::packet::ReadErr; -use super::predicate::MatchExact; -use super::predicate::MatchExactVal; -use super::predicate::MatchPrefix; -use super::predicate::MatchPrefixVal; -use super::predicate::MatchRangeVal; -use crate::d_error::DError; -use alloc::string::String; -use core::fmt; -use core::fmt::Debug; -use core::fmt::Display; -use core::num::ParseIntError; -use core::result; -pub use opte_api::Ipv4Addr; -pub use opte_api::Ipv4Cidr; -pub use opte_api::Ipv4PrefixLen; -pub use opte_api::Protocol; -use serde::Deserialize; -use serde::Serialize; -use zerocopy::AsBytes; -use zerocopy::FromBytes; -use zerocopy::FromZeroes; -use zerocopy::Ref; -use zerocopy::Unaligned; - -pub const IPV4_HDR_LEN_MASK: u8 = 0x0F; -pub const IPV4_HDR_VER_MASK: u8 = 0xF0; -pub const IPV4_HDR_VER_SHIFT: u8 = 4; -pub const IPV4_VERSION: u8 = 4; - -pub const DEF_ROUTE: &str = "0.0.0.0/0"; - -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum IpError { - BadPrefix(u8), - Ipv4NonPrivateNetwork(Ipv4Addr), - MalformedCidr(String), - MalformedInt, - MalformedIp(String), - MalformedPrefix(String), - Other(String), -} - -impl From for IpError { - fn from(_err: ParseIntError) -> 
Self { - IpError::MalformedInt - } -} - -impl From for IpError { - fn from(err: String) -> Self { - IpError::Other(err) - } -} - -impl Display for IpError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - use IpError::*; - - match self { - BadPrefix(prefix) => { - write!(f, "bad prefix: {}", prefix) - } - - Ipv4NonPrivateNetwork(addr) => { - write!(f, "non-private network: {}", addr) - } - - MalformedCidr(cidr) => { - write!(f, "malformed CIDR: {}", cidr) - } - - MalformedInt => { - write!(f, "malformed integer") - } - - MalformedIp(ip) => { - write!(f, "malformed IP: {}", ip) - } - - MalformedPrefix(prefix) => { - write!(f, "malformed prefix: {}", prefix) - } - - Other(msg) => { - write!(f, "{}", msg) - } - } - } -} - -impl From for String { - fn from(err: IpError) -> Self { - format!("{}", err) - } -} - -impl MatchPrefixVal for Ipv4Cidr {} - -#[test] -fn cidr_match() { - let ip1 = "192.168.2.22".parse::().unwrap(); - let cidr1 = "192.168.2.0/24".parse().unwrap(); - assert!(ip1.match_prefix(&cidr1)); - - let ip2 = "10.7.7.7".parse::().unwrap(); - let cidr2 = "10.0.0.0/8".parse().unwrap(); - assert!(ip2.match_prefix(&cidr2)); - - let ip3 = "52.10.128.69".parse::().unwrap(); - let cidr3 = DEF_ROUTE.parse().unwrap(); - assert!(ip3.match_prefix(&cidr3)); -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub struct Ipv4CidrPrefix { - val: u8, -} - -impl Ipv4CidrPrefix { - pub fn new(net_prefix: u8) -> result::Result { - if net_prefix > 32 { - return Err(IpError::BadPrefix(net_prefix)); - } - - Ok(Ipv4CidrPrefix { val: net_prefix }) - } -} - -impl MatchExactVal for Ipv4Addr {} -impl MatchRangeVal for Ipv4Addr {} - -impl MatchExact for Ipv4Addr { - fn match_exact(&self, val: &Ipv4Addr) -> bool { - *self == *val - } -} - -impl MatchPrefix for Ipv4Addr { - fn match_prefix(&self, prefix: &Ipv4Cidr) -> bool { - prefix.is_member(*self) - } -} - -#[test] -fn match_check() { - let ip = "192.168.2.11".parse::().unwrap(); - assert!(ip.match_exact(&ip)); - 
assert!(ip.match_prefix(&"192.168.2.0/24".parse::().unwrap())); -} - -impl MatchExactVal for Protocol {} - -impl MatchExact for Protocol { - fn match_exact(&self, val: &Protocol) -> bool { - *self == *val - } -} - -#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)] -pub struct Ipv4Meta { - pub src: Ipv4Addr, - pub dst: Ipv4Addr, - pub proto: Protocol, - pub ttl: u8, - pub ident: u16, - pub hdr_len: u16, - pub total_len: u16, - pub csum: [u8; 2], -} - -impl Default for Ipv4Meta { - fn default() -> Self { - Self { - src: Ipv4Addr::ANY_ADDR, - dst: Ipv4Addr::ANY_ADDR, - proto: Protocol::Unknown(255), - ttl: 64, - ident: 0, - hdr_len: Ipv4Hdr::BASE_SIZE as u16, - total_len: 0, - csum: [0; 2], - } - } -} - -impl Ipv4Meta { - pub fn compute_hdr_csum(&mut self) { - let mut hdr = [0; 20]; - self.csum = [0; 2]; - self.emit(&mut hdr); - let csum = Checksum::compute(&hdr); - self.csum = HeaderChecksum::from(csum).bytes(); - } - - pub fn compute_ulp_csum( - &self, - opt: UlpCsumOpt, - ulp_hdr: &[u8], - body: &[u8], - ) -> Checksum { - match opt { - UlpCsumOpt::Partial => todo!("implement partial csum"), - UlpCsumOpt::Full => { - let mut csum = self.pseudo_csum(); - csum.add_bytes(ulp_hdr); - csum.add_bytes(body); - csum - } - } - } - - #[inline] - pub fn emit(&self, dst: &mut [u8]) { - // The raw header relies on the slice being the exactly length. - debug_assert_eq!(dst.len(), Ipv4Hdr::BASE_SIZE); - let mut raw = Ipv4HdrRaw::new_mut(dst).unwrap(); - raw.write(Ipv4HdrRaw::from(self)); - } - - /// Return the length of the header needed to emit the metadata. - pub fn hdr_len(&self) -> usize { - Ipv4Hdr::BASE_SIZE - } - - /// Populate `bytes` with the pseudo header bytes. 
- pub fn pseudo_bytes(&self, bytes: &mut [u8; 12]) { - bytes[0..4].copy_from_slice(&self.src.bytes()); - bytes[4..8].copy_from_slice(&self.dst.bytes()); - let ulp_len = self.total_len - self.hdr_len; - let len_bytes = ulp_len.to_be_bytes(); - bytes[8..12].copy_from_slice(&[ - 0, - u8::from(self.proto), - len_bytes[0], - len_bytes[1], - ]); - } - - /// Return a [`Checksum`] of the pseudo header. - pub fn pseudo_csum(&self) -> Checksum { - let mut pseudo_bytes = [0u8; 12]; - self.pseudo_bytes(&mut pseudo_bytes); - Checksum::compute(&pseudo_bytes) - } -} - -impl<'a> From<&Ipv4Hdr<'a>> for Ipv4Meta { - fn from(ip4: &Ipv4Hdr) -> Self { - let raw = ip4.bytes.read(); - - let hdr_len = u16::from((raw.ver_hdr_len & IPV4_HDR_LEN_MASK) * 4); - - Self { - src: Ipv4Addr::from(raw.src), - dst: Ipv4Addr::from(raw.dst), - proto: Protocol::from(raw.proto), - ttl: raw.ttl, - ident: u16::from_be_bytes(raw.ident), - hdr_len, - total_len: u16::from_be_bytes(raw.total_len), - csum: raw.csum, - } - } -} - -#[derive( - Clone, Copy, Debug, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize, -)] -pub struct Ipv4Push { - pub src: Ipv4Addr, - pub dst: Ipv4Addr, - pub proto: Protocol, -} - -impl PushAction for Ipv4Push { - fn push(&self) -> Ipv4Meta { - Ipv4Meta { - src: self.src, - dst: self.dst, - proto: self.proto, - ..Default::default() - } - } -} - -#[derive(Clone, Debug, Default, Deserialize, Serialize)] -pub struct Ipv4Mod { - pub src: Option, - pub dst: Option, - pub proto: Option, -} - -impl ModifyAction for Ipv4Mod { - fn modify(&self, meta: &mut Ipv4Meta) { - if let Some(src) = self.src { - meta.src = src; - } - - if let Some(dst) = self.dst { - meta.dst = dst; - } - - if let Some(proto) = self.proto { - meta.proto = proto; - } - } -} - -#[derive(Debug)] -pub struct Ipv4Hdr<'a> { - bytes: Ref<&'a mut [u8], Ipv4HdrRaw>, -} - -impl<'a> Ipv4Hdr<'a> { - pub const BASE_SIZE: usize = Ipv4HdrRaw::SIZE; - pub const CSUM_BEGIN: usize = 10; - pub const CSUM_END: usize = 12; - - #[inline] 
- pub fn csum(&self) -> [u8; 2] { - self.bytes.csum - } - - #[inline] - pub fn dst(&self) -> Ipv4Addr { - Ipv4Addr::from(self.bytes.dst) - } - - /// Return the header length, in bytes. - #[inline] - pub fn hdr_len(&self) -> u16 { - u16::from((self.bytes.ver_hdr_len & IPV4_HDR_LEN_MASK) * 4) - } - - #[inline] - pub fn ident(&self) -> u16 { - u16::from_be_bytes(self.bytes.ident) - } - - pub fn parse<'b, R>(rdr: &'b mut R) -> Result - where - R: PacketReadMut<'a>, - { - let src = rdr.slice_mut(Ipv4HdrRaw::SIZE)?; - let ip = Self { bytes: Ipv4HdrRaw::new_mut(src)? }; - - match ip.version() { - 4 => {} - vsn => return Err(Ipv4HdrError::BadVersion { vsn }), - } - - let hdr_len = ip.hdr_len(); - - if (hdr_len as usize) < Ipv4HdrRaw::SIZE { - return Err(Ipv4HdrError::HeaderTruncated { hdr_len }); - } - - if ip.total_len() < hdr_len { - return Err(Ipv4HdrError::BadTotalLen { - total_len: ip.total_len(), - }); - } - - // TODO: actually capture and re-emit ipv4 options. - // before, they were accidentally *becoming* the ULP. - // now, we're at least skipping them. - let remaining_bytes = (hdr_len as usize) - Ipv4HdrRaw::SIZE; - rdr.seek(remaining_bytes) - .map_err(|_| Ipv4HdrError::HeaderTruncated { hdr_len })?; - - let _proto = Protocol::from(ip.bytes.proto); - - Ok(ip) - } - - /// Return the [`Protocol`]. - #[inline] - pub fn proto(&self) -> Protocol { - // Unwrap: We verified the proto is good upon parsing. - Protocol::from(self.bytes.proto) - } - - /// Populate `bytes` with the pseudo header bytes. - pub fn pseudo_bytes(&self, bytes: &mut [u8; 12]) { - bytes[0..4].copy_from_slice(&self.bytes.src); - bytes[4..8].copy_from_slice(&self.bytes.dst); - let len_bytes = self.ulp_len().to_be_bytes(); - bytes[8..12].copy_from_slice(&[ - 0, - self.bytes.proto, - len_bytes[0], - len_bytes[1], - ]); - } - - /// Return a [`Checksum`] of the pseudo header. 
- pub fn pseudo_csum(&self) -> Checksum { - let mut pseudo_bytes = [0u8; 12]; - self.pseudo_bytes(&mut pseudo_bytes); - Checksum::compute(&pseudo_bytes) - } - - #[inline] - pub fn set_csum(&mut self, csum: [u8; 2]) { - self.bytes.csum = csum; - } - - /// Set the `Total Length` field. - #[inline] - pub fn set_total_len(&mut self, len: u16) { - self.bytes.total_len = len.to_be_bytes() - } - - /// Return the source address. - #[inline] - pub fn src(&self) -> Ipv4Addr { - Ipv4Addr::from(self.bytes.src) - } - - /// Return the value of the `Total Length` field. - #[inline] - pub fn total_len(&self) -> u16 { - u16::from_be_bytes(self.bytes.total_len) - } - - #[inline] - pub fn ttl(&self) -> u8 { - self.bytes.ttl - } - - /// Return the length of the Upper Layer Protocol (ULP) portion of - /// the packet. - #[inline] - pub fn ulp_len(&self) -> u16 { - self.total_len() - self.hdr_len() - } - - /// Return the reported IP version field from the packet. - #[inline] - pub fn version(&self) -> u8 { - self.bytes.ver_hdr_len >> IPV4_HDR_VER_SHIFT - } -} - -/// Options for computing a ULP checksum. -#[derive(Clone, Copy, Debug)] -pub enum UlpCsumOpt { - /// Compute a partial checksum, using only the pseudo-header. - /// - /// This is intended in situations in which computing the checksum of the - /// body itself can be offloaded to hardware. - Partial, - /// Compute the full checksum, including the pseudo-header, ULP header and - /// the ULP body. 
- Full, -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq, DError)] -#[derror(leaf_data = Ipv4HdrError::derror_data)] -pub enum Ipv4HdrError { - BadTotalLen { total_len: u16 }, - BadVersion { vsn: u8 }, - HeaderTruncated { hdr_len: u16 }, - ReadError(ReadErr), - UnexpectedProtocol { protocol: u8 }, -} - -impl From for Ipv4HdrError { - fn from(error: ReadErr) -> Self { - Ipv4HdrError::ReadError(error) - } -} - -impl Ipv4HdrError { - fn derror_data(&self, data: &mut [u64]) { - data[0] = match self { - Self::BadTotalLen { total_len } => *total_len as u64, - Self::BadVersion { vsn } => *vsn as u64, - Self::HeaderTruncated { hdr_len } => *hdr_len as u64, - Self::UnexpectedProtocol { protocol } => *protocol as u64, - _ => 0, - } - } -} - -/// Note: For now we keep this unaligned to be safe. -#[repr(C)] -#[derive(Clone, Debug, FromBytes, AsBytes, FromZeroes, Unaligned)] -pub struct Ipv4HdrRaw { - pub ver_hdr_len: u8, - pub dscp_ecn: u8, - pub total_len: [u8; 2], - pub ident: [u8; 2], - pub frag_and_flags: [u8; 2], - pub ttl: u8, - pub proto: u8, - pub csum: [u8; 2], - pub src: [u8; 4], - pub dst: [u8; 4], -} - -impl<'a> RawHeader<'a> for Ipv4HdrRaw { - #[inline] - fn new_mut(src: &mut [u8]) -> Result, ReadErr> { - debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::new(src) { - Some(hdr) => hdr, - None => return Err(ReadErr::BadLayout), - }; - Ok(hdr) - } -} - -impl Default for Ipv4HdrRaw { - fn default() -> Self { - Ipv4HdrRaw { - ver_hdr_len: 0x45, - dscp_ecn: 0x0, - total_len: [0x0; 2], - ident: [0x0; 2], - frag_and_flags: [0x40, 0x0], - ttl: 64, - proto: u8::from(Protocol::Unknown(255)), - csum: [0x0; 2], - src: [0x0; 4], - dst: [0x0; 4], - } - } -} - -impl From<&Ipv4Meta> for Ipv4HdrRaw { - #[inline] - fn from(meta: &Ipv4Meta) -> Self { - Ipv4HdrRaw { - ver_hdr_len: 0x45, - dscp_ecn: 0x0, - total_len: meta.total_len.to_be_bytes(), - ident: meta.ident.to_be_bytes(), - frag_and_flags: [0x40, 0x0], - ttl: meta.ttl, - proto: u8::from(meta.proto), - csum: 
meta.csum, - src: meta.src.bytes(), - dst: meta.dst.bytes(), - } - } -} - -#[cfg(test)] -mod test { - use super::*; - use crate::engine::packet::Packet; - - #[test] - fn emit() { - let ip = Ipv4Meta { - src: Ipv4Addr::from([10, 0, 0, 54]), - dst: Ipv4Addr::from([52, 10, 128, 69]), - proto: Protocol::TCP, - ttl: 64, - ident: 2662, - hdr_len: 20, - total_len: 60, - csum: [0; 2], - }; - - let len = ip.hdr_len(); - assert_eq!(20, len); - - let mut pkt = Packet::alloc_and_expand(len); - let mut wtr = pkt.seg0_wtr(); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - assert_eq!(len, pkt.len()); - - #[rustfmt::skip] - let expected_bytes = vec![ - // version + IHL - 0x45, - // DSCP + ECN - 0x00, - // total length - 0x00, 0x3C, - // ident - 0x0A, 0x66, - // flags + frag offset - 0x40, 0x00, - // TTL - 0x40, - // protocol - 0x06, - // checksum - 0x00, 0x00, - // source - 0x0A, 0x00, 0x00, 0x36, - // dest - 0x34, 0x0A, 0x80, 0x45, - ]; - assert_eq!(&expected_bytes, pkt.seg_bytes(0)); - } -} diff --git a/lib/opte/src/engine/ip6.rs b/lib/opte/src/engine/ip6.rs deleted file mode 100644 index 2c0d4ad1..00000000 --- a/lib/opte/src/engine/ip6.rs +++ /dev/null @@ -1,923 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -// Copyright 2024 Oxide Computer Company - -//! IPv6 headers. 
- -use super::checksum::Checksum; -use super::headers::ModifyAction; -use super::headers::PushAction; -use super::ip4::Protocol; -pub use super::ip4::UlpCsumOpt; -use super::packet::PacketReadMut; -use super::packet::ReadErr; -use crate::d_error::DError; -use crate::engine::predicate::MatchExact; -use crate::engine::predicate::MatchExactVal; -use crate::engine::predicate::MatchPrefix; -use crate::engine::predicate::MatchPrefixVal; -pub use opte_api::Ipv6Addr; -pub use opte_api::Ipv6Cidr; -use serde::Deserialize; -use serde::Serialize; -use smoltcp::wire::IpProtocol; -use smoltcp::wire::Ipv6ExtHeader; -use smoltcp::wire::Ipv6FragmentHeader; -use smoltcp::wire::Ipv6HopByHopHeader; -use smoltcp::wire::Ipv6Packet; -use smoltcp::wire::Ipv6RoutingHeader; - -pub const IPV6_HDR_VSN_MASK: u8 = 0xF0; -pub const IPV6_HDR_VSN_SHIFT: u8 = 4; -pub const IPV6_VERSION: u8 = 6; -pub const DDM_HEADER_ID: u8 = 0xFE; -/// Current maximum bytes for extension headers which fit -/// in IPv6Meta. -/// -/// TODO: refactor so as *not* to need this. -pub const IPV6_MAX_EXT_LEN: usize = 64; - -impl MatchExactVal for Ipv6Addr {} -impl MatchPrefixVal for Ipv6Cidr {} - -impl MatchExact for Ipv6Addr { - fn match_exact(&self, val: &Ipv6Addr) -> bool { - *self == *val - } -} - -impl MatchPrefix for Ipv6Addr { - fn match_prefix(&self, prefix: &Ipv6Cidr) -> bool { - prefix.is_member(*self) - } -} - -#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)] -pub struct Ipv6Meta { - pub src: Ipv6Addr, - pub dst: Ipv6Addr, - pub next_hdr: IpProtocol, - pub proto: Protocol, - pub hop_limit: u8, - pub pay_len: u16, - - // For now we hold extensions as raw bytes. Ideally, each extension - // we support should get its own meta-like type and be declared - // optional. - // - // ``` - // pub hbh: Option, - // pub routing: Option, - // pub frag: Option, - // ... - // ``` - pub ext: Option<[u8; 64]>, - // NOTE: We need `ext_len` explicitly, because `ext` is a fixed-size array. 
- pub ext_len: usize, -} - -impl Default for Ipv6Meta { - fn default() -> Self { - Self { - src: Ipv6Addr::from([0; 16]), - dst: Ipv6Addr::from([0; 16]), - next_hdr: IpProtocol::Unknown(255), - proto: Protocol::Unknown(255), - hop_limit: 128, - pay_len: 0, - ext: None, - ext_len: 0, - } - } -} - -impl Ipv6Meta { - /// Compute the [`Checksum`] of the contained ULP datagram. - /// - /// This computes the checksum of the pseudo-header, and adds to it the sum - /// of the ULP header and body. - pub fn compute_ulp_csum( - &self, - opt: UlpCsumOpt, - ulp_hdr: &[u8], - body: &[u8], - ) -> Checksum { - match opt { - UlpCsumOpt::Partial => todo!("implement partial csum"), - UlpCsumOpt::Full => { - let mut csum = self.pseudo_csum(); - csum.add_bytes(ulp_hdr); - csum.add_bytes(body); - csum - } - } - } - - #[inline] - pub fn emit(&self, dst: &mut [u8]) { - debug_assert_eq!(dst.len(), self.hdr_len()); - let base = &mut dst[0..Ipv6Hdr::BASE_SIZE]; - let mut pkt = Ipv6Packet::new_unchecked(base); - pkt.set_version(6); - // For now assume no traffic class or flow label. - pkt.set_traffic_class(0); - pkt.set_flow_label(0); - pkt.set_payload_len(self.pay_len); - pkt.set_next_header(self.next_hdr); - pkt.set_hop_limit(self.hop_limit); - pkt.set_src_addr(self.src.into()); - pkt.set_dst_addr(self.dst.into()); - - if let Some(ext_bytes) = self.ext { - dst[Ipv6Hdr::BASE_SIZE..] - .copy_from_slice(&ext_bytes[0..self.ext_len]); - } - } - - /// Return the length of the IPv6 header, including the base header and - /// extension headers. - pub fn hdr_len(&self) -> usize { - Ipv6Hdr::BASE_SIZE + self.ext_len - } - - /// Return the pseudo header bytes. 
- pub fn pseudo_bytes(&self, bytes: &mut [u8; 40]) { - bytes[0..16].copy_from_slice(&self.src.bytes()); - bytes[16..32].copy_from_slice(&self.dst.bytes()); - bytes[32..36].copy_from_slice(&((self.pay_len as u32).to_be_bytes())); - bytes[36..40].copy_from_slice(&[0u8, 0u8, 0u8, u8::from(self.proto)]); - } - - /// Return a [`Checksum`] of the pseudo header. - pub fn pseudo_csum(&self) -> Checksum { - let mut bytes = [0u8; 40]; - self.pseudo_bytes(&mut bytes); - Checksum::compute(&bytes) - } - - /// Return the total length of the packet, including the base header, any - /// extension headers, and the payload itself. - pub fn total_len(&self) -> u16 { - Ipv6Hdr::BASE_SIZE as u16 + self.pay_len - } -} - -impl<'a> From<&Ipv6Hdr<'a>> for Ipv6Meta { - fn from(ip6: &Ipv6Hdr) -> Self { - let (ext, ext_len) = if let Some((ext_bytes, _proto_off)) = &ip6.ext { - let ext_len = ext_bytes.len(); - assert!(ext_len <= 64); - let mut ext = [0; 64]; - ext[0..ext_len].copy_from_slice(ext_bytes); - (Some(ext), ext_len) - } else { - (None, 0) - }; - - Ipv6Meta { - src: ip6.src(), - dst: ip6.dst(), - proto: ip6.proto(), - next_hdr: ip6.next_hdr(), - hop_limit: ip6.hop_limit(), - pay_len: ip6.pay_len() as u16, - ext, - ext_len, - } - } -} - -#[derive( - Clone, Copy, Debug, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize, -)] -pub struct Ipv6Push { - pub src: Ipv6Addr, - pub dst: Ipv6Addr, - pub proto: Protocol, -} - -impl PushAction for Ipv6Push { - fn push(&self) -> Ipv6Meta { - Ipv6Meta { - src: self.src, - dst: self.dst, - proto: self.proto, - // For now you cannot push extension headers. 
- next_hdr: IpProtocol::from(self.proto), - ..Default::default() - } - } -} - -#[derive(Clone, Debug, Default, Deserialize, Serialize)] -pub struct Ipv6Mod { - pub src: Option, - pub dst: Option, - pub proto: Option, -} - -impl ModifyAction for Ipv6Mod { - fn modify(&self, meta: &mut Ipv6Meta) { - if let Some(src) = self.src { - meta.src = src; - } - if let Some(dst) = self.dst { - meta.dst = dst; - } - if let Some(proto) = self.proto { - meta.proto = proto; - } - } -} - -/// An IPv6 packet header. -#[derive(Debug)] -pub struct Ipv6Hdr<'a> { - base: Ipv6Packet<&'a mut [u8]>, - // The proto reference points to the last next_header value (aka - // the upper-layer protocol number). - // proto: &'a mut u8, - /// Byteslice verified to be smaller than `IPV6_MAX_EXT_LEN`. - /// (extensions bytes, protocol field offset). - ext: Option<(&'a mut [u8], usize)>, -} - -impl<'a> Ipv6Hdr<'a> { - /// The size of the fixed IPv6 header. - /// - /// IPv6 headers are variable length, including a fixed, 40-byte portion as - /// well as a variable number of extension headers, each with potentially - /// different sizes. This size describes the fixed portion. - pub const BASE_SIZE: usize = 40; - - /// The offset of the Protocol (Next Header) field in the base header. - pub const BASE_HDR_PROTO_OFFSET: usize = 6; - - /// Return the destination address. - pub fn dst(&self) -> Ipv6Addr { - Ipv6Addr::from(self.base.dst_addr()) - } - - /// Return the length of the extensions headers, or 0 if there are - /// none. - fn ext_len(&self) -> usize { - match &self.ext { - None => 0, - Some((ext_bytes, _)) => ext_bytes.len(), - } - } - - /// Return the length of the header portion of the packet, including - /// extension headers - pub fn hdr_len(&self) -> usize { - Self::BASE_SIZE + self.ext_len() - } - - /// Return the hop limit value. 
- pub fn hop_limit(&self) -> u8 { - self.base.hop_limit() - } - - fn next_hdr(&self) -> IpProtocol { - self.base.next_header() - } - - /// Parse an IPv6 packet out of a reader, if possible. - pub fn parse<'b>( - rdr: &'b mut impl PacketReadMut<'a>, - ) -> Result { - // Parse the base IPv6 header. - let buf = rdr.slice_mut(Self::BASE_SIZE)?; - let base = Ipv6Packet::new_unchecked(buf); - match base.version() { - 6 => {} - vsn => return Err(Ipv6HdrError::BadVersion { vsn }), - } - - // Parse any extension headers. - // - // At this point, we don't need any information out of the headers other - // than their length (to determine the boundary with the ULP). We'll - // verify that the headers are supported, but otherwise maintain only a - // byte array with their contents. - let mut ext_len = 0; - let mut next_header = base.next_header(); - - // Either we have no extensions or we are parsing zero'd - // header data for the purpose of emitting. - if is_ulp_protocol(next_header) { - return Ok(Self { base, ext: None }); - } - - let mut proto_offset: usize = 0; - while !is_ulp_protocol(next_header) { - let n_bytes = match V6ExtClass::from(next_header) { - V6ExtClass::Rfc6564 => { - let buf = rdr.slice_mut(rdr.seg_left())?; - let mut header = Ipv6ExtHeader::new_checked(buf)?; - - // verify carried protocol if possible. - match next_header { - IpProtocol::HopByHop => { - _ = Ipv6HopByHopHeader::new_checked( - header.payload_mut(), - )? - } - IpProtocol::Ipv6Route => { - _ = Ipv6RoutingHeader::new_checked( - header.payload_mut(), - )? - } - _ => {} - } - - let n_bytes = 8 * (usize::from(header.header_len()) + 1); - next_header = header.next_header(); - let buf = header.into_inner(); - ext_len += n_bytes; - - // Put back any bytes in the segment not needed - // for this header. - rdr.seek_back(buf.len() - n_bytes)?; - - n_bytes - } - V6ExtClass::Frag => { - // This header's length is fixed. 
- // - // We'd like to use `size_of::()`, but - // that is not `repr(packed)`, so we'd possibly count - // padding. - const FRAGMENT_HDR_SIZE: usize = 8; - let buf = rdr.slice_mut(FRAGMENT_HDR_SIZE)?; - ext_len += buf.len(); - let mut header = Ipv6ExtHeader::new_checked(buf)?; - _ = Ipv6FragmentHeader::new_checked(header.payload_mut())?; - next_header = header.next_header(); - - FRAGMENT_HDR_SIZE - } - _ => { - return Err(Ipv6HdrError::UnexpectedNextHeader { - next_header: next_header.into(), - }); - } - }; - - if !is_ulp_protocol(next_header) { - proto_offset += n_bytes; - } - } - - // Panic: The protocol is the last value of next header, and since - // we've matched on everything we support in the `try_from` impl, this - // unwrap can't panic. - let _protocol = Protocol::from(next_header); - - if ext_len > IPV6_MAX_EXT_LEN { - return Err(Ipv6HdrError::ExtensionsTooLarge); - } - - // Seek back to the start of the extensions, then take a slice of - // all the options. - rdr.seek_back(ext_len)?; - let ext = Some((rdr.slice_mut(ext_len)?, proto_offset)); - Ok(Self { base, ext }) - } - - /// Return the payload length. - /// - /// This length includes any extension headers along with the - /// body. - pub fn pay_len(&self) -> usize { - usize::from(self.base.payload_len()) - } - - /// Return the Upper Layer Protocol in use. - /// - /// Even when extension headers are in play, this call always - /// returns the ULP. In other words, it always returns the final - /// "Next Header" value at the end of the extension header chain. - pub fn proto(&self) -> Protocol { - // Unwrap: We verified the proto is good upon parsing. - if let Some((bytes, proto_offset)) = &self.ext { - Protocol::from(bytes[*proto_offset]) - } else { - Protocol::from(self.base.next_header()) - } - } - - /// Populate `bytes` with the pseudo header bytes. 
- pub fn pseudo_bytes(&self, bytes: &mut [u8; 40]) { - bytes[0..16].copy_from_slice(self.base.src_addr().as_bytes()); - bytes[16..32].copy_from_slice(self.base.dst_addr().as_bytes()); - bytes[32..36].copy_from_slice(&(self.pay_len() as u32).to_be_bytes()); - bytes[36..40].copy_from_slice(&[0u8, 0u8, 0u8, u8::from(self.proto())]); - } - - /// Return a [`Checksum`] of the pseudo header. - pub fn pseudo_csum(&self) -> Checksum { - let mut pseudo_bytes = [0u8; 40]; - self.pseudo_bytes(&mut pseudo_bytes); - Checksum::compute(&pseudo_bytes) - } - - /// Set the total length of the packet. - /// - /// There is no "total length" for IPv6; it keeps a payload - /// length. However, this API is useful for having a consistent - /// method for setting lengths when emitting headers. - pub fn set_total_len(&mut self, len: u16) { - // The Payload Length field of the IPv6 header includes the ULP payload - // _and_ the length of any extension headers. - self.base.set_payload_len(len - Self::BASE_SIZE as u16); - } - - /// Return the source address. - pub fn src(&self) -> Ipv6Addr { - Ipv6Addr::from(self.base.src_addr()) - } - - /// Return the total length of the packet, including the base header, any - /// extension headers, and the payload itself. - pub fn total_len(&self) -> usize { - self.pay_len() + Self::BASE_SIZE - } - - /// Return the length of the upper-layer protocol payload. 
- pub fn ulp_len(&self) -> usize { - self.pay_len() - self.ext_len() - } -} - -fn is_ulp_protocol(proto: IpProtocol) -> bool { - matches!(V6ExtClass::from(proto), V6ExtClass::Ulp) -} - -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -enum V6ExtClass { - Ulp, - Frag, - Rfc6564, - Unknown, -} - -impl From for V6ExtClass { - #[inline] - fn from(value: IpProtocol) -> Self { - use IpProtocol::*; - - match value { - Icmp | Igmp | Tcp | Udp | Icmpv6 => Self::Ulp, - Ipv6Frag => Self::Frag, - HopByHop | Ipv6Route | Ipv6Opts => Self::Rfc6564, - // Also follow RFC6564: - // 135 (RFC6275), 139 (RFC7401), 140 (RFC5533) - Unknown(x) if x == DDM_HEADER_ID => Self::Rfc6564, - _ => Self::Unknown, - } - } -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq, DError)] -#[derror(leaf_data = Ipv6HdrError::derror_data)] -pub enum Ipv6HdrError { - BadVersion { vsn: u8 }, - ReadError(ReadErr), - UnexpectedNextHeader { next_header: u8 }, - Malformed, - ExtensionsTooLarge, -} - -impl Ipv6HdrError { - fn derror_data(&self, data: &mut [u64]) { - data[0] = match self { - Self::BadVersion { vsn } => *vsn as u64, - Self::UnexpectedNextHeader { next_header } => *next_header as u64, - _ => 0, - } - } -} - -impl From for Ipv6HdrError { - fn from(_error: smoltcp::wire::Error) -> Ipv6HdrError { - Ipv6HdrError::Malformed - } -} - -impl From for Ipv6HdrError { - fn from(error: ReadErr) -> Self { - Ipv6HdrError::ReadError(error) - } -} - -#[cfg(test)] -pub(crate) mod test { - use super::*; - use crate::engine::packet::Packet; - use itertools::Itertools; - use smoltcp::wire::IpProtocol; - use smoltcp::wire::Ipv6Address; - use smoltcp::wire::Ipv6FragmentHeader; - use smoltcp::wire::Ipv6FragmentRepr; - use smoltcp::wire::Ipv6HopByHopHeader; - use smoltcp::wire::Ipv6HopByHopRepr; - use smoltcp::wire::Ipv6OptionRepr; - use smoltcp::wire::Ipv6Packet; - use smoltcp::wire::Ipv6Repr; - use smoltcp::wire::Ipv6RoutingHeader; - use smoltcp::wire::Ipv6RoutingRepr; - use std::vec::Vec; - - // Test packet size and payload 
length - const BUFFER_LEN: usize = 512; - const PAYLOAD_LEN: usize = 512 - Ipv6Hdr::BASE_SIZE; - pub(crate) const SUPPORTED_EXTENSIONS: [IpProtocol; 4] = [ - IpProtocol::HopByHop, - IpProtocol::Ipv6Route, - IpProtocol::Ipv6Frag, - IpProtocol::Unknown(DDM_HEADER_ID), - ]; - - #[test] - fn from_pairs() { - let ip6 = super::Ipv6Addr::from([ - 0x2601, 0x0284, 0x4100, 0xE240, 0x0000, 0x0000, 0xC0A8, 0x01F5, - ]); - - assert_eq!( - ip6.bytes(), - [ - 0x26, 0x01, 0x02, 0x84, 0x41, 0x00, 0xE2, 0x40, 0x00, 0x00, - 0x00, 0x00, 0xC0, 0xA8, 0x01, 0xF5 - ] - ); - } - - fn base_header() -> Ipv6Repr { - Ipv6Repr { - src_addr: Ipv6Address::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), - dst_addr: Ipv6Address::new(0xfd00, 0, 0, 0, 0, 0, 0, 2), - next_header: IpProtocol::Tcp, - payload_len: PAYLOAD_LEN, - hop_limit: 6, - } - } - - fn hop_by_hop_header() -> Ipv6HopByHopRepr<'static> { - // in 8-octet units, not including the first - const OPTION_LEN: usize = 1; - // SmolTCP limits us to 2 max HBH options in its repr. - // Pad to the next multiple of 8, then one more 8-octet unit. - // - Ext header takes 2B - // - PadN(n) takes 2B, then n bytes. - // => 4 + fill - const LEN: usize = 4 + OPTION_LEN * 8; - static OPTIONS: [Ipv6OptionRepr; 1] = - [Ipv6OptionRepr::PadN(LEN as u8); 1]; - Ipv6HopByHopRepr { - options: heapless::Vec::from_slice(&OPTIONS).unwrap(), - } - } - - fn route_header() -> Ipv6RoutingRepr<'static> { - // In 8-octet units, not including the first, i.e., this just needs the - // home address, 128 bits. - let segments_left = 1; - let home_address = Ipv6Address::new(0xfd00, 0, 0, 0, 0, 0, 0, 1); - Ipv6RoutingRepr::Type2 { segments_left, home_address } - } - - fn fragment_header() -> Ipv6FragmentRepr { - Ipv6FragmentRepr { frag_offset: 128, more_frags: false, ident: 0x17 } - } - - // Generate a test packet. - // - // This creates a base IPv6 header, and any extension headers with protocols - // defined by `extensions`. 
There is always a base header, and the ULP is - // always defined to be TCP. `extensions` can be empty. - // - // This returns the byte array of the packet, plus the size of the entire - // header, including extensions. - pub(crate) fn generate_test_packet( - extensions: &[IpProtocol], - ) -> (Vec, usize) { - // Create a chain of headers, starting with the base. Emit them into - // byte arrays, to test parsing. - let mut data = vec![0; BUFFER_LEN]; - let mut header_start = 0; - let mut next_header_pos = 6; - let mut header_end = Ipv6Hdr::BASE_SIZE; - let mut buf = &mut data[header_start..]; - - // The base header. The payload length is always the same, but the base - // protocol may be updated. - let base = base_header(); - let mut packet = Ipv6Packet::new_checked(&mut buf).unwrap(); - base.emit(&mut packet); - - if extensions.is_empty() { - // No extensions at all, just base header with a TCP ULP - return (buf.to_vec(), Ipv6Hdr::BASE_SIZE); - } - - for extension in extensions { - // First, update the _previous_ next_header with the type of this - // extension header. They form a linked-list. We do this first, so - // that in the case of the first extension header, we're rewriting - // the `next_header` value in the base header. - buf[next_header_pos] = u8::from(*extension); - - // For every extension header, the `next_header` is the first octet. - // That is, the base header is the only one where it's a different - // position. - next_header_pos = 0; - - // Grab the remaining packet buffer, from the end of the previous - // header. This is where we'll start inserting the current extension - // header. - buf = &mut data[header_end..]; - - // Insert the bytes of each extension header, returning the number - // of octets written. - // - // For each extension header, we need to build the top level ExtHeader - // and set length manually: this is (inner_len / 8) := the number of - // 8-byte blocks FOLLOWING the first. 
- use IpProtocol::*; - let mut ext_packet = Ipv6ExtHeader::new_checked(&mut buf).unwrap(); - ext_packet.set_next_header(IpProtocol::Tcp); - // Temporarily set high enough to give us enough bytes to emit into. - // XXX: propose a joint emit + set_len for smoltcp. - ext_packet.set_header_len(3); - let len = 2 + match extension { - HopByHop => { - let hbh = hop_by_hop_header(); - let mut hbh_packet = Ipv6HopByHopHeader::new_checked( - ext_packet.payload_mut(), - ) - .unwrap(); - hbh.emit(&mut hbh_packet); - hbh.buffer_len() - } - Ipv6Frag => { - let frag = fragment_header(); - let mut frag_packet = Ipv6FragmentHeader::new_checked( - ext_packet.payload_mut(), - ) - .unwrap(); - fragment_header().emit(&mut frag_packet); - frag.buffer_len() - } - Ipv6Route => { - let route = route_header(); - let mut route_packet = Ipv6RoutingHeader::new_checked( - ext_packet.payload_mut(), - ) - .unwrap(); - route.emit(&mut route_packet); - route.buffer_len() - } - Unknown(x) if x == &DDM_HEADER_ID => { - // TODO: actually build DDM ID + Timestamp values here. - // for now we just emit an empty header here. - 14 - } - _ => unimplemented!( - "Extension header {:#?} unsupported", - extension - ), - }; - ext_packet.set_header_len(match V6ExtClass::from(*extension) { - V6ExtClass::Frag => 0, - V6ExtClass::Rfc6564 => u8::try_from((len - 8) / 8).unwrap(), - _ => unreachable!(), - }); - - // Move the position markers to the new header. 
- header_start = header_end; - header_end += len; - } - - // Set the last header to point to the ULP - data[header_start] = u8::from(IpProtocol::Tcp); - - (data, header_end) - } - - // Test every permuation of the supported extension headers, verifying the - // computed lengths of: - // - // - Payload length - // - ULP length - // - Extension header length - // - Full header length - #[test] - fn test_extension_header_lengths_ok() { - for n_extensions in 0..SUPPORTED_EXTENSIONS.len() { - for extensions in - SUPPORTED_EXTENSIONS.into_iter().permutations(n_extensions) - { - let (buf, pos) = generate_test_packet(extensions.as_slice()); - let mut pkt = Packet::copy(&buf); - let mut reader = pkt.get_rdr_mut(); - let header = Ipv6Hdr::parse(&mut reader).unwrap(); - assert_all_lengths_ok(&header, pos); - } - } - } - - fn assert_all_lengths_ok(header: &Ipv6Hdr, header_end: usize) { - assert_eq!( - header.hdr_len(), - header_end, - "Header length does not include all extension headers" - ); - assert_eq!( - header.pay_len(), - PAYLOAD_LEN, - "Payload length does not include all extension headers", - ); - assert_eq!( - header.ext_len(), - header_end - Ipv6Hdr::BASE_SIZE, - "Extension header size is incorrect", - ); - assert_eq!( - header.ulp_len(), - PAYLOAD_LEN - header.ext_len(), - "ULP length is not correct" - ); - assert_eq!( - header.total_len(), - PAYLOAD_LEN + Ipv6Hdr::BASE_SIZE, - "Total packet length is not correct", - ); - } - - #[test] - fn test_ipv6_addr_match_exact() { - let addr: Ipv6Addr = "fd00::1".parse().unwrap(); - assert!(addr.match_exact(&addr)); - assert!(!addr.match_exact(&("fd00::2".parse().unwrap()))); - } - - #[test] - fn test_ipv6_cidr_match_prefix() { - let cidr: Ipv6Cidr = "fd00::1/16".parse().unwrap(); - let addr: Ipv6Addr = "fd00::1".parse().unwrap(); - assert!(addr.match_prefix(&cidr)); - - let addr: Ipv6Addr = "fd00::2".parse().unwrap(); - assert!(addr.match_prefix(&cidr)); - - let addr: Ipv6Addr = "fd01::1".parse().unwrap(); - 
assert!(!addr.match_prefix(&cidr)); - - let addr: Ipv6Addr = "fd01::2".parse().unwrap(); - assert!(!addr.match_prefix(&cidr)); - } - - #[test] - fn emit() { - let ip = Ipv6Meta { - src: Ipv6Addr::from_const([ - 0xFE80, 0x0000, 0x0000, 0x0000, 0xBAF8, 0x53FF, 0xFEAF, 0x537D, - ]), - dst: Ipv6Addr::from_const([ - 0xFE80, 0x000, 0x0000, 0x0000, 0x56BE, 0xF7FF, 0xFE0B, 0x09EC, - ]), - proto: Protocol::ICMPv6, - next_hdr: IpProtocol::Icmpv6, - hop_limit: 255, - pay_len: 32, - ext: None, - ext_len: 0, - }; - - let len = ip.hdr_len(); - let mut pkt = Packet::alloc_and_expand(len); - let mut wtr = pkt.seg0_wtr(); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - assert_eq!(len, pkt.len()); - - #[rustfmt::skip] - let expected_bytes = [ - // version + class + label - 0x60, 0x00, 0x00, 0x00, - // payload len - 0x00, 0x20, - // next header + hop limit - 0x3A, 0xFF, - // source address - 0xFE, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xBA, 0xF8, 0x53, 0xFF, 0xFE, 0xAF, 0x53, 0x7D, - // dest address - 0xFE, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x56, 0xBE, 0xF7, 0xFF, 0xFE, 0x0B, 0x09, 0xEC, - ]; - assert_eq!(&expected_bytes, pkt.seg_bytes(0)); - } - - #[test] - fn test_set_total_len() { - // Create a packet with one extension header. - let (buf, _) = generate_test_packet(&[IpProtocol::Ipv6Frag]); - let mut pkt = Packet::copy(&buf); - let mut reader = pkt.get_rdr_mut(); - let mut header = Ipv6Hdr::parse(&mut reader).unwrap(); - - // Set the total length to 128. - // - // The Payload Length field contains the length of both the extension - // headers and the actual ULP. Because we have the Fragmentation header, - // which is a fixed 8-octet thing, this should result in a Payload - // Length of 128 - Ipv6Hdr::BASE_SIZE = 78. 
- const NEW_SIZE: usize = 128; - header.set_total_len(NEW_SIZE as _); - assert_eq!(header.total_len(), NEW_SIZE); - assert_eq!(header.hdr_len(), Ipv6Hdr::BASE_SIZE + 8); - assert_eq!(header.pay_len(), NEW_SIZE - Ipv6Hdr::BASE_SIZE); - } - - #[test] - fn test_ip6_meta_total_len() { - // Create a packet with one extension header. - let (buf, _) = generate_test_packet(&[IpProtocol::Ipv6Frag]); - let mut pkt = Packet::copy(&buf); - let mut reader = pkt.get_rdr_mut(); - let header = Ipv6Hdr::parse(&mut reader).unwrap(); - - // Previously, the `Ipv6Meta::total_len` method double-counted the - // extension header length. Assert we don't do that here. - let meta = Ipv6Meta::from(&header); - assert!(meta.ext.is_some()); - assert_eq!(meta.ext_len, 8); // Fixed size - assert_eq!( - meta.total_len() as usize, - header.hdr_len() + header.ulp_len() - ); - } - - #[test] - fn bad_ipv6_version_caught() { - // This packet was produced due to prior sidecar testing, - // and put 4B between Eth and IPv6. This should fail to - // parse 0x00 as a v6 version. 
- #[rustfmt::skip] - let buf: &[u8] = &[ - // Garbage - 0x00, 0xc8, 0x08, 0x00, - // IPv6 - 0x60, 0x00, 0x00, 0x00, 0x02, 0x27, 0x11, 0xfe, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0xfd, 0x00, 0x11, 0x22, 0x33, 0x44, 0x01, 0x11, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x17, 0xc1, 0x17, 0xc1, - 0x02, 0x27, 0xcf, 0x4e, 0x01, 0x00, 0x65, 0x58, 0x00, 0x00, 0x64, - 0x00, 0x01, 0x29, 0x00, 0x00, 0xa8, 0x40, 0x25, 0xff, 0xe8, 0x5f, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x81, 0x00, 0x45, 0x00, 0x02, - 0x05, 0xe0, 0x80, 0x40, 0x00, 0x37, 0x06, 0x1a, 0x9f, 0xc6, 0xd3, - 0x7a, 0x40, 0x2d, 0x9a, 0xd8, 0x25, 0xa1, 0x22, 0x01, 0xbb, 0xad, - 0x22, 0x51, 0x93, 0xa5, 0xf8, 0x01, 0x58, 0x80, 0x18, 0x01, 0x26, - 0x02, 0x24, 0x00, 0x00, 0x01, 0x01, 0x08, 0x0a, 0x48, 0xd7, 0x9a, - 0x23, 0x04, 0x31, 0x9f, 0x43, 0x14, 0x03, 0x03, 0x00, 0x01, 0x01, - 0x17, 0x03, 0x03, 0x00, 0x45, 0xf6, 0xcd, 0xe2, 0xc1, 0xe5, 0xa0, - 0x65, 0xa7, 0xfe, 0x29, 0xa8, 0xa2, 0xb0, 0x57, 0x91, 0x7e, 0xac, - 0xc8, 0x34, 0xdd, 0x6b, 0xfa, 0x21, - ]; - - let mut pkt = Packet::copy(buf); - let mut reader = pkt.get_rdr_mut(); - assert!(matches!( - Ipv6Hdr::parse(&mut reader), - Err(Ipv6HdrError::BadVersion { vsn: 0 }) - )); - } - - #[test] - fn too_many_exts_are_parse_error() { - // Create a packet with entirely too many extension headers. 80B! 
- let (buf, _) = generate_test_packet(&[ - IpProtocol::Ipv6Route, - IpProtocol::Ipv6Route, - IpProtocol::Ipv6Route, - IpProtocol::Ipv6Route, - IpProtocol::Ipv6Route, - IpProtocol::Ipv6Route, - IpProtocol::Ipv6Route, - IpProtocol::Ipv6Route, - IpProtocol::Ipv6Route, - IpProtocol::Ipv6Route, - IpProtocol::Ipv6Route, - ]); - let mut pkt = Packet::copy(&buf); - let mut reader = pkt.get_rdr_mut(); - assert!(matches!( - Ipv6Hdr::parse(&mut reader), - Err(Ipv6HdrError::ExtensionsTooLarge) - )); - } -} diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index bfdb053c..45b67557 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! A layer in a port. @@ -14,12 +14,10 @@ use super::flow_table::FLOW_DEF_EXPIRE_SECS; use super::ioctl; use super::ioctl::ActionDescEntryDump; use super::packet::BodyTransformError; -use super::packet::Initialized; use super::packet::InnerFlowId; +use super::packet::MblkFullParsed; +use super::packet::MblkPacketData; use super::packet::Packet; -use super::packet::PacketMeta; -use super::packet::PacketRead; -use super::packet::Parsed; use super::packet::FLOW_ID_DEFAULT; use super::port::meta::ActionMeta; use super::port::Transforms; @@ -35,10 +33,10 @@ use super::rule::Rule; use crate::d_error::DError; #[cfg(all(not(feature = "std"), not(test)))] use crate::d_error::LabelBlock; -use crate::ddi::kstat; use crate::ddi::kstat::KStatNamed; use crate::ddi::kstat::KStatProvider; use crate::ddi::kstat::KStatU64; +use crate::ddi::mblk::MsgBlk; use crate::ddi::time::Moment; use crate::ExecCtx; use crate::LogLevel; @@ -54,7 +52,6 @@ use core::num::NonZeroU32; use core::result; use illumos_sys_hdrs::c_char; use illumos_sys_hdrs::uintptr_t; -use kstat_macro::KStatProvider; use 
opte_api::Direction; #[derive(Debug)] @@ -128,7 +125,7 @@ pub enum LayerResult { reason: DenyReason, }, #[leaf] - Hairpin(Packet), + Hairpin(MsgBlk), HandlePkt, } @@ -244,8 +241,8 @@ impl LayerFlowTable { self.count = self.ft_out.num_flows(); } - fn get_in(&mut self, flow: &InnerFlowId) -> EntryState { - match self.ft_in.get_mut(flow) { + fn get_in(&self, flow: &InnerFlowId) -> EntryState { + match self.ft_in.get(flow) { Some(entry) => { entry.hit(); if entry.is_dirty() { @@ -259,8 +256,8 @@ impl LayerFlowTable { } } - fn get_out(&mut self, flow: &InnerFlowId) -> EntryState { - match self.ft_out.get_mut(flow) { + fn get_out(&self, flow: &InnerFlowId) -> EntryState { + match self.ft_out.get(flow) { Some(entry) => { entry.hit(); let action = entry.state().action_desc.clone(); @@ -278,27 +275,27 @@ impl LayerFlowTable { fn remove_in( &mut self, flow: &InnerFlowId, - ) -> Option> { + ) -> Option>> { self.ft_in.remove(flow) } fn remove_out( &mut self, flow: &InnerFlowId, - ) -> Option> { + ) -> Option>> { self.ft_out.remove(flow) } fn mark_clean(&mut self, dir: Direction, flow: &InnerFlowId) { match dir { Direction::In => { - let entry = self.ft_in.get_mut(flow); + let entry = self.ft_in.get(flow); if let Some(entry) = entry { entry.mark_clean(); } } Direction::Out => { - let entry = self.ft_out.get_mut(flow); + let entry = self.ft_out.get(flow); if let Some(entry) = entry { entry.mark_clean(); } @@ -799,7 +796,7 @@ impl Layer { &mut self, ectx: &ExecCtx, dir: Direction, - pkt: &mut Packet, + pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { @@ -817,7 +814,7 @@ impl Layer { fn process_in( &mut self, ectx: &ExecCtx, - pkt: &mut Packet, + pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { @@ -865,9 +862,9 @@ impl Layer { pkt.flow(), ); - if let Some(body_segs) = pkt.body_segs() { + if let Some(body_segs) = pkt.body() { if let Some(bt) = - desc.gen_bt(Direction::In, pkt.meta(), &body_segs)? 
+ desc.gen_bt(Direction::In, pkt.meta(), body_segs)? { pkt.body_transform(Direction::In, &*bt)?; xforms.body.push(bt); @@ -887,17 +884,14 @@ impl Layer { fn process_in_rules( &mut self, ectx: &ExecCtx, - pkt: &mut Packet, + pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { use Direction::In; self.stats.vals.in_lft_miss += 1; - let mut rdr = pkt.get_body_rdr(); - let rule = - self.rules_in.find_match(pkt.flow(), pkt.meta(), ameta, &mut rdr); - let _ = rdr.finish(); + let rule = self.rules_in.find_match(pkt.flow(), pkt.meta(), ameta); let action = if let Some(rule) = rule { self.stats.vals.in_rule_match += 1; @@ -1060,8 +1054,8 @@ impl Layer { pkt.flow(), ); - if let Some(body_segs) = pkt.body_segs() { - if let Some(bt) = desc.gen_bt(In, pkt.meta(), &body_segs)? { + if let Some(body_segs) = pkt.body() { + if let Some(bt) = desc.gen_bt(In, pkt.meta(), body_segs)? { pkt.body_transform(In, &*bt)?; xforms.body.push(bt); } @@ -1085,23 +1079,16 @@ impl Layer { } Action::Hairpin(action) => { - let mut rdr = pkt.get_body_rdr(); - match action.gen_packet(pkt.meta(), &mut rdr) { - Ok(aord) => match aord { - AllowOrDeny::Allow(pkt) => { - let _ = rdr.finish(); - Ok(LayerResult::Hairpin(pkt)) - } - - AllowOrDeny::Deny => Ok(LayerResult::Deny { - name: self.name, - reason: DenyReason::Action, - }), - }, - + match action.gen_packet(pkt.meta()) { + Ok(AllowOrDeny::Allow(pkt)) => { + Ok(LayerResult::Hairpin(pkt)) + } + Ok(AllowOrDeny::Deny) => Ok(LayerResult::Deny { + name: self.name, + reason: DenyReason::Action, + }), Err(e) => { // XXX SDT probe, error stat, log - let _ = rdr.finish(); Err(LayerError::GenPacket(e)) } } @@ -1114,7 +1101,7 @@ impl Layer { fn process_out( &mut self, ectx: &ExecCtx, - pkt: &mut Packet, + pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { @@ -1162,9 +1149,9 @@ impl Layer { pkt.flow(), ); - if let Some(body_segs) = pkt.body_segs() { + if let Some(body_segs) = pkt.body() { if 
let Some(bt) = - desc.gen_bt(Direction::Out, pkt.meta(), &body_segs)? + desc.gen_bt(Direction::Out, pkt.meta(), body_segs)? { pkt.body_transform(Direction::Out, &*bt)?; xforms.body.push(bt); @@ -1184,17 +1171,14 @@ impl Layer { fn process_out_rules( &mut self, ectx: &ExecCtx, - pkt: &mut Packet, + pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { use Direction::Out; self.stats.vals.out_lft_miss += 1; - let mut rdr = pkt.get_body_rdr(); - let rule = - self.rules_out.find_match(pkt.flow(), pkt.meta(), ameta, &mut rdr); - let _ = rdr.finish(); + let rule = self.rules_out.find_match(pkt.flow(), pkt.meta(), ameta); let action = if let Some(rule) = rule { self.stats.vals.out_rule_match += 1; @@ -1359,10 +1343,8 @@ impl Layer { pkt.flow(), ); - if let Some(body_segs) = pkt.body_segs() { - if let Some(bt) = - desc.gen_bt(Out, pkt.meta(), &body_segs)? - { + if let Some(body_segs) = pkt.body() { + if let Some(bt) = desc.gen_bt(Out, pkt.meta(), body_segs)? { pkt.body_transform(Out, &*bt)?; xforms.body.push(bt); } @@ -1387,23 +1369,16 @@ impl Layer { } Action::Hairpin(action) => { - let mut rdr = pkt.get_body_rdr(); - match action.gen_packet(pkt.meta(), &mut rdr) { - Ok(aord) => match aord { - AllowOrDeny::Allow(pkt) => { - let _ = rdr.finish(); - Ok(LayerResult::Hairpin(pkt)) - } - - AllowOrDeny::Deny => Ok(LayerResult::Deny { - name: self.name, - reason: DenyReason::Action, - }), - }, - + match action.gen_packet(pkt.meta()) { + Ok(AllowOrDeny::Allow(pkt)) => { + Ok(LayerResult::Hairpin(pkt)) + } + Ok(AllowOrDeny::Deny) => Ok(LayerResult::Deny { + name: self.name, + reason: DenyReason::Action, + }), Err(e) => { // XXX SDT probe, error stat, log - let _ = rdr.finish(); Err(LayerError::GenPacket(e)) } } @@ -1596,7 +1571,7 @@ pub enum RuleRemoveErr { NotFound, } -impl<'a> RuleTable { +impl RuleTable { fn add(&mut self, rule: Rule) { match self.find_pos(&rule) { RulePlace::End => { @@ -1620,18 +1595,14 @@ impl<'a> RuleTable { dump } - fn 
find_match<'b, R>( + fn find_match( &mut self, ifid: &InnerFlowId, - pmeta: &PacketMeta, + pmeta: &MblkPacketData, ameta: &ActionMeta, - rdr: &'b mut R, - ) -> Option<&Rule> - where - R: PacketRead<'a>, - { + ) -> Option<&Rule> { for rte in self.rules.iter_mut() { - if rte.rule.is_match(pmeta, ameta, rdr) { + if rte.rule.is_match(pmeta, ameta) { rte.hits += 1; Self::rule_match_probe( self.port_c.as_c_str(), @@ -1853,19 +1824,21 @@ pub struct rule_no_match_sdt_arg { #[cfg(test)] mod test { + use ingot::ethernet::Ethernet; + use ingot::ethernet::Ethertype; + use ingot::tcp::Tcp; + use ingot::types::HeaderLen; + + use crate::engine::ip::v4::Ipv4; + use crate::engine::GenericUlp; + use super::*; #[test] fn find_rule() { - use crate::engine::headers::IpMeta; - use crate::engine::headers::UlpMeta; - use crate::engine::ip4::Ipv4Meta; - use crate::engine::ip4::Protocol; - use crate::engine::packet::InnerMeta; use crate::engine::predicate::Ipv4AddrMatch; use crate::engine::predicate::Predicate; use crate::engine::rule; - use crate::engine::tcp::TcpMeta; let mut rule_table = RuleTable::new("port", "test", Direction::Out); let mut rule = Rule::new( @@ -1879,45 +1852,32 @@ mod test { rule_table.add(rule.finalize()); - let ip = IpMeta::from(Ipv4Meta { - src: "10.0.0.77".parse().unwrap(), - dst: "52.10.128.69".parse().unwrap(), - proto: Protocol::TCP, - ttl: 64, - ident: 1, - hdr_len: 20, - total_len: 40, - csum: [0; 2], - }); - let ulp = UlpMeta::from(TcpMeta { - src: 5555, - dst: 443, - flags: 0, - seq: 0, - ack: 0, - window_size: 64240, - options_bytes: None, - options_len: 0, - ..Default::default() - }); - - let pmeta = PacketMeta { - outer: Default::default(), - inner: InnerMeta { - ip: Some(ip), - ulp: Some(ulp), + let mut test_pkt = MsgBlk::new_ethernet_pkt(( + Ethernet { ethertype: Ethertype::IPV4, ..Default::default() }, + Ipv4 { + source: "10.0.0.77".parse().unwrap(), + destination: "52.10.128.69".parse().unwrap(), + protocol: ingot::ip::IpProtocol::TCP, + 
identification: 1, + total_len: (20 + Tcp::MINIMUM_LENGTH) as u16, ..Default::default() }, - }; + Tcp { + source: 5555, + destination: 443, + window_size: 64240, + ..Default::default() + }, + )); + + let pmeta = Packet::parse_outbound(test_pkt.iter_mut(), GenericUlp {}) + .unwrap() + .to_full_meta(); // The pkt/rdr aren't actually used in this case. - let pkt = Packet::copy(&[0xA]); - let mut rdr = pkt.get_rdr(); let ameta = ActionMeta::new(); - let ifid = InnerFlowId::from(&pmeta); - assert!(rule_table - .find_match(&ifid, &pmeta, &ameta, &mut rdr) - .is_some()); + let ifid = *pmeta.flow(); + assert!(rule_table.find_match(&ifid, &pmeta.meta(), &ameta).is_some()); } } // TODO Reinstate diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index 93d7132a..217cee37 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -19,14 +19,12 @@ pub mod geneve; pub mod headers; pub mod icmp; pub mod ioctl; -#[macro_use] -pub mod ip4; -#[macro_use] -pub mod ip6; +pub mod ip; pub mod layer; pub mod nat; #[macro_use] pub mod packet; +pub mod parse; pub mod port; pub mod predicate; #[cfg(any(feature = "std", test))] @@ -39,61 +37,21 @@ pub mod tcp_state; #[macro_use] pub mod udp; -use alloc::string::String; -use core::fmt; -use core::num::ParseIntError; -use ip4::IpError; +use crate::ddi::mblk::MsgBlk; +use checksum::Checksum; +use ingot::tcp::TcpRef; +use ingot::types::IntoBufPointer; +use ingot::types::Parsed as IngotParsed; +use ingot::types::Read; pub use opte_api::Direction; - -// TODO Currently I'm using this for parsing many different things. It -// might be wise to have different parse error types. E.g., one for -// parsing ioctl strings, another for parsing IPv4 strings, for IPv6, -// etc. -// -// TODO This probably doesn't belong in this module. 
-#[derive(Clone, Debug, Eq, PartialEq)] -pub enum ParseErr { - BadAction, - BadAddrError, - BadDirectionError, - BadProtoError, - BadToken(String), - InvalidPort, - IpError(IpError), - Malformed, - MalformedInt, - MalformedPort, - MissingField, - Other(String), - UnknownToken(String), - ValTooLong(String, usize), -} - -impl fmt::Display for ParseErr { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{:?}", self) - } -} - -pub type ParseResult = core::result::Result; - -impl From for ParseErr { - fn from(err: IpError) -> Self { - ParseErr::IpError(err) - } -} - -impl From for ParseErr { - fn from(_err: ParseIntError) -> Self { - ParseErr::MalformedInt - } -} - -impl From for ParseErr { - fn from(err: String) -> Self { - ParseErr::Other(err) - } -} +use packet::FullParsed; +use packet::OpteMeta; +use packet::Packet; +use packet::Pullup; +use parse::ValidNoEncap; +use rule::CompiledTransform; +use zerocopy::ByteSlice; +use zerocopy::ByteSliceMut; /// When set to 1 we will panic in some situations instead of just /// flagging in error. This can be useful for debugging certain @@ -154,20 +112,24 @@ cfg_if! { #[macro_export] macro_rules! err_macro { ($s:tt) => { + { + let out_str = format!(concat!($s, "\0")); unsafe { - let out_str = format!(concat!($s, "\0")); // Unwrap safety: we just concat'd a NUL. let cstr = ::core::ffi::CStr::from_bytes_with_nul(out_str.as_bytes()).unwrap(); ::illumos_sys_hdrs::cmn_err(::illumos_sys_hdrs::CE_WARN, cstr.as_ptr()); } + } }; ($s:tt, $($arg:tt)*) => { + { + let out_str = format!(concat!($s, "\0"), $($arg)*); unsafe { - let out_str = format!(concat!($s, "\0"), $($arg)*); // Unwrap safety: we just concat'd a NUL. let cstr = ::core::ffi::CStr::from_bytes_with_nul(out_str.as_bytes()).unwrap(); ::illumos_sys_hdrs::cmn_err(::illumos_sys_hdrs::CE_WARN, cstr.as_ptr()); } + } }; } } @@ -176,18 +138,9 @@ cfg_if! 
{ pub use dbg_macro as dbg; pub use err_macro as err; -use crate::engine::ether::EtherType; use crate::engine::flow_table::FlowTable; -use crate::engine::ip4::Protocol; -use crate::engine::packet::HeaderOffsets; -use crate::engine::packet::Initialized; use crate::engine::packet::InnerFlowId; -use crate::engine::packet::Packet; -use crate::engine::packet::PacketInfo; -use crate::engine::packet::PacketMeta; -use crate::engine::packet::PacketReaderMut; use crate::engine::packet::ParseError; -use crate::engine::packet::Parsed; use crate::engine::port::UftEntry; /// The action to take for a single packet, based on the processing of @@ -205,7 +158,7 @@ pub enum HdlPktAction { /// input packet. /// /// The input packet is dropped. - Hairpin(Packet), + Hairpin(MsgBlk), } /// Some type of problem occurred during [`NetworkImpl::handle_pkt()`] @@ -221,13 +174,13 @@ pub struct HdlPktError(pub &'static str); /// implementation does this is two ways. /// /// 1. It provides its own unique stack of [`layer::Layer`] -/// definitions; each made up of its unique set of [`rule::Rule`] & -/// [`rule::Action`] pairings. Furthermore, the actions themselves may -/// be built atop generic OPTE actions or may be provided in whole by -/// the network implementation. +/// definitions; each made up of its unique set of [`rule::Rule`] & +/// [`rule::Action`] pairings. Furthermore, the actions themselves may +/// be built atop generic OPTE actions or may be provided in whole by +/// the network implementation. /// /// 2. It uses this trait to provide hooks into the parsing of packets -/// as well as single packet processing (non-flow processing). +/// as well as single packet processing (non-flow processing). /// /// OPTE itself provides a general structure for parsing; limiting the /// possible parse graph to that of a typical L2 + L3 + L4 packet, @@ -269,13 +222,15 @@ pub trait NetworkImpl { /// myriad of reasons. 
The error returned is for informational /// purposes, rather than having any obvious direct action to take /// in response. - fn handle_pkt( + fn handle_pkt<'a, T: Read + Pullup + 'a>( &self, dir: Direction, - pkt: &mut Packet, + pkt: &mut Packet>, uft_in: &FlowTable>, uft_out: &FlowTable>, - ) -> Result; + ) -> Result + where + T::Chunk: ByteSliceMut + IntoBufPointer<'a>; /// Return the parser for this network implementation. fn parser(&self) -> Self::Parser; @@ -286,106 +241,87 @@ pub trait NetworkImpl { /// This provides parsing for inbound/outbound packets for a given /// [`NetworkImpl`]. pub trait NetworkParser { + type InMeta: LightweightMeta; + type OutMeta: LightweightMeta; + /// Parse an outbound packet. /// - /// An outbound packet is one traveling from the [`port::Port`] + /// An outbound packet is one travelling from the [`port::Port`] /// client to the network. - fn parse_outbound( + fn parse_outbound<'a, T: Read + 'a>( &self, - rdr: &mut PacketReaderMut, - ) -> Result; + rdr: T, + ) -> Result, T>, ParseError> + where + T::Chunk: IntoBufPointer<'a> + ByteSliceMut; /// Parse an inbound packet. /// /// An inbound packet is one traveling from the network to the /// [`port::Port`] client. - fn parse_inbound( + fn parse_inbound<'a, T: Read + 'a>( &self, - rdr: &mut PacketReaderMut, - ) -> Result; + rdr: T, + ) -> Result, T>, ParseError> + where + T::Chunk: IntoBufPointer<'a> + ByteSliceMut; } -/// A generic ULP parser, useful for testing inside of the opte crate -/// itself. -pub struct GenericUlp {} - -impl GenericUlp { - /// Parse a generic L2 + L3 + L4 packet, storing the headers in - /// the inner position. 
- fn parse_ulp( - &self, - rdr: &mut PacketReaderMut, - ) -> Result { - let mut meta = PacketMeta::default(); - let mut offsets = HeaderOffsets::default(); - - let (ether_hi, _ether_hdr) = Packet::parse_ether(rdr)?; - meta.inner.ether = ether_hi.meta; - offsets.inner.ether = ether_hi.offset; - let ether_type = ether_hi.meta.ether_type; +/// Header formats which allow a flow ID to be read out, and which can be converted +/// into the shared `OpteMeta` format. +pub trait LightweightMeta: Into> { + /// Runs a compiled fastpath action against the target metadata. + fn run_compiled_transform(&mut self, transform: &CompiledTransform) + where + T: ByteSliceMut; - let (ip_hi, pseudo_csum) = match ether_type { - EtherType::Arp => { - return Ok(PacketInfo { - meta, - offsets, - body_csum: None, - extra_hdr_space: None, - }); - } + /// Derive the checksum for the packet body from inner headers. + fn compute_body_csum(&self) -> Option; - EtherType::Ipv4 => { - let (ip_hi, hdr) = Packet::parse_ip4(rdr)?; - (ip_hi, hdr.pseudo_csum()) - } + // This is a dedicated fn since `where for<'a> &'a Self: Into` + // had *awful* ergonomics around that bound's propagation. + /// Return the flow ID (5-tuple, or other composite key) which + /// identifies this packet's parent flow. + fn flow(&self) -> InnerFlowId; - EtherType::Ipv6 => { - let (ip_hi, hdr) = Packet::parse_ip6(rdr)?; - (ip_hi, hdr.pseudo_csum()) - } + /// Returns the number of bytes occupied by the packet's outer encapsulation. + fn encap_len(&self) -> u16; - _ => return Err(ParseError::UnexpectedEtherType(ether_type)), - }; + /// Recalculate checksums within inner headers, derived from a pre-computed `body_csum`. + fn update_inner_checksums(&mut self, body_csum: Checksum); - meta.inner.ip = Some(ip_hi.meta); - offsets.inner.ip = Some(ip_hi.offset); + /// Provide a view of internal TCP state. 
+ fn inner_tcp(&self) -> Option<&impl TcpRef>; - let (ulp_hi, ulp_hdr) = match ip_hi.meta.proto() { - Protocol::ICMP => Packet::parse_icmp(rdr)?, - Protocol::ICMPv6 => Packet::parse_icmp6(rdr)?, - Protocol::TCP => Packet::parse_tcp(rdr)?, - Protocol::UDP => Packet::parse_udp(rdr)?, - proto => return Err(ParseError::UnexpectedProtocol(proto)), - }; - - let use_pseudo = ulp_hi.meta.is_pseudoheader_in_csum(); - meta.inner.ulp = Some(ulp_hi.meta); - offsets.inner.ulp = Some(ulp_hi.offset); - let body_csum = if let Some(mut csum) = ulp_hdr.csum_minus_hdr() { - if use_pseudo { - csum -= pseudo_csum; - } - Some(csum) - } else { - None - }; - - Ok(PacketInfo { meta, offsets, body_csum, extra_hdr_space: None }) - } + /// Determines whether headers have consistent lengths/mandatory fields set. + fn validate(&self, pkt_len: usize) -> Result<(), ParseError>; } +/// A generic ULP parser, useful for testing inside of the opte crate +/// itself. +pub struct GenericUlp {} + impl NetworkParser for GenericUlp { - fn parse_inbound( + type InMeta = ValidNoEncap; + type OutMeta = ValidNoEncap; + + fn parse_inbound<'a, T: Read + 'a>( &self, - rdr: &mut PacketReaderMut, - ) -> Result { - self.parse_ulp(rdr) + rdr: T, + ) -> Result, T>, ParseError> + where + T::Chunk: IntoBufPointer<'a> + ByteSliceMut, + { + Ok(ValidNoEncap::parse_read(rdr)?) } - fn parse_outbound( + fn parse_outbound<'a, T: Read + 'a>( &self, - rdr: &mut PacketReaderMut, - ) -> Result { - self.parse_ulp(rdr) + rdr: T, + ) -> Result, T>, ParseError> + where + T::Chunk: IntoBufPointer<'a> + ByteSliceMut, + { + Ok(ValidNoEncap::parse_read(rdr)?) } } diff --git a/lib/opte/src/engine/nat.rs b/lib/opte/src/engine/nat.rs index d51f3c22..5e0e0e4e 100644 --- a/lib/opte/src/engine/nat.rs +++ b/lib/opte/src/engine/nat.rs @@ -2,15 +2,15 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! 1:1 NAT. use super::headers::HeaderAction; use super::headers::IpMod; use super::packet::InnerFlowId; +use super::packet::MblkFullParsed; use super::packet::Packet; -use super::packet::Parsed; use super::port::meta::ActionMeta; use super::predicate::DataPredicate; use super::predicate::Predicate; @@ -25,7 +25,6 @@ use alloc::sync::Arc; use alloc::vec::Vec; use core::fmt; use core::hash::Hash; -use core::marker::PhantomData; use crc32fast::Hasher; use itertools::Itertools; use opte_api::Direction; @@ -86,7 +85,7 @@ impl StatefulAction for OutboundNat { fn gen_desc( &self, flow_id: &InnerFlowId, - _pkt: &Packet, + _pkt: &Packet, _meta: &mut ActionMeta, ) -> rule::GenDescResult { // When we have several external IPs at our disposal, we are @@ -149,7 +148,7 @@ impl StatefulAction for InboundNat { fn gen_desc( &self, flow_id: &InnerFlowId, - _pkt: &Packet, + _pkt: &Packet, _meta: &mut ActionMeta, ) -> rule::GenDescResult { // We rely on the attached predicates to filter out IPs which are *not* @@ -187,7 +186,7 @@ impl ActionDesc for NatDesc { HdrTransform { name: NAT_NAME.to_string(), - inner_ip: HeaderAction::Modify(ip, PhantomData), + inner_ip: HeaderAction::Modify(ip), ..Default::default() } } @@ -197,7 +196,7 @@ impl ActionDesc for NatDesc { HdrTransform { name: NAT_NAME.to_string(), - inner_ip: HeaderAction::Modify(ip, PhantomData), + inner_ip: HeaderAction::Modify(ip), ..Default::default() } } @@ -216,9 +215,19 @@ impl ActionDesc for NatDesc { #[cfg(test)] mod test { use super::*; - use crate::engine::ether::EtherMeta; + + use crate::ddi::mblk::MsgBlk; + use crate::engine::ether::Ethernet; + use crate::engine::ether::EthernetRef; + use crate::engine::ip::v4::Ipv4; + use crate::engine::ip::v4::Ipv4Ref; use crate::engine::GenericUlp; - use opte_api::Direction::*; + use ingot::ethernet::Ethertype; + use ingot::ip::IpProtocol; + use ingot::tcp::Tcp; + use ingot::tcp::TcpFlags; + 
use ingot::tcp::TcpRef; + use ingot::types::HeaderLen; #[derive(Debug)] struct DummyVerify; @@ -231,14 +240,6 @@ mod test { #[test] fn nat4_rewrite() { - use crate::engine::ether::EtherHdr; - use crate::engine::ether::EtherType; - use crate::engine::headers::IpMeta; - use crate::engine::headers::UlpMeta; - use crate::engine::ip4::Ipv4Hdr; - use crate::engine::ip4::Ipv4Meta; - use crate::engine::ip4::Protocol; - use crate::engine::tcp::TcpMeta; use opte_api::MacAddr; let priv_mac = MacAddr::from([0xA8, 0x40, 0x25, 0xF0, 0x00, 0x01]); @@ -254,29 +255,32 @@ mod test { // ================================================================ // Build the packet metadata // ================================================================ - let body = vec![]; - let tcp = - TcpMeta { src: priv_port, dst: outside_port, ..Default::default() }; - let mut ip4 = Ipv4Meta { - src: priv_ip, - dst: outside_ip, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let body: Vec = vec![]; + let tcp = Tcp { + source: priv_port, + destination: outside_port, ..Default::default() }; - ip4.compute_hdr_csum(); - let eth = EtherMeta { - ether_type: EtherType::Ipv4, - src: priv_mac, - dst: dest_mac, + let mut ip4 = Ipv4 { + source: priv_ip, + destination: outside_ip, + protocol: IpProtocol::TCP, + total_len: (Ipv4::MINIMUM_LENGTH + (&tcp, &body).packet_length()) + as u16, + ..Default::default() }; - let mut pkt = Packet::alloc_and_expand(128); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - tcp.emit(wtr.slice_mut(tcp.hdr_len()).unwrap()); - wtr.write(&body).unwrap(); - let mut pkt = pkt.parse(Out, GenericUlp {}).unwrap(); + ip4.compute_checksum(); + + let eth = Ethernet { + destination: dest_mac, + source: priv_mac, + ethertype: Ethertype::IPV4, + }; + + let mut pkt_m = MsgBlk::new_ethernet_pkt((ð, &ip4, &tcp, &body)); + let mut pkt = 
Packet::parse_outbound(pkt_m.iter_mut(), GenericUlp {}) + .unwrap() + .to_full_meta(); // ================================================================ // Verify descriptor generation. @@ -294,79 +298,82 @@ mod test { let pmo = pkt.meta_mut(); out_ht.run(pmo).unwrap(); - let ether_meta = pmo.inner.ether; - assert_eq!(ether_meta.src, priv_mac); - assert_eq!(ether_meta.dst, dest_mac); + let ether_meta = pmo.inner_ether(); + assert_eq!(ether_meta.source(), priv_mac); + assert_eq!(ether_meta.destination(), dest_mac); - let ip4_meta = match pmo.inner.ip.as_ref().unwrap() { - IpMeta::Ip4(v) => v, + let ip4_meta = match pmo.inner_ip4() { + Some(v) => v, _ => panic!("expect Ipv4Meta"), }; - assert_eq!(ip4_meta.src, pub_ip); - assert_eq!(ip4_meta.dst, outside_ip); - assert_eq!(ip4_meta.proto, Protocol::TCP); + assert_eq!(ip4_meta.source(), pub_ip); + assert_eq!(ip4_meta.destination(), outside_ip); + assert_eq!(ip4_meta.protocol(), IpProtocol::TCP); - let tcp_meta = match pmo.inner.ulp.as_ref().unwrap() { - UlpMeta::Tcp(v) => v, + let tcp_meta = match pmo.inner_tcp() { + Some(v) => v, _ => panic!("expect TcpMeta"), }; - assert_eq!(tcp_meta.src, priv_port); - assert_eq!(tcp_meta.dst, outside_port); - assert_eq!(tcp_meta.flags, 0); + assert_eq!(tcp_meta.source(), priv_port); + assert_eq!(tcp_meta.destination(), outside_port); + assert_eq!(tcp_meta.flags(), TcpFlags::empty()); // ================================================================ // Verify inbound header transformation. 
// ================================================================ - let body = vec![]; - let tcp = - TcpMeta { src: outside_port, dst: priv_port, ..Default::default() }; - let mut ip4 = Ipv4Meta { - src: outside_ip, - dst: priv_ip, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let body: Vec = vec![]; + let tcp = Tcp { + source: outside_port, + destination: priv_port, ..Default::default() }; - ip4.compute_hdr_csum(); - let eth = EtherMeta { - dst: priv_mac, - src: dest_mac, - ether_type: EtherType::Ipv4, + let mut ip4 = Ipv4 { + source: outside_ip, + destination: pub_ip, + protocol: IpProtocol::TCP, + total_len: (Ipv4::MINIMUM_LENGTH + (&tcp, &body).packet_length()) + as u16, + ..Default::default() + }; + ip4.compute_checksum(); + + let eth = Ethernet { + destination: priv_mac, + source: dest_mac, + ethertype: Ethertype::IPV4, }; - let mut pkt = Packet::alloc_and_expand(128); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - tcp.emit(wtr.slice_mut(tcp.hdr_len()).unwrap()); - wtr.write(&body).unwrap(); - let mut pkt = pkt.parse(Out, GenericUlp {}).unwrap(); + + let mut pkt_m = MsgBlk::new_ethernet_pkt((ð, &ip4, &tcp, &body)); + let mut pkt = Packet::parse_inbound(pkt_m.iter_mut(), GenericUlp {}) + .unwrap() + .to_full_meta(); let pmi = pkt.meta_mut(); let in_ht = desc.gen_ht(Direction::In); in_ht.run(pmi).unwrap(); - let ether_meta = pmi.inner.ether; - assert_eq!(ether_meta.src, dest_mac); - assert_eq!(ether_meta.dst, priv_mac); + let ether_meta = pmi.inner_ether(); + assert_eq!(ether_meta.source(), dest_mac); + assert_eq!(ether_meta.destination(), priv_mac); - let ip4_meta = match pmi.inner.ip.as_ref().unwrap() { - IpMeta::Ip4(v) => v, + let ip4_meta = match pmi.inner_ip4() { + Some(v) => v, _ => panic!("expect Ipv4Meta"), }; - assert_eq!(ip4_meta.src, outside_ip); - assert_eq!(ip4_meta.dst, priv_ip); - 
assert_eq!(ip4_meta.proto, Protocol::TCP); + assert_eq!(ip4_meta.source(), outside_ip); + assert_eq!(ip4_meta.destination(), priv_ip); + assert_eq!(ip4_meta.protocol(), IpProtocol::TCP); - let tcp_meta = match pmi.inner.ulp.as_ref().unwrap() { - UlpMeta::Tcp(v) => v, + let tcp_meta = match pmi.inner_tcp() { + Some(v) => v, _ => panic!("expect TcpMeta"), }; - assert_eq!(tcp_meta.src, outside_port); - assert_eq!(tcp_meta.dst, priv_port); - assert_eq!(tcp_meta.flags, 0); + assert_eq!(tcp_meta.source(), outside_port); + assert_eq!(tcp_meta.destination(), priv_port); + assert_eq!(tcp_meta.flags(), TcpFlags::empty()); } } diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index d9ac2107..3269c0da 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -8,83 +8,93 @@ //! //! TODO //! -//! * Add a PacketChain type to represent a chain of one or more -//! indepenndent packets. Also consider having chains that represent -//! multiple packets for the same flow if it would be advantageous to -//! do so. -//! //! * Add hardware offload information to [`Packet`]. //! 
-use super::arp::ArpHdrError; use super::checksum::Checksum; -use super::checksum::HeaderChecksum; -use super::ether::EtherHdr; -use super::ether::EtherHdrError; -use super::ether::EtherMeta; -use super::geneve::GeneveHdr; -use super::geneve::GeneveHdrError; -use super::geneve::GeneveMeta; -use super::geneve::GENEVE_PORT; +use super::ether::Ethernet; +use super::ether::EthernetPacket; +use super::ether::ValidEthernet; use super::headers::EncapMeta; +use super::headers::EncapPush; use super::headers::IpAddr; -use super::headers::IpMeta; -use super::headers::UlpHdr; -use super::headers::UlpMeta; +use super::headers::IpPush; +use super::headers::SizeHoldingEncap; +use super::headers::ValidEncapMeta; use super::headers::AF_INET; use super::headers::AF_INET6; -use super::icmp::IcmpHdr; -use super::icmp::IcmpHdrError; -use super::icmp::IcmpMeta; -use super::icmp::Icmpv4Meta; -use super::icmp::Icmpv6Meta; -use super::ip4::Ipv4Addr; -use super::ip4::Ipv4Hdr; -use super::ip4::Ipv4HdrError; -use super::ip4::Ipv4Meta; -use super::ip4::Protocol; -use super::ip6::Ipv6Addr; -use super::ip6::Ipv6Hdr; -use super::ip6::Ipv6HdrError; -use super::ip6::Ipv6Meta; +use super::ip::v4::Ipv4Addr; +use super::ip::v4::Ipv4Packet; +use super::ip::v4::Ipv4Ref; +use super::ip::v4::Protocol; +use super::ip::v6::Ipv6Addr; +use super::ip::v6::Ipv6Packet; +use super::ip::v6::Ipv6Ref; +use super::ip::L3Repr; +use super::ip::L3; +use super::parse::NoEncap; +use super::parse::Ulp; +use super::parse::UlpRepr; +use super::rule::CompiledEncap; +use super::rule::CompiledTransform; +use super::rule::HdrTransform; +use super::rule::HdrTransformError; +use super::Direction; +use super::LightweightMeta; use super::NetworkParser; use crate::d_error::DError; +use crate::ddi::mblk::MsgBlk; +use crate::ddi::mblk::MsgBlkIterMut; +use crate::ddi::mblk::MsgBlkNode; +use crate::engine::geneve::valid_geneve_has_oxide_external; +use crate::engine::geneve::GeneveMeta; +use alloc::boxed::Box; +use alloc::string::String; 
+use alloc::sync::Arc; +use alloc::vec::Vec; +use core::cell::Cell; +use core::ffi::CStr; use core::fmt; use core::fmt::Display; -use core::ptr; +use core::hash::Hash; use core::ptr::NonNull; use core::result; -use core::slice; +use core::sync::atomic::AtomicPtr; +use core::sync::atomic::Ordering; use crc32fast::Hasher; use dyn_clone::DynClone; -use serde::Deserialize; -use serde::Serialize; -// TODO should probably move these two into this module now. -use super::rule::HdrTransform; -use super::rule::HdrTransformError; -use super::tcp::TcpHdr; -use super::tcp::TcpHdrError; -use super::tcp::TcpMeta; -use super::udp::UdpHdr; -use super::udp::UdpHdrError; -use super::udp::UdpMeta; -use super::Direction; -use alloc::string::String; -use alloc::vec::Vec; -use illumos_sys_hdrs::dblk_t; use illumos_sys_hdrs::mblk_t; use illumos_sys_hdrs::uintptr_t; - -cfg_if! { - if #[cfg(all(not(feature = "std"), not(test)))] { - use illumos_sys_hdrs as ddi; - } else { - use std::boxed::Box; - use illumos_sys_hdrs::c_uchar; - } -} - -pub static MBLK_MAX_SIZE: usize = u16::MAX as usize; +use ingot::geneve::GeneveRef; +use ingot::icmp::IcmpV4Mut; +use ingot::icmp::IcmpV4Packet; +use ingot::icmp::IcmpV4Ref; +use ingot::icmp::IcmpV6Mut; +use ingot::icmp::IcmpV6Packet; +use ingot::icmp::IcmpV6Ref; +use ingot::tcp::TcpMut; +use ingot::tcp::TcpPacket; +use ingot::tcp::TcpRef; +use ingot::types::BoxedHeader; +use ingot::types::Emit; +use ingot::types::Header; +use ingot::types::HeaderLen; +use ingot::types::InlineHeader; +use ingot::types::IntoBufPointer; +use ingot::types::NextLayer; +use ingot::types::PacketParseError; +use ingot::types::Parsed as IngotParsed; +use ingot::types::Read; +use ingot::types::ToOwnedPacket; +use ingot::udp::UdpMut; +use ingot::udp::UdpPacket; +use ingot::udp::UdpRef; +use opte_api::Vni; +use serde::Deserialize; +use serde::Serialize; +use zerocopy::ByteSlice; +use zerocopy::ByteSliceMut; +use zerocopy::IntoBytes; pub static FLOW_ID_DEFAULT: InnerFlowId = 
InnerFlowId { proto: 255, @@ -128,12 +138,22 @@ pub struct InnerFlowId { pub dst_port: u16, } +impl InnerFlowId { + pub fn crc32(&self) -> u32 { + let mut hasher = Hasher::new(); + self.hash(&mut hasher); + hasher.finalize() + } +} + impl Default for InnerFlowId { fn default() -> Self { FLOW_ID_DEFAULT } } +/// Tagged union of a source-dest IP address pair, used to avoid +/// duplicating the discriminator. #[derive( Clone, Copy, @@ -206,1126 +226,981 @@ impl Display for InnerFlowId { } } -impl From<&PacketMeta> for InnerFlowId { - fn from(meta: &PacketMeta) -> Self { - let (proto, addrs) = match &meta.inner.ip { - Some(IpMeta::Ip4(ip4)) => { - (ip4.proto, AddrPair::V4 { src: ip4.src, dst: ip4.dst }) - } - Some(IpMeta::Ip6(ip6)) => { - (ip6.proto, AddrPair::V6 { src: ip6.src, dst: ip6.dst }) - } - None => (Protocol::Unknown(255), FLOW_ID_DEFAULT.addrs), - }; +pub trait PacketState {} - let (src_port, dst_port) = meta - .inner - .ulp - .map(|ulp| { - ( - ulp.src_port().or_else(|| ulp.pseudo_port()).unwrap_or(0), - ulp.dst_port().or_else(|| ulp.pseudo_port()).unwrap_or(0), - ) - }) - .unwrap_or((0, 0)); +/// A packet body transformation. +/// +/// A body transformation allows an action to modify zero, one, or +/// more bytes of a packet's body. The body starts directly after the +/// ULP header, and continues to the last byte of the packet. This +/// transformation is currently limited to only modifying bytes; it +/// does not allow adding or removing bytes (e.g. to encrypt the body). +pub trait BodyTransform: fmt::Display + DynClone + Send + Sync { + /// Execute the body transformation. The body segments include + /// **only** body data, starting directly after the end of the ULP + /// header. + /// + /// # Errors + /// + /// The transformation can choose to return a + /// [`BodyTransformError`] at any time if the body is not + /// acceptable. On error, none or some of the bytes may have been + /// modified. 
+ fn run( + &self, + dir: Direction, + body: &mut [u8], + ) -> Result<(), BodyTransformError>; +} - InnerFlowId { proto: proto.into(), addrs, src_port, dst_port } - } +dyn_clone::clone_trait_object!(BodyTransform); + +#[derive(Debug)] +pub enum BodyTransformError { + NoPayload, + ParseFailure(String), + Todo(String), + UnexpectedBody(String), } -/// The outer header metadata. -/// -/// All outer headers are always optional. -#[derive(Debug, Default)] -pub struct OuterMeta { - pub ether: Option, - pub ip: Option, - pub encap: Option, +impl From for BodyTransformError { + fn from(e: smoltcp::wire::Error) -> Self { + Self::ParseFailure(format!("{}", e)) + } } -impl OuterMeta { - fn hdr_len(&self) -> usize { - let mut hdr_len = 0; +#[derive(Clone, Copy, Debug)] +pub enum SegAdjustError { + /// Attempt to place the end of the writable/readable area of the + /// segment past the limit of the underlying buffer. + EndPastLimit, - if let Some(ether) = self.ether { - hdr_len += ether.hdr_len(); - } + /// Attempt to place the start of the writable/readable area of + /// the segment before the base of the underlying buffer. + StartBeforeBase, - if let Some(ip) = self.ip { - hdr_len += ip.hdr_len(); - } + /// Attempt to place the start the writable/readable area of the + /// segment outside the range of the underlying buffer. + StartPastEnd, +} - if let Some(encap) = self.encap { - hdr_len += encap.hdr_len(); - } +#[derive(Clone, Copy, Debug)] +pub enum ModifierCreateError { + StartOutOfRange, + EndOutOfRange, +} - hdr_len - } +#[derive(Clone, Copy, Debug, DError)] +pub enum WrapError { + /// We tried to wrap a NULL pointer. + NullPtr, + /// We tried to wrap a packet chain as though it were a single mblk. + Chain, } -/// The inner header metadata. -/// -/// There is always an Ethernet frame. 
-#[derive(Debug, Default)] -pub struct InnerMeta { - pub ether: EtherMeta, - pub ip: Option, - pub ulp: Option, +#[derive(Clone, Debug, Eq, PartialEq, DError)] +#[derror(leaf_data = ParseError::data)] +pub enum ParseError { + IngotError(PacketParseError), + IllegalValue(MismatchError), + BadLength(MismatchError), + UnrecognisedTunnelOpt { class: u16, ty: u8 }, } -impl InnerMeta { - fn has_ip_csum(&self) -> bool { - match self.ip { - Some(ip) => ip.has_csum(), - None => false, +impl ParseError { + fn data(&self, data: &mut [u64]) { + // Allow due to possibility of future options. + #[allow(clippy::single_match)] + match self { + ParseError::UnrecognisedTunnelOpt { class, ty } => { + [data[0], data[1]] = [*class as u64, *ty as u64]; + } + _ => {} } } +} - fn has_ulp_csum(&self) -> bool { - match self.ulp { - Some(ulp) => ulp.has_csum(), - None => false, - } +impl DError for PacketParseError { + #[inline] + fn discriminant(&self) -> &'static core::ffi::CStr { + self.header().as_cstr() } - fn hdr_len(&self) -> usize { - let mut hdr_len = self.ether.hdr_len(); - - if let Some(ip) = self.ip { - hdr_len += ip.hdr_len(); - } - - if let Some(ulp) = self.ulp { - hdr_len += ulp.hdr_len(); - } + #[inline] + fn child(&self) -> Option<&dyn DError> { + Some(self.error()) + } +} - hdr_len +impl DError for ingot::types::ParseError { + #[inline] + fn discriminant(&self) -> &'static core::ffi::CStr { + self.as_cstr() } - pub fn is_tcp(&self) -> bool { - match self.ip.as_ref() { - Some(IpMeta::Ip4(ip4)) => ip4.proto == Protocol::TCP, - Some(IpMeta::Ip6(ip6)) => ip6.proto == Protocol::TCP, - _ => false, - } + #[inline] + fn child(&self) -> Option<&dyn DError> { + None } } -/// The various metadata of a packet. -/// -/// The packet metadata is a logical representation of the header data -/// that is relevant to processing. 
-#[derive(Debug, Default)] -pub struct PacketMeta { - pub outer: OuterMeta, - pub inner: InnerMeta, +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct MismatchError { + pub location: &'static CStr, + pub expected: u64, + pub actual: u64, } -impl PacketMeta { - /// Return the number of bytes requires to emit the header - /// metadata into full headers. - fn hdr_len(&self) -> usize { - self.outer.hdr_len() + self.inner.hdr_len() +impl DError for MismatchError { + fn discriminant(&self) -> &'static CStr { + self.location } - /// Return the inner Ether metadata. - pub fn inner_ether(&self) -> &EtherMeta { - &self.inner.ether + fn child(&self) -> Option<&dyn DError> { + None } - /// Return the inner IPv4 metadata. - pub fn inner_ip4(&self) -> Option<&Ipv4Meta> { - match &self.inner.ip { - Some(IpMeta::Ip4(ip4_meta)) => Some(ip4_meta), - _ => None, + fn leaf_data(&self, data: &mut [u64]) { + if let Some(v) = data.get_mut(0) { + *v = self.expected; } - } - - /// Return the inner IPv6 metadata. - pub fn inner_ip6(&self) -> Option<&Ipv6Meta> { - match &self.inner.ip { - Some(IpMeta::Ip6(x)) => Some(x), - _ => None, + if let Some(v) = data.get_mut(1) { + *v = self.actual; } } +} - /// Return the inner ICMP metadata, if the inner ULP is ICMP. - pub fn inner_icmp(&self) -> Option<&Icmpv4Meta> { - match &self.inner.ulp { - Some(UlpMeta::Icmpv4(icmp)) => Some(icmp), - _ => None, - } +impl From for ParseError { + fn from(value: PacketParseError) -> Self { + Self::IngotError(value) } +} - /// Return the inner ICMPv6 metadata, if the inner ULP is ICMPv6. - pub fn inner_icmp6(&self) -> Option<&Icmpv6Meta> { - match &self.inner.ulp { - Some(UlpMeta::Icmpv6(icmp6)) => Some(icmp6), - _ => None, - } - } +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum WriteError { + BadLayout, + EndOfPacket, + NotEnoughBytes { available: usize, needed: usize }, + StraddledWrite, +} - /// Return the inner TCP metadata, if the inner ULP is TCP. - /// Otherwise, return `None`.
- pub fn inner_tcp(&self) -> Option<&TcpMeta> { - match &self.inner.ulp { - Some(UlpMeta::Tcp(tcp)) => Some(tcp), - _ => None, - } - } +pub type WriteResult = result::Result; - /// Return true if the inner ULP is TCP. - pub fn is_inner_tcp(&self) -> bool { - self.inner.is_tcp() - } +/// The initial parsed length of every header in a packet. +/// +/// Used to track structural changes to any packet headers +/// which would require full serialisation of a header and +/// its prior layers. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] +pub struct InitialLayerLens { + pub outer_eth: usize, + pub outer_l3: usize, + pub outer_encap: usize, - /// Return the inner UDP metadata, if the inner ULP is UDP. - /// Otherwise return `None`. - pub fn inner_udp(&self) -> Option<&UdpMeta> { - match &self.inner.ulp { - Some(UlpMeta::Udp(udp)) => Some(udp), - _ => None, - } - } + pub inner_eth: usize, + pub inner_l3: usize, + pub inner_ulp: usize, +} - pub fn l4_hash(&self) -> Option { - let ulp = match self.inner.ulp { - Some(ulp) => ulp, - None => return None, - }; - let mut h = Hasher::new(); - match &self.inner.ip { - Some(IpMeta::Ip4(m)) => { - h.update(&m.src.bytes()); - h.update(&m.dst.bytes()); - h.update(&[u8::from(m.proto)]); - } - Some(IpMeta::Ip6(m)) => { - h.update(&m.src.bytes()); - h.update(&m.dst.bytes()); - h.update(&[u8::from(m.proto)]); - } - None => return None, - }; - let (src, dst) = match ulp { - UlpMeta::Tcp(t) => (t.src, t.dst), - UlpMeta::Udp(u) => (u.src, u.dst), - UlpMeta::Icmpv4(_) => (0, 0), //TODO use icmp id - UlpMeta::Icmpv6(_) => (0, 0), //TODO use icmp id - }; - h.update(&src.to_be_bytes()); - h.update(&dst.to_be_bytes()); - Some(h.finalize()) +impl InitialLayerLens { + #[inline] + pub fn hdr_len(&self) -> usize { + self.outer_eth + + self.outer_l3 + + self.outer_encap + + self.inner_eth + + self.inner_l3 + + self.inner_ulp } } -/// The head and tail of an mblk_t list. 
-struct PacketChainInner { - head: NonNull, - tail: NonNull, +/// Full metadata representation for a packet entering the standard ULP +/// path, or a full table walk over the slowpath. +pub struct OpteMeta { + pub outer_eth: Option>>, + pub outer_l3: Option>, + pub outer_encap: Option>>, + + pub inner_eth: EthernetPacket, + pub inner_l3: Option>, + pub inner_ulp: Option>, } -/// A chain of network packets. +/// Helper for conditionally pulling up a packet when required, +/// to provide safe read/write access to the packet body. /// -/// Network packets are provided by illumos as a linked list, using -/// the `b_next` and `b_prev` fields. +/// This is necessary because we must account for a condition +/// where MsgBlks containing packet headers are fully owned by +/// the host OS, but the packet body points to guest memory. +/// In this case, it is unsafe to take either a `&[]` or `&mut[]` +/// against the underlying packet contents, as the guest may +/// modify them. [`MsgBlk::wrap_mblk`] details this condition. /// -/// See the documentation for [`Packet`] for full context. -// TODO: We might modify Packet to do away with the `Vec`. -// I could see Chain being retooled accordingly (i.e., Packets could -// be allocated a lifetime via PhantomData based on whether we want -// to remove them from the chain or modify in place). -// Today's code is all equivalent to always using 'static, because -// we remove and re-add the mblks to work on them. -pub struct PacketChain { - inner: Option, -} +/// The current disposition is that if we have any non-header +/// segments, then we're pulling the remainder of the packet up. +/// In theory we could check ref counts to determine whether we +/// can in fact serve a `&[&[u8]]`, but no fastpath packets need +/// this capability so it's wasted effort on that front. 
+struct PktBodyWalker { + last_chunk: Option, + remainder: T, + // The use of atomics/interior mutability here is primarily to + // allow us to work under &self for `body()`, dynamically filling + // out the pulled up mblk as needed. + state: Cell, + // TODO: It would be nice to separate this from MsgBlk. + // `T::Owned` in future? + msg_blk: AtomicPtr, +} + +#[derive(Copy, Clone, Debug)] +enum BodySegState { + NoPullup, + NeedsPullup, + PulledUp, +} + +impl PktBodyWalker { + fn new(last_chunk: Option, remainder: T) -> Self { + let state = if remainder.is_empty() { + BodySegState::NoPullup + } else { + BodySegState::NeedsPullup + } + .into(); -impl PacketChain { - /// Create an empty packet chain. - pub fn empty() -> Self { - Self { inner: None } + Self { + last_chunk, + remainder, + state, + msg_blk: core::ptr::null_mut::().into(), + } } +} - /// Convert an mblk_t packet chain into a safe source of `Packet`s. - /// - /// # Safety - /// The `mp` pointer must point to an `mblk_t` allocated by - /// `allocb(9F)` or provided by some kernel API which itself used - /// one of the DDI/DKI APIs to allocate it. - /// Packets must form a valid linked list (no loops). - /// The original mblk_t pointer must not be used again. - pub unsafe fn new(mp: *mut mblk_t) -> Result { - let head = NonNull::new(mp).ok_or(WrapError::NullPtr)?; - - // Walk the chain to find the tail, and support faster append. 
- let mut tail = head; - while let Some(next_ptr) = NonNull::new((*tail.as_ptr()).b_next) { - tail = next_ptr; - } +impl PktBodyWalker { + #[inline(always)] + fn prepare(&self) { + let BodySegState::NeedsPullup = self.state.clone().into_inner() else { + return; + }; + + let prepend_slice = self.last_chunk.as_ref().map(|v| &v[..]); + let mblk = self.remainder.pullup(prepend_slice); - Ok(Self { inner: Some(PacketChainInner { head, tail }) }) + let mblk_ptr = mblk.unwrap_mblk(); + + self.msg_blk + .compare_exchange( + core::ptr::null_mut(), + mblk_ptr.as_ptr(), + Ordering::Relaxed, + Ordering::Relaxed, + ) + .expect("invariant violated: tried to double-prepare mblk"); + self.state.set(BodySegState::PulledUp); } - /// Removes the next packet from the top of the chain and returns - /// it, taking ownership. - pub fn pop_front(&mut self) -> Option> { - if let Some(ref mut list) = &mut self.inner { - unsafe { - let curr = list.head.as_ptr(); - let next = NonNull::new((*curr).b_next); + fn body(&self) -> &[u8] { + self.prepare(); - // Break the forward link on the packet we have access to, - // and the backward link on the next element if possible. - if let Some(next) = next { - (*next.as_ptr()).b_prev = ptr::null_mut(); - } - (*curr).b_next = ptr::null_mut(); + match self.state.clone().into_inner() { + BodySegState::NoPullup => { + self.last_chunk.as_ref().map(|v| &v[..]).unwrap_or_default() + } + BodySegState::NeedsPullup => unreachable!(), + BodySegState::PulledUp => { + let ptr = NonNull::new(self.msg_blk.load(Ordering::Relaxed)) + .expect("invariant violated: PulledUp with nullptr"); + + // SAFETY: MsgBlk(NonNull) has identical layout to + // NonNull, and the inner mblk lives as long as self. + // Since ownership is unaffected, the &[u8] derived from msg_blk + // is valid for the same lifetime as &self. + unsafe { + let mblk_ref = + core::mem::transmute::<&NonNull, &MsgBlk>(&ptr); - // Update the current head. If the next element is null, - // we're now empty. 
- if let Some(next) = next { - list.head = next; - } else { - self.inner = None; + core::mem::transmute::<&[u8], &[u8]>(&mblk_ref[..]) } - - // Unwrap safety: We have already guaranteed that this - // ptr is NonNull in this case, and violating that is - // the only failure mode for wrap_mblk. - Some(Packet::wrap_mblk(curr).unwrap()) } - } else { - None } } - /// Adds an owned `Packet` to the end of this chain. - /// - /// Internally, this unwraps the `Packet` back into an mblk_t, - /// before placing it at the tail. - pub fn append(&mut self, packet: Packet) { - // Unwrap safety: a valid Packet implies a non-null mblk_t. - // Jamming `NonNull` into PacketSeg/Packet might take some - // work just to avoid this unwrap. - let pkt = NonNull::new(packet.unwrap_mblk()).unwrap(); - - // We're guaranteeing today that a 'static Packet has - // no neighbours and is not part of a chain. - // This simplifies tail updates in both cases (no chain walk). - unsafe { - assert!((*pkt.as_ptr()).b_prev.is_null()); - assert!((*pkt.as_ptr()).b_next.is_null()); - } + fn body_mut(&mut self) -> &mut [u8] + where + T::Chunk: ByteSliceMut, + { + self.prepare(); - if let Some(ref mut list) = &mut self.inner { - let pkt_p = pkt.as_ptr(); - let tail_p = list.tail.as_ptr(); - unsafe { - (*tail_p).b_next = pkt_p; - (*pkt_p).b_prev = tail_p; - // pkt_p->b_next is already null. + match self.state.clone().into_inner() { + BodySegState::NoPullup => { + self.last_chunk.as_mut().map(|v| &mut v[..]).unwrap_or_default() + } + BodySegState::NeedsPullup => unreachable!(), + BodySegState::PulledUp => { + let mut ptr = + NonNull::new(self.msg_blk.load(Ordering::Relaxed)) + .expect("invariant violated: PulledUp with nullptr"); + + // SAFETY: MsgBlk(NonNull) has identical layout to + // NonNull, and the inner mblk lives as long as self. + // Since ownership is unaffected, the &mut [u8] derived from msg_blk + // is valid for the same lifetime as &mut self. 
+ unsafe { + let mblk_ref = core::mem::transmute::< + &mut NonNull, + &mut MsgBlk, + >(&mut ptr); + + core::mem::transmute::<&mut [u8], &mut [u8]>( + &mut mblk_ref[..], + ) + } } - list.tail = pkt; - } else { - self.inner = Some(PacketChainInner { head: pkt, tail: pkt }); } } - /// Return the head of the underlying `mblk_t` packet chain and - /// consume `self`. The caller of this function now owns the - /// `mblk_t` segment chain. - pub fn unwrap_mblk(mut self) -> Option> { - self.inner.take().map(|v| v.head) + fn extract_mblk(&mut self) -> Option { + let state = self.state.get_mut(); + + if !matches!(state, BodySegState::PulledUp) { + return None; + } + + // If we were pulled up, a later prepare will need to pullup again. + *state = BodySegState::NeedsPullup; + + let ptr = self.msg_blk.load(Ordering::Relaxed); + + // SAFETY: this mblk was created by using the MsgBlk::new api. + // PulledUp asserts its value is non-null. + unsafe { + Some( + MsgBlk::wrap_mblk(ptr) + .expect("invariant violated: PulledUp with nullptr"), + ) + } } } -impl Drop for PacketChain { +impl Drop for PktBodyWalker { fn drop(&mut self) { - // This is a minor variation on Packet's logic. illumos - // contains helper functions from STREAMS to just drop a whole - // chain. - cfg_if! { - if #[cfg(all(not(feature = "std"), not(test)))] { - // Safety: This is safe as long as the original - // `mblk_t` came from a call to `allocb(9F)` (or - // similar API). - if let Some(list) = &self.inner { - unsafe { ddi::freemsgchain(list.head.as_ptr()) }; - } - } else { - while let Some(pkt) = self.pop_front() { - drop(pkt); - } - } - } + self.extract_mblk(); } } -/// A network packet. -/// -/// The [`Packet`] type presents an abstraction for manipulating -/// network packets in both a `std` and `no_std` environment. The -/// first is useful for writing tests against the OPTE core engine and -/// executing them in userland, without the need for standing up a -/// full-blown virtual machine. 
To the engine this [`Packet`] is -/// absolutely no different than if it was running in-kernel for a -/// real virtual machine. -/// -/// The `no_std` implementation is used when running in-kernel. The -/// main difference is the `mblk_t` and `dblk_t` structures are coming -/// from viona (outbound/Tx) and mac (inbound/Rx), and we consume them -/// via [`Packet::wrap_mblk()`]. In reality this is typically holding -/// an Ethernet _frame_, but we prefer to use the colloquial -/// nomenclature of "packet". -/// -/// A [`Packet`] is made up of one or more segments ([`PacketSeg`]). -/// Any given header is *always* contained in a single segment, i.e. a -/// header never straddles multiple segments. While it's preferable to -/// have all headers in the first segment, it *may* be the case that -/// the headers span multiple segments; but a *single* header type -/// (e.g. the IP header) will *never* straddle two segments. The -/// payload, however, *may* span multiple segments. -/// -/// # illumos terminology -/// -/// In illumos there is no real notion of an mblk "packet" or -/// "segment": a packet is just a linked list of `mblk_t` values. -/// The "packet" is simply a pointer to the first `mblk_t` in the -/// list, which also happens to be the first "segment", and any -/// further segments are linked via `b_cont`. In the illumos -/// kernel code you'll *sometimes* find variables named `mp_head` -/// to indicate that it points to a packet. -/// -/// There is also the notion of a "chain" of packets. This is -/// represented by a list of `mblk_t` structure as well, but instead -/// of using `b_cont` the individual packets are linked via the -/// `b_next` field. In the illumos kernel code this this is often -/// referred to with the variable name `mp_chain`, but sometimes also -/// `mp_head` (or just `mp`). It's a bit ambiguous, and something you -/// kind of figure out as you work in the code more. 
Though part of me -/// would like to create some rust-like "new type pattern" in C to -/// disambiguate packets from packet chains across APIs so the -/// compiler can detect when your API is working against the wrong -/// contract (for example a function that expects a single packet but -/// is being fed a packet chain). -/// -/// TODOx -/// -/// * Document the various type states, their purpose, their data, and -/// how the [`Packet`] generally transitions between them. -/// -/// * Somewhere we'll want to enforce and document a 2-byte prefix pad -/// to keep IP header alignment (the host expects this). -/// -#[derive(Debug)] -pub struct Packet { - avail: usize, - segs: Vec, - state: S, +/// Packet state for the standard ULP path, or a full table walk over the slowpath. +pub struct PacketData { + pub(crate) headers: OpteMeta, + initial_lens: Option>, + body: PktBodyWalker, } -/// The type state of a packet that has been initialized and allocated, but -/// about which nothing else is known besides the length. -#[derive(Debug)] -pub struct Initialized { - // Total length of packet, in bytes. This is equal to the sum of - // the length of the _initialized_ window in all the segments - // (`b_wptr - b_rptr`). - len: usize, +impl From> for OpteMeta { + #[inline] + fn from(value: NoEncap) -> Self { + OpteMeta { + outer_eth: None, + outer_l3: None, + outer_encap: None, + inner_eth: value.inner_eth, + inner_l3: value.inner_l3, + inner_ulp: value.inner_ulp, + } + } } -/// The offset and length of a header. -#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] -pub struct HdrOffset { - /// The header's offset from start of packet, in bytes. - pub pkt_pos: usize, +impl core::fmt::Debug for PacketData { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.write_str("PacketHeaders(..)") + } +} - /// The index of the segment the header lives in, starting at 0. 
- pub seg_idx: usize, +impl PacketData { + pub fn initial_lens(&self) -> Option<&InitialLayerLens> { + self.initial_lens.as_deref() + } - /// The header's offset from the start of the segment, in bytes. - pub seg_pos: usize, + pub fn outer_ether( + &self, + ) -> Option<&InlineHeader>> { + self.headers.outer_eth.as_ref() + } - /// The length of the header. - pub hdr_len: usize, -} + pub fn outer_ip(&self) -> Option<&L3> { + self.headers.outer_l3.as_ref() + } -impl HdrOffset { - fn new(rdr_offset: ReaderOffset, hdr_len: usize) -> Self { - // We always take the reader offset _after_ parsing, thus we - // need to adjust the positions based on the header length. - Self { - pkt_pos: rdr_offset.pkt_pos - hdr_len, - seg_idx: rdr_offset.seg_idx, - seg_pos: rdr_offset.seg_pos - hdr_len, - hdr_len, + /// Returns whether this packet is sourced from outside the rack, + /// in addition to its VNI. + pub fn outer_encap_geneve_vni_and_origin(&self) -> Option<(Vni, bool)> { + match &self.headers.outer_encap { + Some(InlineHeader::Repr(EncapMeta::Geneve(g))) => { + Some((g.vni, g.oxide_external_pkt)) + } + Some(InlineHeader::Raw(ValidEncapMeta::Geneve(_, g))) => { + Some((g.vni(), valid_geneve_has_oxide_external(g))) + } + None => None, } } -} - -/// Bytes offsets for the outer headers. -/// -/// All outer headers are optional. -#[derive(Clone, Debug, Default)] -pub struct OuterHeaderOffsets { - pub ether: Option, - pub ip: Option, - pub encap: Option, -} -/// Byte offsets for the inner headers. -/// -/// The inner headers must consist of at least an Ethernet header. -#[derive(Clone, Debug, Default)] -pub struct InnerHeaderOffsets { - pub ether: HdrOffset, - pub ip: Option, - pub ulp: Option, -} + pub fn inner_ether(&self) -> &EthernetPacket { + &self.headers.inner_eth + } -/// Byte offsets for all headers. 
-#[derive(Clone, Debug, Default)] -pub struct HeaderOffsets { - pub outer: OuterHeaderOffsets, - pub inner: InnerHeaderOffsets, -} + pub fn inner_l3(&self) -> Option<&L3> { + self.headers.inner_l3.as_ref() + } -pub struct HdrInfo { - pub meta: M, - pub offset: HdrOffset, -} + pub fn inner_ulp(&self) -> Option<&Ulp> { + self.headers.inner_ulp.as_ref() + } -pub struct PacketInfo { - pub meta: PacketMeta, - pub offsets: HeaderOffsets, - // The body's checksum. It is up to the `NetworkImpl::Parser` on - // whether to populate this field or not. The reason for - // populating this field is to avoid duplicate work if the client - // has provided a ULP checksum. Rather than redoing the body - // checksum calculation, we can use incremental checksum - // techniques to stash the body's checksum for reuse when emitting - // the new headers. - // - // However, if the client does not provide a checksum, presumably - // because they are relying on checksum offload, this value should - // be `None`. In such case, `emit_headers()` will perform no ULP - // checksum update. - // - // This value may also be none if the packet has no notion of a - // ULP checksum; e.g., ARP. - pub body_csum: Option, - // Extra header space to avoid multiple allocations during encapsulation. - pub extra_hdr_space: Option, -} + pub fn inner_ip4(&self) -> Option<&Ipv4Packet> { + self.inner_l3().and_then(|v| match v { + L3::Ipv4(v) => Some(v), + _ => None, + }) + } -/// Body offset and length information. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub struct BodyInfo { - pub pkt_offset: usize, - pub seg_index: usize, - pub seg_offset: usize, - pub len: usize, -} + pub fn inner_ip6(&self) -> Option<&Ipv6Packet> { + self.inner_l3().and_then(|v| match v { + L3::Ipv6(v) => Some(v), + _ => None, + }) + } -/// The type state of a parsed packet. -/// -/// The parsed type state represents that a packet has been -/// successfully parsed and contains all pertinent information derived -/// from parsing. 
-#[derive(Debug)] -pub struct Parsed { - len: usize, - meta: PacketMeta, - flow: InnerFlowId, - hdr_offsets: HeaderOffsets, - body_csum: Option, - body: BodyInfo, - body_modified: bool, -} + pub fn inner_icmp(&self) -> Option<&IcmpV4Packet> { + self.inner_ulp().and_then(|v| match v { + Ulp::IcmpV4(v) => Some(v), + _ => None, + }) + } -pub trait PacketState {} + pub fn inner_icmp6(&self) -> Option<&IcmpV6Packet> { + self.inner_ulp().and_then(|v| match v { + Ulp::IcmpV6(v) => Some(v), + _ => None, + }) + } -pub trait CanRead { - fn len(&self) -> usize; -} + pub fn inner_tcp(&self) -> Option<&TcpPacket> { + self.inner_ulp().and_then(|v| match v { + Ulp::Tcp(v) => Some(v), + _ => None, + }) + } -impl PacketState for Initialized {} -impl PacketState for Parsed {} + pub fn inner_udp(&self) -> Option<&UdpPacket> { + self.inner_ulp().and_then(|v| match v { + Ulp::Udp(v) => Some(v), + _ => None, + }) + } -impl CanRead for Initialized { - fn len(&self) -> usize { - self.len + pub fn is_inner_tcp(&self) -> bool { + matches!(self.inner_ulp(), Some(Ulp::Tcp(_))) } -} -impl CanRead for Parsed { - fn len(&self) -> usize { - self.len + pub fn prep_body(&mut self) + where + T::Chunk: ByteSliceMut, + T: Pullup, + { + self.body.prepare() } -} -impl Packet { - /// Return the amount of buffer space available to this packet. - pub fn avail(&self) -> usize { - self.avail + pub fn body(&self) -> &[u8] + where + T::Chunk: ByteSliceMut, + T: Pullup, + { + self.body.body() } - /// Return the pointer address of the underlying mblk_t. - /// - /// NOTE: This is purely to allow passing the pointer value up to - /// DTrace so that the mblk can be inspected (read only) in probe - /// context. - pub fn mblk_addr(&self) -> uintptr_t { - self.segs[0].mp as uintptr_t + pub fn copy_remaining(&self) -> Vec + where + T::Chunk: ByteSliceMut, + T: Pullup, + { + let base = self.body(); + base.to_vec() } - /// Return the number of segments that make up this packet. 
- pub fn num_segs(&self) -> usize { - self.segs.len() + pub fn append_remaining(&self, buf: &mut Vec) + where + T::Chunk: ByteSliceMut, + T: Pullup, + { + let base = self.body(); + buf.extend_from_slice(base); } - /// Return the head of the underlying `mblk_t` segment chain and - /// consume `self`. The caller of this function now owns the - /// `mblk_t` segment chain. - pub fn unwrap_mblk(mut self) -> *mut mblk_t { - let mp_head = self.segs[0].mp; - // We need to make sure to NULL out the mp pointer or else - // `drop()` will `freemsg(9F)` even though ownership of the - // mblk has passed on to someone else. - self.segs[0].mp = ptr::null_mut(); - mp_head + pub fn body_mut(&mut self) -> &mut [u8] + where + T::Chunk: ByteSliceMut, + T: Pullup, + { + self.body.body_mut() } -} -/// For the `no_std`/illumos kernel environment, we want the `mblk_t` -/// drop to occur at the [`Packet`] level, where we can make use of -/// `freemsg(9F)`. -impl Drop for Packet { - fn drop(&mut self) { - // Drop the segment chain if there is one. Consumers of Packet - // will never own a packet with no segments. Rather, this - // happens when a Packet transitions from one type-state to - // another, and the segments are passed onto the new Packet. - // This guarantees that we only free the segment chain once. - if !self.segs.is_empty() { - let head_mp = self.segs[0].mp; - drop(core::mem::take(&mut self.segs)); - cfg_if! { - if #[cfg(all(not(feature = "std"), not(test)))] { - // Safety: This is safe as long as the original - // `mblk_t` came from a call to `allocb(9F)` (or - // similar API). - unsafe { ddi::freemsg(head_mp) }; - } else { - mock_freemsg(head_mp); - } - } + /// Return whether the IP layer has a checksum both structurally + /// and that it is non-zero (i.e., not offloaded). 
+ pub fn has_ip_csum(&self) -> bool { + match &self.headers.inner_l3 { + Some(L3::Ipv4(v4)) => v4.checksum() != 0, + Some(L3::Ipv6(_)) => false, + None => false, } } -} -impl Packet { - /// Allocate a new [`Packet`] containing a data buffer of `size` - /// bytes. - /// - /// The returned packet consists of exactly one [`PacketSeg`]. - /// - /// In the kernel environment this uses `allocb(9F)` and - /// `freemsg(9F)` under the hood. - /// - /// In the `std` environment this uses a mock implementation of - /// `allocb(9F)` and `freeb(9F)`, which contains enough scaffolding - /// to satisfy OPTE's use of the underlying `mblk_t` and `dblk_t` - /// structures. - pub fn alloc(size: usize) -> Self { - let mp = allocb(size); + /// Return whether the ULP layer has a checksum both structurally + /// and that it is non-zero (i.e., not offloaded). + pub fn has_ulp_csum(&self) -> bool { + let csum = match &self.headers.inner_ulp { + Some(Ulp::Tcp(t)) => t.checksum(), + Some(Ulp::Udp(u)) => u.checksum(), + Some(Ulp::IcmpV4(i4)) => i4.checksum(), + Some(Ulp::IcmpV6(i6)) => i6.checksum(), + None => return false, + }; - // Safety: We know this is safe because we just built the `mp` - // in a safe manner. - let seg = unsafe { PacketSeg::wrap_mblk(mp) }; - Packet::new(seg) + csum != 0 } +} - pub fn alloc_and_expand(size: usize) -> Self { - let mut seg = PacketSeg::alloc(size); - seg.expand_end(size).unwrap(); - Packet::new(seg) - } +impl From<&PacketData> for InnerFlowId { + #[inline] + fn from(meta: &PacketData) -> Self { + let (proto, addrs) = match meta.inner_l3() { + Some(L3::Ipv4(pkt)) => ( + pkt.protocol().0, + AddrPair::V4 { src: pkt.source(), dst: pkt.destination() }, + ), + Some(L3::Ipv6(pkt)) => ( + pkt.next_layer().unwrap_or_default().0, + AddrPair::V6 { src: pkt.source(), dst: pkt.destination() }, + ), + None => (255, FLOW_ID_DEFAULT.addrs), + }; - /// Create a [`Packet`] value from the passed in - /// `bytes`. 
- /// - /// The returned packet consists of exactly one [`PacketSeg`] with - /// enough space to hold `bytes.len()`. - pub fn copy(bytes: &[u8]) -> Self { - let mut pkt = Packet::alloc_and_expand(bytes.len()); - let mut wtr = pkt.seg0_wtr(); - // Unwrap: We know there cannot be an error because we - // allocate a packet large enough to hold all bytes. - wtr.write(bytes).unwrap(); - pkt.state.len = bytes.len(); - pkt - } - - pub fn get_rdr(&self) -> PacketReader { - PacketReader::new(&self.segs) - } - - pub fn get_rdr_mut(&mut self) -> PacketReaderMut { - PacketReaderMut::new(&mut self.segs) - } - - /// Create a new packet from `seg0`. - fn new(seg0: PacketSeg) -> Self { - let segs = vec![seg0]; - let len: usize = segs.iter().map(|s| s.len).sum(); - let avail: usize = segs.iter().map(|s| s.avail).sum(); - - Packet { avail, segs, state: Initialized { len } } - } - - #[cfg(test)] - fn new2(seg0: PacketSeg, seg1: PacketSeg) -> Self { - let segs = vec![seg0, seg1]; - let len: usize = segs.iter().map(|s| s.len).sum(); - let avail: usize = segs.iter().map(|s| s.avail).sum(); - - Packet { avail, segs, state: Initialized { len } } - } - - pub fn parse_ether<'a>( - rdr: &mut PacketReaderMut<'a>, - ) -> Result<(HdrInfo, EtherHdr<'a>), ParseError> { - let ether = EtherHdr::parse(rdr)?; - let offset = HdrOffset::new(rdr.offset(), ether.hdr_len()); - let meta = EtherMeta::from(ðer); - Ok((HdrInfo { meta, offset }, ether)) - } - - pub fn parse_ip4<'a>( - rdr: &mut PacketReaderMut<'a>, - ) -> Result<(HdrInfo, Ipv4Hdr<'a>), ParseError> { - let ip = Ipv4Hdr::parse(rdr)?; - let offset = HdrOffset::new(rdr.offset(), usize::from(ip.hdr_len())); - let meta = IpMeta::from(Ipv4Meta::from(&ip)); - Ok((HdrInfo { meta, offset }, ip)) - } - - pub fn parse_ip6<'a>( - rdr: &mut PacketReaderMut<'a>, - ) -> Result<(HdrInfo, Ipv6Hdr<'a>), ParseError> { - let ip = Ipv6Hdr::parse(rdr)?; - let offset = HdrOffset::new(rdr.offset(), ip.hdr_len()); - let meta = IpMeta::from(Ipv6Meta::from(&ip)); - 
Ok((HdrInfo { meta, offset }, ip)) - } - - pub fn parse_icmp<'a>( - rdr: &mut PacketReaderMut<'a>, - ) -> Result<(HdrInfo, UlpHdr<'a>), ParseError> { - let icmp = IcmpHdr::parse(rdr)?; - let offset = HdrOffset::new(rdr.offset(), icmp.hdr_len()); - let icmp_meta = Icmpv4Meta::from(&icmp); - let meta = UlpMeta::from(icmp_meta); - Ok((HdrInfo { meta, offset }, UlpHdr::Icmpv4(icmp))) - } - - pub fn parse_icmp6<'a>( - rdr: &mut PacketReaderMut<'a>, - ) -> Result<(HdrInfo, UlpHdr<'a>), ParseError> { - let icmp6 = IcmpHdr::parse(rdr)?; - let offset = HdrOffset::new(rdr.offset(), icmp6.hdr_len()); - let icmp_meta = Icmpv6Meta::from(&icmp6); - let meta = UlpMeta::from(icmp_meta); - Ok((HdrInfo { meta, offset }, UlpHdr::Icmpv6(icmp6))) - } - - pub fn parse_tcp<'a>( - rdr: &mut PacketReaderMut<'a>, - ) -> Result<(HdrInfo, UlpHdr<'a>), ParseError> { - let tcp = TcpHdr::parse(rdr)?; - let offset = HdrOffset::new(rdr.offset(), tcp.hdr_len()); - let meta = UlpMeta::from(TcpMeta::from(&tcp)); - Ok((HdrInfo { meta, offset }, UlpHdr::from(tcp))) - } - - pub fn parse_udp<'a>( - rdr: &mut PacketReaderMut<'a>, - ) -> Result<(HdrInfo, UlpHdr<'a>), ParseError> { - let udp = UdpHdr::parse(rdr)?; - let offset = HdrOffset::new(rdr.offset(), udp.hdr_len()); - let meta = UlpMeta::from(UdpMeta::from(&udp)); - Ok((HdrInfo { meta, offset }, UlpHdr::from(udp))) - } - - pub fn parse_geneve<'a>( - rdr: &mut PacketReaderMut<'a>, - ) -> Result<(HdrInfo, GeneveHdr<'a>), ParseError> { - // We don't need to store the UDP metadata here because any - // relevant fields can be reconstructed from knowledge of the - // packet body and the encap itself. 
- let udp_hdr = UdpHdr::parse(rdr)?; + let (src_port, dst_port) = meta + .inner_ulp() + .map(|ulp| { + ( + ulp.true_src_port() + .or_else(|| ulp.pseudo_port()) + .unwrap_or(0), + ulp.true_dst_port() + .or_else(|| ulp.pseudo_port()) + .unwrap_or(0), + ) + }) + .unwrap_or((0, 0)); - match udp_hdr.dst_port() { - GENEVE_PORT => { - let geneve = GeneveHdr::parse(rdr)?; - let offset = HdrOffset::new( - rdr.offset(), - geneve.hdr_len() + udp_hdr.hdr_len(), - ); - let meta = GeneveMeta::from((&udp_hdr, &geneve)); - Ok((HdrInfo { meta, offset }, geneve)) - } - port => Err(ParseError::UnexpectedDestPort(port)), - } + InnerFlowId { proto, addrs, src_port, dst_port } } +} - pub fn parse_geneve_inner<'a>( - rdr: &mut PacketReaderMut<'a>, - ) -> Result<(HdrInfo, GeneveHdr<'a>), ParseError> { - let geneve = GeneveHdr::parse(rdr)?; - let offset = HdrOffset::new(rdr.offset(), geneve.hdr_len()); - let meta = GeneveMeta::from(&geneve); - Ok((HdrInfo { meta, offset }, geneve)) - } +/// A network packet. +/// +/// A packet is made up of one or more segments. Any given header is +/// *always* contained in a single segment, i.e. a header never straddles +/// multiple segments. While it's preferable to have all headers in the +/// first segment, it *may* be the case that the headers span multiple +/// segments; but a *single* header type (e.g. the IP header) will *never* +/// straddle two segments. The payload, however, *may* span multiple segments. +/// +/// # illumos terminology +/// +/// In illumos there is no real notion of an mblk "packet" or +/// "segment": a packet is just a linked list of `mblk_t` values. +/// This type indicates that an `mblk_t` chain is to be treated as +/// a network packet, as far as its bytes are concerned. +/// The "packet" is simply a pointer to the first `mblk_t` in the +/// list, which also happens to be the first "segment", and any +/// further segments are linked via `b_cont`. 
In the illumos +/// kernel code you'll *sometimes* find variables named `mp_head` +/// to indicate that it points to a packet. +/// +/// There is also the notion of a "chain" of packets. This is +/// represented by a list of `mblk_t` structures as well, but instead +/// of using `b_cont` the individual packets are linked via the +/// `b_next` field. In the illumos kernel code this is often +/// referred to with the variable name `mp_chain`, but sometimes also +/// `mp_head` (or just `mp`). It's a bit ambiguous, and something you +/// kind of figure out as you work in the code more. In OPTE, we +/// disambiguate using the `MsgBlk` and `MsgBlkChain` types. The former +/// enforces that `b_next` and `b_prev` are disconnected. +// TODO: In theory, this can be any `Read` type giving us `&mut [u8]`s, +// but in practice we are internally reliant on returning `MsgBlk`s in +// hairpin actions and the like. Fighting the battle of making this generic +// is a bridge too far for the `ingot` datapath rewrite. This might have +// value in future.
+#[derive(Debug)] +pub struct Packet { + state: S, +} - pub fn parse( - mut self, - dir: Direction, - net: impl NetworkParser, - ) -> Result, ParseError> { - let mut rdr = self.get_rdr_mut(); +pub type LiteInPkt = + Packet::InMeta<::Chunk>>>; +pub type LiteOutPkt = + Packet::OutMeta<::Chunk>>>; - let mut info = match dir { - Direction::Out => net.parse_outbound(&mut rdr)?, - Direction::In => net.parse_inbound(&mut rdr)?, - }; +impl<'a, T: Read + BufferState + Pullup + 'a, M: LightweightMeta> + Packet> +where + T::Chunk: ByteSliceMut + IntoBufPointer<'a>, +{ + #[inline(always)] + pub fn parse_inbound = M>>( + pkt: T, + net: NP, + ) -> Result, ParseError> { + let len = pkt.len(); + let base_ptr = pkt.base_ptr(); - let (pkt_offset, mut seg_index, mut seg_offset, end_of_seg) = - rdr.finish(); + let meta = net.parse_inbound(pkt)?; + meta.headers.validate(len)?; - // If we finished on the end of a segment, and there are more - // segments to go, then bump the segment index and reset the - // segment offset to properly indicate the start of the body. - if end_of_seg && ((seg_index + 1) < self.segs.len()) { - seg_index += 1; - seg_offset = 0; - } + Ok(Packet { state: LiteParsed { meta, base_ptr, len } }) + } - assert!( - self.state.len >= pkt_offset, - "{} >= {}", - self.state.len, - pkt_offset, - ); + #[inline(always)] + pub fn parse_outbound = M>>( + pkt: T, + net: NP, + ) -> Result, ParseError> { + let len = pkt.len(); + let base_ptr = pkt.base_ptr(); - let ulp_hdr_len = info.meta.inner.ulp.map(|u| u.hdr_len()).unwrap_or(0); - let body_len = match info.meta.inner.ip { - // If we have IP and ULP metadata, we can use those to compute - // the payload length. - // If there's no ULP, just return the L3 payload length. - Some(IpMeta::Ip4(ip4)) => { - // Total length here refers to the n_bytes in this packet, - // so we won't get bogus overly long values in case of - // fragmentation. 
- let expected = ip4.hdr_len() + ulp_hdr_len; - - usize::from(ip4.total_len).checked_sub(expected).ok_or( - ParseError::BadInnerIpLen { - expected, - actual: usize::from(ip4.total_len), - }, - )? - } - Some(IpMeta::Ip6(ip6)) => usize::from(ip6.pay_len) - .checked_sub(ulp_hdr_len) - .ok_or(ParseError::BadInnerIpLen { - expected: ulp_hdr_len, - actual: usize::from(ip6.pay_len), - })?, - - // If there's no IP metadata, we fallback to considering any - // remaining bytes in the packet buffer to be the body. - None => self.state.len - pkt_offset, - }; - let mut body = - BodyInfo { pkt_offset, seg_index, seg_offset, len: body_len }; - let flow = InnerFlowId::from(&info.meta); - - // Packet processing logic requires all headers to be in the leading - // segment. Detect if this is not the case and squash segments - // containing headers into one segment. This value represents the - // inclusive upper bound of the squash. - let squash_to = match (body.seg_index, body.seg_offset) { - // The body is in the first segment meaning all headers are also in - // the first segment. No squashing needed. - (0, _) => 0, - - // The body starts at a zero offset in segment n. This means we need - // to squash all segments prior to n. - (n, 0) => n - 1, - - // The body starts at a non-zero offset in segment n. This means we - // need to squash all segments up to and including n. - (n, _) => n, - }; + let meta = net.parse_outbound(pkt)?; + meta.headers.validate(len)?; - // If the squash bound is zero, there is nothing left to do here, just - // return. - if squash_to == 0 { - return Ok(Packet { - avail: self.avail, - // The new packet is taking ownership of the segments. 
- segs: core::mem::take(&mut self.segs), - state: Parsed { - len: self.state.len, - hdr_offsets: info.offsets, - meta: info.meta, - flow, - body_csum: info.body_csum, - body, - body_modified: false, - }, - }); - } + Ok(Packet { state: LiteParsed { meta, base_ptr, len } }) + } - // Calculate the body offset within the new squashed segment - if body.seg_offset != 0 { - for s in &self.segs[..squash_to] { - body.seg_offset += s.len; + #[inline] + pub fn to_full_meta(self) -> Packet> { + let Packet { state: LiteParsed { len, base_ptr, meta } } = self; + let IngotParsed { headers, data, last_chunk } = meta; + + // TODO: we can probably not do this in some cases, but we + // don't have a way for `HeaderAction`s to signal that they + // *may* change the fields we need in the slowpath. + let body_csum = headers.compute_body_csum(); + let flow = headers.flow(); + + let headers: OpteMeta<_> = headers.into(); + let initial_lens = Some( + InitialLayerLens { + outer_eth: headers.outer_eth.packet_length(), + outer_l3: headers.outer_l3.packet_length(), + outer_encap: headers.outer_encap.packet_length(), + inner_eth: headers.inner_eth.packet_length(), + inner_l3: headers.inner_l3.packet_length(), + inner_ulp: headers.inner_ulp.packet_length(), } - } - body.seg_index -= squash_to; + .into(), + ); + let body = PktBodyWalker::new(last_chunk, data); + let meta = Box::new(PacketData { headers, initial_lens, body }); - // Determine how big the message block for the squashed segment needs to - // be. 
- let mut new_seg_size = 0; - for s in &self.segs[..squash_to + 1] { - new_seg_size += s.len; + Packet { + state: FullParsed { + meta, + flow, + body_csum, + base_ptr, + l4_hash: Memoised::Uninit, + body_modified: false, + len, + inner_csum_dirty: false, + }, } + } - let extra_space = info.extra_hdr_space.unwrap_or(0); - let mp = allocb(new_seg_size + extra_space); - unsafe { - (*mp).b_wptr = (*mp).b_wptr.add(extra_space); - (*mp).b_rptr = (*mp).b_rptr.add(extra_space); - for s in &self.segs[..squash_to + 1] { - core::ptr::copy_nonoverlapping( - (*s.mp).b_rptr, - (*mp).b_wptr, - s.len, - ); - (*mp).b_wptr = (*mp).b_wptr.add(s.len); - } - } + #[inline] + pub fn meta(&self) -> &M { + &self.state.meta.headers + } - // Construct a new segment vector, tacking on any remaining segments - // after the header segments. - let orig_segs = core::mem::take(&mut self.segs); - let mut segs = vec![unsafe { PacketSeg::wrap_mblk(mp) }]; - if squash_to + 1 < orig_segs.len() { - segs[0].link(&orig_segs[squash_to + 1]); - segs.extend_from_slice(&orig_segs[squash_to + 1..]); - } - #[cfg(any(feature = "std", test))] - for s in &orig_segs[..squash_to + 1] { - mock_freeb(s.mp); - } + #[inline] + pub fn meta_mut(&mut self) -> &mut M { + &mut self.state.meta.headers + } - let mut off = 0; - for header_offsets in [ - info.offsets.outer.ether.as_mut(), - info.offsets.outer.ip.as_mut(), - info.offsets.outer.encap.as_mut(), - Some(&mut info.offsets.inner.ether), - info.offsets.inner.ip.as_mut(), - info.offsets.inner.ulp.as_mut(), - ] - .into_iter() - .flatten() - { - header_offsets.pkt_pos = off; - header_offsets.seg_idx = 0; - header_offsets.seg_pos = off; - off += header_offsets.hdr_len; - } + #[inline] + pub fn len(&self) -> usize { + self.state.len + } - Ok(Packet { - avail: self.avail, - segs, - state: Parsed { - len: self.state.len, - hdr_offsets: info.offsets, - meta: info.meta, - flow, - body_csum: info.body_csum, - body, - body_modified: false, - }, - }) + #[inline] + pub fn 
mblk_addr(&self) -> uintptr_t { + self.state.base_ptr } - pub fn seg0_wtr(&mut self) -> PacketSegWriter { - self.segs[0].get_writer() + #[inline] + pub fn flow(&self) -> InnerFlowId { + self.meta().flow() } +} - pub fn seg_wtr(&mut self, i: usize) -> PacketSegWriter { - self.segs[i].get_writer() +impl Packet> { + pub fn meta(&self) -> &PacketData { + &self.state.meta } - pub fn add_seg( - &mut self, - size: usize, - ) -> Result { - let mut seg = PacketSeg::alloc(size); - seg.expand_end(size)?; - let len = self.segs.len(); - if len > 0 { - let last_seg = &mut self.segs[len - 1]; - last_seg.link(&seg); - } - self.segs.push(seg); - self.state.len += size; + pub fn meta_mut(&mut self) -> &mut PacketData { + &mut self.state.meta + } - Ok(self.seg_wtr(len)) + pub fn checksums_dirty(&self) -> bool { + self.state.inner_csum_dirty } - /// Wrap the `mblk_t` packet in a [`Packet`], taking ownership of - /// the `mblk_t` packet as a result. An `mblk_t` packet consists - /// of one or more `mblk_t` segments chained together via - /// `b_cont`. As a result, this [`Packet`] may consist of *one or - /// more* [`PacketSeg`]s. When the [`Packet`] is dropped, the - /// underlying `mblk_t` segment chain is freed. If you wish to - /// pass on ownership you must call the [`Packet::unwrap_mblk()`] - /// function. - /// - /// # Safety - /// - /// The `mp` pointer must point to an `mblk_t` allocated by - /// `allocb(9F)` or provided by some kernel API which itself used - /// one of the DDI/DKI APIs to allocate it. - /// - /// # Errors - /// - /// * Return [`WrapError::NullPtr`] is `mp` is `NULL`. - pub unsafe fn wrap_mblk(mp: *mut mblk_t) -> Result { - if mp.is_null() { - return Err(WrapError::NullPtr); + #[inline] + /// Convert a packet's metadata into a set of instructions + /// needed to serialize all its changes to the wire. + pub fn emit_spec(&mut self) -> Result + where + T::Chunk: ByteSliceMut, + { + // Roughly how this works: + // - Identify rightmost structural-changed field. 
+ // - fill out owned versions into the push_spec of all + // present fields we rewound past. + // - Rewind up to+including that point in original + // pkt space. + let l4_hash = self.l4_hash(); + let state = &mut self.state; + let init_lens = state.meta.initial_lens.as_ref().unwrap(); + let headers = &state.meta.headers; + let payload_len = state.len - init_lens.hdr_len(); + let mut encapped_len = payload_len; + + let mut push_spec = OpteEmit::default(); + let mut rewind = 0; + + if state.body_modified { + push_spec.replace_body = state.meta.body.extract_mblk(); + } + + // structural change if: + // hdr_len is different. + // needs_emit is true (i.e., now on an owned repr). + + // Part of the initial design idea of ingot was the desire to automatically + // do this sort of thing. We are so, so far from that... + let mut force_serialize = false; + + match &headers.inner_ulp { + Some(ulp) => { + let l = ulp.packet_length(); + encapped_len += l; + + if ulp.needs_emit() || l != init_lens.inner_ulp { + let inner = + push_spec.inner.get_or_insert_with(Default::default); + + inner.ulp = Some(match ulp { + Ulp::Tcp(Header::Repr(t)) => UlpRepr::Tcp(*t.clone()), + Ulp::Tcp(Header::Raw(t)) => UlpRepr::Tcp(t.into()), + Ulp::Udp(Header::Repr(t)) => UlpRepr::Udp(*t.clone()), + Ulp::Udp(Header::Raw(t)) => UlpRepr::Udp(t.into()), + Ulp::IcmpV4(Header::Repr(t)) => { + UlpRepr::IcmpV4(*t.clone()) + } + Ulp::IcmpV4(Header::Raw(t)) => { + UlpRepr::IcmpV4(t.into()) + } + Ulp::IcmpV6(Header::Repr(t)) => { + UlpRepr::IcmpV6(*t.clone()) + } + Ulp::IcmpV6(Header::Raw(t)) => { + UlpRepr::IcmpV6(t.into()) + } + }); + force_serialize = true; + rewind += init_lens.inner_ulp; + } + } + None if init_lens.inner_ulp != 0 => { + force_serialize = true; + rewind += init_lens.inner_ulp; + } + _ => {} } - // Compute the number of `mblk_t`s in this segment chain. - // - // We are currently forced to take at least one memory allocation. 
- // That's because we're wrapping each `mblk_t` in a segment chain (the - // `b_cont` items) in a `PacketSeg`, and then storing all those in - // `self`. We previously had a statically-sized array here, of length 4, - // to avoid those allocs. However, that obviously assumes we never have - // chains of more than 4 elements, which we've now hit. - // - // We pass over the linked-list twice here: once to compute the length, - // so that we can allocate exactly once, and once to actually wrap - // everything. - let mut n_segments = 1; - let mut next_seg = (*mp).b_cont; - while !next_seg.is_null() { - n_segments += 1; - next_seg = (*next_seg).b_cont; + match &headers.inner_l3 { + Some(l3) => { + let l = l3.packet_length(); + encapped_len += l; + + if force_serialize || l3.needs_emit() || l != init_lens.inner_l3 + { + let inner = + push_spec.inner.get_or_insert_with(Default::default); + + inner.l3 = Some(match l3 { + L3::Ipv4(Header::Repr(v4)) => L3Repr::Ipv4(*v4.clone()), + L3::Ipv4(Header::Raw(v4)) => L3Repr::Ipv4(v4.into()), + L3::Ipv6(Header::Repr(v6)) => L3Repr::Ipv6(*v6.clone()), + + // We can't actually do structural mods here today using OPTE, + // but account for the possibility at least. + L3::Ipv6(Header::Raw(v6)) => { + L3Repr::Ipv6(v6.to_owned(None)?) + } + }); + force_serialize = true; + rewind += init_lens.inner_l3; + } + } + None if init_lens.inner_l3 != 0 => { + force_serialize = true; + rewind += init_lens.inner_l3; + } + _ => {} } - let mut segs = Vec::with_capacity(n_segments); - - // Restore `next_seg`, since we iterate over the list another time to - // actually wrap the `mblk_t`s. 
- let mut next_seg = (*mp).b_cont; - let mut len = 0; - let mut avail = 0; - let mut seg = PacketSeg::wrap_mblk(mp); - avail += seg.avail; - len += seg.len; - segs.push(seg); - - while !next_seg.is_null() { - let tmp = (*next_seg).b_cont; - seg = PacketSeg::wrap_mblk(next_seg); - avail += seg.avail; - len += seg.len; - segs.push(seg); - next_seg = tmp; + + // inner eth + encapped_len += headers.inner_eth.packet_length(); + if force_serialize { + let inner = push_spec.inner.get_or_insert_with(Default::default); + inner.eth = match &headers.inner_eth { + Header::Repr(p) => **p, + Header::Raw(p) => p.into(), + }; + rewind += init_lens.inner_eth; } - Ok(Packet { avail, segs, state: Initialized { len } }) - } + match &headers.outer_encap { + Some(encap) + if force_serialize + || encap.needs_emit() + || encap.packet_length() != init_lens.outer_encap => + { + push_spec.outer_encap = Some(match encap { + InlineHeader::Repr(o) => *o, + InlineHeader::Raw(ValidEncapMeta::Geneve(u, g)) => { + EncapMeta::Geneve(GeneveMeta { + entropy: u.source(), + vni: g.vni(), + oxide_external_pkt: valid_geneve_has_oxide_external( + g, + ), + }) + } + }); - /// A combination of [`Self::wrap_mblk()`] followed by [`Self::parse()`]. - /// - /// This is a bit more convenient than dealing with the possible - /// error from each separately. - /// - /// # Safety - /// - /// See [`Self::wrap_mblk()`]. - pub unsafe fn wrap_mblk_and_parse( - mp: *mut mblk_t, - dir: Direction, - net: N, - ) -> Result, PacketError> { - let pkt = Self::wrap_mblk(mp)?; - pkt.parse(dir, net).map_err(PacketError::from) - } -} + force_serialize = true; + rewind += init_lens.outer_encap; + } + None if init_lens.outer_encap != 0 => { + force_serialize = true; + rewind += init_lens.outer_encap; + } + _ => {} + } -/// A packet body transformation. -/// -/// A body transformation allows an action to modify zero, one, or -/// more bytes of a packet's body. 
The body starts directly after the -/// ULP header, and continues to the last byte of the packet. This -/// transformation is currently limited to only modifying bytes; it -/// does not allow adding or removing bytes (e.g. to encrypt the body). -pub trait BodyTransform: fmt::Display + DynClone { - /// Execute the body transformation. The body segments include - /// **only** body data, starting directly after the end of the ULP - /// header. - /// - /// # Errors - /// - /// The transformation can choose to return a - /// [`BodyTransformError`] at any time if the body is not - /// acceptable. On error, none or some of the bytes may have been - /// modified. - fn run( - &self, - dir: Direction, - body_segs: &mut [&mut [u8]], - ) -> Result<(), BodyTransformError>; -} + match &headers.outer_l3 { + Some(l3) + if force_serialize + || l3.needs_emit() + || l3.packet_length() != init_lens.outer_l3 => + { + let encap_len = push_spec.outer_encap.packet_length(); -dyn_clone::clone_trait_object!(BodyTransform); + push_spec.outer_ip = Some(match l3 { + L3::Ipv6(BoxedHeader::Repr(o)) => L3Repr::Ipv6(*o.clone()), + L3::Ipv4(BoxedHeader::Repr(o)) => L3Repr::Ipv4(*o.clone()), + L3::Ipv6(BoxedHeader::Raw(o)) => { + L3Repr::Ipv6(o.to_owned(None)?) 
+ } + L3::Ipv4(BoxedHeader::Raw(o)) => L3Repr::Ipv4(o.into()), + }); -#[derive(Debug)] -pub enum BodyTransformError { - NoPayload, - ParseFailure(String), - Todo(String), - UnexpectedBody(String), -} + let inner_sz = (encapped_len + encap_len) as u16; -impl From for BodyTransformError { - fn from(e: smoltcp::wire::Error) -> Self { - Self::ParseFailure(format!("{}", e)) + match &mut push_spec.outer_ip { + Some(L3Repr::Ipv4(v4)) => { + v4.total_len = (v4.ihl as u16) * 4 + inner_sz; + } + Some(L3Repr::Ipv6(v6)) => { + v6.payload_len = inner_sz; + } + _ => {} + } + + force_serialize = true; + rewind += init_lens.outer_l3; + } + None if init_lens.outer_l3 != 0 => { + force_serialize = true; + rewind += init_lens.outer_l3; + } + _ => {} + } + + match &headers.outer_eth { + Some(eth) + if force_serialize + || eth.needs_emit() + || eth.packet_length() != init_lens.outer_eth => + { + push_spec.outer_eth = Some(match eth { + InlineHeader::Repr(o) => *o, + InlineHeader::Raw(r) => r.into(), + }); + + rewind += init_lens.outer_eth; + } + None if init_lens.outer_eth != 0 => { + rewind += init_lens.outer_eth; + } + _ => {} + } + + Ok(EmitSpec { + rewind: rewind as u16, + ulp_len: encapped_len as u32, + prepend: PushSpec::Slowpath(push_spec.into()), + l4_hash, + }) } -} -impl Packet { - pub fn body_csum(&self) -> Option { - self.state.body_csum + pub fn len(&self) -> usize { + self.state.len } - pub fn body_info(&self) -> BodyInfo { - self.state.body + #[inline] + pub fn flow(&self) -> &InnerFlowId { + &self.state.flow } - pub fn body_offset(&self) -> usize { - self.state.body.pkt_offset + /// Run the [`HdrTransform`] against this packet. + #[inline] + pub fn hdr_transform( + &mut self, + xform: &HdrTransform, + ) -> Result<(), HdrTransformError> + where + T::Chunk: ByteSliceMut, + { + self.state.inner_csum_dirty |= xform.run(&mut self.state.meta)?; + + // Recomputing this is a little bit wasteful, since we're moving + // rebuilding a static repr from packet fields. 
This is a necessary + // part of slowpath use because layers are designed around intermediate + // flowkeys. + // + // We *could* elide this on non-compiled UFT transforms, but we do not + // need those today. + self.state.flow = InnerFlowId::from(self.meta()); + Ok(()) } /// Run the [`BodyTransform`] against this packet. @@ -1333,7 +1208,11 @@ impl Packet { &mut self, dir: Direction, xform: &dyn BodyTransform, - ) -> Result<(), BodyTransformError> { + ) -> Result<(), BodyTransformError> + where + T::Chunk: ByteSliceMut, + T: Pullup, + { // We set the flag now with the assumption that the transform // could fail after modifying part of the body. In the future // we could have something more sophisticated that only sets @@ -1341,10 +1220,10 @@ impl Packet { // this does the job as nothing that needs top performance // should make use of body transformations. self.state.body_modified = true; + self.state.meta.body.prepare(); - match self.body_segs_mut() { - Some(mut body_segs) => xform.run(dir, &mut body_segs), - + match self.body_mut() { + Some(body_segs) => xform.run(dir, body_segs), None => { self.state.body_modified = false; Err(BodyTransformError::NoPayload) @@ -1352,184 +1231,139 @@ impl Packet { } } - pub fn body_seg(&self) -> usize { - self.state.body.seg_index - } - - /// Return a list of the body segments, or `None` if there is no - /// body. - pub fn body_segs(&self) -> Option> { - if self.state.body.len == 0 { - return None; - } - - let mut body_segs = vec![]; - let body_seg = self.state.body.seg_index; - - for (i, seg) in self.segs[body_seg..].iter().enumerate() { - if i == 0 { - // Panic: We are slicing with the parse data. If - // we parsed correctly, this should not panic. 
- body_segs.push( - seg.slice_unchecked(self.state.body.seg_offset, None), - ); - } else { - body_segs.push(seg.slice()); - } + #[inline] + pub fn body(&self) -> Option<&[u8]> + where + T::Chunk: ByteSliceMut, + T: Pullup, + { + let out = self.state.meta.body(); + if out.is_empty() { + None + } else { + Some(out) } - - Some(body_segs) } - /// Return a list of mutable body segments, or `None` if there is - /// no body. - pub fn body_segs_mut(&mut self) -> Option> { - if self.state.body.len == 0 { - return None; - } - - let mut body_segs = vec![]; - let body_seg = self.state.body.seg_index; - - for (i, seg) in self.segs[body_seg..].iter_mut().enumerate() { - if i == 0 { - // Panic: We are slicing with the parse data. If - // we parsed correctly, this should not panic. - body_segs.push( - seg.slice_mut_unchecked(self.state.body.seg_offset, None), - ); - } else { - body_segs.push(seg.slice_mut()); - } + #[inline] + pub fn body_mut(&mut self) -> Option<&mut [u8]> + where + T::Chunk: ByteSliceMut, + T: Pullup, + { + let out = self.state.meta.body_mut(); + if out.is_empty() { + None + } else { + Some(out) } + } - Some(body_segs) + #[inline] + pub fn mblk_addr(&self) -> uintptr_t { + self.state.base_ptr } /// Compute ULP and IP header checksum from scratch. /// - /// This should really only be used for testing. - pub fn compute_checksums(&mut self) { - if let Some(ulp_off) = self.state.hdr_offsets.inner.ulp { - let mut body_rdr = self.get_body_rdr(); - let mut csum = Checksum::from(0u32); - loop { - let len = body_rdr.seg_left(); - match body_rdr.slice(len) { - Ok(seg_bytes) => csum.add_bytes(seg_bytes), - _ => break, - } - } - - self.state.body_csum = Some(csum); + /// This should really only be used for testing, or in the case + /// where we have applied body transforms and know that any initial + /// body_csum cannot be valid. 
+ pub fn compute_checksums(&mut self) + where + T::Chunk: ByteSliceMut, + T: Pullup, + { + let mut body_csum = Checksum::new(); + body_csum.add_bytes(self.body().unwrap_or_default()); + self.state.body_csum = Some(body_csum); + + if let Some(ulp) = &mut self.state.meta.headers.inner_ulp { + let mut csum = body_csum; // Unwrap: Can't have a ULP without an IP. - let ip = self.meta().inner.ip.unwrap(); + let ip = self.state.meta.headers.inner_l3.as_ref().unwrap(); // Add pseudo header checksum. - let pseudo_csum = ip.pseudo_csum(); + let pseudo_csum = ip.pseudo_header(); csum += pseudo_csum; - // All headers must reside in the first segment. - let seg0_bytes = self.segs[0].slice_mut(); // Determine ULP slice and add its bytes to the // checksum. - let ulp_start = ulp_off.seg_pos; - let ulp_end = ulp_start + ulp_off.hdr_len; - let ulp = &mut seg0_bytes[ulp_start..ulp_end]; - - match self.state.meta.inner.ulp.as_mut().unwrap() { - UlpMeta::Icmpv4(icmp) => { - Self::update_icmp_csum( - icmp, - self.state.body_csum.unwrap(), - ulp, - ); + match ulp { + // ICMP4 requires the body_csum *without* + // the pseudoheader added back in. 
+ Ulp::IcmpV4(i4) => { + let mut bytes = [0u8; 8]; + i4.set_checksum(0); + i4.emit_raw(&mut bytes[..]); + body_csum.add_bytes(&bytes[..]); + i4.set_checksum(body_csum.finalize_for_ingot()); } - - UlpMeta::Icmpv6(icmp) => { - Self::update_icmp_csum(icmp, csum, ulp); + Ulp::IcmpV6(i6) => { + let mut bytes = [0u8; 8]; + i6.set_checksum(0); + i6.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + i6.set_checksum(csum.finalize_for_ingot()); } - - UlpMeta::Tcp(tcp) => { - Self::update_tcp_csum(tcp, csum, ulp); + Ulp::Tcp(tcp) => { + tcp.set_checksum(0); + match tcp { + Header::Repr(tcp) => { + let mut bytes = [0u8; 56]; + tcp.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + } + Header::Raw(tcp) => { + csum.add_bytes(tcp.0.as_bytes()); + match &tcp.1 { + Header::Repr(opts) => { + csum.add_bytes(opts); + } + Header::Raw(opts) => { + csum.add_bytes(opts); + } + } + } + } + tcp.set_checksum(csum.finalize_for_ingot()); } - - UlpMeta::Udp(udp) => { - Self::update_udp_csum(udp, csum, ulp); + Ulp::Udp(udp) => { + udp.set_checksum(0); + match udp { + Header::Repr(udp) => { + let mut bytes = [0u8; 8]; + udp.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + } + Header::Raw(udp) => { + csum.add_bytes(udp.0.as_bytes()); + } + } + udp.set_checksum(csum.finalize_for_ingot()); } } } // Compute and fill in the IPv4 header checksum. - if let Some(IpMeta::Ip4(ip)) = self.state.meta.inner.ip.as_mut() { - let ip_off = self.state.hdr_offsets.inner.ip.unwrap(); - let all_hdr_bytes = self.segs[0].slice_mut(); - let ip_start = ip_off.seg_pos; - let ip_end = ip_start + ip_off.hdr_len; - let csum = HeaderChecksum::from(Checksum::compute( - &all_hdr_bytes[ip_start..ip_end], - )) - .bytes(); - - // Update the metadata. - ip.csum = csum; - - // Update the header bytes. 
- let csum_begin = ip_start + Ipv4Hdr::CSUM_BEGIN; - let csum_end = ip_start + Ipv4Hdr::CSUM_END; - all_hdr_bytes[csum_begin..csum_end].copy_from_slice(&csum[..]); + if let Some(l3) = self.state.meta.headers.inner_l3.as_mut() { + l3.compute_checksum(); } } - fn update_icmp_csum( - icmp: &mut IcmpMeta, - mut csum: Checksum, - ulp: &mut [u8], - ) { - let csum_start = IcmpHdr::CSUM_BEGIN_OFFSET; - let csum_end = IcmpHdr::CSUM_END_OFFSET; - - // First we must zero the existing checksum. - ulp[csum_start..csum_end].copy_from_slice(&[0; 2]); - // Then we can add the ULP header bytes to the checksum. - csum.add_bytes(ulp); - // Convert the checksum to its final form. - let ulp_csum = HeaderChecksum::from(csum).bytes(); - // Update the ICMP(v6) metadata. - icmp.csum = ulp_csum; - // Update the ICMP(v6) header bytes. - ulp[csum_start..csum_end].copy_from_slice(&ulp_csum); - } - - fn update_tcp_csum(tcp: &mut TcpMeta, mut csum: Checksum, ulp: &mut [u8]) { - let csum_start = TcpHdr::CSUM_BEGIN_OFFSET; - let csum_end = TcpHdr::CSUM_END_OFFSET; - - // First we must zero the existing checksum. - ulp[csum_start..csum_end].copy_from_slice(&[0; 2]); - // Then we can add the ULP header bytes to the checksum. - csum.add_bytes(ulp); - // Convert the checksum to its final form. - let ulp_csum = HeaderChecksum::from(csum).bytes(); - // Update the TCP metadata. - tcp.csum = ulp_csum; - // Update the TCP header bytes. - ulp[csum_start..csum_end].copy_from_slice(&ulp_csum); - } - - fn update_udp_csum(udp: &mut UdpMeta, mut csum: Checksum, ulp: &mut [u8]) { - let csum_start = UdpHdr::CSUM_BEGIN_OFFSET; - let csum_end = UdpHdr::CSUM_END_OFFSET; - - // First we must zero the existing checksum. - ulp[csum_start..csum_end].copy_from_slice(&[0; 2]); - // Then we can add the ULP header bytes to the checksum. - csum.add_bytes(ulp); - // Convert the checksum to its final form. - let ulp_csum = HeaderChecksum::from(csum).bytes(); - // Update the UDP metadata. 
- udp.csum = ulp_csum; - // Update the UDP header bytes. - ulp[csum_start..csum_end].copy_from_slice(&ulp_csum); + pub fn body_csum(&mut self) -> Option { + self.state.body_csum + } + + pub fn l4_hash(&mut self) -> u32 { + *self.state.l4_hash.get(|| { + let mut hasher = crc32fast::Hasher::new(); + self.state.flow.hash(&mut hasher); + hasher.finalize() + }) + } + + pub fn set_l4_hash(&mut self, hash: u32) { + self.state.l4_hash.set(hash); } /// Perform an incremental checksum update for the ULP checksums @@ -1537,1707 +1371,489 @@ impl Packet { /// /// This avoids duplicating work already done by the client in the /// case where checksums are **not** being offloaded to the hardware. - fn update_checksums(&mut self, update_ip: bool, update_ulp: bool) { + pub fn update_checksums(&mut self) + where + T::Chunk: ByteSliceMut, + T: Pullup, + { + // If we know that no transform touched a field which features in + // an inner transport cksum (L4/L3 src/dst, most realistically), + // and no body transform occurred then we can exit early. + if !self.checksums_dirty() && !self.state.body_modified { + return; + } + + // Flag to indicate if an IP header/ULP checksums were + // provided. If the checksum is zero, it's assumed heardware + // checksum offload is being used, and OPTE should not update + // the checksum. + let update_ip = self.state.meta.has_ip_csum(); + let update_ulp = self.state.meta.has_ulp_csum(); + + // We expect that any body transform will necessarily invalidate + // the body_csum. Recompute from scratch. + if self.state.body_modified && (update_ip || update_ulp) { + return self.compute_checksums(); + } + + // Start by reusing the known checksum of the body. + let mut body_csum = self.body_csum().unwrap_or_default(); + // If a ULP exists, then compute and set its checksum. 
- if let (true, Some(ulp_off)) = - (update_ulp, self.state.hdr_offsets.inner.ulp) + if let (true, Some(ulp)) = + (update_ulp, &mut self.state.meta.headers.inner_ulp) { - // Start by reusing the known checksum of the body. - let mut csum = self.state.body_csum.unwrap(); + let mut csum = body_csum; // Unwrap: Can't have a ULP without an IP. - let ip = self.meta().inner.ip.unwrap(); + let ip = self.state.meta.headers.inner_l3.as_ref().unwrap(); // Add pseudo header checksum. - let pseudo_csum = ip.pseudo_csum(); + let pseudo_csum = ip.pseudo_header(); csum += pseudo_csum; - // All headers must reside in the first segment. - let all_hdr_bytes = self.segs[0].slice_mut(); // Determine ULP slice and add its bytes to the // checksum. - let ulp_start = ulp_off.seg_pos; - let ulp_end = ulp_start + ulp_off.hdr_len; - let ulp = &mut all_hdr_bytes[ulp_start..ulp_end]; - - match self.state.meta.inner.ulp.as_mut().unwrap() { - UlpMeta::Icmpv4(icmp) => { - Self::update_icmp_csum( - icmp, - // ICMP4 requires the body_csum *without* - // the pseudoheader added back in. - self.state.body_csum.unwrap(), - ulp, - ); + match ulp { + // ICMP4 requires the body_csum *without* + // the pseudoheader added back in. 
+ Ulp::IcmpV4(i4) => { + let mut bytes = [0u8; 8]; + i4.set_checksum(0); + i4.emit_raw(&mut bytes[..]); + body_csum.add_bytes(&bytes[..]); + i4.set_checksum(body_csum.finalize_for_ingot()); } - - UlpMeta::Icmpv6(icmp) => { - Self::update_icmp_csum(icmp, csum, ulp); + Ulp::IcmpV6(i6) => { + let mut bytes = [0u8; 8]; + i6.set_checksum(0); + i6.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + i6.set_checksum(csum.finalize_for_ingot()); } - - UlpMeta::Tcp(tcp) => { - Self::update_tcp_csum(tcp, csum, ulp); + Ulp::Tcp(tcp) => { + tcp.set_checksum(0); + match tcp { + Header::Repr(tcp) => { + let mut bytes = [0u8; 56]; + tcp.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + } + Header::Raw(tcp) => { + csum.add_bytes(tcp.0.as_bytes()); + match &tcp.1 { + Header::Repr(opts) => { + csum.add_bytes(opts); + } + Header::Raw(opts) => { + csum.add_bytes(opts); + } + } + } + } + tcp.set_checksum(csum.finalize_for_ingot()); } - - UlpMeta::Udp(udp) => { - Self::update_udp_csum(udp, csum, ulp); + Ulp::Udp(udp) => { + udp.set_checksum(0); + match udp { + Header::Repr(udp) => { + let mut bytes = [0u8; 8]; + udp.emit_raw(&mut bytes[..]); + csum.add_bytes(&bytes[..]); + } + Header::Raw(udp) => { + csum.add_bytes(udp.0.as_bytes()); + } + } + udp.set_checksum(csum.finalize_for_ingot()); } } } // Compute and fill in the IPv4 header checksum. - if let (true, Some(IpMeta::Ip4(ip))) = - (update_ip, self.state.meta.inner.ip.as_mut()) + if let (true, Some(l3)) = + (update_ip, &mut self.state.meta.headers.inner_l3) { - let ip_off = self.state.hdr_offsets.inner.ip.unwrap(); - let all_hdr_bytes = self.segs[0].slice_mut(); - let ip_start = ip_off.seg_pos; - let ip_end = ip_start + ip_off.hdr_len; - let ip_bytes = &mut all_hdr_bytes[ip_start..ip_end]; - let csum_start = Ipv4Hdr::CSUM_BEGIN; - let csum_end = Ipv4Hdr::CSUM_END; - ip_bytes[csum_start..csum_end].copy_from_slice(&[0; 2]); - let csum = - HeaderChecksum::from(Checksum::compute(ip_bytes)).bytes(); - - // Update the metadata. 
- ip.csum = csum; - - // Update the header bytes. - ip_bytes[csum_start..csum_end].copy_from_slice(&csum[..]); - } - } - - pub fn hdr_offsets(&self) -> HeaderOffsets { - self.state.hdr_offsets.clone() - } - - /// Run the [`HdrTransform`] against this packet. - #[inline] - pub fn hdr_transform( - &mut self, - xform: &HdrTransform, - ) -> Result<(), HdrTransformError> { - xform.run(&mut self.state.meta)?; - self.state.flow = InnerFlowId::from(&self.state.meta); - Ok(()) - } - - /// Return a reference to the flow ID of this packet. - #[inline] - pub fn flow(&self) -> &InnerFlowId { - &self.state.flow - } - - pub fn get_body_rdr(&self) -> PacketReader { - let mut rdr = PacketReader::new(&self.segs); - // XXX While this works for now it might be nice to have a - // better mechanism for dealing with the body. For example, we - // know this seek() call can't fail, but the current - // abstraction isn't powerful enough to encode that in the - // type system. - rdr.seek(self.body_offset()).expect("failed to seek to body"); - rdr - } - - pub fn get_rdr(&self) -> PacketReader { - PacketReader::new(&self.segs) - } - - pub fn get_rdr_mut(&mut self) -> PacketReaderMut { - PacketReaderMut::new(&mut self.segs) - } - - #[inline] - pub fn is_tcp(&self) -> bool { - self.state.meta.inner.is_tcp() - } - - #[inline] - pub fn meta(&self) -> &PacketMeta { - &self.state.meta - } - - #[inline] - pub fn meta_mut(&mut self) -> &mut PacketMeta { - &mut self.state.meta - } - - /// Return the mblk pointer value as a formatted String. This is - /// for debugging purposes. - pub fn mblk_ptr_str(&self) -> String { - format!("{:p}", self.segs[0].mp) - } - - // Determine if the new header fits in the existing first segment. - // If it does, then modify the mblk pointers to reflect the length - // of the new header. If it does not, then insert a new segment to - // the front. 
- fn hdr_seg( - segs: &mut Vec, - new_hdr_len: usize, - body: &mut BodyInfo, - ) { - let prefix_len = segs[0].prefix_len(); - // Determine the length of the original headers. This is - // equivalent to where the body starts. - let old_hdr_len = body.pkt_offset; - - #[allow(clippy::comparison_chain)] - if new_hdr_len > old_hdr_len { - if prefix_len + old_hdr_len >= new_hdr_len { - // In this case we can fix the new headers in the existing - // first segment. - let delta = new_hdr_len - old_hdr_len; - segs[0].expand_start(delta).unwrap(); - - // If the body starts in this first segment, then make - // sure to update its segment offset. - if body.seg_index == 0 { - body.seg_offset = new_hdr_len; - } - } else { - // In this case we need to "erase" the old headers and - // allocate an mblk to hold the new headers. - // - // This assumes that the headers all reside in the - // first segment. This is checked for in parsing and if the - // headers are not all in the first segment, the leading - // segments are squashed into one until this becomes true. - segs[0].shrink_start(old_hdr_len).unwrap(); - - // Create the new segment for holding the new headers. - let mut seg = unsafe { - let mp = allocb(new_hdr_len); - PacketSeg::wrap_mblk(mp) - }; - - // Make room to write the new headers. - seg.expand_end(new_hdr_len).unwrap(); - - // We shrunk the first segment to erase the old - // headers. If the body starts in this same segment, - // then we need to adjust its segment offset to - // reflect the fact that there is no header data - // before it. That is, since we know we are erasing - // the entirety of the original headers in the - // original first segment, we also know that the body - // must now start at segment offset 0. - // - // If the body **does not** start in the same segment - // as the original headers, then its offset does not - // change, because its segment is not adjusted. 
- if body.seg_index == 0 { - assert_eq!(body.seg_offset - old_hdr_len, 0); - body.seg_offset = 0; - } - if segs[0].len() > 0 { - seg.link(&segs[0]); - // TODO-performance: This may necessitate another allocation. We - // will want to measure how often we hit this branch, and the - // impact of the allocation. - segs.insert(0, seg); - } else { - // If we shrunk the segment to nothing, do not link a zero - // sized segment as a continuation block. This is not a - // generally expected thing and has caused NIC hardware to - // stop working. - if segs.len() > 1 { - seg.link(&segs[1]); - } - let mut zero_sized = core::mem::replace(&mut segs[0], seg); - zero_sized.unlink(); - zero_sized.free(); - } - - // We've added a segment to the front of the list; the - // body segment moves over by one. - body.seg_index += 1; - } - } else if new_hdr_len < old_hdr_len { - let delta = old_hdr_len - new_hdr_len; - segs[0].shrink_start(delta).unwrap(); - - // If the body starts in this first segment, then make - // sure to update its segment offset. - if body.seg_index == 0 { - body.seg_offset = new_hdr_len; - } - } - - unsafe { - assert!((*segs[0].mp).b_rptr >= (*segs[0].dblk).db_base); - assert!((*segs[0].mp).b_rptr <= (*segs[0].mp).b_wptr); - } - - // With regards to the overall packet, we know the body should - // start after the new headers. - body.pkt_offset = new_hdr_len; - } - - /// Emit the new headers to the [`Packet`] based on its current - /// metadata. - pub fn emit_new_headers(&mut self) -> Result<(), WriteError> { - // At this point the packet metadata represents the - // transformations made by the pipeline. We take the following - // steps to emit the new headers and update the packet data. - // - // 1. Figure out length required to emit the new headers. - // - // 2. Determine if this length can be met by the current first - // segment. If not, allocate a new segment to prepend to - // the xlist. - // - // 3. Emit the new header bytes based on the current metadata. 
- // - // 4. Update the headers offsets, body info, and checksums. - let innerm = &self.state.meta.inner; - - // Flag to indicate if an IP header/ULP checksums were - // provided. If the checksum is zero, it's assumed heardware - // checksum offload is being used, and OPTE should not update - // the checksum. - let inner_ip_csum = innerm.has_ip_csum(); - let inner_ulp_csum = innerm.has_ulp_csum(); - - // The length of the new headers. - let new_hdr_len = self.state.meta.hdr_len(); - // The total length of the new packet, including headers and - // body. This is used to determine the offset/length values of - // the new headers. - let new_pkt_len = new_hdr_len + self.state.body.len; - - // Given the new header length requirement, determine if it - // can be met with the current segment buffers, or if a new - // segment must be allocated and tacked onto the front of the - // segment list. - // - // Upon returning from this function the header offsets are no - // longer correct. New offsets are calculated as part of - // emitting the new headers below. - // - // The body offset **is** updated as part of this function, - // and is correct upon return. - Self::hdr_seg(&mut self.segs, new_hdr_len, &mut self.state.body); - let mut wtr = self.segs[0].get_writer(); - let new_offsets = Self::emit_headers( - &mut wtr, - &mut self.state.meta.outer, - &mut self.state.meta.inner, - new_pkt_len, - )?; - - // Update the header offsets. - self.state.hdr_offsets = new_offsets; - self.avail = self.segs.iter().map(|s| s.avail).sum(); - self.state.len = self.segs.iter().map(|s| s.len).sum(); - - // Update the ULP and IP header checksums. 
- self.update_checksums(inner_ip_csum, inner_ulp_csum); - Ok(()) - } - - fn emit_outer_headers( - wtr: &mut PacketSegWriter, - meta: &mut OuterMeta, - new_pkt_len: usize, - ) -> Result<(usize, OuterHeaderOffsets), WriteError> { - let mut offsets = OuterHeaderOffsets::default(); - let mut pkt_offset = 0; - - match &meta.ether { - Some(ether) => { - ether.emit(wtr.slice_mut(EtherHdr::SIZE)?); - offsets.ether = Some(HdrOffset { - pkt_pos: pkt_offset, - seg_idx: 0, - seg_pos: pkt_offset, - hdr_len: EtherHdr::SIZE, - }); - pkt_offset += EtherHdr::SIZE; - } - - // If there is no outer Ethernet, then there can be no - // outer headers at all. - None => return Ok((pkt_offset, offsets)), - } - - match meta.ip.as_mut() { - Some(IpMeta::Ip4(ip4)) => { - ip4.total_len = (new_pkt_len - pkt_offset) as u16; - ip4.emit(wtr.slice_mut(ip4.hdr_len())?); - offsets.ip = Some(HdrOffset { - pkt_pos: pkt_offset, - seg_idx: 0, - seg_pos: pkt_offset, - hdr_len: ip4.hdr_len(), - }); - pkt_offset += ip4.hdr_len(); - } - - Some(IpMeta::Ip6(ip6)) => { - // IPv6 Payload Length field is defined in RFC 2640 section 3 - // as: - // - // > Length of the IPv6 payload, i.e., the rest of the packet - // > following this IPv6 header, in octets. (Note that any - // > extension headers [section 4] present are considered part - // > of the payload, i.e., included in the length count.) - // - // So we need to remove the size of the fixed header (40 - // octets), which is included in the total new packet length, - // when setting the payload length. 
- ip6.pay_len = - (new_pkt_len - pkt_offset - Ipv6Hdr::BASE_SIZE) as u16; - ip6.emit(wtr.slice_mut(ip6.hdr_len())?); - offsets.ip = Some(HdrOffset { - pkt_pos: pkt_offset, - seg_idx: 0, - seg_pos: pkt_offset, - hdr_len: ip6.hdr_len(), - }); - pkt_offset += ip6.hdr_len(); - } - - None => return Ok((pkt_offset, offsets)), - } - - match meta.encap.as_mut() { - Some(EncapMeta::Geneve(geneve)) => { - geneve.emit( - (new_pkt_len - pkt_offset) as u16, - wtr.slice_mut(geneve.hdr_len())?, - ); - // geneve.emit(wtr.slice_mut(geneve.hdr_len())?); - offsets.ip = Some(HdrOffset { - pkt_pos: pkt_offset, - seg_idx: 0, - seg_pos: pkt_offset, - hdr_len: geneve.hdr_len(), - }); - pkt_offset += geneve.hdr_len(); - } - - None => return Ok((pkt_offset, offsets)), - } - - Ok((pkt_offset, offsets)) - } - - fn emit_inner_headers( - wtr: &mut PacketSegWriter, - meta: &mut InnerMeta, - mut pkt_offset: usize, - new_pkt_len: usize, - ) -> Result { - let mut offsets = InnerHeaderOffsets::default(); - - // ================================================================ - // Ether - // ================================================================ - meta.ether.emit(wtr.slice_mut(EtherHdr::SIZE)?); - offsets.ether = HdrOffset { - pkt_pos: pkt_offset, - seg_idx: 0, - seg_pos: pkt_offset, - hdr_len: EtherHdr::SIZE, - }; - pkt_offset += EtherHdr::SIZE; - - // ================================================================ - // IP - // ================================================================ - match meta.ip.as_mut() { - Some(IpMeta::Ip4(ip4)) => { - ip4.total_len = (new_pkt_len - pkt_offset) as u16; - ip4.emit(wtr.slice_mut(ip4.hdr_len())?); - offsets.ip = Some(HdrOffset { - pkt_pos: pkt_offset, - seg_idx: 0, - seg_pos: pkt_offset, - hdr_len: ip4.hdr_len(), - }); - pkt_offset += ip4.hdr_len(); - } - - Some(IpMeta::Ip6(ip6)) => { - // IPv6 Payload Length field is defined in RFC 2640 section 3 - // as: - // - // > Length of the IPv6 payload, i.e., the rest of the packet - // > following 
this IPv6 header, in octets. (Note that any - // > extension headers [section 4] present are considered part - // > of the payload, i.e., included in the length count.) - // - // So we need to remove the size of the fixed header (40 - // octets), which is included in the total new packet length, - // when setting the payload length. - ip6.pay_len = - (new_pkt_len - pkt_offset - Ipv6Hdr::BASE_SIZE) as u16; - ip6.emit(wtr.slice_mut(ip6.hdr_len())?); - offsets.ip = Some(HdrOffset { - pkt_pos: pkt_offset, - seg_idx: 0, - seg_pos: pkt_offset, - hdr_len: ip6.hdr_len(), - }); - pkt_offset += ip6.hdr_len(); - } - - None => return Ok(offsets), + l3.compute_checksum(); } - - // ================================================================ - // ULP - // ================================================================ - match meta.ulp.as_mut() { - Some(UlpMeta::Icmpv4(icmp)) => { - icmp.emit(wtr.slice_mut(icmp.hdr_len())?); - offsets.ulp = Some(HdrOffset { - pkt_pos: pkt_offset, - seg_idx: 0, - seg_pos: pkt_offset, - hdr_len: icmp.hdr_len(), - }); - } - - Some(UlpMeta::Icmpv6(icmp6)) => { - icmp6.emit(wtr.slice_mut(icmp6.hdr_len())?); - offsets.ulp = Some(HdrOffset { - pkt_pos: pkt_offset, - seg_idx: 0, - seg_pos: pkt_offset, - hdr_len: icmp6.hdr_len(), - }); - } - - Some(UlpMeta::Udp(udp)) => { - udp.len = (new_pkt_len - pkt_offset) as u16; - udp.emit(wtr.slice_mut(udp.hdr_len())?); - offsets.ulp = Some(HdrOffset { - pkt_pos: pkt_offset, - seg_idx: 0, - seg_pos: pkt_offset, - hdr_len: udp.hdr_len(), - }); - } - - Some(UlpMeta::Tcp(tcp)) => { - tcp.emit(wtr.slice_mut(tcp.hdr_len())?); - offsets.ulp = Some(HdrOffset { - pkt_pos: pkt_offset, - seg_idx: 0, - seg_pos: pkt_offset, - hdr_len: tcp.hdr_len(), - }); - } - - None => return Ok(offsets), - } - - Ok(offsets) - } - - /// Emit header bytes to the given writer based on the passed-in - /// metadata. 
- fn emit_headers( - wtr: &mut PacketSegWriter<'_>, - outer_meta: &mut OuterMeta, - inner_meta: &mut InnerMeta, - new_pkt_len: usize, - ) -> Result { - let (pkt_offset, outer_offsets) = - Self::emit_outer_headers(wtr, outer_meta, new_pkt_len)?; - - let inner_offsets = - Self::emit_inner_headers(wtr, inner_meta, pkt_offset, new_pkt_len)?; - - Ok(HeaderOffsets { outer: outer_offsets, inner: inner_offsets }) } } -impl Packet { - /// Clone and return all bytes. This is used for testing. - pub fn all_bytes(&self) -> Vec { - let mut bytes = Vec::with_capacity(self.state.len()); - for seg in &self.segs { - let s = unsafe { slice::from_raw_parts((*seg.mp).b_rptr, seg.len) }; - bytes.extend_from_slice(s); - } - bytes - } - - /// Return the length of the packet. - /// - /// NOTE: This length only includes the _initialized_ bytes of the - /// packet. Each [`PacketSeg`] may contain _uninitialized_ bytes - /// at the head or tail (or both) of the segment. - /// - /// This is equivalent to the `msgsize(9F)` function in illumos. - pub fn len(&self) -> usize { - self.state.len() - } - - /// Return a byte slice of the bytes in `seg`. - pub fn seg_bytes(&self, seg: usize) -> &[u8] { - let seg = &self.segs[seg]; - // Safety: As long as the `mp` pointer is legit this is safe. - unsafe { slice::from_raw_parts((*seg.mp).b_rptr, seg.len) } - } +impl> PacketState + for LiteParsed +{ } +impl PacketState for FullParsed {} -/// A packet segment represents one or more (or all) bytes of a -/// [`Packet`]. -#[derive(Clone, Debug)] -pub struct PacketSeg { - mp: *mut mblk_t, - dblk: *mut dblk_t, +/// Zerocopy view onto a parsed packet, accompanied by locally +/// computed state. +pub struct FullParsed { + /// Total length of packet, in bytes. This is equal to the sum of + /// the length of the _initialized_ window in all the segments + /// (`b_wptr - b_rptr`). 
len: usize, - avail: usize, -} - -#[derive(Clone, Copy, Debug)] -pub enum SegAdjustError { - /// Attempt to place the end of the writable/readable area of the - /// segment past the limit of the underlying buffer. - EndPastLimit, - - /// Attempt to place the start of the writable/readable area of - /// the segment before the base of the underlying buffer. - StartBeforeBase, - - /// Attempt to place the start the writable/readable area of the - /// segment outside the range of the underlying buffer. - StartPastEnd, -} - -impl PacketSeg { - fn alloc(len: usize) -> Self { - // Safety: We know this is safe because we are literally - // passing in an mblk derived from `allocb(9F)`. - unsafe { PacketSeg::wrap_mblk(allocb(len)) } - } - - fn free(&mut self) { - cfg_if! { - if #[cfg(all(not(feature = "std"), not(test)))] { - unsafe { ddi::freemsg(self.mp) }; - } else { - mock_freemsg(self.mp); - } - } - } - - /// Return the bytes of the packet. - /// - /// This is useful for testing. - #[cfg(test)] - pub fn bytes(&self) -> &[u8] { - unsafe { slice::from_raw_parts((*self.mp).b_rptr, self.len) } - } - - /// Expand the writable/readable area by pushing `b_wptr` out by - /// len. - /// - /// # Errors - /// - /// `SegAdjustError::EndPastLimit`: Expanding by `len` would put the - /// `b_wptr` past the underlying buffer's limit (`db_lim`). - pub fn expand_end(&mut self, len: usize) -> Result<(), SegAdjustError> { - let wptr = unsafe { (*self.mp).b_wptr }; - let lim = unsafe { (*self.dblk).db_lim }; - let new_wptr = unsafe { wptr.add(len) }; - - if new_wptr > lim { - return Err(SegAdjustError::EndPastLimit); - } - - unsafe { - (*self.mp).b_wptr = new_wptr; - } - self.len = unsafe { - (*self.mp).b_wptr.offset_from((*self.mp).b_rptr) as usize - }; - Ok(()) - } - - /// Expand the writable/readable area by shifting `b_rptr` by len; - /// effectively adding bytes to the start of the packet. 
- /// - /// # Errors - /// - /// `SegAdjustError::StartBeforeBase`: Shift the read pointer left - /// by `len` bytes would place `b_rptr` before the underlying - /// buffer's base (`db_base`). - pub fn expand_start(&mut self, len: usize) -> Result<(), SegAdjustError> { - let rptr = unsafe { (*self.mp).b_rptr }; - let base = unsafe { (*self.dblk).db_base }; - let new_rptr = unsafe { rptr.sub(len) }; - - if new_rptr < base { - return Err(SegAdjustError::StartBeforeBase); - } - - unsafe { - (*self.mp).b_rptr = new_rptr; - } - self.len = unsafe { - (*self.mp).b_wptr.offset_from((*self.mp).b_rptr) as usize - }; - Ok(()) - } - - /// Shrink the writable/readable area by shifting the `b_rptr` by - /// `len`; effectively removing bytes from the start of the packet. - /// - /// # Errors - /// - /// `SegAdjustError::StartPastEnd`: Shifting the read pointer by - /// `len` would move `b_rptr` past `b_wptr`. - pub fn shrink_start(&mut self, len: usize) -> Result<(), SegAdjustError> { - let wptr = unsafe { (*self.mp).b_wptr }; - let rptr = unsafe { (*self.mp).b_rptr }; - let new_rptr = unsafe { rptr.add(len) }; - - if new_rptr > wptr { - return Err(SegAdjustError::StartPastEnd); - } - - unsafe { - (*self.mp).b_rptr = new_rptr; - } - self.len = unsafe { - (*self.mp).b_wptr.offset_from((*self.mp).b_rptr) as usize - }; - Ok(()) - } - - pub fn get_writer(&mut self) -> PacketSegWriter { - PacketSegWriter::new(self, 0, self.len).unwrap() - } - - pub fn len(&self) -> usize { - self.len - } - - fn link(&mut self, seg: &PacketSeg) { - unsafe { - // We should not be creating message block continuations to zero - // sized blocks. This is not a generally expected thing and has - // caused NIC hardware to stop working. Stopping short of a - // production panic, but this should fail any tests. 
- debug_assert!( - (*seg.mp).b_wptr != (*seg.mp).b_rptr, - "zero-length continuation", - ); - (*self.mp).b_cont = seg.mp - }; - } - - fn unlink(&mut self) { - unsafe { - (*self.mp).b_cont = ptr::null_mut(); - } - } - - // The amount of space available between the data buffer's base - // (`dblk_t.db_base`) and the packet's start (`mblk_t.b_rptr`). - fn prefix_len(&self) -> usize { - let prefix = - unsafe { (*self.mp).b_rptr.offset_from((*self.dblk).db_base) }; - assert!(prefix >= 0); - prefix as usize - } - - /// Get a slice of the entire segment. - fn slice(&self) -> &[u8] { - // Panic: We are using the segment's own data to take a slice - // of the entire segment. - self.slice_unchecked(0, None) - } - - /// Get a mutable slice of the entire segment. - fn slice_mut(&mut self) -> &mut [u8] { - // Panic: We are using the segment's own data to take a slice - // of the entire segment. - self.slice_mut_unchecked(0, None) - } - - /// Get a slice of the segment. - /// - /// The slice starts at `offset` and consists of `len` bytes. If - /// the length is `None`, then the slice extends to the end of the - /// segment. This includes only the part of the dblk which has - /// been written, i.e. the bytes from `mblk.b_rptr` to - /// `mblk.b_wptr`. - /// - /// # Safety - /// - /// It is up to the caller to ensure that `offset` and `offset + - /// len` reside within the segment boundaries. - /// - /// # Panic - /// - /// The slice formed by the `offset` and `offset + len` MUST be - /// within the bounds of the segment, otherwise panic. - fn slice_unchecked(&self, offset: usize, len: Option) -> &[u8] { - if offset > self.len { - panic!( - "offset is outside the bounds of the mblk: \ - offset: {} len: {} mblk: {:p}", - offset, self.len, self.mp - ); - } - - // Safety: This pointer was handed to us by the system. 
- let start = unsafe { (*self.mp).b_rptr.add(offset) }; - let len = len.unwrap_or(self.len - offset); - // Safety: If this end is outside the bound of the segment we - // panic below. - let end = unsafe { start.add(len) }; - // Safety: This pointer was handed to us by the system. - let b_wptr = unsafe { (*self.mp).b_wptr }; - assert!( - end <= b_wptr, - "slice past end of segment: offset: {} len: {} end: {:p} \ - mblk: {:p} b_wptr: {:p}", - offset, - len, - end, - self.mp, - b_wptr, - ); - - // Safety: We have verified that the slice is within the - // bounds of the segment. - unsafe { slice::from_raw_parts(start, len) } - } - - /// Get a mutable slice of the segment. - /// - /// The slice starts at `offset` and consists of `len` bytes. If - /// the length is `None`, then the slice extends to the end of the - /// segment. This includes only the part of the dblk which has - /// been written, i.e. the bytes from `mblk.b_rptr` to - /// `mblk.b_wptr`. - /// - /// # Panic - /// - /// The slice formed by the `offset` and `offset + len` MUST be - /// within the bounds of the segment, otherwise panic. - fn slice_mut_unchecked( - &mut self, - offset: usize, - len: Option, - ) -> &mut [u8] { - if offset > self.len { - panic!( - "offset is outside the bounds of the mblk: \ - offset: {} len: {} mblk: {:p}", - offset, self.len, self.mp - ); - } - - // Safety: This pointer was handed to us by the system. - let start = unsafe { (*self.mp).b_rptr.add(offset) }; - let len = len.unwrap_or(self.len - offset); - // Safety: If this end is outside the bound of the segment we - // panic below. - let end = unsafe { start.add(len) }; - // Safety: This pointer was handed to us by the system. - let b_wptr = unsafe { (*self.mp).b_wptr }; - assert!( - end <= b_wptr, - "slice past end of segment: offset: {} len: {} end: {:p} \ - mblk: {:p} b_wptr: {:p}", - offset, - len, - end, - self.mp, - b_wptr, - ); - - // Safety: We have verified that the slice is within the - // bounds of the segment. 
- unsafe { slice::from_raw_parts_mut(start, len) } - } - - // Wrap an existing `mblk_t`, taking ownership of it. - // - // # Safety - // - // The `mp` passed must be a non-NULL pointer to an `mblk_t` - // created by one of the `allocb(9F)` family of calls. - // - // After calling this function, the original mp pointer should - // not be dereferenced. - unsafe fn wrap_mblk(mp: *mut mblk_t) -> Self { - let dblk = (*mp).b_datap as *mut dblk_t; - let len = (*mp).b_wptr.offset_from((*mp).b_rptr) as usize; - let avail = (*dblk).db_lim.offset_from((*dblk).db_base) as usize; - PacketSeg { mp, dblk, avail, len } - } -} - -/// Modify the bytes of a packet segment. -/// -/// This type allows one to modify all or some of the bytes of a -/// [`PacketSeg`]. This is limited to the initialized bytes of the -/// segment, i.e., those that sit between `b_rptr` and `b_wptr`. -pub struct PacketSegWriter<'a> { - // Current position in the bytes slice. - pos: usize, - avail: usize, - bytes: &'a mut [u8], -} - -#[derive(Clone, Copy, Debug)] -pub enum ModifierCreateError { - StartOutOfRange, - EndOutOfRange, -} - -impl<'a> PacketSegWriter<'a> { - /// Create a new [`PacketSegWriter`], starting at `offset` from - /// `b_rptr`, and running for `len` bytes. - /// - /// The slice of bytes selected must be within `b_rptr` and `b_wptr`. - /// - /// # Errors - /// - /// `ModifierCreateError::StartOutOfRange`: The `offset` value has - /// gone beyond `b_wptr`. - /// - /// `ModifierCreateError::EndOutOfRange`: The `b_rptr + offset + - /// len` has gone beyond `b_wptr`. 
- fn new( - seg: &'a mut PacketSeg, - offset: usize, - len: usize, - ) -> Result { - let b_rptr = unsafe { (*seg.mp).b_rptr }; - let b_wptr = unsafe { (*seg.mp).b_wptr }; - let start = unsafe { b_rptr.add(offset) }; - - if start > b_wptr { - return Err(ModifierCreateError::StartOutOfRange); - } - - let end = unsafe { start.add(len) }; - - if end > b_wptr { - return Err(ModifierCreateError::EndOutOfRange); - } - - let bytes = unsafe { slice::from_raw_parts_mut(start, len) }; - - Ok(Self { pos: 0, bytes, avail: len }) - } - - pub fn slice_mut(&mut self, len: usize) -> Result<&mut [u8], WriteError> { - if len > self.avail { - return Err(WriteError::NotEnoughBytes { - available: self.avail, - needed: len, - }); - } - - let end = self.pos + len; - let slice = &mut self.bytes[self.pos..end]; - self.pos += len; - self.avail -= len; - Ok(slice) - } - - pub fn write(&mut self, src: &[u8]) -> Result<(), WriteError> { - debug_assert!(self.bytes[self.pos..].len() >= src.len()); - let len = src.len(); - if len > self.avail { - return Err(WriteError::NotEnoughBytes { - available: self.avail, - needed: len, - }); - } - - let end = self.pos + len; - self.bytes[self.pos..end].copy_from_slice(src); - self.pos += len; - self.avail -= len; - Ok(()) - } - - pub fn write_u8(&mut self, val: u8) -> Result<(), WriteError> { - self.write(&[val]) - } - - pub fn write_u16(&mut self, val: u16) -> Result<(), WriteError> { - self.write(&val.to_be_bytes()) - } - - pub fn write_u32(&mut self, val: u32) -> Result<(), WriteError> { - self.write(&val.to_be_bytes()) - } -} - -#[derive(Clone, Copy, Debug, DError)] -pub enum WrapError { - /// We tried to wrap a NULL pointer. - NullPtr, -} - -/// Some functions may return multiple types of errors. 
-#[derive(Clone, Debug, DError)] -pub enum PacketError { - Parse(ParseError), - Wrap(WrapError), -} - -impl From for PacketError { - fn from(e: ParseError) -> Self { - Self::Parse(e) - } -} - -impl From for PacketError { - fn from(e: WrapError) -> Self { - Self::Wrap(e) - } -} - -#[derive(Clone, Debug, Eq, PartialEq, DError)] -#[derror(leaf_data = ParseError::data)] -pub enum ParseError { - BadHeader(HeaderReadErr), - BadInnerIpLen { - expected: usize, - actual: usize, - }, - BadInnerUlpLen { - expected: usize, - actual: usize, - }, - BadOuterIpLen { - expected: usize, - actual: usize, - }, - BadOuterUlpLen { - expected: usize, - actual: usize, - }, - BadRead(ReadErr), - TruncatedBody { - expected: usize, - actual: usize, - }, - #[leaf] - UnexpectedEtherType(super::ether::EtherType), - #[leaf] - UnsupportedEtherType(u16), - #[leaf] - UnexpectedProtocol(Protocol), - #[leaf] - UnexpectedDestPort(u16), - #[leaf] - UnsupportedProtocol(Protocol), -} - -impl ParseError { - fn data(&self, data: &mut [u64]) { - match self { - Self::BadInnerIpLen { expected, actual } - | Self::BadInnerUlpLen { expected, actual } - | Self::BadOuterIpLen { expected, actual } - | Self::BadOuterUlpLen { expected, actual } - | Self::TruncatedBody { expected, actual } => { - [data[0], data[1]] = [*expected as u64, *actual as u64] - } - Self::UnexpectedEtherType(eth) => data[0] = u16::from(*eth).into(), - Self::UnsupportedEtherType(eth) => data[0] = *eth as u64, - Self::UnexpectedProtocol(proto) => { - data[0] = u8::from(*proto).into() - } - Self::UnexpectedDestPort(port) => data[0] = (*port).into(), - Self::UnsupportedProtocol(proto) => { - data[0] = u8::from(*proto).into() - } - - _ => {} - } - } -} - -impl From for ParseError { - fn from(err: ReadErr) -> Self { - Self::BadRead(err) - } -} - -impl> From for ParseError { - fn from(value: T) -> Self { - Self::BadHeader(value.into()) - } -} - -#[derive(Clone, Debug, Eq, PartialEq, DError)] -pub enum HeaderReadErr { - EtherHdr(EtherHdrError), - 
ArpHdr(ArpHdrError), - GeneveHdr(GeneveHdrError), - Ipv4Hdr(Ipv4HdrError), - Ipv6Hdr(Ipv6HdrError), - IcmpHdr(IcmpHdrError), - TcpHdr(TcpHdrError), - UdpHdr(UdpHdrError), -} - -impl From for HeaderReadErr { - fn from(v: EtherHdrError) -> HeaderReadErr { - Self::EtherHdr(v) - } -} -impl From for HeaderReadErr { - fn from(v: ArpHdrError) -> HeaderReadErr { - Self::ArpHdr(v) - } -} -impl From for HeaderReadErr { - fn from(v: GeneveHdrError) -> HeaderReadErr { - Self::GeneveHdr(v) - } -} -impl From for HeaderReadErr { - fn from(v: Ipv4HdrError) -> HeaderReadErr { - Self::Ipv4Hdr(v) - } -} -impl From for HeaderReadErr { - fn from(v: Ipv6HdrError) -> HeaderReadErr { - Self::Ipv6Hdr(v) - } -} -impl From for HeaderReadErr { - fn from(v: IcmpHdrError) -> HeaderReadErr { - Self::IcmpHdr(v) - } -} -impl From for HeaderReadErr { - fn from(v: TcpHdrError) -> HeaderReadErr { - Self::TcpHdr(v) - } -} -impl From for HeaderReadErr { - fn from(v: UdpHdrError) -> HeaderReadErr { - Self::UdpHdr(v) - } -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq, DError)] -pub enum ReadErr { - BadLayout, - EndOfPacket, - NotEnoughBytes, - OutOfRange, - StraddledRead, - NotImplemented, -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum WriteError { - BadLayout, - EndOfPacket, - EtherHdr(EtherHdrError), - GeneveHdr(GeneveHdrError), - Ipv4Hdr(Ipv4HdrError), - Ipv6Hdr(Ipv6HdrError), - NotEnoughBytes { available: usize, needed: usize }, - Read(ReadErr), - StraddledWrite, - TcpHdr(TcpHdrError), - UdpHdr(UdpHdrError), -} - -impl From for WriteError { - fn from(e: TcpHdrError) -> Self { - Self::TcpHdr(e) - } -} - -impl From for WriteError { - fn from(e: UdpHdrError) -> Self { - Self::UdpHdr(e) - } -} - -impl From for WriteError { - fn from(e: EtherHdrError) -> Self { - Self::EtherHdr(e) - } -} - -impl From for WriteError { - fn from(e: GeneveHdrError) -> Self { - Self::GeneveHdr(e) - } -} - -impl From for WriteError { - fn from(e: Ipv4HdrError) -> Self { - Self::Ipv4Hdr(e) - } -} - -impl From 
for WriteError { - fn from(e: Ipv6HdrError) -> Self { - Self::Ipv6Hdr(e) - } -} - -impl From for WriteError { - fn from(e: ReadErr) -> Self { - Self::Read(e) - } -} - -pub type ReadResult = result::Result; -pub type WriteResult = result::Result; - -/// A trait for reading bytes from packets. -/// -/// All operations start from the current position and move it -/// forward, with the exception of `seek_back()`, which moves the -/// position backwards within the current segment. -pub trait PacketRead<'a> { - /// Copy all bytes from current position to the end of the packet - /// leaving the reader's internal state untouched. - fn copy_remaining(&self) -> Vec; - - /// Return the current position in the packet. - fn pos(&self) -> usize; - - /// Seek forwards from the current position by `amount`. The seek - /// may cross segment boundaries. - /// - /// # Errors - /// - /// If the seek would move beyond the end of the packet, then a - /// [`ReadErr::EndOfPacket`] is returned. - fn seek(&mut self, amount: usize) -> ReadResult<()>; - - /// Seek backwards from the current position by `amount`. - /// - /// # Errors - /// - /// If the seek would move beyond the beginning of the current - /// segment, then an error is returned. - fn seek_back(&mut self, amount: usize) -> ReadResult<()>; - - fn seg_left(&self) -> usize; - fn seg_idx(&self) -> usize; - fn seg_pos(&self) -> usize; + /// Base pointer of the contained T, used in dtrace SDTs and the like + /// for correlation and inspection of packet events. + base_ptr: uintptr_t, + /// Access to parsed packet headers and the packet body. + meta: Box>, + /// Current Flow ID of this packet, accounting for any applied + /// transforms. + flow: InnerFlowId, - /// Return the slice of `len` bytes starting from the current - /// position. + /// The body's checksum. It is up to the `NetworkImpl::Parser` on + /// whether to populate this field or not.
The reason for + /// populating this field is to avoid duplicate work if the client + /// has provided a ULP checksum. Rather than redoing the body + /// checksum calculation, we can use incremental checksum + /// techniques to stash the body's checksum for reuse when emitting + /// the new headers. /// - /// The slice *must* exist entirely in a single packet segment -- - /// it can never straddle multiple segments. + /// However, if the client does not provide a checksum, presumably + /// because they are relying on checksum offload, this value should + /// be `None`. In such case, `emit_headers()` will perform no ULP + /// checksum update. /// - /// # Errors - /// - /// If `self` cannot satisfy this request a `ReadErr` is returned. - fn slice<'b>(&'b mut self, len: usize) -> ReadResult<&'a [u8]>; -} - -/// Append: Append to the end of the segment or packet, i.e. start at -/// `b_wptr`. -/// -/// Modify(offset): Modify bytes starting at `offset` from the -/// beginning of the segment or packet (`b_rptr`). The length of the -/// write must fit within the end of the current segment (`b_wptr`). -pub enum WritePos { - Append, - Modify(u16), -} - -#[derive(Debug)] -pub struct PacketReader<'a> { - pkt_segs: &'a [PacketSeg], - pkt_pos: usize, - seg_idx: usize, - seg_pos: usize, - seg_len: usize, + /// This value may also be none if the packet has no notion of a + /// ULP checksum; e.g., ARP. + body_csum: Option, + /// L4 hash for this packet, computed from the flow ID. + l4_hash: Memoised, + /// Tracks whether any body transforms have been executed on this + /// packet. + body_modified: bool, + /// Tracks whether any transform has been applied to this packet + /// which would dirty the inner L3 and/or ULP header checksums. 
+ inner_csum_dirty: bool, } -impl<'a> PacketReader<'a> { - pub fn finish(self) -> (usize, usize, usize, bool) { - let end_of_seg = self.seg_pos == self.seg_len; - (self.pkt_pos, self.seg_idx, self.seg_pos, end_of_seg) - } - - pub fn new(pkt_segs: &'a [PacketSeg]) -> Self { - let seg_len = pkt_segs[0].len; - - PacketReader { pkt_segs, pkt_pos: 0, seg_idx: 0, seg_pos: 0, seg_len } - } - - pub fn pkt_pos(&self) -> usize { - self.pkt_pos - } +/// Minimum-size zerocopy view onto a parsed packet, sufficient for fast +/// packet transformation. +pub struct LiteParsed> { + /// Total length of packet, in bytes. This is equal to the sum of + /// the length of the _initialized_ window in all the segments + /// (`b_wptr - b_rptr`). + len: usize, + /// Base pointer of the contained T, used in dtrace SDTs and the like + /// for correlation and inspection of packet events. + base_ptr: uintptr_t, + meta: IngotParsed, } -impl<'a> PacketRead<'a> for PacketReader<'a> { - fn pos(&self) -> usize { - self.pkt_pos - } - - fn seek(&mut self, mut amount: usize) -> ReadResult<()> { - while self.seg_pos + amount > self.seg_len { - if self.seg_idx + 1 == self.pkt_segs.len() { - return Err(ReadErr::OutOfRange); - } - - self.seg_idx += 1; - amount -= self.seg_len - self.seg_pos; - self.pkt_pos += self.seg_len - self.seg_pos; - self.seg_len = self.pkt_segs[self.seg_idx].len; - self.seg_pos = 0; - } - - self.seg_pos += amount; - self.pkt_pos += amount; - Ok(()) - } - - /// Seek backwards by `offset`. - /// - /// NOTE: Currently we only allow seeking back to the beginning of - /// the current segment, which should be enough in all situations - /// this is needed (this API is in flux so no point putting in - /// work that isn't needed at the moment). 
- fn seek_back(&mut self, amount: usize) -> ReadResult<()> { - if amount > self.seg_pos { - return Err(ReadErr::NotEnoughBytes); - } - - self.seg_pos -= amount; - self.pkt_pos -= amount; - Ok(()) - } - - fn seg_left(&self) -> usize { - self.seg_len - self.seg_pos - } - - fn seg_idx(&self) -> usize { - self.seg_idx - } - - fn seg_pos(&self) -> usize { - self.seg_pos - } - - fn slice<'b>(&'b mut self, len: usize) -> ReadResult<&'a [u8]> { - let mut seg = &self.pkt_segs[self.seg_idx]; - - // If we've reached the end of the initialized bytes in this - // segment. - if self.seg_pos == seg.len { - // There are no more segments to be read. - if (self.seg_idx + 1) == self.pkt_segs.len() { - return Err(ReadErr::EndOfPacket); - } - - // Move onto next segment. - self.seg_idx += 1; - seg = &self.pkt_segs[self.seg_idx]; - self.seg_pos = 0; - self.seg_len = seg.len; - } - - if self.seg_pos + len > self.seg_len { - return Err(ReadErr::NotEnoughBytes); - } - - let ret = unsafe { - let start = (*seg.mp).b_rptr.add(self.seg_pos); - slice::from_raw_parts(start, len) - }; - - self.pkt_pos += len; - self.seg_pos += len; - Ok(ret) - } +impl> LiteParsed {} - fn copy_remaining(&self) -> Vec { - let total_len: usize = self.pkt_segs.iter().map(|s| s.len).sum(); - let mut bytes = Vec::with_capacity(total_len - self.pkt_pos); - let mut seg_idx = self.seg_idx; - let mut seg_pos = self.seg_pos; - let mut seg_len = self.seg_len; - let mut seg = &self.pkt_segs[seg_idx]; +// These are needed for now to account for not wanting to redesign +// ActionDescs to be generic over T (trait object safety rules, etc.), +// in addition to needing to rework Hairpin actions. 
+pub type MblkPacketData<'a> = PacketData>; +pub type MblkFullParsed<'a> = FullParsed>; +pub type MblkLiteParsed<'a, M> = LiteParsed, M>; - loop { - let seg_slice = unsafe { - let start = (*seg.mp).b_rptr.add(seg_pos); - slice::from_raw_parts(start, seg_len - seg_pos) - }; - bytes.extend_from_slice(seg_slice); +pub trait BufferState { + fn len(&self) -> usize; + fn base_ptr(&self) -> uintptr_t; +} - seg_idx += 1; +pub trait Pullup { + /// Pulls all remaining segments of a packet into a new + /// `Self` containing a single buffer. + fn pullup(&self, prepend: Option<&[u8]>) -> MsgBlk; +} - if seg_idx >= self.pkt_segs.len() { - break; - } +/// A set of headers to be emitted at the head of a packet, and +/// possibly a replacement body as required in the slowpath. +#[derive(Debug, Default)] +pub struct OpteEmit { + outer_eth: Option, + outer_ip: Option, + outer_encap: Option, - seg = &self.pkt_segs[seg_idx]; - seg_pos = 0; - seg_len = seg.len - } + // We can (but do not often) push/pop inner meta. + // Splitting via Box minimises struct size in the general case. + inner: Option>, - bytes - } + // In some cases, applying body transforms requires a packet pullup, + // which the body transforms will then be applied to. If there is a + // modified body, it must be taken from here. + replace_body: Option, } -/// A trait for getting mutable slices of bytes from packets. -/// -/// All operations start from the current position and move it -/// forward. -pub trait PacketReadMut<'a>: PacketRead<'a> { - /// Reutrn the current offset into the packet. - fn offset(&self) -> ReaderOffset; - - /// Return a mutable reference to a slice of `len` bytes starting - /// from the current position. - /// - /// The slice *must* exist entirely in a single packet segment -- - /// it can never straddle multiple segments. - /// - /// # Errors - /// - /// If `self` cannot satisfy this request a `ReadErr` is returned. 
- fn slice_mut<'b>(&'b mut self, len: usize) -> ReadResult<&'a mut [u8]>; +/// Inner headers needing completely rewritten/emitted in a packet. +#[derive(Clone, Debug, Default)] +pub struct OpteInnerEmit { + eth: Ethernet, + l3: Option, + ulp: Option, } +/// A specification of how a packet should be modified to finish processing, +/// after existing fields have been updated. +/// +/// This will add and/or remove several layers from the underlying `MsgBlk`, +/// and can be queried for routing specific info (access to new encap, l4 hash). #[derive(Debug)] -pub struct PacketReaderMut<'a> { - pkt_segs: &'a mut [PacketSeg], - pkt_pos: usize, - seg_idx: usize, - seg_pos: usize, - seg_len: usize, +pub struct EmitSpec { + pub(crate) prepend: PushSpec, + pub(crate) l4_hash: u32, + pub(crate) rewind: u16, + pub(crate) ulp_len: u32, } -impl<'a> PacketReaderMut<'a> { - pub fn finish(self) -> (usize, usize, usize, bool) { - let end_of_seg = self.seg_pos == self.seg_len; - (self.pkt_pos, self.seg_idx, self.seg_pos, end_of_seg) - } - - pub fn new(pkt_segs: &'a mut [PacketSeg]) -> Self { - let seg_len = pkt_segs[0].len; - - PacketReaderMut { - pkt_segs, - pkt_pos: 0, - seg_idx: 0, - seg_pos: 0, - seg_len, - } +impl Default for EmitSpec { + fn default() -> Self { + Self { prepend: PushSpec::NoOp, l4_hash: 0, rewind: 0, ulp_len: 0 } } } -#[derive(Clone, Copy, Debug)] -pub struct ReaderOffset { - pub pkt_pos: usize, - pub seg_idx: usize, - pub seg_pos: usize, -} - -impl<'a> PacketRead<'a> for PacketReaderMut<'a> { - fn pos(&self) -> usize { - self.pkt_pos +impl EmitSpec { + /// Return the L4 hash of the inner flow, used for multipath selection. 
+ #[inline] + #[must_use] + pub fn l4_hash(&self) -> u32 { + self.l4_hash } - fn seek(&mut self, mut amount: usize) -> ReadResult<()> { - while self.seg_pos + amount > self.seg_len { - if self.seg_idx + 1 == self.pkt_segs.len() { - return Err(ReadErr::OutOfRange); + /// Perform final structural transformations to a packet (removal of + /// existing headers, and copying in new/replacement headers). + #[inline] + #[must_use] + pub fn apply(self, mut pkt: MsgBlk) -> MsgBlk { + // Rewind + { + let mut slots = heapless::Vec::<&mut MsgBlkNode, 6>::new(); + let mut to_rewind = self.rewind as usize; + + if to_rewind > 0 { + let mut reader = pkt.iter_mut(); + while to_rewind != 0 { + let this = reader.next(); + let Some(node) = this else { + break; + }; + + let has = node.len(); + let droppable = to_rewind.min(has); + node.drop_front_bytes(droppable) + .expect("droppable should be bounded above by len"); + to_rewind -= droppable; + + slots.push(node).unwrap(); + } } - - self.seg_idx += 1; - amount -= self.seg_len - self.seg_pos; - self.pkt_pos += self.seg_len - self.seg_pos; - self.seg_len = self.pkt_segs[self.seg_idx].len; - self.seg_pos = 0; } - self.seg_pos += amount; - self.pkt_pos += amount; - Ok(()) - } + // TODO: actually push in to existing slots we rewound past if needed, + // then run this step at the end. + // This is not really an issue in practice -- no packets should need + // to rewind *and* prepend new segments with how we're using OPTE today, + // much less so in the fastpath. + pkt.drop_empty_segments(); - /// Seek backwards by `offset`. - /// - /// NOTE: Currently we only allow seeking back to the beginning of - /// the current segment, which should be enough in all situations - /// this is needed (this API is in flux so no point putting in - /// work that isn't needed at the moment). 
- fn seek_back(&mut self, amount: usize) -> ReadResult<()> { - if amount > self.seg_pos { - return Err(ReadErr::NotEnoughBytes); - } + match self.prepend { + PushSpec::Fastpath(push_spec) => { + push_spec.encap.prepend(pkt, self.ulp_len as usize) + } + PushSpec::Slowpath(push_spec) => { + let mut needed_push = push_spec.outer_eth.packet_length() + + push_spec.outer_ip.packet_length() + + push_spec.outer_encap.packet_length(); + + if let Some(inner_new) = &push_spec.inner { + needed_push += inner_new.eth.packet_length() + + inner_new.l3.packet_length() + + inner_new.ulp.packet_length(); + } - self.seg_pos -= amount; - self.pkt_pos -= amount; - Ok(()) - } + if let Some(replace_body) = push_spec.replace_body { + pkt.truncate_chain( + self.ulp_len as usize - replace_body.byte_len(), + ); + pkt.append(replace_body); + } - fn seg_idx(&self) -> usize { - self.seg_idx - } + let needed_alloc = needed_push; - fn seg_left(&self) -> usize { - self.seg_len - self.seg_pos - } + let mut prepend = if needed_alloc > 0 { + let mut new_mblk = MsgBlk::new_ethernet(needed_alloc); + new_mblk.pop_all(); + Some(new_mblk) + } else { + None + }; - fn seg_pos(&self) -> usize { - self.seg_pos - } + if let Some(inner_new) = &push_spec.inner { + if let Some(inner_ulp) = &inner_new.ulp { + let target = if let Some(v) = prepend.as_mut() { + v + } else { + &mut pkt + }; - fn slice<'b>(&'b mut self, len: usize) -> ReadResult<&'a [u8]> { - let mut seg = &self.pkt_segs[self.seg_idx]; + target.emit_front(inner_ulp).unwrap(); + } - // If we've reached the end of the initialized bytes in this - // segment. - if self.seg_pos == seg.len { - // There are no more segments to be read. - if (self.seg_idx + 1) == self.pkt_segs.len() { - return Err(ReadErr::EndOfPacket); - } + if let Some(inner_l3) = &inner_new.l3 { + let target = if let Some(v) = prepend.as_mut() { + v + } else { + &mut pkt + }; - // Move onto next segment. 
- self.seg_idx += 1; - seg = &self.pkt_segs[self.seg_idx]; - self.seg_pos = 0; - self.seg_len = seg.len; - } + target.emit_front(inner_l3).unwrap(); + } - if self.seg_pos + len > self.seg_len { - return Err(ReadErr::NotEnoughBytes); - } + let target = if let Some(v) = prepend.as_mut() { + v + } else { + &mut pkt + }; - let ret = unsafe { - let start = (*seg.mp).b_rptr.add(self.seg_pos); - slice::from_raw_parts(start, len) - }; + target.emit_front(inner_new.eth).unwrap(); + } - self.pkt_pos += len; - self.seg_pos += len; - Ok(ret) - } + if let Some(outer_encap) = &push_spec.outer_encap { + let encap = SizeHoldingEncap { + encapped_len: self.ulp_len as u16, + meta: outer_encap, + }; - fn copy_remaining(&self) -> Vec { - let total_len: usize = self.pkt_segs.iter().map(|s| s.len).sum(); - let mut bytes = Vec::with_capacity(total_len - self.pkt_pos); - let mut seg_idx = self.seg_idx; - let mut seg_pos = self.seg_pos; - let mut seg_len = self.seg_len; - let mut seg = &self.pkt_segs[seg_idx]; + let target = if let Some(v) = prepend.as_mut() { + v + } else { + &mut pkt + }; - loop { - let seg_slice = unsafe { - let start = (*seg.mp).b_rptr.add(seg_pos); - slice::from_raw_parts(start, seg_len - seg_pos) - }; - bytes.extend_from_slice(seg_slice); + target.emit_front(&encap).unwrap(); + } - seg_idx += 1; + if let Some(outer_ip) = &push_spec.outer_ip { + let target = if let Some(v) = prepend.as_mut() { + v + } else { + &mut pkt + }; - if seg_idx >= self.pkt_segs.len() { - break; - } + target.emit_front(outer_ip).unwrap(); + } - seg = &self.pkt_segs[seg_idx]; - seg_pos = 0; - seg_len = seg.len - } + if let Some(outer_eth) = &push_spec.outer_eth { + let target = if let Some(v) = prepend.as_mut() { + v + } else { + &mut pkt + }; - bytes - } -} + target.emit_front(outer_eth).unwrap(); + } -impl<'a> PacketReadMut<'a> for PacketReaderMut<'a> { - fn offset(&self) -> ReaderOffset { - ReaderOffset { - pkt_pos: self.pkt_pos, - seg_idx: self.seg_idx, - seg_pos: self.seg_pos, + if let 
Some(mut prepend) = prepend { + prepend.append(pkt); + prepend + } else { + pkt + } + } + PushSpec::NoOp => pkt, } } - fn slice_mut<'b>(&'b mut self, len: usize) -> ReadResult<&'a mut [u8]> { - let mut seg = &self.pkt_segs[self.seg_idx]; - - // If we've reached the end of the initialized bytes in this - // segment. - if self.seg_pos == seg.len { - // There are no more segments to be read. - if (self.seg_idx + 1) == self.pkt_segs.len() { - return Err(ReadErr::EndOfPacket); - } - - // Move onto next segment. - self.seg_idx += 1; - seg = &self.pkt_segs[self.seg_idx]; - self.seg_pos = 0; - self.seg_len = seg.len; + /// Returns the Geneve VNI when this spec pushes Geneve encapsulation. + #[inline] + pub fn outer_encap_vni(&self) -> Option { + match &self.prepend { + PushSpec::Fastpath(c) => match &c.encap { + CompiledEncap::Push { encap: EncapPush::Geneve(g), .. } => { + Some(g.vni) + } + _ => None, + }, + PushSpec::Slowpath(s) => match &s.outer_encap { + Some(EncapMeta::Geneve(g)) => Some(g.vni), + _ => None, + }, + PushSpec::NoOp => None, } + } - if self.seg_pos + len > self.seg_len { - return Err(ReadErr::NotEnoughBytes); + /// Returns the outer IPv6 src/dst when this spec pushes Geneve encapsulation. + #[inline] + pub fn outer_ip6_addrs(&self) -> Option<(Ipv6Addr, Ipv6Addr)> { + match &self.prepend { + PushSpec::Fastpath(c) => match &c.encap { + CompiledEncap::Push { ip: IpPush::Ip6(v6), .. } => { + Some((v6.src, v6.dst)) + } + _ => None, + }, + PushSpec::Slowpath(s) => match &s.outer_ip { + Some(L3Repr::Ipv6(v6)) => Some((v6.source, v6.destination)), + _ => None, + }, + PushSpec::NoOp => None, } - - let ret = unsafe { - let start = (*seg.mp).b_rptr.add(self.seg_pos); - slice::from_raw_parts_mut(start, len) - }; - - self.pkt_pos += len; - self.seg_pos += len; - Ok(ret) } } -/// The common entry into an `allocb(9F)` implementation that works in -/// both std and `no_std` environments. 
-/// -/// NOTE: We do not emulate the priority argument as it is not -/// relevant to OPTE's implementation. In the case of `no_std`, we -/// always pass a priority value of `0` to `allocb(9F)`. -pub fn allocb(size: usize) -> *mut mblk_t { - assert!(size <= MBLK_MAX_SIZE); - - #[cfg(any(feature = "std", test))] - return mock_allocb(size); - - // Safety: allocb(9F) should be safe for any size equal to or - // less than MBLK_MAX_SIZE. - #[cfg(all(not(feature = "std"), not(test)))] - unsafe { - ddi::allocb(size, 0) - } +/// Specification of additional header layers to push at the head of a packet. +#[derive(Debug)] +pub enum PushSpec { + /// Bytes to prepend to packet which have been serialised ahead of time + /// and can be copied in one shot. + Fastpath(Arc), + /// Full representations of each header to serialise and prepend ahead + /// of the current packet contents. + Slowpath(Box), + /// No prepend. + NoOp, } -#[cfg(any(feature = "std", test))] -pub fn mock_allocb(size: usize) -> *mut mblk_t { - // If the requested size is 0 we mimic allocb(9F) and allocate 16 - // bytes. See `uts/common/io/stream.c`. - let size = if size == 0 { 16 } else { size }; - let buf = Vec::with_capacity(size); - mock_desballoc(buf) +#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Ord, PartialOrd, Default)] +pub enum Memoised { + #[default] + Uninit, + Known(T), } -#[cfg(any(feature = "std", test))] -pub fn mock_desballoc(buf: Vec) -> *mut mblk_t { - let mut buf = std::mem::ManuallyDrop::new(buf); - let ptr = buf.as_mut_ptr(); - let len = buf.len(); - let avail = buf.capacity(); - - // For the purposes of mocking in std the only fields that - // matter here are the ones relating to the data buffer: - // db_base and db_lim. - let dblk = Box::new(dblk_t { - db_frtnp: ptr::null(), - db_base: ptr, - // Safety: We rely on the Vec implementation to give us - // the correct value for avail. 
- db_lim: unsafe { ptr.add(avail) }, - db_ref: 0, - db_type: 0, - db_flags: 0, - db_struioflag: 0, - db_cpid: 0, - db_cache: ptr::null(), - db_mblk: ptr::null(), - db_free: ptr::null(), - db_lastfree: ptr::null(), - db_cksumstart: 0, - db_cksumend: 0, - db_cksumstuff: 0, - db_struioun: 0, - db_fthdr: ptr::null(), - db_credp: ptr::null(), - }); - - let dbp = Box::into_raw(dblk); - - // For the purposes of mocking in std the only fields that - // matter are b_rptr and b_wptr. However, in the future we - // will probably want to mock segments packets via b_cont and - // packet chains via b_next. - let mblk = Box::new(mblk_t { - b_next: ptr::null_mut(), - b_prev: ptr::null_mut(), - b_cont: ptr::null_mut(), - // Safety: We know dbp is valid because we just created it. - b_rptr: unsafe { (*dbp).db_base as *mut c_uchar }, - b_wptr: unsafe { (*dbp).db_base.add(len) as *mut c_uchar }, - b_datap: dbp, - b_band: 0, - b_tag: 0, - b_flag: 0, - b_queue: ptr::null(), - }); - - let mp = Box::into_raw(mblk); - // Safety: We know dbp is valid because we just created it. - unsafe { (*dbp).db_mblk = mp as *const mblk_t }; - - mp -} +impl Memoised { + #[inline] + pub fn get(&mut self, or: impl FnOnce() -> T) -> &T { + if self.try_get().is_none() { + self.set(or()); + } -// The std equivalent to `freemsg(9F)`. -#[cfg(any(feature = "std", test))] -fn mock_freemsg(mut mp: *mut mblk_t) { - while !mp.is_null() { - let cont = unsafe { (*mp).b_cont }; - mock_freeb(mp); - mp = cont; + self.try_get().unwrap() } -} -// The std equivalent to `freeb(9F)`. -#[cfg(any(feature = "std", test))] -fn mock_freeb(mp: *mut mblk_t) { - // Safety: All of these were created safely in `mock_alloc()`. - // As long as the other methods don't do any of the following, - // this is safe: - // - // * Modify the `mp`/`dblk` pointers. - // * Increase `len` beyond `limit`. - // * Modify `limit`. 
- unsafe { - let bmblk = Box::from_raw(mp); - let bdblk = Box::from_raw(bmblk.b_datap as *mut dblk_t); - let buffer = Vec::from_raw_parts( - bdblk.db_base, - bmblk.b_wptr.offset_from(bmblk.b_rptr) as usize, - bdblk.db_lim.offset_from(bdblk.db_base) as usize, - ); - drop(buffer); - drop(bdblk); - drop(bmblk); + #[inline] + pub fn try_get(&self) -> Option<&T> { + match self { + Memoised::Uninit => None, + Memoised::Known(v) => Some(v), + } + } + + #[inline] + pub fn set(&mut self, val: T) { + *self = Self::Known(val); } } #[cfg(test)] mod test { use super::*; - use crate::engine::ether::EtherHdr; - use crate::engine::ether::EtherType; - use crate::engine::ip4::Ipv4Hdr; - use crate::engine::ip6::Ipv6Hdr; - use crate::engine::tcp::TcpFlags; - use crate::engine::tcp::TcpHdr; + use crate::ddi::mblk::MsgBlk; + use crate::engine::ether::Ethernet; + use crate::engine::ether::EthernetRef; + use crate::engine::ip::v4::Ipv4; + use crate::engine::ip::v4::Ipv4Ref; + use crate::engine::ip::v6::Ipv6; + use crate::engine::ip::v6::Ipv6Ref; + use crate::engine::packet::Packet; use crate::engine::GenericUlp; - use opte_api::Direction::*; + use ingot::ethernet::Ethertype; + use ingot::ip::IpProtocol; + use ingot::tcp::Tcp; + use ingot::tcp::TcpFlags; + use ingot::tcp::TcpRef; + use ingot::types::HeaderLen; + use ingot::udp::Udp; use opte_api::Ipv6Addr; use opte_api::MacAddr; @@ -3254,369 +1870,136 @@ mod test { const DST_IP6: Ipv6Addr = Ipv6Addr::from_const([0xFD00, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2]); - fn tcp_pkt(body: &[u8]) -> Packet { - let tcp = TcpMeta { - src: 3839, - dst: 80, - seq: 4224936861, + fn tcp_pkt(body: &[u8]) -> MsgBlk { + let tcp = Tcp { + source: 3839, + destination: 80, + sequence: 4224936861, flags: TcpFlags::SYN, ..Default::default() }; - let ip4_total_len = Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len(); - let ip4 = Ipv4Meta { - src: SRC_IP4, - dst: DST_IP4, - proto: Protocol::TCP, - ttl: 64, - ident: 99, - hdr_len: Ipv4Hdr::BASE_SIZE.try_into().unwrap(), - 
total_len: ip4_total_len.try_into().unwrap(), - csum: [0; 2], + let ip4_total_len = + Ipv4::MINIMUM_LENGTH + (&tcp, &body).packet_length(); + let ip4 = Ipv4 { + source: SRC_IP4, + destination: DST_IP4, + protocol: IpProtocol::TCP, + hop_limit: 64, + identification: 99, + total_len: ip4_total_len as u16, + ..Default::default() }; - let eth = EtherMeta { - ether_type: EtherType::Ipv4, - src: SRC_MAC, - dst: DST_MAC, + let eth = Ethernet { + destination: DST_MAC, + source: SRC_MAC, + ethertype: Ethertype::IPV4, }; - let pkt_sz = EtherHdr::SIZE + ip4_total_len; - let mut seg = PacketSeg::alloc(pkt_sz); - seg.expand_end(pkt_sz).unwrap(); - let mut wtr = seg.get_writer(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - tcp.emit(wtr.slice_mut(tcp.hdr_len()).unwrap()); - wtr.write(body).unwrap(); - let pkt = Packet::new(seg); - assert_eq!(pkt.len(), pkt_sz); - pkt - } - - #[test] - fn zero_byte_packet() { - let pkt = Packet::alloc(0); - assert_eq!(pkt.len(), 0); - assert_eq!(pkt.num_segs(), 1); - assert_eq!(pkt.avail(), 16); - let res = pkt.parse(Out, GenericUlp {}); - match res { - Err(ParseError::BadHeader(msg)) => { - assert_eq!( - msg, - EtherHdrError::ReadError(ReadErr::EndOfPacket).into() - ); - } - - _ => panic!("expected read error, got: {:?}", res), - } - - let pkt2 = Packet::copy(&[]); - assert_eq!(pkt2.len(), 0); - assert_eq!(pkt2.num_segs(), 1); - assert_eq!(pkt2.avail(), 16); - let res = pkt2.parse(Out, GenericUlp {}); - match res { - Err(ParseError::BadHeader(msg)) => { - assert_eq!( - msg, - EtherHdrError::ReadError(ReadErr::EndOfPacket).into() - ); - } - - _ => panic!("expected read error, got: {:?}", res), - } - } - - // Verify uninitialized packet. - #[test] - fn uninitialized_packet() { - let pkt = Packet::alloc(200); - assert_eq!(pkt.avail(), 200); - assert_eq!(pkt.num_segs(), 1); - } - - // Verify that a segment's bytes can be read in the CanRead state. 
- #[test] - fn read_seg() { - let buf1 = vec![0x1, 0x2, 0x3, 0x4]; - let buf2 = vec![0x5, 0x6]; - let mp1 = mock_desballoc(buf1); - let mp2 = mock_desballoc(buf2); - - unsafe { - (*mp1).b_cont = mp2; - } - - let pkt = unsafe { Packet::wrap_mblk(mp1).unwrap() }; - assert_eq!(pkt.len(), 6); - assert_eq!(pkt.num_segs(), 2); - assert_eq!(pkt.seg_bytes(0), &[0x1, 0x2, 0x3, 0x4]); - assert_eq!(pkt.seg_bytes(1), &[0x5, 0x6]); - } - - #[test] - fn wrap() { - let mut buf1 = Vec::with_capacity(20); - let mut buf2 = Vec::with_capacity(2); - buf1.extend_from_slice(&[0x1, 0x2, 0x3, 0x4]); - buf2.extend_from_slice(&[0x5, 0x6]); - let mp1 = mock_desballoc(buf1); - let mp2 = mock_desballoc(buf2); - - unsafe { - (*mp1).b_cont = mp2; - } - - let pkt = unsafe { Packet::wrap_mblk(mp1).unwrap() }; - assert_eq!(pkt.num_segs(), 2); - assert_eq!(pkt.avail(), 22); - assert_eq!(pkt.len(), 6); + MsgBlk::new_ethernet_pkt((eth, ip4, tcp, body)) } #[test] fn read_single_segment() { - let parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); - assert_eq!(parsed.state.hdr_offsets.inner.ether.seg_idx, 0); - assert_eq!(parsed.state.hdr_offsets.inner.ether.seg_pos, 0); + let mut pkt = tcp_pkt(&[]); + let parsed = Packet::parse_outbound(pkt.iter_mut(), GenericUlp {}) + .unwrap() + .to_full_meta(); - let eth_meta = parsed.state.meta.inner.ether; - assert_eq!(eth_meta.ether_type, EtherType::Ipv4); - assert_eq!(eth_meta.dst, DST_MAC); - assert_eq!(eth_meta.src, SRC_MAC); + let eth_meta = parsed.meta().inner_ether(); + assert_eq!(eth_meta.destination(), DST_MAC); + assert_eq!(eth_meta.source(), SRC_MAC); + assert_eq!(eth_meta.ethertype(), Ethertype::IPV4); - let offsets = &parsed.state.hdr_offsets; + let ip4_meta = parsed.meta().inner_ip4().unwrap(); + assert_eq!(ip4_meta.source(), SRC_IP4); + assert_eq!(ip4_meta.destination(), DST_IP4); + assert_eq!(ip4_meta.protocol(), IpProtocol::TCP); - let ip4_meta = match parsed.state.meta.inner.ip.as_ref().unwrap() { - IpMeta::Ip4(v) => v, - _ => 
panic!("expected IPv4"), - }; - assert_eq!(ip4_meta.src, SRC_IP4); - assert_eq!(ip4_meta.dst, DST_IP4); - assert_eq!(ip4_meta.proto, Protocol::TCP); - assert_eq!(offsets.inner.ip.as_ref().unwrap().seg_idx, 0); - assert_eq!(offsets.inner.ip.as_ref().unwrap().seg_pos, 14); - - let tcp_meta = match parsed.state.meta.inner.ulp.as_ref().unwrap() { - UlpMeta::Tcp(v) => v, - _ => panic!("expected TCP"), - }; - assert_eq!(tcp_meta.src, 3839); - assert_eq!(tcp_meta.dst, 80); - assert_eq!(tcp_meta.flags, TcpFlags::SYN); - assert_eq!(tcp_meta.seq, 4224936861); - assert_eq!(tcp_meta.ack, 0); - assert_eq!(offsets.inner.ulp.as_ref().unwrap().seg_idx, 0); - assert_eq!(offsets.inner.ulp.as_ref().unwrap().seg_pos, 34); + let tcp_meta = parsed.meta().inner_tcp().unwrap(); + assert_eq!(tcp_meta.source(), 3839); + assert_eq!(tcp_meta.destination(), 80); + assert_eq!(tcp_meta.flags(), TcpFlags::SYN); + assert_eq!(tcp_meta.sequence(), 4224936861); + assert_eq!(tcp_meta.acknowledgement(), 0); } #[test] - fn write_and_read_multi_segment() { - let mp1 = allocb(34); - let mp2 = allocb(20); - - unsafe { - (*mp1).b_cont = mp2; - } - - let mut seg1 = unsafe { PacketSeg::wrap_mblk(mp1) }; - let mut seg2 = unsafe { PacketSeg::wrap_mblk(mp2) }; - - let tcp = TcpMeta { - src: 3839, - dst: 80, + fn read_multi_segment() { + let mut mp1 = MsgBlk::new_ethernet_pkt(Ethernet { + destination: DST_MAC, + source: SRC_MAC, + ethertype: Ethertype::IPV4, + }); + + let tcp = Tcp { + source: 3839, + destination: 80, flags: TcpFlags::SYN, - seq: 4224936861, - ..Default::default() - }; - let ip4 = Ipv4Meta { - src: SRC_IP4, - dst: DST_IP4, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len()) as u16, + sequence: 4224936861, ..Default::default() }; - let eth = EtherMeta { - ether_type: EtherType::Ipv4, - src: SRC_MAC, - dst: DST_MAC, - }; - seg1.expand_end(34).unwrap(); - let mut wtr1 = seg1.get_writer(); - eth.emit(wtr1.slice_mut(EtherHdr::SIZE).unwrap()); - 
ip4.emit(wtr1.slice_mut(ip4.hdr_len()).unwrap()); - - seg2.expand_end(20).unwrap(); - let mut wtr2 = seg2.get_writer(); - tcp.emit(wtr2.slice_mut(tcp.hdr_len()).unwrap()); - let pkt = Packet::new2(seg1, seg2); - let parsed = pkt.parse(Out, GenericUlp {}).unwrap(); - - let eth_parsed = parsed.state.meta.inner.ether; - assert_eq!(parsed.state.hdr_offsets.inner.ether.seg_idx, 0); - assert_eq!(parsed.state.hdr_offsets.inner.ether.seg_pos, 0); - assert_eq!(eth_parsed.ether_type, EtherType::Ipv4); - assert_eq!(eth_parsed.dst, DST_MAC); - assert_eq!(eth_parsed.src, SRC_MAC); - - let offsets = &parsed.state.hdr_offsets; - - let ip4_parsed = match parsed.state.meta.inner.ip.unwrap() { - IpMeta::Ip4(v) => v, - _ => panic!("expected IPv4"), - }; - assert_eq!(ip4_parsed.src, SRC_IP4); - assert_eq!(ip4_parsed.dst, DST_IP4); - assert_eq!(ip4_parsed.proto, Protocol::TCP); - assert_eq!(offsets.inner.ip.as_ref().unwrap().seg_idx, 0); - assert_eq!(offsets.inner.ip.as_ref().unwrap().seg_pos, 14); - - let tcp_parsed = match parsed.state.meta.inner.ulp.unwrap() { - UlpMeta::Tcp(v) => v, - _ => panic!("expected TCP"), - }; - assert_eq!(tcp_parsed.src, 3839); - assert_eq!(tcp_parsed.dst, 80); - assert_eq!(tcp_parsed.flags, TcpFlags::SYN); - assert_eq!(tcp_parsed.seq, 4224936861); - assert_eq!(tcp_parsed.ack, 0); - assert_eq!(offsets.inner.ulp.as_ref().unwrap().seg_idx, 0); - assert_eq!(offsets.inner.ulp.as_ref().unwrap().seg_pos, 34); - } - // Verify that we catch when a read requires more bytes than are - // available. 
- #[test] - fn not_enough_bytes_read() { - let eth = EtherMeta { - ether_type: EtherType::Ipv4, - src: SRC_MAC, - dst: DST_MAC, + let ip4 = Ipv4 { + source: SRC_IP4, + destination: DST_IP4, + protocol: IpProtocol::TCP, + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length()) as u16, + ..Default::default() }; - let mut seg = PacketSeg::alloc(34); - seg.expand_end(24).unwrap(); - let mut wtr = seg.get_writer(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - // The actual bytes do not matter for this test. - let ip4_partial = [0xA; 10]; - wtr.write(&ip4_partial).unwrap(); - let pkt = Packet::new(seg); - assert_eq!(pkt.num_segs(), 1); - assert_eq!(pkt.len(), 24); - assert_eq!(pkt.avail(), 34); - let mut rdr = pkt.get_rdr(); - let _ = rdr.slice(EtherHdr::SIZE); - assert!(matches!( - rdr.slice(Ipv4Hdr::BASE_SIZE), - Err(ReadErr::NotEnoughBytes) - )); - } - - #[test] - #[should_panic] - fn slice_unchecked_bad_offset() { - let parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); - // Offset past end of segment. - parsed.segs[0].slice_unchecked(99, None); - } + let mp2 = MsgBlk::new_pkt((ip4, tcp)); - #[test] - #[should_panic] - fn slice_mut_unchecked_bad_offset() { - let mut parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); - // Offset past end of segment. - parsed.segs[0].slice_mut_unchecked(99, None); - } + mp1.append(mp2); - #[test] - #[should_panic] - fn slice_unchecked_bad_len() { - let parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); - // Length past end of segment. - parsed.segs[0].slice_unchecked(0, Some(99)); - } + let pkt = Packet::parse_outbound(mp1.iter_mut(), GenericUlp {}) + .unwrap() + .to_full_meta(); - #[test] - #[should_panic] - fn slice_mut_unchecked_bad_len() { - let mut parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); - // Length past end of segment. 
- parsed.segs[0].slice_mut_unchecked(0, Some(99)); - } + let eth_parsed = pkt.meta().inner_ether(); + assert_eq!(eth_parsed.destination(), DST_MAC); + assert_eq!(eth_parsed.source(), SRC_MAC); + assert_eq!(eth_parsed.ethertype(), Ethertype::IPV4); - #[test] - fn slice_unchecked_zero() { - let parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); - // Set offset to end of packet and slice the "rest" by - // passing None. - assert_eq!(parsed.segs[0].slice_unchecked(54, None).len(), 0); - } + let ip4_parsed = pkt.meta().inner_ip4().unwrap(); + assert_eq!(ip4_parsed.source(), SRC_IP4); + assert_eq!(ip4_parsed.destination(), DST_IP4); + assert_eq!(ip4_parsed.protocol(), IpProtocol::TCP); - #[test] - fn slice_mut_unchecked_zero() { - let mut parsed = tcp_pkt(&[]).parse(Out, GenericUlp {}).unwrap(); - // Set offset to end of packet and slice the "rest" by - // passing None. - assert_eq!(parsed.segs[0].slice_mut_unchecked(54, None).len(), 0); + let tcp_parsed = pkt.meta().inner_tcp().unwrap(); + assert_eq!(tcp_parsed.source(), 3839); + assert_eq!(tcp_parsed.destination(), 80); + assert_eq!(tcp_parsed.flags(), TcpFlags::SYN); + assert_eq!(tcp_parsed.sequence(), 4224936861); + assert_eq!(tcp_parsed.acknowledgement(), 0); } // Verify that if the TCP header straddles an mblk we return an // error. 
#[test] fn straddled_tcp() { - let mp1 = allocb(46); - let mp2 = allocb(8); + let base = tcp_pkt(&[]); - unsafe { - (*mp1).b_cont = mp2; - } + let mut st1 = MsgBlk::copy(&base[..42]); + let st2 = MsgBlk::copy(&base[42..]); - let mut seg1 = unsafe { PacketSeg::wrap_mblk(mp1) }; - let mut seg2 = unsafe { PacketSeg::wrap_mblk(mp2) }; + st1.append(st2); + + assert_eq!(st1.seg_len(), 2); + assert_eq!(st1.byte_len(), base.len()); - let tcp = TcpMeta { src: 3839, dst: 80, ..Default::default() }; - let ip4 = Ipv4Meta { - src: SRC_IP4, - dst: DST_IP4, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len()) as u16, - ..Default::default() - }; - let eth = EtherMeta { - ether_type: EtherType::Ipv4, - src: SRC_MAC, - dst: DST_MAC, - }; - seg1.expand_end(46).unwrap(); - let mut wtr1 = seg1.get_writer(); - eth.emit(wtr1.slice_mut(EtherHdr::SIZE).unwrap()); - ip4.emit(wtr1.slice_mut(ip4.hdr_len()).unwrap()); - let mut tcp_bytes = vec![0u8; tcp.hdr_len()]; - tcp.emit(&mut tcp_bytes); - wtr1.write(&tcp_bytes[0..12]).unwrap(); - - seg2.expand_end(8).unwrap(); - let mut wtr2 = seg2.get_writer(); - wtr2.write(&tcp_bytes[12..]).unwrap(); - let pkt = Packet::new2(seg1, seg2); - assert_eq!(pkt.num_segs(), 2); - assert_eq!( - pkt.len(), - EtherHdr::SIZE + Ipv4Hdr::BASE_SIZE + TcpHdr::BASE_SIZE - ); assert!(matches!( - pkt.parse(Out, GenericUlp {}), - Err(ParseError::BadHeader(_)) + Packet::parse_outbound(st1.iter_mut(), GenericUlp {}), + Err(ParseError::IngotError(_)) )); } // Verify that we correctly parse an IPv6 packet with extension headers #[test] fn parse_ipv6_extension_headers_ok() { - use crate::engine::ip6::test::generate_test_packet; - use crate::engine::ip6::test::SUPPORTED_EXTENSIONS; + use crate::engine::ip::v6::test::generate_test_packet; + use crate::engine::ip::v6::test::SUPPORTED_EXTENSIONS; use itertools::Itertools; use smoltcp::wire::IpProtocol; for n_extensions in 0..SUPPORTED_EXTENSIONS.len() { @@ -3630,182 +2013,54 @@ mod test { let next_hdr = 
*(extensions.first().unwrap_or(&IpProtocol::Tcp)); - let ext_hdrs = &buf[Ipv6Hdr::BASE_SIZE..ipv6_header_size]; + let ext_hdrs = &buf[Ipv6::MINIMUM_LENGTH..ipv6_header_size]; // Append a TCP header - let tcp = TcpMeta { - src: 3839, - dst: 80, - seq: 4224936861, + let tcp = Tcp { + source: 3839, + destination: 80, + sequence: 4224936861, ..Default::default() }; - let mut ext_bytes = [0; 64]; - let ext_len = ext_hdrs.len(); - assert!(ext_len <= 64); - ext_bytes[0..ext_len].copy_from_slice(ext_hdrs); - - let pay_len = tcp.hdr_len() + ext_len; - let ip6 = Ipv6Meta { - src: SRC_IP6, - dst: DST_IP6, - proto: Protocol::TCP, - next_hdr, + + let pay_len = tcp.packet_length() + ext_hdrs.len(); + let ip6 = Ipv6 { + source: SRC_IP6, + destination: DST_IP6, + next_header: IpProtocol(u8::from(next_hdr)), hop_limit: 255, - pay_len: pay_len as u16, - ext: Some(ext_bytes), - ext_len, + payload_len: pay_len as u16, + + // Manually append extension hdrs rather than including + // here -- either way will test ingot's parsing logic. 
+ ..Default::default() }; - let eth = EtherMeta { - ether_type: EtherType::Ipv6, - src: SRC_MAC, - dst: DST_MAC, + let eth = Ethernet { + destination: DST_MAC, + source: SRC_MAC, + ethertype: Ethertype::IPV6, }; - let mut seg = PacketSeg::alloc(1024); - seg.expand_end(14 + ipv6_header_size + tcp.hdr_len()).unwrap(); - let mut wtr = seg.get_writer(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip6.emit(wtr.slice_mut(ip6.hdr_len()).unwrap()); - tcp.emit(wtr.slice_mut(tcp.hdr_len()).unwrap()); - let parsed = - Packet::new(seg).parse(Out, GenericUlp {}).unwrap(); - - // Assert that the computed offsets of the headers and payloads - // are accurate - let offsets = &parsed.state.hdr_offsets; - let ip = offsets - .inner - .ip - .as_ref() - .expect("Expected IP header offsets"); - assert_eq!( - ip.seg_idx, 0, - "Expected IP headers to be in segment 0" - ); - assert_eq!( - ip.seg_pos, - EtherHdr::SIZE, - "Expected the IP header to start immediately \ - after the Ethernet header" - ); - assert_eq!( - ip.pkt_pos, - EtherHdr::SIZE, - "Expected the IP header to start immediately \ - after the Ethernet header" - ); - let ulp = &offsets - .inner - .ulp - .as_ref() - .expect("Expected ULP header offsets"); - assert_eq!( - ulp.seg_idx, 0, - "Expected the ULP header to be in segment 0" - ); - assert_eq!( - ulp.seg_pos, - EtherHdr::SIZE + ipv6_header_size, - "Expected the ULP header to start immediately \ - after the IP header", - ); + let mut pkt = + MsgBlk::new_ethernet_pkt((eth, ip6, ext_hdrs, tcp)); + let pkt = Packet::parse_outbound(pkt.iter_mut(), GenericUlp {}) + .unwrap() + .to_full_meta(); + + // Assert that the packet parses back out, and we can reach + // the TCP meta no matter which permutation of EHs we have. 
assert_eq!( - ulp.pkt_pos, - EtherHdr::SIZE + ipv6_header_size, - "Expected the ULP header to start immediately \ - after the IP header", + pkt.meta().inner_ip6().unwrap().v6ext_ref().packet_length(), + ipv6_header_size - Ipv6::MINIMUM_LENGTH ); + let tcp_meta = pkt.meta().inner_tcp().unwrap(); + assert_eq!(tcp_meta.source(), 3839); + assert_eq!(tcp_meta.destination(), 80); + assert_eq!(tcp_meta.sequence(), 4224936861); } } } - #[test] - fn seg_writer() { - let mut seg = PacketSeg::alloc(18); - seg.expand_end(18).unwrap(); - - // Verify that an offset past the end results in error. - assert!(matches!( - PacketSegWriter::new(&mut seg, 20, 20), - Err(ModifierCreateError::StartOutOfRange), - )); - - // Verify that a length past the end results in error. - assert!(matches!( - PacketSegWriter::new(&mut seg, 0, 20), - Err(ModifierCreateError::EndOutOfRange), - )); - - // Writer for entire segment. - let wtr = PacketSegWriter::new(&mut seg, 0, 18).unwrap(); - assert_eq!(wtr.pos, 0); - assert_eq!(wtr.avail, 18); - - // Writer for last 4 bytes of segment. - let wtr = PacketSegWriter::new(&mut seg, 14, 4).unwrap(); - assert_eq!(wtr.pos, 0); - assert_eq!(wtr.avail, 4); - } - - #[test] - fn expand_and_shrink() { - let mut seg = PacketSeg::alloc(18); - assert_eq!(seg.len(), 0); - seg.expand_end(18).unwrap(); - assert_eq!(seg.len(), 18); - seg.shrink_start(4).unwrap(); - assert_eq!(seg.len(), 14); - seg.expand_start(4).unwrap(); - assert_eq!(seg.len(), 18); - assert!(seg.expand_end(20).is_err()); - assert!(seg.shrink_start(20).is_err()); - assert!(seg.expand_start(4).is_err()); - } - - #[test] - fn prefix_len() { - let mut seg = PacketSeg::alloc(18); - assert_eq!(seg.prefix_len(), 0); - seg.expand_end(18).unwrap(); - assert_eq!(seg.prefix_len(), 0); - seg.shrink_start(4).unwrap(); - assert_eq!(seg.prefix_len(), 4); - seg.expand_start(4).unwrap(); - assert_eq!(seg.prefix_len(), 0); - } - - // Verify that we do not panic when we get long chains of mblks linked by - // `b_cont`. 
This is a regression test for - // https://github.com/oxidecomputer/opte/issues/335 - #[test] - fn test_long_packet_continuation() { - const N_SEGMENTS: usize = 8; - let mut blocks: Vec<*mut mblk_t> = Vec::with_capacity(N_SEGMENTS); - for i in 0..N_SEGMENTS { - let mp = allocb(32); - - // Link previous block to this one. - if i > 0 { - let prev = blocks[i - 1]; - unsafe { - (*prev).b_cont = mp; - } - } - blocks.push(mp); - } - - // Wrap the first mblk in a Packet, and check that we still have a - // reference to everything. - let packet = unsafe { Packet::wrap_mblk(blocks[0]) } - .expect("Failed to wrap mblk chain with many segments"); - - assert_eq!(packet.segs.len(), N_SEGMENTS); - assert_eq!(packet.segs.len(), blocks.len()); - for (seg, mblk) in packet.segs.iter().zip(blocks) { - assert_eq!(seg.mp, mblk); - } - } - #[test] fn small_packet_with_padding() { const MINIMUM_ETH_FRAME_SZ: usize = 64; @@ -3830,204 +2085,84 @@ mod test { // Note that we do NOT update any of the packet headers themselves // as this padding process should be transparent to the upper // layers. 
- let mut padding_seg_wtr = pkt.add_seg(padding_len).unwrap(); - padding_seg_wtr.write(&vec![0; padding_len]).unwrap(); - assert_eq!(pkt.len(), MINIMUM_ETH_FRAME_SZ - FRAME_CHECK_SEQ_SZ); + let mut padding_seg = MsgBlk::new(padding_len); + padding_seg.resize(padding_len).unwrap(); + + pkt.append(padding_seg); + assert_eq!(pkt.byte_len(), MINIMUM_ETH_FRAME_SZ - FRAME_CHECK_SEQ_SZ); // Generate the metadata by parsing the packet - let mut pkt = pkt.parse(Direction::In, GenericUlp {}).unwrap(); + let parsed = Packet::parse_inbound(pkt.iter_mut(), GenericUlp {}) + .unwrap() + .to_full_meta(); // Grab parsed metadata - let ip4_meta = pkt.meta().inner_ip4().cloned().unwrap(); - let tcp_meta = pkt.meta().inner_tcp().cloned().unwrap(); + let ip4_meta = parsed.meta().inner_ip4().unwrap(); + let tcp_meta = parsed.meta().inner_tcp().unwrap(); // Length in packet headers shouldn't reflect include padding + // This should not fail even though there are more bytes in + // the initialised area of the mblk chain than the packet expects. assert_eq!( - usize::from(ip4_meta.total_len), - ip4_meta.hdr_len() + tcp_meta.hdr_len() + body.len(), - ); - - // The computed body length also shouldn't include the padding - assert_eq!(pkt.state.body.len, body.len()); - - // Pretend some processing happened... - // And now we need to update the packet headers based on the - // modified packet metadata. 
- pkt.emit_new_headers().unwrap(); - - // Grab the actual packet headers - let ip4_off = pkt.hdr_offsets().inner.ip.unwrap().pkt_pos; - let mut rdr = pkt.get_rdr_mut(); - rdr.seek(ip4_off).unwrap(); - let ip4_hdr = Ipv4Hdr::parse(&mut rdr).unwrap(); - let tcp_hdr = TcpHdr::parse(&mut rdr).unwrap(); - - // And make sure they don't include the padding bytes - assert_eq!( - usize::from(ip4_hdr.total_len()), - usize::from(ip4_hdr.hdr_len()) + tcp_hdr.hdr_len() + body.len() + usize::from(ip4_meta.total_len()), + (ip4_meta, tcp_meta, &body[..]).packet_length(), ); } #[test] fn udp6_packet_with_padding() { let body = [1, 2, 3, 4]; - let udp = UdpMeta { - src: 124, - dst: 5673, - len: u16::try_from(UdpHdr::SIZE + body.len()).unwrap(), + let udp = Udp { + source: 124, + destination: 5673, + length: u16::try_from(Udp::MINIMUM_LENGTH + body.len()).unwrap(), ..Default::default() }; - let ip6 = Ipv6Meta { - src: SRC_IP6, - dst: DST_IP6, - proto: Protocol::UDP, - next_hdr: smoltcp::wire::IpProtocol::Udp, + let ip6 = Ipv6 { + source: SRC_IP6, + destination: DST_IP6, + next_header: IpProtocol::UDP, hop_limit: 255, - pay_len: udp.len, - ext: None, - ext_len: 0, + payload_len: (&udp, &body[..]).packet_length() as u16, + + ..Default::default() }; - let eth = EtherMeta { - ether_type: EtherType::Ipv6, - src: SRC_MAC, - dst: DST_MAC, + let eth = Ethernet { + destination: DST_MAC, + source: SRC_MAC, + ethertype: Ethertype::IPV6, }; - let pkt_sz = eth.hdr_len() + ip6.hdr_len() + usize::from(ip6.pay_len); - let mut pkt = Packet::alloc_and_expand(pkt_sz); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(eth.hdr_len()).unwrap()); - ip6.emit(wtr.slice_mut(ip6.hdr_len()).unwrap()); - udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); - wtr.write(&body).unwrap(); + let pkt_sz = eth.packet_length() + + ip6.packet_length() + + usize::from(ip6.payload_len); + let mut pkt = MsgBlk::new_ethernet_pkt((eth, ip6, udp, &body[..])); assert_eq!(pkt.len(), pkt_sz); // Tack on a new segment filled 
zero padding at // the end that's not part of the payload as indicated // by the packet headers. let padding_len = 8; - let mut padding_seg_wtr = pkt.add_seg(padding_len).unwrap(); - padding_seg_wtr.write(&vec![0; padding_len]).unwrap(); - assert_eq!(pkt.len(), pkt_sz + padding_len); - - // Generate the metadata by parsing the packet - let mut pkt = pkt.parse(Direction::In, GenericUlp {}).unwrap(); + let mut padding_seg = MsgBlk::new(padding_len); + padding_seg.resize(padding_len).unwrap(); + pkt.append(padding_seg); + assert_eq!(pkt.byte_len(), pkt_sz + padding_len); + + // Generate the metadata by parsing the packet. + // This should not fail even though there are more bytes in + // the initialised area of the mblk chain than the packet expects. + let pkt = Packet::parse_inbound(pkt.iter_mut(), GenericUlp {}) + .unwrap() + .to_full_meta(); // Grab parsed metadata - let ip6_meta = pkt.meta().inner_ip6().cloned().unwrap(); - let udp_meta = pkt.meta().inner_udp().cloned().unwrap(); + let ip6_meta = pkt.meta().inner_ip6().unwrap(); + let udp_meta = pkt.meta().inner_udp().unwrap(); // Length in packet headers shouldn't reflect include padding assert_eq!( - usize::from(ip6_meta.pay_len), - udp_meta.hdr_len() + body.len(), + usize::from(ip6_meta.payload_len()), + udp_meta.packet_length() + body.len(), ); - - // The computed body length also shouldn't include the padding - assert_eq!(pkt.state.body.len, body.len()); - - // Pretend some processing happened... - // And now we need to update the packet headers based on the - // modified packet metadata. 
- pkt.emit_new_headers().unwrap(); - - // Grab the actual packet headers - let ip6_off = pkt.hdr_offsets().inner.ip.unwrap().pkt_pos; - let mut rdr = pkt.get_rdr_mut(); - rdr.seek(ip6_off).unwrap(); - let ip6_hdr = Ipv6Hdr::parse(&mut rdr).unwrap(); - let udp_hdr = UdpHdr::parse(&mut rdr).unwrap(); - - // And make sure they don't include the padding bytes - assert_eq!(ip6_hdr.pay_len(), udp_hdr.hdr_len() + body.len()); - } - - fn create_linked_mblks(n: usize) -> Vec<*mut mblk_t> { - let mut els = vec![]; - for _ in 0..n { - els.push(allocb(8)); - } - - // connect the elements in a chain - for (lhs, rhs) in els.iter().zip(els[1..].iter()) { - unsafe { - (**lhs).b_next = *rhs; - (**rhs).b_prev = *lhs; - } - } - - els - } - - #[test] - fn chain_has_correct_ends() { - let els = create_linked_mblks(3); - - let chain = unsafe { PacketChain::new(els[0]) }.unwrap(); - let chain_inner = chain.inner.as_ref().unwrap(); - assert_eq!(chain_inner.head.as_ptr(), els[0]); - assert_eq!(chain_inner.tail.as_ptr(), els[2]); - } - - #[test] - fn chain_breaks_links() { - let els = create_linked_mblks(3); - - let mut chain = unsafe { PacketChain::new(els[0]) }.unwrap(); - - let p0 = chain.pop_front().unwrap(); - assert_eq!(p0.mblk_addr(), els[0] as uintptr_t); - unsafe { - assert!((*els[0]).b_prev.is_null()); - assert!((*els[0]).b_next.is_null()); - } - - // Chain head/tail ptrs are correct - let chain_inner = chain.inner.as_ref().unwrap(); - assert_eq!(chain_inner.head.as_ptr(), els[1]); - assert_eq!(chain_inner.tail.as_ptr(), els[2]); - unsafe { - assert!((*els[1]).b_prev.is_null()); - assert!((*els[2]).b_next.is_null()); - } - } - - #[test] - fn chain_append_links() { - let els = create_linked_mblks(3); - let new_el = allocb(8); - - let mut chain = unsafe { PacketChain::new(els[0]) }.unwrap(); - let pkt = unsafe { Packet::wrap_mblk(new_el) }.unwrap(); - - chain.append(pkt); - - // Chain head/tail ptrs are correct - let chain_inner = chain.inner.as_ref().unwrap(); - 
assert_eq!(chain_inner.head.as_ptr(), els[0]); - assert_eq!(chain_inner.tail.as_ptr(), new_el); - - // Last el has been linked to the new pkt, and it has a valid - // backward link. - unsafe { - assert_eq!((*new_el).b_prev, els[2]); - assert!((*new_el).b_next.is_null()); - assert_eq!((*els[2]).b_next, new_el); - } - } - - #[test] - fn chain_drain_complete() { - let els = create_linked_mblks(64); - - let mut chain = unsafe { PacketChain::new(els[0]) }.unwrap(); - - for i in 0..els.len() { - let pkt = chain.pop_front().unwrap(); - assert_eq!(pkt.mblk_addr(), els[i] as uintptr_t); - } - - assert!(chain.pop_front().is_none()); } } diff --git a/lib/opte/src/engine/parse.rs b/lib/opte/src/engine/parse.rs new file mode 100644 index 00000000..39ed81ff --- /dev/null +++ b/lib/opte/src/engine/parse.rs @@ -0,0 +1,678 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2024 Oxide Computer Company + +//! Constructs used in packet parsing, such as choices over protocol +//! and complete packet definitions. 
+ +use super::checksum::Checksum; +use super::checksum::HeaderChecksum; +use super::ether::EthernetPacket; +use super::ether::EthernetRef; +use super::ether::ValidEthernet; +use super::geneve::validate_geneve; +use super::geneve::GENEVE_PORT; +use super::headers::HasInnerCksum; +use super::headers::HeaderActionError; +use super::headers::HeaderActionModify; +use super::headers::UlpMetaModify; +use super::headers::ValidEncapMeta; +use super::icmp::IcmpEchoMut; +use super::icmp::QueryEcho; +use super::icmp::ValidIcmpEcho; +use super::ip::v4::Ipv4Ref; +use super::ip::v6::Ipv6Packet; +use super::ip::v6::Ipv6Ref; +use super::ip::ValidL3; +use super::ip::L3; +use super::packet::AddrPair; +use super::packet::InnerFlowId; +use super::packet::MismatchError; +use super::packet::OpteMeta; +use super::packet::ParseError; +use super::packet::FLOW_ID_DEFAULT; +use super::rule::CompiledTransform; +use super::LightweightMeta; +use ingot::choice; +use ingot::ethernet::Ethertype; +use ingot::geneve::GenevePacket; +use ingot::icmp::IcmpV4; +use ingot::icmp::IcmpV4Mut; +use ingot::icmp::IcmpV4Ref; +use ingot::icmp::IcmpV6; +use ingot::icmp::IcmpV6Mut; +use ingot::icmp::IcmpV6Ref; +use ingot::icmp::ValidIcmpV4; +use ingot::icmp::ValidIcmpV6; +use ingot::ip::IpProtocol; +use ingot::tcp::Tcp; +use ingot::tcp::TcpFlags; +use ingot::tcp::TcpMut; +use ingot::tcp::TcpRef; +use ingot::tcp::ValidTcp; +use ingot::types::ByteSlice; +use ingot::types::Header; +use ingot::types::HeaderLen; +use ingot::types::HeaderParse; +use ingot::types::InlineHeader; +use ingot::types::NextLayer; +use ingot::types::ParseControl; +use ingot::udp::Udp; +use ingot::udp::UdpMut; +use ingot::udp::UdpPacket; +use ingot::udp::UdpRef; +use ingot::udp::ValidUdp; +use ingot::Parse; +use zerocopy::ByteSliceMut; +use zerocopy::IntoBytes; + +#[choice(on = IpProtocol)] +pub enum L4 { + Tcp = IpProtocol::TCP, + Udp = IpProtocol::UDP, +} + +#[choice(on = IpProtocol)] +pub enum Ulp { + Tcp = IpProtocol::TCP, + Udp = 
IpProtocol::UDP, + IcmpV4 = IpProtocol::ICMP, + IcmpV6 = IpProtocol::ICMP_V6, +} + +impl ValidUlp { + pub fn csum(&self) -> [u8; 2] { + match self { + ValidUlp::Tcp(t) => t.checksum(), + ValidUlp::Udp(u) => u.checksum(), + ValidUlp::IcmpV4(i4) => i4.checksum(), + ValidUlp::IcmpV6(i6) => i6.checksum(), + } + .to_be_bytes() + } +} + +impl ValidUlp { + pub fn compute_checksum( + &mut self, + mut body_csum: Checksum, + l3: &ValidL3, + ) { + match self { + // ICMP4 requires the body_csum *without* + // the pseudoheader added back in. + ValidUlp::IcmpV4(i4) => { + i4.set_checksum(0); + body_csum.add_bytes(i4.0.as_bytes()); + i4.set_checksum(body_csum.finalize_for_ingot()); + } + ValidUlp::IcmpV6(i6) => { + body_csum += l3.pseudo_header(); + + i6.set_checksum(0); + body_csum.add_bytes(i6.0.as_bytes()); + i6.set_checksum(body_csum.finalize_for_ingot()); + } + ValidUlp::Tcp(tcp) => { + body_csum += l3.pseudo_header(); + + tcp.set_checksum(0); + body_csum.add_bytes(tcp.0.as_bytes()); + match &tcp.1 { + Header::Repr(opts) => { + body_csum.add_bytes(opts); + } + Header::Raw(opts) => { + body_csum.add_bytes(opts); + } + } + tcp.set_checksum(body_csum.finalize_for_ingot()); + } + ValidUlp::Udp(udp) => { + body_csum += l3.pseudo_header(); + + udp.set_checksum(0); + body_csum.add_bytes(udp.0.as_bytes()); + udp.set_checksum(body_csum.finalize_for_ingot()); + } + } + } +} + +impl Ulp { + pub fn src_port(&self) -> Option { + match self { + Ulp::Tcp(t) => Some(t.source()), + Ulp::Udp(u) => Some(u.source()), + _ => None, + } + } + + pub fn dst_port(&self) -> Option { + match self { + Ulp::Tcp(t) => Some(t.destination()), + Ulp::Udp(t) => Some(t.destination()), + _ => None, + } + } +} + +#[derive(Parse)] +pub struct GeneveOverV6 { + pub outer_eth: EthernetPacket, + #[ingot(from = "L3")] + pub outer_v6: Ipv6Packet, + #[ingot(from = "L4", control = geneve_dst_port)] + pub outer_udp: UdpPacket, + pub outer_encap: GenevePacket, + + pub inner_eth: EthernetPacket, + pub inner_l3: L3, + pub 
inner_ulp: Ulp, +} + +#[inline] +fn geneve_dst_port(l4: &ValidL4) -> ParseControl { + match l4 { + ValidL4::Udp(u) if u.destination() == GENEVE_PORT => { + ParseControl::Continue + } + _ => ParseControl::Reject, + } +} + +#[inline] +fn exit_on_arp(eth: &ValidEthernet) -> ParseControl { + if eth.ethertype() == Ethertype::ARP { + ParseControl::Accept + } else { + ParseControl::Continue + } +} + +#[inline(always)] +fn flow_id( + l3: Option<&ValidL3>, + ulp: Option<&ValidUlp>, +) -> InnerFlowId { + let (proto, addrs) = match l3 { + Some(ValidL3::Ipv4(pkt)) => ( + pkt.protocol().0, + AddrPair::V4 { src: pkt.source(), dst: pkt.destination() }, + ), + Some(ValidL3::Ipv6(pkt)) => ( + pkt.next_layer().unwrap_or_default().0, + AddrPair::V6 { src: pkt.source(), dst: pkt.destination() }, + ), + None => (255, FLOW_ID_DEFAULT.addrs), + }; + + let (src_port, dst_port) = ulp + .map(|ulp| { + ( + ulp.true_src_port().or_else(|| ulp.pseudo_port()).unwrap_or(0), + ulp.true_dst_port().or_else(|| ulp.pseudo_port()).unwrap_or(0), + ) + }) + .unwrap_or((0, 0)); + + InnerFlowId { proto, addrs, src_port, dst_port } +} + +#[derive(Parse)] +pub struct NoEncap { + #[ingot(control = exit_on_arp)] + pub inner_eth: EthernetPacket, + pub inner_l3: Option>, + pub inner_ulp: Option>, +} + +impl From> for OpteMeta { + #[inline] + fn from(value: ValidNoEncap) -> Self { + NoEncap::from(value).into() + } +} + +impl LightweightMeta for ValidNoEncap { + #[inline] + fn flow(&self) -> InnerFlowId { + flow_id(self.inner_l3.as_ref(), self.inner_ulp.as_ref()) + } + + #[inline] + fn run_compiled_transform(&mut self, transform: &CompiledTransform) + where + V: ByteSliceMut, + { + transform.transform_ether(&mut self.inner_eth); + if let Some(l3) = self.inner_l3.as_mut() { + transform.transform_l3(l3); + } + if let Some(ulp) = self.inner_ulp.as_mut() { + transform.transform_ulp(ulp); + } + } + + #[inline] + fn compute_body_csum(&self) -> Option { + let use_pseudo = if let Some(v) = &self.inner_ulp { + !matches!(v, 
ValidUlp::IcmpV4(_)) + } else { + false + }; + + let pseudo_csum = match self.inner_eth.ethertype() { + Ethertype::IPV4 | Ethertype::IPV6 => { + self.inner_l3.as_ref().map(|v| v.pseudo_header()) + } + // Includes ARP. + _ => return None, + }; + + let pseudo_csum = pseudo_csum?; + + self.inner_ulp.as_ref().and_then(csum_minus_hdr).map(|mut v| { + if use_pseudo { + v -= pseudo_csum; + } + v + }) + } + + #[inline] + fn encap_len(&self) -> u16 { + 0 + } + + #[inline] + fn update_inner_checksums(&mut self, body_csum: Checksum) { + if let Some(l3) = self.inner_l3.as_mut() { + if let Some(ulp) = self.inner_ulp.as_mut() { + ulp.compute_checksum(body_csum, l3); + } + l3.compute_checksum(); + } + } + + #[inline] + fn inner_tcp(&self) -> Option<&impl TcpRef> { + match self.inner_ulp.as_ref() { + Some(ValidUlp::Tcp(t)) => Some(t), + _ => None, + } + } + + #[inline] + fn validate(&self, pkt_len: usize) -> Result<(), ParseError> { + if let Some(l3) = &self.inner_l3 { + let rem_len = pkt_len - (&self.inner_eth, l3).packet_length(); + l3.validate(rem_len)?; + if let Some(ulp) = &self.inner_ulp { + let rem_len = rem_len - ulp.packet_length(); + ulp.validate(rem_len)?; + } + } + + Ok(()) + } +} + +#[inline] +fn validate_udp( + pkt: &ValidUdp, + bytes_after: usize, +) -> Result<(), ParseError> { + // Packets can have arbitrary zero-padding at the end so + // our length *could* be larger than the packet reports. + // Unlikely in practice as Encap headers push us past the 64B + // minimum packet size. 
+ let wanted_len = bytes_after + pkt.packet_length(); + if pkt.length() as usize <= wanted_len { + Ok(()) + } else { + Err(ParseError::BadLength(MismatchError { + location: c"Udp.length", + expected: wanted_len as u64, + actual: pkt.length() as u64, + })) + } +} + +impl ValidUlp { + #[inline] + fn validate(&self, bytes_after: usize) -> Result<(), ParseError> { + match self { + ValidUlp::Udp(u) => validate_udp(u, bytes_after), + _ => Ok(()), + } + } +} + +impl From> for OpteMeta { + #[inline] + fn from(value: ValidGeneveOverV6) -> Self { + OpteMeta { + outer_eth: Some(value.outer_eth.into()), + outer_l3: Some(L3::Ipv6(value.outer_v6.into())), + outer_encap: Some(InlineHeader::Raw(ValidEncapMeta::Geneve( + value.outer_udp, + value.outer_encap, + ))), + inner_eth: value.inner_eth.into(), + inner_l3: Some(value.inner_l3.into()), + inner_ulp: Some(value.inner_ulp.into()), + } + } +} + +impl LightweightMeta for ValidGeneveOverV6 { + #[inline] + fn flow(&self) -> InnerFlowId { + flow_id(Some(&self.inner_l3), Some(&self.inner_ulp)) + } + + #[inline] + fn run_compiled_transform(&mut self, transform: &CompiledTransform) + where + V: ByteSliceMut, + { + transform.transform_ether(&mut self.inner_eth); + transform.transform_l3(&mut self.inner_l3); + transform.transform_ulp(&mut self.inner_ulp); + } + + #[inline] + fn compute_body_csum(&self) -> Option { + let use_pseudo = !matches!(self.inner_ulp, ValidUlp::IcmpV4(_)); + + let pseudo_csum = match self.inner_eth.ethertype() { + Ethertype::IPV4 | Ethertype::IPV6 => { + Some(self.inner_l3.pseudo_header()) + } + // Includes ARP. 
+ _ => return None, + }; + + let pseudo_csum = pseudo_csum?; + + csum_minus_hdr(&self.inner_ulp).map(|mut v| { + if use_pseudo { + v -= pseudo_csum; + } + v + }) + } + + #[inline] + fn encap_len(&self) -> u16 { + (self.outer_eth.packet_length() + + self.outer_v6.packet_length() + + self.outer_udp.packet_length() + + self.outer_encap.packet_length()) as u16 + } + + #[inline] + fn update_inner_checksums(&mut self, body_csum: Checksum) { + self.inner_ulp.compute_checksum(body_csum, &self.inner_l3); + self.inner_l3.compute_checksum(); + } + + #[inline] + fn inner_tcp(&self) -> Option<&impl TcpRef> { + match &self.inner_ulp { + ValidUlp::Tcp(t) => Some(t), + _ => None, + } + } + + #[inline] + fn validate(&self, pkt_len: usize) -> Result<(), ParseError> { + // Outer layers. + let rem_len = + pkt_len - (&self.outer_eth, &self.outer_v6).packet_length(); + self.outer_v6.validate(rem_len)?; + + let rem_len = rem_len - self.outer_udp.packet_length(); + validate_udp(&self.outer_udp, rem_len)?; + + validate_geneve(&self.outer_encap)?; + + // Inner layers. 
+ let rem_len = rem_len + - (&self.outer_encap, &self.outer_eth, &self.inner_l3) + .packet_length(); + self.inner_l3.validate(rem_len)?; + + let rem_len = rem_len - self.inner_ulp.packet_length(); + self.inner_ulp.validate(rem_len)?; + + Ok(()) + } +} + +#[inline] +fn csum_minus_hdr(ulp: &ValidUlp) -> Option { + match ulp { + ValidUlp::IcmpV4(icmp) => { + if icmp.checksum() == 0 { + return None; + } + + let mut csum = Checksum::from(HeaderChecksum::wrap( + icmp.checksum().to_be_bytes(), + )); + + csum.sub_bytes(&[icmp.ty(), icmp.code()]); + csum.sub_bytes(icmp.rest_of_hdr_ref()); + + Some(csum) + } + ValidUlp::IcmpV6(icmp) => { + if icmp.checksum() == 0 { + return None; + } + + let mut csum = Checksum::from(HeaderChecksum::wrap( + icmp.checksum().to_be_bytes(), + )); + + csum.sub_bytes(&[icmp.ty(), icmp.code()]); + csum.sub_bytes(icmp.rest_of_hdr_ref()); + + Some(csum) + } + ValidUlp::Tcp(tcp) => { + if tcp.checksum() == 0 { + return None; + } + + let mut csum = Checksum::from(HeaderChecksum::wrap( + tcp.checksum().to_be_bytes(), + )); + + let b = tcp.0.as_bytes(); + + csum.sub_bytes(&b[0..16]); + csum.sub_bytes(&b[18..]); + + csum.sub_bytes(match &tcp.1 { + ingot::types::Header::Repr(v) => &v[..], + ingot::types::Header::Raw(v) => &v[..], + }); + + Some(csum) + } + ValidUlp::Udp(udp) => { + if udp.checksum() == 0 { + return None; + } + + let mut csum = Checksum::from(HeaderChecksum::wrap( + udp.checksum().to_be_bytes(), + )); + + let b = udp.0.as_bytes(); + csum.sub_bytes(&b[0..6]); + + Some(csum) + } + } +} + +impl Ulp { + #[inline] + pub fn true_src_port(&self) -> Option { + match self { + Ulp::Tcp(pkt) => Some(pkt.source()), + Ulp::Udp(pkt) => Some(pkt.source()), + _ => None, + } + } + + #[inline] + pub fn true_dst_port(&self) -> Option { + match self { + Ulp::Tcp(pkt) => Some(pkt.destination()), + Ulp::Udp(pkt) => Some(pkt.destination()), + _ => None, + } + } + + #[inline] + pub fn pseudo_port(&self) -> Option { + match self { + Ulp::IcmpV4(pkt) => 
pkt.echo_id(), + Ulp::IcmpV6(pkt) => pkt.echo_id(), + _ => None, + } + } +} + +impl ValidUlp { + #[inline] + pub fn true_src_port(&self) -> Option { + match self { + ValidUlp::Tcp(pkt) => Some(pkt.source()), + ValidUlp::Udp(pkt) => Some(pkt.source()), + _ => None, + } + } + + #[inline] + pub fn true_dst_port(&self) -> Option { + match self { + ValidUlp::Tcp(pkt) => Some(pkt.destination()), + ValidUlp::Udp(pkt) => Some(pkt.destination()), + _ => None, + } + } + + #[inline] + pub fn pseudo_port(&self) -> Option { + match self { + ValidUlp::IcmpV4(pkt) => pkt.echo_id(), + ValidUlp::IcmpV6(pkt) => pkt.echo_id(), + _ => None, + } + } +} + +impl HasInnerCksum for Ulp { + const HAS_CKSUM: bool = true; +} + +impl HeaderActionModify for Ulp { + #[inline] + fn run_modify( + &mut self, + mod_spec: &UlpMetaModify, + ) -> Result<(), HeaderActionError> { + match self { + Ulp::Tcp(t) => { + if let Some(src) = mod_spec.generic.src_port { + t.set_source(src); + } + if let Some(dst) = mod_spec.generic.dst_port { + t.set_destination(dst); + } + if let Some(flags) = mod_spec.tcp_flags { + t.set_flags(TcpFlags::from_bits_retain(flags)); + } + } + Ulp::Udp(u) => { + if let Some(src) = mod_spec.generic.src_port { + u.set_source(src); + } + if let Some(dst) = mod_spec.generic.dst_port { + u.set_destination(dst); + } + } + Ulp::IcmpV4(i4) => { + if let Some(id) = mod_spec.icmp_id { + if i4.echo_id().is_some() { + let roh = i4.rest_of_hdr_mut(); + ValidIcmpEcho::parse(&mut roh[..]) + .expect( + "ICMP ROH is exactly as large as ValidIcmpEcho", + ) + .0 + .set_id(id); + } + } + } + Ulp::IcmpV6(i6) => { + if let Some(id) = mod_spec.icmp_id { + if i6.echo_id().is_some() { + let roh = i6.rest_of_hdr_mut(); + ValidIcmpEcho::parse(&mut roh[..]) + .expect( + "ICMP ROH is exactly as large as ValidIcmpEcho", + ) + .0 + .set_id(id); + } + } + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod test { + use crate::engine::checksum::Checksum as OpteCsum; + use ingot::types::ParseChoice; + use 
smoltcp::phy::ChecksumCapabilities; + use smoltcp::wire::Icmpv4Packet; + use smoltcp::wire::Icmpv4Repr; + + use super::*; + + #[test] + fn icmp4_body_csum_equals_body() { + let data = b"reunion\0"; + let mut body_csum = OpteCsum::default(); + body_csum.add_bytes(data); + + let mut cksum_cfg = ChecksumCapabilities::ignored(); + cksum_cfg.icmpv4 = smoltcp::phy::Checksum::Both; + + let test_pkt = Icmpv4Repr::EchoRequest { ident: 7, seq_no: 7777, data }; + let mut out = vec![0u8; test_pkt.buffer_len()]; + let mut packet = Icmpv4Packet::new_unchecked(&mut out); + test_pkt.emit(&mut packet, &cksum_cfg); + + let src = &mut out[..IcmpV4::MINIMUM_LENGTH]; + let (ulp, ..) = + ValidUlp::parse_choice(src, Some(IpProtocol::ICMP)).unwrap(); + + assert_eq!( + Some(body_csum.finalize()), + csum_minus_hdr(&ulp).map(|mut v| v.finalize()), + ); + } +} diff --git a/lib/opte/src/engine/port.rs b/lib/opte/src/engine/port.rs index a5cdb4de..94fe776a 100644 --- a/lib/opte/src/engine/port.rs +++ b/lib/opte/src/engine/port.rs @@ -7,14 +7,23 @@ //! A virtual switch port. 
use self::meta::ActionMeta; +use super::ether::Ethernet; use super::flow_table::Dump; use super::flow_table::FlowEntry; use super::flow_table::FlowTable; use super::flow_table::Ttl; +use super::geneve::GENEVE_PORT; +use super::headers::EncapPush; +use super::headers::HeaderAction; +use super::headers::IpPush; +use super::headers::UlpHeaderAction; use super::ioctl; use super::ioctl::TcpFlowEntryDump; use super::ioctl::TcpFlowStateDump; use super::ioctl::UftEntryDump; +use super::ip::v4::Ipv4; +use super::ip::v6::Ipv6; +use super::ip::L3Repr; use super::layer; use super::layer::Layer; use super::layer::LayerError; @@ -23,13 +32,16 @@ use super::layer::LayerStatsSnap; use super::layer::RuleId; use super::packet::BodyTransform; use super::packet::BodyTransformError; -use super::packet::Initialized; +use super::packet::FullParsed; use super::packet::InnerFlowId; +use super::packet::LiteParsed; +use super::packet::MblkFullParsed; +use super::packet::MblkPacketData; use super::packet::Packet; -use super::packet::PacketMeta; -use super::packet::Parsed; +use super::packet::Pullup; use super::packet::FLOW_ID_DEFAULT; use super::rule::Action; +use super::rule::CompiledTransform; use super::rule::Finalized; use super::rule::HdrTransform; use super::rule::HdrTransformError; @@ -40,6 +52,7 @@ use super::tcp::TIME_WAIT_EXPIRE_TTL; use super::tcp_state::TcpFlowState; use super::tcp_state::TcpFlowStateError; use super::HdlPktAction; +use super::LightweightMeta; use super::NetworkImpl; use crate::d_error::DError; #[cfg(all(not(feature = "std"), not(test)))] @@ -48,11 +61,15 @@ use crate::ddi::kstat; use crate::ddi::kstat::KStatNamed; use crate::ddi::kstat::KStatProvider; use crate::ddi::kstat::KStatU64; +use crate::ddi::mblk::MsgBlk; +use crate::ddi::mblk::MsgBlkIterMut; use crate::ddi::sync::KMutex; use crate::ddi::sync::KMutexType; use crate::ddi::time::Moment; use crate::engine::flow_table::ExpiryPolicy; -use crate::engine::tcp::TcpMeta; +use crate::engine::packet::EmitSpec; +use 
crate::engine::packet::PushSpec; +use crate::engine::rule::CompiledEncap; use crate::ExecCtx; use alloc::boxed::Box; use alloc::ffi::CString; @@ -60,6 +77,7 @@ use alloc::string::String; use alloc::string::ToString; use alloc::sync::Arc; use alloc::vec::Vec; +use core::ffi::CStr; use core::fmt; use core::fmt::Display; use core::num::NonZeroU32; @@ -67,12 +85,20 @@ use core::result; use core::str::FromStr; use core::sync::atomic::AtomicU64; use core::sync::atomic::Ordering::SeqCst; -#[cfg(all(not(feature = "std"), not(test)))] use illumos_sys_hdrs::uintptr_t; -use kstat_macro::KStatProvider; +use ingot::ethernet::Ethertype; +use ingot::geneve::Geneve; +use ingot::ip::IpProtocol; +use ingot::tcp::TcpRef; +use ingot::types::Emit; +use ingot::types::HeaderLen; +use ingot::types::Read; +use ingot::udp::Udp; use opte_api::Direction; use opte_api::MacAddr; use opte_api::OpteError; +use zerocopy::ByteSlice; +use zerocopy::ByteSliceMut; pub type Result = result::Result; @@ -86,6 +112,7 @@ pub enum ProcessError { WriteError(super::packet::WriteError), MissingFlow(InnerFlowId), TcpFlow(TcpFlowStateError), + BadEmitSpec, FlowTableFull { kind: &'static str, limit: u64 }, } @@ -116,18 +143,18 @@ impl From for ProcessError { /// The result of processing a packet. /// /// * Bypass: Let this packet bypass the system; do not process it at -/// all. XXX This is probably going away as its only use is for -/// punting on traffic I didn't want to deal with yet. +/// all. XXX This is probably going away as its only use is for +/// punting on traffic I didn't want to deal with yet. /// -/// * Drop: The packet has beend dropped, as determined by the rules -/// or because of resource exhaustion. Included is the reason for the -/// drop. +/// * Drop: The packet has been dropped, as determined by the rules +/// or because of resource exhaustion. Included is the reason for the +/// drop. /// /// * Modified: The packet has been modified based on its matching rules. 
/// /// * Hairpin: One of the layers has determined that it should reply -/// directly with a packet of its own. In this case the original -/// packet is dropped. +/// directly with a packet of its own. In this case the original +/// packet is dropped. #[derive(Debug, DError)] pub enum ProcessResult { Bypass, @@ -135,12 +162,34 @@ pub enum ProcessResult { Drop { reason: DropReason, }, - Modified, #[leaf] - Hairpin(Packet), + Modified(EmitSpec), + // TODO: it would be nice if this packet type could be user-specified, but might + // be tricky. + #[leaf] + Hairpin(MsgBlk), } impl From for ProcessResult { + fn from(hpa: HdlPktAction) -> Self { + match hpa { + // TODO: In theory HdlPacket::Allow should have an emit spec, too. + // We are not using any op other than Hairpin, so kick that particular + // can down the road. + HdlPktAction::Allow => Self::Modified(EmitSpec::default()), + HdlPktAction::Deny => Self::Drop { reason: DropReason::HandlePkt }, + HdlPktAction::Hairpin(pkt) => Self::Hairpin(pkt), + } + } +} + +enum InternalProcessResult { + Drop { reason: DropReason }, + Modified, + Hairpin(MsgBlk), +} + +impl From for InternalProcessResult { fn from(hpa: HdlPktAction) -> Self { match hpa { HdlPktAction::Allow => Self::Modified, @@ -473,17 +522,23 @@ pub enum DumpLayerError { } /// An entry in the Unified Flow Table. -#[derive(Clone, Debug)] pub struct UftEntry { /// The flow ID for the other side. - pair: Option, + pair: KMutex>, /// The transformations to perform. - xforms: Transforms, + xforms: Arc, + + /// Cached flow hash to speed up route selection. + l4_hash: u32, /// The port epoch upon which this entry was established. Used for /// invalidation when the rule set is updated. epoch: u64, + + /// Cached reference to a flow's TCP state, if applicable. 
+ /// This allows us to maintain up-to-date TCP flow table info + tcp_flow: Option>>, } impl Dump for UftEntry { @@ -514,6 +569,20 @@ impl Display for UftEntry { } } +impl fmt::Debug for UftEntry { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let UftEntry { pair: _pair, xforms, l4_hash, epoch, tcp_flow } = self; + + f.debug_struct("UftEntry") + .field("pair", &"") + .field("xforms", xforms) + .field("l4_hash", l4_hash) + .field("epoch", epoch) + .field("tcp_flow", tcp_flow) + .finish() + } +} + /// Cumulative counters for a single [`Port`]. #[derive(KStatProvider)] struct PortStats { @@ -837,10 +906,10 @@ impl Port { data: &FlowTable, dir: Direction, msg: String, - pkt: &mut Packet, + pkt: &mut Packet, ) { if unsafe { super::opte_panic_debug != 0 } { - super::err!("mblk: {}", pkt.mblk_ptr_str()); + super::err!("mblk: {}", pkt.mblk_addr()); super::err!("flow: {}", pkt.flow()); super::err!("meta: {:?}", pkt.meta()); super::err!("flows: {:?}", data); @@ -853,7 +922,7 @@ impl Port { fn tcp_err_probe( &self, dir: Direction, - pkt: Option<&Packet>, + pkt: Option<&Packet>, flow: &InnerFlowId, msg: String, ) { @@ -1141,47 +1210,331 @@ impl Port { /// # States /// /// This command is valid only for [`PortState::Running`]. - pub fn process( + #[inline(always)] + pub fn process<'a, M>( &self, dir: Direction, - pkt: &mut Packet, - mut ameta: ActionMeta, - ) -> result::Result { - let flow_before = *pkt.flow(); - let epoch = self.epoch.load(SeqCst); - let mut data = self.data.lock(); + // TODO: might want to pass in a &mut to an enum + // which can advance to (and hold) light->full-fat metadata. + // My gutfeel is that there's a perf cost here -- this struct + // is pretty large, but expressing the transform on a &mut is also + // less than ideal. 
+ mut pkt: Packet, M>>, + ) -> result::Result + where + M: LightweightMeta< as Read>::Chunk>, + { + let process_start = Moment::now(); + let flow_before = pkt.flow(); + let mblk_addr = pkt.mblk_addr(); + + // Packet processing is split into a few mechanisms based on + // expected speed, based on actions and the size of required metadata: + // + // 1. UFT exists. Pure push/pop with simple modifications to + // inner ULP fields. No body transform. + // 2. UFT exists. Flow transform could not be compiled as above. + // Convert to full metadata and apply saved transform list. + // 3. No UFT exists. Walk all tables, save and apply transforms + // piecemeal OR produce a non-`Modified` decision. + // + // Generally, 1 > 2 >>> 3 in terms of rate of pps. + // Both 1 and 2 are able to drop the port lock very quickly. + // + // This tiering exists because we can save space on metadata + // when we know that we won't have mixed owned/borrowed packet + // data, and when we don't need to keep space for absent layers. + // The size of metadata structs is a large bottleneck on packet + // parsing performance, so we expect that minimising it for the + // majority of packets pays off in the limit. + // + // In case 1, we can also cache and reuse the same EmitSpec for + // all hit packets. + + // The lock needs to be optional here because there is one + // case wherein we need to reacquire the lock -- invalidation + // by TCP state. + let mut lock = Some(self.data.lock()); + let data = lock.as_mut().expect("lock should be held on this codepath"); + + // (1) Check for UFT and precompiled. 
+ let epoch = self.epoch(); check_state!(data.state, [PortState::Running]) .map_err(|_| ProcessError::BadState(data.state))?; - self.port_process_entry_probe(dir, &flow_before, epoch, pkt); - let res = match dir { - Direction::Out => { - let res = self.process_out(&mut data, epoch, pkt, &mut ameta); - Self::update_stats_out(&mut data.stats.vals, &res); - res + self.port_process_entry_probe(dir, &flow_before, epoch, mblk_addr); + + let uft: Option<&Arc>>> = match dir { + Direction::Out => data.uft_out.get(&flow_before), + Direction::In => data.uft_in.get(&flow_before), + }; + + enum FastPathDecision { + CompiledUft(Arc>>), + Uft(Arc>>), + Slow, + } + + let decision = match uft { + // We have a valid UFT entry of some kind -- clone out the + // saved transforms so that we can drop the lock ASAP. + Some(entry) if entry.state().epoch == epoch => { + // The Fast Path. + let xforms = &entry.state().xforms; + let out = if xforms.compiled.is_some() { + FastPathDecision::CompiledUft(Arc::clone(entry)) + } else { + FastPathDecision::Uft(Arc::clone(entry)) + }; + + match dir { + Direction::In => data.stats.vals.in_uft_hit += 1, + Direction::Out => data.stats.vals.out_uft_hit += 1, + } + + out } - Direction::In => { - let res = self.process_in( - &mut data, + // The entry is from a previous epoch; invalidate its UFT + // entries and proceed to rule processing. + Some(entry) => { + let epoch = entry.state().epoch; + let owned_pair = *entry.state().pair.lock(); + let (ufid_in, ufid_out) = match dir { + Direction::Out => (owned_pair.as_ref(), Some(&flow_before)), + Direction::In => (Some(&flow_before), owned_pair.as_ref()), + }; + self.uft_invalidate(data, ufid_out, ufid_in, epoch); + + FastPathDecision::Slow + } + None => FastPathDecision::Slow, + }; + + // (1)/(2) UFT hit. Update stats, drop locks, validate TCP state. + // We *almost always* know the result is modified. 
+ // This will produce an incorrect stat in the event that TCP invalidation + // forces a reprocess, but I believe this is a necessary evil to keep work + // out of the portlock today. The correct fix is to AtomicU64 those stats, + // which we'll need for later metrics too. + // However, fixing this up if we get it wrong is simple enough. + let mut invalidated_tcp = None; + let mut reprocess = false; + + match &decision { + FastPathDecision::CompiledUft(entry) + | FastPathDecision::Uft(entry) => { + // TODO: Ideally the Kstat should be holding AtomicU64s, then we get + // out of the lock sooner. Note that we don't need to *apply* a given + // set of transforms in order to know which stats we'll modify. + let dummy_res = Ok(InternalProcessResult::Modified); + match dir { + Direction::In => { + Self::update_stats_in(&mut data.stats.vals, &dummy_res); + } + Direction::Out => { + Self::update_stats_out( + &mut data.stats.vals, + &dummy_res, + ); + } + } + + let _ = data; + drop(lock.take()); + + entry.hit_at(process_start); + self.uft_hit_probe(dir, &flow_before, epoch, &process_start); + + let tcp = entry.state().tcp_flow.as_ref(); + if let Some(tcp_flow) = tcp { + tcp_flow.hit_at(process_start); + + let tcp = pkt + .meta() + .inner_tcp() + .expect("failed to find TCP state on known TCP flow"); + + let ufid_in = match dir { + Direction::In => Some(&flow_before), + Direction::Out => None, + }; + + match tcp_flow.state().update( + self.name_cstr.as_c_str(), + tcp, + dir, + pkt.len() as u64, + ufid_in, + ) { + Ok(TcpState::Closed) => { + invalidated_tcp = Some(Arc::clone(tcp_flow)); + } + Err(TcpFlowStateError::NewFlow { .. }) => { + invalidated_tcp = Some(Arc::clone(tcp_flow)); + reprocess = true; + } + _ => {} + } + } + } + _ => {} + } + + // reprocess => invalidated_tcp.is_some(); + debug_assert!(!reprocess || invalidated_tcp.is_some()); + + // We've determined we're actually starting a new TCP flow (e.g., SYN + // on any other state) from an existing UFT entry. 
+ // We know the lock is dropped -- reacquire the lock to remove the flow. + // Elevate lock to full scope, if we are reprocessing as well. + if let Some(entry) = invalidated_tcp { + let mut local_lock = self.data.lock(); + + let flow_lock = entry.state().inner.lock(); + let ufid_out = &flow_lock.outbound_ufid; + + let ufid_in = flow_lock.inbound_ufid.as_ref(); + self.uft_tcp_closed(&mut local_lock, ufid_out, ufid_in); + + let _ = local_lock.tcp_flows.remove(ufid_out).unwrap(); + + if reprocess { + lock = Some(local_lock); + } + } + + if !reprocess { + // (1) Execute precompiled, and exit. + if let FastPathDecision::CompiledUft(entry) = decision { + let l4_hash = entry.state().l4_hash; + let tx = + entry.state().xforms.compiled.as_ref().cloned().unwrap(); + + let len = pkt.len(); + let meta = pkt.meta_mut(); + let body_csum = if tx.checksums_dirty { + meta.compute_body_csum() + } else { + None + }; + meta.run_compiled_transform(&tx); + if let Some(csum) = body_csum { + meta.update_inner_checksums(csum); + } + let encap_len = meta.encap_len(); + let ulp_len = (len - (encap_len as usize)) as u32; + let rewind = match tx.encap { + CompiledEncap::Pop => encap_len, + _ => 0, + }; + let out = EmitSpec { + prepend: PushSpec::Fastpath(tx), + l4_hash, + rewind, + ulp_len, + }; + + let flow_after = meta.flow(); + let res = Ok(ProcessResult::Modified(out)); + self.port_process_return_probe( + dir, + &flow_before, + &flow_after, epoch, - pkt, + mblk_addr, + &res, + 1, + ); + return res; + } + } + + // (2)/(3) Full-fat metadata is required. + let mut pkt = pkt.to_full_meta(); + let mut ameta = ActionMeta::new(); + + let (res, path) = match (&decision, dir) { + // (2) Apply retrieved transform. Lock is dropped. + // Reuse cached l4 hash. 
+ (FastPathDecision::Uft(entry), _) if !reprocess => { + let l4_hash = entry.state().l4_hash; + let tx = Arc::clone(&entry.state().xforms); + + pkt.set_l4_hash(l4_hash); + tx.apply(&mut pkt, dir)?; + (Ok(InternalProcessResult::Modified), 2) + } + + // (3) Full-table processing for the packet, then drop the lock. + // Cksum updates are left undone, so we perform those manually + // outside the port lock. + (_, Direction::In) => { + let data = lock + .as_mut() + .expect("lock should be held on this codepath"); + + let res = self.process_in_miss( + data, + epoch, + &mut pkt, &flow_before, &mut ameta, ); - Self::update_stats_in(&mut data.stats.vals, &res); - res + + // Prevent double-counting reprocessed modify entries. + if !(reprocess + && matches!(res, Ok(InternalProcessResult::Modified))) + { + Self::update_stats_in(&mut data.stats.vals, &res); + } + drop(lock); + + pkt.update_checksums(); + (res, 3) + } + (_, Direction::Out) => { + let data = lock + .as_mut() + .expect("lock should be held on this codepath"); + + let res = + self.process_out_miss(data, epoch, &mut pkt, &mut ameta); + + // Prevent double-counting reprocessed modify entries. + if !(reprocess + && matches!(res, Ok(InternalProcessResult::Modified))) + { + Self::update_stats_out(&mut data.stats.vals, &res); + } + drop(lock); + + pkt.update_checksums(); + (res, 3) } }; - drop(data); - // Emit the updated headers if the packet was modified as part - // of processing. 
- if let Ok(ProcessResult::Modified) = res { - pkt.emit_new_headers()?; - } + let flow_after = *pkt.flow(); - self.port_process_return_probe(dir, &flow_before, epoch, pkt, &res); + let res = res.and_then(|v| match v { + InternalProcessResult::Drop { reason } => { + Ok(ProcessResult::Drop { reason }) + } + InternalProcessResult::Hairpin(v) => Ok(ProcessResult::Hairpin(v)), + InternalProcessResult::Modified => pkt + .emit_spec() + .map_err(|_| ProcessError::BadEmitSpec) + .map(ProcessResult::Modified), + }); + self.port_process_return_probe( + dir, + &flow_before, + &flow_after, + epoch, + mblk_addr, + &res, + path, + ); res } @@ -1223,6 +1576,8 @@ impl Port { // future we could eliminate this window by passing a // reference to the epoch to `Layer::remove_rule()` // and let it perform the increment. + // XXX(kyle) This is not a concern while we have the + // port lock in place. self.epoch.fetch_add(1, SeqCst); return Ok(()); } @@ -1306,7 +1661,7 @@ impl Port { .lock() .tcp_flows .get(flow) - .map(|entry| entry.state().tcp_state.tcp_state()) + .map(|entry| entry.state().tcp_state()) } } @@ -1314,16 +1669,7 @@ impl Port { #[derive(Debug)] enum TcpMaybeClosed { Closed { ufid_inbound: Option }, - NewState(TcpState), -} - -impl From for TcpState { - fn from(value: TcpMaybeClosed) -> Self { - match value { - TcpMaybeClosed::Closed { .. 
} => TcpState::Closed, - TcpMaybeClosed::NewState(s) => s, - } - } + NewState(TcpState, Arc>), } // This is a convenience wrapper for keeping the header and body @@ -1333,11 +1679,216 @@ impl From for TcpState { pub(crate) struct Transforms { pub(crate) hdr: Vec, pub(crate) body: Vec>, + pub(crate) compiled: Option>, } impl Transforms { fn new() -> Self { - Self { hdr: Vec::with_capacity(8), body: Vec::with_capacity(2) } + Self { + hdr: Vec::with_capacity(8), + body: Vec::with_capacity(2), + compiled: None, + } + } + + #[inline] + fn apply<'a, T: Read + Pullup + 'a>( + &self, + pkt: &mut Packet>, + dir: Direction, + ) -> result::Result<(), ProcessError> + where + T::Chunk: ByteSliceMut, + { + // TODO: It should be possible to combine header transforms + // into a single operation per layer, particularly when + // they are disjoint like we do in the Compiled case. + for ht in &self.hdr { + pkt.hdr_transform(ht)?; + } + + for bt in &self.body { + pkt.body_transform(dir, &**bt)?; + } + + pkt.update_checksums(); + + Ok(()) + } + + #[inline] + fn compile(mut self, checksums_dirty: bool) -> Arc { + // Compile to a fasterpath transform iff. no body transform. + if self.body.is_empty() { + let mut still_permissable = true; + + let mut outer_ether = None; + let mut outer_ip = None; + let mut outer_encap = None; + + let mut inner_ether = None; + let mut inner_ip = None; + let mut inner_ulp = None; + for transform in &self.hdr { + if !still_permissable { + continue; + } + + // All outer layers must be pushed (or popped/ignored) at the same + // time for compilation. No modifications are permissable. + fn store_outer_push( + tx: &HeaderAction, + still_permissable: &mut bool, + slot: &mut Option

, + ) { + match tx { + HeaderAction::Push(p) => *slot = Some(*p), + HeaderAction::Pop => *slot = None, + HeaderAction::Modify(_) => *still_permissable = false, + HeaderAction::Ignore => {} + } + } + store_outer_push( + &transform.outer_ether, + &mut still_permissable, + &mut outer_ether, + ); + store_outer_push( + &transform.outer_ip, + &mut still_permissable, + &mut outer_ip, + ); + store_outer_push( + &transform.outer_encap, + &mut still_permissable, + &mut outer_encap, + ); + + // Allow up to one action per ULP field, which must be modify. + // We can't yet combine sets of `Modify` actions, + // but the Oxide dataplane does not use this in practice. + fn store_inner_mod<'a, P, M>( + tx: &'a HeaderAction, + still_permissable: &mut bool, + slot: &mut Option<&'a M>, + ) { + match tx { + HeaderAction::Push(_) | HeaderAction::Pop => { + *still_permissable = false; + } + HeaderAction::Modify(m) => { + *still_permissable &= slot.replace(m).is_none(); + } + HeaderAction::Ignore => {} + } + } + store_inner_mod( + &transform.inner_ether, + &mut still_permissable, + &mut inner_ether, + ); + store_inner_mod( + &transform.inner_ip, + &mut still_permissable, + &mut inner_ip, + ); + + match &transform.inner_ulp { + UlpHeaderAction::Modify(m) => { + still_permissable &= inner_ulp.replace(m).is_none(); + } + UlpHeaderAction::Ignore => {} + } + } + + if still_permissable { + let encap = match (outer_ether, outer_ip, outer_encap) { + (Some(eth), Some(ip), Some(encap)) => { + let encap_repr = match encap { + EncapPush::Geneve(g) => ( + Udp { + source: g.entropy, + destination: GENEVE_PORT, + ..Default::default() + }, + Geneve { vni: g.vni, ..Default::default() }, + ), + }; + + let eth_repr = Ethernet { + destination: eth.dst, + source: eth.src, + ethertype: Ethertype(eth.ether_type.into()), + }; + let (ip_repr, l3_extra_bytes, ip_len_offset) = match ip + { + IpPush::Ip4(v4) => ( + L3Repr::Ipv4(Ipv4 { + protocol: IpProtocol(v4.proto.into()), + source: v4.src, + destination: v4.dst, 
+ total_len: Ipv4::MINIMUM_LENGTH as u16, + ..Default::default() + }), + Ipv4::MINIMUM_LENGTH, + 2, + ), + IpPush::Ip6(v6) => ( + L3Repr::Ipv6(Ipv6 { + next_header: IpProtocol(v6.proto.into()), + source: v6.src, + destination: v6.dst, + payload_len: 0, + ..Default::default() + }), + 0, + 4, + ), + }; + + let encap_sz = encap_repr.packet_length(); + let l3_len_offset = + eth_repr.packet_length() + ip_len_offset; + + // UDP has a length field 4B into its header. + // in event of TCP, l4_len_offset is ignored. + let l4_len_offset = eth_repr.packet_length() + + ip_repr.packet_length() + + 4; + + let bytes = (eth_repr, ip_repr, encap_repr).emit_vec(); + + Some(CompiledEncap::Push { + encap, + eth, + ip, + bytes, + l3_len_offset, + l3_extra_bytes, + l4_len_offset, + encap_sz, + }) + } + (None, None, None) => Some(CompiledEncap::Pop), + _ => None, + }; + + if let Some(encap) = encap { + self.compiled = Some( + CompiledTransform { + encap, + inner_ether: inner_ether.cloned(), + inner_ip: inner_ip.cloned(), + inner_ulp: inner_ulp.cloned(), + checksums_dirty, + } + .into(), + ); + } + } + } + + Arc::new(self) } } @@ -1348,6 +1899,7 @@ impl fmt::Debug for Transforms { f.debug_struct("Transforms") .field("hdr", &self.hdr) .field("body", &body_strs) + .field("compiled", &self.compiled) .finish() } } @@ -1372,7 +1924,7 @@ impl Port { &self, data: &mut PortData, dir: Direction, - pkt: &mut Packet, + pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { @@ -1411,12 +1963,13 @@ impl Port { Ok(LayerResult::Allow) } + #[inline] fn port_process_entry_probe( &self, dir: Direction, flow: &InnerFlowId, epoch: u64, - pkt: &Packet, + mblk_addr: uintptr_t, ) { cfg_if::cfg_if! 
{ if #[cfg(all(not(feature = "std"), not(test)))] { @@ -1426,30 +1979,32 @@ impl Port { self.name_cstr.as_ptr() as uintptr_t, flow, epoch as uintptr_t, - pkt.mblk_addr(), + mblk_addr, ); } } else if #[cfg(feature = "usdt")] { let flow_s = flow.to_string(); crate::opte_provider::port__process__entry!( - || (dir, &self.name, flow_s, epoch, pkt.mblk_addr()) + || (dir, &self.name, flow_s, epoch, mblk_addr) ); } else { - let (..) = (dir, flow, epoch, pkt); + let (..) = (dir, flow, epoch, mblk_addr); } } } + #[allow(clippy::too_many_arguments)] + #[inline(always)] fn port_process_return_probe( &self, dir: Direction, flow_before: &InnerFlowId, + flow_after: &InnerFlowId, epoch: u64, - pkt: &Packet, + mblk_addr: uintptr_t, res: &result::Result, + path: u64, ) { - let flow_after = pkt.flow(); - cfg_if! { if #[cfg(all(not(feature = "std"), not(test)))] { @@ -1494,9 +2049,10 @@ impl Port { flow_before, flow_after, epoch as uintptr_t, - pkt.mblk_addr(), + mblk_addr, hp_pkt_ptr, eb.as_ptr(), + path as uintptr_t, ); } } else if #[cfg(feature = "usdt")] { @@ -1506,18 +2062,19 @@ impl Port { Ok(v) => format!("{:?}", v), Err(e) => format!("ERROR: {:?}", e), }; + let _ = path; crate::opte_provider::port__process__return!( || ( (dir, self.name.as_str()), (flow_b_s.as_ref(), flow_a_s.as_ref()), epoch, - pkt.mblk_addr(), + mblk_addr, res_str ) ); } else { - let (..) = (dir, flow_before, flow_after, epoch, pkt, res); + let (..) = (dir, flow_before, flow_after, epoch, mblk_addr, res, path); } } } @@ -1528,13 +2085,13 @@ impl Port { /// * `OpteError::MaxCapacity(_)` if the TCP flows table is full. /// * `ProcessError::TcpFlow(_)` if we do not have a valid transition from /// `Closed` based on the packet state. - fn create_new_tcp_entry( + fn create_new_tcp_entry( &self, tcp_flows: &mut FlowTable, - tcp: &TcpMeta, + tcp: &impl TcpRef, dir: &TcpDirection, pkt_len: u64, - ) -> result::Result { + ) -> result::Result { // Create a new entry and find its current state. 
In // this case it should always be `SynSent`, unless we're // recovering an `Established` flow. @@ -1559,31 +2116,36 @@ impl Port { let (ufid_out, tfes) = match *dir { TcpDirection::In { ufid_in, ufid_out } => ( ufid_out, - TcpFlowEntryState::new_inbound(*ufid_in, tfs, pkt_len), + TcpFlowEntryState::new_inbound( + *ufid_out, *ufid_in, tfs, pkt_len, + ), + ), + TcpDirection::Out { ufid_out } => ( + ufid_out, + TcpFlowEntryState::new_outbound(*ufid_out, tfs, pkt_len), ), - TcpDirection::Out { ufid_out } => { - (ufid_out, TcpFlowEntryState::new_outbound(tfs, pkt_len)) - } }; - match tcp_flows.add(*ufid_out, tfes) { - Ok(_) => {} + match tcp_flows.add_and_return(*ufid_out, tfes) { + Ok(entry) => Ok(TcpMaybeClosed::NewState(tcp_state, entry)), Err(OpteError::MaxCapacity(limit)) => { - return Err(ProcessError::FlowTableFull { - kind: "TCP", - limit, - }); + Err(ProcessError::FlowTableFull { kind: "TCP", limit }) } Err(_) => unreachable!( "Cannot return other errors from FlowTable::add" ), - }; + } + } else { + Ok(TcpMaybeClosed::Closed { + ufid_inbound: match *dir { + TcpDirection::In { ufid_in, .. } => Some(*ufid_in), + TcpDirection::Out { .. } => None, + }, + }) } - - Ok(tcp_state) } /// Attempts to lookup and update TCP flowstate in response to a given - /// packet. + /// packet from within the slowpath. /// /// Unexpected TCP segments on existing connections will be allowed, /// but will fire DTrace probes via `Self::tcp_err_probe`. @@ -1597,65 +2159,47 @@ impl Port { /// (e.g. `process_out_tcp_existing`) should respond to `NewFlow` by creating /// a new TCP flow table entry. Where possible, this should be done by treating /// a packet as a UFT miss (e.g., `process_out_miss`) and reprocessing the flow. 
- fn update_tcp_entry( + fn update_tcp_entry( &self, - mut data: PortDataOrSubset, - tcp: &TcpMeta, + data: &mut PortData, + tcp: &impl TcpRef, dir: &TcpDirection, pkt_len: u64, ) -> result::Result { - let tcp_flows = data.tcp_flows(); let (ufid_out, ufid_in) = match *dir { TcpDirection::In { ufid_in, ufid_out } => (ufid_out, Some(ufid_in)), TcpDirection::Out { ufid_out } => (ufid_out, None), }; - let Some(entry) = tcp_flows.get_mut(ufid_out) else { + let Some(entry) = data.tcp_flows.get(ufid_out) else { return Err(ProcessError::MissingFlow(*ufid_out)); }; + let entry = entry.clone(); entry.hit(); - let tfes = entry.state_mut(); - match *dir { - TcpDirection::In { .. } => { - tfes.segs_in += 1; - tfes.bytes_in += pkt_len; - } - TcpDirection::Out { .. } => { - tfes.segs_out += 1; - tfes.bytes_out += pkt_len; - } - } + let tfes_base = entry.state(); - let next_state = tfes.tcp_state.process( + let next_state = tfes_base.update( self.name_cstr.as_c_str(), - dir.dir(), - ufid_out, tcp, + dir.dir(), + pkt_len, + ufid_in, ); - if let Some(ufid_in) = ufid_in { - // We need to store the UFID of the inbound packet - // before it was processed so that we can retire the - // correct UFT/LFT entries upon connection - // termination. - tfes.inbound_ufid = Some(*ufid_in); - } - let ufid_inbound = if matches!( next_state, Ok(TcpState::Closed) | Err(TcpFlowStateError::NewFlow { .. }) ) { // Due to order of operations, out_tcp_existing must // call uft_tcp_closed separately. - let entry = tcp_flows.remove(ufid_out).unwrap(); - let state_ufid = entry.state().inbound_ufid; + let entry = data.tcp_flows.remove(ufid_out).unwrap(); + let lock = entry.state().inner.lock(); + let state_ufid = lock.inbound_ufid; - if let PortDataOrSubset::Port(data) = data { - // The inbound side of the UFT is based on - // the network-side of the flow (pre-processing). 
- self.uft_tcp_closed(data, ufid_out, state_ufid.as_ref()); - } + // The inbound side of the UFT is based on + // the network-side of the flow (pre-processing). + self.uft_tcp_closed(data, ufid_out, state_ufid.as_ref()); ufid_in.copied().or(state_ufid) } else { @@ -1678,7 +2222,7 @@ impl Port { Ok(match next_state { TcpState::Closed => TcpMaybeClosed::Closed { ufid_inbound }, - a => TcpMaybeClosed::NewState(a), + a => TcpMaybeClosed::NewState(a, entry), }) } @@ -1687,10 +2231,10 @@ impl Port { fn process_in_tcp( &self, data: &mut PortData, - pmeta: &PacketMeta, + pmeta: &MblkPacketData, ufid_in: &InnerFlowId, pkt_len: u64, - ) -> result::Result { + ) -> result::Result { // All TCP flows are keyed with respect to the outbound Flow // ID, therefore we mirror the flow. This value must represent // the guest-side of the flow and thus come from the passed-in @@ -1706,28 +2250,19 @@ impl Port { let dir = TcpDirection::In { ufid_in, ufid_out: &ufid_out }; - match self.update_tcp_entry( - PortDataOrSubset::Port(data), - tcp, - &dir, - pkt_len, - ) { + match self.update_tcp_entry(data, tcp, &dir, pkt_len) { // We need to create a new TCP entry here because we can't call // `process_in_miss` on the already-modified packet. - e @ Err( + Err( ProcessError::TcpFlow(TcpFlowStateError::NewFlow { .. }) | ProcessError::MissingFlow(_), - ) => { - self.create_new_tcp_entry( - &mut data.tcp_flows, - tcp, - &dir, - pkt_len, - )?; - e.map(Into::into) - } - Ok(v) => Ok(v.into()), - Err(e) => Err(e), + ) => self.create_new_tcp_entry( + &mut data.tcp_flows, + tcp, + &dir, + pkt_len, + ), + v => v, } } @@ -1735,10 +2270,10 @@ impl Port { &self, data: &mut PortData, epoch: u64, - pkt: &mut Packet, + pkt: &mut Packet, ufid_in: &InnerFlowId, ameta: &mut ActionMeta, - ) -> result::Result { + ) -> result::Result { use Direction::In; data.stats.vals.in_uft_miss += 1; @@ -1749,22 +2284,22 @@ impl Port { // If there is no flow ID, then do not create a UFT // entry. 
if *ufid_in == FLOW_ID_DEFAULT { - return Ok(ProcessResult::Modified); + return Ok(InternalProcessResult::Modified); } } Ok(LayerResult::Deny { name, reason }) => { - return Ok(ProcessResult::Drop { + return Ok(InternalProcessResult::Drop { reason: DropReason::Layer { name, reason }, }) } Ok(LayerResult::Hairpin(hppkt)) => { - return Ok(ProcessResult::Hairpin(hppkt)) + return Ok(InternalProcessResult::Hairpin(hppkt)) } Ok(LayerResult::HandlePkt) => { - return Ok(ProcessResult::from(self.net.handle_pkt( + return Ok(InternalProcessResult::from(self.net.handle_pkt( In, pkt, &data.uft_in, @@ -1776,17 +2311,24 @@ impl Port { } let ufid_out = pkt.flow().mirror(); - let hte = UftEntry { pair: Some(ufid_out), xforms, epoch }; + let mut hte = UftEntry { + pair: KMutex::new(Some(ufid_out), KMutexType::Spin), + xforms: xforms.compile(pkt.checksums_dirty()), + epoch, + l4_hash: ufid_in.crc32(), + tcp_flow: None, + }; // Keep around the comment on the `None` arm #[allow(clippy::single_match)] - match data.uft_out.get_mut(&ufid_out) { + match data.uft_out.get(&ufid_out) { // If an outbound packet has already created an outbound // UFT entry, make sure to pair it to this inbound entry. Some(out_entry) => { // Remember, the inbound UFID is the flow as seen by // the network, before any processing is done by OPTE. - out_entry.state_mut().pair = Some(*ufid_in); + + *out_entry.state().pair.lock() = Some(*ufid_in); } // Ideally we would simulate the outbound flow if no @@ -1810,17 +2352,16 @@ impl Port { ufid_in, pkt.len() as u64, ) { - Ok(TcpState::Closed) => Ok(ProcessResult::Modified), + Ok(TcpMaybeClosed::Closed { .. }) => { + Ok(InternalProcessResult::Modified) + } // Found existing TCP flow, or have just created a new one. - Ok(_) - | Err(ProcessError::TcpFlow(TcpFlowStateError::NewFlow { - .. - })) - | Err(ProcessError::MissingFlow(_)) => { + Ok(TcpMaybeClosed::NewState(_, flow)) => { // We have a good TCP flow, create a new UFT entry. 
+ hte.tcp_flow = Some(flow); match data.uft_in.add(*ufid_in, hte) { - Ok(_) => Ok(ProcessResult::Modified), + Ok(_) => Ok(InternalProcessResult::Modified), Err(OpteError::MaxCapacity(limit)) => { Err(ProcessError::FlowTableFull { kind: "UFT", @@ -1839,12 +2380,16 @@ impl Port { Err(ProcessError::TcpFlow(err)) => { let e = format!("{err}"); self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); - Ok(ProcessResult::Drop { reason: DropReason::TcpErr }) + Ok(InternalProcessResult::Drop { + reason: DropReason::TcpErr, + }) } Err(ProcessError::FlowTableFull { kind, limit }) => { let e = format!("{kind} flow table full ({limit} entries)"); self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); - Ok(ProcessResult::Drop { reason: DropReason::TcpErr }) + Ok(InternalProcessResult::Drop { + reason: DropReason::TcpErr, + }) } res => unreachable!( "Cannot return other errors from \ @@ -1853,7 +2398,7 @@ impl Port { } } else { match data.uft_in.add(*ufid_in, hte) { - Ok(_) => Ok(ProcessResult::Modified), + Ok(_) => Ok(InternalProcessResult::Modified), Err(OpteError::MaxCapacity(limit)) => { Err(ProcessError::FlowTableFull { kind: "UFT", limit }) } @@ -1880,7 +2425,7 @@ impl Port { self.name_cstr.as_ptr() as uintptr_t, ufid, epoch as uintptr_t, - last_hit.raw_millis().unwrap_or_default() as usize + last_hit.raw_millis() as usize ); } } else if #[cfg(feature = "usdt")] { @@ -1895,192 +2440,28 @@ impl Port { } } - fn process_in( - &self, - data: &mut PortData, - epoch: u64, - pkt: &mut Packet, - ufid_in: &InnerFlowId, - ameta: &mut ActionMeta, - ) -> result::Result { - use Direction::In; - - // Use the compiled UFT entry if one exists. Otherwise - // fallback to layer processing. - match data.uft_in.get_mut(ufid_in) { - Some(entry) if entry.state().epoch == epoch => { - // TODO At the moment I'm holding the UFT locks not - // just for lookup, but for the entire duration of - // processing. It might be better to ht.clone() or - // Arc; that way we only hold the lock - // for lookup. 
- entry.hit(); - data.stats.vals.in_uft_hit += 1; - self.uft_hit_probe(In, pkt.flow(), epoch, entry.last_hit()); - - for ht in &entry.state().xforms.hdr { - pkt.hdr_transform(ht)?; - } - - for bt in &entry.state().xforms.body { - pkt.body_transform(In, &**bt)?; - } - - // For inbound traffic the TCP flow table must be - // checked _after_ processing take place. - if pkt.meta().is_inner_tcp() { - match self.process_in_tcp( - data, - pkt.meta(), - ufid_in, - pkt.len() as u64, - ) { - Ok(_) => return Ok(ProcessResult::Modified), - Err(ProcessError::TcpFlow( - e @ TcpFlowStateError::NewFlow { .. }, - )) => { - self.tcp_err( - &data.tcp_flows, - In, - e.to_string(), - pkt, - ); - // We cant redo processing here like we can in `process_out`: - // we already modified the packet to check TCP state. - // However, we *have* deleted and replaced the TCP FSM and - // removed the UFT. The next packet on this flow (SYN-ACK) will - // create the UFT, reference the existing TCP flow, and increment - // all other layers' stats. - return Ok(ProcessResult::Modified); - } - Err(ProcessError::MissingFlow(flow_id)) => { - let e = format!("Missing TCP flow ID: {flow_id}"); - self.tcp_err( - &data.tcp_flows, - Direction::In, - e, - pkt, - ); - // If we have a UFT but no TCP flow ID, there is likely a bug - // and we are now out of sync. As above we can't reprocess, - // but we have regenerated the TCP entry to be less disruptive - // than a drop. Remove the UFT entry on the same proviso since the - // next packet to use it will regenerate it. - self.uft_invalidate( - data, - None, - Some(ufid_in), - epoch, - ); - return Ok(ProcessResult::Modified); - } - Err(ProcessError::TcpFlow( - e @ TcpFlowStateError::UnexpectedSegment { .. }, - )) => { - // Technically unreachable, as we filter these out in `update_tcp_entry`. - // Panicking here would probably be overly fragile, however. 
- self.tcp_err( - &data.tcp_flows, - Direction::In, - e.to_string(), - pkt, - ); - return Ok(ProcessResult::Drop { - reason: DropReason::TcpErr, - }); - } - Err(ProcessError::FlowTableFull { kind, limit }) => { - let e = format!( - "{kind} flow table full ({limit} entries)" - ); - self.tcp_err( - &data.tcp_flows, - Direction::In, - e, - pkt, - ); - return Ok(ProcessResult::Drop { - reason: DropReason::TcpErr, - }); - } - _ => unreachable!( - "Cannot return other errors from process_in_tcp" - ), - } - } else { - return Ok(ProcessResult::Modified); - } - } - - // The entry is from a previous epoch; invalidate its UFT - // entries and proceed to rule processing. - Some(entry) => { - let epoch = entry.state().epoch; - let ufid_in = Some(ufid_in); - let ufid_out = entry.state().pair; - self.uft_invalidate(data, ufid_out.as_ref(), ufid_in, epoch); - } - - // There is no entry; proceed to rule processing; - None => (), - }; - - self.process_in_miss(data, epoch, pkt, ufid_in, ameta) - } - - // Process the TCP packet for the purposes of connection tracking - // when an outbound UFT entry exists. - fn process_out_tcp_existing( - &self, - tcp_flows: &mut FlowTable, - ufid_out: &InnerFlowId, - pmeta: &PacketMeta, - pkt_len: u64, - ) -> result::Result { - let tcp = pmeta.inner_tcp().unwrap(); - self.update_tcp_entry( - PortDataOrSubset::Tcp(tcp_flows), - tcp, - &TcpDirection::Out { ufid_out }, - pkt_len, - ) - } - // Process the TCP packet for the purposes of connection tracking // when an outbound UFT entry was just created. fn process_out_tcp_new( &self, data: &mut PortData, ufid_out: &InnerFlowId, - pmeta: &PacketMeta, + pmeta: &MblkPacketData, pkt_len: u64, ) -> result::Result { let tcp = pmeta.inner_tcp().unwrap(); let dir = TcpDirection::Out { ufid_out }; - match self.update_tcp_entry( - PortDataOrSubset::Port(data), - tcp, - &dir, - pkt_len, - ) { + match self.update_tcp_entry(data, tcp, &dir, pkt_len) { Err( ProcessError::TcpFlow(TcpFlowStateError::NewFlow { .. 
}) | ProcessError::MissingFlow(_), - ) => match self.create_new_tcp_entry( + ) => self.create_new_tcp_entry( &mut data.tcp_flows, tcp, &dir, pkt_len, - ) { - // Note: don't need to remove on this case, as create_new_tcp_entry - // will only insert to the map if state != Closed. - Ok(TcpState::Closed) => { - Ok(TcpMaybeClosed::Closed { ufid_inbound: None }) - } - Ok(a) => Ok(TcpMaybeClosed::NewState(a)), - Err(e) => Err(e), - }, + ), other => other, } } @@ -2089,9 +2470,9 @@ impl Port { &self, data: &mut PortData, epoch: u64, - pkt: &mut Packet, + pkt: &mut Packet, ameta: &mut ActionMeta, - ) -> result::Result { + ) -> result::Result { use Direction::Out; data.stats.vals.out_uft_miss += 1; @@ -2099,7 +2480,7 @@ impl Port { // For outbound traffic the TCP flow table must be checked // _before_ processing take place. - if pkt.meta().is_inner_tcp() { + let tcp_flow = if pkt.meta().is_inner_tcp() { match self.process_out_tcp_new( data, pkt.flow(), @@ -2113,10 +2494,11 @@ impl Port { pkt.flow(), ufid_inbound.as_ref(), ); + None } // Continue with processing. 
- Ok(_) => (), + Ok(TcpMaybeClosed::NewState (_, flow)) => Some(flow), // Unlike for existing flows, we don't allow through // unexpected packets here for now -- the `TcpState` FSM @@ -2124,21 +2506,21 @@ impl Port { Err(ProcessError::TcpFlow(err)) => { let e = format!("{err}"); self.tcp_err(&data.tcp_flows, Out, e, pkt); - return Ok(ProcessResult::Drop { + return Ok(InternalProcessResult::Drop { reason: DropReason::TcpErr, }); } Err(ProcessError::MissingFlow(flow_id)) => { let e = format!("Missing TCP flow ID: {flow_id}"); self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); - return Ok(ProcessResult::Drop { + return Ok(InternalProcessResult::Drop { reason: DropReason::TcpErr, }); } Err(ProcessError::FlowTableFull { kind, limit }) => { let e = format!("{kind} flow table full ({limit} entries)"); self.tcp_err(&data.tcp_flows, Direction::In, e, pkt); - return Ok(ProcessResult::Drop { + return Ok(InternalProcessResult::Drop { reason: DropReason::TcpErr, }); } @@ -2146,21 +2528,30 @@ impl Port { "Cannot return other errors from process_in_tcp_new, returned: {res:?}" ), } - } + } else { + None + }; let mut xforms = Transforms::new(); let flow_before = *pkt.flow(); let res = self.layers_process(data, Out, pkt, &mut xforms, ameta); - let hte = UftEntry { pair: None, xforms, epoch }; + + let hte = UftEntry { + pair: KMutex::new(None, KMutexType::Spin), + xforms: xforms.compile(pkt.checksums_dirty()), + epoch, + l4_hash: flow_before.crc32(), + tcp_flow, + }; match res { Ok(LayerResult::Allow) => { // If there is no Flow ID, then there is no UFT entry. 
if flow_before == FLOW_ID_DEFAULT || tcp_closed { - return Ok(ProcessResult::Modified); + return Ok(InternalProcessResult::Modified); } match data.uft_out.add(flow_before, hte) { - Ok(_) => Ok(ProcessResult::Modified), + Ok(_) => Ok(InternalProcessResult::Modified), Err(OpteError::MaxCapacity(limit)) => { Err(ProcessError::FlowTableFull { kind: "UFT", limit }) } @@ -2171,14 +2562,16 @@ impl Port { } Ok(LayerResult::Hairpin(hppkt)) => { - Ok(ProcessResult::Hairpin(hppkt)) + Ok(InternalProcessResult::Hairpin(hppkt)) } - Ok(LayerResult::Deny { name, reason }) => Ok(ProcessResult::Drop { - reason: DropReason::Layer { name, reason }, - }), + Ok(LayerResult::Deny { name, reason }) => { + Ok(InternalProcessResult::Drop { + reason: DropReason::Layer { name, reason }, + }) + } - Ok(LayerResult::HandlePkt) => Ok(ProcessResult::from( + Ok(LayerResult::HandlePkt) => Ok(InternalProcessResult::from( self.net.handle_pkt(Out, pkt, &data.uft_in, &data.uft_out)?, )), @@ -2186,143 +2579,6 @@ impl Port { } } - fn process_out( - &self, - data: &mut PortData, - epoch: u64, - pkt: &mut Packet, - ameta: &mut ActionMeta, - ) -> result::Result { - use Direction::Out; - - let uft_out = &mut data.uft_out; - - // Use the compiled UFT entry if one exists. Otherwise - // fallback to layer processing. - match uft_out.get_mut(pkt.flow()) { - Some(entry) if entry.state().epoch == epoch => { - entry.hit(); - data.stats.vals.out_uft_hit += 1; - self.uft_hit_probe(Out, pkt.flow(), epoch, entry.last_hit()); - - let mut invalidated = false; - let mut reprocess = false; - let mut ufid_in = None; - - // For outbound traffic the TCP flow table must be - // checked _before_ processing take place. - if pkt.meta().is_inner_tcp() { - match self.process_out_tcp_existing( - &mut data.tcp_flows, - pkt.flow(), - pkt.meta(), - pkt.len() as u64, - ) { - // Continue with processing. 
- Ok(TcpMaybeClosed::NewState(_)) => (), - - Ok(TcpMaybeClosed::Closed { ufid_inbound }) => { - invalidated = true; - ufid_in = ufid_inbound; - } - - Err(ProcessError::TcpFlow( - e @ TcpFlowStateError::NewFlow { .. }, - )) => { - invalidated = true; - reprocess = true; - self.tcp_err( - &data.tcp_flows, - Out, - e.to_string(), - pkt, - ); - } - - Err(ProcessError::MissingFlow(flow_id)) => { - // If we have a UFT but no TCP flow ID, there is likely a bug - // and we are now out of sync. A full reprocess will be - // slower for this packet but will sync up the tables again. - invalidated = true; - reprocess = true; - let e = format!("Missing TCP flow ID: {flow_id}"); - self.tcp_err( - &data.tcp_flows, - Direction::In, - e, - pkt, - ); - } - - Err(ProcessError::TcpFlow( - e @ TcpFlowStateError::UnexpectedSegment { .. }, - )) => { - // Technically unreachable, as we filter these out in `update_tcp_entry`. - // Panicking here would probably be overly fragile, however. - self.tcp_err( - &data.tcp_flows, - Direction::In, - e.to_string(), - pkt, - ); - return Ok(ProcessResult::Drop { - reason: DropReason::TcpErr, - }); - } - - _ => unreachable!( - "Cannot return other errors from process_in_tcp_new" - ), - } - } - - let flow_to_invalidate = invalidated.then(|| *pkt.flow()); - - // If we suspect this is a new flow, we need to not perform - // existing transforms if we're going to behave as though we - // have a UFT miss. - if !reprocess { - for ht in &entry.state().xforms.hdr { - pkt.hdr_transform(ht)?; - } - - for bt in &entry.state().xforms.body { - pkt.body_transform(Out, &**bt)?; - } - - // Due to borrowing constraints from order of operations, we have - // to remove the UFT entry here rather than in `update_tcp_entry`. - // The TCP entry itself is already removed. 
- if let Some(flow_before) = flow_to_invalidate { - self.uft_tcp_closed( - data, - &flow_before, - ufid_in.as_ref(), - ); - } - - return Ok(ProcessResult::Modified); - } else if let Some(flow_before) = flow_to_invalidate { - self.uft_tcp_closed(data, &flow_before, ufid_in.as_ref()); - } - } - - // The entry is from a previous epoch; invalidate its UFT - // entries and proceed to rule processing. - Some(entry) => { - let epoch = entry.state().epoch; - let ufid_out = Some(pkt.flow()); - let ufid_in = entry.state().pair; - self.uft_invalidate(data, ufid_out, ufid_in.as_ref(), epoch); - } - - // There is no entry; proceed to layer processing. - None => (), - } - - self.process_out_miss(data, epoch, pkt, ameta) - } - fn uft_invalidate( &self, data: &mut PortData, @@ -2407,12 +2663,10 @@ impl Port { fn update_stats_in( stats: &mut PortStats, - res: &result::Result, + res: &result::Result, ) { match res { - Ok(ProcessResult::Bypass) => stats.in_bypass += 1, - - Ok(ProcessResult::Drop { reason }) => { + Ok(InternalProcessResult::Drop { reason }) => { stats.in_drop += 1; match reason { @@ -2422,9 +2676,9 @@ impl Port { } } - Ok(ProcessResult::Modified) => stats.in_modified += 1, + Ok(InternalProcessResult::Modified) => stats.in_modified += 1, - Ok(ProcessResult::Hairpin(_)) => stats.in_hairpin += 1, + Ok(InternalProcessResult::Hairpin(_)) => stats.in_hairpin += 1, // XXX We should split the different error types out into // individual stats. 
However, I'm not sure exactly how I @@ -2440,12 +2694,10 @@ impl Port { fn update_stats_out( stats: &mut PortStats, - res: &result::Result, + res: &result::Result, ) { match res { - Ok(ProcessResult::Bypass) => stats.out_bypass += 1, - - Ok(ProcessResult::Drop { reason }) => { + Ok(InternalProcessResult::Drop { reason }) => { stats.out_drop += 1; match reason { @@ -2455,9 +2707,9 @@ impl Port { } } - Ok(ProcessResult::Modified) => stats.out_modified += 1, + Ok(InternalProcessResult::Modified) => stats.out_modified += 1, - Ok(ProcessResult::Hairpin(_)) => stats.out_hairpin += 1, + Ok(InternalProcessResult::Hairpin(_)) => stats.out_hairpin += 1, // XXX We should split the different error types out into // individual stats. However, I'm not sure exactly how I @@ -2524,22 +2776,6 @@ impl Port { } } -/// Helper enum used to delay UFT entry removal in case of -/// `tcp_out_existing`. -enum PortDataOrSubset<'a> { - Port(&'a mut PortData), - Tcp(&'a mut FlowTable), -} - -impl<'a> PortDataOrSubset<'a> { - fn tcp_flows(&mut self) -> &mut FlowTable { - match self { - Self::Port(p) => &mut p.tcp_flows, - Self::Tcp(t) => t, - } - } -} - /// Helper enum for encoding what UFIDs are available when /// updating TCP flow state. enum TcpDirection<'a> { @@ -2547,7 +2783,7 @@ enum TcpDirection<'a> { Out { ufid_out: &'a InnerFlowId }, } -impl<'a> TcpDirection<'a> { +impl TcpDirection<'_> { fn dir(&self) -> Direction { match self { Self::In { .. } => Direction::In, @@ -2573,7 +2809,10 @@ pub enum Pos { /// An entry in the TCP flow table. #[derive(Clone, Debug)] -pub struct TcpFlowEntryState { +pub struct TcpFlowEntryStateInner { + // We store this for the benefit of inbound flows who have UFTs + // but which need to know their partner UFID to perform an invalidation. + outbound_ufid: InnerFlowId, // This must be the UFID of inbound traffic _as it arrives_ from // the network, not after it's processed. 
inbound_ufid: Option, @@ -2584,35 +2823,102 @@ pub struct TcpFlowEntryState { bytes_out: u64, } +pub struct TcpFlowEntryState { + inner: KMutex, +} + impl TcpFlowEntryState { fn new_inbound( + outbound_ufid: InnerFlowId, inbound_ufid: InnerFlowId, tcp_state: TcpFlowState, bytes_in: u64, ) -> Self { Self { - inbound_ufid: Some(inbound_ufid), - tcp_state, - segs_in: 1, - segs_out: 0, - bytes_in, - bytes_out: 0, + inner: KMutex::new( + TcpFlowEntryStateInner { + outbound_ufid, + inbound_ufid: Some(inbound_ufid), + tcp_state, + segs_in: 1, + segs_out: 0, + bytes_in, + bytes_out: 0, + }, + KMutexType::Spin, + ), } } - fn new_outbound(tcp_state: TcpFlowState, bytes_out: u64) -> Self { + fn new_outbound( + outbound_ufid: InnerFlowId, + tcp_state: TcpFlowState, + bytes_out: u64, + ) -> Self { Self { - inbound_ufid: None, - tcp_state, - segs_in: 0, - segs_out: 1, - bytes_in: 0, - bytes_out, + inner: KMutex::new( + TcpFlowEntryStateInner { + outbound_ufid, + inbound_ufid: None, + tcp_state, + segs_in: 0, + segs_out: 1, + bytes_in: 0, + bytes_out, + }, + KMutexType::Spin, + ), + } + } + + fn tcp_state(&self) -> TcpState { + let lock = self.inner.lock(); + lock.tcp_state.tcp_state() + } + + #[inline(always)] + fn update( + &self, + port_name: &CStr, + tcp: &impl TcpRef, + dir: Direction, + pkt_len: u64, + ufid_in: Option<&InnerFlowId>, + ) -> result::Result { + let mut tfes = self.inner.lock(); + match dir { + Direction::In { .. } => { + tfes.segs_in += 1; + tfes.bytes_in += pkt_len; + } + Direction::Out { .. } => { + tfes.segs_out += 1; + tfes.bytes_out += pkt_len; + } + } + + if let Some(ufid_in) = ufid_in { + // We need to store the UFID of the inbound packet + // before it was processed so that we can retire the + // correct UFT/LFT entries upon connection + // termination. 
+ tfes.inbound_ufid = Some(*ufid_in); } + let ufid_out = tfes.outbound_ufid; + let tcp_state = &mut tfes.tcp_state; + + tcp_state.process(port_name, dir, &ufid_out, tcp) } } -impl Display for TcpFlowEntryState { +impl core::fmt::Debug for TcpFlowEntryState { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let inner = self.inner.lock(); + core::fmt::Debug::fmt(&*inner, f) + } +} + +impl Display for TcpFlowEntryStateInner { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match &self.inbound_ufid { None => write!(f, "None {}", self.tcp_state), @@ -2621,7 +2927,14 @@ impl Display for TcpFlowEntryState { } } -impl Dump for TcpFlowEntryState { +impl Display for TcpFlowEntryState { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let inner = self.inner.lock(); + Display::fmt(&*inner, f) + } +} + +impl Dump for TcpFlowEntryStateInner { type DumpVal = TcpFlowEntryDump; fn dump(&self, hits: u64) -> TcpFlowEntryDump { @@ -2637,6 +2950,15 @@ impl Dump for TcpFlowEntryState { } } +impl Dump for TcpFlowEntryState { + type DumpVal = TcpFlowEntryDump; + + fn dump(&self, hits: u64) -> TcpFlowEntryDump { + let inner = self.inner.lock(); + inner.dump(hits) + } +} + /// Expiry behaviour for TCP flows dependent on the connection FSM. 
#[derive(Debug)] pub struct TcpExpiry { @@ -2659,11 +2981,11 @@ impl ExpiryPolicy for TcpExpiry { entry: &FlowEntry, now: Moment, ) -> bool { - let ttl = match entry.state().tcp_state.tcp_state() { + let ttl = match entry.state().tcp_state() { TcpState::TimeWait => self.time_wait_ttl, _ => self.keepalive_ttl, }; - ttl.is_expired(*entry.last_hit(), now) + ttl.is_expired(entry.last_hit(), now) } } @@ -2685,6 +3007,7 @@ extern "C" { pkt: uintptr_t, hp_pkt: uintptr_t, err_b: *const LabelBlock<2>, + path: uintptr_t, ); pub fn __dtrace_probe_tcp__err( dir: uintptr_t, diff --git a/lib/opte/src/engine/predicate.rs b/lib/opte/src/engine/predicate.rs index fd69ae51..141d5b4c 100644 --- a/lib/opte/src/engine/predicate.rs +++ b/lib/opte/src/engine/predicate.rs @@ -2,25 +2,26 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! Predicates used for `Rule` matching. 
use super::dhcp::MessageType as DhcpMessageType; use super::dhcpv6::MessageType as Dhcpv6MessageType; use super::ether::EtherType; -use super::headers::IpMeta; +use super::ether::EthernetRef; use super::icmp::v4::MessageType as IcmpMessageType; use super::icmp::v6::MessageType as Icmpv6MessageType; -use super::ip4::Ipv4Addr; -use super::ip4::Ipv4Cidr; -use super::ip4::Ipv4Meta; -use super::ip4::Protocol; -use super::ip6::Ipv6Addr; -use super::ip6::Ipv6Cidr; -use super::ip6::Ipv6Meta; -use super::packet::PacketMeta; -use super::packet::PacketRead; +use super::ip::v4::Ipv4Addr; +use super::ip::v4::Ipv4Cidr; +use super::ip::v4::Ipv4Ref; +use super::ip::v4::Protocol; +use super::ip::v6::v6_get_next_header; +use super::ip::v6::Ipv6Addr; +use super::ip::v6::Ipv6Cidr; +use super::ip::v6::Ipv6Ref; +use super::ip::L3; +use super::packet::MblkPacketData; use super::port::meta::ActionMeta; use alloc::boxed::Box; use alloc::string::String; @@ -29,6 +30,8 @@ use alloc::vec::Vec; use core::fmt; use core::fmt::Display; use core::ops::RangeInclusive; +use ingot::icmp::IcmpV4Ref; +use ingot::icmp::IcmpV6Ref; use opte_api::MacAddr; use serde::Deserialize; use serde::Serialize; @@ -352,7 +355,7 @@ impl Display for Predicate { impl Predicate { pub(crate) fn is_match( &self, - meta: &PacketMeta, + meta: &MblkPacketData, action_meta: &ActionMeta, ) -> bool { match self { @@ -368,7 +371,9 @@ impl Predicate { Self::InnerEtherType(list) => { for m in list { - if m.matches(meta.inner.ether.ether_type) { + if m.matches(EtherType::from( + meta.inner_ether().ethertype().0, + )) { return true; } } @@ -376,7 +381,7 @@ impl Predicate { Self::InnerEtherDst(list) => { for m in list { - if m.matches(meta.inner.ether.dst) { + if m.matches(meta.inner_ether().destination()) { return true; } } @@ -384,16 +389,18 @@ impl Predicate { Self::InnerEtherSrc(list) => { for m in list { - if m.matches(meta.inner.ether.src) { + if m.matches(meta.inner_ether().source()) { return true; } } } - 
Self::InnerIpProto(list) => match meta.inner.ip { + Self::InnerIpProto(list) => match meta.inner_l3() { None => return false, - Some(IpMeta::Ip4(Ipv4Meta { proto, .. })) => { + Some(L3::Ipv4(ipv4)) => { + let proto = Protocol::from(ipv4.protocol().0); + for m in list { if m.matches(proto) { return true; @@ -401,7 +408,13 @@ impl Predicate { } } - Some(IpMeta::Ip6(Ipv6Meta { proto, .. })) => { + Some(L3::Ipv6(ipv6)) => { + let proto = Protocol::from( + v6_get_next_header(ipv6) + .unwrap_or_else(|_| ipv6.next_header()) + .0, + ); + for m in list { if m.matches(proto) { return true; @@ -410,8 +423,9 @@ impl Predicate { } }, - Self::InnerSrcIp4(list) => match meta.inner.ip { - Some(IpMeta::Ip4(Ipv4Meta { src: ip, .. })) => { + Self::InnerSrcIp4(list) => match meta.inner_ip4() { + Some(v4) => { + let ip = v4.source(); for m in list { if m.matches(ip) { return true; @@ -424,8 +438,9 @@ impl Predicate { _ => return false, }, - Self::InnerDstIp4(list) => match meta.inner.ip { - Some(IpMeta::Ip4(Ipv4Meta { dst: ip, .. })) => { + Self::InnerDstIp4(list) => match meta.inner_ip4() { + Some(v4) => { + let ip = v4.destination(); for m in list { if m.matches(ip) { return true; @@ -438,8 +453,9 @@ impl Predicate { _ => return false, }, - Self::InnerSrcIp6(list) => match meta.inner.ip { - Some(IpMeta::Ip6(Ipv6Meta { src: ip, .. })) => { + Self::InnerSrcIp6(list) => match meta.inner_ip6() { + Some(v6) => { + let ip = v6.source(); for m in list { if m.matches(ip) { return true; @@ -449,8 +465,9 @@ impl Predicate { _ => return false, }, - Self::InnerDstIp6(list) => match meta.inner.ip { - Some(IpMeta::Ip6(Ipv6Meta { dst: ip, .. 
})) => { + Self::InnerDstIp6(list) => match meta.inner_ip6() { + Some(v6) => { + let ip = v6.destination(); for m in list { if m.matches(ip) { return true; @@ -461,11 +478,11 @@ impl Predicate { }, Self::InnerSrcPort(list) => { - match meta.inner.ulp.map(|m| m.src_port()) { + match meta.inner_ulp().and_then(|v| v.src_port()) { // No ULP metadata or no source port (e.g. ICMPv6). - None | Some(None) => return false, + None => return false, - Some(Some(port)) => { + Some(port) => { for m in list { if m.matches(port) { return true; @@ -476,11 +493,11 @@ impl Predicate { } Self::InnerDstPort(list) => { - match meta.inner.ulp.map(|m| m.dst_port()) { + match meta.inner_ulp().and_then(|v| v.dst_port()) { // No ULP metadata or no destination port (e.g. ICMPv6). - None | Some(None) => return false, + None => return false, - Some(Some(port)) => { + Some(port) => { for m in list { if m.matches(port) { return true; @@ -579,19 +596,13 @@ impl DataPredicate { // use `PacketMeta` to determine if there is a suitable payload to // be inspected. That is, if there is no metadata for a given // header, there is certainly no payload. 
- pub(crate) fn is_match<'a, 'b, R>( - &self, - meta: &PacketMeta, - rdr: &'b mut R, - ) -> bool - where - R: PacketRead<'a>, - { + pub(crate) fn is_match(&self, meta: &MblkPacketData) -> bool { match self { - Self::Not(pred) => !pred.is_match(meta, rdr), + Self::Not(pred) => !pred.is_match(meta), Self::DhcpMsgType(mt) => { - let bytes = rdr.copy_remaining(); + let bytes = meta.body(); + let pkt = match DhcpPacket::new_checked(&bytes) { Ok(v) => v, Err(e) => { @@ -602,6 +613,7 @@ impl DataPredicate { return false; } }; + let dhcp = match DhcpRepr::parse(&pkt) { Ok(v) => v, Err(e) => { @@ -620,7 +632,7 @@ impl DataPredicate { return false; }; - mt.is_match(&icmp.msg_type) + mt.is_match(&IcmpMessageType::from(icmp.ty())) } Self::Icmpv6MsgType(mt) => { @@ -629,18 +641,18 @@ impl DataPredicate { return false; }; - mt.is_match(&icmp6.msg_type) + mt.is_match(&Icmpv6MessageType::from(icmp6.ty())) } Self::Dhcpv6MsgType(mt) => { - if let Ok(buf) = rdr.slice(1) { - rdr.seek_back(1).expect("Failed to seek back"); - mt.is_match(&buf[0].into()) - } else { + let body = meta.body(); + if body.is_empty() { super::err!( "Failed to read DHCPv6 message type from packet" ); false + } else { + mt.is_match(&body[0].into()) } } } diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 881d7fde..e4fa9936 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -8,27 +8,37 @@ use super::ether::EtherMeta; use super::ether::EtherMod; +use super::ether::Ethernet; +use super::ether::EthernetMut; +use super::ether::EthernetPacket; +use super::ether::ValidEthernet; use super::flow_table::StateSummary; -use super::headers::EncapMeta; use super::headers::EncapMod; use super::headers::EncapPush; use super::headers::HeaderAction; use super::headers::HeaderActionError; -use super::headers::IpMeta; use super::headers::IpMod; use super::headers::IpPush; +use super::headers::Transform; use super::headers::UlpHeaderAction; +use super::headers::UlpMetaModify; +use 
super::ip::v4::Ipv4Mut; +use super::ip::v6::v6_set_next_header; +use super::ip::v6::Ipv6Mut; +use super::ip::ValidL3; +use super::ip::L3; use super::packet::BodyTransform; -use super::packet::Initialized; use super::packet::InnerFlowId; +use super::packet::MblkFullParsed; +use super::packet::MblkPacketData; use super::packet::Packet; -use super::packet::PacketMeta; -use super::packet::PacketRead; -use super::packet::PacketReader; -use super::packet::Parsed; +use super::packet::PacketData; +use super::packet::Pullup; +use super::parse::ValidUlp; use super::port::meta::ActionMeta; use super::predicate::DataPredicate; use super::predicate::Predicate; +use crate::ddi::mblk::MsgBlk; use alloc::boxed::Box; use alloc::ffi::CString; use alloc::string::String; @@ -41,9 +51,20 @@ use core::fmt::Debug; use core::fmt::Display; use illumos_sys_hdrs::c_char; use illumos_sys_hdrs::uintptr_t; +use ingot::icmp::IcmpV4Mut; +use ingot::icmp::IcmpV4Ref; +use ingot::icmp::IcmpV6Mut; +use ingot::icmp::IcmpV6Ref; +use ingot::ip::IpProtocol; +use ingot::tcp::TcpFlags; +use ingot::tcp::TcpMut; +use ingot::types::InlineHeader; +use ingot::types::Read; +use ingot::udp::UdpMut; use opte_api::Direction; use serde::Deserialize; use serde::Serialize; +use zerocopy::ByteSliceMut; /// A marker trait indicating a type is an entry acuired from a [`Resource`]. 
pub trait ResourceEntry {} @@ -153,8 +174,8 @@ pub trait ActionDesc { fn gen_bt( &self, _dir: Direction, - _meta: &PacketMeta, - _payload_segs: &[&[u8]], + _meta: &MblkPacketData, + _payload_seg: &[u8], ) -> Result>, GenBtError> { Ok(None) } @@ -251,7 +272,7 @@ impl StaticAction for Identity { &self, _dir: Direction, _flow_id: &InnerFlowId, - _pkt_meta: &PacketMeta, + _pkt_meta: &MblkPacketData, _action_meta: &mut ActionMeta, ) -> GenHtResult { Ok(AllowOrDeny::Allow(HdrTransform::identity(&self.name))) @@ -277,13 +298,13 @@ pub enum ModifyAction { #[derive(Clone, Debug, Default, Deserialize, Serialize)] pub struct HdrTransform { pub name: String, - pub outer_ether: HeaderAction, - pub outer_ip: HeaderAction, - pub outer_encap: HeaderAction, - pub inner_ether: HeaderAction, - pub inner_ip: HeaderAction, + pub outer_ether: HeaderAction, + pub outer_ip: HeaderAction, + pub outer_encap: HeaderAction, + pub inner_ether: HeaderAction, + pub inner_ip: HeaderAction, // We don't support push/pop for inner_ulp. - pub inner_ulp: UlpHeaderAction, + pub inner_ulp: UlpHeaderAction, } impl StateSummary for Vec { @@ -298,6 +319,193 @@ impl Display for HdrTransform { } } +/// Header transformations matching a simple format, amenable +/// to fastpath compilation: +/// * Encap is either pushed or popped in its entirety, +/// * The inner packet is only modified, with no layers pushed or +/// popped. +/// * The packet action must be `Modified`. 
+#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct CompiledTransform { + pub encap: CompiledEncap, + pub inner_ether: Option, + pub inner_ip: Option, + pub inner_ulp: Option, + pub checksums_dirty: bool, +} + +impl CompiledTransform { + #[inline(always)] + pub fn transform_ether( + &self, + ether: &mut ValidEthernet, + ) { + if let Some(ether_tx) = &self.inner_ether { + if let Some(new_src) = ðer_tx.src { + ether.set_source(*new_src); + } + if let Some(new_dst) = ðer_tx.dst { + ether.set_destination(*new_dst); + } + } + } + + #[inline(always)] + pub fn transform_l3(&self, l3: &mut ValidL3) { + match (l3, &self.inner_ip) { + (ValidL3::Ipv4(pkt), Some(IpMod::Ip4(tx))) => { + if let Some(new_src) = &tx.src { + pkt.set_source(*new_src); + } + if let Some(new_dst) = &tx.dst { + pkt.set_destination(*new_dst); + } + if let Some(new_proto) = &tx.proto { + pkt.set_protocol(IpProtocol(u8::from(*new_proto))); + } + } + (ValidL3::Ipv6(pkt), Some(IpMod::Ip6(tx))) => { + if let Some(new_src) = &tx.src { + pkt.set_source(*new_src); + } + if let Some(new_dst) = &tx.dst { + pkt.set_destination(*new_dst); + } + if let Some(new_proto) = &tx.proto { + let ipp = IpProtocol(u8::from(*new_proto)); + + // `expect`ing is too risky, but we know we won't fail + // here for two reasons: + // * We just succeeded at parsing. + // * Compiled transforms cannot perform *structural* + // changes to packets (incl. push/pop/modify EHs). 
+ let _ = v6_set_next_header(ipp, pkt); + } + } + _ => {} + } + } + + #[inline(always)] + pub fn transform_ulp(&self, ulp: &mut ValidUlp) { + match (ulp, &self.inner_ulp) { + (ValidUlp::Tcp(pkt), Some(tx)) => { + if let Some(flags) = tx.tcp_flags { + pkt.set_flags(TcpFlags::from_bits_retain(flags)); + } + + if let Some(new_src) = &tx.generic.src_port { + pkt.set_source(*new_src); + } + + if let Some(new_dst) = &tx.generic.dst_port { + pkt.set_destination(*new_dst); + } + } + (ValidUlp::Udp(pkt), Some(tx)) => { + if let Some(new_src) = &tx.generic.src_port { + pkt.set_source(*new_src); + } + + if let Some(new_dst) = &tx.generic.dst_port { + pkt.set_destination(*new_dst); + } + } + (ValidUlp::IcmpV4(pkt), Some(tx)) + if pkt.ty() == 0 || pkt.ty() == 8 => + { + if let Some(new_id) = tx.icmp_id { + pkt.rest_of_hdr_mut()[..2] + .copy_from_slice(&new_id.to_be_bytes()) + } + } + (ValidUlp::IcmpV6(pkt), Some(tx)) + if pkt.ty() == 128 || pkt.ty() == 129 => + { + if let Some(new_id) = tx.icmp_id { + pkt.rest_of_hdr_mut()[..2] + .copy_from_slice(&new_id.to_be_bytes()) + } + } + _ => {} + } + } +} + +#[derive(Clone, Debug, Deserialize, Serialize)] +pub enum CompiledEncap { + Pop, + // TODO: can we cache these in an Arc'd buffer? + Push { + eth: EtherMeta, + ip: IpPush, + encap: EncapPush, + bytes: Vec, + l3_len_offset: usize, + l3_extra_bytes: usize, + l4_len_offset: usize, + encap_sz: usize, + }, +} + +impl CompiledEncap { + #[inline] + pub fn prepend(&self, mut pkt: MsgBlk, ulp_len: usize) -> MsgBlk { + let Self::Push { + ref bytes, + l3_len_offset, + l3_extra_bytes, + l4_len_offset, + encap_sz, + .. + } = self + else { + return pkt; + }; + + let mut prepend = if pkt.head_capacity() < bytes.len() { + let mut pkt = MsgBlk::new_ethernet(bytes.len()); + pkt.pop_all(); + Some(pkt) + } else { + None + }; + + let target = if let Some(prepend) = prepend.as_mut() { + prepend + } else { + &mut pkt + }; + + // Unwrap safety -- we either had enough bytes, or we just allocated them. 
+ target.write_bytes_front(bytes).unwrap(); + + let l4_len = ulp_len + encap_sz; + let l3_len = l4_len + l3_extra_bytes; + + let l3_len_slot: &mut [u8; core::mem::size_of::()] = (&mut target + [*l3_len_offset..l3_len_offset + core::mem::size_of::()]) + .try_into() + .expect("exact no bytes"); + + *l3_len_slot = (l3_len as u16).to_be_bytes(); + + let l4_len_slot: &mut [u8; core::mem::size_of::()] = (&mut target + [*l4_len_offset..l4_len_offset + core::mem::size_of::()]) + .try_into() + .expect("exact no bytes"); + + *l4_len_slot = (l4_len as u16).to_be_bytes(); + + if let Some(mut prepend) = prepend { + prepend.append(pkt); + prepend + } else { + pkt + } + } +} + #[cfg(all(not(feature = "std"), not(test)))] extern "C" { pub fn __dtrace_probe_ht__run(arg: uintptr_t); @@ -365,34 +573,53 @@ impl HdrTransform { } /// Run this header transformation against the passed in - /// [`PacketMeta`], mutating it in place. + /// [`PacketData`], mutating it in place. + /// + /// Returns whether the inner checksum needs recomputed. /// /// # Errors /// /// If there is an [`HeaderAction::Modify`], but no metadata is /// present for that particular header, then a /// [`HdrTransformError::MissingHeader`] is returned. 
- pub fn run(&self, meta: &mut PacketMeta) -> Result<(), HdrTransformError> { + pub fn run( + &self, + meta: &mut PacketData, + ) -> Result + where + T::Chunk: ByteSliceMut, + { self.outer_ether - .run(&mut meta.outer.ether) + .act_on_option::>, _>( + &mut meta.headers.outer_eth, + ) .map_err(Self::err_fn("outer ether"))?; + self.outer_ip - .run(&mut meta.outer.ip) + .act_on_option::, _>(&mut meta.headers.outer_l3) .map_err(Self::err_fn("outer IP"))?; + self.outer_encap - .run(&mut meta.outer.encap) + .act_on_option(&mut meta.headers.outer_encap) .map_err(Self::err_fn("outer encap"))?; - // XXX A hack so that inner ethernet can meet the interface of - // `HeaderAction::run().` - let mut tmp = Some(meta.inner.ether); - self.inner_ether.run(&mut tmp).map_err(Self::err_fn("inner ether"))?; - meta.inner.ether = tmp.unwrap(); - self.inner_ip - .run(&mut meta.inner.ip) + + as Transform, _, _>>::act_on( + &mut meta.headers.inner_eth, + &self.inner_ether, + ) + .map_err(Self::err_fn("inner eth"))?; + + let l3_dirty = self + .inner_ip + .act_on_option::, _>(&mut meta.headers.inner_l3) .map_err(Self::err_fn("inner IP"))?; - self.inner_ulp - .run(&mut meta.inner.ulp) - .map_err(Self::err_fn("inner ULP")) + + let ulp_dirty = self + .inner_ulp + .run(&mut meta.headers.inner_ulp) + .map_err(Self::err_fn("inner ULP"))?; + + Ok(l3_dirty || ulp_dirty) } fn err_fn( @@ -403,6 +630,12 @@ impl HdrTransform { HeaderActionError::MissingHeader => { HdrTransformError::MissingHeader(header) } + HeaderActionError::CantPop => { + HdrTransformError::CantPop(header) + } + HeaderActionError::MalformedExtension => { + HdrTransformError::MalformedExtension(header) + } } } } @@ -411,6 +644,8 @@ impl HdrTransform { #[derive(Clone, Copy, Debug)] pub enum HdrTransformError { MissingHeader(&'static str), + CantPop(&'static str), + MalformedExtension(&'static str), } #[derive(Debug)] @@ -435,14 +670,14 @@ pub trait StatefulAction: Display { /// # Errors /// /// * [`GenDescError::ResourceExhausted`]: 
This action relies on a - /// dynamic resource which has been exhausted. + /// dynamic resource which has been exhausted. /// /// * [`GenDescError::Unexpected`]: This action encountered an - /// unexpected error while trying to generate a descriptor. + /// unexpected error while trying to generate a descriptor. fn gen_desc( &self, flow_id: &InnerFlowId, - pkt: &Packet, + pkt: &Packet, meta: &mut ActionMeta, ) -> GenDescResult; @@ -462,7 +697,7 @@ pub trait StaticAction: Display { &self, dir: Direction, flow_id: &InnerFlowId, - packet_meta: &PacketMeta, + packet_meta: &MblkPacketData, action_meta: &mut ActionMeta, ) -> GenHtResult; @@ -497,25 +732,18 @@ pub trait MetaAction: Display { #[derive(Debug)] pub enum GenErr { - BadPayload(super::packet::ReadErr), Malformed, MissingMeta, Unexpected(String), } -impl From for GenErr { - fn from(err: super::packet::ReadErr) -> Self { - Self::BadPayload(err) - } -} - impl From for GenErr { fn from(_err: smoltcp::wire::Error) -> Self { Self::Malformed } } -pub type GenPacketResult = ActionResult, GenErr>; +pub type GenPacketResult = ActionResult; /// An error while generating a [`BodyTransform`]. #[derive(Clone, Debug)] @@ -531,21 +759,17 @@ impl From for GenBtError { /// A hairpin action is one that generates a new packet based on the /// current inbound/outbound packet, and then "hairpins" that new -/// packet back to the source of the original packet. For example, you -/// could use this to hairpin an ARP Reply in response to a guest's -/// ARP request. +/// packet back to the source of the original packet. +/// +/// For example, you could use this to hairpin an ARP Reply in response +/// to a guest's ARP request. pub trait HairpinAction: Display { /// Generate a [`Packet`] to hairpin back to the source. The - /// `meta` argument holds the packet metadata, inlucding any - /// modifications made by previous layers up to this point. 
The - /// `rdr` argument provides a [`PacketReader`] against - /// [`Packet`], with its starting position set to the - /// beginning of the packet's payload. - fn gen_packet( - &self, - meta: &PacketMeta, - rdr: &mut PacketReader, - ) -> GenPacketResult; + /// `meta` argument holds the packet metadata, including any + /// modifications made by previous layers up to this point. + /// This also provides access to a reader over the packet body, + /// positioned after the parsed metadata. + fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult; /// Return the predicates implicit to this action. /// @@ -821,16 +1045,12 @@ impl Rule { } } -impl<'a> Rule { - pub fn is_match<'b, R>( +impl Rule { + pub fn is_match( &self, - meta: &PacketMeta, + meta: &MblkPacketData, action_meta: &ActionMeta, - rdr: &'b mut R, - ) -> bool - where - R: PacketRead<'a>, - { + ) -> bool { #[cfg(debug_assertions)] { if let Some(preds) = &self.state.preds { @@ -855,7 +1075,7 @@ impl<'a> Rule { } for p in &preds.data_preds { - if !p.is_match(meta, rdr) { + if !p.is_match(meta) { return false; } } @@ -892,13 +1112,15 @@ impl From<&Rule> for super::ioctl::RuleDump { #[test] fn rule_matching() { - use super::ip4::Protocol; - use crate::engine::headers::UlpMeta; - use crate::engine::ip4::Ipv4Meta; - use crate::engine::packet::InnerMeta; + use crate::engine::ip::v4::Ipv4; + use crate::engine::ip::v4::Ipv4Mut; use crate::engine::predicate::Ipv4AddrMatch; use crate::engine::predicate::Predicate; - use crate::engine::tcp::TcpMeta; + use crate::engine::GenericUlp; + use ingot::ethernet::Ethertype; + use ingot::ip::IpProtocol; + use ingot::tcp::Tcp; + use ingot::types::HeaderLen; let action = Identity::new("rule_matching"); let mut r1 = Rule::new(1, Action::Static(Arc::new(action))); @@ -908,35 +1130,28 @@ fn rule_matching() { let dst_port = "443".parse().unwrap(); // There is no DataPredicate usage in this test, so this pkt/rdr // can be bogus. 
- let pkt = Packet::copy(&[0xA]); - let mut rdr = pkt.get_rdr(); - - let ip = IpMeta::from(Ipv4Meta { - src: src_ip, - dst: dst_ip, - proto: Protocol::TCP, - ttl: 64, - ident: 1, - hdr_len: 20, - total_len: 40, - csum: [0; 2], - }); - let ulp = UlpMeta::from(TcpMeta { - src: src_port, - dst: dst_port, - flags: 0, - seq: 0, - ack: 0, - options_bytes: None, - options_len: 0, + let tcp = Tcp { + source: src_port, + destination: dst_port, window_size: 64240, ..Default::default() - }); - - let meta = PacketMeta { - outer: Default::default(), - inner: InnerMeta { ip: Some(ip), ulp: Some(ulp), ..Default::default() }, }; + let ip4 = Ipv4 { + source: src_ip, + destination: dst_ip, + protocol: IpProtocol::TCP, + total_len: (Ipv4::MINIMUM_LENGTH + tcp.packet_length()) as u16, + ..Default::default() + }; + + let eth = Ethernet { ethertype: Ethertype::IPV4, ..Default::default() }; + + let mut pkt_m = MsgBlk::new_ethernet_pkt((ð, &ip4, &tcp)); + let mut pkt = Packet::parse_outbound(pkt_m.iter_mut(), GenericUlp {}) + .unwrap() + .to_full_meta(); + pkt.compute_checksums(); + let meta = pkt.meta(); r1.add_predicate(Predicate::InnerSrcIp4(vec![Ipv4AddrMatch::Exact( src_ip, @@ -944,36 +1159,14 @@ fn rule_matching() { let r1 = r1.finalize(); let ameta = ActionMeta::new(); - assert!(r1.is_match(&meta, &ameta, &mut rdr)); + assert!(r1.is_match(&meta, &ameta)); let new_src_ip = "10.11.11.99".parse().unwrap(); - let ip = IpMeta::from(Ipv4Meta { - src: new_src_ip, - dst: dst_ip, - proto: Protocol::TCP, - ttl: 64, - ident: 1, - hdr_len: 20, - total_len: 40, - csum: [0; 2], - }); - let ulp = UlpMeta::from(TcpMeta { - src: src_port, - dst: dst_port, - flags: 0, - seq: 0, - ack: 0, - options_bytes: None, - options_len: 0, - window_size: 64240, - ..Default::default() - }); - - let meta = PacketMeta { - outer: Default::default(), - inner: InnerMeta { ip: Some(ip), ulp: Some(ulp), ..Default::default() }, - }; + let meta = pkt.meta_mut(); + if let Some(L3::Ipv4(v4)) = &mut meta.headers.inner_l3 { 
+ v4.set_source(new_src_ip); + } - assert!(!r1.is_match(&meta, &ameta, &mut rdr)); + assert!(!r1.is_match(&meta, &ameta)); } diff --git a/lib/opte/src/engine/snat.rs b/lib/opte/src/engine/snat.rs index 39b2ed85..2b0a8ede 100644 --- a/lib/opte/src/engine/snat.rs +++ b/lib/opte/src/engine/snat.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! Types for working with IP Source NAT, both IPv4 and IPv6. @@ -12,8 +12,8 @@ use super::headers::UlpGenericModify; use super::headers::UlpHeaderAction; use super::headers::UlpMetaModify; use super::packet::InnerFlowId; +use super::packet::MblkFullParsed; use super::packet::Packet; -use super::packet::Parsed; use super::port::meta::ActionMeta; use super::predicate::DataPredicate; use super::predicate::Predicate; @@ -37,8 +37,9 @@ use alloc::sync::Arc; use alloc::vec::Vec; use core::fmt; use core::fmt::Display; -use core::marker::PhantomData; use core::ops::RangeInclusive; +use ingot::icmp::IcmpV4Ref; +use ingot::icmp::IcmpV6Ref; use opte_api::Direction; use opte_api::IpAddr; use opte_api::Ipv4Addr; @@ -240,7 +241,7 @@ impl SNat { fn gen_icmp_desc( &self, nat: SNatAlloc, - pkt: &Packet, + pkt: &Packet, ) -> GenDescResult { let meta = pkt.meta(); @@ -249,8 +250,8 @@ impl SNat { let icmp = meta .inner_icmp() .ok_or(GenIcmpErr::::MetaNotFound)?; - if icmp.msg_type != Icmpv4Message::EchoRequest.into() { - Err(GenIcmpErr::NotRequest(icmp.msg_type).into()) + if icmp.ty() != u8::from(Icmpv4Message::EchoRequest) { + Err(GenIcmpErr::NotRequest(icmp.ty()).into()) } else { Ok(icmp.echo_id()) } @@ -259,8 +260,8 @@ impl SNat { let icmp6 = meta .inner_icmp6() .ok_or(GenIcmpErr::::MetaNotFound)?; - if icmp6.msg_type != Icmpv6Message::EchoRequest.into() { - Err(GenIcmpErr::NotRequest(icmp6.msg_type).into()) + if icmp6.ty() != u8::from(Icmpv6Message::EchoRequest) 
{ + Err(GenIcmpErr::NotRequest(icmp6.ty()).into()) } else { Ok(icmp6.echo_id()) } @@ -302,7 +303,7 @@ where fn gen_desc( &self, flow_id: &InnerFlowId, - pkt: &Packet, + pkt: &Packet, _meta: &mut ActionMeta, ) -> GenDescResult { let priv_port = flow_id.src_port; @@ -365,7 +366,7 @@ impl ActionDesc for SNatDesc { HdrTransform { name: SNAT_NAME.to_string(), - inner_ip: HeaderAction::Modify(ip, PhantomData), + inner_ip: HeaderAction::Modify(ip), inner_ulp: UlpHeaderAction::Modify(UlpMetaModify { generic: UlpGenericModify { src_port: Some(self.nat.entry.port), @@ -385,7 +386,7 @@ impl ActionDesc for SNatDesc { HdrTransform { name: SNAT_NAME.to_string(), - inner_ip: HeaderAction::Modify(ip, PhantomData), + inner_ip: HeaderAction::Modify(ip), inner_ulp: UlpHeaderAction::Modify(UlpMetaModify { generic: UlpGenericModify { dst_port: Some(self.priv_port), @@ -426,7 +427,7 @@ impl ActionDesc for SNatIcmpEchoDesc { HdrTransform { name: SNAT_NAME.to_string(), - inner_ip: HeaderAction::Modify(ip, PhantomData), + inner_ip: HeaderAction::Modify(ip), inner_ulp: UlpHeaderAction::Modify(UlpMetaModify { icmp_id: Some(self.nat.entry.port), ..Default::default() @@ -443,7 +444,7 @@ impl ActionDesc for SNatIcmpEchoDesc { HdrTransform { name: SNAT_NAME.to_string(), - inner_ip: HeaderAction::Modify(ip, PhantomData), + inner_ip: HeaderAction::Modify(ip), inner_ulp: UlpHeaderAction::Modify(UlpMetaModify { icmp_id: Some(self.echo_ident), ..Default::default() @@ -461,6 +462,19 @@ impl ActionDesc for SNatIcmpEchoDesc { #[cfg(test)] mod test { + use ingot::ethernet::Ethertype; + use ingot::ip::IpProtocol; + use ingot::tcp::Tcp; + use ingot::tcp::TcpFlags; + use ingot::tcp::TcpRef; + use ingot::types::HeaderLen; + + use crate::ddi::mblk::MsgBlk; + use crate::engine::ether::Ethernet; + use crate::engine::ether::EthernetRef; + use crate::engine::ip::v4::Ipv4; + use crate::engine::ip::v4::Ipv4Ref; + use super::*; #[test] @@ -485,15 +499,6 @@ mod test { #[test] fn snat4_desc_lifecycle() { - use 
crate::engine::ether::EtherHdr; - use crate::engine::ether::EtherMeta; - use crate::engine::ether::EtherType; - use crate::engine::headers::IpMeta; - use crate::engine::headers::UlpMeta; - use crate::engine::ip4::Ipv4Hdr; - use crate::engine::ip4::Ipv4Meta; - use crate::engine::ip4::Protocol; - use crate::engine::tcp::TcpMeta; use crate::engine::GenericUlp; use opte_api::Ipv4Addr; use opte_api::MacAddr; @@ -515,29 +520,30 @@ mod test { // ================================================================ // Build the packet // ================================================================ - let body = vec![]; - let tcp = - TcpMeta { src: priv_port, dst: outside_port, ..Default::default() }; - let ip4 = Ipv4Meta { - src: priv_ip, - dst: outside_ip, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let body: Vec = vec![]; + let tcp = Tcp { + source: priv_port, + destination: outside_port, + ..Default::default() + }; + let ip4 = Ipv4 { + source: priv_ip, + destination: outside_ip, + protocol: IpProtocol::TCP, + total_len: (Ipv4::MINIMUM_LENGTH + (&tcp, &body).packet_length()) + as u16, ..Default::default() }; - let eth = EtherMeta { - ether_type: EtherType::Ipv4, - src: priv_mac, - dst: dest_mac, + let eth = Ethernet { + destination: dest_mac, + source: priv_mac, + ethertype: Ethertype::IPV4, }; - let pkt_len = EtherHdr::SIZE + usize::from(ip4.total_len); - let mut pkt = Packet::alloc_and_expand(pkt_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - tcp.emit(wtr.slice_mut(tcp.hdr_len()).unwrap()); - wtr.write(&body).unwrap(); - let mut pkt = pkt.parse(Direction::Out, GenericUlp {}).unwrap(); + + let mut pkt_m = MsgBlk::new_ethernet_pkt((ð, &ip4, &tcp, &body)); + let mut pkt = Packet::parse_outbound(pkt_m.iter_mut(), GenericUlp {}) + .unwrap() + .to_full_meta(); pkt.compute_checksums(); // 
================================================================ @@ -557,81 +563,81 @@ mod test { out_ht.run(pkt.meta_mut()).unwrap(); let pmo = pkt.meta(); - let ether_meta = pmo.inner.ether; - assert_eq!(ether_meta.src, priv_mac); - assert_eq!(ether_meta.dst, dest_mac); + let ether_meta = pmo.inner_ether(); + assert_eq!(ether_meta.source(), priv_mac); + assert_eq!(ether_meta.destination(), dest_mac); - let ip4_meta = match pmo.inner.ip.as_ref().unwrap() { - IpMeta::Ip4(v) => v, + let ip4_meta = match pmo.inner_ip4() { + Some(v) => v, _ => panic!("expect Ipv4Meta"), }; - assert_eq!(ip4_meta.src, pub_ip); - assert_eq!(ip4_meta.dst, outside_ip); - assert_eq!(ip4_meta.proto, Protocol::TCP); + assert_eq!(ip4_meta.source(), pub_ip); + assert_eq!(ip4_meta.destination(), outside_ip); + assert_eq!(ip4_meta.protocol(), IpProtocol::TCP); - let tcp_meta = match pmo.inner.ulp.as_ref().unwrap() { - UlpMeta::Tcp(v) => v, + let tcp_meta = match pmo.inner_tcp() { + Some(v) => v, _ => panic!("expect TcpMeta"), }; - assert_eq!(tcp_meta.src, pub_port); - assert_eq!(tcp_meta.dst, outside_port); - assert_eq!(tcp_meta.flags, 0); + assert_eq!(tcp_meta.source(), pub_port); + assert_eq!(tcp_meta.destination(), outside_port); + assert_eq!(tcp_meta.flags(), TcpFlags::empty()); // ================================================================ // Verify inbound header transformation. 
// ================================================================ - let body = vec![]; - let tcp = - TcpMeta { src: outside_port, dst: priv_port, ..Default::default() }; - let ip4 = Ipv4Meta { - src: outside_ip, - dst: priv_ip, - proto: Protocol::TCP, - total_len: (Ipv4Hdr::BASE_SIZE + tcp.hdr_len() + body.len()) as u16, + let tcp = Tcp { + source: outside_port, + destination: pub_port, ..Default::default() }; - let eth = EtherMeta { - ether_type: EtherType::Ipv4, - src: dest_mac, - dst: priv_mac, + let ip4 = Ipv4 { + source: outside_ip, + destination: pub_ip, + protocol: IpProtocol::TCP, + total_len: (Ipv4::MINIMUM_LENGTH + (&tcp, &body).packet_length()) + as u16, + ..Default::default() + }; + let eth = Ethernet { + destination: priv_mac, + source: dest_mac, + ethertype: Ethertype::IPV4, }; - let pkt_len = EtherHdr::SIZE + usize::from(ip4.total_len); - let mut pkt = Packet::alloc_and_expand(pkt_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip4.emit(wtr.slice_mut(ip4.hdr_len()).unwrap()); - tcp.emit(wtr.slice_mut(tcp.hdr_len()).unwrap()); - wtr.write(&body).unwrap(); - let mut pkt = pkt.parse(Direction::In, GenericUlp {}).unwrap(); + + let mut pkt_m = MsgBlk::new_ethernet_pkt((ð, &ip4, &tcp, &body)); + let mut pkt = Packet::parse_inbound(pkt_m.iter_mut(), GenericUlp {}) + .unwrap() + .to_full_meta(); pkt.compute_checksums(); let in_ht = desc.gen_ht(Direction::In); in_ht.run(pkt.meta_mut()).unwrap(); let pmi = pkt.meta(); - let ether_meta = pmi.inner.ether; - assert_eq!(ether_meta.src, dest_mac); - assert_eq!(ether_meta.dst, priv_mac); + let ether_meta = pmi.inner_ether(); + assert_eq!(ether_meta.source(), dest_mac); + assert_eq!(ether_meta.destination(), priv_mac); - let ip4_meta = match pmi.inner.ip.as_ref().unwrap() { - IpMeta::Ip4(v) => v, + let ip4_meta = match pmi.inner_ip4() { + Some(v) => v, _ => panic!("expect Ipv4Meta"), }; - assert_eq!(ip4_meta.src, outside_ip); - assert_eq!(ip4_meta.dst, priv_ip); - 
assert_eq!(ip4_meta.proto, Protocol::TCP); + assert_eq!(ip4_meta.source(), outside_ip); + assert_eq!(ip4_meta.destination(), priv_ip); + assert_eq!(ip4_meta.protocol(), IpProtocol::TCP); - let tcp_meta = match pmi.inner.ulp.as_ref().unwrap() { - UlpMeta::Tcp(v) => v, + let tcp_meta = match pmi.inner_tcp() { + Some(v) => v, _ => panic!("expect TcpMeta"), }; - assert_eq!(tcp_meta.src, outside_port); - assert_eq!(tcp_meta.dst, priv_port); - assert_eq!(tcp_meta.flags, 0); + assert_eq!(tcp_meta.source(), outside_port); + assert_eq!(tcp_meta.destination(), priv_port); + assert_eq!(tcp_meta.flags(), TcpFlags::empty()); // ================================================================ // Verify other ULPs are unaffected. diff --git a/lib/opte/src/engine/tcp.rs b/lib/opte/src/engine/tcp.rs index 618c773c..5517002f 100644 --- a/lib/opte/src/engine/tcp.rs +++ b/lib/opte/src/engine/tcp.rs @@ -6,27 +6,11 @@ //! TCP headers. -use super::checksum::Checksum; -use super::checksum::HeaderChecksum; use super::flow_table::Ttl; -use super::headers::HeaderActionModify; -use super::headers::ModifyAction; -use super::headers::PushAction; -use super::headers::RawHeader; -use super::headers::UlpMetaModify; -use super::packet::PacketReadMut; -use super::packet::ReadErr; -use crate::d_error::DError; use core::fmt; use core::fmt::Display; -use opte_api::DYNAMIC_PORT; use serde::Deserialize; use serde::Serialize; -use zerocopy::AsBytes; -use zerocopy::FromBytes; -use zerocopy::FromZeroes; -use zerocopy::Ref; -use zerocopy::Unaligned; pub const TCP_HDR_OFFSET_MASK: u8 = 0xF0; pub const TCP_HDR_OFFSET_SHIFT: u8 = 4; @@ -52,19 +36,6 @@ pub const KEEPALIVE_EXPIRE_SECS: u64 = 8_000; pub const TIME_WAIT_EXPIRE_TTL: Ttl = Ttl::new_seconds(TIME_WAIT_EXPIRE_SECS); pub const KEEPALIVE_EXPIRE_TTL: Ttl = Ttl::new_seconds(KEEPALIVE_EXPIRE_SECS); -/// The standard TCP flags. We don't bother with the experimental NS -/// flag. 
-pub mod TcpFlags { - pub const FIN: u8 = crate::bit_on(0); - pub const SYN: u8 = crate::bit_on(1); - pub const RST: u8 = crate::bit_on(2); - pub const PSH: u8 = crate::bit_on(3); - pub const ACK: u8 = crate::bit_on(4); - pub const URG: u8 = crate::bit_on(5); - pub const ECE: u8 = crate::bit_on(6); - pub const CWR: u8 = crate::bit_on(7); -} - // The standard TCP states. // // See Figure 13-8 of TCP/IP Illustrated Vol. 1 Ed. 2 @@ -100,72 +71,6 @@ impl Display for TcpState { } } -#[derive(Clone, Copy, Debug, Default, Eq, Ord, PartialEq, PartialOrd)] -pub struct TcpMeta { - pub src: u16, - pub dst: u16, - pub flags: u8, - pub seq: u32, - pub ack: u32, - pub window_size: u16, - pub csum: [u8; 2], - // Fow now we keep options as raw bytes, allowing up to 40 bytes - // of options. - pub options_bytes: Option<[u8; TcpHdr::MAX_OPTION_SIZE]>, - pub options_len: usize, -} - -impl TcpMeta { - // This assumes the slice is large enough to hold the header. - #[inline] - pub fn emit(&self, dst: &mut [u8]) { - debug_assert_eq!(dst.len(), self.hdr_len()); - let base = &mut dst[0..TcpHdrRaw::SIZE]; - let mut raw = TcpHdrRaw::new_mut(base).unwrap(); - raw.write(TcpHdrRaw::from(self)); - if let Some(bytes) = self.options_bytes { - dst[TcpHdr::BASE_SIZE..] 
- .copy_from_slice(&bytes[0..self.options_len]); - } - } - - #[inline] - pub fn has_flag(&self, flag: u8) -> bool { - (self.flags & flag) != 0 - } - - #[inline] - pub fn hdr_len(&self) -> usize { - TcpHdr::BASE_SIZE + self.options_len - } -} - -impl<'a> From<&TcpHdr<'a>> for TcpMeta { - fn from(tcp: &TcpHdr) -> Self { - let (options_bytes, options_len) = match tcp.options_raw() { - None => (None, 0), - Some(src) => { - let mut dst = [0; TcpHdr::MAX_OPTION_SIZE]; - dst[0..src.len()].copy_from_slice(src); - (Some(dst), src.len()) - } - }; - - let raw = tcp.base.read(); - Self { - src: u16::from_be_bytes(raw.src_port), - dst: u16::from_be_bytes(raw.dst_port), - flags: raw.flags, - seq: u32::from_be_bytes(raw.seq), - ack: u32::from_be_bytes(raw.ack), - window_size: u16::from_be_bytes(raw.window_size), - csum: raw.csum, - options_bytes, - options_len, - } - } -} - #[derive( Clone, Copy, @@ -183,528 +88,8 @@ pub struct TcpPush { pub dst: u16, } -impl PushAction for TcpPush { - fn push(&self) -> TcpMeta { - TcpMeta { src: self.src, dst: self.dst, ..Default::default() } - } -} - #[derive(Clone, Debug, Deserialize, Serialize)] pub struct TcpMod { src: Option, dst: Option, } - -impl ModifyAction for TcpMod { - fn modify(&self, meta: &mut TcpMeta) { - if let Some(src) = self.src { - meta.src = src; - } - - if let Some(dst) = self.dst { - meta.dst = dst; - } - } -} - -impl HeaderActionModify for TcpMeta { - fn run_modify(&mut self, spec: &UlpMetaModify) { - if spec.generic.src_port.is_some() { - self.src = spec.generic.src_port.unwrap() - } - - if spec.generic.dst_port.is_some() { - self.dst = spec.generic.dst_port.unwrap() - } - - if spec.tcp_flags.is_some() { - self.flags = spec.tcp_flags.unwrap() - } - } -} - -#[derive(Debug)] -pub struct TcpHdr<'a> { - base: Ref<&'a mut [u8], TcpHdrRaw>, - options: Option<&'a mut [u8]>, -} - -impl<'a> TcpHdr<'a> { - pub const BASE_SIZE: usize = TcpHdrRaw::SIZE; - pub const CSUM_BEGIN_OFFSET: usize = 16; - pub const CSUM_END_OFFSET: usize = 
18; - - /// The maximum size of a TCP header. - /// - /// The header length is derived from the data offset field. - /// Given it is a 4-bit field and specifies the size in 32-bit words, - /// the maximum header size is therefore (2^4 - 1) * 4 = 60 bytes. - pub const MAX_SIZE: usize = 60; - - /// The maximum size of any TCP options in a TCP header. - pub const MAX_OPTION_SIZE: usize = Self::MAX_SIZE - Self::BASE_SIZE; - - /// Return the acknowledgement number. - pub fn ack(&self) -> u32 { - u32::from_be_bytes(self.base.ack) - } - - pub fn csum(&self) -> [u8; 2] { - self.base.csum - } - - pub fn base_bytes(&self) -> &[u8] { - self.base.bytes() - } - - pub fn options_bytes(&self) -> Option<&[u8]> { - match &self.options { - None => None, - Some(options) => Some(*options), - } - } - - /// Return the checksum value minus header TCP header bytes, - /// producing the checksum value of the body. - pub fn csum_minus_hdr(&self) -> Option { - // There was no checksum to begin with. - if self.base.csum == [0; 2] { - return None; - } - - let mut csum = Checksum::from(HeaderChecksum::wrap(self.base.csum)); - // When a checksum is calculated you treat the checksum field - // bytes themselves as zero; therefore its imperative we do - // not include the checksum field bytes when subtracting from - // the checksum value. - csum.sub_bytes(&self.base.bytes()[0..Self::CSUM_BEGIN_OFFSET]); - csum.sub_bytes(&self.base.bytes()[Self::CSUM_END_OFFSET..]); - - if let Some(options) = self.options.as_ref() { - csum.sub_bytes(options); - } - Some(csum) - } - - /// Return destination port. - pub fn dst_port(&self) -> u16 { - u16::from_be_bytes(self.base.dst_port) - } - - /// Return the TCP flags. - pub fn flags(&self) -> u8 { - self.base.flags - } - - /// Return the leangth of the TCP header, in bytes. - /// - /// This length includes the TCP options. - pub fn hdr_len(&self) -> usize { - usize::from(self.base.offset()) * 4 - } - - /// Return a reference to the options data. 
- pub fn options_raw(&self) -> Option<&[u8]> { - match &self.options { - None => None, - Some(options) => Some(*options), - } - } - - pub fn parse<'b>( - rdr: &'b mut impl PacketReadMut<'a>, - ) -> Result { - let src = rdr.slice_mut(TcpHdrRaw::SIZE)?; - let mut hdr = Self { base: TcpHdrRaw::new_mut(src)?, options: None }; - - if hdr.src_port() == DYNAMIC_PORT { - return Err(TcpHdrError::BadSrcPort { src_port: hdr.src_port() }); - } - - if hdr.dst_port() == DYNAMIC_PORT { - return Err(TcpHdrError::BadDstPort { dst_port: hdr.dst_port() }); - } - - let hdr_len = hdr.hdr_len(); - - if hdr_len < Self::BASE_SIZE { - return Err(TcpHdrError::TruncatedHdr { - hdr_len_bytes: hdr.hdr_len(), - }); - } - - if hdr_len > Self::BASE_SIZE { - let opts_len = hdr.hdr_len() - Self::BASE_SIZE; - match rdr.slice_mut(opts_len) { - Ok(opts) => hdr.options = Some(opts), - Err(e) => { - return Err(TcpHdrError::TruncatedOptions(e)); - } - } - } - - Ok(hdr) - } - - /// Return the sequence number. - pub fn seq(&self) -> u32 { - u32::from_be_bytes(self.base.seq) - } - - /// Set the checksum value. - pub fn set_csum(&mut self, csum: [u8; 2]) { - self.base.csum = csum - } - - /// Return the source port. - pub fn src_port(&self) -> u16 { - u16::from_be_bytes(self.base.src_port) - } - - /// Return the window size value. 
- pub fn window_size(&self) -> u16 { - u16::from_be_bytes(self.base.window_size) - } -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq, DError)] -#[derror(leaf_data = TcpHdrError::derror_data)] -pub enum TcpHdrError { - BadDstPort { dst_port: u16 }, - BadOffset { offset: u8, len_in_bytes: u8 }, - BadSrcPort { src_port: u16 }, - ReadError(ReadErr), - Straddled, - TruncatedHdr { hdr_len_bytes: usize }, - TruncatedOptions(ReadErr), -} - -impl TcpHdrError { - fn derror_data(&self, data: &mut [u64]) { - [data[0], data[1]] = match self { - Self::BadDstPort { dst_port } => [*dst_port as u64, 0], - Self::BadOffset { offset, len_in_bytes } => { - [*offset as u64, *len_in_bytes as u64] - } - Self::BadSrcPort { src_port } => [*src_port as u64, 0], - Self::TruncatedHdr { hdr_len_bytes } => [*hdr_len_bytes as u64, 0], - _ => [0, 0], - } - } -} - -impl From for TcpHdrError { - fn from(error: ReadErr) -> Self { - TcpHdrError::ReadError(error) - } -} - -/// Note: For now we keep this unaligned to be safe. 
-#[repr(C)] -#[derive(Clone, Debug, FromBytes, AsBytes, FromZeroes, Unaligned)] -pub struct TcpHdrRaw { - pub src_port: [u8; 2], - pub dst_port: [u8; 2], - pub seq: [u8; 4], - pub ack: [u8; 4], - pub offset: u8, - pub flags: u8, - pub window_size: [u8; 2], - pub csum: [u8; 2], - pub urg: [u8; 2], -} - -impl TcpHdrRaw { - fn offset(&self) -> u8 { - (self.offset & TCP_HDR_OFFSET_MASK) >> TCP_HDR_OFFSET_SHIFT - } -} - -impl<'a> RawHeader<'a> for TcpHdrRaw { - #[inline] - fn new_mut(src: &mut [u8]) -> Result, ReadErr> { - debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::new(src) { - Some(hdr) => hdr, - None => return Err(ReadErr::BadLayout), - }; - Ok(hdr) - } -} - -impl From<&TcpMeta> for TcpHdrRaw { - #[inline] - fn from(meta: &TcpMeta) -> Self { - Self { - src_port: meta.src.to_be_bytes(), - dst_port: meta.dst.to_be_bytes(), - seq: meta.seq.to_be_bytes(), - ack: meta.ack.to_be_bytes(), - offset: ((meta.hdr_len() as u8 / 4) & 0x0F) << 4, - flags: meta.flags, - window_size: meta.window_size.to_be_bytes(), - csum: meta.csum, - urg: [0; 2], - } - } -} - -#[cfg(test)] -mod test { - use super::*; - use crate::engine::packet::Packet; - - #[test] - fn emit_no_opts() { - let tcp = TcpMeta { - src: 49154, - dst: 80, - seq: 2511121667, - ack: 754208397, - flags: TcpFlags::ACK, - window_size: 64436, - options_bytes: None, - options_len: 0, - csum: [0; 2], - }; - - let len = tcp.hdr_len(); - let mut pkt = Packet::alloc_and_expand(len); - let mut wtr = pkt.seg0_wtr(); - tcp.emit(wtr.slice_mut(tcp.hdr_len()).unwrap()); - assert_eq!(len, pkt.len()); - #[rustfmt::skip] - let expected_bytes = vec![ - // source - 0xC0, 0x02, - // dest - 0x00, 0x50, - // seq - 0x95, 0xAC, 0xAD, 0x03, - // ack - 0x2C, 0xF4, 0x4E, 0x8D, - // offset + flags - 0x50, 0x10, - // window - 0xFB, 0xB4, - // checksum - 0x00, 0x00, - // URG pointer - 0x00, 0x00, - ]; - assert_eq!(&expected_bytes, pkt.seg_bytes(0)); - } - - #[test] - fn emit_opts() { - let mut opts = [0x00; TcpHdr::MAX_OPTION_SIZE]; 
- let bytes = [ - 0x02, 0x04, 0x05, 0xB4, 0x04, 0x02, 0x08, 0x0A, 0x09, 0xB4, 0x2A, - 0xA9, 0x00, 0x00, 0x00, 0x00, 0x01, 0x03, 0x03, 0x01, - ]; - opts[0..bytes.len()].copy_from_slice(&bytes); - - let tcp = TcpMeta { - src: 49154, - dst: 80, - seq: 2511121590, - ack: 0, - flags: TcpFlags::SYN, - window_size: 64240, - options_bytes: Some(opts), - options_len: bytes.len(), - csum: [0; 2], - }; - - let len = tcp.hdr_len(); - assert_eq!(40, len); - let mut pkt = Packet::alloc_and_expand(len); - let mut wtr = pkt.seg0_wtr(); - tcp.emit(wtr.slice_mut(tcp.hdr_len()).unwrap()); - assert_eq!(len, pkt.len()); - - #[rustfmt::skip] - let expected_bytes = vec![ - // source - 0xC0, 0x02, - // dest - 0x00, 0x50, - // seq - 0x95, 0xAC, 0xAC, 0xB6, - // ack - 0x00, 0x00, 0x00, 0x00, - // offset + flags - 0xA0, 0x02, - // window - 0xFA, 0xF0, - // checksum - 0x00, 0x00, - // URG pointer - 0x00, 0x00, - // MSS - 0x02, 0x04, 0x05, 0xB4, - // SACK permitted - 0x04, 0x02, - // Timestamps - 0x08, 0x0A, 0x09, 0xB4, 0x2A, 0xA9, 0x00, 0x00, 0x00, 0x00, - // No-op - 0x01, - // Window Scale - 0x03, 0x03, 0x01, - - ]; - assert_eq!(&expected_bytes, pkt.seg_bytes(0)); - } - - #[test] - fn parse_no_opts() { - let hdr_len = TcpHdr::BASE_SIZE; - #[rustfmt::skip] - let base_bytes = vec![ - // source - 0xC0, 0x02, - // dest - 0x00, 0x50, - // seq - 0x95, 0xAC, 0xAC, 0xB6, - // ack - 0x00, 0x00, 0x00, 0x00, - // offset - ((hdr_len / 4) as u8) << TCP_HDR_OFFSET_SHIFT, - // flags - 0x02, - // window - 0xFA, 0xF0, - // checksum - 0x00, 0x00, - // URG pointer - 0x00, 0x00, - ]; - assert_eq!(base_bytes.len(), TcpHdr::BASE_SIZE); - - let mut pkt = Packet::copy(&base_bytes); - let mut rdr = pkt.get_rdr_mut(); - let tcp_hdr = TcpHdr::parse(&mut rdr).unwrap(); - - assert_eq!(tcp_hdr.base_bytes(), &base_bytes); - assert_eq!(tcp_hdr.options_bytes(), None); - } - - #[test] - fn parse_max_opts() { - #[rustfmt::skip] - let option_bytes = [ - // MSS - 0x02, 0x04, 0x05, 0xB4, - // SACK permitted - 0x04, 0x02, - // 
Timestamps - 0x08, 0x0A, 0x09, 0xB4, 0x2A, 0xA9, 0x00, 0x00, 0x00, 0x00, - // No-op - 0x01, - // Window Scale - 0x03, 0x03, 0x01, - // No-ops - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - ]; - - let hdr_len = TcpHdr::BASE_SIZE + option_bytes.len(); - #[rustfmt::skip] - let base_bytes = [ - // source - 0xC0, 0x02, - // dest - 0x00, 0x50, - // seq - 0x95, 0xAC, 0xAC, 0xB6, - // ack - 0x00, 0x00, 0x00, 0x00, - // offset - ((hdr_len / 4) as u8) << TCP_HDR_OFFSET_SHIFT, - // flags - 0x02, - // window - 0xFA, 0xF0, - // checksum - 0x00, 0x00, - // URG pointer - 0x00, 0x00, - ]; - assert_eq!(base_bytes.len(), TcpHdr::BASE_SIZE); - - let pkt_bytes = base_bytes - .iter() - .copied() - .chain(option_bytes.iter().copied()) - .collect::>(); - - let mut pkt = Packet::copy(&pkt_bytes); - let mut rdr = pkt.get_rdr_mut(); - let tcp_hdr = TcpHdr::parse(&mut rdr).unwrap(); - - assert_eq!(tcp_hdr.base_bytes(), &base_bytes); - assert_eq!(tcp_hdr.options_bytes(), Some(&option_bytes[..])); - } - - #[test] - fn parse_opts_truncated() { - #[rustfmt::skip] - let option_bytes = [ - // MSS - 0x02, 0x04, 0x05, 0xB4, - // SACK permitted - 0x04, 0x02, - // Timestamps - 0x08, 0x0A, 0x09, 0xB4, 0x2A, 0xA9, 0x00, 0x00, 0x00, 0x00, - // No-op - 0x01, - // Window Scale - 0x03, 0x03, 0x01, - ]; - - let hdr_len = TcpHdr::BASE_SIZE - + option_bytes.len() - // Indicate there's an extra 32-bit word of options - + 4; - - #[rustfmt::skip] - let base_bytes = [ - // source - 0xC0, 0x02, - // dest - 0x00, 0x50, - // seq - 0x95, 0xAC, 0xAC, 0xB6, - // ack - 0x00, 0x00, 0x00, 0x00, - // offset - ((hdr_len / 4) as u8) << TCP_HDR_OFFSET_SHIFT, - // flags - 0x02, - // window - 0xFA, 0xF0, - // checksum - 0x00, 0x00, - // URG pointer - 0x00, 0x00, - ]; - assert_eq!(base_bytes.len(), TcpHdr::BASE_SIZE); - - let pkt_bytes = base_bytes - .iter() - .copied() - .chain(option_bytes.iter().copied()) - .collect::>(); - - let mut pkt = 
Packet::copy(&pkt_bytes); - let mut rdr = pkt.get_rdr_mut(); - let tcp_hdr_err = TcpHdr::parse(&mut rdr) - .expect_err("expected to fail parsing malformed TCP header"); - - assert_eq!( - tcp_hdr_err, - TcpHdrError::TruncatedOptions(ReadErr::NotEnoughBytes) - ); - } -} diff --git a/lib/opte/src/engine/tcp_state.rs b/lib/opte/src/engine/tcp_state.rs index 62423648..207839cf 100644 --- a/lib/opte/src/engine/tcp_state.rs +++ b/lib/opte/src/engine/tcp_state.rs @@ -2,20 +2,21 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2022 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! Basic TCP state machine. use super::packet::InnerFlowId; -use super::tcp::TcpFlags; -use super::tcp::TcpMeta; use super::tcp::TcpState; use core::ffi::CStr; use core::fmt; use core::fmt::Display; #[cfg(all(not(feature = "std"), not(test)))] use illumos_sys_hdrs::uintptr_t; +use ingot::tcp::TcpFlags as IngotTcpFlags; +use ingot::tcp::TcpRef; use opte_api::Direction; +use zerocopy::ByteSlice; /// An error processing a TCP flow. #[derive(Clone, Copy, Debug, PartialEq)] @@ -135,10 +136,14 @@ impl TcpFlowState { /// `return None` and replace them with a single `None` value at /// the end of the function; but the author finds it useful to be /// explicit for each case. - fn flow_in(&mut self, tcp: &TcpMeta) -> Option { + fn flow_in( + &mut self, + flags: IngotTcpFlags, + tcp_ack: u32, + ) -> Option { use TcpState::*; - if tcp.has_flag(TcpFlags::RST) { + if flags.contains(IngotTcpFlags::RST) { return Some(Closed); } @@ -147,7 +152,7 @@ impl TcpFlowState { // We have a new inbound SYN. We assume for now the // guest is listening on the given port by moving to // the LISTEN state. - if tcp.has_flag(TcpFlags::SYN) { + if flags.contains(IngotTcpFlags::SYN) { return Some(Listen); } @@ -160,7 +165,7 @@ impl TcpFlowState { // respond with an ACK or RST. 
In the future we could // instead keep this in some type of probationary // state (or separate table). - if tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::ACK) { return Some(Established); } @@ -171,7 +176,7 @@ impl TcpFlowState { // If the guest doesn't respond to the first SYN, or // the sender never sees the guest's ACK, then the // sender may send more SYNs. - if tcp.has_flag(TcpFlags::SYN) { + if flags.contains(IngotTcpFlags::SYN) { return Some(Listen); } @@ -181,7 +186,7 @@ impl TcpFlowState { // The guest is in active open and waiting for the // remote's SYN+ACK. SynSent => { - if tcp.has_flag(TcpFlags::SYN) && tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::SYN | IngotTcpFlags::ACK) { Some(Established) } else { // Could be simultaneous open, but not worrying @@ -193,14 +198,14 @@ impl TcpFlowState { // The guest is in passive open and waiting for the // remote's ACK. SynRcvd => { - if tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::ACK) { return Some(Established); } // In this case the client is retransmitting its SYN; // probably because the guest's SYN+ACK reply got lost // or stuck in a buffer somewhere. - if tcp.has_flag(TcpFlags::SYN) { + if flags.contains(IngotTcpFlags::SYN) { return Some(SynRcvd); } @@ -210,13 +215,13 @@ impl TcpFlowState { } Established => { - if tcp.has_flag(TcpFlags::FIN) { + if flags.contains(IngotTcpFlags::FIN) { // In this case remote end has initiated the close // and the guest is entering passive close. return Some(CloseWait); } - if tcp.has_flag(TcpFlags::SYN) { + if flags.contains(IngotTcpFlags::SYN) { // We may have gotten stuck in `Established` due to // a delayed FIN+ACK/ACK at connection close, or // unexpected OS reset/panic. @@ -236,7 +241,7 @@ impl TcpFlowState { // // We could also see an ACK for previous data sent // from the guest. 
- if tcp.has_flag(TcpFlags::FIN) || tcp.has_flag(TcpFlags::ACK) { + if flags.intersects(IngotTcpFlags::FIN | IngotTcpFlags::ACK) { return Some(CloseWait); } @@ -255,8 +260,8 @@ impl TcpFlowState { // 2. We are seeing an ACK from the remote for a // previous data segment. Pass it up to the guest // so it can log the duplicate ACK. - if tcp.has_flag(TcpFlags::ACK) { - if tcp.ack == self.guest_seq.unwrap() + 1 { + if flags.contains(IngotTcpFlags::ACK) { + if tcp_ack == self.guest_seq.unwrap() + 1 { return Some(Closed); } @@ -273,22 +278,22 @@ impl TcpFlowState { // at this point. // // TODO Verify ack number. - if tcp.has_flag(TcpFlags::FIN) && tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::FIN | IngotTcpFlags::ACK) { return Some(TimeWait); } // The remote sent its ACK for out active FIN. We now // need to wait for the remote to passive close and // send its FIN. - if tcp.has_flag(TcpFlags::ACK) - && tcp.ack == self.guest_seq.unwrap() + 1 + if flags.contains(IngotTcpFlags::ACK) + && tcp_ack == self.guest_seq.unwrap() + 1 { return Some(FinWait2); } // Presumably an ACK for some previous data. Let the // guest decide. - if tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::ACK) { return Some(FinWait1); } @@ -298,8 +303,8 @@ impl TcpFlowState { // The guest is in active close. FinWait2 => { - if tcp.has_flag(TcpFlags::FIN) - && tcp.ack == self.guest_seq.unwrap() + 1 + if flags.contains(IngotTcpFlags::FIN) + && tcp_ack == self.guest_seq.unwrap() + 1 { // In this case the guest was the active closer, // has sent its FIN, and has seen an ACK for that @@ -309,7 +314,7 @@ impl TcpFlowState { return Some(TimeWait); } - if tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::ACK) { return Some(FinWait2); } @@ -320,7 +325,7 @@ impl TcpFlowState { TimeWait => { // The guest is receiving additional copies of FIN for // remote's passive close. 
- if tcp.has_flag(TcpFlags::FIN) { + if flags.contains(IngotTcpFlags::FIN) { return Some(TimeWait); } @@ -328,7 +333,7 @@ impl TcpFlowState { // so I'm not sure why we would get an ACK in the // TIME_WAIT state. But for now I allow it to make // progress. - if tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::ACK) { return Some(TimeWait); } @@ -341,10 +346,10 @@ impl TcpFlowState { /// `return None` and replace them with a single `None` value at /// the end of the function; but the author finds it useful to be /// explicit for each case. - fn flow_out(&mut self, tcp: &TcpMeta) -> Option { + fn flow_out(&mut self, flags: IngotTcpFlags) -> Option { use TcpState::*; - if tcp.has_flag(TcpFlags::RST) { + if flags.contains(IngotTcpFlags::RST) { return Some(Closed); } @@ -352,13 +357,13 @@ impl TcpFlowState { Closed => { // The guest is trying to create a new outbound // connection. - if tcp.has_flag(TcpFlags::SYN) { + if flags.contains(IngotTcpFlags::SYN) { return Some(SynSent); } // The guest is responding to a data segment, // immediately move to established. - if tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::ACK) { return Some(Established); } @@ -369,7 +374,7 @@ impl TcpFlowState { // In this case the guest process is responding to the // remote client with SYN+ACK. Listen => { - if tcp.has_flag(TcpFlags::SYN) && tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::SYN | IngotTcpFlags::ACK) { return Some(SynRcvd); } @@ -378,7 +383,7 @@ impl TcpFlowState { SynSent => { // In this case we are retransmitting the SYN packet. - if tcp.has_flag(TcpFlags::SYN) { + if flags.contains(IngotTcpFlags::SYN) { return Some(SynSent); } @@ -388,7 +393,7 @@ impl TcpFlowState { SynRcvd => { // In this case the guest is retransmitting the // SYN+ACK from its SYN_RCVD state. 
- if tcp.has_flag(TcpFlags::SYN) && tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::SYN | IngotTcpFlags::ACK) { return Some(SynRcvd); } @@ -397,11 +402,11 @@ impl TcpFlowState { // TODO passive close Established => { - if tcp.has_flag(TcpFlags::FIN) { + if flags.contains(IngotTcpFlags::FIN) { return Some(FinWait1); } - if tcp.has_flag(TcpFlags::SYN) { + if flags.contains(IngotTcpFlags::SYN) { return None; } @@ -412,11 +417,11 @@ impl TcpFlowState { FinWait1 => { // The guest is resending its FIN to the remote to // indicate its active close. - if tcp.has_flag(TcpFlags::FIN) { + if flags.contains(IngotTcpFlags::FIN) { return Some(FinWait1); } - if tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::ACK) { return Some(FinWait1); } @@ -428,7 +433,7 @@ impl TcpFlowState { // The guest has closed its side but the remote might // still be sending data, make sure to allow ACKs get // out. - if tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::ACK) { return Some(FinWait2); } @@ -443,7 +448,7 @@ impl TcpFlowState { // passive FIN. Eventually this connection will time // out on the guest and in that case an RST reply is // sent. Or this flow will expire. - if tcp.has_flag(TcpFlags::ACK) { + if flags.contains(IngotTcpFlags::ACK) { return Some(TimeWait); } @@ -454,7 +459,7 @@ impl TcpFlowState { CloseWait => { // The guest is performing its half of the passive // close now. - if tcp.has_flag(TcpFlags::FIN) { + if flags.contains(IngotTcpFlags::FIN) { return Some(LastAck); } @@ -469,7 +474,7 @@ impl TcpFlowState { LastAck => { // The guest is either reacknowledging the remote's // FIN or resending its own FIN to the remote. 
- if tcp.has_flag(TcpFlags::FIN) || tcp.has_flag(TcpFlags::ACK) { + if flags.intersects(IngotTcpFlags::FIN | IngotTcpFlags::ACK) { return Some(LastAck); } @@ -488,14 +493,16 @@ impl TcpFlowState { } } - pub fn process( + pub fn process( &mut self, port: &CStr, dir: Direction, flow_id: &InnerFlowId, - tcp: &TcpMeta, + tcp: &impl TcpRef, ) -> Result { let curr_state = self.tcp_state; + let flags = tcp.flags(); + let ack = tcp.acknowledgement(); // Run the segment through the corresponding side of the TCP // state machine. A successful transition should return @@ -504,19 +511,19 @@ impl TcpFlowState { // unexpected transition. let res = match dir { Direction::In => { - let res = self.flow_in(tcp); - self.remote_seq = Some(tcp.seq); - if tcp.has_flag(TcpFlags::ACK) { - self.remote_ack = Some(tcp.ack); + let res = self.flow_in(flags, ack); + self.remote_seq = Some(tcp.sequence()); + if flags.contains(IngotTcpFlags::ACK) { + self.remote_ack = Some(ack); } res } Direction::Out => { - let res = self.flow_out(tcp); - self.guest_seq = Some(tcp.seq); - if tcp.has_flag(TcpFlags::ACK) { - self.guest_ack = Some(tcp.ack); + let res = self.flow_out(flags); + self.guest_seq = Some(tcp.sequence()); + if flags.contains(IngotTcpFlags::ACK) { + self.guest_ack = Some(ack); } res } @@ -541,22 +548,22 @@ impl TcpFlowState { // close (active or simul) will leave a flow in TIME-WAIT, which // is the most common case. If the guest is not yet ready, we expect // it will send its own RST in response. 
- None if tcp.has_flag(TcpFlags::SYN) => { + None if flags.contains(IngotTcpFlags::SYN) => { return Err(TcpFlowStateError::NewFlow { direction: dir, flow_id: *flow_id, state: curr_state, - flags: tcp.flags, + flags: flags.bits(), }); } None => { - self.tcp_flow_drop_probe(port, flow_id, dir, tcp.flags); + self.tcp_flow_drop_probe(port, flow_id, dir, flags.bits()); return Err(TcpFlowStateError::UnexpectedSegment { direction: dir, flow_id: *flow_id, state: curr_state, - flags: tcp.flags, + flags: flags.bits(), }); } }; diff --git a/lib/opte/src/engine/udp.rs b/lib/opte/src/engine/udp.rs index 6fb87ec2..3fac26ab 100644 --- a/lib/opte/src/engine/udp.rs +++ b/lib/opte/src/engine/udp.rs @@ -6,60 +6,8 @@ //! UDP headers. -use crate::d_error::DError; -use crate::engine::checksum::Checksum; -use crate::engine::checksum::HeaderChecksum; -use crate::engine::headers::HeaderActionModify; -use crate::engine::headers::ModifyAction; -use crate::engine::headers::PushAction; -use crate::engine::headers::RawHeader; -use crate::engine::headers::UlpMetaModify; -use crate::engine::packet::PacketReadMut; -use crate::engine::packet::ReadErr; -use core::mem; -use opte_api::DYNAMIC_PORT; use serde::Deserialize; use serde::Serialize; -use zerocopy::AsBytes; -use zerocopy::FromBytes; -use zerocopy::FromZeroes; -use zerocopy::Ref; -use zerocopy::Unaligned; - -#[derive(Clone, Copy, Debug, Default, Eq, Ord, PartialEq, PartialOrd)] -pub struct UdpMeta { - pub src: u16, - pub dst: u16, - pub len: u16, - pub csum: [u8; 2], -} - -impl UdpMeta { - // This assumes the dst is large enough. 
- #[inline] - pub fn emit(&self, dst: &mut [u8]) { - debug_assert!(dst.len() >= UdpHdr::SIZE); - dst[0..2].copy_from_slice(&self.src.to_be_bytes()); - dst[2..4].copy_from_slice(&self.dst.to_be_bytes()); - dst[4..6].copy_from_slice(&self.len.to_be_bytes()); - dst[6..8].copy_from_slice(&self.csum); - } - - pub fn hdr_len(&self) -> usize { - UdpHdr::SIZE - } -} - -impl<'a> From<&UdpHdr<'a>> for UdpMeta { - fn from(udp: &UdpHdr) -> Self { - UdpMeta { - src: udp.src_port(), - dst: udp.dst_port(), - len: udp.len(), - csum: udp.csum_bytes(), - } - } -} #[derive( Clone, @@ -78,200 +26,8 @@ pub struct UdpPush { pub dst: u16, } -impl PushAction for UdpPush { - fn push(&self) -> UdpMeta { - UdpMeta { src: self.src, dst: self.dst, ..Default::default() } - } -} - #[derive(Clone, Debug, Deserialize, Serialize)] pub struct UdpMod { src: Option, dst: Option, } - -impl ModifyAction for UdpMod { - fn modify(&self, meta: &mut UdpMeta) { - if let Some(src) = self.src { - meta.src = src; - } - - if let Some(dst) = self.dst { - meta.dst = dst; - } - } -} - -impl HeaderActionModify for UdpMeta { - fn run_modify(&mut self, spec: &UlpMetaModify) { - if spec.generic.src_port.is_some() { - self.src = spec.generic.src_port.unwrap() - } - - if spec.generic.dst_port.is_some() { - self.dst = spec.generic.dst_port.unwrap() - } - } -} - -#[derive(Debug)] -pub struct UdpHdr<'a> { - base: Ref<&'a mut [u8], UdpHdrRaw>, -} - -impl<'a> UdpHdr<'a> { - pub const SIZE: usize = UdpHdrRaw::SIZE; - pub const CSUM_BEGIN_OFFSET: usize = 6; - pub const CSUM_END_OFFSET: usize = 8; - - pub fn bytes(&self) -> &[u8] { - self.base.bytes() - } - - pub fn csum_bytes(&self) -> [u8; 2] { - self.base.csum - } - - pub fn csum_minus_hdr(&self) -> Option { - if self.base.csum != [0; 2] { - let mut csum = Checksum::from(HeaderChecksum::wrap(self.base.csum)); - csum.sub_bytes(&self.base.bytes()[0..Self::CSUM_BEGIN_OFFSET]); - Some(csum) - } else { - None - } - } - - pub fn dst_port(&self) -> u16 { - 
u16::from_be_bytes(self.base.dst_port) - } - - /// Return the header length, in bytes. - pub fn hdr_len(&self) -> usize { - Self::SIZE - } - - pub fn parse<'b>( - rdr: &'b mut impl PacketReadMut<'a>, - ) -> Result { - let src = rdr.slice_mut(UdpHdrRaw::SIZE)?; - let udp = Self { base: UdpHdrRaw::new_mut(src)? }; - - let src_port = udp.src_port(); - if src_port == DYNAMIC_PORT { - return Err(UdpHdrError::BadSrcPort { src_port }); - } - - let dst_port = udp.dst_port(); - if dst_port == DYNAMIC_PORT { - return Err(UdpHdrError::BadDstPort { dst_port }); - } - - let length = udp.len(); - if length < Self::SIZE as u16 { - return Err(UdpHdrError::BadLength { length }); - } - - Ok(udp) - } - - pub fn set_csum(&mut self, csum: [u8; 2]) { - self.base.csum = csum; - } - - pub fn len(&self) -> u16 { - u16::from_be_bytes(self.base.length) - } - - /// Set the length, in bytes. - /// - /// The UDP length field includes both header and payload. - pub fn set_len(&mut self, len: u16) { - self.base.length = len.to_be_bytes(); - } - - pub fn set_pay_len(&mut self, len: u16) { - self.base.length = (Self::SIZE as u16 + len).to_be_bytes(); - } - - pub fn src_port(&self) -> u16 { - u16::from_be_bytes(self.base.src_port) - } -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq, DError)] -#[derror(leaf_data = UdpHdrError::derror_data)] -pub enum UdpHdrError { - BadDstPort { dst_port: u16 }, - BadLength { length: u16 }, - BadSrcPort { src_port: u16 }, - ReadError(ReadErr), -} - -impl UdpHdrError { - fn derror_data(&self, data: &mut [u64]) { - data[0] = match self { - Self::BadDstPort { dst_port } => *dst_port as u64, - Self::BadLength { length } => *length as u64, - Self::BadSrcPort { src_port } => *src_port as u64, - _ => 0, - } - } -} - -impl From for UdpHdrError { - fn from(error: ReadErr) -> Self { - UdpHdrError::ReadError(error) - } -} - -/// Note: For now we keep this unaligned to be safe. 
-#[repr(C)] -#[derive(Clone, Debug, FromBytes, AsBytes, FromZeroes, Unaligned)] -pub struct UdpHdrRaw { - pub src_port: [u8; 2], - pub dst_port: [u8; 2], - pub length: [u8; 2], - pub csum: [u8; 2], -} - -impl UdpHdrRaw { - pub const SIZE: usize = mem::size_of::(); -} - -impl<'a> RawHeader<'a> for UdpHdrRaw { - #[inline] - fn new_mut(src: &mut [u8]) -> Result, ReadErr> { - debug_assert_eq!(src.len(), Self::SIZE); - let hdr = match Ref::new(src) { - Some(hdr) => hdr, - None => return Err(ReadErr::BadLayout), - }; - Ok(hdr) - } -} - -#[cfg(test)] -mod test { - use super::*; - use crate::engine::packet::Packet; - - #[test] - fn emit() { - let udp = UdpMeta { src: 5353, dst: 5353, len: 142, csum: [0; 2] }; - let len = udp.hdr_len(); - let mut pkt = Packet::alloc_and_expand(len); - let mut wtr = pkt.seg0_wtr(); - udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); - assert_eq!(len, pkt.len()); - - #[rustfmt::skip] - let expected_bytes = [ - // source port + dest port - 0x14, 0xE9, 0x14, 0xE9, - // length + checksum - 0x00, 0x8E, 0x00, 0x00, - ]; - assert_eq!(&expected_bytes, pkt.seg_bytes(0)); - } -} diff --git a/lib/opte/src/lib.rs b/lib/opte/src/lib.rs index 970e02ee..42ec1668 100644 --- a/lib/opte/src/lib.rs +++ b/lib/opte/src/lib.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company #![cfg_attr(not(feature = "std"), no_std)] #![allow(clippy::len_without_is_empty)] @@ -31,6 +31,8 @@ use alloc::boxed::Box; use core::fmt; use core::fmt::Display; +pub use ingot; + #[cfg(any(feature = "api", test))] pub mod api { pub use opte_api::*; @@ -190,8 +192,9 @@ mod opte_provider { // ================================================================ /// A logging provider provides the means to log messages to some -/// destination based on the context in which OPTE is running. 
For -/// example, in a unit test this could map to `println!`. In the +/// destination based on the context in which OPTE is running. +/// +/// For example, in a unit test this could map to `println!`. In the /// illumos kernel it would map to `cmn_err(9F)`. /// /// Logging levels are provided by [`LogLevel`]. These levels will map diff --git a/lib/oxide-vpc/.gitignore b/lib/oxide-vpc/.gitignore index 5d43dab0..e2ef55f9 100644 --- a/lib/oxide-vpc/.gitignore +++ b/lib/oxide-vpc/.gitignore @@ -6,3 +6,4 @@ overlay_guest_to_guest-phys-1.pcap overlay_guest_to_guest-phys-2.pcap dhcpv6_solicit_reply.pcap guest_to_internet_ipv[46].pcap +snat-v[46]-echo-id.pcap diff --git a/lib/oxide-vpc/src/api.rs b/lib/oxide-vpc/src/api.rs index 9e4be6a2..7f8464f5 100644 --- a/lib/oxide-vpc/src/api.rs +++ b/lib/oxide-vpc/src/api.rs @@ -347,10 +347,10 @@ impl From for GuestPhysAddr { /// * Drop: Packets matching this entry are dropped. /// /// * InternetGateway: Packets matching this entry are forwarded to -/// the internet. In the case of the Oxide Network the IG is not an -/// actual destination, but rather a configuration that determines how -/// we should NAT the flow. The address in the gateway is the source -/// address that is to be used. +/// the internet. In the case of the Oxide Network the IG is not an +/// actual destination, but rather a configuration that determines how +/// we should NAT the flow. The address in the gateway is the source +/// address that is to be used. /// /// * Ip: Packets matching this entry are forwarded to the specified IP. /// @@ -358,10 +358,10 @@ impl From for GuestPhysAddr { /// matches the destination IP type. /// /// * VpcSubnet: Packets matching this entry are forwarded to the -/// specified VPC Subnet. In the Oxide Network this is just an -/// abstraction, it's simply allowing one subnet to talk to another. -/// There is no separate VPC router process, the real routing is done -/// by the underlay. +/// specified VPC Subnet. 
In the Oxide Network this is just an +/// abstraction, it's simply allowing one subnet to talk to another. +/// There is no separate VPC router process, the real routing is done +/// by the underlay. #[derive(Clone, Debug, Copy, Deserialize, Serialize)] pub enum RouterTarget { Drop, @@ -846,7 +846,9 @@ impl FromStr for Address { )), Some(("ip", val)) => Ok(Address::Ip(val.parse()?)), Some(("subnet", val)) => Ok(Address::Subnet(val.parse()?)), - Some(("vni", val)) => Ok(Address::Vni(val.parse()?)), + Some(("vni", val)) => { + Ok(Address::Vni(val.parse().map_err(|e| format!("{e:?}"))?)) + } Some((key, _)) => Err(format!("invalid address type: {}", key)), }, } diff --git a/lib/oxide-vpc/src/engine/gateway/dhcp.rs b/lib/oxide-vpc/src/engine/gateway/dhcp.rs index ee616441..d10698e6 100644 --- a/lib/oxide-vpc/src/engine/gateway/dhcp.rs +++ b/lib/oxide-vpc/src/engine/gateway/dhcp.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! The DHCP implementation of the Virtual Gateway. 
@@ -17,7 +17,7 @@ use opte::api::Ipv4PrefixLen; use opte::api::OpteError; use opte::api::SubnetRouterPair; use opte::engine::dhcp::DhcpAction; -use opte::engine::ip4::Ipv4Cidr; +use opte::engine::ip::v4::Ipv4Cidr; use opte::engine::layer::Layer; use opte::engine::rule::Action; use opte::engine::rule::Rule; diff --git a/lib/oxide-vpc/src/engine/gateway/mod.rs b/lib/oxide-vpc/src/engine/gateway/mod.rs index a3b04065..0067deb3 100644 --- a/lib/oxide-vpc/src/engine/gateway/mod.rs +++ b/lib/oxide-vpc/src/engine/gateway/mod.rs @@ -52,7 +52,6 @@ use alloc::sync::Arc; use alloc::vec::Vec; use core::fmt; use core::fmt::Display; -use core::marker::PhantomData; use opte::api::Direction; use opte::api::OpteError; use opte::engine::ether::EtherMod; @@ -61,7 +60,7 @@ use opte::engine::layer::DefaultAction; use opte::engine::layer::Layer; use opte::engine::layer::LayerActions; use opte::engine::packet::InnerFlowId; -use opte::engine::packet::PacketMeta; +use opte::engine::packet::MblkPacketData; use opte::engine::port::meta::ActionMeta; use opte::engine::port::PortBuilder; use opte::engine::port::Pos; @@ -144,14 +143,14 @@ impl StaticAction for RewriteSrcMac { &self, _dir: Direction, _flow_id: &InnerFlowId, - _packet_meta: &PacketMeta, + _packet_meta: &MblkPacketData, _action_meta: &mut ActionMeta, ) -> GenHtResult { Ok(AllowOrDeny::Allow(HdrTransform { - inner_ether: HeaderAction::Modify( - EtherMod { src: Some(self.gateway_mac), ..Default::default() }, - PhantomData, - ), + inner_ether: HeaderAction::Modify(EtherMod { + src: Some(self.gateway_mac), + ..Default::default() + }), ..Default::default() })) } diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index 516d54bd..94ef64cf 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company pub mod firewall; pub mod gateway; @@ -13,31 +13,33 @@ pub mod print; pub mod router; use crate::cfg::VpcCfg; -use opte::engine::ether::EtherType; +use opte::engine::arp; +use opte::engine::arp::ArpEthIpv4Ref; +use opte::engine::arp::ArpOp; +use opte::engine::arp::ValidArpEthIpv4; +use opte::engine::arp::ARP_HTYPE_ETHERNET; +use opte::engine::ether::EthernetRef; use opte::engine::flow_table::FlowTable; -use opte::engine::headers::EncapMeta; -use opte::engine::ip4::Protocol; -use opte::engine::packet::HeaderOffsets; +use opte::engine::ip::v4::Ipv4Addr; +use opte::engine::packet::FullParsed; use opte::engine::packet::InnerFlowId; use opte::engine::packet::Packet; -use opte::engine::packet::PacketInfo; -use opte::engine::packet::PacketMeta; -use opte::engine::packet::PacketRead; -use opte::engine::packet::PacketReaderMut; use opte::engine::packet::ParseError; -use opte::engine::packet::Parsed; +use opte::engine::packet::Pullup; +use opte::engine::parse::ValidGeneveOverV6; +use opte::engine::parse::ValidNoEncap; use opte::engine::port::UftEntry; use opte::engine::Direction; use opte::engine::HdlPktAction; use opte::engine::HdlPktError; use opte::engine::NetworkImpl; use opte::engine::NetworkParser; - -use opte::engine::arp; -use opte::engine::arp::ArpEthIpv4; -use opte::engine::arp::ArpOp; -use opte::engine::ether::ETHER_TYPE_IPV4; -use opte::engine::ip4::Ipv4Addr; +use opte::ingot::ethernet::Ethertype; +use opte::ingot::types::HeaderParse; +use opte::ingot::types::IntoBufPointer; +use opte::ingot::types::Parsed as IngotParsed; +use opte::ingot::types::Read; +use zerocopy::ByteSliceMut; #[derive(Clone, Copy, Debug, Default)] pub struct VpcParser {} @@ -53,35 +55,39 @@ pub struct VpcNetwork { pub cfg: VpcCfg, } -// The ARP HTYPE for Ethernet. 
-const HTYPE_ETHER: u16 = 1; - -fn is_arp_req(arp: &ArpEthIpv4) -> bool { - arp.htype == HTYPE_ETHER - && arp.ptype == ETHER_TYPE_IPV4 - && arp.op == ArpOp::Request +fn is_arp_req(arp: &impl ArpEthIpv4Ref) -> bool { + arp.htype() == ARP_HTYPE_ETHERNET + && arp.ptype() == Ethertype::IPV4 + && arp.op() == ArpOp::REQUEST } -fn is_arp_req_for_tpa(tpa: Ipv4Addr, arp: &ArpEthIpv4) -> bool { - is_arp_req(arp) && arp.tpa == tpa +fn is_arp_req_for_tpa(tpa: Ipv4Addr, arp: &impl ArpEthIpv4Ref) -> bool { + is_arp_req(arp) && arp.tpa() == tpa } impl VpcNetwork { - fn handle_arp_out( + fn handle_arp_out<'a, T: Read + Pullup + 'a>( &self, - pkt: &mut Packet, - ) -> Result { - let arp_start = pkt.hdr_offsets().inner.ether.hdr_len; - let mut rdr = pkt.get_rdr_mut(); - rdr.seek(arp_start).unwrap(); - let arp = ArpEthIpv4::parse(&mut rdr) - .map_err(|_| HdlPktError("outbound ARP"))?; + pkt: &mut Packet>, + ) -> Result + where + T::Chunk: ByteSliceMut + IntoBufPointer<'a>, + { + let body = pkt.body().ok_or(HdlPktError("outbound ARP (no body)"))?; + + let (arp, ..) 
= ValidArpEthIpv4::parse(body) + .map_err(|_| HdlPktError("outbound ARP (parse)"))?; + + if !arp.values_valid() { + return Err(HdlPktError("outbound ARP (parse -- bad values)")); + } + let gw_ip = self.cfg.ipv4_cfg().unwrap().gateway_ip; if is_arp_req_for_tpa(gw_ip, &arp) { let gw_mac = self.cfg.gateway_mac; - let hp = arp::gen_arp_reply(gw_mac, gw_ip, arp.sha, arp.spa); + let hp = arp::gen_arp_reply(gw_mac, gw_ip, arp.sha(), arp.spa()); return Ok(HdlPktAction::Hairpin(hp)); } @@ -92,15 +98,18 @@ impl VpcNetwork { impl NetworkImpl for VpcNetwork { type Parser = VpcParser; - fn handle_pkt( + fn handle_pkt<'a, T: Read + Pullup + 'a>( &self, dir: Direction, - pkt: &mut Packet, + pkt: &mut Packet>, _uft_in: &FlowTable>, _uft_out: &FlowTable>, - ) -> Result { - match (dir, pkt.meta().inner.ether.ether_type) { - (Direction::Out, EtherType::Arp) => self.handle_arp_out(pkt), + ) -> Result + where + T::Chunk: ByteSliceMut + IntoBufPointer<'a>, + { + match (dir, pkt.meta().inner_ether().ethertype()) { + (Direction::Out, Ethertype::ARP) => self.handle_arp_out(pkt), _ => Ok(HdlPktAction::Deny), } @@ -112,156 +121,28 @@ impl NetworkImpl for VpcNetwork { } impl NetworkParser for VpcParser { - fn parse_outbound( - &self, - rdr: &mut PacketReaderMut, - ) -> Result { - let mut meta = PacketMeta::default(); - let mut offsets = HeaderOffsets::default(); - let (ether_hi, _hdr) = Packet::parse_ether(rdr)?; - meta.inner.ether = ether_hi.meta; - offsets.inner.ether = ether_hi.offset; - let ether_type = ether_hi.meta.ether_type; - - // Allocate a message block and copy in the squashed data. Provide - // enough extra space for geneve encapsulation to not require an extra - // allocation later on. 
128 is based on - // - 18 byte ethernet header (vlan space) - // - 40 byte ipv6 header - // - 8 byte udp header - // - 8 byte geneve header - // - space for geneve options - const EXTRA_SPACE: Option = Some(128); - - let (ip_hi, pseudo_csum) = match ether_type { - EtherType::Arp => { - return Ok(PacketInfo { - meta, - offsets, - body_csum: None, - extra_hdr_space: EXTRA_SPACE, - }); - } - - EtherType::Ipv4 => { - let (ip_hi, hdr) = Packet::parse_ip4(rdr)?; - (ip_hi, hdr.pseudo_csum()) - } - - EtherType::Ipv6 => { - let (ip_hi, hdr) = Packet::parse_ip6(rdr)?; - (ip_hi, hdr.pseudo_csum()) - } - - _ => return Err(ParseError::UnexpectedEtherType(ether_type)), - }; - - meta.inner.ip = Some(ip_hi.meta); - offsets.inner.ip = Some(ip_hi.offset); - - let (ulp_hi, ulp_hdr) = match ip_hi.meta.proto() { - Protocol::ICMP => Packet::parse_icmp(rdr)?, - Protocol::ICMPv6 => Packet::parse_icmp6(rdr)?, - Protocol::TCP => Packet::parse_tcp(rdr)?, - Protocol::UDP => Packet::parse_udp(rdr)?, - proto => return Err(ParseError::UnexpectedProtocol(proto)), - }; - - let use_pseudo = ulp_hi.meta.is_pseudoheader_in_csum(); - meta.inner.ulp = Some(ulp_hi.meta); - offsets.inner.ulp = Some(ulp_hi.offset); + type InMeta = ValidGeneveOverV6; + type OutMeta = ValidNoEncap; - let body_csum = if let Some(mut csum) = ulp_hdr.csum_minus_hdr() { - if use_pseudo { - csum -= pseudo_csum; - } - Some(csum) - } else { - None - }; - - Ok(PacketInfo { - meta, - offsets, - body_csum, - extra_hdr_space: EXTRA_SPACE, - }) + #[inline(always)] + fn parse_outbound<'a, T: Read + 'a>( + &self, + rdr: T, + ) -> Result, T>, ParseError> + where + T::Chunk: opte::ingot::types::IntoBufPointer<'a> + ByteSliceMut, + { + Ok(ValidNoEncap::parse_read(rdr)?) 
} - fn parse_inbound( + #[inline(always)] + fn parse_inbound<'a, T: Read + 'a>( &self, - rdr: &mut PacketReaderMut, - ) -> Result { - let mut meta = PacketMeta::default(); - let mut offsets = HeaderOffsets::default(); - - let (outer_ether_hi, _hdr) = Packet::parse_ether(rdr)?; - meta.outer.ether = Some(outer_ether_hi.meta); - offsets.outer.ether = Some(outer_ether_hi.offset); - let outer_et = outer_ether_hi.meta.ether_type; - - // VPC traffic is delivered exclusively on an IPv6 + - // Geneve underlay. - let outer_ip_hi = match outer_et { - EtherType::Ipv6 => Packet::parse_ip6(rdr)?.0, - - _ => return Err(ParseError::UnexpectedEtherType(outer_et)), - }; - - meta.outer.ip = Some(outer_ip_hi.meta); - offsets.outer.ip = Some(outer_ip_hi.offset); - - let (geneve_hi, _geneve_hdr) = match outer_ip_hi.meta.proto() { - Protocol::UDP => Packet::parse_geneve(rdr)?, - proto => return Err(ParseError::UnexpectedProtocol(proto)), - }; - - meta.outer.encap = Some(EncapMeta::from(geneve_hi.meta)); - offsets.outer.encap = Some(geneve_hi.offset); - - let (inner_ether_hi, _) = Packet::parse_ether(rdr)?; - meta.inner.ether = inner_ether_hi.meta; - offsets.inner.ether = inner_ether_hi.offset; - let inner_et = inner_ether_hi.meta.ether_type; - - let (inner_ip_hi, pseudo_csum) = match inner_et { - EtherType::Ipv4 => { - let (ip_hi, hdr) = Packet::parse_ip4(rdr)?; - (ip_hi, hdr.pseudo_csum()) - } - - EtherType::Ipv6 => { - let (ip_hi, hdr) = Packet::parse_ip6(rdr)?; - (ip_hi, hdr.pseudo_csum()) - } - - _ => return Err(ParseError::UnexpectedEtherType(inner_et)), - }; - - meta.inner.ip = Some(inner_ip_hi.meta); - offsets.inner.ip = Some(inner_ip_hi.offset); - - let (inner_ulp_hi, inner_ulp_hdr) = match inner_ip_hi.meta.proto() { - Protocol::ICMP => Packet::parse_icmp(rdr)?, - Protocol::ICMPv6 => Packet::parse_icmp6(rdr)?, - Protocol::TCP => Packet::parse_tcp(rdr)?, - Protocol::UDP => Packet::parse_udp(rdr)?, - proto => return Err(ParseError::UnexpectedProtocol(proto)), - }; - - let 
use_pseudo = inner_ulp_hi.meta.is_pseudoheader_in_csum(); - meta.inner.ulp = Some(inner_ulp_hi.meta); - offsets.inner.ulp = Some(inner_ulp_hi.offset); - - let body_csum = if let Some(mut csum) = inner_ulp_hdr.csum_minus_hdr() { - if use_pseudo { - csum -= pseudo_csum; - } - Some(csum) - } else { - None - }; - - Ok(PacketInfo { meta, offsets, body_csum, extra_hdr_space: None }) + rdr: T, + ) -> Result, T>, ParseError> + where + T::Chunk: opte::ingot::types::IntoBufPointer<'a> + ByteSliceMut, + { + Ok(ValidGeneveOverV6::parse_read(rdr)?) } } diff --git a/lib/oxide-vpc/src/engine/overlay.rs b/lib/oxide-vpc/src/engine/overlay.rs index a3c70948..1255d52f 100644 --- a/lib/oxide-vpc/src/engine/overlay.rs +++ b/lib/oxide-vpc/src/engine/overlay.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! The Oxide Network VPC Overlay. //! 
@@ -22,7 +22,6 @@ use alloc::string::ToString; use alloc::sync::Arc; use alloc::vec::Vec; use core::fmt; -use core::marker::PhantomData; use opte::api::Direction; use opte::api::Ipv4Addr; use opte::api::Ipv4Cidr; @@ -38,21 +37,20 @@ use opte::engine::ether::EtherMod; use opte::engine::ether::EtherType; use opte::engine::geneve::GenevePush; use opte::engine::geneve::Vni; -use opte::engine::headers::EncapMeta; use opte::engine::headers::EncapPush; use opte::engine::headers::HeaderAction; use opte::engine::headers::IpAddr; use opte::engine::headers::IpCidr; use opte::engine::headers::IpPush; -use opte::engine::ip4::Protocol; -use opte::engine::ip6::Ipv6Addr; -use opte::engine::ip6::Ipv6Cidr; -use opte::engine::ip6::Ipv6Push; +use opte::engine::ip::v4::Protocol; +use opte::engine::ip::v6::Ipv6Addr; +use opte::engine::ip::v6::Ipv6Cidr; +use opte::engine::ip::v6::Ipv6Push; use opte::engine::layer::DefaultAction; use opte::engine::layer::Layer; use opte::engine::layer::LayerActions; use opte::engine::packet::InnerFlowId; -use opte::engine::packet::PacketMeta; +use opte::engine::packet::MblkPacketData; use opte::engine::port::meta::ActionMeta; use opte::engine::port::meta::ActionMetaValue; use opte::engine::port::PortBuilder; @@ -205,9 +203,11 @@ impl StaticAction for EncapAction { // The encap action is only used for outgoing. _dir: Direction, flow_id: &InnerFlowId, - pkt_meta: &PacketMeta, + _pkt_meta: &MblkPacketData, action_meta: &mut ActionMeta, ) -> GenHtResult { + let f_hash = flow_id.crc32(); + // The router layer determines a RouterTarget and stores it in // the meta map. We need to map this virtual target to a // physical one. @@ -243,16 +243,7 @@ impl StaticAction for EncapAction { // Hash the packet onto a route target. This is a very // rudimentary mechanism. Should level-up to an ECMP // algorithm with well known statistical properties. 
- let hash = match pkt_meta.l4_hash() { - Some(h) => h, - None => { - return Err(GenHtError::Unexpected { - msg: "could not compute l4 hash for packet" - .to_string(), - }); - } - }; - let hash = hash as usize; + let hash = f_hash as usize; let target = match phys.iter().nth(hash % phys.len()) { Some(target) => target, None => return Ok(AllowOrDeny::Deny), @@ -321,22 +312,16 @@ impl StaticAction for EncapAction { let tfrm = HdrTransform { name: ENCAP_NAME.to_string(), // We leave the outer src/dst up to the driver. - outer_ether: HeaderAction::Push( - EtherMeta { - src: MacAddr::ZERO, - dst: MacAddr::ZERO, - ether_type: EtherType::Ipv6, - }, - PhantomData, - ), - outer_ip: HeaderAction::Push( - IpPush::from(Ipv6Push { - src: self.phys_ip_src, - dst: phys_target.ip, - proto: Protocol::UDP, - }), - PhantomData, - ), + outer_ether: HeaderAction::Push(EtherMeta { + src: MacAddr::ZERO, + dst: MacAddr::ZERO, + ether_type: EtherType::Ipv6, + }), + outer_ip: HeaderAction::Push(IpPush::from(Ipv6Push { + src: self.phys_ip_src, + dst: phys_target.ip, + proto: Protocol::UDP, + })), // XXX Geneve uses the UDP source port as a flow label // value for the purposes of ECMP -- a hash of the // 5-tuple. However, when using Geneve in IPv6 one could @@ -346,17 +331,21 @@ impl StaticAction for EncapAction { // network is always IPv6, perhaps we should just use // that? For now I defer the choice and leave this // hard-coded. - outer_encap: HeaderAction::Push( - EncapPush::from(GenevePush { - vni: phys_target.vni, - entropy: 7777, - }), - PhantomData, - ), - inner_ether: HeaderAction::Modify( - EtherMod { dst: Some(phys_target.ether), ..Default::default() }, - PhantomData, - ), + // + // (kyle) -- I think we should use both, mainly because + // we can expose the extra entropy to devices which can use it. + // We may want flow id to be symmetric, however... 
+ // It's worth keeping in mind that Chelsio's RSS picks us a ring + // based on Toeplitz hash of the 5-tuple, so we need to write into + // there regardless. I don't believe it *looks* at v6 flowid. + outer_encap: HeaderAction::Push(EncapPush::from(GenevePush { + vni: phys_target.vni, + entropy: flow_id.crc32() as u16, + })), + inner_ether: HeaderAction::Modify(EtherMod { + dst: Some(phys_target.ether), + ..Default::default() + }), ..Default::default() }; @@ -393,22 +382,20 @@ impl StaticAction for DecapAction { // The decap action is only used for inbound. _dir: Direction, _flow_id: &InnerFlowId, - pkt_meta: &PacketMeta, + pkt_meta: &MblkPacketData, action_meta: &mut ActionMeta, ) -> GenHtResult { - match &pkt_meta.outer.encap { - Some(EncapMeta::Geneve(geneve)) => { + match pkt_meta.outer_encap_geneve_vni_and_origin() { + Some((vni, oxide_external_pkt)) => { // We only conditionally add this metadata because the // `Address::VNI` filter uses it to select VPC-originated // traffic. // External packets carry an extra Geneve tag from the // switch during NAT -- if found, `oxide_external_packet` // is filled. - if !geneve.oxide_external_pkt { - action_meta.insert( - ACTION_META_VNI.to_string(), - geneve.vni.to_string(), - ); + if !oxide_external_pkt { + action_meta + .insert(ACTION_META_VNI.to_string(), vni.to_string()); } } diff --git a/lib/oxide-vpc/tests/firewall_tests.rs b/lib/oxide-vpc/tests/firewall_tests.rs index 345b8f61..22a4fa98 100644 --- a/lib/oxide-vpc/tests/firewall_tests.rs +++ b/lib/oxide-vpc/tests/firewall_tests.rs @@ -1,3 +1,10 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +// Copyright 2024 Oxide Computer Company + +use opte::ddi::mblk::MsgBlk; use opte_test_utils as common; use common::*; @@ -32,9 +39,10 @@ fn firewall_replace_rules() { // Run the SYN packet through g1's port in the outbound direction // and verify if passes the firewall. // ================================================================ - let mut pkt1 = http_syn(&g1_cfg, &g2_cfg); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let mut pkt1_m = http_syn(&g1_cfg, &g2_cfg); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -71,9 +79,10 @@ fn firewall_replace_rules() { ] ); - let mut pkt2 = http_syn(&g1_cfg, &g2_cfg); - let res = g1.port.process(Out, &mut pkt2, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let mut pkt2_m = http_syn(&g1_cfg, &g2_cfg); + let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt2); + expect_modified!(res, pkt2_m); incr!( g1, [ @@ -88,14 +97,16 @@ fn firewall_replace_rules() { // of the real process we first dump the raw bytes of g1's // outgoing packet and then reparse it. 
// ================================================================ - let mblk = pkt2.unwrap_mblk(); - let mut pkt3 = unsafe { - Packet::wrap_mblk_and_parse(mblk, In, VpcParser::new()).unwrap() - }; - let mut pkt3_copy = - Packet::copy(&pkt3.all_bytes()).parse(In, VpcParser::new()).unwrap(); - let res = g2.port.process(In, &mut pkt3, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + + let mut pkt3_m = pkt2_m; + let pkt3_bytes = pkt3_m.copy_all(); + let mut pkt3_copy_m = MsgBlk::copy(pkt3_bytes); + + let pkt3 = parse_inbound(&mut pkt3_m, VpcParser {}).unwrap(); + let pkt3_copy = parse_inbound(&mut pkt3_copy_m, VpcParser {}).unwrap(); + + let res = g2.port.process(In, pkt3); + expect_modified!(res, pkt3_m); incr!( g2, [ @@ -130,7 +141,7 @@ fn firewall_replace_rules() { // Verify the packet is dropped and that the firewall flow table // entry (along with its dual) was invalidated. - let res = g2.port.process(In, &mut pkt3_copy, ActionMeta::new()); + let res = g2.port.process(In, pkt3_copy); assert_drop!( res, DropReason::Layer { name: "firewall", reason: DenyReason::Rule } @@ -181,20 +192,21 @@ fn firewall_vni_inbound() { mac: g2_cfg.guest_mac, vni: g2_cfg.vni, }; - let mut pkt1 = http_syn2( + let mut pkt1_m = http_syn2( g2_cfg.guest_mac, g2_cfg.ipv4().private_ip, g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, ); - pkt1 = encap(pkt1, phys_src, phys_dst); + pkt1_m = encap(pkt1_m, phys_src, phys_dst); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); // ================================================================ // Verify that g1's firewall rejects this packet, as the default // VPC firewall rules dictate that only inbound traffic from the // same VPC should be allowed. 
// ================================================================ - let res = g1.port.process(In, &mut pkt1, ActionMeta::new()); + let res = g1.port.process(In, pkt1); assert_drop!( res, DropReason::Layer { name: "firewall", reason: DenyReason::Default } @@ -222,15 +234,16 @@ fn firewall_vni_inbound() { mac: g2_cfg.guest_mac, vni: g2_cfg.vni, }; - let mut pkt2 = http_syn2( + let mut pkt2_m = http_syn2( g2_cfg.guest_mac, g2_cfg.ipv4().private_ip, g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, ); - pkt2 = encap(pkt2, phys_src, phys_dst); - let res = g1.port.process(In, &mut pkt2, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + pkt2_m = encap(pkt2_m, phys_src, phys_dst); + let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt2); + expect_modified!(res, pkt2_m); incr!( g1, [ @@ -283,28 +296,18 @@ fn firewall_vni_outbound() { // ================================================================ // Create a packet that is leaving g1 with g2 as its destination. // ================================================================ - let phys_src = TestIpPhys { - ip: g1_cfg.phys_ip, - mac: g1_cfg.guest_mac, - vni: g1_cfg.vni, - }; - let phys_dst = TestIpPhys { - ip: g2_cfg.phys_ip, - mac: g2_cfg.guest_mac, - vni: g2_cfg.vni, - }; - let mut pkt1 = http_syn2( + let mut pkt1_m = http_syn2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, g1_cfg.guest_mac, g2_cfg.ipv4().private_ip, ); - pkt1 = encap(pkt1, phys_src, phys_dst); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); // ================================================================ // Try to send the packet and verify the firewall does not allow it. 
// ================================================================ - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); + let res = g1.port.process(Out, pkt1); assert_drop!( res, DropReason::Layer { name: "firewall", reason: DenyReason::Rule } @@ -356,20 +359,21 @@ fn firewall_external_inbound() { vni: g1_cfg.vni, }; - let mut pkt1 = http_syn2( + let mut pkt1_m = http_syn2( BS_MAC_ADDR, std::net::IpAddr::from([1, 1, 1, 1]), g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, ); - pkt1 = encap_external(pkt1, bsvc_phys, guest_phys); + pkt1_m = encap_external(pkt1_m, bsvc_phys, guest_phys); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); // ================================================================ // Verify that g1's firewall rejects this packet, as the default // VPC firewall rules dictate that only inbound traffic from the // same VPC should be allowed. // ================================================================ - let res = g1.port.process(In, &mut pkt1, ActionMeta::new()); + let res = g1.port.process(In, pkt1); assert_drop!( res, DropReason::Layer { name: "firewall", reason: DenyReason::Default } diff --git a/lib/oxide-vpc/tests/fuzz_regression.rs b/lib/oxide-vpc/tests/fuzz_regression.rs index 0e159429..2f454b78 100644 --- a/lib/oxide-vpc/tests/fuzz_regression.rs +++ b/lib/oxide-vpc/tests/fuzz_regression.rs @@ -9,8 +9,8 @@ //! These tests capture past known-bad packets which have made some part //! of OPTE panic in the past, and ensure that it does not today. 
+use opte::ddi::mblk::MsgBlk; use opte::engine::packet::Packet; -use opte::engine::Direction; use oxide_vpc::engine::VpcParser; use serde::Deserialize; use serde::Serialize; @@ -110,19 +110,15 @@ fn run_tests( #[test] fn parse_in_regression() { run_tests("parse_in", |data| { - let mut pkt = Packet::alloc_and_expand(data.len()); - let mut wtr = pkt.seg0_wtr(); - wtr.write(data).unwrap(); - let _ = pkt.parse(Direction::In, VpcParser {}); + let mut msg = MsgBlk::copy(data); + let _ = Packet::parse_inbound(msg.iter_mut(), VpcParser {}); }); } #[test] fn parse_out_regression() { run_tests("parse_out", |data| { - let mut pkt = Packet::alloc_and_expand(data.len()); - let mut wtr = pkt.seg0_wtr(); - wtr.write(data).unwrap(); - let _ = pkt.parse(Direction::Out, VpcParser {}); + let mut msg = MsgBlk::copy(data); + let _ = Packet::parse_outbound(msg.iter_mut(), VpcParser {}); }); } diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index 74d70e31..f796408a 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -13,43 +13,45 @@ //! OPTE pipeline by single-stepping the packets in each capture and //! verifying that OPTE processing produces the expected bytes. 
-use opte_test_utils as common; - use common::icmp::*; use common::*; use opte::api::MacAddr; use opte::api::OpteError; +use opte::ddi::mblk::MsgBlk; use opte::ddi::time::Moment; use opte::engine::arp::ArpEthIpv4; -use opte::engine::arp::ArpEthIpv4Raw; +use opte::engine::arp::ArpEthIpv4Ref; +use opte::engine::arp::ValidArpEthIpv4; +use opte::engine::arp::ARP_HTYPE_ETHERNET; use opte::engine::dhcpv6; -use opte::engine::ether::EtherHdr; -use opte::engine::ether::EtherHdrRaw; -use opte::engine::ether::EtherMeta; +use opte::engine::ether::Ethernet; +use opte::engine::ether::EthernetRef; use opte::engine::flow_table::FLOW_DEF_EXPIRE_SECS; use opte::engine::geneve::Vni; -use opte::engine::headers::EncapMeta; -use opte::engine::headers::IpMeta; -use opte::engine::headers::UlpMeta; -use opte::engine::icmp::IcmpHdr; -use opte::engine::ip4::Ipv4Addr; -use opte::engine::ip4::Ipv4Hdr; -use opte::engine::ip4::Ipv4HdrError; -use opte::engine::ip4::Ipv4Meta; -use opte::engine::ip4::Protocol; -use opte::engine::ip6::Ipv6Hdr; -use opte::engine::ip6::Ipv6Meta; -use opte::engine::packet::Initialized; +use opte::engine::ip::v4::Ipv4Addr; +use opte::engine::ip::v4::Ipv4Ref; +use opte::engine::ip::v6::Ipv6; +use opte::engine::ip::v6::Ipv6Ref; +use opte::engine::ip::ValidL3; +use opte::engine::ip::L3; use opte::engine::packet::InnerFlowId; +use opte::engine::packet::MblkFullParsed; +use opte::engine::packet::MismatchError; use opte::engine::packet::Packet; -use opte::engine::packet::PacketRead; -use opte::engine::packet::Parsed; +use opte::engine::parse::ValidUlp; use opte::engine::port::ProcessError; use opte::engine::tcp::TcpState; use opte::engine::tcp::TIME_WAIT_EXPIRE_SECS; -use opte::engine::udp::UdpHdr; -use opte::engine::udp::UdpMeta; use opte::engine::Direction; +use opte::ingot::geneve::GeneveRef; +use opte::ingot::icmp::IcmpV6Ref; +use opte::ingot::tcp::TcpRef; +use opte::ingot::types::Emit; +use opte::ingot::types::HeaderLen; +use opte::ingot::types::HeaderParse; +use 
opte::ingot::udp::Udp; +use opte::ingot::udp::UdpRef; +use opte_test_utils as common; use oxide_vpc::api::ExternalIpCfg; use oxide_vpc::api::FirewallRule; use oxide_vpc::api::RouterClass; @@ -59,6 +61,7 @@ use pcap::*; use smoltcp::phy::ChecksumCapabilities as CsumCapab; use smoltcp::wire::Icmpv4Packet; use smoltcp::wire::Icmpv4Repr; +use smoltcp::wire::Icmpv6Message; use smoltcp::wire::Icmpv6Packet; use smoltcp::wire::Icmpv6Repr; use smoltcp::wire::IpAddress; @@ -71,14 +74,6 @@ use std::collections::BTreeMap; use std::prelude::v1::*; use std::time::Duration; use uuid::Uuid; -use zerocopy::AsBytes; - -const IP4_SZ: usize = EtherHdr::SIZE + Ipv4Hdr::BASE_SIZE; -const IP6_SZ: usize = EtherHdr::SIZE + Ipv6Hdr::BASE_SIZE; -const TCP4_SZ: usize = IP4_SZ + TcpHdr::BASE_SIZE; -const TCP6_SZ: usize = IP6_SZ + TcpHdr::BASE_SIZE; - -const VPC_ENCAP_SZ: usize = IP6_SZ + UdpHdr::SIZE + GeneveHdr::BASE_SIZE; // If we are running `cargo test`, then make sure to // register the USDT probes before running any tests. @@ -141,14 +136,17 @@ fn port_transition_running() { // Try processing the packet while taking the port through a Ready // -> Running. // ================================================================ - let mut pkt1 = tcp_telnet_syn(&g1_cfg, &g2_cfg); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); + let mut pkt1_m = tcp_telnet_syn(&g1_cfg, &g2_cfg); + + let pkt1 = parse_outbound(&mut pkt1_m, GenericUlp {}).unwrap(); + let res = g1.port.process(Out, pkt1); assert!(matches!(res, Err(ProcessError::BadState(_)))); assert_port!(g1); g1.port.start(); set!(g1, "port_state=running"); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt1 = parse_outbound(&mut pkt1_m, GenericUlp {}).unwrap(); + let res = g1.port.process(Out, pkt1); + assert!(matches!(res, Ok(Modified(_)))); incr!( g1, [ @@ -173,11 +171,12 @@ fn port_transition_reset() { // -> Running -> Ready transition. 
Verify that flows are cleared // but rules remain. // ================================================================ - let mut pkt1 = tcp_telnet_syn(&g1_cfg, &g2_cfg); + let mut pkt1_m = tcp_telnet_syn(&g1_cfg, &g2_cfg); + let pkt1 = parse_outbound(&mut pkt1_m, GenericUlp {}).unwrap(); g1.port.start(); set!(g1, "port_state=running"); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -188,7 +187,8 @@ fn port_transition_reset() { ); g1.port.reset(); update!(g1, ["set:port_state=ready", "zero_flows"]); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); + let pkt1 = parse_outbound(&mut pkt1_m, GenericUlp {}).unwrap(); + let res = g1.port.process(Out, pkt1); assert!(matches!(res, Err(ProcessError::BadState(_)))); assert_port!(g1); } @@ -223,9 +223,10 @@ fn port_transition_pause() { // ================================================================ // Send the HTTP SYN. 
// ================================================================ - let mut pkt1 = http_syn(&g2_cfg, &g1_cfg); - let res = g2.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let mut pkt1_m = http_syn(&g2_cfg, &g1_cfg); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g2.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g2, [ @@ -235,8 +236,9 @@ fn port_transition_pause() { ] ); - let res = g1.port.process(In, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -274,7 +276,9 @@ fn port_transition_pause() { ), Err(OpteError::BadState(_)) )); - let res = g2.port.process(Out, &mut pkt1, ActionMeta::new()); + + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g2.port.process(Out, pkt1); assert!(matches!(res, Err(ProcessError::BadState(_)))); let fw_rule: FirewallRule = "action=allow priority=10 dir=in protocol=tcp port=22".parse().unwrap(); @@ -303,13 +307,15 @@ fn port_transition_pause() { g2.port.start(); set!(g2, "port_state=running"); - let mut pkt2 = http_syn_ack(&g1_cfg, &g2_cfg); - let res = g1.port.process(Out, &mut pkt2, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let mut pkt2_m = http_syn_ack(&g1_cfg, &g2_cfg); + let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt2); + expect_modified!(res, pkt2_m); incr!(g1, ["uft.out", "stats.port.out_modified, stats.port.out_uft_miss"]); - let res = g2.port.process(In, &mut pkt2, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g2.port.process(In, pkt2); + expect_modified!(res, pkt2_m); incr!(g2, ["uft.in", "stats.port.in_modified, stats.port.in_uft_miss"]); } @@ -360,7 +366,7 @@ fn 
gateway_icmp4_ping() { // ================================================================ // Generate an ICMP Echo Request from G1 to Virtual GW // ================================================================ - let mut pkt1 = gen_icmp_echo_req( + let mut pkt1_m = gen_icmp_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, g1_cfg.ipv4_cfg().unwrap().private_ip.into(), @@ -370,15 +376,16 @@ fn gateway_icmp4_ping() { &data[..], 1, ); - pcap.add_pkt(&pkt1); + pcap.add_pkt(&pkt1_m); // ================================================================ // Run the Echo Request through g1's port in the outbound // direction and verify it results in an Echo Reply Hairpin packet // back to guest. // ================================================================ - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - let hp = match res { + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + let mut hp = match res { Ok(Hairpin(hp)) => hp, _ => panic!("expected Hairpin, got {:?}", res), }; @@ -386,32 +393,33 @@ fn gateway_icmp4_ping() { // In this case we are parsing a hairpin reply, so we can't use // the VpcParser since it would expect any inbound packet to be // encapsulated. 
- let reply = hp.parse(In, GenericUlp {}).unwrap(); - pcap.add_pkt(&reply); - assert_eq!(reply.body_offset(), IP4_SZ + IcmpHdr::SIZE); - assert_eq!(reply.body_seg(), 0); + pcap.add_pkt(&hp); + // let reply = hp.parse(In, GenericUlp {}).unwrap(); + let reply = parse_inbound(&mut hp, GenericUlp {}).unwrap().to_full_meta(); let meta = reply.meta(); - assert!(meta.outer.ether.is_none()); - assert!(meta.outer.ip.is_none()); - assert!(meta.outer.encap.is_none()); - - let eth = meta.inner.ether; - assert_eq!(eth.src, g1_cfg.gateway_mac); - assert_eq!(eth.dst, g1_cfg.guest_mac); - - match meta.inner.ip.as_ref().unwrap() { - IpMeta::Ip4(ip4) => { - assert_eq!(ip4.src, g1_cfg.ipv4_cfg().unwrap().gateway_ip); - assert_eq!(ip4.dst, g1_cfg.ipv4_cfg().unwrap().private_ip); - assert_eq!(ip4.proto, Protocol::ICMP); + assert!(meta.outer_ether().is_none()); + assert!(meta.outer_ip().is_none()); + assert!(meta.outer_encap_geneve_vni_and_origin().is_none()); + + let eth = meta.inner_ether(); + assert_eq!(eth.source(), g1_cfg.gateway_mac); + assert_eq!(eth.destination(), g1_cfg.guest_mac); + + match meta.inner_l3().as_ref().unwrap() { + L3::Ipv4(ip4) => { + assert_eq!(ip4.source(), g1_cfg.ipv4_cfg().unwrap().gateway_ip); + assert_eq!( + ip4.destination(), + g1_cfg.ipv4_cfg().unwrap().private_ip + ); + assert_eq!(ip4.protocol(), IngotIpProto::ICMP); } - ip6 => panic!("expected inner IPv4 metadata, got IPv6: {:?}", ip6), + L3::Ipv6(_) => panic!("expected inner IPv4 metadata, got IPv6"), } - let mut rdr = reply.get_body_rdr(); - rdr.seek_back(IcmpHdr::SIZE).unwrap(); - let reply_body = rdr.copy_remaining(); + let mut reply_body = meta.inner_ulp().expect("ICMPv4 is a ULP").emit_vec(); + reply.meta().append_remaining(&mut reply_body); let reply_pkt = Icmpv4Packet::new_checked(&reply_body).unwrap(); let mut csum = CsumCapab::ignored(); csum.ipv4 = smoltcp::phy::Checksum::Rx; @@ -432,6 +440,46 @@ fn gateway_icmp4_ping() { } } +// Verify that guest packet bodies are correctly pulled up if they 
run +// past the same segment(s) containing the rest of the headers. +#[test] +fn packet_body_pullup() { + let g1_cfg = g1_cfg(); + let mut g1 = oxide_net_setup("g1_port", &g1_cfg, None, None); + g1.port.start(); + set!(g1, "port_state=running"); + let ident = 7; + let seq_no = 777; + let data = c"...did Sephiroth do this?"; + + // ================================================================ + // Generate an ICMP Echo Request from G1 to Virtual GW + // ================================================================ + let mut pkt1_m = gen_icmp_echo_req( + g1_cfg.guest_mac, + g1_cfg.gateway_mac, + g1_cfg.ipv4_cfg().unwrap().private_ip.into(), + g1_cfg.ipv4_cfg().unwrap().gateway_ip.into(), + ident, + seq_no, + data.to_bytes_with_nul(), + // Instruct the packet builder to split the body 8 bytes in. + 4, + ); + + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + let hp = match res { + Ok(Hairpin(hp)) => hp, + _ => panic!("expected Hairpin, got {:?}", res), + }; + + // Verify that the contents are correctly replicated. + let (_hdrs, new_body) = + hp.split_at(hp.len() - data.to_bytes_with_nul().len()); + assert_eq!(new_body, data.to_bytes_with_nul()); +} + // Try to send a TCP packet from one guest to another; but in this // case the guest has not route to the other guest, resulting in the // packet being dropped. 
@@ -452,8 +500,9 @@ fn guest_to_guest_no_route() { ) .unwrap(); update!(g1, ["incr:epoch", "set:router.rules.out=0"]); - let mut pkt1 = http_syn(&g1_cfg, &g2_cfg); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); + let mut pkt1_m = http_syn(&g1_cfg, &g2_cfg); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); assert_drop!( res, DropReason::Layer { name: "router", reason: DenyReason::Default } @@ -512,18 +561,19 @@ fn guest_to_guest() { PcapBuilder::new("overlay_guest_to_guest-guest-2.pcap"); let mut pcap_phys2 = PcapBuilder::new("overlay_guest_to_guest-phys-2.pcap"); - let mut pkt1 = http_syn(&g1_cfg, &g2_cfg); - pcap_guest1.add_pkt(&pkt1); - let ulp_csum_b4 = pkt1.meta().inner.ulp.unwrap().csum(); - let ip_csum_b4 = pkt1.meta().inner.ip.unwrap().csum(); + let mut pkt1_m = http_syn(&g1_cfg, &g2_cfg); + pcap_guest1.add_pkt(&pkt1_m); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let ulp_csum_b4 = pkt1.meta().inner_ulp.as_ref().unwrap().csum(); + let ip_csum_b4 = pkt1.meta().inner_l3.as_ref().unwrap().csum(); // ================================================================ // Run the packet through g1's port in the outbound direction and // verify the resulting packet meets expectations. 
// ================================================================ - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - pcap_phys1.add_pkt(&pkt1); - assert!(matches!(res, Ok(Modified))); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -532,64 +582,54 @@ fn guest_to_guest() { "stats.port.out_modified, stats.port.out_uft_miss", ] ); + pcap_phys1.add_pkt(&pkt1_m); + + let nodes = pkt1_m.iter(); + assert_eq!(nodes.count(), 2); - assert_eq!(pkt1.body_offset(), VPC_ENCAP_SZ + TCP4_SZ + HTTP_SYN_OPTS_LEN); - assert_eq!(pkt1.body_seg(), 1); - let ulp_csum_after = pkt1.meta().inner.ulp.unwrap().csum(); - let ip_csum_after = pkt1.meta().inner.ip.unwrap().csum(); + let pkt2 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); + let ulp_csum_after = pkt2.meta().inner_ulp.csum(); + let ip_csum_after = pkt2.meta().inner_l3.csum(); assert_eq!(ulp_csum_after, ulp_csum_b4); assert_eq!(ip_csum_after, ip_csum_b4); - let meta = pkt1.meta(); - match meta.outer.ether.as_ref() { - Some(eth) => { - assert_eq!(eth.src, MacAddr::ZERO); - assert_eq!(eth.dst, MacAddr::ZERO); - } - - None => panic!("no outer ether header"), - } - - match meta.outer.ip.as_ref().unwrap() { - IpMeta::Ip6(ip6) => { - assert_eq!(ip6.src, g1_cfg.phys_ip); - assert_eq!(ip6.dst, g2_cfg.phys_ip); - } + let meta = pkt2.meta(); + assert_eq!(meta.outer_eth.source(), MacAddr::ZERO); + assert_eq!(meta.outer_eth.destination(), MacAddr::ZERO); - val => panic!("expected outer IPv6, got: {:?}", val), - } - - match meta.outer.encap.as_ref() { - Some(EncapMeta::Geneve(geneve)) => { - assert_eq!(geneve.entropy, 7777); - assert_eq!(geneve.vni, Vni::new(g1_cfg.vni).unwrap()); - } + assert_eq!(meta.outer_v6.source(), g1_cfg.phys_ip); + assert_eq!(meta.outer_v6.destination(), g2_cfg.phys_ip); - None => panic!("expected outer Geneve metadata"), - } + // Geneve entropy. 
+ assert_eq!(meta.outer_udp.source(), 12700); + assert_eq!(meta.outer_encap.vni(), g1_cfg.vni); - let eth = meta.inner.ether; - assert_eq!(eth.src, g1_cfg.guest_mac); - assert_eq!(eth.dst, g2_cfg.guest_mac); - assert_eq!(eth.ether_type, EtherType::Ipv4); + let eth = &meta.inner_eth; + assert_eq!(eth.source(), g1_cfg.guest_mac); + assert_eq!(eth.destination(), g2_cfg.guest_mac); + assert_eq!(eth.ethertype(), Ethertype::IPV4); - match meta.inner.ip.as_ref().unwrap() { - IpMeta::Ip4(ip4) => { - assert_eq!(ip4.src, g1_cfg.ipv4_cfg().unwrap().private_ip); - assert_eq!(ip4.dst, g2_cfg.ipv4_cfg().unwrap().private_ip); - assert_eq!(ip4.proto, Protocol::TCP); + match &meta.inner_l3 { + ValidL3::Ipv4(ip4) => { + assert_eq!(ip4.source(), g1_cfg.ipv4_cfg().unwrap().private_ip); + assert_eq!( + ip4.destination(), + g2_cfg.ipv4_cfg().unwrap().private_ip + ); + assert_eq!(ip4.protocol(), IngotIpProto::TCP); } - - ip6 => panic!("expected inner IPv4 metadata, got IPv6: {:?}", ip6), + _ => panic!("expected inner IPv4 metadata, got IPv6"), } - match meta.inner.ulp.as_ref().unwrap() { - UlpMeta::Tcp(tcp) => { - assert_eq!(tcp.src, 44490); - assert_eq!(tcp.dst, 80); + match &meta.inner_ulp { + ValidUlp::Tcp(tcp) => { + assert_eq!(tcp.source(), 44490); + assert_eq!(tcp.destination(), 80); } - ulp => panic!("expected inner TCP metadata, got: {:?}", ulp), + // todo: derive Debug on choice? + // ulp => panic!("expected inner TCP metadata, got: {:?}", ulp), + _ => panic!("expected inner TCP metadata, got (other)"), } // ================================================================ @@ -598,15 +638,13 @@ fn guest_to_guest() { // of the real process we first dump the raw bytes of g1's // outgoing packet and then reparse it. 
// ================================================================ - let mblk = pkt1.unwrap_mblk(); - let mut pkt2 = unsafe { - Packet::wrap_mblk_and_parse(mblk, In, VpcParser::new()).unwrap() - }; - pcap_phys2.add_pkt(&pkt2); + let mut pkt2_m = pkt1_m; + pcap_phys2.add_pkt(&pkt2_m); + let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); - let res = g2.port.process(In, &mut pkt2, ActionMeta::new()); - pcap_guest2.add_pkt(&pkt2); - assert!(matches!(res, Ok(Modified))); + let res = g2.port.process(In, pkt2); + expect_modified!(res, pkt2_m); + pcap_guest2.add_pkt(&pkt2_m); incr!( g2, [ @@ -615,36 +653,41 @@ fn guest_to_guest() { "stats.port.in_modified, stats.port.in_uft_miss", ] ); - assert_eq!(pkt2.body_offset(), TCP4_SZ + HTTP_SYN_OPTS_LEN); - assert_eq!(pkt2.body_seg(), 0); + // assert_eq!(pkt2.body_offset(), TCP4_SZ + HTTP_SYN_OPTS_LEN); + // assert_eq!(pkt2.body_seg(), 0); + let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); let g2_meta = pkt2.meta(); - assert!(g2_meta.outer.ether.is_none()); - assert!(g2_meta.outer.ip.is_none()); - assert!(g2_meta.outer.encap.is_none()); - - let g2_eth = g2_meta.inner.ether; - assert_eq!(g2_eth.src, g1_cfg.gateway_mac); - assert_eq!(g2_eth.dst, g2_cfg.guest_mac); - assert_eq!(g2_eth.ether_type, EtherType::Ipv4); - - match g2_meta.inner.ip.as_ref().unwrap() { - IpMeta::Ip4(ip4) => { - assert_eq!(ip4.src, g1_cfg.ipv4_cfg().unwrap().private_ip); - assert_eq!(ip4.dst, g2_cfg.ipv4_cfg().unwrap().private_ip); - assert_eq!(ip4.proto, Protocol::TCP); - } - ip6 => panic!("expected inner IPv4 metadata, got IPv6: {:?}", ip6), + // TODO: can we have a convenience method that verifies that the + // emitspec was a rewind/drop from the head of the pkt? 
+ + let g2_eth = &g2_meta.inner_eth; + assert_eq!(g2_eth.source(), g1_cfg.gateway_mac); + assert_eq!(g2_eth.destination(), g2_cfg.guest_mac); + assert_eq!(g2_eth.ethertype(), Ethertype::IPV4); + + match &g2_meta.inner_l3 { + Some(ValidL3::Ipv4(ip4)) => { + assert_eq!(ip4.source(), g1_cfg.ipv4_cfg().unwrap().private_ip); + assert_eq!( + ip4.destination(), + g2_cfg.ipv4_cfg().unwrap().private_ip + ); + assert_eq!(ip4.protocol(), IngotIpProto::TCP); + } + _ => panic!("expected inner IPv4 metadata, got IPv6"), } - match g2_meta.inner.ulp.as_ref().unwrap() { - UlpMeta::Tcp(tcp) => { - assert_eq!(tcp.src, 44490); - assert_eq!(tcp.dst, 80); + match &g2_meta.inner_ulp { + Some(ValidUlp::Tcp(tcp)) => { + assert_eq!(tcp.source(), 44490); + assert_eq!(tcp.destination(), 80); } - ulp => panic!("expected inner TCP metadata, got: {:?}", ulp), + // todo: derive Debug on choice? + // ulp => panic!("expected inner TCP metadata, got: {:?}", ulp), + _ => panic!("expected inner TCP metadata, got (other)"), } } @@ -684,7 +727,8 @@ fn guest_to_guest_diff_vpc_no_peer() { // verify the packet is dropped. // ================================================================ let mut g1_pkt = http_syn(&g1_cfg, &g2_cfg); - let res = g1.port.process(Out, &mut g1_pkt, ActionMeta::new()); + let pkt1 = parse_outbound(&mut g1_pkt, GenericUlp {}).unwrap(); + let res = g1.port.process(Out, pkt1); assert_drop!( res, DropReason::Layer { name: "overlay", reason: DenyReason::Action } @@ -702,6 +746,7 @@ fn guest_to_guest_diff_vpc_no_peer() { // Verify that a guest can communicate with the internet over IPv4. 
#[test] fn guest_to_internet_ipv4() { + let mut pcap_guest = PcapBuilder::new("guest_to_internet_ipv4.pcap"); let g1_cfg = g1_cfg(); let mut g1 = oxide_net_setup("g1_port", &g1_cfg, None, None); g1.port.start(); @@ -721,19 +766,22 @@ fn guest_to_internet_ipv4() { // Generate a TCP SYN packet from g1 to zinascii.com // ================================================================ let dst_ip = "52.10.128.69".parse().unwrap(); - let mut pkt1 = http_syn2( + let mut pkt1_m = http_syn2( g1_cfg.guest_mac, g1_cfg.ipv4_cfg().unwrap().private_ip, GW_MAC_ADDR, dst_ip, ); + pcap_guest.add_pkt(&pkt1_m); + + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); // ================================================================ // Run the packet through g1's port in the outbound direction and // verify the resulting packet meets expectations. // ================================================================ - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified)), "bad result: {:?}", res); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -743,89 +791,73 @@ fn guest_to_internet_ipv4() { "stats.port.out_modified, stats.port.out_uft_miss", ] ); - assert_eq!(pkt1.body_offset(), VPC_ENCAP_SZ + TCP4_SZ + HTTP_SYN_OPTS_LEN); - assert_eq!(pkt1.body_seg(), 1); - let meta = pkt1.meta(); - match meta.outer.ether.as_ref() { - Some(eth) => { - assert_eq!(eth.src, MacAddr::ZERO); - assert_eq!(eth.dst, MacAddr::ZERO); - } - - None => panic!("no outer ether header"), - } - let inner_bytes = match meta.outer.ip.as_ref().unwrap() { - IpMeta::Ip6(ip6) => { - assert_eq!(ip6.src, g1_cfg.phys_ip); + // Inbound parse asserts specifically that we have: + // - Ethernet + // - Ipv6 + // - Udp (dstport 6081) + // - Geneve + // - (Inner ULP headers) + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); + let meta = pkt1.meta(); - // Check that the encoded payload length in the outer header is - // 
correct, and matches the actual number of bytes in the rest of - // the packet. - let mut bytes = pkt1.get_rdr().copy_remaining(); - assert_eq!( - ip6.pay_len as usize, - bytes.len() - EtherHdr::SIZE - Ipv6Hdr::BASE_SIZE - ); + assert_eq!(meta.outer_eth.source(), MacAddr::ZERO); + assert_eq!(meta.outer_eth.destination(), MacAddr::ZERO); - // Strip off the encapsulation headers - bytes.drain(..VPC_ENCAP_SZ); - bytes - } + assert_eq!(meta.outer_v6.source(), g1_cfg.phys_ip); + // Check that the encoded payload length in the outer header is + // correct, and matches the actual number of bytes in the rest of + // the packet. + let len_post_v6 = + pkt1.len() - (&meta.outer_eth, &meta.outer_v6).packet_length(); + assert_eq!(meta.outer_v6.payload_len() as usize, len_post_v6); - val => panic!("expected outer IPv6, got: {:?}", val), - }; - - match meta.outer.encap.as_ref() { - Some(EncapMeta::Geneve(geneve)) => { - assert_eq!(geneve.entropy, 7777); - } + assert_eq!(meta.outer_udp.source(), 24329); + assert_eq!(meta.outer_udp.length() as usize, len_post_v6); - None => panic!("expected outer Geneve metadata"), - } + assert_eq!(meta.inner_eth.source(), g1_cfg.guest_mac); + assert_eq!(meta.inner_eth.ethertype(), Ethertype::IPV4); - let eth = meta.inner.ether; - assert_eq!(eth.src, g1_cfg.guest_mac); - assert_eq!(eth.ether_type, EtherType::Ipv4); + match &meta.inner_l3 { + ValidL3::Ipv4(ip4) => { + assert_eq!(ip4.source(), g1_cfg.snat().external_ip); + assert_eq!(ip4.destination(), dst_ip); + assert_eq!(ip4.protocol(), IngotIpProto::TCP); - match meta.inner.ip.as_ref().unwrap() { - IpMeta::Ip4(ip4) => { - assert_eq!(ip4.src, g1_cfg.snat().external_ip); - assert_eq!(ip4.dst, dst_ip); - assert_eq!(ip4.proto, Protocol::TCP); + let inner_len = len_post_v6 + - (&meta.outer_udp, &meta.outer_encap, &meta.inner_eth) + .packet_length(); // Check that the encoded payload length in the inner header is // correct, and matches the actual number of bytes in the rest of // the packet. 
// IPv4 total length _DOES_ include the IPv4 header. - assert_eq!( - ip4.total_len as usize, - inner_bytes.len() - EtherHdr::SIZE, - ); + assert_eq!(ip4.total_len() as usize, inner_len,); } - - ip6 => panic!("expected inner IPv4 metadata, got IPv6: {:?}", ip6), + _ => panic!("expected inner IPv4 metadata, got IPv6"), } - match meta.inner.ulp.as_ref().unwrap() { - UlpMeta::Tcp(tcp) => { + match &meta.inner_ulp { + ValidUlp::Tcp(tcp) => { assert_eq!( - tcp.src, - g1_cfg.snat().ports.clone().next_back().unwrap(), + tcp.source(), + g1_cfg.snat().ports.clone().next_back().unwrap() ); - assert_eq!(tcp.dst, 80); + assert_eq!(tcp.destination(), 80); } - ulp => panic!("expected inner TCP metadata, got: {:?}", ulp), + // todo: derive Debug on choice? + // ulp => panic!("expected inner TCP metadata, got: {:?}", ulp), + _ => panic!("expected inner TCP metadata, got (other)"), } - let mut pcap_guest = PcapBuilder::new("guest_to_internet_ipv4.pcap"); - pcap_guest.add_pkt(&pkt1); + pcap_guest.add_pkt(&pkt1_m); } // Verify that a guest can communicate with the internet over IPv6. #[test] fn guest_to_internet_ipv6() { + let mut pcap_guest = PcapBuilder::new("guest_to_internet_ipv6.pcap"); let g1_cfg = g1_cfg(); let mut g1 = oxide_net_setup("g1_port", &g1_cfg, None, None); g1.port.start(); @@ -845,19 +877,21 @@ fn guest_to_internet_ipv6() { // Generate a TCP SYN packet from g1 to example.com // ================================================================ let dst_ip = "2606:2800:220:1:248:1893:25c8:1946".parse().unwrap(); - let mut pkt1 = http_syn2( + let mut pkt1_m = http_syn2( g1_cfg.guest_mac, g1_cfg.ipv6_cfg().unwrap().private_ip, GW_MAC_ADDR, dst_ip, ); + pcap_guest.add_pkt(&pkt1_m); // ================================================================ // Run the packet through g1's port in the outbound direction and // verify the resulting packet meets expectations. 
// ================================================================ - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified)), "bad result: {:?}", res); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -868,85 +902,64 @@ fn guest_to_internet_ipv6() { ] ); - assert_eq!(pkt1.body_offset(), VPC_ENCAP_SZ + TCP6_SZ + HTTP_SYN_OPTS_LEN); - assert_eq!(pkt1.body_seg(), 1); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); let meta = pkt1.meta(); - match meta.outer.ether.as_ref() { - Some(eth) => { - assert_eq!(eth.src, MacAddr::ZERO); - assert_eq!(eth.dst, MacAddr::ZERO); - } - - None => panic!("no outer ether header"), - } - - let inner_bytes = match meta.outer.ip.as_ref().unwrap() { - IpMeta::Ip6(ip6) => { - assert_eq!(ip6.src, g1_cfg.phys_ip); - // Check that the encoded payload length in the outer header is - // correct, and matches the actual number of bytes in the rest of - // the packet. 
- let mut bytes = pkt1.get_rdr().copy_remaining(); - assert_eq!( - ip6.pay_len as usize, - bytes.len() - EtherHdr::SIZE - Ipv6Hdr::BASE_SIZE - ); - - // Strip off the encapsulation headers - bytes.drain(..VPC_ENCAP_SZ); - bytes - } - - val => panic!("expected outer IPv6, got: {:?}", val), - }; - - match meta.outer.encap.as_ref() { - Some(EncapMeta::Geneve(geneve)) => { - assert_eq!(geneve.entropy, 7777); - } - - None => panic!("expected outer Geneve metadata"), - } - - let eth = meta.inner.ether; - assert_eq!(eth.src, g1_cfg.guest_mac); - assert_eq!(eth.ether_type, EtherType::Ipv6); - - match meta.inner.ip.as_ref().unwrap() { - IpMeta::Ip6(ip6) => { - assert_eq!(ip6.src, g1_cfg.snat6().external_ip); - assert_eq!(ip6.dst, dst_ip); - assert_eq!(ip6.proto, Protocol::TCP); - assert_eq!(ip6.next_hdr, IpProtocol::Tcp); + assert_eq!(meta.outer_eth.source(), MacAddr::ZERO); + assert_eq!(meta.outer_eth.destination(), MacAddr::ZERO); + + assert_eq!(meta.outer_v6.source(), g1_cfg.phys_ip); + // Check that the encoded payload length in the outer header is + // correct, and matches the actual number of bytes in the rest of + // the packet. 
+ let len_post_v6 = + pkt1.len() - (&meta.outer_eth, &meta.outer_v6).packet_length(); + assert_eq!(meta.outer_v6.payload_len() as usize, len_post_v6); + + assert_eq!(meta.outer_udp.source(), 63246); + assert_eq!(meta.outer_udp.length() as usize, len_post_v6); + + assert_eq!(meta.inner_eth.source(), g1_cfg.guest_mac); + assert_eq!(meta.inner_eth.ethertype(), Ethertype::IPV6); + + match &meta.inner_l3 { + ValidL3::Ipv6(ip6) => { + assert_eq!(ip6.source(), g1_cfg.snat6().external_ip); + assert_eq!(ip6.destination(), dst_ip); + assert_eq!(ip6.next_header(), IngotIpProto::TCP); + + let inner_len = len_post_v6 + - ( + &meta.outer_udp, + &meta.outer_encap, + &meta.inner_eth, + &meta.inner_l3, + ) + .packet_length(); // Check that the encoded payload length in the inner header is // correct, and matches the actual number of bytes in the rest of // the packet. // IPv6 payload length _DOES NOT_ include the IPv6 header. - assert_eq!( - ip6.pay_len as usize, - inner_bytes.len() - EtherHdr::SIZE - Ipv6Hdr::BASE_SIZE - ); + assert_eq!(ip6.payload_len() as usize, inner_len); } - - ip4 => panic!("expected inner IPv6 metadata, got IPv4: {:?}", ip4), + _ => panic!("expected inner IPv4 metadata, got IPv6"), } - match meta.inner.ulp.as_ref().unwrap() { - UlpMeta::Tcp(tcp) => { + match &meta.inner_ulp { + ValidUlp::Tcp(tcp) => { assert_eq!( - tcp.src, - g1_cfg.snat6().ports.clone().next_back().unwrap(), + tcp.source(), + g1_cfg.snat6().ports.clone().next_back().unwrap() ); - assert_eq!(tcp.dst, 80); + assert_eq!(tcp.destination(), 80); } - ulp => panic!("expected inner TCP metadata, got: {:?}", ulp), + // todo: derive Debug on choice? 
+ // ulp => panic!("expected inner TCP metadata, got: {:?}", ulp), + _ => panic!("expected inner TCP metadata, got (other)"), } - - let mut pcap_guest = PcapBuilder::new("guest_to_internet_ipv6.pcap"); - pcap_guest.add_pkt(&pkt1); + pcap_guest.add_pkt(&pkt1_m); } fn multi_external_setup( @@ -1106,9 +1119,10 @@ fn check_external_ip_inbound_behaviour( flow_port, 80, ); - let mut pkt1 = encap_external(pkt1, bsvc_phys, g1_phys); + let mut pkt1_m = encap_external(pkt1, bsvc_phys, g1_phys); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); - let res = port.port.process(In, &mut pkt1, ActionMeta::new()); + let res = port.port.process(In, pkt1); if old_ip_gone { // If we lose an external IP, the failure mode is obvious: // invalidate the action, do not rewrite dst IP to target the @@ -1127,10 +1141,7 @@ fn check_external_ip_inbound_behaviour( ] ); } else { - assert!( - matches!(res, Ok(Modified)), - "bad result for ip {ext_ip:?}: {res:?}" - ); + expect_modified!(res, pkt1_m); let rules = [ "firewall.flows.out, firewall.flows.in", "nat.flows.out, nat.flows.in", @@ -1144,8 +1155,11 @@ fn check_external_ip_inbound_behaviour( IpAddr::Ip4(_) => { let private_ip = cfg.ipv4().private_ip; if !old_ip_gone { + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}) + .unwrap() + .to_full_meta(); assert_eq!( - pkt1.meta().inner_ip4().unwrap().dst, + pkt1.meta().inner_ip4().unwrap().destination(), private_ip ); } @@ -1154,8 +1168,11 @@ fn check_external_ip_inbound_behaviour( IpAddr::Ip6(_) => { let private_ip = cfg.ipv6().private_ip; if !old_ip_gone { + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}) + .unwrap() + .to_full_meta(); assert_eq!( - pkt1.meta().inner_ip6().unwrap().dst, + pkt1.meta().inner_ip6().unwrap().destination(), private_ip ); } @@ -1175,20 +1192,23 @@ fn check_external_ip_inbound_behaviour( // IP (ephemeral) that the wrong src_ip will be selected (as it will // draw from a separate pool). 
// ================================================================ - let mut pkt2 = http_syn_ack2( + let mut pkt2_m = http_syn_ack2( cfg.guest_mac, private_ip, GW_MAC_ADDR, partner_ip, flow_port, ); - let res = port.port.process(Out, &mut pkt2, ActionMeta::new()); + let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = port.port.process(Out, pkt2); + expect_modified!(res, pkt2_m); + let pkt2 = + parse_inbound(&mut pkt2_m, VpcParser {}).unwrap().to_full_meta(); if old_ip_gone { // Failure mode here is different (assuming we have at least one // external IP). The packet must fail to send via the old IP, // invalidate the entry, and then choose the new external IP. - assert!(matches!(res, Ok(Modified)), "bad result: {:?}", res); update!( port, [ @@ -1201,18 +1221,17 @@ fn check_external_ip_inbound_behaviour( match ext_ip { IpAddr::Ip4(ip) => { - let chosen_ip = pkt2.meta().inner_ip4().unwrap().src; + let chosen_ip = pkt2.meta().inner_ip4().unwrap().source(); assert_ne!(chosen_ip, ip); assert_ne!(IpAddr::from(chosen_ip), private_ip); } IpAddr::Ip6(ip) => { - let chosen_ip = pkt2.meta().inner_ip6().unwrap().src; + let chosen_ip = pkt2.meta().inner_ip6().unwrap().source(); assert_ne!(chosen_ip, ip); assert_ne!(IpAddr::from(chosen_ip), private_ip); } }; } else { - assert!(matches!(res, Ok(Modified)), "bad result: {:?}", res); update!( port, [ @@ -1222,10 +1241,10 @@ fn check_external_ip_inbound_behaviour( ); match ext_ip { IpAddr::Ip4(ip) => { - assert_eq!(pkt2.meta().inner_ip4().unwrap().src, ip); + assert_eq!(pkt2.meta().inner_ip4().unwrap().source(), ip); } IpAddr::Ip6(ip) => { - assert_eq!(pkt2.meta().inner_ip6().unwrap().src, ip); + assert_eq!(pkt2.meta().inner_ip6().unwrap().source(), ip); } }; } @@ -1245,17 +1264,6 @@ fn external_ip_receive_and_reply_on_all() { fn external_ip_balanced_over_floating_ips() { let (mut g1, g1_cfg, ext_v4, ext_v6) = multi_external_ip_setup(8, true); - let bsvc_phys = TestIpPhys { - ip: BS_IP_ADDR, - mac: 
BS_MAC_ADDR, - vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), - }; - let g1_phys = TestIpPhys { - ip: g1_cfg.phys_ip, - mac: g1_cfg.guest_mac, - vni: g1_cfg.vni, - }; - let partner_ipv4: IpAddr = "93.184.216.34".parse().unwrap(); let partner_ipv6: IpAddr = "2606:2800:220:1:248:1893:25c8:1946".parse().unwrap(); @@ -1274,7 +1282,7 @@ fn external_ip_balanced_over_floating_ips() { IpAddr::Ip6(_) => g1_cfg.ipv6().private_ip.into(), }; - let pkt = http_syn3( + let mut pkt_m = http_syn3( g1_cfg.guest_mac, private_ip, g1_cfg.gateway_mac, @@ -1282,10 +1290,10 @@ fn external_ip_balanced_over_floating_ips() { flow_port, 80, ); - let mut pkt = encap_external(pkt, bsvc_phys, g1_phys); + let pkt = parse_outbound(&mut pkt_m, VpcParser {}).unwrap(); - let res = g1.port.process(Out, &mut pkt, ActionMeta::new()); - assert!(matches!(res, Ok(Modified)), "bad result: {res:?}"); + let res = g1.port.process(Out, pkt); + expect_modified!(res, pkt_m); incr!( g1, [ @@ -1296,12 +1304,15 @@ fn external_ip_balanced_over_floating_ips() { ] ); + let pkt = + parse_inbound(&mut pkt_m, VpcParser {}).unwrap().to_full_meta(); + match partner_ip { IpAddr::Ip4(_) => { - seen_v4s.push(pkt.meta().inner_ip4().unwrap().src); + seen_v4s.push(pkt.meta().inner_ip4().unwrap().source()); } IpAddr::Ip6(_) => { - seen_v6s.push(pkt.meta().inner_ip6().unwrap().src); + seen_v6s.push(pkt.meta().inner_ip6().unwrap().source()); } } } @@ -1384,13 +1395,11 @@ fn external_ip_epoch_affinity_preserved() { }; let pkt1 = http_syn2(BS_MAC_ADDR, partner_ip, g1_cfg.guest_mac, ext_ip); - let mut pkt1 = encap_external(pkt1, bsvc_phys, g1_phys); + let mut pkt1_m = encap_external(pkt1, bsvc_phys, g1_phys); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); - let res = g1.port.process(In, &mut pkt1, ActionMeta::new()); - assert!( - matches!(res, Ok(Modified)), - "bad result for ip {ext_ip:?}: {res:?}" - ); + let res = g1.port.process(In, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -1413,15 +1422,16 @@ fn 
external_ip_epoch_affinity_preserved() { // The reply packet must still originate from the ephemeral port // after an epoch change. // ================================================================ - let mut pkt2 = http_syn_ack2( + let mut pkt2_m = http_syn_ack2( g1_cfg.guest_mac, private_ip, GW_MAC_ADDR, partner_ip, 44490, ); - let res = g1.port.process(Out, &mut pkt2, ActionMeta::new()); - assert!(matches!(res, Ok(Modified)), "bad result: {:?}", res); + let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt2); + expect_modified!(res, pkt2_m); update!( g1, [ @@ -1429,12 +1439,15 @@ fn external_ip_epoch_affinity_preserved() { "incr:stats.port.out_modified, stats.port.out_uft_miss", ] ); + + let pkt2 = + parse_inbound(&mut pkt2_m, VpcParser {}).unwrap().to_full_meta(); match ext_ip { IpAddr::Ip4(ip) => { - assert_eq!(pkt2.meta().inner_ip4().unwrap().src, ip); + assert_eq!(pkt2.meta().inner_ip4().unwrap().source(), ip); } IpAddr::Ip6(ip) => { - assert_eq!(pkt2.meta().inner_ip6().unwrap().src, ip); + assert_eq!(pkt2.meta().inner_ip6().unwrap().source(), ip); } }; } @@ -1518,22 +1531,25 @@ struct IcmpSnatParams { } fn unpack_and_verify_icmp( - pkt: &Packet, + pkt: &mut MsgBlk, cfg: &VpcCfg, params: &IcmpSnatParams, dir: Direction, seq_no: u16, - body_seg: usize, ) { - let meta = pkt.meta(); + // Note the reversed direction -- parse the expected *output* format. 
+ let parsed = match dir { + In => parse_outbound(pkt, VpcParser {}).unwrap().to_full_meta(), + Out => parse_inbound(pkt, VpcParser {}).unwrap().to_full_meta(), + }; + let meta = parsed.meta(); - let (src_eth, dst_eth, src_ip, dst_ip, encapped, ident) = match dir { + let (src_eth, dst_eth, src_ip, dst_ip, ident) = match dir { Direction::Out => ( cfg.guest_mac, BS_MAC_ADDR, params.public_ip, params.partner_ip, - true, params.snat_port, ), Direction::In => ( @@ -1541,59 +1557,57 @@ fn unpack_and_verify_icmp( cfg.guest_mac, params.partner_ip, params.private_ip, - false, params.icmp_id, ), }; - let eth = meta.inner.ether; - assert_eq!(eth.src, src_eth); - assert_eq!(eth.dst, dst_eth); + let eth = meta.inner_ether(); + assert_eq!(eth.source(), src_eth); + assert_eq!(eth.destination(), dst_eth); - match (dst_ip, meta.inner.ip.as_ref().unwrap()) { - (IpAddr::Ip4(_), IpMeta::Ip4(meta)) => { - assert_eq!(eth.ether_type, EtherType::Ipv4); - assert_eq!(IpAddr::from(meta.src), src_ip); - assert_eq!(IpAddr::from(meta.dst), dst_ip); - assert_eq!(meta.proto, Protocol::ICMP); + match (dst_ip, meta.inner_l3().as_ref().unwrap()) { + (IpAddr::Ip4(_), L3::Ipv4(meta)) => { + assert_eq!(eth.ethertype(), Ethertype::IPV4); + assert_eq!(IpAddr::from(meta.source()), src_ip); + assert_eq!(IpAddr::from(meta.destination()), dst_ip); + assert_eq!(meta.protocol(), IngotIpProto::ICMP); - unpack_and_verify_icmp4(pkt, ident, seq_no, encapped, body_seg); + unpack_and_verify_icmp4(&parsed, ident, seq_no); } - (IpAddr::Ip6(_), IpMeta::Ip6(meta)) => { - assert_eq!(eth.ether_type, EtherType::Ipv6); - assert_eq!(IpAddr::from(meta.src), src_ip); - assert_eq!(IpAddr::from(meta.dst), dst_ip); - assert_eq!(meta.proto, Protocol::ICMPv6); + (IpAddr::Ip6(_), L3::Ipv6(meta)) => { + assert_eq!(eth.ethertype(), Ethertype::IPV6); + assert_eq!(IpAddr::from(meta.source()), src_ip); + assert_eq!(IpAddr::from(meta.destination()), dst_ip); + assert_eq!(meta.next_header(), IngotIpProto::ICMP_V6); unpack_and_verify_icmp6( 
- pkt, ident, seq_no, encapped, body_seg, meta.src, meta.dst, + &parsed, + ident, + seq_no, + meta.source(), + meta.destination(), ); } - (IpAddr::Ip4(_), ip6) => { - panic!("expected inner IPv4 metadata, got IPv6: {:?}", ip6) + (IpAddr::Ip4(_), _) => { + panic!("expected inner IPv4 metadata, got IPv6") } - (IpAddr::Ip6(_), ip4) => { - panic!("expected inner IPv6 metadata, got IPv4: {:?}", ip4) + (IpAddr::Ip6(_), _) => { + panic!("expected inner IPv6 metadata, got IPv4") } } } fn unpack_and_verify_icmp4( - pkt: &Packet, + pkt: &Packet, expected_ident: u16, seq_no: u16, - encapped: bool, - body_seg: usize, ) { - let icmp_offset = pkt.body_offset() - IcmpHdr::SIZE; - let tgt_offset = IP4_SZ + if encapped { VPC_ENCAP_SZ } else { 0 }; - assert_eq!(icmp_offset, tgt_offset); - assert_eq!(pkt.body_seg(), body_seg); - // Because we treat ICMPv4 as a full-fledged ULP, we need to // unsplit the emitted header from the body. - let pkt_bytes = pkt.all_bytes(); - let icmp = Icmpv4Packet::new_checked(&pkt_bytes[icmp_offset..]).unwrap(); + let mut icmp = pkt.meta().inner_ulp().unwrap().emit_vec(); + pkt.meta().append_remaining(&mut icmp); + + let icmp = Icmpv4Packet::new_checked(&icmp[..]).unwrap(); assert!(icmp.verify_checksum()); assert_eq!(icmp.echo_ident(), expected_ident); @@ -1601,31 +1615,20 @@ fn unpack_and_verify_icmp4( } fn unpack_and_verify_icmp6( - pkt: &Packet, + pkt: &Packet, expected_ident: u16, seq_no: u16, - encapped: bool, - body_seg: usize, src_ip: Ipv6Addr, dst_ip: Ipv6Addr, ) { - // Length is factored into pseudo header calc. - // We know there are no ext headers. 
- let pay_len = pkt.meta().inner_ip6().unwrap().pay_len as usize; - let src_ip = smoltcp::wire::Ipv6Address::from(src_ip).into(); let dst_ip = smoltcp::wire::Ipv6Address::from(dst_ip).into(); - let icmp_offset = pkt.body_offset() - IcmpHdr::SIZE; - let tgt_offset = IP6_SZ + if encapped { VPC_ENCAP_SZ } else { 0 }; - assert_eq!(icmp_offset, tgt_offset); - assert_eq!(pkt.body_seg(), body_seg); - - // Because we treat ICMPv6 as a full-fledged ULP, we need to + // Because we treat ICMPv4 as a full-fledged ULP, we need to // unsplit the emitted header from the body. - let pkt_bytes = pkt.all_bytes(); - let icmp = Icmpv6Packet::new_checked(&pkt_bytes[icmp_offset..][..pay_len]) - .unwrap(); + let mut icmp = pkt.meta().inner_ulp().unwrap().emit_vec(); + pkt.meta().append_remaining(&mut icmp); + let icmp = Icmpv6Packet::new_checked(&icmp[..]).unwrap(); assert!(icmp.verify_checksum(&src_ip, &dst_ip)); assert_eq!(icmp.echo_ident(), expected_ident); @@ -1647,6 +1650,11 @@ fn snat_icmp6_echo_rewrite() { } fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { + let mut pcap = match &dst_ip { + IpAddr::Ip4(_) => PcapBuilder::new("snat-v4-echo-id.pcap"), + IpAddr::Ip6(_) => PcapBuilder::new("snat-v6-echo-id.pcap"), + }; + let g1_cfg = g1_cfg(); let mut g1 = oxide_net_setup("g1_port", &g1_cfg, None, None); g1.port.start(); @@ -1698,7 +1706,7 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { // ================================================================ // Verify echo request rewrite. 
// ================================================================ - let mut pkt1 = gen_icmp_echo_req( + let mut pkt1_m = gen_icmp_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, private_ip, @@ -1708,9 +1716,13 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { &data[..], 2, ); + pcap.add_pkt(&pkt1_m); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified)), "bad result: {:?}", res); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); + pcap.add_pkt(&pkt1_m); incr!( g1, [ @@ -1721,12 +1733,12 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { ] ); - unpack_and_verify_icmp(&pkt1, &g1_cfg, ¶ms, Out, seq_no, 0); + unpack_and_verify_icmp(&mut pkt1_m, &g1_cfg, ¶ms, Out, seq_no); // ================================================================ // Verify echo reply rewrite. // ================================================================ - let mut pkt2 = gen_icmp_echo_reply( + let mut pkt2_m = gen_icmp_echo_reply( BS_MAC_ADDR, g1_cfg.guest_mac, dst_ip, @@ -1736,6 +1748,7 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { &data[..], 3, ); + let g1_phys = TestIpPhys { ip: g1_cfg.phys_ip, mac: g1_cfg.guest_mac, @@ -1746,13 +1759,17 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { mac: BS_MAC_ADDR, vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), }; - pkt2 = encap_external(pkt2, bsvc_phys, g1_phys); + pkt2_m = encap_external(pkt2_m, bsvc_phys, g1_phys); + pcap.add_pkt(&pkt2_m); - let res = g1.port.process(In, &mut pkt2, ActionMeta::new()); - assert!(matches!(res, Ok(Modified)), "bad result: {:?}", res); + let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); + + let res = g1.port.process(In, pkt2); + expect_modified!(res, pkt2_m); + pcap.add_pkt(&pkt2_m); incr!(g1, ["uft.in", "stats.port.in_modified, stats.port.in_uft_miss"]); - unpack_and_verify_icmp(&pkt2, &g1_cfg, ¶ms, In, seq_no, 0); + unpack_and_verify_icmp(&mut 
pkt2_m, &g1_cfg, ¶ms, In, seq_no); // ================================================================ // Send ICMP Echo Req a second time. We want to verify that a) the @@ -1760,7 +1777,7 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { // transformation. // ================================================================ seq_no += 1; - let mut pkt3 = gen_icmp_echo_req( + let mut pkt3_m = gen_icmp_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, private_ip, @@ -1770,21 +1787,24 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { &data[..], 1, ); + pcap.add_pkt(&pkt3_m); + let pkt3 = parse_outbound(&mut pkt3_m, VpcParser {}).unwrap(); assert_eq!(g1.port.stats_snap().out_uft_hit, 0); - let res = g1.port.process(Out, &mut pkt3, ActionMeta::new()); - assert!(matches!(res, Ok(Modified)), "bad result: {:?}", res); + let res = g1.port.process(Out, pkt3); + expect_modified!(res, pkt3_m); + pcap.add_pkt(&pkt3_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); assert_eq!(g1.port.stats_snap().out_uft_hit, 1); - unpack_and_verify_icmp(&pkt3, &g1_cfg, ¶ms, Out, seq_no, 1); + unpack_and_verify_icmp(&mut pkt3_m, &g1_cfg, ¶ms, Out, seq_no); // ================================================================ // Process ICMP Echo Reply a second time. Once again, this time we // want to verify that the body transformation comes from the UFT // entry. 
// ================================================================ - let mut pkt4 = gen_icmp_echo_reply( + let mut pkt4_m = gen_icmp_echo_reply( BS_MAC_ADDR, g1_cfg.guest_mac, dst_ip, @@ -1794,14 +1814,18 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { &data[..], 2, ); + pkt4_m = encap_external(pkt4_m, bsvc_phys, g1_phys); + pcap.add_pkt(&pkt4_m); + let pkt4 = parse_inbound(&mut pkt4_m, VpcParser {}).unwrap(); assert_eq!(g1.port.stats_snap().in_uft_hit, 0); - let res = g1.port.process(In, &mut pkt4, ActionMeta::new()); - assert!(matches!(res, Ok(Modified)), "bad result: {:?}", res); + let res = g1.port.process(In, pkt4); + expect_modified!(res, pkt4_m); + pcap.add_pkt(&pkt4_m); incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); assert_eq!(g1.port.stats_snap().in_uft_hit, 1); - unpack_and_verify_icmp(&pkt4, &g1_cfg, ¶ms, In, seq_no, 0); + unpack_and_verify_icmp(&mut pkt4_m, &g1_cfg, ¶ms, In, seq_no); // ================================================================ // Insert a new packet along the same S/D pair: this should occupy @@ -1810,7 +1834,7 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { let new_params = IcmpSnatParams { icmp_id: 8, snat_port: mapped_port - 1, ..params }; - let mut pkt5 = gen_icmp_echo_req( + let mut pkt5_m = gen_icmp_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, private_ip, @@ -1820,9 +1844,12 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { &data[..], 2, ); + pcap.add_pkt(&pkt5_m); + let pkt5 = parse_outbound(&mut pkt5_m, VpcParser {}).unwrap(); - let res = g1.port.process(Out, &mut pkt5, ActionMeta::new()); - assert!(matches!(res, Ok(Modified)), "bad result: {:?}", res); + let res = g1.port.process(Out, pkt5); + expect_modified!(res, pkt5_m); + pcap.add_pkt(&pkt5_m); incr!( g1, [ @@ -1833,44 +1860,45 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { ] ); - unpack_and_verify_icmp(&pkt5, &g1_cfg, &new_params, Out, seq_no, 0); + unpack_and_verify_icmp(&mut pkt5_m, &g1_cfg, &new_params, Out, seq_no); } 
#[test] fn bad_ip_len() { let cfg = lab_cfg(); - let eth = EtherMeta { - src: cfg.guest_mac, - dst: MacAddr::BROADCAST, - ether_type: EtherType::Ipv4, + let eth = Ethernet { + destination: MacAddr::BROADCAST, + source: cfg.guest_mac, + ethertype: Ethertype::IPV4, }; - let ip = Ipv4Meta { - src: "0.0.0.0".parse().unwrap(), - dst: Ipv4Addr::LOCAL_BCAST, - proto: Protocol::UDP, - ttl: 64, - ident: 1, - hdr_len: 20, - // We write a total legnth of 4 bytes, which is completely + let ip = Ipv4 { + source: "0.0.0.0".parse().unwrap(), + destination: Ipv4Addr::LOCAL_BCAST, + protocol: IngotIpProto::UDP, + hop_limit: 64, + identification: 1, + ihl: 5, + // We write a total length of 4 bytes, which is completely // bogus for an IP header and should return an error during // processing. total_len: 4, ..Default::default() }; - let udp = UdpMeta { src: 68, dst: 67, ..Default::default() }; - let total_len = EtherHdr::SIZE + usize::from(ip.hdr_len) + udp.hdr_len(); - let mut pkt = Packet::alloc_and_expand(total_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); - let res = pkt.parse(Out, VpcParser::new()); + let udp = Udp { source: 68, destination: 67, ..Default::default() }; + + let mut pkt_m = MsgBlk::new_ethernet_pkt((eth, ip, udp)); + let res = + Packet::parse_outbound(pkt_m.iter_mut(), VpcParser {}).err().unwrap(); assert_eq!( - res.err().unwrap(), - Ipv4HdrError::BadTotalLen { total_len: 4 }.into() + res, + ParseError::BadLength(MismatchError { + location: c"Ipv4.total_len(min)", + expected: 20, + actual: 4 + }) ); } @@ -1885,52 +1913,53 @@ fn arp_gateway() { g1.port.start(); set!(g1, "port_state=running"); - let eth_hdr = EtherHdrRaw { - dst: [0xff; 6], - src: cfg.guest_mac.bytes(), - ether_type: [0x08, 0x06], + let eth_hdr = Ethernet { + destination: MacAddr::BROADCAST, + source: cfg.guest_mac, + ethertype: Ethertype::ARP, }; + // TODO: 
ingot? let arp = ArpEthIpv4 { - htype: 1, - ptype: u16::from(EtherType::Ipv4), + htype: ARP_HTYPE_ETHERNET, + ptype: Ethertype::IPV4, hlen: 6, plen: 4, - op: ArpOp::Request, + op: ArpOp::REQUEST, sha: cfg.guest_mac, spa: cfg.ipv4_cfg().unwrap().private_ip, tha: MacAddr::from([0x00; 6]), tpa: cfg.ipv4_cfg().unwrap().gateway_ip, }; - let mut bytes = vec![]; - bytes.extend_from_slice(eth_hdr.as_bytes()); - bytes.extend_from_slice(ArpEthIpv4Raw::from(&arp).as_bytes()); - let mut pkt = Packet::copy(&bytes).parse(Out, VpcParser::new()).unwrap(); + // let mut bytes = eth_hdr.emit_vec(); + // bytes.extend_from_slice(ArpEthIpv4Raw::from(&arp).as_bytes()); + + let mut pkt_m = MsgBlk::new_ethernet_pkt((eth_hdr, arp)); + let pkt = parse_outbound(&mut pkt_m, VpcParser {}).unwrap(); - let res = g1.port.process(Out, &mut pkt, ActionMeta::new()); + let res = g1.port.process(Out, pkt); match res { - Ok(Hairpin(hppkt)) => { + Ok(Hairpin(mut hppkt)) => { // In this case we are parsing a hairpin reply, so we // can't use the VpcParser since it would expect any // inbound packet to be encapsulated. 
- let mut hppkt = hppkt.parse(In, GenericUlp {}).unwrap(); + let hppkt = parse_inbound(&mut hppkt, GenericUlp {}).unwrap(); let meta = hppkt.meta(); - let ethm = meta.inner.ether; - assert_eq!(ethm.dst, cfg.guest_mac); - assert_eq!(ethm.src, cfg.gateway_mac); - assert_eq!(ethm.ether_type, EtherType::Arp); - let eth_len = hppkt.hdr_offsets().inner.ether.hdr_len; - - let mut rdr = hppkt.get_rdr_mut(); - assert!(rdr.seek(eth_len).is_ok()); - let arp = ArpEthIpv4::parse(&mut rdr).unwrap(); - assert_eq!(arp.op, ArpOp::Reply); - assert_eq!(arp.ptype, u16::from(EtherType::Ipv4)); - assert_eq!(arp.sha, cfg.gateway_mac); - assert_eq!(arp.spa, cfg.ipv4_cfg().unwrap().gateway_ip); - assert_eq!(arp.tha, cfg.guest_mac); - assert_eq!(arp.tpa, cfg.ipv4_cfg().unwrap().private_ip); + let ethm = &meta.inner_eth; + assert_eq!(ethm.destination(), cfg.guest_mac); + assert_eq!(ethm.source(), cfg.gateway_mac); + assert_eq!(ethm.ethertype(), Ethertype::ARP); + + let body = hppkt.to_full_meta().meta().copy_remaining(); + + let (arp, ..) = ValidArpEthIpv4::parse(&body[..]).unwrap(); + assert_eq!(arp.op(), ArpOp::REPLY); + assert_eq!(arp.ptype(), Ethertype::IPV4); + assert_eq!(arp.sha(), cfg.gateway_mac); + assert_eq!(arp.spa(), cfg.ipv4_cfg().unwrap().gateway_ip); + assert_eq!(arp.tha(), cfg.guest_mac); + assert_eq!(arp.tpa(), cfg.ipv4_cfg().unwrap().private_ip); } res => panic!("expected a Hairpin, got {:?}", res), @@ -1952,9 +1981,10 @@ fn flow_expiration() { // Run the packet through g1's port in the outbound direction and // verify the resulting packet meets expectations. 
// ================================================================ - let mut pkt1 = http_syn(&g1_cfg, &g2_cfg); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let mut pkt1_m = http_syn(&g1_cfg, &g2_cfg); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -2015,7 +2045,7 @@ fn test_guest_to_gateway_icmpv6_ping( // ================================================================ // Generate an ICMP Echo Request from G1 to Virtual GW // ================================================================ - let mut pkt1 = gen_icmp_echo_req( + let mut pkt1_m = gen_icmp_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, src_ip.into(), @@ -2025,15 +2055,16 @@ fn test_guest_to_gateway_icmpv6_ping( &data[..], 3, ); - pcap.add_pkt(&pkt1); + pcap.add_pkt(&pkt1_m); // ================================================================ // Run the Echo Request through g1's port in the outbound // direction and verify it results in an Echo Reply Hairpin packet // back to guest. // ================================================================ - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - let hp = match res { + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + let mut hp = match res { Ok(Hairpin(hp)) => hp, _ => panic!("expected Hairpin, got {:?}", res), }; @@ -2042,50 +2073,43 @@ fn test_guest_to_gateway_icmpv6_ping( // In this case we are parsing a hairpin reply, so we can't use // the VpcParser since it would expect any inbound packet to be // encapsulated. 
- let reply = hp.parse(In, GenericUlp {}).unwrap(); - pcap.add_pkt(&reply); - - // Ether + IPv6 + ICMPv6 - assert_eq!(reply.body_offset(), IP6_SZ + IcmpHdr::SIZE); - assert_eq!(reply.body_seg(), 0); + pcap.add_pkt(&hp); + let reply = parse_inbound(&mut hp, GenericUlp {}).unwrap(); let meta = reply.meta(); - assert!(meta.outer.ether.is_none()); - assert!(meta.outer.ip.is_none()); - assert!(meta.outer.encap.is_none()); - - let eth = meta.inner.ether; - assert_eq!(eth.src, g1_cfg.gateway_mac); - assert_eq!(eth.dst, g1_cfg.guest_mac); - - let (src, dst) = match meta.inner.ip.as_ref().unwrap() { - IpMeta::Ip6(ip6) => { - assert_eq!(ip6.src, dst_ip); - assert_eq!(ip6.dst, src_ip); - assert_eq!(ip6.proto, Protocol::ICMPv6); + + let eth = &meta.inner_eth; + assert_eq!(eth.source(), g1_cfg.gateway_mac); + assert_eq!(eth.destination(), g1_cfg.guest_mac); + + let (src, dst) = match meta.inner_l3.as_ref().unwrap() { + ValidL3::Ipv6(ip6) => { + assert_eq!(ip6.source(), dst_ip); + assert_eq!(ip6.destination(), src_ip); + assert_eq!(ip6.next_header(), IngotIpProto::ICMP_V6); ( - Ipv6Address::from_bytes(&ip6.src), - Ipv6Address::from_bytes(&ip6.dst), + Ipv6Address::from_bytes(&ip6.source()), + Ipv6Address::from_bytes(&ip6.destination()), ) } - ip4 => panic!("expected inner IPv6 metadata, got IPv4: {:?}", ip4), + _ => panic!("expected inner IPv6 metadata, got IPv4"), }; - let Some(icmp6) = meta.inner_icmp6() else { + let Some(ValidUlp::IcmpV6(icmp6)) = &meta.inner_ulp else { panic!("expected inner ICMPv6 metadata"); }; // `Icmpv6Packet` requires the ICMPv6 header and not just the message payload. - // Given we successfully got the ICMPv6 metadata, rewinding here is fine. 
- let mut rdr = reply.get_body_rdr(); - rdr.seek_back(icmp6.hdr_len()).unwrap(); + let mut reply_body = icmp6.emit_vec(); + let msg_type = Icmpv6Message::from(icmp6.ty()); + let msg_code = icmp6.code(); - let reply_body = rdr.copy_remaining(); + reply_body.extend(reply.to_full_meta().meta().copy_remaining().into_iter()); let reply_pkt = Icmpv6Packet::new_checked(&reply_body).unwrap(); // Verify the parsed metadata matches the packet - assert_eq!(icmp6.msg_code, reply_pkt.msg_code()); - assert_eq!(icmp6.msg_type, reply_pkt.msg_type().into()); + assert_eq!(msg_code, reply_pkt.msg_code()); + assert_eq!(msg_type, reply_pkt.msg_type()); let mut csum = CsumCapab::ignored(); csum.icmpv6 = smoltcp::phy::Checksum::Rx; @@ -2122,16 +2146,17 @@ fn gateway_router_advert_reply() { // ==================================================== // Generate a Router Solicitation from G1 to Virtual GW // ==================================================== - let mut pkt1 = gen_router_solicitation(&g1_cfg.guest_mac); - pcap.add_pkt(&pkt1); + let mut pkt1_m = gen_router_solicitation(&g1_cfg.guest_mac); + pcap.add_pkt(&pkt1_m); // ================================================================ // Run the Solicitation through g1's port in the outbound // direction and verify it results in an Router Advertisement // hairpin back to guest. // ================================================================ - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - let hp = match res { + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + let mut hp = match res { Ok(Hairpin(hp)) => hp, _ => panic!("expected Hairpin, got {:?}", res), }; @@ -2140,35 +2165,31 @@ fn gateway_router_advert_reply() { // In this case we are parsing a hairpin reply, so we can't use // the VpcParser since it would expect any inbound packet to be // encapsulated. 
- let reply = hp.parse(In, GenericUlp {}).unwrap(); - pcap.add_pkt(&reply); - - // Ether + IPv6 + ICMPv6 - assert_eq!(reply.body_offset(), IP6_SZ + IcmpHdr::SIZE); - assert_eq!(reply.body_seg(), 0); + pcap.add_pkt(&hp); + let reply = parse_inbound(&mut hp, GenericUlp {}).unwrap(); let meta = reply.meta(); - assert!(meta.outer.ether.is_none()); - assert!(meta.outer.ip.is_none()); - assert!(meta.outer.encap.is_none()); - let eth = meta.inner.ether; + let eth = &meta.inner_eth; assert_eq!( - eth.src, g1_cfg.gateway_mac, + eth.source(), + g1_cfg.gateway_mac, "Router advertisement should come from the gateway's MAC" ); assert_eq!( - eth.dst, g1_cfg.guest_mac, + eth.destination(), + g1_cfg.guest_mac, "Router advertisement should be destined for the guest's MAC" ); - let IpMeta::Ip6(ip6) = meta.inner.ip.as_ref().expect("No inner IP header") + let ValidL3::Ipv6(ip6) = + meta.inner_l3.as_ref().expect("No inner IP header") else { panic!("Inner IP header is not IPv6"); }; assert_eq!( - ip6.src, + ip6.source(), Ipv6Addr::from_eui64(&g1_cfg.gateway_mac), "Router advertisement should come from the \ gateway's link-local IPv6 address, generated \ @@ -2176,32 +2197,35 @@ fn gateway_router_advert_reply() { ); let expected_dst = Ipv6Addr::from_eui64(&g1_cfg.guest_mac); assert_eq!( - ip6.dst, expected_dst, + ip6.destination(), + expected_dst, "Router advertisement should be destined for \ the guest's Link-Local IPv6 address, generated from \ the EUI-64 transform of its MAC" ); - assert_eq!(ip6.proto, Protocol::ICMPv6); + assert_eq!(ip6.next_header(), IngotIpProto::ICMP_V6); // RFC 4861 6.1.2 requires that the hop limit be 255 in an RA. - assert_eq!(ip6.hop_limit, 255); + assert_eq!(ip6.hop_limit(), 255); - let Some(icmp6) = meta.inner_icmp6() else { + let Some(ValidUlp::IcmpV6(icmp6)) = &meta.inner_ulp else { panic!("expected inner ICMPv6 metadata"); }; // `Icmpv6Packet` requires the ICMPv6 header and not just the message payload. 
// Given we successfully got the ICMPv6 metadata, rewinding here is fine. - let mut rdr = reply.get_body_rdr(); - rdr.seek_back(icmp6.hdr_len()).unwrap(); + let mut reply_body = icmp6.emit_vec(); + let ip6_src = ip6.source(); + let ip6_dst = ip6.destination(); - let reply_body = rdr.copy_remaining(); + reply_body.extend(reply.to_full_meta().meta().copy_remaining().into_iter()); let reply_pkt = Icmpv6Packet::new_checked(&reply_body).unwrap(); + let mut csum = CsumCapab::ignored(); csum.icmpv6 = smoltcp::phy::Checksum::Rx; let reply_icmp = Icmpv6Repr::parse( - &IpAddress::Ipv6(ip6.src.into()), - &IpAddress::Ipv6(ip6.dst.into()), + &IpAddress::Ipv6(ip6_src.into()), + &IpAddress::Ipv6(ip6_dst.into()), &reply_pkt, &csum, ) @@ -2395,60 +2419,54 @@ fn generate_solicit_test_data(cfg: &VpcCfg) -> Vec { // `na`. fn validate_hairpin_advert( pcap: &mut PcapBuilder, - hp: Packet, + mut hp: MsgBlk, na: AdvertInfo, ) { // In this case we are parsing a hairpin reply, so we can't use // the VpcParser since it would expect any inbound packet to be // encapsulated. - let reply = hp.parse(In, GenericUlp {}).unwrap(); - pcap.add_pkt(&reply); + pcap.add_pkt(&hp); + let reply = parse_inbound(&mut hp, GenericUlp {}).unwrap(); - // Verify Ethernet and IPv6 header basics. - assert_eq!(reply.body_offset(), IP6_SZ + IcmpHdr::SIZE); - assert_eq!(reply.body_seg(), 0); let meta = reply.meta(); - assert!(meta.outer.ether.is_none()); - assert!(meta.outer.ip.is_none()); - assert!(meta.outer.encap.is_none()); // Check that the inner MACs are what we expect. - let eth = meta.inner.ether; - assert_eq!(eth.src, na.src_mac); - assert_eq!(eth.dst, na.dst_mac); + let eth = &meta.inner_eth; + assert_eq!(eth.source(), na.src_mac); + assert_eq!(eth.destination(), na.dst_mac); // Check that the inner IPs are what we expect. 
- let ip6 = if let IpMeta::Ip6(ip6) = - meta.inner.ip.as_ref().expect("No inner IP header") - { - ip6 - } else { + let ValidL3::Ipv6(ip6) = + meta.inner_l3.as_ref().expect("No inner IP header") + else { panic!("Inner IP header is not IPv6"); }; - assert_eq!(ip6.src, na.src_ip); - assert_eq!(ip6.dst, na.dst_ip); - assert_eq!(ip6.proto, Protocol::ICMPv6); + assert_eq!(ip6.source(), na.src_ip); + assert_eq!(ip6.destination(), na.dst_ip); + assert_eq!(ip6.next_header(), IngotIpProto::ICMP_V6); // RFC 4861 7.1.2 requires that the hop limit be 255 in an NA. - assert_eq!(ip6.hop_limit, 255); + assert_eq!(ip6.hop_limit(), 255); - let Some(icmp6) = meta.inner_icmp6() else { + let Some(ValidUlp::IcmpV6(icmp6)) = &meta.inner_ulp else { panic!("expected inner ICMPv6 metadata"); }; // `Icmpv6Packet` requires the ICMPv6 header and not just the message payload. // Given we successfully got the ICMPv6 metadata, rewinding here is fine. - let mut rdr = reply.get_body_rdr(); - rdr.seek_back(icmp6.hdr_len()).unwrap(); + let mut reply_body = icmp6.emit_vec(); + let ip6_src = ip6.source(); + let ip6_dst = ip6.destination(); - // Validate the details of the Neighbor Advertisement itself. - let reply_body = rdr.copy_remaining(); + reply_body.extend(reply.to_full_meta().meta().copy_remaining().into_iter()); let reply_pkt = Icmpv6Packet::new_checked(&reply_body).unwrap(); + + // Validate the details of the Neighbor Advertisement itself. let mut csum = CsumCapab::ignored(); csum.icmpv6 = smoltcp::phy::Checksum::Rx; let reply_icmp = Icmpv6Repr::parse( - &IpAddress::Ipv6(ip6.src.into()), - &IpAddress::Ipv6(ip6.dst.into()), + &IpAddress::Ipv6(ip6_src.into()), + &IpAddress::Ipv6(ip6_dst.into()), &reply_pkt, &csum, ) @@ -2490,11 +2508,14 @@ fn test_gateway_neighbor_advert_reply() { // Alternate between using smoltcp or our `compute_checksums` method // to compute the checksums. 
if !with_checksum { - pkt.compute_checksums(); + let mut parsed = + parse_outbound(&mut pkt, VpcParser {}).unwrap().to_full_meta(); + parsed.compute_checksums(); } with_checksum = !with_checksum; pcap.add_pkt(&pkt); - let res = g1.port.process(Out, &mut pkt, ActionMeta::new()); + let pkt1 = parse_outbound(&mut pkt, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); match (res, d.na) { (Ok(ProcessResult::Drop { .. }), None) => { // Dropped the packet, as we expected @@ -2570,9 +2591,10 @@ fn outbound_ndp_dropped() { flags: NdiscNeighborFlags::OVERRIDE, }; - let mut pkt = generate_neighbor_advertisement(&outbound_na, true); + let mut pkt_m = generate_neighbor_advertisement(&outbound_na, true); + let pkt = parse_outbound(&mut pkt_m, VpcParser {}).unwrap(); - let res = g1.port.process(Out, &mut pkt, ActionMeta::new()).unwrap(); + let res = g1.port.process(Out, pkt).unwrap(); match res { ProcessResult::Drop { .. } => { incr!( @@ -2629,8 +2651,9 @@ fn inbound_ndp_dropped_at_gateway() { }; let pkt = generate_neighbor_solicitation(&ns, true); - let mut pkt = encap(pkt, g2_phys, g1_phys); - let res = g1.port.process(In, &mut pkt, ActionMeta::new()).unwrap(); + let mut pkt_m = encap(pkt, g2_phys, g1_phys); + let pkt = parse_inbound(&mut pkt_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt).unwrap(); println!("{res:?}"); match res { ProcessResult::Drop { .. 
} => { @@ -2657,26 +2680,27 @@ fn inbound_ndp_dropped_at_gateway() { fn packet_from_client_dhcpv6_message( cfg: &VpcCfg, msg: &dhcpv6::protocol::Message<'_>, -) -> Packet { - let eth = EtherMeta { - dst: dhcpv6::ALL_RELAYS_AND_SERVERS.multicast_mac().unwrap(), - src: cfg.guest_mac, - ether_type: EtherType::Ipv6, +) -> MsgBlk { + let eth = Ethernet { + destination: dhcpv6::ALL_RELAYS_AND_SERVERS.multicast_mac().unwrap(), + source: cfg.guest_mac, + ethertype: Ethertype::IPV6, }; - let ip = Ipv6Meta { - src: Ipv6Addr::from_eui64(&cfg.guest_mac), - dst: dhcpv6::ALL_RELAYS_AND_SERVERS, - proto: Protocol::UDP, - next_hdr: IpProtocol::Udp, - pay_len: (msg.buffer_len() + UdpHdr::SIZE) as u16, + let payload_len = (msg.buffer_len() + Udp::MINIMUM_LENGTH) as u16; + + let ip = Ipv6 { + source: Ipv6Addr::from_eui64(&cfg.guest_mac), + destination: dhcpv6::ALL_RELAYS_AND_SERVERS, + next_header: IngotIpProto::UDP, + payload_len, ..Default::default() }; - let udp = UdpMeta { - src: dhcpv6::CLIENT_PORT, - dst: dhcpv6::SERVER_PORT, - len: (UdpHdr::SIZE + msg.buffer_len()) as u16, + let udp = Udp { + source: dhcpv6::CLIENT_PORT, + destination: dhcpv6::SERVER_PORT, + length: payload_len, ..Default::default() }; @@ -2684,22 +2708,20 @@ fn packet_from_client_dhcpv6_message( } fn write_dhcpv6_packet( - eth: EtherMeta, - ip: Ipv6Meta, - udp: UdpMeta, + eth: Ethernet, + ip: Ipv6, + udp: Udp, msg: &dhcpv6::protocol::Message<'_>, -) -> Packet { - let reply_len = - msg.buffer_len() + UdpHdr::SIZE + Ipv6Hdr::BASE_SIZE + EtherHdr::SIZE; - let mut pkt = Packet::alloc_and_expand(reply_len); - let mut wtr = pkt.seg0_wtr(); - eth.emit(wtr.slice_mut(EtherHdr::SIZE).unwrap()); - ip.emit(wtr.slice_mut(ip.hdr_len()).unwrap()); - udp.emit(wtr.slice_mut(udp.hdr_len()).unwrap()); - let mut msg_buf = vec![0; msg.buffer_len()]; - msg.copy_into(&mut msg_buf).unwrap(); - wtr.write(&msg_buf).unwrap(); - pkt.parse(Out, GenericUlp {}).unwrap() +) -> MsgBlk { + let total_len = msg.buffer_len() + (ð, &ip, 
&udp).packet_length(); + + let mut pkt = MsgBlk::new_ethernet(total_len); + pkt.emit_back((eth, ip, udp)).unwrap(); + let l = pkt.len(); + pkt.resize(total_len).unwrap(); + msg.copy_into(&mut pkt[l..]); + + pkt } // Assert the essential details of a DHCPv6 exchange. The client request is in @@ -2713,36 +2735,40 @@ fn write_dhcpv6_packet( // - The server must include its own Server ID option. fn verify_dhcpv6_essentials<'a>( cfg: &VpcCfg, - request_pkt: &Packet, + request_pkt: &mut MsgBlk, request: &dhcpv6::protocol::Message<'a>, - reply_pkt: &Packet, + reply_pkt: &mut MsgBlk, reply: &dhcpv6::protocol::Message<'a>, ) { + let request_pkt = + parse_outbound(request_pkt, GenericUlp {}).unwrap().to_full_meta(); + let reply_pkt = + parse_inbound(reply_pkt, GenericUlp {}).unwrap().to_full_meta(); let request_meta = request_pkt.meta(); let reply_meta = reply_pkt.meta(); let request_ether = request_meta.inner_ether(); let reply_ether = reply_meta.inner_ether(); assert_eq!( - request_ether.dst, + request_ether.destination(), dhcpv6::ALL_RELAYS_AND_SERVERS.multicast_mac().unwrap() ); - assert_eq!(request_ether.src, reply_ether.dst); + assert_eq!(request_ether.source(), reply_ether.destination()); let request_ip = request_meta.inner_ip6().unwrap(); let reply_ip = reply_meta.inner_ip6().unwrap(); - assert_eq!(request_ip.src, Ipv6Addr::from_eui64(&cfg.guest_mac)); - assert_eq!(request_ip.dst, dhcpv6::ALL_RELAYS_AND_SERVERS); - assert_eq!(request_ip.proto, Protocol::UDP); - assert_eq!(reply_ip.dst, request_ip.src); - assert_eq!(reply_ip.src, Ipv6Addr::from_eui64(&cfg.gateway_mac)); - assert_eq!(reply_ip.proto, Protocol::UDP); + assert_eq!(request_ip.source(), Ipv6Addr::from_eui64(&cfg.guest_mac)); + assert_eq!(request_ip.destination(), dhcpv6::ALL_RELAYS_AND_SERVERS); + assert_eq!(request_ip.next_header(), IngotIpProto::UDP); + assert_eq!(reply_ip.destination(), request_ip.source()); + assert_eq!(reply_ip.source(), Ipv6Addr::from_eui64(&cfg.gateway_mac)); + 
assert_eq!(reply_ip.next_header(), IngotIpProto::UDP); let request_udp = request_meta.inner_udp().unwrap(); let reply_udp = reply_meta.inner_udp().unwrap(); - assert_eq!(request_udp.src, dhcpv6::CLIENT_PORT); - assert_eq!(request_udp.dst, dhcpv6::SERVER_PORT); - assert_eq!(reply_udp.dst, dhcpv6::CLIENT_PORT); - assert_eq!(reply_udp.src, dhcpv6::SERVER_PORT); + assert_eq!(request_udp.source(), dhcpv6::CLIENT_PORT); + assert_eq!(request_udp.destination(), dhcpv6::SERVER_PORT); + assert_eq!(reply_udp.destination(), dhcpv6::CLIENT_PORT); + assert_eq!(reply_udp.source(), dhcpv6::SERVER_PORT); // Verify the details of the DHCPv6 exchange itself. assert_eq!(reply.xid, request.xid); @@ -2831,28 +2857,32 @@ fn test_reply_to_dhcpv6_solicit_or_request() { xid: dhcpv6::TransactionId::from(&[0u8, 1, 2]), options, }; - let mut request_pkt = + let mut request_pkt_m = packet_from_client_dhcpv6_message(&g1_cfg, &request); - pcap.add_pkt(&request_pkt); - let res = g1 - .port - .process(Out, &mut request_pkt, ActionMeta::new()) - .unwrap(); - if let Hairpin(hp) = res { + pcap.add_pkt(&request_pkt_m); + let request_pkt = + parse_outbound(&mut request_pkt_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, request_pkt).unwrap(); + + if let Hairpin(mut hp) = res { // In this case we are parsing a hairpin reply, so we // can't use the VpcParser since it would expect any // inbound packet to be encapsulated. 
- let reply_pkt = hp.parse(In, GenericUlp {}).unwrap(); - pcap.add_pkt(&reply_pkt); + pcap.add_pkt(&hp); + + let reply_pkt = parse_inbound(&mut hp, GenericUlp {}) + .unwrap() + .to_full_meta(); + let out_body = reply_pkt.meta().copy_remaining(); + drop(reply_pkt); - let body = reply_pkt.get_body_rdr().copy_remaining(); let reply = - dhcpv6::protocol::Message::from_bytes(&body).unwrap(); + dhcpv6::protocol::Message::from_bytes(&out_body).unwrap(); verify_dhcpv6_essentials( &g1_cfg, - &request_pkt, + &mut request_pkt_m, &request, - &reply_pkt, + &mut hp, &reply, ); @@ -2962,14 +2992,15 @@ fn establish_http_conn( // Run the SYN packet through g1's port in the outbound direction // and verify it is accepted. // ================================================================ - let mut pkt1 = http_syn2( + let mut pkt1_m = http_syn2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip, ); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -2979,7 +3010,9 @@ fn establish_http_conn( "stats.port.out_modified, stats.port.out_uft_miss", ] ); - let snat_port = pkt1.meta().inner.ulp.unwrap().src_port().unwrap(); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); + let snat_port = + pkt1.to_full_meta().meta().inner_ulp().unwrap().src_port().unwrap(); // ================================================================ // Step 2 @@ -2987,7 +3020,7 @@ fn establish_http_conn( // Run the SYN+ACK packet through g1's port in the inbound // direction and verify it is accepted. 
// ================================================================ - let mut pkt2 = http_syn_ack2( + let mut pkt2_m = http_syn_ack2( BS_MAC_ADDR, dst_ip, g1_cfg.guest_mac, @@ -3004,9 +3037,10 @@ fn establish_http_conn( mac: BS_MAC_ADDR, vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), }; - pkt2 = encap_external(pkt2, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt2, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + pkt2_m = encap_external(pkt2_m, bs_phys, g1_phys); + let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt2); + expect_modified!(res, pkt2_m); incr!(g1, ["uft.in", "stats.port.in_modified, stats.port.in_uft_miss"]); // ================================================================ @@ -3014,14 +3048,15 @@ fn establish_http_conn( // // Send ACK to establish connection. // ================================================================ - let mut pkt3 = http_ack2( + let mut pkt3_m = http_ack2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip, ); - let res = g1.port.process(Out, &mut pkt3, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt3 = parse_outbound(&mut pkt3_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt3); + expect_modified!(res, pkt3_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); snat_port } @@ -3090,13 +3125,14 @@ fn uft_lft_invalidation_out() { // ================================================================ // Step 4 // ================================================================ - let mut pkt4 = http_get2( + let mut pkt4_m = http_get2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip, ); - let res = g1.port.process(Out, &mut pkt4, ActionMeta::new()); + let pkt4 = parse_outbound(&mut pkt4_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt4); assert_drop!( res, DropReason::Layer { name: "firewall", reason: DenyReason::Rule } @@ -3158,17 +3194,18 @@ fn 
uft_lft_invalidation_in() { }; let snat_port = establish_http_conn(&g1_cfg, &mut g1, dst_ip); - let mut pkt1 = http_get2( + let mut pkt1_m = http_get2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip, ); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); - let mut pkt2 = http_get_ack2( + let mut pkt2_m = http_get_ack2( BS_MAC_ADDR, dst_ip, g1_cfg.guest_mac, @@ -3180,10 +3217,11 @@ fn uft_lft_invalidation_in() { mac: BS_MAC_ADDR, vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), }; - pkt2 = encap_external(pkt2, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt2, ActionMeta::new()); + pkt2_m = encap_external(pkt2_m, bs_phys, g1_phys); + let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt2); incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); - assert!(matches!(res, Ok(Modified))); + expect_modified!(res, pkt2_m); // ================================================================ // Step 3 @@ -3209,7 +3247,7 @@ fn uft_lft_invalidation_in() { // ================================================================ // Step 4 // ================================================================ - let mut pkt3 = http_301_reply2( + let mut pkt3_m = http_301_reply2( BS_MAC_ADDR, dst_ip, g1_cfg.guest_mac, @@ -3221,8 +3259,9 @@ fn uft_lft_invalidation_in() { mac: BS_MAC_ADDR, vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), }; - pkt3 = encap_external(pkt3, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt3, ActionMeta::new()); + pkt3_m = encap_external(pkt3_m, bs_phys, g1_phys); + let pkt3 = parse_inbound(&mut pkt3_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt3); assert_drop!( res, DropReason::Layer { name: "firewall", reason: 
DenyReason::Default } @@ -3244,20 +3283,26 @@ fn test_outbound_http(g1_cfg: &VpcCfg, g1: &mut PortAndVps) -> InnerFlowId { mac: g1_cfg.guest_mac, vni: g1_cfg.vni, }; + let bs_phys = TestIpPhys { + ip: BS_IP_ADDR, + mac: BS_MAC_ADDR, + vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), + }; // ================================================================ // SYN: Client -> Server // ================================================================ let dst_ip = "52.10.128.69".parse().unwrap(); - let mut pkt1 = http_syn2( + let mut pkt1_m = http_syn2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip, ); - let flow = *pkt1.flow(); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let flow = pkt1.flow(); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -3267,181 +3312,168 @@ fn test_outbound_http(g1_cfg: &VpcCfg, g1: &mut PortAndVps) -> InnerFlowId { "stats.port.out_modified, stats.port.out_uft_miss", ] ); - let snat_port = pkt1.meta().inner.ulp.unwrap().src_port().unwrap(); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); + let snat_port = + pkt1.to_full_meta().meta().inner_ulp().unwrap().src_port().unwrap(); assert_eq!(TcpState::SynSent, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // SYN+ACK: Server -> Client // ================================================================ - let mut pkt2 = http_syn_ack2( + let mut pkt2_m = http_syn_ack2( BS_MAC_ADDR, dst_ip, g1_cfg.guest_mac, g1_cfg.snat().external_ip, snat_port, ); - let bs_phys = TestIpPhys { - ip: BS_IP_ADDR, - mac: BS_MAC_ADDR, - vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), - }; - pkt2 = encap_external(pkt2, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt2, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + pkt2_m = encap_external(pkt2_m, 
bs_phys, g1_phys); + let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt2); + expect_modified!(res, pkt2_m); incr!(g1, ["uft.in", "stats.port.in_modified, stats.port.in_uft_miss"]); assert_eq!(TcpState::Established, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // ACK: Client -> Server // ================================================================ - let mut pkt3 = http_ack2( + let mut pkt3_m = http_ack2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip, ); - let res = g1.port.process(Out, &mut pkt3, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt3 = parse_outbound(&mut pkt3_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt3); + expect_modified!(res, pkt3_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); assert_eq!(TcpState::Established, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // HTTP GET: Client -> Server // ================================================================ - let mut pkt4 = http_get2( + let mut pkt4_m = http_get2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip, ); - let res = g1.port.process(Out, &mut pkt4, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt4 = parse_outbound(&mut pkt4_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt4); + expect_modified!(res, pkt4_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); assert_eq!(TcpState::Established, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // ACK HTTP GET: Server -> Client // ================================================================ - let mut pkt5 = http_get_ack2( + let mut pkt5_m = http_get_ack2( BS_MAC_ADDR, dst_ip, g1_cfg.guest_mac, g1_cfg.snat().external_ip, snat_port, ); - let bs_phys = TestIpPhys { - ip: BS_IP_ADDR, - mac: 
BS_MAC_ADDR, - vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), - }; - pkt5 = encap_external(pkt5, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt5, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + pkt5_m = encap_external(pkt5_m, bs_phys, g1_phys); + let pkt5 = parse_inbound(&mut pkt5_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt5); + expect_modified!(res, pkt5_m); incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); assert_eq!(TcpState::Established, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // HTTP 301 Reply: Server -> Client // ================================================================ - let mut pkt6 = http_301_reply2( + let mut pkt6_m = http_301_reply2( BS_MAC_ADDR, dst_ip, g1_cfg.guest_mac, g1_cfg.snat().external_ip, snat_port, ); - let bs_phys = TestIpPhys { - ip: BS_IP_ADDR, - mac: BS_MAC_ADDR, - vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), - }; - pkt6 = encap_external(pkt6, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt6, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + pkt6_m = encap_external(pkt6_m, bs_phys, g1_phys); + let pkt6 = parse_inbound(&mut pkt6_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt6); + expect_modified!(res, pkt6_m); incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); assert_eq!(TcpState::Established, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // ACK HTTP 301: Client -> Server // ================================================================ - let mut pkt7 = http_301_ack2( + let mut pkt7_m = http_301_ack2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip, ); - let res = g1.port.process(Out, &mut pkt7, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt7 = parse_outbound(&mut pkt7_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt7); + expect_modified!(res, 
pkt7_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); assert_eq!(TcpState::Established, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // FIN: Client -> Server // ================================================================ - let mut pkt8 = http_guest_fin2( + let mut pkt8_m = http_guest_fin2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip, ); - let res = g1.port.process(Out, &mut pkt8, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt8 = parse_outbound(&mut pkt8_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt8); + expect_modified!(res, pkt8_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); assert_eq!(TcpState::FinWait1, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // ACK FIN: Server -> Client // ================================================================ - let mut pkt9 = http_server_ack_fin2( + let mut pkt9_m = http_server_ack_fin2( BS_MAC_ADDR, dst_ip, g1_cfg.guest_mac, g1_cfg.snat().external_ip, snat_port, ); - let bs_phys = TestIpPhys { - ip: BS_IP_ADDR, - mac: BS_MAC_ADDR, - vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), - }; - pkt9 = encap_external(pkt9, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt9, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + pkt9_m = encap_external(pkt9_m, bs_phys, g1_phys); + let pkt9 = parse_inbound(&mut pkt9_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt9); + expect_modified!(res, pkt9_m); incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); assert_eq!(TcpState::FinWait2, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // FIN: Server -> Client // ================================================================ - let mut pkt10 = http_server_fin2( + let mut pkt10_m = http_server_fin2( BS_MAC_ADDR, dst_ip, 
g1_cfg.guest_mac, g1_cfg.snat().external_ip, snat_port, ); - let bs_phys = TestIpPhys { - ip: BS_IP_ADDR, - mac: BS_MAC_ADDR, - vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), - }; - pkt10 = encap_external(pkt10, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt10, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + pkt10_m = encap_external(pkt10_m, bs_phys, g1_phys); + let pkt10 = parse_inbound(&mut pkt10_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt10); + expect_modified!(res, pkt10_m); incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); assert_eq!(TcpState::TimeWait, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // ACK Server FIN: Client -> Server // ================================================================ - let mut pkt11 = http_guest_ack_fin2( + let mut pkt11_m = http_guest_ack_fin2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip, ); - let res = g1.port.process(Out, &mut pkt11, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt11 = parse_outbound(&mut pkt11_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt11); + expect_modified!(res, pkt11_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); assert_eq!(TcpState::TimeWait, g1.port.tcp_state(&flow).unwrap()); @@ -3541,24 +3573,30 @@ fn early_tcp_invalidation() { // Repeat the exact same flow. This SYN is not blocked, the old // entry is invalidated, and a new one is created. 
// ================================================================ - let mut pkt1 = http_syn2( + let mut pkt1_m = http_syn2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip, ); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); - incr!( + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); + update!( g1, [ - "stats.port.out_modified, stats.port.out_uft_miss", + "incr:stats.port.out_modified, stats.port.out_uft_miss", // We're hitting the old entry, before it is discarded. - "stats.port.out_uft_hit", + "incr:stats.port.out_uft_hit", + // Both UFTs are wiped out for reprocessing, but OUT is + // re-added. + "decr:uft.in" ] ); assert_eq!(TcpState::SynSent, g1.port.tcp_state(&flow).unwrap()); - let snat_port = pkt1.meta().inner.ulp.unwrap().src_port().unwrap(); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); + let snat_port = + pkt1.to_full_meta().meta().inner_ulp().unwrap().src_port().unwrap(); // ================================================================ // Drive to established, then validate the same applies to inbound @@ -3574,20 +3612,21 @@ fn early_tcp_invalidation() { mac: g1_cfg.guest_mac, vni: g1_cfg.vni, }; - let mut pkt2 = http_syn_ack2( + let mut pkt2_m = http_syn_ack2( BS_MAC_ADDR, dst_ip, g1_cfg.guest_mac, g1_cfg.snat().external_ip, snat_port, ); - pkt2 = encap_external(pkt2, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt2, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); - incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); + pkt2_m = encap_external(pkt2_m, bs_phys, g1_phys); + let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt2); + expect_modified!(res, pkt2_m); + incr!(g1, ["stats.port.in_modified, stats.port.in_uft_miss, uft.in"]); assert_eq!(TcpState::Established, 
g1.port.tcp_state(&flow).unwrap()); - let mut pkt1 = http_syn3( + let mut pkt1_m = http_syn3( BS_MAC_ADDR, dst_ip, g1_cfg.guest_mac, @@ -3595,14 +3634,18 @@ fn early_tcp_invalidation() { 80, snat_port, ); - pkt1 = encap_external(pkt1, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + pkt1_m = encap_external(pkt1_m, bs_phys, g1_phys); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt1); + expect_modified!(res, pkt1_m); update!( g1, [ + // Hit the old flow... "incr:stats.port.in_modified, stats.port.in_uft_hit", - "set:uft.in=0, uft.out=0", + // Then reprocessed. + "incr:stats.port.in_uft_miss", + "set:uft.in=1, uft.out=0", ] ); assert_eq!(TcpState::Listen, g1.port.tcp_state(&flow).unwrap()); @@ -3618,15 +3661,16 @@ fn early_tcp_invalidation() { // This case is just an ACK, but the same logic applies for // FIN+ACK. The FIN+ACK case could be special-cased CLOSED->CLOSED, // but we're not doing that for now. - let mut pkt11 = http_guest_ack_fin2( + let mut pkt11_m = http_guest_ack_fin2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip2, ); - let flow = *pkt11.flow(); - let res = g1.port.process(Out, &mut pkt11, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt11 = parse_outbound(&mut pkt11_m, VpcParser {}).unwrap(); + let flow = pkt11.flow(); + let res = g1.port.process(Out, pkt11); + expect_modified!(res, pkt11_m); incr!( g1, [ @@ -3641,21 +3685,22 @@ fn early_tcp_invalidation() { // ================================================================ // This entry will not block new flows on the same tuple. 
// ================================================================ - let mut pkt1 = http_syn2( + let mut pkt1_m = http_syn2( g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, dst_ip2, ); - let flow = *pkt1.flow(); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); - incr!( + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let flow = pkt1.flow(); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); + update!( g1, [ - "stats.port.out_modified, stats.port.out_uft_miss", + "incr:stats.port.out_modified, stats.port.out_uft_miss", // We're hitting the old entry, before it is discarded. - "stats.port.out_uft_hit", + "incr:stats.port.out_uft_hit", ] ); assert_eq!(TcpState::SynSent, g1.port.tcp_state(&flow).unwrap()); @@ -3710,7 +3755,7 @@ fn ephemeral_ip_preferred_over_snat_outbound() { let client_ip = "52.10.128.69".parse().unwrap(); let data = b"reunion"; - let mut pkt1 = gen_icmpv4_echo_req( + let mut pkt1_m = gen_icmpv4_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, g1_cfg.ipv4().private_ip, @@ -3720,12 +3765,13 @@ fn ephemeral_ip_preferred_over_snat_outbound() { data, 1, ); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); // Process the packet through our port. It should be allowed through: // we have a V2P mapping for the target guest, and a route for the other // subnet. 
- let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(ProcessResult::Modified))); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, @@ -3736,8 +3782,10 @@ fn ephemeral_ip_preferred_over_snat_outbound() { ] ); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap().to_full_meta(); + assert_eq!( - pkt1.meta().inner_ip4().unwrap().src, + pkt1.meta().inner_ip4().unwrap().source(), "10.60.1.20".parse().unwrap(), "did not choose assigned ephemeral IP" ); @@ -3808,16 +3856,16 @@ fn tcp_inbound() { // ================================================================ // SYN: Client -> Server // ================================================================ - let mut pkt1 = http_syn2(BS_MAC_ADDR, client_ip, serv_mac, serv_ext_ip); + let mut pkt1_m = http_syn2(BS_MAC_ADDR, client_ip, serv_mac, serv_ext_ip); let bs_phys = TestIpPhys { ip: BS_IP_ADDR, mac: BS_MAC_ADDR, vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), }; - pkt1 = encap(pkt1, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt1, ActionMeta::new()); - let flow = pkt1.flow().mirror(); - assert!(matches!(res, Ok(Modified))); + pkt1_m = encap(pkt1_m, bs_phys, g1_phys); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -3827,133 +3875,147 @@ fn tcp_inbound() { "stats.port.in_modified, stats.port.in_uft_miss", ] ); - let sport = pkt1.meta().inner.ulp.unwrap().src_port().unwrap(); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let flow = pkt1.flow().mirror(); + let sport = + pkt1.to_full_meta().meta().inner_ulp().unwrap().src_port().unwrap(); assert_eq!(TcpState::Listen, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // SYN+ACK: Server -> Client // ================================================================ - let mut pkt2 = http_syn_ack2( + let mut pkt2_m 
= http_syn_ack2( serv_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, client_ip, sport, ); - let res = g1.port.process(Out, &mut pkt2, ActionMeta::new()); - assert!(matches!(res, Ok(Modified)), "expected Modified, got {:?}", res); + let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt2); + expect_modified!(res, pkt2_m); incr!(g1, ["uft.out, stats.port.out_modified, stats.port.out_uft_miss"]); assert_eq!(TcpState::SynRcvd, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // ACK: Client -> Server // ================================================================ - let mut pkt3 = http_ack2(BS_MAC_ADDR, client_ip, serv_mac, serv_ext_ip); - pkt3 = encap(pkt3, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt3, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let mut pkt3_m = http_ack2(BS_MAC_ADDR, client_ip, serv_mac, serv_ext_ip); + pkt3_m = encap(pkt3_m, bs_phys, g1_phys); + let pkt3 = parse_inbound(&mut pkt3_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt3); + expect_modified!(res, pkt3_m); incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); assert_eq!(TcpState::Established, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // HTTP GET: Client -> Server // ================================================================ - let mut pkt4 = http_get2(BS_MAC_ADDR, client_ip, serv_mac, serv_ext_ip); - pkt4 = encap(pkt4, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt4, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let mut pkt4_m = http_get2(BS_MAC_ADDR, client_ip, serv_mac, serv_ext_ip); + pkt4_m = encap(pkt4_m, bs_phys, g1_phys); + let pkt4 = parse_inbound(&mut pkt4_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt4); + expect_modified!(res, pkt4_m); incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); 
assert_eq!(TcpState::Established, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // ACK HTTP GET: Server -> Client // ================================================================ - let mut pkt5 = http_get_ack2( + let mut pkt5_m = http_get_ack2( serv_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, client_ip, sport, ); - let res = g1.port.process(Out, &mut pkt5, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt5 = parse_outbound(&mut pkt5_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt5); + expect_modified!(res, pkt5_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); assert_eq!(TcpState::Established, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // HTTP 301 Reply: Server -> Client // ================================================================ - let mut pkt6 = http_301_reply2( + let mut pkt6_m = http_301_reply2( serv_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, client_ip, sport, ); - let res = g1.port.process(Out, &mut pkt6, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt6 = parse_outbound(&mut pkt6_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt6); + expect_modified!(res, pkt6_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); assert_eq!(TcpState::Established, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // ACK HTTP 301: Client -> Server // ================================================================ - let mut pkt7 = http_301_ack2(BS_MAC_ADDR, client_ip, serv_mac, serv_ext_ip); - pkt7 = encap(pkt7, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt7, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let mut pkt7_m = + http_301_ack2(BS_MAC_ADDR, client_ip, serv_mac, serv_ext_ip); + pkt7_m = encap(pkt7_m, bs_phys, g1_phys); + let pkt7 = parse_inbound(&mut pkt7_m, 
VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt7); + expect_modified!(res, pkt7_m); incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); assert_eq!(TcpState::Established, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // FIN: Client -> Server // ================================================================ - let mut pkt8 = + let mut pkt8_m = http_guest_fin2(BS_MAC_ADDR, client_ip, serv_mac, serv_ext_ip); - pkt8 = encap(pkt8, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt8, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + pkt8_m = encap(pkt8_m, bs_phys, g1_phys); + let pkt8 = parse_inbound(&mut pkt8_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt8); + expect_modified!(res, pkt8_m); incr!(g1, ["stats.port.in_modified, stats.port.in_uft_hit"]); assert_eq!(TcpState::CloseWait, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // ACK Client FIN: Server -> Client // ================================================================ - let mut pkt9 = http_server_ack_fin2( + let mut pkt9_m = http_server_ack_fin2( serv_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, client_ip, sport, ); - let res = g1.port.process(Out, &mut pkt9, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt9 = parse_outbound(&mut pkt9_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt9); + expect_modified!(res, pkt9_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); assert_eq!(TcpState::CloseWait, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // FIN: Server -> Client // ================================================================ - let mut pkt10 = http_server_fin2( + let mut pkt10_m = http_server_fin2( serv_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, client_ip, sport, ); - let res = g1.port.process(Out, &mut pkt10, 
ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + let pkt10 = parse_outbound(&mut pkt10_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt10); + expect_modified!(res, pkt10_m); incr!(g1, ["stats.port.out_modified, stats.port.out_uft_hit"]); assert_eq!(TcpState::LastAck, g1.port.tcp_state(&flow).unwrap()); // ================================================================ // ACK Server FIN: Client -> Server // ================================================================ - let mut pkt11 = + let mut pkt11_m = http_guest_ack_fin2(BS_MAC_ADDR, client_ip, serv_mac, serv_ext_ip); - pkt11 = encap(pkt11, bs_phys, g1_phys); - let res = g1.port.process(In, &mut pkt11, ActionMeta::new()); - assert!(matches!(res, Ok(Modified))); + pkt11_m = encap(pkt11_m, bs_phys, g1_phys); + let pkt11 = parse_inbound(&mut pkt11_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt11); + expect_modified!(res, pkt11_m); update!( g1, [ @@ -3981,13 +4043,14 @@ fn anti_spoof() { // ================================================================ // Try to send an outbound packet with a spoofed IP. // ================================================================ - let mut pkt1 = http_syn2( + let mut pkt1_m = http_syn2( g1_cfg.guest_mac, src_ip, GW_MAC_ADDR, g2_cfg.ipv4().private_ip, ); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); assert_drop!( res, DropReason::Layer { name: "gateway", reason: DenyReason::Default } @@ -4003,13 +4066,14 @@ fn anti_spoof() { // ================================================================ // Try to send an outbound packet with a spoofed MAC address. 
// ================================================================ - pkt1 = http_syn2( + pkt1_m = http_syn2( src_mac, g1_cfg.ipv4().private_ip, GW_MAC_ADDR, g2_cfg.ipv4().private_ip, ); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); assert_drop!( res, DropReason::Layer { name: "gateway", reason: DenyReason::Default } @@ -4025,8 +4089,9 @@ fn anti_spoof() { // ================================================================ // Try to send an outbound packet with a spoofed MAC address and IP. // ================================================================ - pkt1 = http_syn2(src_mac, src_ip, GW_MAC_ADDR, g2_cfg.ipv4().private_ip); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); + pkt1_m = http_syn2(src_mac, src_ip, GW_MAC_ADDR, g2_cfg.ipv4().private_ip); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); assert_drop!( res, DropReason::Layer { name: "gateway", reason: DenyReason::Default } @@ -4064,7 +4129,7 @@ fn no_panic_on_flow_table_full() { // Send one TCP packet to `zinascii.com`. let dst_ip: Ipv4Addr = "52.10.128.69".parse().unwrap(); - let mut pkt1 = http_syn2( + let mut pkt1_m = http_syn2( g1_cfg.guest_mac, g1_cfg.ipv4_cfg().unwrap().private_ip, GW_MAC_ADDR, @@ -4074,20 +4139,22 @@ fn no_panic_on_flow_table_full() { // Process the packet through our port. We don't actually care about the // contents here, we just want to make sure that the packet can be _sent at // all_. - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); assert!(res.is_ok()); // Send another one, which should exhaust the TCP flow table limit we // severely truncated above. Note we need to send to a different IP address. // Let's use google.com. 
let dst_ip: Ipv4Addr = "142.251.46.238".parse().unwrap(); - let mut pkt2 = http_syn2( + let mut pkt2_m = http_syn2( g1_cfg.guest_mac, g1_cfg.ipv4_cfg().unwrap().private_ip, GW_MAC_ADDR, dst_ip, ); - let res2 = g1.port.process(Out, &mut pkt2, ActionMeta::new()); + let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res2 = g1.port.process(Out, pkt2); assert_drop!(res2, DropReason::TcpErr); } @@ -4129,7 +4196,7 @@ fn intra_subnet_routes_with_custom() { let data = b"1234\0"; // Send one ICMP packet to that guest. - let mut pkt1 = gen_icmpv4_echo_req( + let mut pkt1_m = gen_icmpv4_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, g1_cfg.ipv4().private_ip, @@ -4143,8 +4210,9 @@ fn intra_subnet_routes_with_custom() { // Process the packet through our port. It should be allowed through: // we have a V2P mapping for the target guest, and a route for the other // subnet. - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(ProcessResult::Modified))); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -4158,7 +4226,7 @@ fn intra_subnet_routes_with_custom() { router::add_entry(&g1.port, cidr, RouterTarget::Drop, RouterClass::Custom) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); - let mut pkt2 = gen_icmpv4_echo_req( + let mut pkt2_m = gen_icmpv4_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, g1_cfg.ipv4().private_ip, @@ -4168,7 +4236,8 @@ fn intra_subnet_routes_with_custom() { data, 1, ); - let res = g1.port.process(Out, &mut pkt2, ActionMeta::new()); + let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt2); assert!(matches!( res, Ok(ProcessResult::Drop { @@ -4188,7 +4257,7 @@ fn intra_subnet_routes_with_custom() { router::del_entry(&g1.port, cidr, RouterTarget::Drop, RouterClass::Custom) .unwrap(); update!(g1, ["incr:epoch", "decr:router.rules.out"]); - let mut pkt3 = 
gen_icmpv4_echo_req( + let mut pkt3_m = gen_icmpv4_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, g1_cfg.ipv4().private_ip, @@ -4198,8 +4267,9 @@ fn intra_subnet_routes_with_custom() { data, 1, ); - let res = g1.port.process(Out, &mut pkt3, ActionMeta::new()); - assert!(matches!(res, Ok(ProcessResult::Modified))); + let pkt3 = parse_outbound(&mut pkt3_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt3); + expect_modified!(res, pkt3_m); } #[test] @@ -4243,7 +4313,7 @@ fn port_as_router_target() { let data = b"1234\0"; // Send one ICMP packet to that range. - let mut pkt1 = gen_icmpv4_echo_req( + let mut pkt1_m = gen_icmpv4_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, g1_cfg.ipv4().private_ip, @@ -4256,8 +4326,9 @@ fn port_as_router_target() { // That packet should be allowed: the target IP resolves to a valid // V2P Mapping. - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()); - assert!(matches!(res, Ok(ProcessResult::Modified))); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); incr!( g1, [ @@ -4266,18 +4337,23 @@ fn port_as_router_target() { ] ); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); + // Encap routes between sleds correctly, inner IPs are not modified, // and L2 dst matches the guest's NIC. 
- let v6_encap_meta = pkt1.meta().outer.ip.as_ref().unwrap().ip6().unwrap(); - assert_eq!(v6_encap_meta.src, g1_cfg.phys_ip); - assert_eq!(v6_encap_meta.dst, g2_cfg.phys_ip); - assert_eq!(pkt1.meta().inner_ether().dst, g2_cfg.guest_mac); - assert_eq!(pkt1.meta().inner_ether().src, g1_cfg.guest_mac); - assert_eq!(pkt1.meta().inner_ip4().unwrap().src, g1_cfg.ipv4().private_ip); - assert_eq!(pkt1.meta().inner_ip4().unwrap().dst, dst_ip); + let v6_encap_meta = &pkt1.meta().outer_v6; + assert_eq!(v6_encap_meta.source(), g1_cfg.phys_ip); + assert_eq!(v6_encap_meta.destination(), g2_cfg.phys_ip); + assert_eq!(pkt1.meta().inner_eth.destination(), g2_cfg.guest_mac); + assert_eq!(pkt1.meta().inner_eth.source(), g1_cfg.guest_mac); + let ValidL3::Ipv4(inner_ip4) = &pkt1.meta().inner_l3 else { + panic!("encapped v4 packet did not parse back as v4"); + }; + assert_eq!(inner_ip4.source(), g1_cfg.ipv4().private_ip); + assert_eq!(inner_ip4.destination(), dst_ip); // Now deliver the packet to node g2. - let res = g2.port.process(In, &mut pkt1, ActionMeta::new()); + let res = g2.port.process(In, pkt1); incr!( g2, [ @@ -4285,11 +4361,11 @@ fn port_as_router_target() { "stats.port.in_modified, stats.port.in_uft_miss, uft.in", ] ); - assert!(matches!(res, Ok(ProcessResult::Modified))); + expect_modified!(res, pkt1_m); // A reply from that address must be allowed out by g2, and accepted // by g1. 
- let mut pkt2 = gen_icmpv4_echo_reply( + let mut pkt2_m = gen_icmpv4_echo_reply( g2_cfg.guest_mac, g2_cfg.gateway_mac, dst_ip, @@ -4299,13 +4375,15 @@ fn port_as_router_target() { data, 1, ); + let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); - let res = g2.port.process(Out, &mut pkt2, ActionMeta::new()); + let res = g2.port.process(Out, pkt2); incr!(g2, ["stats.port.out_modified, stats.port.out_uft_miss, uft.out",]); - assert!(matches!(res, Ok(ProcessResult::Modified))); + expect_modified!(res, pkt2_m); - let res = g1.port.process(In, &mut pkt2, ActionMeta::new()); - assert!(matches!(res, Ok(ProcessResult::Modified))); + let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt2); + expect_modified!(res, pkt2_m); } #[test] @@ -4373,25 +4451,6 @@ fn select_eip_conditioned_on_igw() { }, }; - // let ip_cfg = IpCfg::Ipv4( - // Ipv4Cfg { - // vpc_subnet: "172.30.0.0/22".parse().unwrap(), - // private_ip: "172.30.0.5".parse().unwrap(), - // gateway_ip: "172.30.0.1".parse().unwrap(), - // external_ips: ExternalIpCfg { - // snat: Some(SNat4Cfg { - // external_ip: "10.77.77.13".parse().unwrap(), - // ports: 1025..=4096, - // }), - // ephemeral_ip: Some("192.168.0.1".parse().unwrap()), - // floating_ips: vec![ - // "192.168.0.2".parse().unwrap(), - // "192.168.0.3".parse().unwrap(), - // "192.168.0.4".parse().unwrap(), - // ], - // }, - // }); - let g1_cfg = g1_cfg2(ip_cfg); let mut g1 = oxide_net_setup("g1_port", &g1_cfg, None, None); g1.port.start(); @@ -4483,7 +4542,7 @@ fn select_eip_conditioned_on_igw() { let data = b"reunion\0"; // Default route. 
- let mut pkt1 = gen_icmp_echo_req( + let mut pkt1_m = gen_icmp_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, g1_cfg.ipv4_cfg().unwrap().private_ip.into(), @@ -4493,10 +4552,12 @@ fn select_eip_conditioned_on_igw() { &data[..], 1, ); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()).unwrap(); - assert!(matches!(res, ProcessResult::Modified)); + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + expect_modified!(res, pkt1_m); + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap().to_full_meta(); assert_eq!( - pkt1.meta().inner_ip4().unwrap().src, + pkt1.meta().inner_ip4().unwrap().source(), g1_cfg.ipv4().external_ips.ephemeral_ip.unwrap() ); incr!( @@ -4510,7 +4571,7 @@ fn select_eip_conditioned_on_igw() { ); // 1.1.1.0/24 - let mut pkt1 = gen_icmp_echo_req( + let mut pkt2_m = gen_icmp_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, g1_cfg.ipv4_cfg().unwrap().private_ip.into(), @@ -4520,10 +4581,12 @@ fn select_eip_conditioned_on_igw() { &data[..], 1, ); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()).unwrap(); - assert!(matches!(res, ProcessResult::Modified)); + let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt2); + expect_modified!(res, pkt2_m); + let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap().to_full_meta(); assert!(&g1_cfg.ipv4().external_ips.floating_ips[..2] - .contains(&pkt1.meta().inner_ip4().unwrap().src)); + .contains(&pkt2.meta().inner_ip4().unwrap().source())); incr!( g1, [ @@ -4535,7 +4598,7 @@ fn select_eip_conditioned_on_igw() { ); // 2.2.2.0/24 - let mut pkt1 = gen_icmp_echo_req( + let mut pkt3_m = gen_icmp_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, g1_cfg.ipv4_cfg().unwrap().private_ip.into(), @@ -4545,10 +4608,12 @@ fn select_eip_conditioned_on_igw() { &data[..], 1, ); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()).unwrap(); - assert!(matches!(res, ProcessResult::Modified)); 
+ let pkt3 = parse_outbound(&mut pkt3_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt3); + expect_modified!(res, pkt3_m); + let pkt3 = parse_inbound(&mut pkt3_m, VpcParser {}).unwrap().to_full_meta(); assert_eq!( - pkt1.meta().inner_ip4().unwrap().src, + pkt3.meta().inner_ip4().unwrap().source(), g1_cfg.ipv4().external_ips.floating_ips[2] ); incr!( @@ -4562,7 +4627,7 @@ fn select_eip_conditioned_on_igw() { ); // 3.3.3.0/24 - let mut pkt1 = gen_icmp_echo_req( + let mut pkt4_m = gen_icmp_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, g1_cfg.ipv4_cfg().unwrap().private_ip.into(), @@ -4572,7 +4637,8 @@ fn select_eip_conditioned_on_igw() { &data[..], 1, ); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()).unwrap(); + let pkt4 = parse_outbound(&mut pkt4_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt4).unwrap(); assert!(matches!(res, ProcessResult::Drop { .. })); incr!( g1, @@ -4584,7 +4650,7 @@ fn select_eip_conditioned_on_igw() { ); // 4.4.4.0/24 - let mut pkt1 = gen_icmp_echo_req( + let mut pkt5_m = gen_icmp_echo_req( g1_cfg.guest_mac, g1_cfg.gateway_mac, g1_cfg.ipv4_cfg().unwrap().private_ip.into(), @@ -4594,10 +4660,12 @@ fn select_eip_conditioned_on_igw() { &data[..], 1, ); - let res = g1.port.process(Out, &mut pkt1, ActionMeta::new()).unwrap(); - assert!(matches!(res, ProcessResult::Modified)); + let pkt5 = parse_outbound(&mut pkt5_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt5); + expect_modified!(res, pkt5_m); + let pkt5 = parse_inbound(&mut pkt5_m, VpcParser {}).unwrap().to_full_meta(); assert!(&g1_cfg.ipv4().external_ips.floating_ips[..] 
- .contains(&pkt1.meta().inner_ip4().unwrap().src)); + .contains(&pkt5.meta().inner_ip4().unwrap().source())); incr!( g1, [ diff --git a/rust-toolchain.toml b/rust-toolchain.toml index bbf217f2..5f3ff177 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,3 @@ [toolchain] -channel = "1.81.0" +channel = "1.82.0" profile = "default" diff --git a/xde/Cargo.toml b/xde/Cargo.toml index 24ab5076..371a00df 100644 --- a/xde/Cargo.toml +++ b/xde/Cargo.toml @@ -11,6 +11,8 @@ illumos-sys-hdrs = { workspace = true, features = ["kernel"] } opte = { workspace = true, features = ["engine", "kernel"], default-features = false } oxide-vpc = { workspace = true, features = ["engine", "kernel"], default-features = false } +ingot.workspace = true + bitflags.workspace = true postcard.workspace = true serde.workspace = true diff --git a/xde/rust-toolchain.toml b/xde/rust-toolchain.toml index fe1a3bfa..6965878b 100644 --- a/xde/rust-toolchain.toml +++ b/xde/rust-toolchain.toml @@ -1,5 +1,5 @@ [toolchain] -channel = "nightly-2024-05-12" +channel = "nightly-2024-11-18" target = "x86_64-unknown-illumos" components = [ "clippy", "rustfmt", "rust-src" ] profile = "minimal" diff --git a/xde/src/dls/mod.rs b/xde/src/dls/mod.rs index 6cab4783..5cc7aaa0 100644 --- a/xde/src/dls/mod.rs +++ b/xde/src/dls/mod.rs @@ -21,8 +21,7 @@ use illumos_sys_hdrs::c_int; use illumos_sys_hdrs::datalink_id_t; use illumos_sys_hdrs::uintptr_t; use illumos_sys_hdrs::ENOENT; -use opte::engine::packet::Packet; -use opte::engine::packet::PacketState; +use opte::ddi::mblk::MsgBlk; pub use sys::*; /// An integer ID used by DLS to refer to a given link. @@ -146,8 +145,7 @@ impl DlsLink { Ok(DlsStream { inner: Some(DlsStreamInner { dld_str }), link: mph.link_id(), - } - .into()) + }) } else { self.release(mph); Err(res) @@ -200,7 +198,7 @@ impl DlsStream { /// but for now we pass only a single packet at a time. 
pub fn tx_drop_on_no_desc( &self, - pkt: Packet, + pkt: MsgBlk, hint: uintptr_t, flags: MacTxFlags, ) { @@ -214,9 +212,10 @@ impl DlsStream { let mut raw_flags = flags.bits(); raw_flags |= MAC_DROP_ON_NO_DESC; unsafe { + // mac_tx(self.mch, pkt.unwrap_mblk(), hint, raw_flags, &mut ret_mp) str_mdata_fastpath_put( inner.dld_str.as_ptr(), - pkt.unwrap_mblk(), + pkt.unwrap_mblk().as_ptr(), hint, raw_flags, ) diff --git a/xde/src/lib.rs b/xde/src/lib.rs index 1684813d..4c960f82 100644 --- a/xde/src/lib.rs +++ b/xde/src/lib.rs @@ -6,7 +6,6 @@ // xde - A mac provider for OPTE-based network implementations. #![feature(extern_types)] -#![feature(panic_info_message)] #![no_std] #![allow(non_upper_case_globals)] // XXX We do not use double in the kernel. We should not allow @@ -20,6 +19,7 @@ #![allow(non_snake_case)] // for bindgen code in ip.rs #![feature(alloc_error_handler)] #![feature(rustc_private)] +#![feature(maybe_uninit_slice)] #![deny(unused_must_use)] mod ioctl; @@ -45,6 +45,7 @@ pub mod ip; pub mod mac; pub mod route; pub mod secpolicy; +pub mod stats; pub mod sys; pub mod xde; diff --git a/xde/src/mac/mod.rs b/xde/src/mac/mod.rs index 70cf0aa9..28dcbd2d 100644 --- a/xde/src/mac/mod.rs +++ b/xde/src/mac/mod.rs @@ -20,10 +20,8 @@ use core::ffi::CStr; use core::fmt; use core::ptr; use illumos_sys_hdrs::*; +use opte::ddi::mblk::MsgBlk; use opte::engine::ether::EtherAddr; -use opte::engine::packet::Initialized; -use opte::engine::packet::Packet; -use opte::engine::packet::PacketState; pub use sys::*; /// Errors while opening a MAC handle. @@ -209,16 +207,22 @@ impl MacClientHandle { /// but for now we pass only a single packet at a time. pub fn tx( &self, - pkt: Packet, + pkt: MsgBlk, hint: uintptr_t, flags: MacTxFlags, - ) -> Option> { + ) -> Option { // We must unwrap the raw `mblk_t` out of the `pkt` here, // otherwise the mblk_t would be dropped at the end of this // function along with `pkt`. 
let mut ret_mp = ptr::null_mut(); unsafe { - mac_tx(self.mch, pkt.unwrap_mblk(), hint, flags.bits(), &mut ret_mp) + mac_tx( + self.mch, + pkt.unwrap_mblk().as_ptr(), + hint, + flags.bits(), + &mut ret_mp, + ) }; if !ret_mp.is_null() { // Unwrap: We know the ret_mp is valid because we gave @@ -229,7 +233,7 @@ impl MacClientHandle { // XXX Technically we are still only passing single // packets, but eventually we will pass packet chains and // the sentence above will hold. - Some(unsafe { Packet::wrap_mblk(ret_mp).unwrap() }) + Some(unsafe { MsgBlk::wrap_mblk(ret_mp).unwrap() }) } else { None } @@ -244,7 +248,7 @@ impl MacClientHandle { /// but for now we pass only a single packet at a time. pub fn tx_drop_on_no_desc( &self, - pkt: Packet, + pkt: MsgBlk, hint: uintptr_t, flags: MacTxFlags, ) { @@ -255,7 +259,13 @@ impl MacClientHandle { raw_flags |= MAC_DROP_ON_NO_DESC; let mut ret_mp = ptr::null_mut(); unsafe { - mac_tx(self.mch, pkt.unwrap_mblk(), hint, raw_flags, &mut ret_mp) + mac_tx( + self.mch, + pkt.unwrap_mblk().as_ptr(), + hint, + raw_flags, + &mut ret_mp, + ) }; debug_assert_eq!(ret_mp, ptr::null_mut()); } diff --git a/xde/src/route.rs b/xde/src/route.rs index 089bc36c..ba863ae3 100644 --- a/xde/src/route.rs +++ b/xde/src/route.rs @@ -20,7 +20,7 @@ use opte::ddi::sync::KRwLock; use opte::ddi::sync::KRwLockType; use opte::ddi::time::Moment; use opte::engine::ether::EtherAddr; -use opte::engine::ip6::Ipv6Addr; +use opte::engine::ip::v6::Ipv6Addr; // XXX: completely arbitrary timeouts. /// The duration a cached route remains valid for before it must be diff --git a/xde/src/stats.rs b/xde/src/stats.rs new file mode 100644 index 00000000..cc02dc5d --- /dev/null +++ b/xde/src/stats.rs @@ -0,0 +1,101 @@ +use opte::api::Direction; +use opte::ddi::kstat::KStatProvider; +use opte::ddi::kstat::KStatU64; +use opte::engine::packet::ParseError; +use opte::ingot::types::ParseError as IngotError; + +/// Top-level KStats for XDE. 
+#[derive(KStatProvider)] +pub struct XdeStats { + /// The number of inbound packets dropped as explicitly + /// rejected during parsing. + in_drop_reject: KStatU64, + /// The number of inbound packets dropped with an unexpected + /// protocol number. + in_drop_unwanted_proto: KStatU64, + /// The number of inbound packets dropped for having + /// insufficient bytes to read the standard set of headers. + in_drop_truncated: KStatU64, + /// The number of inbound packets dropped due to a header being + /// split across `mblk_t` boundaries. + in_drop_straddled: KStatU64, + /// The number of inbound packets dropped due to having an illegal + /// value in a mandatory/critical field. + in_drop_illegal_val: KStatU64, + /// The number of inbound packets dropped due to reporting more + /// bytes than the packet contains. + in_drop_bad_len: KStatU64, + /// The number of inbound packets dropped due to the presence of + /// unrecognised critical options. + in_drop_bad_tun_opt: KStatU64, + /// The number of inbound packets dropped for other reasons, including + /// parser programming errors. + in_drop_misc: KStatU64, + + /// The number of outbound packets dropped as explicitly + /// rejected during parsing. + out_drop_reject: KStatU64, + /// The number of outbound packets dropped with an unexpected + /// protocol number. + out_drop_unwanted_proto: KStatU64, + /// The number of outbound packets dropped for having + /// insufficient bytes to read the standard set of headers. + out_drop_truncated: KStatU64, + /// The number of outbound packets dropped due to a header being + /// split across `mblk_t` boundaries. + out_drop_straddled: KStatU64, + /// The number of outbound packets dropped due to having an illegal + /// value in a mandatory/critical field. + out_drop_illegal_val: KStatU64, + /// The number of outbound packets dropped due to reporting more + /// bytes than the packet contains. 
+ out_drop_bad_len: KStatU64, + /// The number of outbound packets dropped for other reasons, including + /// parser programming errors. + out_drop_misc: KStatU64, + // NOTE: tun_opt is not relevant to outbound packets -- no encapsulation + // is in use. +} + +impl XdeStats { + pub fn parse_error(&mut self, dir: Direction, err: &ParseError) { + use Direction::*; + match (dir, err) { + (In, ParseError::IngotError(e)) => match e.error() { + IngotError::Unwanted => self.in_drop_unwanted_proto += 1, + IngotError::TooSmall | IngotError::NoRemainingChunks => { + self.in_drop_truncated += 1 + } + IngotError::StraddledHeader => self.in_drop_straddled += 1, + IngotError::Reject => self.in_drop_reject += 1, + IngotError::IllegalValue => self.in_drop_illegal_val += 1, + IngotError::NeedsHint | IngotError::CannotAccept => { + self.in_drop_misc += 1 + } + }, + (In, ParseError::IllegalValue(_)) => self.in_drop_illegal_val += 1, + (In, ParseError::BadLength(_)) => self.in_drop_bad_len += 1, + (In, ParseError::UnrecognisedTunnelOpt { .. 
}) => { + self.in_drop_bad_tun_opt += 1 + } + + (Out, ParseError::IngotError(e)) => match e.error() { + IngotError::Unwanted => self.out_drop_unwanted_proto += 1, + IngotError::TooSmall | IngotError::NoRemainingChunks => { + self.out_drop_truncated += 1 + } + IngotError::StraddledHeader => self.out_drop_straddled += 1, + IngotError::Reject => self.out_drop_reject += 1, + IngotError::IllegalValue => self.out_drop_illegal_val += 1, + IngotError::NeedsHint | IngotError::CannotAccept => { + self.out_drop_misc += 1 + } + }, + (Out, ParseError::IllegalValue(_)) => { + self.out_drop_illegal_val += 1 + } + (Out, ParseError::BadLength(_)) => self.out_drop_bad_len += 1, + (Out, _) => self.out_drop_misc += 1, + } + } +} diff --git a/xde/src/xde.rs b/xde/src/xde.rs index c842c0eb..d35bb37e 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -26,6 +26,7 @@ use crate::route::Route; use crate::route::RouteCache; use crate::route::RouteKey; use crate::secpolicy; +use crate::stats::XdeStats; use crate::sys; use crate::warn; use alloc::boxed::Box; @@ -41,6 +42,7 @@ use core::ptr::addr_of; use core::ptr::addr_of_mut; use core::time::Duration; use illumos_sys_hdrs::*; +use ingot::geneve::GeneveRef; use opte::api::ClearXdeUnderlayReq; use opte::api::CmdOk; use opte::api::Direction; @@ -51,24 +53,25 @@ use opte::api::OpteError; use opte::api::SetXdeUnderlayReq; use opte::api::XDE_IOC_OPTE_CMD; use opte::d_error::LabelBlock; +use opte::ddi::kstat::KStatNamed; +use opte::ddi::kstat::KStatProvider; +use opte::ddi::mblk::MsgBlk; +use opte::ddi::mblk::MsgBlkChain; use opte::ddi::sync::KMutex; use opte::ddi::sync::KMutexType; use opte::ddi::sync::KRwLock; +use opte::ddi::sync::KRwLockReadGuard; use opte::ddi::sync::KRwLockType; use opte::ddi::time::Interval; use opte::ddi::time::Periodic; +use opte::engine::ether::EthernetRef; use opte::engine::geneve::Vni; -use opte::engine::headers::EncapMeta; use opte::engine::headers::IpAddr; use opte::engine::ioctl::{self as api}; -use 
opte::engine::ip6::Ipv6Addr; -use opte::engine::packet::Initialized; +use opte::engine::ip::v6::Ipv6Addr; use opte::engine::packet::InnerFlowId; use opte::engine::packet::Packet; -use opte::engine::packet::PacketChain; -use opte::engine::packet::PacketError; -use opte::engine::packet::Parsed; -use opte::engine::port::meta::ActionMeta; +use opte::engine::packet::ParseError; use opte::engine::port::Port; use opte::engine::port::PortBuilder; use opte::engine::port::ProcessResult; @@ -153,7 +156,7 @@ fn bad_packet_parse_probe( port: Option<&CString>, dir: Direction, mp: uintptr_t, - err: &PacketError, + err: &ParseError, ) { let port_str = match port { None => c"unknown", @@ -223,6 +226,7 @@ struct XdeState { vpc_map: Arc, v2b: Arc, underlay: KMutex>, + stats: KMutex>, } struct UnderlayState { @@ -250,10 +254,22 @@ impl XdeState { ectx, vpc_map: Arc::new(overlay::VpcMappings::new()), v2b: Arc::new(overlay::Virt2Boundary::new()), + stats: KMutex::new( + KStatNamed::new("xde", "xde", XdeStats::new()) + .expect("Name is well-constructed (len, no NUL bytes)"), + KMutexType::Driver, + ), } } } +fn stat_parse_error(dir: Direction, err: &ParseError) { + let xde = get_xde_state(); + let mut stats = xde.stats.lock(); + + stats.vals.parse_error(dir, err); +} + #[repr(C)] pub struct XdeDev { devname: String, @@ -895,7 +911,7 @@ fn clear_xde_underlay() -> Result { msg: "underlay not yet initialized".into(), }); } - if unsafe { xde_devs.read().len() } > 0 { + if unsafe { !xde_devs.read().is_empty() } { return Err(OpteError::System { errno: EBUSY, msg: "underlay in use by attached ports".into(), @@ -1208,7 +1224,7 @@ unsafe extern "C" fn xde_detach( _ => return DDI_FAILURE, } - if xde_devs.read().len() > 0 { + if !xde_devs.read().is_empty() { warn!("failed to detach: outstanding ports"); return DDI_FAILURE; } @@ -1276,19 +1292,17 @@ static mut xde_devops: dev_ops = dev_ops { // Safety: Yes, this is a mutable static. No, there is no race as // it's mutated only during `_init()`. 
Yes, it needs to be mutable // to allow `dld_init_ops()` to set `cb_str`. - devo_cb_ops: unsafe { addr_of!(xde_cb_ops) }, + devo_cb_ops: addr_of!(xde_cb_ops), devo_bus_ops: 0 as *const bus_ops, devo_power: nodev_power, devo_quiesce: ddi_quiesce_not_needed, }; #[no_mangle] -static xde_modldrv: modldrv = unsafe { - modldrv { - drv_modops: addr_of!(mod_driverops), - drv_linkinfo: XDE_STR, - drv_dev_ops: addr_of!(xde_devops), - } +static xde_modldrv: modldrv = modldrv { + drv_modops: addr_of!(mod_driverops), + drv_linkinfo: XDE_STR, + drv_dev_ops: addr_of!(xde_devops), }; #[no_mangle] @@ -1383,11 +1397,16 @@ unsafe extern "C" fn xde_mc_unicst( 0 } -fn guest_loopback_probe(pkt: &Packet, src: &XdeDev, dst: &XdeDev) { +fn guest_loopback_probe( + mblk_addr: uintptr_t, + flow: &InnerFlowId, + src: &XdeDev, + dst: &XdeDev, +) { unsafe { __dtrace_probe_guest__loopback( - pkt.mblk_addr(), - pkt.flow(), + mblk_addr, + flow, src.port.name_cstr().as_ptr() as uintptr_t, dst.port.name_cstr().as_ptr() as uintptr_t, ) @@ -1397,29 +1416,51 @@ fn guest_loopback_probe(pkt: &Packet, src: &XdeDev, dst: &XdeDev) { #[no_mangle] fn guest_loopback( src_dev: &XdeDev, - mut pkt: Packet, + devs: &KRwLockReadGuard>>, + mut pkt: MsgBlk, vni: Vni, -) -> *mut mblk_t { +) { use Direction::*; - let ether_dst = pkt.meta().inner.ether.dst; - let devs = unsafe { xde_devs.read() }; + + let mblk_addr = pkt.mblk_addr(); + + // Loopback now requires a reparse on loopback to account for UFT fastpath. + // When viona serves us larger packets, we needn't worry about allocing + // the encap on. + // We might be able to do better in the interim, but that costs us time. 
+ + let parsed_pkt = match Packet::parse_inbound(pkt.iter_mut(), VpcParser {}) { + Ok(pkt) => pkt, + Err(e) => { + stat_parse_error(Direction::In, &e); + opte::engine::dbg!("Loopback bad packet: {:?}", e); + bad_packet_parse_probe(None, Direction::In, mblk_addr, &e); + + return; + } + }; + + let flow = parsed_pkt.flow(); + + let ether_dst = parsed_pkt.meta().inner_eth.destination(); let maybe_dest_dev = devs.iter().find(|x| x.vni == vni && x.port.mac_addr() == ether_dst); match maybe_dest_dev { Some(dest_dev) => { - guest_loopback_probe(&pkt, src_dev, dest_dev); + guest_loopback_probe(mblk_addr, &flow, src_dev, dest_dev); // We have found a matching Port on this host; "loop back" // the packet into the inbound processing path of the // destination Port. - match dest_dev.port.process(In, &mut pkt, ActionMeta::new()) { - Ok(ProcessResult::Modified) => { + match dest_dev.port.process(In, parsed_pkt) { + Ok(ProcessResult::Modified(emit_spec)) => { + let pkt = emit_spec.apply(pkt); unsafe { mac::mac_rx( dest_dev.mh, ptr::null_mut(), - pkt.unwrap_mblk(), + pkt.unwrap_mblk().as_ptr(), ) }; } @@ -1441,7 +1482,7 @@ fn guest_loopback( mac::mac_rx( dest_dev.mh, ptr::null_mut(), - pkt.unwrap_mblk(), + pkt.unwrap_mblk().as_ptr(), ) }; } @@ -1466,8 +1507,6 @@ fn guest_loopback( ); } } - - ptr::null_mut() } #[no_mangle] @@ -1494,7 +1533,7 @@ unsafe extern "C" fn xde_mc_tx( // pointers are `Copy`. 
// ================================================================ __dtrace_probe_tx(mp_chain as uintptr_t); - let Ok(mut chain) = PacketChain::new(mp_chain) else { + let Ok(mut chain) = MsgBlkChain::new(mp_chain) else { bad_packet_probe( Some(src_dev.port.name_cstr()), Direction::Out, @@ -1516,17 +1555,14 @@ unsafe extern "C" fn xde_mc_tx( } #[inline] -unsafe fn xde_mc_tx_one( - src_dev: &XdeDev, - pkt: Packet, -) -> *mut mblk_t { +unsafe fn xde_mc_tx_one(src_dev: &XdeDev, mut pkt: MsgBlk) -> *mut mblk_t { let parser = src_dev.port.network().parser(); let mblk_addr = pkt.mblk_addr(); - let mut pkt = match pkt.parse(Direction::Out, parser) { + let parsed_pkt = match Packet::parse_outbound(pkt.iter_mut(), parser) { Ok(pkt) => pkt, Err(e) => { - // TODO Add bad packet stat. - // + stat_parse_error(Direction::Out, &e); + // NOTE: We are using the individual mblk_t as read only // here to get the pointer value so that the DTrace consumer // can examine the packet on failure. @@ -1535,7 +1571,7 @@ unsafe fn xde_mc_tx_one( Some(src_dev.port.name_cstr()), Direction::Out, mblk_addr, - &e.into(), + &e, ); return ptr::null_mut(); } @@ -1555,6 +1591,7 @@ unsafe fn xde_mc_tx_one( // refresh my memory on all of this. // // TODO Is there way to set mac_tx to must use result? + drop(parsed_pkt); stream.tx_drop_on_no_desc(pkt, hint, MacTxFlags::empty()); return ptr::null_mut(); } @@ -1564,34 +1601,25 @@ unsafe fn xde_mc_tx_one( // The port processing code will fire a probe that describes what // action was taken -- there should be no need to add probes or // prints here. - let res = port.process(Direction::Out, &mut pkt, ActionMeta::new()); - match res { - Ok(ProcessResult::Modified) => { - let meta = pkt.meta(); + let res = port.process(Direction::Out, parsed_pkt); + match res { + Ok(ProcessResult::Modified(emit_spec)) => { // If the outer IPv6 destination is the same as the // source, then we need to loop the packet inbound to the // guest on this same host. 
- let ip = match meta.outer.ip { + let (ip6_src, ip6_dst) = match emit_spec.outer_ip6_addrs() { Some(v) => v, None => { // XXX add SDT probe // XXX add stat - opte::engine::dbg!("no outer ip header, dropping"); - return ptr::null_mut(); - } - }; - - let ip6 = match ip.ip6() { - Some(v) => v, - None => { - opte::engine::dbg!("outer IP header is not v6, dropping"); + opte::engine::dbg!("no outer IPv6 header, dropping"); return ptr::null_mut(); } }; - let vni = match meta.outer.encap { - Some(EncapMeta::Geneve(geneve)) => geneve.vni, + let vni = match emit_spec.outer_encap_vni() { + Some(vni) => vni, None => { // XXX add SDT probe // XXX add stat @@ -1600,10 +1628,18 @@ unsafe fn xde_mc_tx_one( } }; - if ip6.dst == ip6.src { - return guest_loopback(src_dev, pkt, vni); + let devs = unsafe { xde_devs.read() }; + + let l4_hash = emit_spec.l4_hash(); + let out_pkt = emit_spec.apply(pkt); + + if ip6_src == ip6_dst { + guest_loopback(src_dev, &devs, out_pkt, vni); + return ptr::null_mut(); } + drop(devs); + // Currently the overlay layer leaves the outer frame // destination and source zero'd. Ask IRE for the route // associated with the underlay destination. Then ask NCE @@ -1615,20 +1651,21 @@ unsafe fn xde_mc_tx_one( // results for a given dst + entropy. These have a fairly tight // expiry so that we can actually react to new reachability/load // info from DDM. - let my_key = RouteKey { dst: ip6.dst, l4_hash: meta.l4_hash() }; + let my_key = RouteKey { dst: ip6_dst, l4_hash: Some(l4_hash) }; let Route { src, dst, underlay_dev } = src_dev.routes.next_hop(my_key, src_dev); // Get a pointer to the beginning of the outer frame and // fill in the dst/src addresses before sending out the // device. - let mblk = pkt.unwrap_mblk(); + let mblk = out_pkt.unwrap_mblk().as_ptr(); let rptr = (*mblk).b_rptr; ptr::copy(dst.as_ptr(), rptr, 6); ptr::copy(src.as_ptr(), rptr.add(6), 6); // Unwrap: We know the packet is good because we just // unwrapped it above. 
- let new_pkt = Packet::::wrap_mblk(mblk).unwrap(); + let new_pkt = MsgBlk::wrap_mblk(mblk).unwrap(); + underlay_dev.stream.tx_drop_on_no_desc( new_pkt, hint, @@ -1641,7 +1678,11 @@ unsafe fn xde_mc_tx_one( } Ok(ProcessResult::Hairpin(hpkt)) => { - mac::mac_rx(src_dev.mh, ptr::null_mut(), hpkt.unwrap_mblk()); + mac::mac_rx( + src_dev.mh, + ptr::null_mut(), + hpkt.unwrap_mblk().as_ptr(), + ); } Ok(ProcessResult::Bypass) => { @@ -1796,7 +1837,7 @@ unsafe extern "C" fn xde_rx( Arc::increment_strong_count(mch_ptr); let stream: Arc = Arc::from_raw(mch_ptr); - let Ok(mut chain) = PacketChain::new(mp_chain) else { + let Ok(mut chain) = MsgBlkChain::new(mp_chain) else { bad_packet_probe( None, Direction::Out, @@ -1819,47 +1860,39 @@ unsafe extern "C" fn xde_rx( unsafe fn xde_rx_one( stream: &DlsStream, mrh: *mut mac::mac_resource_handle, - pkt: Packet, + mut pkt: MsgBlk, ) { + let mblk_addr = pkt.mblk_addr(); + // We must first parse the packet in order to determine where it // is to be delivered. let parser = VpcParser {}; - let mblk_addr = pkt.mblk_addr(); - let mut pkt = match pkt.parse(Direction::In, parser) { + let parsed_pkt = match Packet::parse_inbound(pkt.iter_mut(), parser) { Ok(pkt) => pkt, Err(e) => { - // TODO Add bad packet stat. - // + stat_parse_error(Direction::In, &e); + // NOTE: We are using the individual mblk_t as read only // here to get the pointer value so that the DTrace consumer // can examine the packet on failure. // // We don't know the port yet, thus the None. opte::engine::dbg!("Tx bad packet: {:?}", e); - bad_packet_parse_probe(None, Direction::In, mblk_addr, &e.into()); + bad_packet_parse_probe(None, Direction::In, mblk_addr, &e); return; } }; - let meta = pkt.meta(); + let meta = parsed_pkt.meta(); let devs = xde_devs.read(); // Determine where to send packet based on Geneve VNI and // destination MAC address. 
- let geneve = match meta.outer.encap { - Some(EncapMeta::Geneve(geneve)) => geneve, - None => { - // TODO add stat - let msg = c"no geneve header, dropping"; - bad_packet_probe(None, Direction::In, pkt.mblk_addr(), msg); - opte::engine::dbg!("no geneve header, dropping"); - return; - } - }; + let vni = meta.outer_encap.vni(); + + let ether_dst = meta.inner_eth.destination(); - let vni = geneve.vni; - let ether_dst = meta.inner.ether.dst; let Some(dev) = devs.iter().find(|x| x.vni == vni && x.port.mac_addr() == ether_dst) else { @@ -1875,15 +1908,23 @@ unsafe fn xde_rx_one( // We are in passthrough mode, skip OPTE processing. if dev.passthrough { - mac::mac_rx(dev.mh, mrh, pkt.unwrap_mblk()); + drop(parsed_pkt); + mac::mac_rx(dev.mh, mrh, pkt.unwrap_mblk().as_ptr()); return; } let port = &dev.port; - let res = port.process(Direction::In, &mut pkt, ActionMeta::new()); + + let res = port.process(Direction::In, parsed_pkt); + match res { - Ok(ProcessResult::Modified | ProcessResult::Bypass) => { - mac::mac_rx(dev.mh, mrh, pkt.unwrap_mblk()); + Ok(ProcessResult::Bypass) => { + mac::mac_rx(dev.mh, mrh, pkt.unwrap_mblk().as_ptr()); + } + Ok(ProcessResult::Modified(emit_spec)) => { + let npkt = emit_spec.apply(pkt); + + mac::mac_rx(dev.mh, mrh, npkt.unwrap_mblk().as_ptr()); } Ok(ProcessResult::Hairpin(hppkt)) => { stream.tx_drop_on_no_desc(hppkt, 0, MacTxFlags::empty()); diff --git a/xde/x86_64-unknown-unknown.json b/xde/x86_64-unknown-unknown.json index 4cafc73d..c96cd9d0 100644 --- a/xde/x86_64-unknown-unknown.json +++ b/xde/x86_64-unknown-unknown.json @@ -8,9 +8,8 @@ "eh-frame-header": false, "frame-pointer": "always", "executables": true, - "features": "-mmx,-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-3dnow,-3dnowa,-avx,-avx2,+soft-float", + "features": "-mmx,-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,+soft-float", "has-rpath": true, - "is-builtin": false, "is-like-solaris": true, "limit-rdylib-exports": false, "linker": "ld",