From e1b70435e9ab3597619acad05ff5397f479443fd Mon Sep 17 00:00:00 2001 From: Valentin Date: Sun, 10 Nov 2024 12:24:33 +0100 Subject: [PATCH] --- .cargo/config.toml | 9 + .github/workflows/check.yml | 44 ++ .gitignore | 1 + Cargo.lock | 473 ++++++++++++++++++ Cargo.toml | 29 ++ LICENSE | 21 + benchmark/Cargo.toml | 14 + benchmark/benches/benchmark.rs | 58 +++ changelog.md | 5 + generated assembly/x86_64_default/f32_to_i128 | 20 + generated assembly/x86_64_default/f32_to_i16 | 7 + generated assembly/x86_64_default/f32_to_i32 | 9 + generated assembly/x86_64_default/f32_to_i64 | 9 + generated assembly/x86_64_default/f32_to_i8 | 7 + generated assembly/x86_64_default/f32_to_u128 | 16 + generated assembly/x86_64_default/f32_to_u16 | 7 + generated assembly/x86_64_default/f32_to_u32 | 10 + generated assembly/x86_64_default/f32_to_u64 | 17 + generated assembly/x86_64_default/f32_to_u8 | 7 + generated assembly/x86_64_default/f64_to_i128 | 20 + generated assembly/x86_64_default/f64_to_i16 | 7 + generated assembly/x86_64_default/f64_to_i32 | 8 + generated assembly/x86_64_default/f64_to_i64 | 9 + generated assembly/x86_64_default/f64_to_i8 | 7 + generated assembly/x86_64_default/f64_to_u128 | 16 + generated assembly/x86_64_default/f64_to_u16 | 7 + generated assembly/x86_64_default/f64_to_u32 | 7 + generated assembly/x86_64_default/f64_to_u64 | 17 + generated assembly/x86_64_default/f64_to_u8 | 7 + generated assembly/x86_64_sse/f32_to_i128 | 20 + generated assembly/x86_64_sse/f32_to_i16 | 3 + generated assembly/x86_64_sse/f32_to_i32 | 3 + generated assembly/x86_64_sse/f32_to_i64 | 3 + generated assembly/x86_64_sse/f32_to_i8 | 3 + generated assembly/x86_64_sse/f32_to_u128 | 16 + generated assembly/x86_64_sse/f32_to_u16 | 3 + generated assembly/x86_64_sse/f32_to_u32 | 3 + generated assembly/x86_64_sse/f32_to_u64 | 9 + generated assembly/x86_64_sse/f32_to_u8 | 3 + generated assembly/x86_64_sse/f64_to_i128 | 20 + generated assembly/x86_64_sse/f64_to_i16 | 3 + generated 
assembly/x86_64_sse/f64_to_i32 | 3 + generated assembly/x86_64_sse/f64_to_i64 | 3 + generated assembly/x86_64_sse/f64_to_i8 | 3 + generated assembly/x86_64_sse/f64_to_u128 | 16 + generated assembly/x86_64_sse/f64_to_u16 | 3 + generated assembly/x86_64_sse/f64_to_u32 | 3 + generated assembly/x86_64_sse/f64_to_u64 | 9 + generated assembly/x86_64_sse/f64_to_u8 | 3 + generated assembly/x86_sse/f32_to_i128 | 52 ++ generated assembly/x86_sse/f32_to_i16 | 3 + generated assembly/x86_sse/f32_to_i32 | 3 + generated assembly/x86_sse/f32_to_i64 | 37 ++ generated assembly/x86_sse/f32_to_i8 | 3 + generated assembly/x86_sse/f32_to_u128 | 47 ++ generated assembly/x86_sse/f32_to_u16 | 3 + generated assembly/x86_sse/f32_to_u32 | 15 + generated assembly/x86_sse/f32_to_u64 | 43 ++ generated assembly/x86_sse/f32_to_u8 | 3 + generated assembly/x86_sse/f64_to_i128 | 52 ++ generated assembly/x86_sse/f64_to_i16 | 3 + generated assembly/x86_sse/f64_to_i32 | 3 + generated assembly/x86_sse/f64_to_i64 | 37 ++ generated assembly/x86_sse/f64_to_i8 | 3 + generated assembly/x86_sse/f64_to_u128 | 47 ++ generated assembly/x86_sse/f64_to_u16 | 3 + generated assembly/x86_sse/f64_to_u32 | 15 + generated assembly/x86_sse/f64_to_u64 | 43 ++ generated assembly/x86_sse/f64_to_u8 | 3 + readme.md | 39 ++ src/lib.rs | 166 ++++++ src/target_default.rs | 34 ++ src/target_x86_64_sse.rs | 188 +++++++ src/target_x86_sse.rs | 149 ++++++ tests/test.rs | 162 ++++++ xtask/Cargo.toml | 9 + xtask/src/main.rs | 374 ++++++++++++++ 77 files changed, 2539 insertions(+) create mode 100644 .cargo/config.toml create mode 100644 .github/workflows/check.yml create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 LICENSE create mode 100644 benchmark/Cargo.toml create mode 100644 benchmark/benches/benchmark.rs create mode 100644 changelog.md create mode 100644 generated assembly/x86_64_default/f32_to_i128 create mode 100644 generated assembly/x86_64_default/f32_to_i16 create 
mode 100644 generated assembly/x86_64_default/f32_to_i32 create mode 100644 generated assembly/x86_64_default/f32_to_i64 create mode 100644 generated assembly/x86_64_default/f32_to_i8 create mode 100644 generated assembly/x86_64_default/f32_to_u128 create mode 100644 generated assembly/x86_64_default/f32_to_u16 create mode 100644 generated assembly/x86_64_default/f32_to_u32 create mode 100644 generated assembly/x86_64_default/f32_to_u64 create mode 100644 generated assembly/x86_64_default/f32_to_u8 create mode 100644 generated assembly/x86_64_default/f64_to_i128 create mode 100644 generated assembly/x86_64_default/f64_to_i16 create mode 100644 generated assembly/x86_64_default/f64_to_i32 create mode 100644 generated assembly/x86_64_default/f64_to_i64 create mode 100644 generated assembly/x86_64_default/f64_to_i8 create mode 100644 generated assembly/x86_64_default/f64_to_u128 create mode 100644 generated assembly/x86_64_default/f64_to_u16 create mode 100644 generated assembly/x86_64_default/f64_to_u32 create mode 100644 generated assembly/x86_64_default/f64_to_u64 create mode 100644 generated assembly/x86_64_default/f64_to_u8 create mode 100644 generated assembly/x86_64_sse/f32_to_i128 create mode 100644 generated assembly/x86_64_sse/f32_to_i16 create mode 100644 generated assembly/x86_64_sse/f32_to_i32 create mode 100644 generated assembly/x86_64_sse/f32_to_i64 create mode 100644 generated assembly/x86_64_sse/f32_to_i8 create mode 100644 generated assembly/x86_64_sse/f32_to_u128 create mode 100644 generated assembly/x86_64_sse/f32_to_u16 create mode 100644 generated assembly/x86_64_sse/f32_to_u32 create mode 100644 generated assembly/x86_64_sse/f32_to_u64 create mode 100644 generated assembly/x86_64_sse/f32_to_u8 create mode 100644 generated assembly/x86_64_sse/f64_to_i128 create mode 100644 generated assembly/x86_64_sse/f64_to_i16 create mode 100644 generated assembly/x86_64_sse/f64_to_i32 create mode 100644 generated assembly/x86_64_sse/f64_to_i64 create mode 
100644 generated assembly/x86_64_sse/f64_to_i8 create mode 100644 generated assembly/x86_64_sse/f64_to_u128 create mode 100644 generated assembly/x86_64_sse/f64_to_u16 create mode 100644 generated assembly/x86_64_sse/f64_to_u32 create mode 100644 generated assembly/x86_64_sse/f64_to_u64 create mode 100644 generated assembly/x86_64_sse/f64_to_u8 create mode 100644 generated assembly/x86_sse/f32_to_i128 create mode 100644 generated assembly/x86_sse/f32_to_i16 create mode 100644 generated assembly/x86_sse/f32_to_i32 create mode 100644 generated assembly/x86_sse/f32_to_i64 create mode 100644 generated assembly/x86_sse/f32_to_i8 create mode 100644 generated assembly/x86_sse/f32_to_u128 create mode 100644 generated assembly/x86_sse/f32_to_u16 create mode 100644 generated assembly/x86_sse/f32_to_u32 create mode 100644 generated assembly/x86_sse/f32_to_u64 create mode 100644 generated assembly/x86_sse/f32_to_u8 create mode 100644 generated assembly/x86_sse/f64_to_i128 create mode 100644 generated assembly/x86_sse/f64_to_i16 create mode 100644 generated assembly/x86_sse/f64_to_i32 create mode 100644 generated assembly/x86_sse/f64_to_i64 create mode 100644 generated assembly/x86_sse/f64_to_i8 create mode 100644 generated assembly/x86_sse/f64_to_u128 create mode 100644 generated assembly/x86_sse/f64_to_u16 create mode 100644 generated assembly/x86_sse/f64_to_u32 create mode 100644 generated assembly/x86_sse/f64_to_u64 create mode 100644 generated assembly/x86_sse/f64_to_u8 create mode 100644 readme.md create mode 100644 src/lib.rs create mode 100644 src/target_default.rs create mode 100644 src/target_x86_64_sse.rs create mode 100644 src/target_x86_sse.rs create mode 100644 tests/test.rs create mode 100644 xtask/Cargo.toml create mode 100644 xtask/src/main.rs diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..4e374d5 --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,9 @@ +[alias] +xtask = "run --package xtask --" + +[profile.show-asm] 
+inherits = "release" +# more determinism +codegen-units=1 +# Values other than "thin" and "fat" cause the assembly to change in an undesirable way. It makes functions that have the same assembly link to each other via ".set". +lto = "thin" diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml new file mode 100644 index 0000000..17e5cae --- /dev/null +++ b/.github/workflows/check.yml @@ -0,0 +1,44 @@ +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + # We intentionally hardcode the stable/current version for the following reasons: + # + # - It makes it less likely that CI starts failing in the future despite the project not changing. + # - It makes us independent of the default Rust version that the GitHub runner comes with. + check_stable: + runs-on: ubuntu-24.04 + steps: + - run: sudo apt-get -qq install gcc-multilib qemu-user + - run: | + rustup --quiet toolchain uninstall stable + rustup --quiet toolchain install 1.82 --profile=default + rustup --quiet default 1.82 + - run: cargo install --quiet cargo-show-asm + - uses: actions/checkout@v4 + - run: cargo fmt --check + - run: cargo fetch --quiet --locked + - run: cargo clippy --quiet --workspace --all-targets -- --D=warnings + - run: cargo test --quiet --workspace + - run: cargo build --quiet --package xtask + - run: target/debug/xtask all + - run: git diff --exit-code -- "generated assembly" + + # For the MSRV we only care about the code compiling. + check_minimum_supported_rust_version: + runs-on: ubuntu-24.04 + steps: + - run: sudo apt-get -qq install gcc-multilib + - run: | + rustup --quiet toolchain uninstall stable + rustup --quiet toolchain install 1.82 --profile=default + rustup --quiet toolchain install 1.71 --profile=minimal + rustup --quiet default 1.71 + - uses: actions/checkout@v4 + - run: cargo fetch --quiet --locked + # xtask does not use MSRV because we don't publish it.
xtask's internal cargo calls use the default rustup pipeline, which is the MSRV. Note that we cannot run xtask through cargo because that forces the toolchain version for building xtask to be the same as the toolchain version for xtask's internal cargo calls through the RUSTUP_TOOLCHAIN environment variable. + - run: cargo +1.82 build --quiet --package xtask + - run: target/debug/xtask check diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..84dbc8a --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,473 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "anyhow" +version = "1.0.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "benchmark" +version = "0.1.0" +dependencies = [ + "criterion", + "fast-float-to-integer", +] + +[[package]] +name = "cast" +version = "0.3.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "clap" +version = "4.5.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97f376d85a664d5837dbae44bf546e6477a679ff6610010f17276f686d867e8" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.5.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19bc80abd44e4bed93ca373a0704ccbd1b710dc5749406201bb018272808dc54" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" + +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + 
"criterion-plot", + "is-terminal", + "itertools", + "num-traits", + "once_cell", + "oorandom", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + +[[package]] +name = "fast-float-to-integer" +version = "0.1.0" +dependencies = [ + "cfg-if", + "float_next_after", +] + +[[package]] +name = "float_next_after" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8" + +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", +] + +[[package]] +name = "hermit-abi" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" + +[[package]] +name = "is-terminal" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "libc" +version = "0.2.162" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "oorandom" +version = "11.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" + +[[package]] +name = "proc-macro2" +version = "1.0.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" 
+dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "serde" +version = "1.0.214" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.214" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.132" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "2.0.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" +dependencies = [ + "proc-macro2", + "quote", + 
"unicode-ident", +] + +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "unicode-ident" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "xtask" +version = "0.1.0" +dependencies = [ + "anyhow", + "regex", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..a8f5ddc --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,29 @@ +[workspace] +members = [ + "benchmark", + "xtask", +] + +[package] +name = "fast-float-to-integer" +version = "0.1.0" +authors = [ "Valentin Kettner " ] +edition = "2021" 
+rust-version = "1.71" +description = "Convert floating point values to integer types faster than the standard `as` operator." +repository = "https://github.com/e00E/fast-float-to-integer" +license = "MIT" +keywords = [ "float", "floating", "integer", "conversion", "convert" ] +include = [ "/src" ] + +[dependencies] +cfg-if = "1.0" + +[dev-dependencies] +float_next_after = "1.0" + +[features] +# This feature is for internal use. It ensures cargo-show-asm can get the assembly. +show-asm = [ ] +# This feature is for internal use. It disables all target specific code. +force-default = [ ] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..973ac28 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Valentin Kettner + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/benchmark/Cargo.toml b/benchmark/Cargo.toml new file mode 100644 index 0000000..6865ba8 --- /dev/null +++ b/benchmark/Cargo.toml @@ -0,0 +1,14 @@ +# This crate contains benchmarks. The benchmarks are not in the main library crate because then cargo forces us to compile criterion when compiling tests, which fails on some targets. + +[package] +name = "benchmark" +version = "0.1.0" +edition = "2021" + +[dev-dependencies] +criterion = { version = "0.5", default-features = false, features = [ "cargo_bench_support" ] } +fast-float-to-integer = { path = "..", features=["force-default"] } + +[[bench]] +name = "benchmark" +harness = false diff --git a/benchmark/benches/benchmark.rs b/benchmark/benches/benchmark.rs new file mode 100644 index 0000000..a747a58 --- /dev/null +++ b/benchmark/benches/benchmark.rs @@ -0,0 +1,58 @@ +// Unfortunately, these benchmarks are noisy. There are significant differences in the measured performance based on random code permutation or running the benchmarks at different times or on different machines. The same function benchmarked twice can appear to have very different performance. +// +// We've changed some of the criterion settings to help with this, but the problem persists. It would be nice to have a more real world benchmark. + +use criterion::{criterion_group, criterion_main, Criterion}; +use fast_float_to_integer as ffti; +use std::{hint::black_box, time::Duration}; + +// We create a dependency between the converted numbers so that the compiler or CPU cannot skip the computation. +macro_rules! 
create_benchmark { + ($c:ident, $name:literal, $function:path, $Float:ty) => { + let floats = [0 as $Float; 1_000]; + $c.bench_function($name, |b| { + b.iter(|| { + let mut result = 0; + for float in black_box(floats.as_slice()) { + let converted = $function(*float); + result ^= converted; + } + black_box(result); + }) + }); + }; +} + +pub fn benchmark(c: &mut Criterion) { + let mut group = c.benchmark_group("complex"); + group + .sample_size(10_000) + .measurement_time(Duration::from_secs_f32(1.0)) + .warm_up_time(Duration::from_secs_f32(0.1)) + .nresamples(1); + + create_benchmark! {group, "f32_to_i8_optimized", ffti::f32_to_i8, f32} + create_benchmark! {group, "f32_to_u8_optimized", ffti::f32_to_u8, f32} + create_benchmark! {group, "f32_to_i16_optimized", ffti::f32_to_i16, f32} + create_benchmark! {group, "f32_to_u16_optimized", ffti::f32_to_u16, f32} + create_benchmark! {group, "f32_to_i32_optimized", ffti::f32_to_i32, f32} + create_benchmark! {group, "f32_to_u32_optimized", ffti::f32_to_u32, f32} + create_benchmark! {group, "f32_to_i64_optimized", ffti::f32_to_i64, f32} + create_benchmark! {group, "f32_to_u64_optimized", ffti::f32_to_u64, f32} + create_benchmark! {group, "f32_to_i128_optimized", ffti::f32_to_i128, f32} + create_benchmark! {group, "f32_to_u128_optimized", ffti::f32_to_u128, f32} + + create_benchmark! {group, "f64_to_i8_optimized", ffti::f64_to_i8, f64} + create_benchmark! {group, "f64_to_u8_optimized", ffti::f64_to_u8, f64} + create_benchmark! {group, "f64_to_i16_optimized", ffti::f64_to_i16, f64} + create_benchmark! {group, "f64_to_u16_optimized", ffti::f64_to_u16, f64} + create_benchmark! {group, "f64_to_i32_optimized", ffti::f64_to_i32, f64} + create_benchmark! {group, "f64_to_u32_optimized", ffti::f64_to_u32, f64} + create_benchmark! {group, "f64_to_i64_optimized", ffti::f64_to_i64, f64} + create_benchmark! {group, "f64_to_u64_optimized", ffti::f64_to_u64, f64} + create_benchmark! 
{group, "f64_to_i128_optimized", ffti::f64_to_i128, f64} + create_benchmark! {group, "f64_to_u128_optimized", ffti::f64_to_u128, f64} +} + +criterion_group!(benches, benchmark); +criterion_main!(benches); diff --git a/changelog.md b/changelog.md new file mode 100644 index 0000000..97a82e0 --- /dev/null +++ b/changelog.md @@ -0,0 +1,5 @@ +## unreleased + +## 0.1.0 - 2024-11-10 + +- initial release diff --git a/generated assembly/x86_64_default/f32_to_i128 b/generated assembly/x86_64_default/f32_to_i128 new file mode 100644 index 0000000..32ca751 --- /dev/null +++ b/generated assembly/x86_64_default/f32_to_i128 @@ -0,0 +1,20 @@ +fast_float_to_integer::f32_to_i128: + push rax + movss dword ptr [rsp + 4], xmm0 + call qword ptr [rip + __fixsfti@GOTPCREL] + xor ecx, ecx + movss xmm0, dword ptr [rsp + 4] + ucomiss xmm0, dword ptr [rip + .L_0] + cmovb rax, rcx + movabs rsi, -9223372036854775808 + cmovb rdx, rsi + ucomiss xmm0, dword ptr [rip + .L_1] + movabs rsi, 9223372036854775807 + cmova rdx, rsi + mov rsi, -1 + cmova rax, rsi + ucomiss xmm0, xmm0 + cmovp rax, rcx + cmovp rdx, rcx + pop rcx + ret diff --git a/generated assembly/x86_64_default/f32_to_i16 b/generated assembly/x86_64_default/f32_to_i16 new file mode 100644 index 0000000..23b3b1d --- /dev/null +++ b/generated assembly/x86_64_default/f32_to_i16 @@ -0,0 +1,7 @@ +fast_float_to_integer::f32_to_i16: + movss xmm1, dword ptr [rip + .L_0] + maxss xmm1, xmm0 + movss xmm0, dword ptr [rip + .L_1] + minss xmm0, xmm1 + cvttss2si eax, xmm0 + ret diff --git a/generated assembly/x86_64_default/f32_to_i32 b/generated assembly/x86_64_default/f32_to_i32 new file mode 100644 index 0000000..9f69c31 --- /dev/null +++ b/generated assembly/x86_64_default/f32_to_i32 @@ -0,0 +1,9 @@ +fast_float_to_integer::f32_to_i32: + cvttss2si eax, xmm0 + ucomiss xmm0, dword ptr [rip + .L_0] + mov ecx, 2147483647 + cmovbe ecx, eax + xor eax, eax + ucomiss xmm0, xmm0 + cmovnp eax, ecx + ret diff --git a/generated assembly/x86_64_default/f32_to_i64 
b/generated assembly/x86_64_default/f32_to_i64 new file mode 100644 index 0000000..24a73f4 --- /dev/null +++ b/generated assembly/x86_64_default/f32_to_i64 @@ -0,0 +1,9 @@ +fast_float_to_integer::f32_to_i64: + cvttss2si rax, xmm0 + ucomiss xmm0, dword ptr [rip + .L_0] + movabs rcx, 9223372036854775807 + cmovbe rcx, rax + xor eax, eax + ucomiss xmm0, xmm0 + cmovnp rax, rcx + ret diff --git a/generated assembly/x86_64_default/f32_to_i8 b/generated assembly/x86_64_default/f32_to_i8 new file mode 100644 index 0000000..0c9e7a5 --- /dev/null +++ b/generated assembly/x86_64_default/f32_to_i8 @@ -0,0 +1,7 @@ +fast_float_to_integer::f32_to_i8: + movss xmm1, dword ptr [rip + .L_0] + maxss xmm1, xmm0 + movss xmm0, dword ptr [rip + .L_1] + minss xmm0, xmm1 + cvttss2si eax, xmm0 + ret diff --git a/generated assembly/x86_64_default/f32_to_u128 b/generated assembly/x86_64_default/f32_to_u128 new file mode 100644 index 0000000..776bcb6 --- /dev/null +++ b/generated assembly/x86_64_default/f32_to_u128 @@ -0,0 +1,16 @@ +fast_float_to_integer::f32_to_u128: + push rax + movss dword ptr [rsp + 4], xmm0 + call qword ptr [rip + __fixunssfti@GOTPCREL] + xor ecx, ecx + xorps xmm0, xmm0 + movss xmm1, dword ptr [rsp + 4] + ucomiss xmm1, xmm0 + cmovb rdx, rcx + cmovb rax, rcx + ucomiss xmm1, dword ptr [rip + .L_0] + mov rcx, -1 + cmova rax, rcx + cmova rdx, rcx + pop rcx + ret diff --git a/generated assembly/x86_64_default/f32_to_u16 b/generated assembly/x86_64_default/f32_to_u16 new file mode 100644 index 0000000..224035e --- /dev/null +++ b/generated assembly/x86_64_default/f32_to_u16 @@ -0,0 +1,7 @@ +fast_float_to_integer::f32_to_u16: + xorps xmm1, xmm1 + maxss xmm1, xmm0 + movss xmm0, dword ptr [rip + .L_0] + minss xmm0, xmm1 + cvttss2si eax, xmm0 + ret diff --git a/generated assembly/x86_64_default/f32_to_u32 b/generated assembly/x86_64_default/f32_to_u32 new file mode 100644 index 0000000..7a60977 --- /dev/null +++ b/generated assembly/x86_64_default/f32_to_u32 @@ -0,0 +1,10 @@ 
+fast_float_to_integer::f32_to_u32: + cvttss2si rax, xmm0 + xor ecx, ecx + xorps xmm1, xmm1 + ucomiss xmm0, xmm1 + cmovae ecx, eax + ucomiss xmm0, dword ptr [rip + .L_0] + mov eax, -1 + cmovbe eax, ecx + ret diff --git a/generated assembly/x86_64_default/f32_to_u64 b/generated assembly/x86_64_default/f32_to_u64 new file mode 100644 index 0000000..b290d52 --- /dev/null +++ b/generated assembly/x86_64_default/f32_to_u64 @@ -0,0 +1,17 @@ +fast_float_to_integer::f32_to_u64: + cvttss2si rax, xmm0 + mov rcx, rax + sar rcx, 63 + movaps xmm1, xmm0 + subss xmm1, dword ptr [rip + .L_0] + cvttss2si rdx, xmm1 + and rdx, rcx + or rdx, rax + xor ecx, ecx + xorps xmm1, xmm1 + ucomiss xmm0, xmm1 + cmovae rcx, rdx + ucomiss xmm0, dword ptr [rip + .L_1] + mov rax, -1 + cmovbe rax, rcx + ret diff --git a/generated assembly/x86_64_default/f32_to_u8 b/generated assembly/x86_64_default/f32_to_u8 new file mode 100644 index 0000000..65668ed --- /dev/null +++ b/generated assembly/x86_64_default/f32_to_u8 @@ -0,0 +1,7 @@ +fast_float_to_integer::f32_to_u8: + xorps xmm1, xmm1 + maxss xmm1, xmm0 + movss xmm0, dword ptr [rip + .L_0] + minss xmm0, xmm1 + cvttss2si eax, xmm0 + ret diff --git a/generated assembly/x86_64_default/f64_to_i128 b/generated assembly/x86_64_default/f64_to_i128 new file mode 100644 index 0000000..da89a58 --- /dev/null +++ b/generated assembly/x86_64_default/f64_to_i128 @@ -0,0 +1,20 @@ +fast_float_to_integer::f64_to_i128: + push rax + movsd qword ptr [rsp], xmm0 + call qword ptr [rip + __fixdfti@GOTPCREL] + xor ecx, ecx + movsd xmm0, qword ptr [rsp] + ucomisd xmm0, qword ptr [rip + .L_0] + cmovb rax, rcx + movabs rsi, -9223372036854775808 + cmovb rdx, rsi + ucomisd xmm0, qword ptr [rip + .L_1] + movabs rsi, 9223372036854775807 + cmova rdx, rsi + mov rsi, -1 + cmova rax, rsi + ucomisd xmm0, xmm0 + cmovp rax, rcx + cmovp rdx, rcx + pop rcx + ret diff --git a/generated assembly/x86_64_default/f64_to_i16 b/generated assembly/x86_64_default/f64_to_i16 new file mode 100644 
index 0000000..0dd6131 --- /dev/null +++ b/generated assembly/x86_64_default/f64_to_i16 @@ -0,0 +1,7 @@ +fast_float_to_integer::f64_to_i16: + movsd xmm1, qword ptr [rip + .L_0] + maxsd xmm1, xmm0 + movsd xmm0, qword ptr [rip + .L_1] + minsd xmm0, xmm1 + cvttsd2si eax, xmm0 + ret diff --git a/generated assembly/x86_64_default/f64_to_i32 b/generated assembly/x86_64_default/f64_to_i32 new file mode 100644 index 0000000..9091e35 --- /dev/null +++ b/generated assembly/x86_64_default/f64_to_i32 @@ -0,0 +1,8 @@ +fast_float_to_integer::f64_to_i32: + xor eax, eax + ucomisd xmm0, xmm0 + maxsd xmm0, qword ptr [rip + .L_0] + minsd xmm0, qword ptr [rip + .L_1] + cvttsd2si ecx, xmm0 + cmovnp eax, ecx + ret diff --git a/generated assembly/x86_64_default/f64_to_i64 b/generated assembly/x86_64_default/f64_to_i64 new file mode 100644 index 0000000..c877c69 --- /dev/null +++ b/generated assembly/x86_64_default/f64_to_i64 @@ -0,0 +1,9 @@ +fast_float_to_integer::f64_to_i64: + cvttsd2si rax, xmm0 + ucomisd xmm0, qword ptr [rip + .L_0] + movabs rcx, 9223372036854775807 + cmovbe rcx, rax + xor eax, eax + ucomisd xmm0, xmm0 + cmovnp rax, rcx + ret diff --git a/generated assembly/x86_64_default/f64_to_i8 b/generated assembly/x86_64_default/f64_to_i8 new file mode 100644 index 0000000..80cfe20 --- /dev/null +++ b/generated assembly/x86_64_default/f64_to_i8 @@ -0,0 +1,7 @@ +fast_float_to_integer::f64_to_i8: + movsd xmm1, qword ptr [rip + .L_0] + maxsd xmm1, xmm0 + movsd xmm0, qword ptr [rip + .L_1] + minsd xmm0, xmm1 + cvttsd2si eax, xmm0 + ret diff --git a/generated assembly/x86_64_default/f64_to_u128 b/generated assembly/x86_64_default/f64_to_u128 new file mode 100644 index 0000000..3d74896 --- /dev/null +++ b/generated assembly/x86_64_default/f64_to_u128 @@ -0,0 +1,16 @@ +fast_float_to_integer::f64_to_u128: + push rax + movsd qword ptr [rsp], xmm0 + call qword ptr [rip + __fixunsdfti@GOTPCREL] + xor ecx, ecx + xorpd xmm0, xmm0 + movsd xmm1, qword ptr [rsp] + ucomisd xmm1, xmm0 + cmovb rdx, 
rcx + cmovb rax, rcx + ucomisd xmm1, qword ptr [rip + .L_0] + mov rcx, -1 + cmova rax, rcx + cmova rdx, rcx + pop rcx + ret diff --git a/generated assembly/x86_64_default/f64_to_u16 b/generated assembly/x86_64_default/f64_to_u16 new file mode 100644 index 0000000..36aa825 --- /dev/null +++ b/generated assembly/x86_64_default/f64_to_u16 @@ -0,0 +1,7 @@ +fast_float_to_integer::f64_to_u16: + xorpd xmm1, xmm1 + maxsd xmm1, xmm0 + movsd xmm0, qword ptr [rip + .L_0] + minsd xmm0, xmm1 + cvttsd2si eax, xmm0 + ret diff --git a/generated assembly/x86_64_default/f64_to_u32 b/generated assembly/x86_64_default/f64_to_u32 new file mode 100644 index 0000000..abada1f --- /dev/null +++ b/generated assembly/x86_64_default/f64_to_u32 @@ -0,0 +1,7 @@ +fast_float_to_integer::f64_to_u32: + xorpd xmm1, xmm1 + maxsd xmm1, xmm0 + movsd xmm0, qword ptr [rip + .L_0] + minsd xmm0, xmm1 + cvttsd2si rax, xmm0 + ret diff --git a/generated assembly/x86_64_default/f64_to_u64 b/generated assembly/x86_64_default/f64_to_u64 new file mode 100644 index 0000000..ec633ac --- /dev/null +++ b/generated assembly/x86_64_default/f64_to_u64 @@ -0,0 +1,17 @@ +fast_float_to_integer::f64_to_u64: + cvttsd2si rax, xmm0 + mov rcx, rax + sar rcx, 63 + movapd xmm1, xmm0 + subsd xmm1, qword ptr [rip + .L_0] + cvttsd2si rdx, xmm1 + and rdx, rcx + or rdx, rax + xor ecx, ecx + xorpd xmm1, xmm1 + ucomisd xmm0, xmm1 + cmovae rcx, rdx + ucomisd xmm0, qword ptr [rip + .L_1] + mov rax, -1 + cmovbe rax, rcx + ret diff --git a/generated assembly/x86_64_default/f64_to_u8 b/generated assembly/x86_64_default/f64_to_u8 new file mode 100644 index 0000000..4143db8 --- /dev/null +++ b/generated assembly/x86_64_default/f64_to_u8 @@ -0,0 +1,7 @@ +fast_float_to_integer::f64_to_u8: + xorpd xmm1, xmm1 + maxsd xmm1, xmm0 + movsd xmm0, qword ptr [rip + .L_0] + minsd xmm0, xmm1 + cvttsd2si eax, xmm0 + ret diff --git a/generated assembly/x86_64_sse/f32_to_i128 b/generated assembly/x86_64_sse/f32_to_i128 new file mode 100644 index 
0000000..32ca751 --- /dev/null +++ b/generated assembly/x86_64_sse/f32_to_i128 @@ -0,0 +1,20 @@ +fast_float_to_integer::f32_to_i128: + push rax + movss dword ptr [rsp + 4], xmm0 + call qword ptr [rip + __fixsfti@GOTPCREL] + xor ecx, ecx + movss xmm0, dword ptr [rsp + 4] + ucomiss xmm0, dword ptr [rip + .L_0] + cmovb rax, rcx + movabs rsi, -9223372036854775808 + cmovb rdx, rsi + ucomiss xmm0, dword ptr [rip + .L_1] + movabs rsi, 9223372036854775807 + cmova rdx, rsi + mov rsi, -1 + cmova rax, rsi + ucomiss xmm0, xmm0 + cmovp rax, rcx + cmovp rdx, rcx + pop rcx + ret diff --git a/generated assembly/x86_64_sse/f32_to_i16 b/generated assembly/x86_64_sse/f32_to_i16 new file mode 100644 index 0000000..de7b2e5 --- /dev/null +++ b/generated assembly/x86_64_sse/f32_to_i16 @@ -0,0 +1,3 @@ +fast_float_to_integer::f32_to_i16: + cvttss2si rax, xmm0 + ret diff --git a/generated assembly/x86_64_sse/f32_to_i32 b/generated assembly/x86_64_sse/f32_to_i32 new file mode 100644 index 0000000..44e3ec2 --- /dev/null +++ b/generated assembly/x86_64_sse/f32_to_i32 @@ -0,0 +1,3 @@ +fast_float_to_integer::f32_to_i32: + cvttss2si rax, xmm0 + ret diff --git a/generated assembly/x86_64_sse/f32_to_i64 b/generated assembly/x86_64_sse/f32_to_i64 new file mode 100644 index 0000000..f410ee4 --- /dev/null +++ b/generated assembly/x86_64_sse/f32_to_i64 @@ -0,0 +1,3 @@ +fast_float_to_integer::f32_to_i64: + cvttss2si rax, xmm0 + ret diff --git a/generated assembly/x86_64_sse/f32_to_i8 b/generated assembly/x86_64_sse/f32_to_i8 new file mode 100644 index 0000000..62f9dde --- /dev/null +++ b/generated assembly/x86_64_sse/f32_to_i8 @@ -0,0 +1,3 @@ +fast_float_to_integer::f32_to_i8: + cvttss2si rax, xmm0 + ret diff --git a/generated assembly/x86_64_sse/f32_to_u128 b/generated assembly/x86_64_sse/f32_to_u128 new file mode 100644 index 0000000..776bcb6 --- /dev/null +++ b/generated assembly/x86_64_sse/f32_to_u128 @@ -0,0 +1,16 @@ +fast_float_to_integer::f32_to_u128: + push rax + movss dword ptr [rsp + 4], xmm0 
+ call qword ptr [rip + __fixunssfti@GOTPCREL] + xor ecx, ecx + xorps xmm0, xmm0 + movss xmm1, dword ptr [rsp + 4] + ucomiss xmm1, xmm0 + cmovb rdx, rcx + cmovb rax, rcx + ucomiss xmm1, dword ptr [rip + .L_0] + mov rcx, -1 + cmova rax, rcx + cmova rdx, rcx + pop rcx + ret diff --git a/generated assembly/x86_64_sse/f32_to_u16 b/generated assembly/x86_64_sse/f32_to_u16 new file mode 100644 index 0000000..2dadc0d --- /dev/null +++ b/generated assembly/x86_64_sse/f32_to_u16 @@ -0,0 +1,3 @@ +fast_float_to_integer::f32_to_u16: + cvttss2si rax, xmm0 + ret diff --git a/generated assembly/x86_64_sse/f32_to_u32 b/generated assembly/x86_64_sse/f32_to_u32 new file mode 100644 index 0000000..da7575c --- /dev/null +++ b/generated assembly/x86_64_sse/f32_to_u32 @@ -0,0 +1,3 @@ +fast_float_to_integer::f32_to_u32: + cvttss2si rax, xmm0 + ret diff --git a/generated assembly/x86_64_sse/f32_to_u64 b/generated assembly/x86_64_sse/f32_to_u64 new file mode 100644 index 0000000..8a8988d --- /dev/null +++ b/generated assembly/x86_64_sse/f32_to_u64 @@ -0,0 +1,9 @@ +fast_float_to_integer::f32_to_u64: + cvttss2si rcx, xmm0 + addss xmm0, dword ptr [rip + .L_0] + cvttss2si rdx, xmm0 + mov rax, rcx + sar rax, 63 + and rax, rdx + or rax, rcx + ret diff --git a/generated assembly/x86_64_sse/f32_to_u8 b/generated assembly/x86_64_sse/f32_to_u8 new file mode 100644 index 0000000..c6b17aa --- /dev/null +++ b/generated assembly/x86_64_sse/f32_to_u8 @@ -0,0 +1,3 @@ +fast_float_to_integer::f32_to_u8: + cvttss2si rax, xmm0 + ret diff --git a/generated assembly/x86_64_sse/f64_to_i128 b/generated assembly/x86_64_sse/f64_to_i128 new file mode 100644 index 0000000..da89a58 --- /dev/null +++ b/generated assembly/x86_64_sse/f64_to_i128 @@ -0,0 +1,20 @@ +fast_float_to_integer::f64_to_i128: + push rax + movsd qword ptr [rsp], xmm0 + call qword ptr [rip + __fixdfti@GOTPCREL] + xor ecx, ecx + movsd xmm0, qword ptr [rsp] + ucomisd xmm0, qword ptr [rip + .L_0] + cmovb rax, rcx + movabs rsi, -9223372036854775808 + 
cmovb rdx, rsi + ucomisd xmm0, qword ptr [rip + .L_1] + movabs rsi, 9223372036854775807 + cmova rdx, rsi + mov rsi, -1 + cmova rax, rsi + ucomisd xmm0, xmm0 + cmovp rax, rcx + cmovp rdx, rcx + pop rcx + ret diff --git a/generated assembly/x86_64_sse/f64_to_i16 b/generated assembly/x86_64_sse/f64_to_i16 new file mode 100644 index 0000000..c7d2077 --- /dev/null +++ b/generated assembly/x86_64_sse/f64_to_i16 @@ -0,0 +1,3 @@ +fast_float_to_integer::f64_to_i16: + cvttsd2si rax, xmm0 + ret diff --git a/generated assembly/x86_64_sse/f64_to_i32 b/generated assembly/x86_64_sse/f64_to_i32 new file mode 100644 index 0000000..48cc827 --- /dev/null +++ b/generated assembly/x86_64_sse/f64_to_i32 @@ -0,0 +1,3 @@ +fast_float_to_integer::f64_to_i32: + cvttsd2si rax, xmm0 + ret diff --git a/generated assembly/x86_64_sse/f64_to_i64 b/generated assembly/x86_64_sse/f64_to_i64 new file mode 100644 index 0000000..2e43ff5 --- /dev/null +++ b/generated assembly/x86_64_sse/f64_to_i64 @@ -0,0 +1,3 @@ +fast_float_to_integer::f64_to_i64: + cvttsd2si rax, xmm0 + ret diff --git a/generated assembly/x86_64_sse/f64_to_i8 b/generated assembly/x86_64_sse/f64_to_i8 new file mode 100644 index 0000000..2bfd2dd --- /dev/null +++ b/generated assembly/x86_64_sse/f64_to_i8 @@ -0,0 +1,3 @@ +fast_float_to_integer::f64_to_i8: + cvttsd2si rax, xmm0 + ret diff --git a/generated assembly/x86_64_sse/f64_to_u128 b/generated assembly/x86_64_sse/f64_to_u128 new file mode 100644 index 0000000..3d74896 --- /dev/null +++ b/generated assembly/x86_64_sse/f64_to_u128 @@ -0,0 +1,16 @@ +fast_float_to_integer::f64_to_u128: + push rax + movsd qword ptr [rsp], xmm0 + call qword ptr [rip + __fixunsdfti@GOTPCREL] + xor ecx, ecx + xorpd xmm0, xmm0 + movsd xmm1, qword ptr [rsp] + ucomisd xmm1, xmm0 + cmovb rdx, rcx + cmovb rax, rcx + ucomisd xmm1, qword ptr [rip + .L_0] + mov rcx, -1 + cmova rax, rcx + cmova rdx, rcx + pop rcx + ret diff --git a/generated assembly/x86_64_sse/f64_to_u16 b/generated assembly/x86_64_sse/f64_to_u16 
new file mode 100644 index 0000000..5b3e66f --- /dev/null +++ b/generated assembly/x86_64_sse/f64_to_u16 @@ -0,0 +1,3 @@ +fast_float_to_integer::f64_to_u16: + cvttsd2si rax, xmm0 + ret diff --git a/generated assembly/x86_64_sse/f64_to_u32 b/generated assembly/x86_64_sse/f64_to_u32 new file mode 100644 index 0000000..6dd06f2 --- /dev/null +++ b/generated assembly/x86_64_sse/f64_to_u32 @@ -0,0 +1,3 @@ +fast_float_to_integer::f64_to_u32: + cvttsd2si rax, xmm0 + ret diff --git a/generated assembly/x86_64_sse/f64_to_u64 b/generated assembly/x86_64_sse/f64_to_u64 new file mode 100644 index 0000000..a7a9efc --- /dev/null +++ b/generated assembly/x86_64_sse/f64_to_u64 @@ -0,0 +1,9 @@ +fast_float_to_integer::f64_to_u64: + cvttsd2si rcx, xmm0 + addsd xmm0, qword ptr [rip + .L_0] + cvttsd2si rdx, xmm0 + mov rax, rcx + sar rax, 63 + and rax, rdx + or rax, rcx + ret diff --git a/generated assembly/x86_64_sse/f64_to_u8 b/generated assembly/x86_64_sse/f64_to_u8 new file mode 100644 index 0000000..01a71e0 --- /dev/null +++ b/generated assembly/x86_64_sse/f64_to_u8 @@ -0,0 +1,3 @@ +fast_float_to_integer::f64_to_u8: + cvttsd2si rax, xmm0 + ret diff --git a/generated assembly/x86_sse/f32_to_i128 b/generated assembly/x86_sse/f32_to_i128 new file mode 100644 index 0000000..eacb1bb --- /dev/null +++ b/generated assembly/x86_sse/f32_to_i128 @@ -0,0 +1,52 @@ +fast_float_to_integer::f32_to_i128: + push ebp + push ebx + push edi + push esi + sub esp, 44 + movss xmm0, dword ptr [esp + 68] + mov esi, dword ptr [esp + 64] + call .L_0$pb +.L_0$pb: + pop ebx + lea eax, [esp + 16] +.L_1: + add ebx, offset _GLOBAL_OFFSET_TABLE_+(.L_1-.L_0$pb) + mov dword ptr [esp], eax + movss dword ptr [esp + 4], xmm0 + call __fixsfti@PLT + sub esp, 4 + movss xmm0, dword ptr [esp + 68] + xor ecx, ecx + mov eax, dword ptr [esp + 16] + mov edx, dword ptr [esp + 20] + mov edi, dword ptr [esp + 24] + mov ebp, -2147483648 + ucomiss xmm0, dword ptr [ebx + .L_2@GOTOFF] + cmovb eax, ecx + cmovb edx, ecx + cmovb edi, ecx 
+ cmovae ebp, dword ptr [esp + 28] + ucomiss xmm0, dword ptr [ebx + .L_3@GOTOFF] + mov ebx, 2147483647 + cmovbe ebx, ebp + mov ebp, -1 + cmova edi, ebp + cmova edx, ebp + cmova eax, ebp + ucomiss xmm0, xmm0 + cmovp ebx, ecx + cmovp eax, ecx + cmovp edx, ecx + cmovp edi, ecx + mov dword ptr [esi + 12], ebx + mov dword ptr [esi + 8], edi + mov dword ptr [esi + 4], edx + mov dword ptr [esi], eax + mov eax, esi + add esp, 44 + pop esi + pop edi + pop ebx + pop ebp + ret 4 diff --git a/generated assembly/x86_sse/f32_to_i16 b/generated assembly/x86_sse/f32_to_i16 new file mode 100644 index 0000000..b3794ac --- /dev/null +++ b/generated assembly/x86_sse/f32_to_i16 @@ -0,0 +1,3 @@ +fast_float_to_integer::f32_to_i16: + cvttss2si eax, dword ptr [esp + 4] + ret diff --git a/generated assembly/x86_sse/f32_to_i32 b/generated assembly/x86_sse/f32_to_i32 new file mode 100644 index 0000000..2f21036 --- /dev/null +++ b/generated assembly/x86_sse/f32_to_i32 @@ -0,0 +1,3 @@ +fast_float_to_integer::f32_to_i32: + cvttss2si eax, dword ptr [esp + 4] + ret diff --git a/generated assembly/x86_sse/f32_to_i64 b/generated assembly/x86_sse/f32_to_i64 new file mode 100644 index 0000000..4dc7dbf --- /dev/null +++ b/generated assembly/x86_sse/f32_to_i64 @@ -0,0 +1,37 @@ +fast_float_to_integer::f32_to_i64: + push edi + push esi + sub esp, 20 + movss xmm0, dword ptr [esp + 32] + call .L_0$pb +.L_0$pb: + pop eax + mov edi, -2147483648 + mov edx, 2147483647 +.L_1: + add eax, offset _GLOBAL_OFFSET_TABLE_+(.L_1-.L_0$pb) + movss dword ptr [esp + 8], xmm0 + fld dword ptr [esp + 8] + fnstcw word ptr [esp + 4] + movzx ecx, word ptr [esp + 4] + or ecx, 3072 + mov word ptr [esp + 6], cx + xor ecx, ecx + ucomiss xmm0, dword ptr [eax + .L_2@GOTOFF] + fldcw word ptr [esp + 6] + fistp qword ptr [esp + 8] + fldcw word ptr [esp + 4] + mov esi, dword ptr [esp + 8] + cmovae edi, dword ptr [esp + 12] + cmovb esi, ecx + ucomiss xmm0, dword ptr [eax + .L_3@GOTOFF] + mov eax, -1 + cmovbe edx, edi + cmovbe eax, esi + 
ucomiss xmm0, xmm0 + cmovp eax, ecx + cmovp edx, ecx + add esp, 20 + pop esi + pop edi + ret diff --git a/generated assembly/x86_sse/f32_to_i8 b/generated assembly/x86_sse/f32_to_i8 new file mode 100644 index 0000000..f684c72 --- /dev/null +++ b/generated assembly/x86_sse/f32_to_i8 @@ -0,0 +1,3 @@ +fast_float_to_integer::f32_to_i8: + cvttss2si eax, dword ptr [esp + 4] + ret diff --git a/generated assembly/x86_sse/f32_to_u128 b/generated assembly/x86_sse/f32_to_u128 new file mode 100644 index 0000000..0de8438 --- /dev/null +++ b/generated assembly/x86_sse/f32_to_u128 @@ -0,0 +1,47 @@ +fast_float_to_integer::f32_to_u128: + push ebx + push edi + push esi + sub esp, 32 + movss xmm0, dword ptr [esp + 52] + mov esi, dword ptr [esp + 48] + call .L_0$pb +.L_0$pb: + pop ebx + lea eax, [esp + 16] +.L_1: + add ebx, offset _GLOBAL_OFFSET_TABLE_+(.L_1-.L_0$pb) + mov dword ptr [esp], eax + movss dword ptr [esp + 4], xmm0 + call __fixunssfti@PLT + sub esp, 4 + movss xmm1, dword ptr [esp + 52] + xorps xmm0, xmm0 + xor eax, eax + mov ecx, 0 + mov edx, 0 + mov edi, 0 + ucomiss xmm1, xmm0 + movaps xmm0, xmm1 + jb .L_2 + mov eax, dword ptr [esp + 28] + mov ecx, dword ptr [esp + 24] + mov edx, dword ptr [esp + 20] + mov edi, dword ptr [esp + 16] +.L_2: + ucomiss xmm0, dword ptr [ebx + .L_3@GOTOFF] + mov ebx, -1 + cmova eax, ebx + cmova edi, ebx + cmova edx, ebx + cmova ecx, ebx + mov dword ptr [esi + 12], eax + mov dword ptr [esi + 8], ecx + mov dword ptr [esi + 4], edx + mov dword ptr [esi], edi + mov eax, esi + add esp, 32 + pop esi + pop edi + pop ebx + ret 4 diff --git a/generated assembly/x86_sse/f32_to_u16 b/generated assembly/x86_sse/f32_to_u16 new file mode 100644 index 0000000..d5e72f8 --- /dev/null +++ b/generated assembly/x86_sse/f32_to_u16 @@ -0,0 +1,3 @@ +fast_float_to_integer::f32_to_u16: + cvttss2si eax, dword ptr [esp + 4] + ret diff --git a/generated assembly/x86_sse/f32_to_u32 b/generated assembly/x86_sse/f32_to_u32 new file mode 100644 index 0000000..7c88ff0 --- 
/dev/null +++ b/generated assembly/x86_sse/f32_to_u32 @@ -0,0 +1,15 @@ +fast_float_to_integer::f32_to_u32: + movss xmm0, dword ptr [esp + 4] + call .L_0$pb +.L_0$pb: + pop eax +.L_1: + add eax, offset _GLOBAL_OFFSET_TABLE_+(.L_1-.L_0$pb) + cvttss2si ecx, xmm0 + addss xmm0, dword ptr [eax + .L_2@GOTOFF] + mov eax, ecx + sar eax, 31 + cvttss2si edx, xmm0 + and eax, edx + or eax, ecx + ret diff --git a/generated assembly/x86_sse/f32_to_u64 b/generated assembly/x86_sse/f32_to_u64 new file mode 100644 index 0000000..bc4d1e8 --- /dev/null +++ b/generated assembly/x86_sse/f32_to_u64 @@ -0,0 +1,43 @@ +fast_float_to_integer::f32_to_u64: + push ebx + sub esp, 16 + call .L_0$pb +.L_0$pb: + pop ecx + movss xmm0, dword ptr [esp + 24] + xorps xmm1, xmm1 +.L_1: + add ecx, offset _GLOBAL_OFFSET_TABLE_+(.L_1-.L_0$pb) + movss xmm2, dword ptr [ecx + .L_2@GOTOFF] + ucomiss xmm2, xmm0 + jbe .L_3 + xorps xmm2, xmm2 +.L_3: + movaps xmm3, xmm0 + setbe bl + xor edx, edx + subss xmm3, xmm2 + movss dword ptr [esp + 8], xmm3 + fld dword ptr [esp + 8] + fnstcw word ptr [esp + 4] + movzx eax, word ptr [esp + 4] + or eax, 3072 + ucomiss xmm0, xmm1 + mov word ptr [esp + 6], ax + mov eax, 0 + fldcw word ptr [esp + 6] + fistp qword ptr [esp + 8] + fldcw word ptr [esp + 4] + jb .L_4 + movzx edx, bl + mov eax, dword ptr [esp + 8] + shl edx, 31 + xor edx, dword ptr [esp + 12] +.L_4: + ucomiss xmm0, dword ptr [ecx + .L_5@GOTOFF] + mov ecx, -1 + cmova edx, ecx + cmova eax, ecx + add esp, 16 + pop ebx + ret diff --git a/generated assembly/x86_sse/f32_to_u8 b/generated assembly/x86_sse/f32_to_u8 new file mode 100644 index 0000000..f9d9c0c --- /dev/null +++ b/generated assembly/x86_sse/f32_to_u8 @@ -0,0 +1,3 @@ +fast_float_to_integer::f32_to_u8: + cvttss2si eax, dword ptr [esp + 4] + ret diff --git a/generated assembly/x86_sse/f64_to_i128 b/generated assembly/x86_sse/f64_to_i128 new file mode 100644 index 0000000..ec6b8a6 --- /dev/null +++ b/generated assembly/x86_sse/f64_to_i128 @@ -0,0 +1,52 @@ 
+fast_float_to_integer::f64_to_i128: + push ebp + push ebx + push edi + push esi + sub esp, 44 + movsd xmm0, qword ptr [esp + 68] + mov esi, dword ptr [esp + 64] + call .L_0$pb +.L_0$pb: + pop ebx + lea eax, [esp + 16] +.L_1: + add ebx, offset _GLOBAL_OFFSET_TABLE_+(.L_1-.L_0$pb) + mov dword ptr [esp], eax + movsd qword ptr [esp + 4], xmm0 + call __fixdfti@PLT + sub esp, 4 + movsd xmm0, qword ptr [esp + 68] + xor ecx, ecx + mov eax, dword ptr [esp + 16] + mov edx, dword ptr [esp + 20] + mov edi, dword ptr [esp + 24] + mov ebp, -2147483648 + ucomisd xmm0, qword ptr [ebx + .L_2@GOTOFF] + cmovb eax, ecx + cmovb edx, ecx + cmovb edi, ecx + cmovae ebp, dword ptr [esp + 28] + ucomisd xmm0, qword ptr [ebx + .L_3@GOTOFF] + mov ebx, 2147483647 + cmovbe ebx, ebp + mov ebp, -1 + cmova edi, ebp + cmova edx, ebp + cmova eax, ebp + ucomisd xmm0, xmm0 + cmovp ebx, ecx + cmovp eax, ecx + cmovp edx, ecx + cmovp edi, ecx + mov dword ptr [esi + 12], ebx + mov dword ptr [esi + 8], edi + mov dword ptr [esi + 4], edx + mov dword ptr [esi], eax + mov eax, esi + add esp, 44 + pop esi + pop edi + pop ebx + pop ebp + ret 4 diff --git a/generated assembly/x86_sse/f64_to_i16 b/generated assembly/x86_sse/f64_to_i16 new file mode 100644 index 0000000..1dce409 --- /dev/null +++ b/generated assembly/x86_sse/f64_to_i16 @@ -0,0 +1,3 @@ +fast_float_to_integer::f64_to_i16: + cvttsd2si eax, qword ptr [esp + 4] + ret diff --git a/generated assembly/x86_sse/f64_to_i32 b/generated assembly/x86_sse/f64_to_i32 new file mode 100644 index 0000000..73530e5 --- /dev/null +++ b/generated assembly/x86_sse/f64_to_i32 @@ -0,0 +1,3 @@ +fast_float_to_integer::f64_to_i32: + cvttsd2si eax, qword ptr [esp + 4] + ret diff --git a/generated assembly/x86_sse/f64_to_i64 b/generated assembly/x86_sse/f64_to_i64 new file mode 100644 index 0000000..7cac98b --- /dev/null +++ b/generated assembly/x86_sse/f64_to_i64 @@ -0,0 +1,37 @@ +fast_float_to_integer::f64_to_i64: + push edi + push esi + sub esp, 20 + movsd xmm0, qword ptr 
[esp + 32] + call .L_0$pb +.L_0$pb: + pop eax + mov edi, -2147483648 + mov edx, 2147483647 +.L_1: + add eax, offset _GLOBAL_OFFSET_TABLE_+(.L_1-.L_0$pb) + movsd qword ptr [esp + 8], xmm0 + fld qword ptr [esp + 8] + fnstcw word ptr [esp + 4] + movzx ecx, word ptr [esp + 4] + or ecx, 3072 + mov word ptr [esp + 6], cx + xor ecx, ecx + ucomisd xmm0, qword ptr [eax + .L_2@GOTOFF] + fldcw word ptr [esp + 6] + fistp qword ptr [esp + 8] + fldcw word ptr [esp + 4] + mov esi, dword ptr [esp + 8] + cmovae edi, dword ptr [esp + 12] + cmovb esi, ecx + ucomisd xmm0, qword ptr [eax + .L_3@GOTOFF] + mov eax, -1 + cmovbe edx, edi + cmovbe eax, esi + ucomisd xmm0, xmm0 + cmovp eax, ecx + cmovp edx, ecx + add esp, 20 + pop esi + pop edi + ret diff --git a/generated assembly/x86_sse/f64_to_i8 b/generated assembly/x86_sse/f64_to_i8 new file mode 100644 index 0000000..4fcb1bf --- /dev/null +++ b/generated assembly/x86_sse/f64_to_i8 @@ -0,0 +1,3 @@ +fast_float_to_integer::f64_to_i8: + cvttsd2si eax, qword ptr [esp + 4] + ret diff --git a/generated assembly/x86_sse/f64_to_u128 b/generated assembly/x86_sse/f64_to_u128 new file mode 100644 index 0000000..4d58d05 --- /dev/null +++ b/generated assembly/x86_sse/f64_to_u128 @@ -0,0 +1,47 @@ +fast_float_to_integer::f64_to_u128: + push ebx + push edi + push esi + sub esp, 32 + movsd xmm0, qword ptr [esp + 52] + mov esi, dword ptr [esp + 48] + call .L_0$pb +.L_0$pb: + pop ebx + lea eax, [esp + 16] +.L_1: + add ebx, offset _GLOBAL_OFFSET_TABLE_+(.L_1-.L_0$pb) + mov dword ptr [esp], eax + movsd qword ptr [esp + 4], xmm0 + call __fixunsdfti@PLT + sub esp, 4 + movsd xmm1, qword ptr [esp + 52] + xorpd xmm0, xmm0 + xor eax, eax + mov ecx, 0 + mov edx, 0 + mov edi, 0 + ucomisd xmm1, xmm0 + movapd xmm0, xmm1 + jb .L_2 + mov eax, dword ptr [esp + 28] + mov ecx, dword ptr [esp + 24] + mov edx, dword ptr [esp + 20] + mov edi, dword ptr [esp + 16] +.L_2: + ucomisd xmm0, qword ptr [ebx + .L_3@GOTOFF] + mov ebx, -1 + cmova eax, ebx + cmova edi, ebx + cmova edx, 
ebx + cmova ecx, ebx + mov dword ptr [esi + 12], eax + mov dword ptr [esi + 8], ecx + mov dword ptr [esi + 4], edx + mov dword ptr [esi], edi + mov eax, esi + add esp, 32 + pop esi + pop edi + pop ebx + ret 4 diff --git a/generated assembly/x86_sse/f64_to_u16 b/generated assembly/x86_sse/f64_to_u16 new file mode 100644 index 0000000..64c292e --- /dev/null +++ b/generated assembly/x86_sse/f64_to_u16 @@ -0,0 +1,3 @@ +fast_float_to_integer::f64_to_u16: + cvttsd2si eax, qword ptr [esp + 4] + ret diff --git a/generated assembly/x86_sse/f64_to_u32 b/generated assembly/x86_sse/f64_to_u32 new file mode 100644 index 0000000..b91bf69 --- /dev/null +++ b/generated assembly/x86_sse/f64_to_u32 @@ -0,0 +1,15 @@ +fast_float_to_integer::f64_to_u32: + movsd xmm0, qword ptr [esp + 4] + call .L_0$pb +.L_0$pb: + pop eax +.L_1: + add eax, offset _GLOBAL_OFFSET_TABLE_+(.L_1-.L_0$pb) + cvttsd2si ecx, xmm0 + addsd xmm0, qword ptr [eax + .L_2@GOTOFF] + mov eax, ecx + sar eax, 31 + cvttsd2si edx, xmm0 + and eax, edx + or eax, ecx + ret diff --git a/generated assembly/x86_sse/f64_to_u64 b/generated assembly/x86_sse/f64_to_u64 new file mode 100644 index 0000000..e957366 --- /dev/null +++ b/generated assembly/x86_sse/f64_to_u64 @@ -0,0 +1,43 @@ +fast_float_to_integer::f64_to_u64: + push ebx + sub esp, 16 + call .L_0$pb +.L_0$pb: + pop ecx + movsd xmm0, qword ptr [esp + 24] + xorpd xmm1, xmm1 +.L_1: + add ecx, offset _GLOBAL_OFFSET_TABLE_+(.L_1-.L_0$pb) + movsd xmm2, qword ptr [ecx + .L_2@GOTOFF] + ucomisd xmm2, xmm0 + jbe .L_3 + xorpd xmm2, xmm2 +.L_3: + movapd xmm3, xmm0 + setbe bl + xor edx, edx + subsd xmm3, xmm2 + movsd qword ptr [esp + 8], xmm3 + fld qword ptr [esp + 8] + fnstcw word ptr [esp + 4] + movzx eax, word ptr [esp + 4] + or eax, 3072 + ucomisd xmm0, xmm1 + mov word ptr [esp + 6], ax + mov eax, 0 + fldcw word ptr [esp + 6] + fistp qword ptr [esp + 8] + fldcw word ptr [esp + 4] + jb .L_4 + movzx edx, bl + mov eax, dword ptr [esp + 8] + shl edx, 31 + xor edx, dword ptr [esp + 12] 
+.L_4: + ucomisd xmm0, qword ptr [ecx + .L_5@GOTOFF] + mov ecx, -1 + cmova edx, ecx + cmova eax, ecx + add esp, 16 + pop ebx + ret diff --git a/generated assembly/x86_sse/f64_to_u8 b/generated assembly/x86_sse/f64_to_u8 new file mode 100644 index 0000000..7b1814e --- /dev/null +++ b/generated assembly/x86_sse/f64_to_u8 @@ -0,0 +1,3 @@ +fast_float_to_integer::f64_to_u8: + cvttsd2si eax, qword ptr [esp + 4] + ret diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..6239d0d --- /dev/null +++ b/readme.md @@ -0,0 +1,39 @@ +Convert floating point values to integer types faster than the standard `as` operator. + +See the [library documentation](https://docs.rs/fast-float-to-integer) for documentation targeting users of the library. + +--- + +# Development + +We use the [xtask](https://github.com/matklad/cargo-xtask) pattern to implement automation tasks in Rust rather than shell scripts. This provides an easy way to compile for different targets and run the tests through qemu. + +CI enforces that all targets compile, pass tests, and that the generated assembly committed to the repository is up to date. + +# Releasing + +- Update the changelog. +- Update the version in Cargo.toml. +- Create a git tag for the version. + +# Improvements + +## More targets + +We should add common targets like aarch64. + +## AVX512 + +AVX512 can convert float to u64 in [one instruction](https://www.felixcloutier.com/x86/vcvttps2udq), but the intrinsics are [not stable](https://github.com/rust-lang/rust/issues/111137). + +We should make sure that AVX512 is actually faster in practice than the current approach. + +## Cross compilation + +The current cross compilation setup is brittle. It assumes the host is x86 and that all the targets are x86 variants. This breaks for other architectures like aarch64 that need a custom linker.
See the following links for more information: + +- https://rust-lang.github.io/rustup/cross-compilation.html +- https://github.com/japaric/rust-cross/blob/master/README.md#c-cross-toolchain +- https://github.com/cross-rs/cross + +We should improve this setup. Either set up the linking tools manually or use cargo cross. diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..3fa3a0f --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,166 @@ +//! Convert floating point values to integer types faster than the standard `as` operator. +//! +//! The standard way of converting floating point values to integers is with the [`as` operator](https://doc.rust-lang.org/reference/expressions/operator-expr.html#type-cast-expressions). This conversion has various guarantees as listed in the reference. One of them is that it saturates: Input values out of range of the output type convert to the minimal/maximal value of the output type. +//! +//! ``` +//! assert_eq!(300f32 as u8, 255); +//! assert_eq!(-5f32 as u8, 0); +//! ``` +//! +//! This contrasts with C/C++, where this kind of cast is [undefined behavior](https://github.com/e00E/cpp-clamp-cast). Saturation comes with a downside. It is slower than the C/C++ version. On many [hardware targets](https://doc.rust-lang.org/nightly/rustc/platform-support.html) a float to integer conversion can be done in one instruction. For example [`CVTTSS2SI`](https://www.felixcloutier.com/x86/cvttss2si) on x86_64+SSE. Rust has to do more work than this, because the instruction does not provide saturation. +//! +//! Sometimes you want faster conversions and don't need saturation. This is what this crate provides. The behavior of the conversion functions in this crate depends on whether the input value is in range of the output type. If in range, then the conversion functions work like the standard `as` operator conversion. If not in range (including NaN), then you get an unspecified value. +//! +//!
You never get undefined behavior but you can get unspecified behavior. In the unspecified case, you get an arbitrary value. The function returns and you get a valid value of the output type, but there is no guarantee what that value is. +//! +//! This crate picks an implementation automatically at compile time based on the [target](https://doc.rust-lang.org/reference/conditional-compilation.html#target_arch) and [features](https://doc.rust-lang.org/reference/attributes/codegen.html#the-target_feature-attribute). If there is no specialized implementation, then this crate picks the standard `as` operator conversion. This crate has optimized implementations on the following targets: +//! +//! - `target_arch = "x86_64", target_feature = "sse"`: all conversions except 128 bit integers +//! - `target_arch = "x86", target_feature = "sse"`: all conversions except 64 bit and 128 bit integers +//! +//! # Assembly comparison +//! +//! The [repository](https://github.com/e00E/fast-float-to-integer) contains generated assembly for every conversion and target. Here are some typical examples on x86_64+SSE. +//! +// +// We could do something like `#![doc = include_str!("../generated assembly/x86_64_default/f32_to_i64")]` to include the assembly directly. The downside of that is that compiling the library requires the assembly file to be there and we have to publish the file. +// +//! standard: +//! +//! ```asm +//! f32_to_i64: +//! cvttss2si rax, xmm0 +//! ucomiss xmm0, dword ptr [rip + .L_0] +//! movabs rcx, 9223372036854775807 +//! cmovbe rcx, rax +//! xor eax, eax +//! ucomiss xmm0, xmm0 +//! cmovnp rax, rcx +//! ret +//! ``` +//! +//! fast: +//! +//! ```asm +//! f32_to_i64: +//! cvttss2si rax, xmm0 +//! ret +//! ``` +//! +//! standard: +//! +//! ```asm +//! f32_to_u64: +//! cvttss2si rax, xmm0 +//! mov rcx, rax +//! sar rcx, 63 +//! movaps xmm1, xmm0 +//! subss xmm1, dword ptr [rip + .L_0] +//! cvttss2si rdx, xmm1 +//! and rdx, rcx +//! or rdx, rax +//! xor ecx, ecx +//! 
xorps xmm1, xmm1 +//! ucomiss xmm0, xmm1 +//! cmovae rcx, rdx +//! ucomiss xmm0, dword ptr [rip + .L_1] +//! mov rax, -1 +//! cmovbe rax, rcx +//! ret +//! ``` +//! +//! fast: +//! +//! ```asm +//! f32_to_u64: +//! cvttss2si rcx, xmm0 +//! addss xmm0, dword ptr [rip + .L_0] +//! cvttss2si rdx, xmm0 +//! mov rax, rcx +//! sar rax, 63 +//! and rax, rdx +//! or rax, rcx +//! ret +//! ``` + +#![cfg_attr(not(test), no_std)] + +/// Raise two to some power. +/// +/// This function exists because libcore does not provide the [`f32::powi`] family of functions. +#[allow(dead_code)] +const fn power_of_two_f32(exponent: u32) -> f32 { + (2u128).pow(exponent) as f32 +} + +/// Like power_of_two_f32 but for f64. +#[allow(dead_code)] +const fn power_of_two_f64(exponent: u32) -> f64 { + (2u128).pow(exponent) as f64 +} + +macro_rules! create_target { + ($name:ident) => { + use $name as active_target; + + // Create a test with the target name so we can check that the expected target is active. The following command prints the active target through the test name: + // + // cargo test --quiet --package fast-float-to-integer --lib -- --list + #[test] + fn $name() {} + }; +} + +// Conditionally compiled target specific modules. The condition is set based on the availability of the intrinsics they use. This makes it safe to use the module. See the `target_default` module for the interface. +// +// We would put the mod declaration inside of the create_target macro too, but then rustfmt does not understand it. +cfg_if::cfg_if! { + if #[cfg(feature = "force-default")] { + mod target_default; + create_target!(target_default); + } else if #[cfg(all(target_arch = "x86_64", target_feature = "sse"))] { + mod target_x86_64_sse; + create_target!(target_x86_64_sse); + } else if #[cfg(all(target_arch = "x86", target_feature = "sse"))] { + mod target_x86_sse; + create_target!(target_x86_sse); + } else { + mod target_default; + create_target!(target_default); + } +} + +macro_rules!
create_function { + ($name:ident, $Float:ty, $Integer:ty) => { + /// Convert the input floating point value to the output integer type. + /// + /// If the input value is out of range of the output type, then the result is unspecified. Otherwise, the result is the same as the standard `as` conversion. + #[cfg_attr(feature = "show-asm", inline(never))] + #[cfg_attr(not(feature = "show-asm"), inline(always))] + pub fn $name(float: $Float) -> $Integer { + active_target::implementation::$name(float) + } + }; +} + +create_function! {f32_to_i8, f32, i8} +create_function! {f32_to_u8, f32, u8} +create_function! {f32_to_i16, f32, i16} +create_function! {f32_to_u16, f32, u16} +create_function! {f32_to_i32, f32, i32} +create_function! {f32_to_u32, f32, u32} +create_function! {f32_to_i64, f32, i64} +create_function! {f32_to_u64, f32, u64} +create_function! {f32_to_i128, f32, i128} +create_function! {f32_to_u128, f32, u128} + +create_function! {f64_to_i8, f64, i8} +create_function! {f64_to_u8, f64, u8} +create_function! {f64_to_i16, f64, i16} +create_function! {f64_to_u16, f64, u16} +create_function! {f64_to_i32, f64, i32} +create_function! {f64_to_u32, f64, u32} +create_function! {f64_to_i64, f64, i64} +create_function! {f64_to_u64, f64, u64} +create_function! {f64_to_i128, f64, i128} +create_function! {f64_to_u128, f64, u128} diff --git a/src/target_default.rs b/src/target_default.rs new file mode 100644 index 0000000..792fa71 --- /dev/null +++ b/src/target_default.rs @@ -0,0 +1,34 @@ +// There is an inner module to separate the implementation from the interface. + +macro_rules! create_function { + ($name:ident, $Input:ty, $Output: ty) => { + #[inline(always)] + pub fn $name(float: $Input) -> $Output { + float as _ + } + }; +} + +pub mod implementation { + create_function! {f32_to_i8, f32, i8} + create_function! {f32_to_u8, f32, u8} + create_function! {f32_to_i16, f32, i16} + create_function! {f32_to_u16, f32, u16} + create_function! {f32_to_i32, f32, i32} + create_function! 
{f32_to_u32, f32, u32} + create_function! {f32_to_i64, f32, i64} + create_function! {f32_to_u64, f32, u64} + create_function! {f32_to_i128, f32, i128} + create_function! {f32_to_u128, f32, u128} + + create_function! {f64_to_i8, f64, i8} + create_function! {f64_to_u8, f64, u8} + create_function! {f64_to_i16, f64, i16} + create_function! {f64_to_u16, f64, u16} + create_function! {f64_to_i32, f64, i32} + create_function! {f64_to_u32, f64, u32} + create_function! {f64_to_i64, f64, i64} + create_function! {f64_to_u64, f64, u64} + create_function! {f64_to_i128, f64, i128} + create_function! {f64_to_u128, f64, u128} +} diff --git a/src/target_x86_64_sse.rs b/src/target_x86_64_sse.rs new file mode 100644 index 0000000..d266c7d --- /dev/null +++ b/src/target_x86_64_sse.rs @@ -0,0 +1,188 @@ +use core::arch::x86_64::{_mm_cvttsd_si64, _mm_cvttss_si64, _mm_loadu_pd, _mm_loadu_ps}; + +use crate::{power_of_two_f32, power_of_two_f64}; + +/// Convert f32 to i64 using the CVTTSS2SI instruction. If the input f32 is out of range of the output i64, then the result is i64::MIN. +#[inline(always)] +fn f32_to_i64(float: f32) -> i64 { + // The compiler optimizes this function into a single instruction without the need for inline assembly. + + let floats = [float, 0., 0., 0.]; + let floats_pointer = floats.as_ptr(); + let floats_register = unsafe { _mm_loadu_ps(floats_pointer) }; + unsafe { _mm_cvttss_si64(floats_register) } +} + +// For f32_to_i32 we could use CVTTSS2SI with 32 bit output (_mm_cvttss_si32) instead of the 64 bit output. That might be faster. + +// We can't use the same approach for u64 output because the conversion instruction only works on i64. This is a problem for floats that exceed i64::MAX. We cannot handle this with one instruction, but we can still do better than the `as` operator. + +// This approach branches into a special case if the input is too large. The branchless approach below is faster and is the one we use. 
We keep this code around for documentation purposes. +#[inline(always)] +fn _f32_to_u64_branchful(float: f32) -> u64 { + const THRESHOLD_FLOAT: f32 = power_of_two_f32(63); + const THRESHOLD_INTEGER: u64 = 2u64.pow(63); + + let in_range = float <= THRESHOLD_FLOAT; + if in_range { + f32_to_i64(float) as u64 + } else { + // Subtract the threshold from the float. The result is >= 0 because the input is larger than the subtrahend. The result is <= i64::MAX because `u64::MAX - i64::MAX == i64::MAX`. + let in_range_float = float - THRESHOLD_FLOAT; + let integer = f32_to_i64(in_range_float) as u64; + // Overflow is benign because it can only occur for invalid inputs. + integer.overflowing_add(THRESHOLD_INTEGER).0 + } +} + +// This approach avoids the branch. It is faster than the branchful approach. +#[inline(always)] +fn f32_to_u64_branchless(float: f32) -> u64 { + const THRESHOLD: f32 = power_of_two_f32(63); + + let integer1 = f32_to_i64(float); + let integer2 = f32_to_i64(float - THRESHOLD); + // If the input is larger than i64::MAX, then integer1 is i64::MIN. This value has 1 as the leftmost bit and 0 as the remaining bits. Right shift on signed values is arithmetic, not logical [1]. We end up with all 0 (in range) or all 1 (out of range). + let too_large = integer1 >> 63; + // # If the input is not too large: + // + // Integer1 has the correct value. The mask is all 0, which makes the Or result in integer1. + // + // # If the input is too large: + // + // Integer1 is i64::MIN and the mask is all 1. The Or results in `i64::MIN | integer2`. integer2 has the correct result minus 2**63. This is the correct result without the leftmost bit. The Or adds the missing leftmost bit back. + (integer1 | (integer2 & too_large)) as u64 + + // [1] https://doc.rust-lang.org/reference/expressions/operator-expr.html#arithmetic-and-logical-binary-operators +} + +#[inline(always)] +fn f32_to_u64(float: f32) -> u64 { + f32_to_u64_branchless(float) +} + +// Repeat for f64. 
+ +#[inline(always)] +fn f64_to_i64(float: f64) -> i64 { + // see f32_to_i64 + + let floats = [float, 0.]; + let floats_pointer = floats.as_ptr(); + let floats_register = unsafe { _mm_loadu_pd(floats_pointer) }; + unsafe { _mm_cvttsd_si64(floats_register) } +} + +#[inline(always)] +fn f64_to_u64(float: f64) -> u64 { + // see f32_to_u64 + + const THRESHOLD: f64 = power_of_two_f64(63); + + let integer1 = f64_to_i64(float); + let integer2 = f64_to_i64(float - THRESHOLD); + let too_large = integer1 >> 63; + (integer1 | (integer2 & too_large)) as u64 +} + +pub mod implementation { + #[inline(always)] + pub fn f32_to_i8(float: f32) -> i8 { + super::f32_to_i64(float) as _ + } + + #[inline(always)] + pub fn f32_to_u8(float: f32) -> u8 { + super::f32_to_i64(float) as _ + } + + #[inline(always)] + pub fn f32_to_i16(float: f32) -> i16 { + super::f32_to_i64(float) as _ + } + + #[inline(always)] + pub fn f32_to_u16(float: f32) -> u16 { + super::f32_to_i64(float) as _ + } + + #[inline(always)] + pub fn f32_to_i32(float: f32) -> i32 { + super::f32_to_i64(float) as _ + } + + #[inline(always)] + pub fn f32_to_u32(float: f32) -> u32 { + super::f32_to_i64(float) as _ + } + + #[inline(always)] + pub fn f32_to_i64(float: f32) -> i64 { + super::f32_to_i64(float) as _ + } + + #[inline(always)] + pub fn f32_to_u64(float: f32) -> u64 { + super::f32_to_u64(float) as _ + } + + #[inline(always)] + pub fn f32_to_i128(float: f32) -> i128 { + float as _ + } + + #[inline(always)] + pub fn f32_to_u128(float: f32) -> u128 { + float as _ + } + + #[inline(always)] + pub fn f64_to_i8(float: f64) -> i8 { + f64_to_i64(float) as _ + } + + #[inline(always)] + pub fn f64_to_u8(float: f64) -> u8 { + super::f64_to_i64(float) as _ + } + + #[inline(always)] + pub fn f64_to_i16(float: f64) -> i16 { + super::f64_to_i64(float) as _ + } + + #[inline(always)] + pub fn f64_to_u16(float: f64) -> u16 { + super::f64_to_i64(float) as _ + } + + #[inline(always)] + pub fn f64_to_i32(float: f64) -> i32 { + 
super::f64_to_i64(float) as _ + } + + #[inline(always)] + pub fn f64_to_u32(float: f64) -> u32 { + super::f64_to_i64(float) as _ + } + + #[inline(always)] + pub fn f64_to_i64(float: f64) -> i64 { + super::f64_to_i64(float) as _ + } + + #[inline(always)] + pub fn f64_to_u64(float: f64) -> u64 { + super::f64_to_u64(float) as _ + } + + #[inline(always)] + pub fn f64_to_i128(float: f64) -> i128 { + float as _ + } + + #[inline(always)] + pub fn f64_to_u128(float: f64) -> u128 { + float as _ + } +} diff --git a/src/target_x86_sse.rs b/src/target_x86_sse.rs new file mode 100644 index 0000000..2c842de --- /dev/null +++ b/src/target_x86_sse.rs @@ -0,0 +1,149 @@ +use core::arch::x86::{_mm_cvttsd_si32, _mm_cvttss_si32, _mm_loadu_pd, _mm_loadu_ps}; + +use crate::{power_of_two_f32, power_of_two_f64}; + +#[inline(always)] +fn f32_to_i32(float: f32) -> i32 { + // see crate::x86_64_sse::f32_to_i64 + + let floats = [float, 0., 0., 0.]; + let floats_pointer = floats.as_ptr(); + let floats_register = unsafe { _mm_loadu_ps(floats_pointer) }; + unsafe { _mm_cvttss_si32(floats_register) } +} + +#[inline(always)] +fn f32_to_u32(float: f32) -> u32 { + // see crate::x86_64_sse::f32_to_u64 + + const THRESHOLD: f32 = power_of_two_f32(31); + + let integer1 = f32_to_i32(float); + let integer2 = f32_to_i32(float - THRESHOLD); + let too_large = integer1 >> 31; + (integer1 | (integer2 & too_large)) as u32 +} + +#[inline(always)] +fn f64_to_i32(float: f64) -> i32 { + // see crate::x86_64_sse::f64_to_i64 + + let floats = [float, 0.]; + let floats_pointer = floats.as_ptr(); + let floats_register = unsafe { _mm_loadu_pd(floats_pointer) }; + unsafe { _mm_cvttsd_si32(floats_register) } +} + +#[inline(always)] +fn f64_to_u32(float: f64) -> u32 { + // see crate::x86_64_sse::f64_to_u64 + + const THRESHOLD: f64 = power_of_two_f64(31); + + let integer1 = f64_to_i32(float); + let integer2 = f64_to_i32(float - THRESHOLD); + let too_large = integer1 >> 31; + (integer1 | (integer2 & too_large)) as u32 +} + +pub 
mod implementation { + #[inline(always)] + pub fn f32_to_i8(float: f32) -> i8 { + super::f32_to_i32(float) as _ + } + + #[inline(always)] + pub fn f32_to_u8(float: f32) -> u8 { + super::f32_to_i32(float) as _ + } + + #[inline(always)] + pub fn f32_to_i16(float: f32) -> i16 { + super::f32_to_i32(float) as _ + } + + #[inline(always)] + pub fn f32_to_u16(float: f32) -> u16 { + super::f32_to_i32(float) as _ + } + + #[inline(always)] + pub fn f32_to_i32(float: f32) -> i32 { + super::f32_to_i32(float) as _ + } + + #[inline(always)] + pub fn f32_to_u32(float: f32) -> u32 { + super::f32_to_u32(float) as _ + } + + #[inline(always)] + pub fn f32_to_i64(float: f32) -> i64 { + float as _ + } + + #[inline(always)] + pub fn f32_to_u64(float: f32) -> u64 { + float as _ + } + + #[inline(always)] + pub fn f32_to_i128(float: f32) -> i128 { + float as _ + } + + #[inline(always)] + pub fn f32_to_u128(float: f32) -> u128 { + float as _ + } + + #[inline(always)] + pub fn f64_to_i8(float: f64) -> i8 { + super::f64_to_i32(float) as _ + } + + #[inline(always)] + pub fn f64_to_u8(float: f64) -> u8 { + super::f64_to_i32(float) as _ + } + + #[inline(always)] + pub fn f64_to_i16(float: f64) -> i16 { + super::f64_to_i32(float) as _ + } + + #[inline(always)] + pub fn f64_to_u16(float: f64) -> u16 { + super::f64_to_i32(float) as _ + } + + #[inline(always)] + pub fn f64_to_i32(float: f64) -> i32 { + super::f64_to_i32(float) as _ + } + + #[inline(always)] + pub fn f64_to_u32(float: f64) -> u32 { + super::f64_to_u32(float) as _ + } + + #[inline(always)] + pub fn f64_to_i64(float: f64) -> i64 { + float as _ + } + + #[inline(always)] + pub fn f64_to_u64(float: f64) -> u64 { + float as _ + } + + #[inline(always)] + pub fn f64_to_i128(float: f64) -> i128 { + float as _ + } + + #[inline(always)] + pub fn f64_to_u128(float: f64) -> u128 { + float as _ + } +} diff --git a/tests/test.rs b/tests/test.rs new file mode 100644 index 0000000..6787806 --- /dev/null +++ b/tests/test.rs @@ -0,0 +1,162 @@ +// TODO: 
Consider rewriting this with traits instead of macros. + +use float_next_after::NextAfter; + +trait InRange<T> { + /// Is this float value in range for this integer type? + #[allow(clippy::wrong_self_convention)] + fn in_range(self) -> bool; +} + +macro_rules! implement_is_valid { + ($Float:ty, $Integer:ty, $signed:expr) => { + impl InRange<$Integer> for $Float { + fn in_range(self) -> bool { + let bits = <$Integer>::BITS as i32; + let base: $Float = 2.; + if $signed { + self >= -base.powi(bits - 1) && self < base.powi(bits - 1) + } else { + self >= 0. && self < base.powi(bits) + } + } + } + }; +} + +implement_is_valid! {f32, i8, true} +implement_is_valid! {f32, u8, false} +implement_is_valid! {f32, i16, true} +implement_is_valid! {f32, u16, false} +implement_is_valid! {f32, i32, true} +implement_is_valid! {f32, u32, false} +implement_is_valid! {f32, i64, true} +implement_is_valid! {f32, u64, false} +implement_is_valid! {f32, i128, true} +implement_is_valid! {f32, u128, false} + +implement_is_valid! {f64, i8, true} +implement_is_valid! {f64, u8, false} +implement_is_valid! {f64, i16, true} +implement_is_valid! {f64, u16, false} +implement_is_valid! {f64, i32, true} +implement_is_valid! {f64, u32, false} +implement_is_valid! {f64, i64, true} +implement_is_valid! {f64, u64, false} +implement_is_valid! {f64, i128, true} +implement_is_valid! {f64, u128, false} + +// We can test all f32 values in 10 seconds on a modern processor. On qemu it is too slow. + +macro_rules! create_all_f32_test { + ($name:ident, $convert_custom:path, $Integer:ty) => { + #[test] + #[ignore] + fn $name() { + for i in u32::MIN..=u32::MAX { + let float = f32::from_bits(i); + let result = $convert_custom(float); + let expected = float as $Integer; + // We skip the assert but not the computation. This detects failing debug assertions in the implementation. + if InRange::<$Integer>::in_range(float) { + assert_eq!(result, expected, "{float:.0}"); + } + } + } + }; +} + +create_all_f32_test! 
{all_f32_i8, fast_float_to_integer::f32_to_i8, i8} +create_all_f32_test! {all_f32_u8, fast_float_to_integer::f32_to_u8, u8} +create_all_f32_test! {all_f32_i16, fast_float_to_integer::f32_to_i16, i16} +create_all_f32_test! {all_f32_u16, fast_float_to_integer::f32_to_u16, u16} +create_all_f32_test! {all_f32_i32, fast_float_to_integer::f32_to_i32, i32} +create_all_f32_test! {all_f32_u32, fast_float_to_integer::f32_to_u32, u32} +create_all_f32_test! {all_f32_i64, fast_float_to_integer::f32_to_i64, i64} +create_all_f32_test! {all_f32_u64, fast_float_to_integer::f32_to_u64, u64} +create_all_f32_test! {all_f32_i128, fast_float_to_integer::f32_to_i128, i128} +create_all_f32_test! {all_f32_u128, fast_float_to_integer::f32_to_u128, u128} + +macro_rules! create_interesting_floats_function { + ($name:ident, $Float:ty) => { + fn $name() -> impl Iterator<Item = $Float> { + let signs = |float: $Float| [float, -float]; + + let neighbors = |float: $Float| { + [ + float.next_after(<$Float>::INFINITY), + float + .next_after(<$Float>::INFINITY) + .next_after(<$Float>::INFINITY), + float.next_after(<$Float>::NEG_INFINITY), + float + .next_after(<$Float>::NEG_INFINITY) + .next_after(<$Float>::NEG_INFINITY), + ] + }; + + let offsets = |float: $Float| [-2, -1, 0, 1, 2].map(|offset| float + offset as $Float); + + let exponents = 0..70; + exponents.flat_map(move |exponent| { + let float = (2 as $Float).powi(exponent); + offsets(float) + .into_iter() + .chain(neighbors(float)) + .chain([float * 1.5]) + .flat_map(signs) + }) + } + }; +} + +create_interesting_floats_function! {interesting_floats_f32, f32} +create_interesting_floats_function! {interesting_floats_f64, f64} + +#[test] +#[ignore] +fn print_interesting_floats() { + for float in interesting_floats_f32() { + println!("{float:.e} {float:.0} {:.x}", float.to_bits()); + } +} + +macro_rules! 
create_interesting_floats_test { + ($name:ident, $interesting_floats_function:ident, $convert_custom:path, $Integer:ty) => { + #[test] + fn $name() { + let mut valid_count: u32 = 0; + for float in $interesting_floats_function() { + let result = $convert_custom(float); + let expected = float as $Integer; + if InRange::<$Integer>::in_range(float) { + valid_count += 1; + assert_eq!(result, expected, "{float:.0}"); + } + } + assert!((50..2000).contains(&valid_count), "{valid_count}"); + } + }; +} + +create_interesting_floats_test! {interesting_f32_i8, interesting_floats_f32, fast_float_to_integer::f32_to_i8, i8} +create_interesting_floats_test! {interesting_f32_u8, interesting_floats_f32, fast_float_to_integer::f32_to_u8, u8} +create_interesting_floats_test! {interesting_f32_i16, interesting_floats_f32, fast_float_to_integer::f32_to_i16, i16} +create_interesting_floats_test! {interesting_f32_u16, interesting_floats_f32, fast_float_to_integer::f32_to_u16, u16} +create_interesting_floats_test! {interesting_f32_i32, interesting_floats_f32, fast_float_to_integer::f32_to_i32, i32} +create_interesting_floats_test! {interesting_f32_u32, interesting_floats_f32, fast_float_to_integer::f32_to_u32, u32} +create_interesting_floats_test! {interesting_f32_i64, interesting_floats_f32, fast_float_to_integer::f32_to_i64, i64} +create_interesting_floats_test! {interesting_f32_u64, interesting_floats_f32, fast_float_to_integer::f32_to_u64, u64} +create_interesting_floats_test! {interesting_f32_i128, interesting_floats_f32, fast_float_to_integer::f32_to_i128, i128} +create_interesting_floats_test! {interesting_f32_u128, interesting_floats_f32, fast_float_to_integer::f32_to_u128, u128} + +create_interesting_floats_test! {interesting_f64_i8, interesting_floats_f64, fast_float_to_integer::f64_to_i8, i8} +create_interesting_floats_test! {interesting_f64_u8, interesting_floats_f64, fast_float_to_integer::f64_to_u8, u8} +create_interesting_floats_test! 
{interesting_f64_i16, interesting_floats_f64, fast_float_to_integer::f64_to_i16, i16} +create_interesting_floats_test! {interesting_f64_u16, interesting_floats_f64, fast_float_to_integer::f64_to_u16, u16} +create_interesting_floats_test! {interesting_f64_i32, interesting_floats_f64, fast_float_to_integer::f64_to_i32, i32} +create_interesting_floats_test! {interesting_f64_u32, interesting_floats_f64, fast_float_to_integer::f64_to_u32, u32} +create_interesting_floats_test! {interesting_f64_i64, interesting_floats_f64, fast_float_to_integer::f64_to_i64, i64} +create_interesting_floats_test! {interesting_f64_u64, interesting_floats_f64, fast_float_to_integer::f64_to_u64, u64} +create_interesting_floats_test! {interesting_f64_i128, interesting_floats_f64, fast_float_to_integer::f64_to_i128, i128} +create_interesting_floats_test! {interesting_f64_u128, interesting_floats_f64, fast_float_to_integer::f64_to_u128, u128} diff --git a/xtask/Cargo.toml b/xtask/Cargo.toml new file mode 100644 index 0000000..0555b03 --- /dev/null +++ b/xtask/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "xtask" +version = "0.1.0" +edition = "2021" +publish = false + +[dependencies] +anyhow = "1.0" +regex = { version = "1.11.1", default-features = false } diff --git a/xtask/src/main.rs b/xtask/src/main.rs new file mode 100644 index 0000000..a6e0bcc --- /dev/null +++ b/xtask/src/main.rs @@ -0,0 +1,374 @@ +use std::{ + borrow::Cow, + collections::{hash_map::Entry, HashMap}, + fmt::Write, + path::PathBuf, + process::{Command, Output}, + sync::LazyLock, +}; + +use anyhow::{anyhow, Context, Result}; +use regex::Regex; + +fn main() -> Result<()> { + let command = std::env::args() + .nth(1) + .context("missing command argument")?; + let command = match command.as_str() { + "check" => |target: &Target| check(target, false), + "clippy" => |target: &Target| check(target, true), + "target" => |target: &Target| expected_target(target), + "test" => |target: &Target| qemu_test(target), + "asm" => |target: 
&Target| show_asm(target), + "all" => |target: &Target| { + check(target, true).context("check")?; + expected_target(target).context("target")?; + qemu_test(target).context("test")?; + show_asm(target).context("asm")?; + Ok(()) + }, + _ => return Err(anyhow!("unknown command")), + }; + + for target in TARGETS { + println!("Handling target {}.", target.name); + install_rustup_target(target.rust_target).context("install rustup target")?; + command(target)?; + } + + Ok(()) +} + +struct Target { + name: &'static str, + rust_target: &'static str, + expected_target_module: &'static str, + feature: &'static str, + qemu: &'static str, + generate_assembly: bool, + force_default: bool, +} + +const TARGETS: &[Target] = &[ + Target { + name: "x86_64_sse", + rust_target: "x86_64-unknown-linux-gnu", + expected_target_module: "x86_64_sse", + feature: "+sse", + qemu: "x86_64", + generate_assembly: true, + force_default: false, + }, + Target { + name: "x86_64_default", + rust_target: "x86_64-unknown-linux-gnu", + expected_target_module: "default", + feature: "", + qemu: "x86_64", + generate_assembly: true, + force_default: true, + }, + Target { + name: "x86_sse", + rust_target: "i686-unknown-linux-gnu", + expected_target_module: "x86_sse", + feature: "+sse", + qemu: "i386", + generate_assembly: true, + force_default: false, + }, + Target { + name: "default", + rust_target: "i686-unknown-linux-gnu", + expected_target_module: "default", + feature: "-sse", + qemu: "i386", + generate_assembly: false, + force_default: false, + }, +]; + +/// Convert a Command to a string representation you can paste in your terminal. +/// +/// Assumes that the command does not run into tricky formatting edge cases with characters that need to be escaped. 
+fn command_to_string(command: &Command) -> String { + fn string_is_not_tricky(string: &str) -> bool { + string.chars().all(|char| { + char.is_ascii_alphanumeric() || ['-', '_', '=', '/', '.', '+', ' '].contains(&char) + }) + } + + fn handle_space(s: &str) -> Cow<str> { + if s.contains(' ') { + format!("\"{s}\"").into() + } else { + s.into() + } + } + + let mut string = String::new(); + + let envs = command.get_envs(); + let has_envs = envs.len() > 0; + if has_envs { + write!(&mut string, "env").unwrap(); + } + for (key, value) in envs { + let key = key.to_str().unwrap(); + let value = value.unwrap_or_default().to_str().unwrap(); + assert!(string_is_not_tricky(key), "{key:?}"); + assert!(string_is_not_tricky(value), "{value:?}"); + let key = handle_space(key); + let value = handle_space(value); + write!(&mut string, " {key}={value}").unwrap(); + } + if has_envs { + write!(&mut string, " ").unwrap(); + } + + let program = command.get_program().to_str().unwrap(); + assert!(string_is_not_tricky(program), "{program:?}"); + let program = handle_space(program); + write!(&mut string, "{program}").unwrap(); + + for arg in command.get_args() { + let arg = arg.to_str().unwrap(); + assert!(string_is_not_tricky(arg), "{arg:?}"); + let arg = handle_space(arg); + write!(&mut string, " {}", arg).unwrap(); + } + + string +} + +/// Run a command while checking status code and providing a better error message. 
+fn run_command(command: &mut Command) -> Result<Output> { + let make_string = |command: &Command| format!("command: {}", command_to_string(command)); + let output = command + .output() + .context("command failed") + .with_context(|| make_string(command))?; + if !output.status.success() { + let stdout = String::from_utf8_lossy(output.stdout.as_slice()); + let stderr = String::from_utf8_lossy(output.stderr.as_slice()); + return Err(anyhow!("command status indicates error") + .context(format!("command: {}", make_string(command))) + .context(format!("stdout: {stdout}")) + .context(format!("stderr: {stderr}"))); + } + Ok(output) +} + +fn install_rustup_target(target: &str) -> Result<()> { + run_command(Command::new("rustup").args(["--quiet", "target", "add", target]))?; + Ok(()) +} + +fn cargo_with_target( + Target { + rust_target: target, + feature, + .. + }: &Target, + subcommand: &str, + rustflags: &[&str], +) -> Command { + let mut flags = String::new(); + write!(&mut flags, "-Ctarget-feature={feature}").unwrap(); + for flag in rustflags { + write!(&mut flags, " {flag}").unwrap(); + } + let target_arg = format!("--target={target}"); + let mut command = Command::new("cargo"); + command + .env("RUSTFLAGS", flags.as_str()) + .args([subcommand, target_arg.as_str()]); + command +} + +fn check(target: &Target, clippy: bool) -> Result<()> { + let command = match clippy { + true => "clippy", + false => "check", + }; + let features = if target.force_default { + "--features=force-default" + } else { + "--features=" + }; + let mut command = cargo_with_target(target, command, &[]); + command.args([ + "--quiet", + "--frozen", + "--package=fast-float-to-integer", + "--all-targets", + features, + ]); + if clippy { + command.args(["--", "-D=warnings"]); + } + run_command(&mut command)?; + Ok(()) +} + +fn show_asm(target: &Target) -> Result<()> { + if !target.generate_assembly { + return Ok(()); + } + + let functions = [ + "f32_to_i8", + "f32_to_u8", + "f32_to_i16", + "f32_to_u16", + 
"f32_to_i32", + "f32_to_u32", + "f32_to_i64", + "f32_to_u64", + "f32_to_i128", + "f32_to_u128", + "f64_to_i8", + "f64_to_u8", + "f64_to_i16", + "f64_to_u16", + "f64_to_i32", + "f64_to_u32", + "f64_to_i64", + "f64_to_u64", + "f64_to_i128", + "f64_to_u128", + ]; + + let mut features = "--features=show-asm".to_owned(); + if target.force_default { + features.push_str(",force-default"); + } + + for function in functions { + let output = run_command(cargo_with_target(target, "asm", &[]).args([ + // "--quiet", // will be supported in next cargo asm release + "--no-color", + "--simplify", + "--include-constants", + "--package=fast-float-to-integer", + "--lib", + features.as_str(), + "--profile=show-asm", + function, + ]))?; + let output = std::str::from_utf8(output.stdout.as_slice()).unwrap(); + let output = normalize_assembly(output); + + let mut path = PathBuf::new(); + path.push("generated assembly"); + path.push(target.name); + std::fs::create_dir_all(&path).context("create_dir_all")?; + path.push(function); + std::fs::write(&path, output.as_ref()).context("write generated assembly")?; + } + + Ok(()) +} + +/// We diff the generated assembly to make sure it doesn't accidentally change. This requires the assembly to be deterministic. By default, some parts of the assembly like labels are not deterministic. This function fixes that. 
+fn normalize_assembly(assembly: &str) -> Cow<str> { + const REGEX: &str = r"\.L([[:alnum:]]|_)+"; + static RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(REGEX).unwrap()); + + let mut matches = RE.find_iter(assembly).peekable(); + if matches.peek().is_none() { + return Cow::Borrowed(assembly); + } + + let mut result = String::new(); + let mut labels = HashMap::<&str, usize>::new(); + let mut previous_match_end = 0usize; + for label in matches { + let mut label_index = labels.len(); + label_index = match labels.entry(label.as_str()) { + Entry::Occupied(entry) => *entry.get(), + Entry::Vacant(entry) => *entry.insert(label_index), + }; + + let range = label.range(); + result.push_str(&assembly[previous_match_end..range.start]); + write!(&mut result, ".L_{label_index}").unwrap(); + previous_match_end = range.end; + } + result.push_str(&assembly[previous_match_end..]); + Cow::Owned(result) +} + +#[test] +fn normalize_assembly_() { + let input = "abcd"; + let expected = "abcd"; + let actual = normalize_assembly(input); + assert_eq!(actual, expected); + + let input = "a .LCPI2_0 b .LCPI3_0 c .LCPI2_0 d"; + let expected = "a .L_0 b .L_1 c .L_0 d"; + let actual = normalize_assembly(input); + assert_eq!(actual, expected); +} + +fn qemu_test(target: &Target) -> Result<()> { + let features = if target.force_default { + "--features=force-default" + } else { + "--features=" + }; + let output = run_command(cargo_with_target(target, "test", &[]).args([ + "--frozen", + "--no-run", + "--package=fast-float-to-integer", + "--test=test", + features, + ]))?; + let stderr = std::str::from_utf8(output.stderr.as_slice()).context("output is not utf8")?; + + let test_binary_path = stderr + .rsplit('\n') + .nth(1) + .context("unexpected output")? + .strip_prefix(" Executable tests/test.rs (") + .context("unexpected output")? 
+ .strip_suffix(')') + .context("unexpected output")?; + + run_command( + Command::new(format!("qemu-{}", target.qemu)).args([test_binary_path, "--test-threads=1"]), + )?; + + Ok(()) +} + +fn expected_target(target: &Target) -> Result<()> { + let features = if target.force_default { + "--features=force-default" + } else { + "--features=" + }; + let output = run_command(cargo_with_target(target, "test", &[]).args([ + "--quiet", + "--package=fast-float-to-integer", + features, + "--lib", + "--", + "--list", + ]))?; + let actual = std::str::from_utf8(output.stdout.as_slice()) + .context("output is not utf8")? + .strip_prefix("target_") + .context("unexpected stdout")? + .strip_suffix(": test\n") + .context("unexpected stdout")?; + if actual != target.expected_target_module { + return Err(anyhow!( + "actual target {} does not match expected target {}", + actual, + target.expected_target_module, + )); + } + Ok(()) +}