diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4241206c0..1451b64a5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,13 +32,6 @@ jobs: run: | sudo apt-get update -q sudo apt-get install libopenmpi-dev -y --no-install-recommends - - - name: Install the rust-ptx-linker - run: | - wget https://apt.llvm.org/llvm.sh && chmod +x llvm.sh - sudo ./llvm.sh $(rustc --version -v | grep -oP "LLVM version: \K\d+") - rm llvm.sh - cargo install rust-ptx-linker --git https://github.com/juntyr/rust-ptx-linker --force - name: Check the default features run: cargo check @@ -64,13 +57,6 @@ jobs: run: | sudo apt-get update -q sudo apt-get install libopenmpi-dev -y --no-install-recommends - - - name: Install the rust-ptx-linker - run: | - wget https://apt.llvm.org/llvm.sh && chmod +x llvm.sh - sudo ./llvm.sh $(rustc --version -v | grep -oP "LLVM version: \K\d+") - rm llvm.sh - cargo install rust-ptx-linker --git https://github.com/juntyr/rust-ptx-linker --force - name: Run the test-suite run: cargo test --workspace --no-fail-fast @@ -104,13 +90,6 @@ jobs: sudo apt-get update -q sudo apt-get install libopenmpi-dev -y --no-install-recommends - - name: Install the rust-ptx-linker - run: | - wget https://apt.llvm.org/llvm.sh && chmod +x llvm.sh - sudo ./llvm.sh $(rustc --version -v | grep -oP "LLVM version: \K\d+") - rm llvm.sh - cargo install rust-ptx-linker --git https://github.com/juntyr/rust-ptx-linker --force - - name: Check the code style for the default features run: cargo clippy -- -D warnings diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 101d07515..6bcc5138b 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -31,26 +31,26 @@ jobs: sudo apt-get update -q sudo apt-get install libopenmpi-dev -y --no-install-recommends - - name: Install the Rust toolchain + - name: Generate the coverage data run: | - cargo install grcov --force --locked - rustup component add 
llvm-tools-preview - - - name: Install the rust-ptx-linker + cargo clean + cargo \ + --config "target.'cfg(all())'.rustflags=['-Cinstrument-coverage']" \ + test --workspace --all-targets + env: + CARGO_INCREMENTAL: 0 + RUSTDOCFLAGS: -Cinstrument-coverage + LLVM_PROFILE_FILE: coverage/coverage-%p-%m.profraw + + - name: Download grcov run: | - wget https://apt.llvm.org/llvm.sh && chmod +x llvm.sh - sudo ./llvm.sh $(rustc --version -v | grep -oP "LLVM version: \K\d+") - rm llvm.sh - cargo install rust-ptx-linker --git https://github.com/juntyr/rust-ptx-linker --force - - - name: Generate the code coverage + curl -sL https://github.com/mozilla/grcov/releases/download/v0.8.18/grcov-x86_64-unknown-linux-gnu.tar.bz2 | tar jxf - + chmod +x ./grcov + + - name: Generate the coverage reports run: | - RUSTFLAGS="-Cinstrument-coverage" \ - LLVM_PROFILE_FILE="codecov-%p-%m.profraw" \ - cargo test --workspace - - grcov . -s . --binary-path ./target/debug/ \ - -t cobertura -o cobertura.xml --branch \ + ./grcov . -s . 
--binary-path ./target/debug/deps \ + -t lcov -o coverage.lcov --branch \ --keep-only "necsim/*" \ --keep-only "rustcoalescence/*" \ --ignore-not-existing \ @@ -62,4 +62,5 @@ jobs: uses: codecov/codecov-action@v1 with: token: ${{ secrets.CODECOV_TOKEN }} + files: coverage.lcov fail_ci_if_error: true diff --git a/.github/workflows/rustdoc.yml b/.github/workflows/rustdoc.yml index 55a80c515..b03fe3334 100644 --- a/.github/workflows/rustdoc.yml +++ b/.github/workflows/rustdoc.yml @@ -21,13 +21,6 @@ jobs: sudo apt-get update -q sudo apt-get install libopenmpi-dev -y --no-install-recommends - - name: Install the rust-ptx-linker - run: | - wget https://apt.llvm.org/llvm.sh && chmod +x llvm.sh - sudo ./llvm.sh $(rustc --version -v | grep -oP "LLVM version: \K\d+") - rm llvm.sh - cargo install rust-ptx-linker --git https://github.com/juntyr/rust-ptx-linker --force - - name: Build the Documentation run: | RUSTDOCFLAGS="\ diff --git a/.gitpod.Dockerfile b/.gitpod.Dockerfile index 8f03bc5a6..ba1ab8901 100644 --- a/.gitpod.Dockerfile +++ b/.gitpod.Dockerfile @@ -8,15 +8,14 @@ RUN echo "debconf debconf/frontend select Noninteractive" | sudo debconf-set-sel echo "keyboard-configuration keyboard-configuration/layout select 'English (US)'" | sudo debconf-set-selections && \ echo "keyboard-configuration keyboard-configuration/layoutcode select 'us'" | sudo debconf-set-selections && \ echo "resolvconf resolvconf/linkify-resolvconf boolean false" | sudo debconf-set-selections && \ - wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin && \ - sudo mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \ - sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub && \ - sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /" && \ + wget 
https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb -O cuda_keyring.deb && \ + sudo dpkg -i cuda_keyring.deb && \ + rm cuda_keyring.deb && \ + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-ubuntu2204.pin && \ + sudo mv cuda-ubuntu2204.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \ + sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /" && \ sudo apt-get update -q && \ - sudo apt-get install cuda -y --no-install-recommends && \ - wget https://apt.llvm.org/llvm.sh && chmod +x llvm.sh && \ - sudo ./llvm.sh $(rustc --version -v | grep -oP "LLVM version: \K\d+") && \ - rm llvm.sh && \ + sudo apt-get install cuda-12-3 -y --no-install-recommends && \ sudo apt-get clean autoclean && \ sudo apt-get autoremove -y && \ sudo rm -rf /var/lib/{apt,dpkg,cache,log}/ @@ -31,6 +30,5 @@ RUN sudo apt-get update -q && \ sudo apt-get autoremove -y && \ sudo rm -rf /var/lib/{apt,dpkg,cache,log}/ -RUN cargo install rust-ptx-linker --git https://github.com/juntyr/rust-ptx-linker --force && \ - cargo install cargo-mpirun --force && \ +RUN cargo install cargo-mpirun --force && \ cargo install cargo-reaper --git https://github.com/juntyr/grim-reaper --force diff --git a/Cargo.lock b/Cargo.lock index 9cda7c632..aa89df4c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -175,7 +175,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.64", + "syn 2.0.65", "which", ] @@ -236,7 +236,7 @@ checksum = "4da9a32f3fed317401fa3c862968128267c3106685286e15d5aaa3d7389c2f60" dependencies = [ "proc-macro2", "quote", - "syn 2.0.64", + "syn 2.0.65", ] [[package]] @@ -280,9 +280,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.97" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "099a5357d84c4c61eb35fc8eafa9a79a902c2f76911e5747ced4e032edd8d9b4" +checksum =
"41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" dependencies = [ "jobserver", "libc", @@ -355,7 +355,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.64", + "syn 2.0.65", ] [[package]] @@ -382,9 +382,9 @@ dependencies = [ [[package]] name = "const-type-layout" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0de72aa3d2f0cfa2d220013066afd32a9e716447c74205d9c2c5944b2cac37f3" +checksum = "5daceeb879dcbf74fb11d2aba295197eccecaae7b65e19698a3540d53d7345da" dependencies = [ "const-type-layout-derive", ] @@ -423,9 +423,9 @@ dependencies = [ [[package]] name = "crc32fast" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" +checksum = "58ebf8d6963185c7625d2c3c3962d99eb8936637b1427536d21dc36ae402ebad" dependencies = [ "cfg-if", ] @@ -505,7 +505,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.11.1", - "syn 2.0.64", + "syn 2.0.65", ] [[package]] @@ -527,7 +527,7 @@ checksum = "733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178" dependencies = [ "darling_core 0.20.9", "quote", - "syn 2.0.64", + "syn 2.0.65", ] [[package]] @@ -538,7 +538,7 @@ checksum = "4e018fccbeeb50ff26562ece792ed06659b9c2dae79ece77c4456bb10d9bf79b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.64", + "syn 2.0.65", ] [[package]] @@ -580,7 +580,7 @@ dependencies = [ "darling 0.20.9", "proc-macro2", "quote", - "syn 2.0.64", + "syn 2.0.65", ] [[package]] @@ -600,7 +600,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b" dependencies = [ "derive_builder_core 0.20.0", - "syn 2.0.64", + "syn 2.0.65", ] [[package]] @@ -611,7 +611,7 @@ checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.64", 
+ "syn 2.0.65", ] [[package]] @@ -626,6 +626,12 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + [[package]] name = "erased-serde" version = "0.4.5" @@ -670,6 +676,15 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1912868bad388722991f80323855d922e32b09ad00d76a13a98e465358765079" +[[package]] +name = "find_cuda_helper" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9f9e65c593dd01ac77daad909ea4ad17f0d6d1776193fc8ea766356177abdad" +dependencies = [ + "glob", +] + [[package]] name = "findshlibs" version = "0.10.2" @@ -704,6 +719,19 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673" +[[package]] +name = "generator" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cc16584ff22b460a382b7feec54b23d2908d858152e5739a120b949293bd74e" +dependencies = [ + "cc", + "libc", + "log", + "rustversion", + "windows", +] + [[package]] name = "getrandom" version = "0.2.15" @@ -734,7 +762,7 @@ checksum = "53010ccb100b96a67bc32c0175f0ed1426b31b655d562898e57325f81c023ac0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.64", + "syn 2.0.65", ] [[package]] @@ -811,6 +839,16 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" +[[package]] +name = "indexmap" +version = "2.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.0" @@ -912,6 +950,29 @@ version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +[[package]] +name = "loom" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff50ecb28bb86013e935fb6683ab1f6d3a20016f123c76fd4c27470076ac30f5" +dependencies = [ + "cfg-if", + "generator", + "pin-utils", + "scoped-tls", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + [[package]] name = "memchr" version = "2.7.2" @@ -964,7 +1025,7 @@ source = "git+https://github.com/juntyr/rsmpi?rev=2988f56#2988f56e350311acc04119 dependencies = [ "proc-macro2", "quote", - "syn 2.0.64", + "syn 2.0.65", ] [[package]] @@ -1196,6 +1257,16 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -1211,6 +1282,21 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "oneshot" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f6640c6bda7731b1fdbab747981a0f896dd1fedaf9f4a53fa237a04a84431f4" +dependencies = [ + "loom", +] + +[[package]] +name = "overload" +version = "0.1.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + [[package]] name = "pcg_rand" version = "0.13.0" @@ -1224,6 +1310,18 @@ dependencies = [ "serde", ] +[[package]] +name = "pin-project-lite" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "pkg-config" version = "0.3.30" @@ -1252,7 +1350,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" dependencies = [ "proc-macro2", - "syn 2.0.64", + "syn 2.0.65", ] [[package]] @@ -1281,9 +1379,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.82" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ad3d49ab951a01fbaafe34f2ec74122942fe18a3f9814c3268f1bb72042131b" +checksum = "0b33eb56c327dec362a9e55b3ad14f9d2f0904fb5a5b03b513ab5465399e9f43" dependencies = [ "unicode-ident", ] @@ -1300,8 +1398,8 @@ dependencies = [ [[package]] name = "ptx-builder" -version = "0.5.3" -source = "git+https://github.com/juntyr/rust-ptx-builder?rev=1f1f49d#1f1f49df761e919f721ef234722ee7b2cfcf9104" +version = "0.6.0" +source = "git+https://github.com/juntyr/rust-ptx-builder?rev=aeb3b68#aeb3b68a85e3a5ee10757b357104e554ed44729f" dependencies = [ "anyhow", "colored", @@ -1360,8 +1458,17 @@ checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" dependencies = [ "aho-corasick", "memchr", - "regex-automata", - "regex-syntax", + "regex-automata 0.4.6", + "regex-syntax 0.8.3", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", ] [[package]] @@ -1372,9 +1479,15 @@ checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.8.3", ] +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + [[package]] name = "regex-syntax" version = "0.8.3" @@ -1410,24 +1523,39 @@ dependencies = [ [[package]] name = "rust-cuda" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=f395253#f395253bb244827bc46600ef5ee87f687eda249b" +source = "git+https://github.com/juntyr/rust-cuda?rev=697dcf5#697dcf54bf362cd08e28d282fa947823382b49ff" dependencies = [ "const-type-layout", "final", + "oneshot", + "regex", "rust-cuda-derive", - "rust-cuda-ptx-jit", + "rust-cuda-kernel", "rustacuda", "rustacuda_core", "rustacuda_derive", + "safer_owning_ref", ] [[package]] name = "rust-cuda-derive" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=f395253#f395253bb244827bc46600ef5ee87f687eda249b" +source = "git+https://github.com/juntyr/rust-cuda?rev=697dcf5#697dcf54bf362cd08e28d282fa947823382b49ff" +dependencies = [ + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "rust-cuda-kernel" +version = "0.1.0" +source = "git+https://github.com/juntyr/rust-cuda?rev=697dcf5#697dcf54bf362cd08e28d282fa947823382b49ff" dependencies = [ "cargo_metadata", "colored", + "find_cuda_helper", "lazy_static", "proc-macro-error", "proc-macro2", @@ -1438,23 +1566,13 @@ dependencies = [ "serde_json", "strip-ansi-escapes", "syn 1.0.109", -] - -[[package]] -name = "rust-cuda-ptx-jit" -version = "0.1.0" -source = 
"git+https://github.com/juntyr/rust-cuda?rev=f395253#f395253bb244827bc46600ef5ee87f687eda249b" -dependencies = [ - "lazy_static", - "regex", - "rustacuda", + "thiserror", ] [[package]] name = "rustacuda" version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47208516ab5338b592d63560e90eaef405d0ec880347eaf7742d893b0a31e228" +source = "git+https://github.com/juntyr/RustaCUDA?rev=c6ea7cc#c6ea7ccf24b15c4edbd5576852a8dcdc7df272b0" dependencies = [ "bitflags 1.3.2", "cuda-driver-sys", @@ -1465,14 +1583,12 @@ dependencies = [ [[package]] name = "rustacuda_core" version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3858b08976dc2f860c5efbbb48cdcb0d4fafca92a6ac0898465af16c0dbe848" +source = "git+https://github.com/juntyr/RustaCUDA?rev=c6ea7cc#c6ea7ccf24b15c4edbd5576852a8dcdc7df272b0" [[package]] name = "rustacuda_derive" version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43ce8670a1a1d0fc2514a3b846dacdb65646f9bd494b6674cfacbb4ce430bd7e" +source = "git+https://github.com/juntyr/RustaCUDA?rev=c6ea7cc#c6ea7ccf24b15c4edbd5576852a8dcdc7df272b0" dependencies = [ "proc-macro2", "quote", @@ -1651,12 +1767,33 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rustversion" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" + [[package]] name = "ryu" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +[[package]] +name = "safer_owning_ref" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af21b9de2df966f61c07b5b541c81c98225b86e48ababd43366a642654de30ef" +dependencies = [ + "stable_deref_trait", +] + +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "seahash" version = "4.1.0" @@ -1689,7 +1826,7 @@ checksum = "6048858004bcff69094cd972ed40a32500f153bd3be9f716b2eed2e8217c4838" dependencies = [ "proc-macro2", "quote", - "syn 2.0.64", + "syn 2.0.65", ] [[package]] @@ -1724,6 +1861,15 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_spanned" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79e674e01f999af37c49f70a6ede167a8a60b2503e56c5599532a65baa5969a0" +dependencies = [ + "serde", +] + [[package]] name = "serde_state" version = "0.4.8" @@ -1733,6 +1879,15 @@ dependencies = [ "serde", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shell-words" version = "1.1.0" @@ -1766,6 +1921,12 @@ version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "streaming-iterator" version = "0.1.9" @@ -1806,9 +1967,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.64" +version = "2.0.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ad3dee41f36859875573074334c200d1add8e4a87bb37113ebd31d926b7b11f" +checksum = "d2863d96a84c6439701d7a38f9de935ec562c8832cc55d1dde0f513b52fad106" dependencies = [ "proc-macro2", "quote", @@ -1832,7 +1993,17 @@ checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" dependencies = [ 
"proc-macro2", "quote", - "syn 2.0.64", + "syn 2.0.65", +] + +[[package]] +name = "thread_local" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if", + "once_cell", ] [[package]] @@ -1857,11 +2028,97 @@ dependencies = [ [[package]] name = "toml" -version = "0.5.11" +version = "0.8.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4e43f8cc456c9704c851ae29c67e17ef65d2c30017c17a9765b89c382dc8bba" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +checksum = "c127785850e8c20836d49732ae6abfa47616e60bf9d9f57c43c250361a9db96c" dependencies = [ + "indexmap", "serde", + "serde_spanned", + "toml_datetime", + "winnow", +] + +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.65", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", ] [[package]] @@ -1908,6 +2165,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + [[package]] name = "vcpkg" version = "0.2.15" @@ -1967,7 +2230,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.64", + "syn 2.0.65", "wasm-bindgen-shared", ] @@ -1989,7 +2252,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.64", + "syn 2.0.65", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -2040,6 +2303,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +dependencies = [ + "windows-targets 0.48.5", +] + 
[[package]] name = "windows-sys" version = "0.48.0" @@ -2179,6 +2451,15 @@ version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" +[[package]] +name = "winnow" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3c52e9c97a68071b23e836c9380edae937f17b9c4667bd021973efc689f618d" +dependencies = [ + "memchr", +] + [[package]] name = "zerocopy" version = "0.7.34" @@ -2196,5 +2477,5 @@ checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.64", + "syn 2.0.65", ] diff --git a/README.md b/README.md index 0698d53eb..927e82462 100644 --- a/README.md +++ b/README.md @@ -41,11 +41,6 @@ First, you need to clone the necsim-rust GitHub repository: ``` necsim-rust is written in the [Rust language](https://www.rust-lang.org/tools/install), which must be installed in your `PATH` first. necsim-rust includes a `rust-toolchain` file that configures Rust to use a working nightly toolchain version and install all components required for compilation. If you want to use necsim-rust on a target different than `x86_64-unknown-linux-gnu`, please update the [rust-toolchain](rust-toolchain) config file accordingly. -If you also want to use the CUDA-based algorithm, it is **required** that you also install the following: -```shell -> cargo install ptx-linker --force --locked -``` - ## Installation To install `rustcoalescence`, you need to decide which algorithms you want to compile with it. You can enable the provided algorithms by enabling their corresponding features. 
For instance, to compile all CPU-based algorithms with all scenarios, you can use diff --git a/necsim/core/Cargo.toml b/necsim/core/Cargo.toml index f481e5de1..ce503bce7 100644 --- a/necsim/core/Cargo.toml +++ b/necsim/core/Cargo.toml @@ -15,12 +15,12 @@ cuda = ["rust-cuda"] necsim-core-maths = { path = "maths" } necsim-core-bond = { path = "bond" } -const-type-layout = { version = "0.3.0", features = ["derive"] } +const-type-layout = { version = "0.3.1", features = ["derive"] } contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "f395253", features = ["derive"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "697dcf5", features = ["derive"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "f395253", features = ["derive", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "697dcf5", features = ["derive", "host"], optional = true } diff --git a/necsim/core/bond/Cargo.toml b/necsim/core/bond/Cargo.toml index fc004b4da..445bbc9a2 100644 --- a/necsim/core/bond/Cargo.toml +++ b/necsim/core/bond/Cargo.toml @@ -13,5 +13,5 @@ default = [] [dependencies] necsim-core-maths = { path = "../maths" } -const-type-layout = { version = "0.3.0", features = ["derive"] } +const-type-layout = { version = "0.3.1", features = ["derive"] } serde = { version = "1.0", default-features = false, features = ["derive"] } diff --git a/necsim/core/bond/src/closed_open_unit_f64.rs b/necsim/core/bond/src/closed_open_unit_f64.rs index 0d2155c13..e6424106a 100644 --- a/necsim/core/bond/src/closed_open_unit_f64.rs +++ b/necsim/core/bond/src/closed_open_unit_f64.rs @@ -88,6 +88,7 @@ impl ClosedOpenUnitF64 { } impl PartialEq for ClosedOpenUnitF64 { + 
#[allow(clippy::unconditional_recursion)] fn eq(&self, other: &Self) -> bool { self.0.eq(&other.0) } diff --git a/necsim/core/bond/src/closed_unit_f64.rs b/necsim/core/bond/src/closed_unit_f64.rs index 664c9f20e..d5c0bdc02 100644 --- a/necsim/core/bond/src/closed_unit_f64.rs +++ b/necsim/core/bond/src/closed_unit_f64.rs @@ -122,6 +122,7 @@ impl From for ClosedUnitF64 { } impl PartialEq for ClosedUnitF64 { + #[allow(clippy::unconditional_recursion)] fn eq(&self, other: &Self) -> bool { self.0.eq(&other.0) } diff --git a/necsim/core/bond/src/non_negative_f64.rs b/necsim/core/bond/src/non_negative_f64.rs index cf60da503..89e3ea295 100644 --- a/necsim/core/bond/src/non_negative_f64.rs +++ b/necsim/core/bond/src/non_negative_f64.rs @@ -161,6 +161,7 @@ impl From for NonNegativeF64 { } impl PartialEq for NonNegativeF64 { + #[allow(clippy::unconditional_recursion)] fn eq(&self, other: &Self) -> bool { self.0.eq(&other.0) } diff --git a/necsim/core/bond/src/non_positive_f64.rs b/necsim/core/bond/src/non_positive_f64.rs index 62807c4bf..2e7cce0e8 100644 --- a/necsim/core/bond/src/non_positive_f64.rs +++ b/necsim/core/bond/src/non_positive_f64.rs @@ -94,6 +94,7 @@ impl NonPositiveF64 { } impl PartialEq for NonPositiveF64 { + #[allow(clippy::unconditional_recursion)] fn eq(&self, other: &Self) -> bool { self.0.eq(&other.0) } diff --git a/necsim/core/bond/src/open_closed_unit_f64.rs b/necsim/core/bond/src/open_closed_unit_f64.rs index a82fdfc37..b4b3441dc 100644 --- a/necsim/core/bond/src/open_closed_unit_f64.rs +++ b/necsim/core/bond/src/open_closed_unit_f64.rs @@ -94,6 +94,7 @@ impl OpenClosedUnitF64 { } impl PartialEq for OpenClosedUnitF64 { + #[allow(clippy::unconditional_recursion)] fn eq(&self, other: &Self) -> bool { self.0.eq(&other.0) } diff --git a/necsim/core/bond/src/positive_f64.rs b/necsim/core/bond/src/positive_f64.rs index ac8bf4090..de5766741 100644 --- a/necsim/core/bond/src/positive_f64.rs +++ b/necsim/core/bond/src/positive_f64.rs @@ -127,6 +127,7 @@ impl 
From for PositiveF64 { } impl PartialEq for PositiveF64 { + #[allow(clippy::unconditional_recursion)] fn eq(&self, other: &Self) -> bool { self.0.eq(&other.0) } diff --git a/necsim/core/src/cogs/coalescence_sampler.rs b/necsim/core/src/cogs/coalescence_sampler.rs index 93af7bc92..f4d0aa4da 100644 --- a/necsim/core/src/cogs/coalescence_sampler.rs +++ b/necsim/core/src/cogs/coalescence_sampler.rs @@ -28,7 +28,6 @@ pub trait CoalescenceSampler, S: LineageStore> ) -> (IndexedLocation, LineageInteraction); } -#[allow(clippy::unsafe_derive_deserialize)] #[derive(Debug, PartialEq, Serialize, Deserialize, TypeLayout)] #[repr(transparent)] pub struct CoalescenceRngSample(ClosedOpenUnitF64); diff --git a/necsim/core/src/event.rs b/necsim/core/src/event.rs index 40108ae85..af42ac633 100644 --- a/necsim/core/src/event.rs +++ b/necsim/core/src/event.rs @@ -55,7 +55,6 @@ pub struct Dispersal { } #[allow(clippy::module_name_repetitions)] -#[allow(clippy::unsafe_derive_deserialize)] #[derive(Debug, Clone, Serialize, Deserialize, TypeLayout)] #[repr(C)] pub struct SpeciationEvent { diff --git a/necsim/core/src/landscape/extent.rs b/necsim/core/src/landscape/extent.rs index 063a8c445..c38e7afc3 100644 --- a/necsim/core/src/landscape/extent.rs +++ b/necsim/core/src/landscape/extent.rs @@ -2,12 +2,14 @@ use necsim_core_bond::OffByOneU32; use super::Location; -#[allow(clippy::module_name_repetitions, clippy::unsafe_derive_deserialize)] +#[allow(clippy::module_name_repetitions)] #[derive(PartialEq, Eq, Clone, Debug, serde::Deserialize, serde::Serialize, TypeLayout)] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] +#[cfg_attr(feature = "cuda", cuda(ignore))] #[serde(rename = "Extent")] #[serde(deny_unknown_fields)] -#[repr(C)] pub struct LandscapeExtent { + #[cfg_attr(feature = "cuda", cuda(embed))] origin: Location, width: OffByOneU32, height: OffByOneU32, diff --git a/necsim/core/src/landscape/location.rs b/necsim/core/src/landscape/location.rs index 
c3686e5c6..6bcc520a6 100644 --- a/necsim/core/src/landscape/location.rs +++ b/necsim/core/src/landscape/location.rs @@ -2,12 +2,14 @@ use serde::{Deserialize, Serialize}; use crate::cogs::Backup; -#[allow(clippy::unsafe_derive_deserialize)] +#[allow(clippy::module_name_repetitions)] #[derive( Eq, PartialEq, PartialOrd, Ord, Clone, Hash, Debug, Serialize, Deserialize, TypeLayout, )] -#[serde(deny_unknown_fields)] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[repr(C)] +#[cfg_attr(feature = "cuda", cuda(ignore))] +#[serde(deny_unknown_fields)] pub struct Location { x: u32, y: u32, @@ -46,10 +48,13 @@ impl From for Location { #[derive( Eq, PartialEq, PartialOrd, Ord, Clone, Hash, Debug, Serialize, Deserialize, TypeLayout, )] -#[allow(clippy::module_name_repetitions, clippy::unsafe_derive_deserialize)] -#[serde(from = "IndexedLocationRaw", into = "IndexedLocationRaw")] +#[allow(clippy::module_name_repetitions)] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[repr(C)] +#[cfg_attr(feature = "cuda", cuda(ignore))] +#[serde(from = "IndexedLocationRaw", into = "IndexedLocationRaw")] pub struct IndexedLocation { + #[cfg_attr(feature = "cuda", cuda(embed))] location: Location, index: u32, } @@ -74,7 +79,6 @@ impl IndexedLocation { #[derive(Serialize, Deserialize)] #[serde(deny_unknown_fields)] #[serde(rename = "IndexedLocation")] -#[repr(C)] struct IndexedLocationRaw { x: u32, y: u32, diff --git a/necsim/core/src/landscape/mod.rs b/necsim/core/src/landscape/mod.rs index 6c05344ca..41a00b87f 100644 --- a/necsim/core/src/landscape/mod.rs +++ b/necsim/core/src/landscape/mod.rs @@ -1,6 +1,6 @@ mod extent; mod location; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use extent::{LandscapeExtent, LocationIterator}; pub use location::{IndexedLocation, Location}; diff --git a/necsim/core/src/lib.rs b/necsim/core/src/lib.rs index 86e145fbc..a8da66266 100644 --- 
a/necsim/core/src/lib.rs +++ b/necsim/core/src/lib.rs @@ -1,7 +1,6 @@ #![deny(clippy::pedantic)] #![no_std] #![feature(const_type_name)] -#![feature(control_flow_enum)] #![feature(min_specialization)] #[doc(hidden)] diff --git a/necsim/core/src/lineage.rs b/necsim/core/src/lineage.rs index 8e20ba0a5..398973fd0 100644 --- a/necsim/core/src/lineage.rs +++ b/necsim/core/src/lineage.rs @@ -16,6 +16,7 @@ use crate::{ }; #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, TypeLayout)] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[repr(transparent)] pub struct GlobalLineageReference(u64); @@ -94,21 +95,29 @@ impl From> for LineageInteraction { } } -#[allow(clippy::unsafe_derive_deserialize)] +#[allow(clippy::module_name_repetitions)] #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, TypeLayout)] -#[serde(deny_unknown_fields)] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[repr(C)] +#[cfg_attr(feature = "cuda", cuda(ignore))] +#[serde(deny_unknown_fields)] pub struct Lineage { + #[cfg_attr(feature = "cuda", cuda(embed))] + #[cfg_attr(feature = "cuda", cuda(ignore))] #[serde(alias = "id", alias = "ref")] pub global_reference: GlobalLineageReference, + #[cfg_attr(feature = "cuda", cuda(ignore))] #[serde(alias = "time")] pub last_event_time: NonNegativeF64, + #[cfg_attr(feature = "cuda", cuda(embed))] + #[cfg_attr(feature = "cuda", cuda(ignore))] #[serde(alias = "loc")] pub indexed_location: IndexedLocation, } impl Lineage { #[must_use] + #[allow(clippy::no_effect_underscore_binding)] #[debug_ensures( ret.indexed_location == old(indexed_location.clone()), "stores the indexed_location" diff --git a/necsim/core/src/reporter/boolean.rs b/necsim/core/src/reporter/boolean.rs index 372b43db1..686330300 100644 --- a/necsim/core/src/reporter/boolean.rs +++ b/necsim/core/src/reporter/boolean.rs @@ -5,7 +5,7 @@ mod private { impl Sealed for super::False {} } -pub trait Boolean: private::Sealed { +pub 
trait Boolean: 'static + private::Sealed { const VALUE: bool; } diff --git a/necsim/core/src/reporter/mod.rs b/necsim/core/src/reporter/mod.rs index 821ae269f..a934f58b1 100644 --- a/necsim/core/src/reporter/mod.rs +++ b/necsim/core/src/reporter/mod.rs @@ -12,11 +12,11 @@ use used::MaybeUsed; pub mod boolean; pub mod used; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use combinator::ReporterCombinator; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use filter::FilteredReporter; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use null::NullReporter; pub trait Reporter: core::fmt::Debug { diff --git a/necsim/core/src/simulation/builder.rs b/necsim/core/src/simulation/builder.rs index eda540f91..6156662c2 100644 --- a/necsim/core/src/simulation/builder.rs +++ b/necsim/core/src/simulation/builder.rs @@ -86,7 +86,7 @@ impl< } #[derive(Debug, TypeLayout)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] #[repr(C)] pub struct Simulation< diff --git a/necsim/core/src/simulation/mod.rs b/necsim/core/src/simulation/mod.rs index c5356f1a2..29368e5a6 100644 --- a/necsim/core/src/simulation/mod.rs +++ b/necsim/core/src/simulation/mod.rs @@ -18,7 +18,7 @@ use crate::{ reporter::Reporter, }; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use builder::{Simulation, SimulationBuilder}; use necsim_core_bond::{NonNegativeF64, PositiveF64}; @@ -51,7 +51,7 @@ impl< #[inline] pub fn simulate_incremental_early_stop< - F: FnMut(&Self, u64, PositiveF64) -> ControlFlow<(), ()>, + F: FnMut(&Self, u64, PositiveF64, &P) -> ControlFlow<(), ()>, P: Reporter, >( &mut self, @@ 
-69,13 +69,17 @@ impl< .map(|lineage| (lineage.event_time, lineage.tie_breaker)); let self_ptr = self as *const Self; + let reporter_ptr = reporter as *const P; let old_rng = unsafe { self.rng.backup_unchecked() }; let mut early_stop_flow = ControlFlow::Continue(()); let early_peek_stop = |next_event_time| { // Safety: We are only passing in an immutable reference - early_stop_flow = early_stop(unsafe { &*self_ptr }, steps, next_event_time); + early_stop_flow = + early_stop(unsafe { &*self_ptr }, steps, next_event_time, unsafe { + &*reporter_ptr + }); if early_stop_flow.is_break() { return ControlFlow::Break(()); @@ -131,6 +135,6 @@ impl< #[inline] pub fn simulate(mut self, reporter: &mut P) -> (NonNegativeF64, u64) { - self.simulate_incremental_early_stop(|_, _, _| ControlFlow::Continue(()), reporter) + self.simulate_incremental_early_stop(|_, _, _, _| ControlFlow::Continue(()), reporter) } } diff --git a/necsim/impls/cuda/Cargo.toml b/necsim/impls/cuda/Cargo.toml index 5288ebe32..37412bab1 100644 --- a/necsim/impls/cuda/Cargo.toml +++ b/necsim/impls/cuda/Cargo.toml @@ -10,12 +10,12 @@ edition = "2021" [dependencies] necsim-core = { path = "../../core", features = ["cuda"] } -const-type-layout = { version = "0.3.0", features = ["derive"] } +const-type-layout = { version = "0.3.1", features = ["derive"] } contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "f395253", features = ["derive"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "697dcf5", features = ["derive"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "f395253", features = ["derive", "host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "697dcf5", features = ["derive", "host"] } diff --git a/necsim/impls/cuda/src/cogs/maths.rs 
b/necsim/impls/cuda/src/cogs/maths.rs index 11c49ffc1..4b5df0d36 100644 --- a/necsim/impls/cuda/src/cogs/maths.rs +++ b/necsim/impls/cuda/src/cogs/maths.rs @@ -36,11 +36,14 @@ impl MathsCore for NvptxMathsCore { } #[cfg(not(target_os = "cuda"))] { - extern "C" { - fn nvptx_maths_core_ln_on_cpu(_x: f64) -> !; - } + // extern "C" { + // fn nvptx_maths_core_ln_on_cpu(_x: f64) -> !; + // } + + // unsafe { nvptx_maths_core_ln_on_cpu(x) } - unsafe { nvptx_maths_core_ln_on_cpu(x) } + // TODO: disallow using NvptxMathsCore::ln on CPU + unsafe { core::intrinsics::logf64(x) } } } diff --git a/necsim/impls/cuda/src/cogs/rng.rs b/necsim/impls/cuda/src/cogs/rng.rs index bc34a8f0f..8237ed1cf 100644 --- a/necsim/impls/cuda/src/cogs/rng.rs +++ b/necsim/impls/cuda/src/cogs/rng.rs @@ -3,49 +3,48 @@ use core::marker::PhantomData; use necsim_core::cogs::{MathsCore, PrimeableRng, RngCore}; use const_type_layout::TypeGraphLayout; -use rust_cuda::safety::StackOnly; +use rust_cuda::{ + safety::{PortableBitSemantics, StackOnly}, + utils::adapter::RustToCudaWithPortableBitCloneSemantics, +}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; #[allow(clippy::module_name_repetitions)] -#[derive(Debug, rust_cuda::common::LendRustToCuda)] +#[derive(Debug, Clone, rust_cuda::lend::LendRustToCuda)] #[cuda(free = "M", free = "R")] pub struct CudaRng where - R: RngCore + StackOnly + TypeGraphLayout, + R: RngCore + StackOnly + PortableBitSemantics + TypeGraphLayout, { - inner: R, + #[cuda(embed)] + inner: RustToCudaWithPortableBitCloneSemantics, marker: PhantomData, } -impl + StackOnly + TypeGraphLayout> Clone for CudaRng { - fn clone(&self) -> Self { - Self { - inner: self.inner.clone(), - marker: PhantomData::, - } - } -} - -impl + StackOnly + TypeGraphLayout> From for CudaRng { +impl + StackOnly + PortableBitSemantics + TypeGraphLayout> From + for CudaRng +{ #[must_use] #[inline] fn from(rng: R) -> Self { Self { - inner: rng, + inner: rng.into(), marker: PhantomData::, } } } -impl + 
StackOnly + TypeGraphLayout> RngCore for CudaRng { +impl + StackOnly + PortableBitSemantics + TypeGraphLayout> RngCore + for CudaRng +{ type Seed = >::Seed; #[must_use] #[inline] fn from_seed(seed: Self::Seed) -> Self { Self { - inner: R::from_seed(seed), + inner: R::from_seed(seed).into(), marker: PhantomData::, } } @@ -57,8 +56,8 @@ impl + StackOnly + TypeGraphLayout> RngCore for C } } -impl + StackOnly + TypeGraphLayout> PrimeableRng - for CudaRng +impl + StackOnly + PortableBitSemantics + TypeGraphLayout> + PrimeableRng for CudaRng { #[inline] fn prime_with(&mut self, location_index: u64, time_index: u64) { @@ -66,17 +65,19 @@ impl + StackOnly + TypeGraphLayout> PrimeableRn } } -impl + StackOnly + TypeGraphLayout> Serialize for CudaRng { +impl + StackOnly + PortableBitSemantics + TypeGraphLayout> Serialize + for CudaRng +{ fn serialize(&self, serializer: S) -> Result { self.inner.serialize(serializer) } } -impl<'de, M: MathsCore, R: RngCore + StackOnly + TypeGraphLayout> Deserialize<'de> - for CudaRng +impl<'de, M: MathsCore, R: RngCore + StackOnly + PortableBitSemantics + TypeGraphLayout> + Deserialize<'de> for CudaRng { fn deserialize>(deserializer: D) -> Result { - let inner = R::deserialize(deserializer)?; + let inner = R::deserialize(deserializer)?.into(); Ok(Self { inner, diff --git a/necsim/impls/cuda/src/event_buffer.rs b/necsim/impls/cuda/src/event_buffer.rs index 6fb9f314f..1a08d85ca 100644 --- a/necsim/impls/cuda/src/event_buffer.rs +++ b/necsim/impls/cuda/src/event_buffer.rs @@ -1,13 +1,22 @@ -use core::fmt; +use core::{ + fmt, + ops::{Deref, DerefMut}, +}; +use const_type_layout::TypeGraphLayout; #[cfg(not(target_os = "cuda"))] -use rust_cuda::rustacuda::{ +use rust_cuda::deps::rustacuda::{ error::CudaResult, function::{BlockSize, GridSize}, }; -use rust_cuda::utils::{ - aliasing::SplitSliceOverCudaThreadsDynamicStride, exchange::buffer::CudaExchangeBuffer, +use rust_cuda::{ + lend::RustToCudaProxy, + safety::{PortableBitSemantics, 
SafeMutableAliasing, StackOnly}, + utils::{ + aliasing::SplitSliceOverCudaThreadsDynamicStride, + exchange::buffer::{CudaExchangeBuffer, CudaExchangeItem}, + }, }; use necsim_core::{ @@ -24,11 +33,16 @@ use necsim_core::impl_report; use super::utils::MaybeSome; #[allow(clippy::module_name_repetitions, clippy::type_complexity)] -#[derive(rust_cuda::common::LendRustToCuda)] +#[derive(rust_cuda::lend::LendRustToCuda)] #[cuda(free = "ReportSpeciation", free = "ReportDispersal")] pub struct EventBuffer { + #[cfg(not(target_os = "cuda"))] #[cuda(embed)] event_mask: SplitSliceOverCudaThreadsDynamicStride>, + #[cfg(target_os = "cuda")] + #[cuda(embed = "SplitSliceOverCudaThreadsDynamicStride>")] + event_mask: CudaExchangeSlice>, + #[cfg(not(target_os = "cuda"))] #[cuda(embed)] event_buffer: SplitSliceOverCudaThreadsDynamicStride< CudaExchangeBuffer< @@ -37,14 +51,49 @@ pub struct EventBuffer { true, >, >, - max_events: usize, - event_counter: usize, + #[cfg(target_os = "cuda")] + #[cuda(embed = "SplitSliceOverCudaThreadsDynamicStride< + CudaExchangeBuffer< + MaybeSome< as EventType>::Event>, + false, + true, + >, +>")] + event_buffer: CudaExchangeSlice< + CudaExchangeItem< + MaybeSome< as EventType>::Event>, + false, + true, + >, + >, +} + +// Safety: +// - no mutable aliasing occurs since all parts implement SafeMutableAliasing +// - dropping does not trigger (de)alloc since EventBuffer doesn't impl Drop and +// all parts implement SafeMutableAliasing +// - EventBuffer has no shallow mutable state +unsafe impl SafeMutableAliasing + for EventBuffer +where + SplitSliceOverCudaThreadsDynamicStride>: + SafeMutableAliasing, + SplitSliceOverCudaThreadsDynamicStride< + CudaExchangeBuffer< + MaybeSome< as EventType>::Event>, + false, + true, + >, + >: SafeMutableAliasing, +{ } pub trait EventType { type Event: 'static - + rust_cuda::const_type_layout::TypeGraphLayout + + Sync + + rust_cuda::deps::const_type_layout::TypeGraphLayout + rust_cuda::safety::StackOnly + + 
rust_cuda::safety::PortableBitSemantics + Into + Into + Clone; @@ -76,10 +125,7 @@ impl fmt::Debug for EventBuffer { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - fmt.debug_struct("EventBuffer") - .field("max_events", &self.max_events) - .field("event_counter", &self.event_counter) - .finish_non_exhaustive() + fmt.debug_struct("EventBuffer").finish_non_exhaustive() } } @@ -120,8 +166,6 @@ impl CudaExchangeBuffer::from_vec(event_buffer)?, max_events, ), - max_events, - event_counter: 0_usize, }) } @@ -146,9 +190,31 @@ impl mask.write(false); } } +} + +#[cfg(target_os = "cuda")] +impl + EventBuffer +{ + #[must_use] + pub fn can_buffer_next_event(&self) -> bool { + !self.event_buffer.is_empty() + } - pub fn max_events_per_individual(&self) -> usize { - self.max_events + fn report_event( + &mut self, + event: impl Into< as EventType>::Event>, + ) { + if let ([mask, mask_rest @ ..], [buffer, buffer_rest @ ..]) = ( + core::mem::take(&mut *self.event_mask), + core::mem::take(&mut *self.event_buffer), + ) { + mask.write(true); + buffer.write(MaybeSome::Some(event.into())); + + *self.event_mask = mask_rest; + *self.event_buffer = buffer_rest; + } } } @@ -167,19 +233,11 @@ impl Reporter impl Reporter for EventBuffer { impl_report!( #[debug_requires( - self.event_counter < self.max_events, + self.can_buffer_next_event(), "does not report extraneous dispersal events" )] dispersal(&mut self, event: Used) { - if let Some(mask) = self.event_mask.get_mut(self.event_counter) { - mask.write(true); - - unsafe { - self.event_buffer.get_unchecked_mut(self.event_counter) - }.write(MaybeSome::Some(event.clone().into())); - } - - self.event_counter += 1; + self.report_event(event.clone()); } ); } @@ -188,19 +246,14 @@ impl Reporter for EventBuffer { impl Reporter for EventBuffer { impl_report!( #[debug_requires( - self.event_counter == 0, + self.can_buffer_next_event(), "does not report extraneous speciation events" )] speciation(&mut self, event: Used) { - if let Some(mask) 
= self.event_mask.get_mut(0) { - mask.write(true); + self.report_event(event.clone()); - unsafe { - self.event_buffer.get_unchecked_mut(0) - }.write(MaybeSome::Some(event.clone())); - } - - self.event_counter = self.max_events; + *self.event_mask = &mut []; + *self.event_buffer = &mut []; } ); } @@ -209,37 +262,75 @@ impl Reporter for EventBuffer { impl Reporter for EventBuffer { impl_report!( #[debug_requires( - self.event_counter < self.max_events, + self.can_buffer_next_event(), "does not report extraneous speciation events" )] speciation(&mut self, event: Used) { - if let Some(mask) = self.event_mask.get_mut(self.event_counter) { - mask.write(true); - - unsafe { - self.event_buffer.get_unchecked_mut(self.event_counter) - }.write(MaybeSome::Some(event.clone().into())); - } + self.report_event(event.clone()); - self.event_counter = self.max_events; + *self.event_mask = &mut []; + *self.event_buffer = &mut []; } ); impl_report!( #[debug_requires( - self.event_counter < self.max_events, + self.can_buffer_next_event(), "does not report extraneous dispersal events" )] dispersal(&mut self, event: Used) { - if let Some(mask) = self.event_mask.get_mut(self.event_counter) { - mask.write(true); - - unsafe { - self.event_buffer.get_unchecked_mut(self.event_counter) - }.write(MaybeSome::Some(event.clone().into())); - } - - self.event_counter += 1; + self.report_event(event.clone()); } ); } + +// TODO: find a prettier workaround +struct CudaExchangeSlice( + &'static mut [T], +); + +impl Deref + for CudaExchangeSlice +{ + type Target = &'static mut [T]; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut + for CudaExchangeSlice +{ + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl< + T: 'static + StackOnly + PortableBitSemantics + TypeGraphLayout, + const M2D: bool, + const M2H: bool, + > RustToCudaProxy>> + for SplitSliceOverCudaThreadsDynamicStride> +{ + fn from_ref(_val: &CudaExchangeSlice>) -> &Self { + unsafe { 
unreachable_cuda_event_buffer_hack() } + } + + fn from_mut(_val: &mut CudaExchangeSlice>) -> &mut Self { + unsafe { unreachable_cuda_event_buffer_hack() } + } + + fn into(mut self) -> CudaExchangeSlice> { + let slice: &mut [CudaExchangeItem] = &mut self; + + let slice = unsafe { core::slice::from_raw_parts_mut(slice.as_mut_ptr(), slice.len()) }; + + CudaExchangeSlice(slice) + } +} + +extern "C" { + fn unreachable_cuda_event_buffer_hack() -> !; +} diff --git a/necsim/impls/cuda/src/utils.rs b/necsim/impls/cuda/src/utils.rs index 8ff8033a5..39c1c8285 100644 --- a/necsim/impls/cuda/src/utils.rs +++ b/necsim/impls/cuda/src/utils.rs @@ -3,7 +3,7 @@ use core::mem::MaybeUninit; use rust_cuda::safety::StackOnly; #[derive(TypeLayout)] -#[repr(C)] +#[repr(transparent)] #[doc(hidden)] pub struct MaybeSome(MaybeUninit); diff --git a/necsim/impls/cuda/src/value_buffer.rs b/necsim/impls/cuda/src/value_buffer.rs index 04d844f6f..b1dc71f1a 100644 --- a/necsim/impls/cuda/src/value_buffer.rs +++ b/necsim/impls/cuda/src/value_buffer.rs @@ -3,7 +3,7 @@ use core::iter::Iterator; use const_type_layout::TypeGraphLayout; use rust_cuda::{ - safety::StackOnly, + safety::{PortableBitSemantics, SafeMutableAliasing, StackOnly}, utils::{ aliasing::SplitSliceOverCudaThreadsConstStride, exchange::buffer::{CudaExchangeBuffer, CudaExchangeItem}, @@ -11,19 +11,19 @@ use rust_cuda::{ }; #[cfg(not(target_os = "cuda"))] -use rust_cuda::rustacuda::{ +use rust_cuda::deps::rustacuda::{ error::CudaResult, function::{BlockSize, GridSize}, }; use super::utils::MaybeSome; -#[derive(rust_cuda::common::LendRustToCuda)] +#[derive(rust_cuda::lend::LendRustToCuda)] #[cuda(free = "T")] #[allow(clippy::module_name_repetitions)] pub struct ValueBuffer where - T: StackOnly + TypeGraphLayout, + T: StackOnly + PortableBitSemantics + TypeGraphLayout, { #[cuda(embed)] mask: SplitSliceOverCudaThreadsConstStride, 1_usize>, @@ -32,8 +32,25 @@ where SplitSliceOverCudaThreadsConstStride, M2D, M2H>, 1_usize>, } +// Safety: +// 
- no mutable aliasing occurs since all parts implement SafeMutableAliasing +// - dropping does not trigger (de)alloc since ValueBuffer doesn't impl Drop and +// all parts implement SafeMutableAliasing +// - ValueBuffer has no shallow mutable state +unsafe impl + SafeMutableAliasing for ValueBuffer +where + SplitSliceOverCudaThreadsConstStride, 1_usize>: + SafeMutableAliasing, + SplitSliceOverCudaThreadsConstStride, M2D, M2H>, 1_usize>: + SafeMutableAliasing, +{ +} + #[cfg(not(target_os = "cuda"))] -impl ValueBuffer { +impl + ValueBuffer +{ /// # Errors /// Returns a `rustacuda::errors::CudaError` iff an error occurs inside CUDA pub fn new(block_size: &BlockSize, grid_size: &GridSize) -> CudaResult { @@ -67,7 +84,9 @@ impl ValueBuff } #[cfg(not(target_os = "cuda"))] -impl ValueBuffer { +impl + ValueBuffer +{ pub fn iter(&self) -> impl Iterator> { self.mask .iter() @@ -90,7 +109,7 @@ impl ValueBuffer } #[cfg(target_os = "cuda")] -impl ValueBuffer { +impl ValueBuffer { pub fn with_value_for_core) -> Option>(&mut self, inner: F) { let value = if self .mask @@ -117,7 +136,9 @@ impl ValueBuffer { } #[cfg(target_os = "cuda")] -impl ValueBuffer { +impl + ValueBuffer +{ pub fn take_value_for_core(&mut self) -> Option { #[allow(clippy::option_if_let_else)] if let Some(mask) = self.mask.get_mut(0) { @@ -135,7 +156,9 @@ impl ValueBuffer } #[cfg(target_os = "cuda")] -impl ValueBuffer { +impl + ValueBuffer +{ pub fn put_value_for_core(&mut self, value: Option) { if let Some(mask) = self.mask.get_mut(0) { mask.write(value.is_some()); @@ -148,13 +171,15 @@ impl ValueBuffer } #[cfg(not(target_os = "cuda"))] -pub struct ValueRefMut<'v, T: StackOnly, const M2D: bool> { +pub struct ValueRefMut<'v, T: StackOnly + PortableBitSemantics + TypeGraphLayout, const M2D: bool> { mask: &'v mut CudaExchangeItem, value: &'v mut CudaExchangeItem, M2D, true>, } #[cfg(not(target_os = "cuda"))] -impl<'v, T: StackOnly, const M2D: bool> ValueRefMut<'v, T, M2D> { +impl<'v, T: StackOnly + 
PortableBitSemantics + TypeGraphLayout, const M2D: bool> + ValueRefMut<'v, T, M2D> +{ pub fn take(&mut self) -> Option { if *self.mask.read() { self.mask.write(false); @@ -176,7 +201,7 @@ impl<'v, T: StackOnly, const M2D: bool> ValueRefMut<'v, T, M2D> { } #[cfg(not(target_os = "cuda"))] -impl<'v, T: StackOnly> ValueRefMut<'v, T, true> { +impl<'v, T: StackOnly + PortableBitSemantics + TypeGraphLayout> ValueRefMut<'v, T, true> { #[must_use] pub fn as_mut(&mut self) -> Option<&mut T> { if *self.mask.read() { diff --git a/necsim/impls/no-std/Cargo.toml b/necsim/impls/no-std/Cargo.toml index d85745eb1..b6a6b6866 100644 --- a/necsim/impls/no-std/Cargo.toml +++ b/necsim/impls/no-std/Cargo.toml @@ -17,7 +17,7 @@ necsim-core-maths = { path = "../../core/maths" } necsim-core-bond = { path = "../../core/bond" } necsim-partitioning-core = { path = "../../partitioning/core" } -const-type-layout = { version = "0.3.0", features = ["derive"] } +const-type-layout = { version = "0.3.1", features = ["derive"] } contracts = "0.6.3" libm = "0.2" hashbrown = "0.14" @@ -30,7 +30,7 @@ fnv = { version = "1.0", default-features = false, features = [] } rand_core = "0.6" [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "f395253", features = ["derive"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "697dcf5", features = ["derive", "final"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "f395253", features = ["derive", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "697dcf5", features = ["derive", "final", "host"], optional = true } diff --git a/necsim/impls/no-std/src/alias/mod.rs b/necsim/impls/no-std/src/alias/mod.rs index 641478464..558416936 100644 --- a/necsim/impls/no-std/src/alias/mod.rs +++ b/necsim/impls/no-std/src/alias/mod.rs @@ -1,3 +1,5 @@ +use 
core::cmp::Ordering; + use alloc::vec::Vec; use necsim_core::cogs::{MathsCore, RngCore}; @@ -62,11 +64,10 @@ impl AliasMethodSampler { }; Ks[underfull_index] = Es[overfull_index]; - #[allow(clippy::comparison_chain)] - if Us[overfull_index] < 1.0_f64 { - underfull_indices.push(overfull_index); - } else if Us[overfull_index] > 1.0_f64 { - overfull_indices.push(overfull_index); + match Us[overfull_index].cmp(&NonNegativeF64::one()) { + Ordering::Less => underfull_indices.push(overfull_index), + Ordering::Equal => (), + Ordering::Greater => overfull_indices.push(overfull_index), } } diff --git a/necsim/impls/no-std/src/array2d.rs b/necsim/impls/no-std/src/array2d.rs index 14fe7fc83..dd4552ebe 100644 --- a/necsim/impls/no-std/src/array2d.rs +++ b/necsim/impls/no-std/src/array2d.rs @@ -10,12 +10,12 @@ use core::ops::{Index, IndexMut}; /// A fixed sized two-dimensional array. #[derive(Clone, Eq, PartialEq)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr( feature = "cuda", cuda( free = "T", - bound = "T: rust_cuda::safety::StackOnly + const_type_layout::TypeGraphLayout" + bound = "T: rust_cuda::safety::PortableBitSemantics + const_type_layout::TypeGraphLayout" ) )] pub struct Array2D { diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/const.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/const.rs index b69bc20c0..598721483 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/const.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/const.rs @@ -8,7 +8,7 @@ use super::EventTimeSampler; #[allow(clippy::module_name_repetitions)] #[derive(Clone, Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub 
struct ConstEventTimeSampler { event_time: PositiveF64, } diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/exp.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/exp.rs index 8b6bdc9c4..9e7b1207e 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/exp.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/exp.rs @@ -11,7 +11,7 @@ const INV_PHI: u64 = 0x9e37_79b9_7f4a_7c15_u64; #[allow(clippy::module_name_repetitions)] #[derive(Clone, Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub struct ExpEventTimeSampler { delta_t: PositiveF64, } diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/fixed.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/fixed.rs index 5685d57fe..c6ac3227d 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/fixed.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/fixed.rs @@ -8,7 +8,7 @@ use super::EventTimeSampler; #[allow(clippy::module_name_repetitions)] #[derive(Clone, Debug, Default)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub struct FixedEventTimeSampler([u8; 0]); #[contract_trait] diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/geometric.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/geometric.rs index be31a8a60..476685396 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/geometric.rs +++ 
b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/geometric.rs @@ -8,7 +8,7 @@ use super::EventTimeSampler; #[allow(clippy::module_name_repetitions)] #[derive(Clone, Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub struct GeometricEventTimeSampler { delta_t: PositiveF64, } diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/poisson.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/poisson.rs index fcd1355ab..db7a42683 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/poisson.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/poisson.rs @@ -11,7 +11,7 @@ const INV_PHI: u64 = 0x9e37_79b9_7f4a_7c15_u64; #[allow(clippy::module_name_repetitions)] #[derive(Clone, Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub struct PoissonEventTimeSampler { delta_t: PositiveF64, } diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/mod.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/mod.rs index 1aafbee33..eb5243a48 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/mod.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/mod.rs @@ -25,7 +25,7 @@ use event_time_sampler::EventTimeSampler; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct IndependentActiveLineageSampler< M: MathsCore, @@ -37,10 +37,7 @@ pub struct IndependentActiveLineageSampler< N: SpeciationProbability, 
J: EventTimeSampler, > { - #[cfg_attr( - feature = "cuda", - cuda(embed = "Option>") - )] + #[cfg_attr(feature = "cuda", cuda(embed))] active_lineage: Option, min_event_time: NonNegativeF64, last_event_time: NonNegativeF64, diff --git a/necsim/impls/no-std/src/cogs/coalescence_sampler/independent.rs b/necsim/impls/no-std/src/cogs/coalescence_sampler/independent.rs index 0e9a16f6a..f15e3f672 100644 --- a/necsim/impls/no-std/src/cogs/coalescence_sampler/independent.rs +++ b/necsim/impls/no-std/src/cogs/coalescence_sampler/independent.rs @@ -15,7 +15,7 @@ use crate::cogs::lineage_store::{ #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M", free = "H"))] pub struct IndependentCoalescenceSampler>(PhantomData<(M, H)>); diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/almost_infinite_clark2dt.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/almost_infinite_clark2dt.rs index decb649ce..b75075a5e 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/almost_infinite_clark2dt.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/almost_infinite_clark2dt.rs @@ -10,7 +10,7 @@ use crate::cogs::habitat::almost_infinite::AlmostInfiniteHabitat; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M", free = "G"))] pub struct AlmostInfiniteClark2DtDispersalSampler> { shape_u: PositiveF64, diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/almost_infinite_normal.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/almost_infinite_normal.rs index 5dc7b7cd7..36a731790 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/almost_infinite_normal.rs +++ 
b/necsim/impls/no-std/src/cogs/dispersal_sampler/almost_infinite_normal.rs @@ -10,7 +10,7 @@ use crate::cogs::habitat::almost_infinite::AlmostInfiniteHabitat; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M", free = "G"))] pub struct AlmostInfiniteNormalDispersalSampler> { sigma: NonNegativeF64, diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/in_memory/packed_alias/mod.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/in_memory/packed_alias/mod.rs index f162c0199..f364b2f50 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/in_memory/packed_alias/mod.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/in_memory/packed_alias/mod.rs @@ -40,7 +40,7 @@ impl From for Range { } #[allow(clippy::module_name_repetitions)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M", free = "H", free = "G"))] pub struct InMemoryPackedAliasDispersalSampler, G: RngCore> { #[cfg_attr(feature = "cuda", cuda(embed))] diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/non_spatial.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/non_spatial.rs index 4b5d21861..f1186b1ec 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/non_spatial.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/non_spatial.rs @@ -10,7 +10,7 @@ use crate::cogs::habitat::non_spatial::NonSpatialHabitat; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M", free = "G"))] pub struct NonSpatialDispersalSampler> { marker: PhantomData<(M, G)>, diff 
--git a/necsim/impls/no-std/src/cogs/dispersal_sampler/spatially_implicit.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/spatially_implicit.rs index 9e8d2cd21..fbd5d0dc0 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/spatially_implicit.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/spatially_implicit.rs @@ -11,7 +11,7 @@ use crate::cogs::{ #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct SpatiallyImplicitDispersalSampler> { #[cfg_attr(feature = "cuda", cuda(embed))] diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/mod.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/mod.rs index 996dc2684..1ad63b0b7 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/mod.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/mod.rs @@ -28,7 +28,7 @@ pub trait AntiTrespassingDispersalSampler, G: RngCor #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct TrespassingDispersalSampler< M: MathsCore, diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/uniform.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/uniform.rs index 22e3216d2..26bef8225 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/uniform.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/uniform.rs @@ -9,7 +9,7 @@ use super::AntiTrespassingDispersalSampler; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", 
derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M", free = "H", free = "G"))] pub struct UniformAntiTrespassingDispersalSampler< M: MathsCore, diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/wrapping_noise.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/wrapping_noise.rs index 5f38306db..6f3075bf4 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/wrapping_noise.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/wrapping_noise.rs @@ -14,7 +14,7 @@ use crate::cogs::{ #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct WrappingNoiseApproximateNormalDispersalSampler> { #[cfg_attr(feature = "cuda", cuda(embed))] diff --git a/necsim/impls/no-std/src/cogs/emigration_exit/never.rs b/necsim/impls/no-std/src/cogs/emigration_exit/never.rs index 74a68fdda..62e5320a5 100644 --- a/necsim/impls/no-std/src/cogs/emigration_exit/never.rs +++ b/necsim/impls/no-std/src/cogs/emigration_exit/never.rs @@ -8,7 +8,7 @@ use necsim_core_bond::{NonNegativeF64, PositiveF64}; #[allow(clippy::module_name_repetitions)] #[derive(Debug, Default)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub struct NeverEmigrationExit([u8; 0]); #[contract_trait] diff --git a/necsim/impls/no-std/src/cogs/event_sampler/independent.rs b/necsim/impls/no-std/src/cogs/event_sampler/independent.rs index baeb01622..17ac313d0 100644 --- a/necsim/impls/no-std/src/cogs/event_sampler/independent.rs +++ b/necsim/impls/no-std/src/cogs/event_sampler/independent.rs @@ -21,7 +21,7 @@ use super::tracking::{MinSpeciationTrackingEventSampler, SpeciationSample}; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", 
derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr( feature = "cuda", cuda( @@ -43,12 +43,7 @@ pub struct IndependentEventSampler< T: TurnoverRate, N: SpeciationProbability, > { - #[cfg_attr( - feature = "cuda", - cuda( - embed = "Option>" - ) - )] + #[cfg_attr(feature = "cuda", cuda(embed))] min_spec_sample: Option, marker: PhantomData<(M, H, G, X, D, T, N)>, } diff --git a/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs b/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs index 8b5c1cccd..6804e70ea 100644 --- a/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs +++ b/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs @@ -27,10 +27,12 @@ pub trait MinSpeciationTrackingEventSampler< } #[derive(Clone, Debug, TypeLayout)] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[repr(C)] pub struct SpeciationSample { speciation_sample: ClosedOpenUnitF64, sample_time: PositiveF64, + #[cfg_attr(feature = "cuda", cuda(embed))] sample_location: IndexedLocation, } diff --git a/necsim/impls/no-std/src/cogs/habitat/almost_infinite.rs b/necsim/impls/no-std/src/cogs/habitat/almost_infinite.rs index 62b06c356..b974b42ac 100644 --- a/necsim/impls/no-std/src/cogs/habitat/almost_infinite.rs +++ b/necsim/impls/no-std/src/cogs/habitat/almost_infinite.rs @@ -12,7 +12,7 @@ const ALMOST_INFINITE_EXTENT: LandscapeExtent = LandscapeExtent::new(Location::new(0, 0), OffByOneU32::max(), OffByOneU32::max()); #[allow(clippy::module_name_repetitions)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct AlmostInfiniteHabitat { marker: PhantomData, diff --git a/necsim/impls/no-std/src/cogs/habitat/in_memory.rs b/necsim/impls/no-std/src/cogs/habitat/in_memory.rs index ea6b4d314..a0464df4a 100644 --- 
a/necsim/impls/no-std/src/cogs/habitat/in_memory.rs +++ b/necsim/impls/no-std/src/cogs/habitat/in_memory.rs @@ -14,13 +14,14 @@ use crate::array2d::Array2D; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct InMemoryHabitat { #[cfg_attr(feature = "cuda", cuda(embed))] habitat: Final>, #[cfg_attr(feature = "cuda", cuda(embed))] u64_injection: Final>, + #[cfg_attr(feature = "cuda", cuda(embed))] extent: LandscapeExtent, marker: PhantomData, } diff --git a/necsim/impls/no-std/src/cogs/habitat/non_spatial.rs b/necsim/impls/no-std/src/cogs/habitat/non_spatial.rs index 5da4667c1..947243289 100644 --- a/necsim/impls/no-std/src/cogs/habitat/non_spatial.rs +++ b/necsim/impls/no-std/src/cogs/habitat/non_spatial.rs @@ -11,9 +11,10 @@ use necsim_core_bond::{OffByOneU32, OffByOneU64}; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct NonSpatialHabitat { + #[cfg_attr(feature = "cuda", cuda(embed))] extent: LandscapeExtent, deme: NonZeroU32, marker: PhantomData, diff --git a/necsim/impls/no-std/src/cogs/habitat/spatially_implicit.rs b/necsim/impls/no-std/src/cogs/habitat/spatially_implicit.rs index 7b13925bc..d2a434daa 100644 --- a/necsim/impls/no-std/src/cogs/habitat/spatially_implicit.rs +++ b/necsim/impls/no-std/src/cogs/habitat/spatially_implicit.rs @@ -13,7 +13,7 @@ const SPATIALLY_IMPLICIT_EXTENT: LandscapeExtent = #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free 
= "M"))] pub struct SpatiallyImplicitHabitat { #[cfg_attr(feature = "cuda", cuda(embed))] diff --git a/necsim/impls/no-std/src/cogs/habitat/wrapping_noise/mod.rs b/necsim/impls/no-std/src/cogs/habitat/wrapping_noise/mod.rs index e6482e557..892c02f11 100644 --- a/necsim/impls/no-std/src/cogs/habitat/wrapping_noise/mod.rs +++ b/necsim/impls/no-std/src/cogs/habitat/wrapping_noise/mod.rs @@ -18,7 +18,7 @@ use crate::cogs::{ }; #[allow(clippy::module_name_repetitions)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct WrappingNoiseHabitat { #[cfg_attr(feature = "cuda", cuda(embed))] diff --git a/necsim/impls/no-std/src/cogs/immigration_entry/never.rs b/necsim/impls/no-std/src/cogs/immigration_entry/never.rs index fc148b60e..9c4df3ac8 100644 --- a/necsim/impls/no-std/src/cogs/immigration_entry/never.rs +++ b/necsim/impls/no-std/src/cogs/immigration_entry/never.rs @@ -5,7 +5,7 @@ use necsim_core::{ #[allow(clippy::module_name_repetitions)] #[derive(Debug, Default)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub struct NeverImmigrationEntry([u8; 0]); #[contract_trait] diff --git a/necsim/impls/no-std/src/cogs/lineage_store/independent.rs b/necsim/impls/no-std/src/cogs/lineage_store/independent.rs index d20b0dbd1..606be853e 100644 --- a/necsim/impls/no-std/src/cogs/lineage_store/independent.rs +++ b/necsim/impls/no-std/src/cogs/lineage_store/independent.rs @@ -7,7 +7,7 @@ use necsim_core::{ #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M", free = "H"))] pub struct IndependentLineageStore> { marker: PhantomData<(M, H)>, diff 
--git a/necsim/impls/no-std/src/cogs/maths/intrinsics.rs b/necsim/impls/no-std/src/cogs/maths/intrinsics.rs index 7375c9fc8..46801aac8 100644 --- a/necsim/impls/no-std/src/cogs/maths/intrinsics.rs +++ b/necsim/impls/no-std/src/cogs/maths/intrinsics.rs @@ -1,4 +1,2 @@ -#![allow(clippy::useless_attribute)] - #[allow(clippy::module_name_repetitions)] pub use necsim_core_maths::IntrinsicsMathsCore; diff --git a/necsim/impls/no-std/src/cogs/rng/seahash.rs b/necsim/impls/no-std/src/cogs/rng/seahash.rs index 93cc87ecd..bbfc0df7b 100644 --- a/necsim/impls/no-std/src/cogs/rng/seahash.rs +++ b/necsim/impls/no-std/src/cogs/rng/seahash.rs @@ -4,7 +4,7 @@ use necsim_core::cogs::{Backup, MathsCore, PrimeableRng, RngCore}; use serde::{Deserialize, Serialize}; -#[allow(clippy::module_name_repetitions, clippy::unsafe_derive_deserialize)] +#[allow(clippy::module_name_repetitions)] #[derive(Clone, Debug, Serialize, Deserialize, TypeLayout)] #[serde(deny_unknown_fields)] #[layout(free = "M")] diff --git a/necsim/impls/no-std/src/cogs/rng/wyhash.rs b/necsim/impls/no-std/src/cogs/rng/wyhash.rs index c4fdeed68..dfa2d4d3e 100644 --- a/necsim/impls/no-std/src/cogs/rng/wyhash.rs +++ b/necsim/impls/no-std/src/cogs/rng/wyhash.rs @@ -11,7 +11,7 @@ const P1: u64 = 0xe703_7ed1_a0b4_28db; const P2: u64 = 0x8ebc_6af0_9c88_c6e3; const P5: u64 = 0xeb44_acca_b455_d165; -#[allow(clippy::module_name_repetitions, clippy::unsafe_derive_deserialize)] +#[allow(clippy::module_name_repetitions)] #[derive(Clone, Debug, Serialize, Deserialize, TypeLayout)] #[layout(free = "M")] #[serde(deny_unknown_fields)] diff --git a/necsim/impls/no-std/src/cogs/speciation_probability/spatially_implicit.rs b/necsim/impls/no-std/src/cogs/speciation_probability/spatially_implicit.rs index d50e77707..a542e24b1 100644 --- a/necsim/impls/no-std/src/cogs/speciation_probability/spatially_implicit.rs +++ b/necsim/impls/no-std/src/cogs/speciation_probability/spatially_implicit.rs @@ -7,7 +7,7 @@ use necsim_core_bond::{ClosedUnitF64, 
OpenClosedUnitF64 as PositiveUnitF64}; use crate::cogs::habitat::spatially_implicit::SpatiallyImplicitHabitat; #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[allow(clippy::module_name_repetitions)] pub struct SpatiallyImplicitSpeciationProbability { meta_speciation_probability: PositiveUnitF64, diff --git a/necsim/impls/no-std/src/cogs/speciation_probability/uniform.rs b/necsim/impls/no-std/src/cogs/speciation_probability/uniform.rs index dd8d2dfae..82ceeeba7 100644 --- a/necsim/impls/no-std/src/cogs/speciation_probability/uniform.rs +++ b/necsim/impls/no-std/src/cogs/speciation_probability/uniform.rs @@ -5,7 +5,7 @@ use necsim_core::{ use necsim_core_bond::ClosedUnitF64; #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[allow(clippy::module_name_repetitions)] pub struct UniformSpeciationProbability { speciation_probability: ClosedUnitF64, diff --git a/necsim/impls/no-std/src/cogs/turnover_rate/in_memory.rs b/necsim/impls/no-std/src/cogs/turnover_rate/in_memory.rs index dc884bc0d..6f6f52b20 100644 --- a/necsim/impls/no-std/src/cogs/turnover_rate/in_memory.rs +++ b/necsim/impls/no-std/src/cogs/turnover_rate/in_memory.rs @@ -14,7 +14,7 @@ use crate::{array2d::Array2D, cogs::habitat::in_memory::InMemoryHabitat}; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub struct InMemoryTurnoverRate { #[cfg_attr(feature = "cuda", cuda(embed))] turnover_rate: Final>, diff --git a/necsim/impls/no-std/src/cogs/turnover_rate/uniform.rs b/necsim/impls/no-std/src/cogs/turnover_rate/uniform.rs index 99411a19a..5255625bf 100644 --- a/necsim/impls/no-std/src/cogs/turnover_rate/uniform.rs +++ 
b/necsim/impls/no-std/src/cogs/turnover_rate/uniform.rs @@ -5,7 +5,7 @@ use necsim_core::{ use necsim_core_bond::{NonNegativeF64, PositiveF64}; #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[allow(clippy::module_name_repetitions)] pub struct UniformTurnoverRate { turnover_rate: PositiveF64, diff --git a/necsim/impls/no-std/src/parallelisation/independent/individuals.rs b/necsim/impls/no-std/src/parallelisation/independent/individuals.rs index 93fbe37f0..c6355adf4 100644 --- a/necsim/impls/no-std/src/parallelisation/independent/individuals.rs +++ b/necsim/impls/no-std/src/parallelisation/independent/individuals.rs @@ -125,7 +125,7 @@ pub fn simulate< // detected at the next shared duplicate event let (new_time, new_steps) = simulation.simulate_incremental_early_stop( - |_, steps, _| { + |_, steps, _, _| { if steps >= step_slice.get() { ControlFlow::Break(()) } else { diff --git a/necsim/impls/no-std/src/parallelisation/independent/landscape.rs b/necsim/impls/no-std/src/parallelisation/independent/landscape.rs index 75c83085d..0177ec941 100644 --- a/necsim/impls/no-std/src/parallelisation/independent/landscape.rs +++ b/necsim/impls/no-std/src/parallelisation/independent/landscape.rs @@ -137,7 +137,7 @@ pub fn simulate< // detected at the next shared duplicate event let (new_time, new_steps) = simulation.simulate_incremental_early_stop( - |_, steps, _| { + |_, steps, _, _| { if steps >= step_slice.get() { ControlFlow::Break(()) } else { diff --git a/necsim/impls/no-std/src/parallelisation/independent/monolithic/mod.rs b/necsim/impls/no-std/src/parallelisation/independent/monolithic/mod.rs index b3ac9a64c..faecd44c9 100644 --- a/necsim/impls/no-std/src/parallelisation/independent/monolithic/mod.rs +++ b/necsim/impls/no-std/src/parallelisation/independent/monolithic/mod.rs @@ -226,7 +226,7 @@ pub fn simulate< previous_next_event_time = None; let 
(new_time, new_steps) = simulation.simulate_incremental_early_stop( - |_, steps, next_event_time| { + |_, steps, next_event_time, _| { previous_next_event_time = Some(next_event_time); if steps >= step_slice.get() || next_event_time >= level_time { diff --git a/necsim/impls/no-std/src/parallelisation/monolithic/averaging.rs b/necsim/impls/no-std/src/parallelisation/monolithic/averaging.rs index 187ee4038..143b61156 100644 --- a/necsim/impls/no-std/src/parallelisation/monolithic/averaging.rs +++ b/necsim/impls/no-std/src/parallelisation/monolithic/averaging.rs @@ -78,7 +78,7 @@ pub fn simulate< let next_safe_time = global_safe_time + independent_time_slice; let (_, new_steps) = simulation.simulate_incremental_early_stop( - |_, _, next_event_time| { + |_, _, next_event_time, _| { if next_event_time >= next_safe_time { ControlFlow::Break(()) } else { diff --git a/necsim/impls/no-std/src/parallelisation/monolithic/lockstep.rs b/necsim/impls/no-std/src/parallelisation/monolithic/lockstep.rs index 174c2c358..2b29631aa 100644 --- a/necsim/impls/no-std/src/parallelisation/monolithic/lockstep.rs +++ b/necsim/impls/no-std/src/parallelisation/monolithic/lockstep.rs @@ -77,7 +77,7 @@ pub fn simulate< // Simulate for zero-steps (immediate early stop) without side effects // to peek the next local event time simulation.simulate_incremental_early_stop( - |_, _, next_event_time| { + |_, _, next_event_time, _| { next_local_time = Some(next_event_time); ControlFlow::Break(()) @@ -102,7 +102,7 @@ pub fn simulate< // The partition with the next event gets to simulate just the next step if let Ok(next_global_time) = local_partition.reduce_vote_min_time(next_local_time) { let (_, new_steps) = simulation.simulate_incremental_early_stop( - |_, _, next_event_time| { + |_, _, next_event_time, _| { if next_event_time > next_global_time { ControlFlow::Break(()) } else { diff --git a/necsim/impls/no-std/src/parallelisation/monolithic/monolithic.rs 
b/necsim/impls/no-std/src/parallelisation/monolithic/monolithic.rs index 895344836..246e582df 100644 --- a/necsim/impls/no-std/src/parallelisation/monolithic/monolithic.rs +++ b/necsim/impls/no-std/src/parallelisation/monolithic/monolithic.rs @@ -69,7 +69,7 @@ pub fn simulate< // ically later time let (time, steps) = simulation.simulate_incremental_early_stop( - |_, _, next_event_time| { + |_, _, next_event_time, _| { pause_before.map_or(ControlFlow::Continue(()), |pause_before| { if next_event_time >= pause_before { ControlFlow::Break(()) diff --git a/necsim/impls/no-std/src/parallelisation/monolithic/optimistic.rs b/necsim/impls/no-std/src/parallelisation/monolithic/optimistic.rs index b12afb7d7..349c74164 100644 --- a/necsim/impls/no-std/src/parallelisation/monolithic/optimistic.rs +++ b/necsim/impls/no-std/src/parallelisation/monolithic/optimistic.rs @@ -109,7 +109,7 @@ pub fn simulate< // e.g. (1->2)|(2->3)|(3->1) => (1->2)|(3->1) let (_, new_steps) = simulation.simulate_incremental_early_stop( - |_, _, next_event_time| { + |_, _, next_event_time, _| { if next_event_time >= next_safe_time { ControlFlow::Break(()) } else { diff --git a/necsim/impls/no-std/src/parallelisation/monolithic/optimistic_lockstep.rs b/necsim/impls/no-std/src/parallelisation/monolithic/optimistic_lockstep.rs index 767e5cbb9..def28db5e 100644 --- a/necsim/impls/no-std/src/parallelisation/monolithic/optimistic_lockstep.rs +++ b/necsim/impls/no-std/src/parallelisation/monolithic/optimistic_lockstep.rs @@ -78,7 +78,7 @@ pub fn simulate< // (we already know at least one partition has some next event time) let next_local_emigration_time = { let (_, new_steps) = simulation.simulate_incremental_early_stop( - |simulation, _, _| { + |simulation, _, _, _| { if simulation.emigration_exit().is_empty() { ControlFlow::Continue(()) } else { @@ -115,7 +115,7 @@ pub fn simulate< // that event Ok(next_global_time) => { let (_, new_steps) = simulation.simulate_incremental_early_stop( - |_, _, 
next_event_time| { + |_, _, next_event_time, _| { if next_event_time > next_global_time { ControlFlow::Break(()) } else { @@ -139,7 +139,7 @@ pub fn simulate< // All other partitions get to simulate until just before this next migration event Err(next_global_time) => { let (_, new_steps) = simulation.simulate_incremental_early_stop( - |_, _, next_event_time| { + |_, _, next_event_time, _| { if next_event_time >= next_global_time { ControlFlow::Break(()) } else { diff --git a/necsim/impls/std/src/event_log/replay/sorted_segments.rs b/necsim/impls/std/src/event_log/replay/sorted_segments.rs index 2c209cd95..57c18b6e9 100644 --- a/necsim/impls/std/src/event_log/replay/sorted_segments.rs +++ b/necsim/impls/std/src/event_log/replay/sorted_segments.rs @@ -101,6 +101,7 @@ impl PartialOrd for SortedSortedSegments { } impl PartialEq for SortedSortedSegments { + #[allow(clippy::unconditional_recursion)] fn eq(&self, other: &Self) -> bool { self.next.eq(&other.next) } diff --git a/necsim/partitioning/mpi/src/partition/mod.rs b/necsim/partitioning/mpi/src/partition/mod.rs index 90055f711..d05940d3d 100644 --- a/necsim/partitioning/mpi/src/partition/mod.rs +++ b/necsim/partitioning/mpi/src/partition/mod.rs @@ -13,9 +13,9 @@ mod parallel; mod root; mod utils; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use parallel::MpiParallelPartition; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use root::MpiRootPartition; #[allow(clippy::module_name_repetitions)] diff --git a/necsim/plugins/core/src/import/combinator.rs b/necsim/plugins/core/src/import/combinator.rs index d948c5e3e..a99fb5784 100644 --- a/necsim/plugins/core/src/import/combinator.rs +++ b/necsim/plugins/core/src/import/combinator.rs @@ -3,7 +3,6 @@ use std::{ iter::{FromIterator, IntoIterator}, marker::PhantomData, path::Path, - rc::Rc, }; use serde::{Deserialize, Deserializer, 
Serialize, Serializer}; @@ -68,7 +67,8 @@ impl>(); let result = inner(self); diff --git a/rust-toolchain b/rust-toolchain index 73b9f40a2..218c6dd39 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1,5 +1,5 @@ [toolchain] # Pin to final 1.79.0 nightly channel = "nightly-2024-04-28" -components = [ "cargo", "rustfmt", "clippy", "rust-src" ] +components = [ "cargo", "rustfmt", "clippy", "rust-src", "llvm-bitcode-linker", "llvm-tools" ] targets = [ "x86_64-unknown-linux-gnu", "nvptx64-nvidia-cuda" ] diff --git a/rustcoalescence/algorithms/cuda/Cargo.toml b/rustcoalescence/algorithms/cuda/Cargo.toml index be7135a5c..1eeeba998 100644 --- a/rustcoalescence/algorithms/cuda/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/Cargo.toml @@ -32,4 +32,4 @@ thiserror = "1.0" serde = { version = "1.0", features = ["derive"] } serde_state = "0.4" serde_derive_state = "0.4" -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "f395253", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "697dcf5", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml index bab5e740e..e5735deed 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml @@ -23,4 +23,4 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } rustcoalescence-algorithms-cuda-gpu-kernel = { path = "../gpu-kernel" } -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "f395253", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "697dcf5", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs index 5c908339e..8f206ab43 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs 
+++ b/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs @@ -1,6 +1,5 @@ #![deny(clippy::pedantic)] -#![allow(incomplete_features)] -#![feature(specialization)] +#![allow(long_running_const_eval)] #![recursion_limit = "1024"] use necsim_core::{ @@ -16,145 +15,29 @@ use necsim_impls_no_std::cogs::{ event_sampler::tracking::MinSpeciationTrackingEventSampler, }; -use rust_cuda::{ - common::RustToCuda, - host::{CudaDropWrapper, LaunchConfig, LaunchPackage, Launcher, TypedKernel}, - rustacuda::{ - error::CudaResult, - function::{BlockSize, Function, GridSize}, - stream::Stream, - }, -}; - -use rustcoalescence_algorithms_cuda_gpu_kernel::SimulatableKernel; +use rust_cuda::lend::RustToCuda; mod link; mod patch; -pub type KernelCompilationCallback = dyn FnMut(&Function) -> CudaResult<()>; - -#[allow(clippy::module_name_repetitions)] -pub struct SimulationKernel< - M: MathsCore, - H: Habitat + RustToCuda, - G: PrimeableRng + RustToCuda, - S: LineageStore + RustToCuda, - X: EmigrationExit + RustToCuda, - D: DispersalSampler + RustToCuda, - C: CoalescenceSampler + RustToCuda, - T: TurnoverRate + RustToCuda, - N: SpeciationProbability + RustToCuda, - E: MinSpeciationTrackingEventSampler + RustToCuda, - I: ImmigrationEntry + RustToCuda, - A: SingularActiveLineageSampler + RustToCuda, +#[allow(clippy::type_complexity)] +pub struct SimulationKernelPtx< + M: MathsCore + Sync, + H: Habitat + RustToCuda + Sync, + G: PrimeableRng + RustToCuda + Sync, + S: LineageStore + RustToCuda + Sync, + X: EmigrationExit + RustToCuda + Sync, + D: DispersalSampler + RustToCuda + Sync, + C: CoalescenceSampler + RustToCuda + Sync, + T: TurnoverRate + RustToCuda + Sync, + N: SpeciationProbability + RustToCuda + Sync, + E: MinSpeciationTrackingEventSampler + RustToCuda + Sync, + I: ImmigrationEntry + RustToCuda + Sync, + A: SingularActiveLineageSampler + RustToCuda + Sync, ReportSpeciation: Boolean, ReportDispersal: Boolean, -> { - #[allow(clippy::type_complexity)] - kernel: TypedKernel< - dyn 
SimulatableKernel< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - ReportSpeciation, - ReportDispersal, - >, - >, - stream: CudaDropWrapper, - grid: GridSize, - block: BlockSize, - ptx_jit: bool, - watcher: Box, -} - -impl< - M: MathsCore, - H: Habitat + RustToCuda, - G: PrimeableRng + RustToCuda, - S: LineageStore + RustToCuda, - X: EmigrationExit + RustToCuda, - D: DispersalSampler + RustToCuda, - C: CoalescenceSampler + RustToCuda, - T: TurnoverRate + RustToCuda, - N: SpeciationProbability + RustToCuda, - E: MinSpeciationTrackingEventSampler + RustToCuda, - I: ImmigrationEntry + RustToCuda, - A: SingularActiveLineageSampler + RustToCuda, - ReportSpeciation: Boolean, - ReportDispersal: Boolean, - > SimulationKernel -{ - /// # Errors - /// - /// Returns a `CudaError` if loading the CUDA kernel failed. - pub fn try_new( - stream: Stream, - grid: GridSize, - block: BlockSize, - ptx_jit: bool, - on_compile: Box, - ) -> CudaResult - where - Self: SimulatableKernel< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - ReportSpeciation, - ReportDispersal, - >, - { - let stream = CudaDropWrapper::from(stream); - let kernel = Self::new_kernel()?; - - Ok(Self { - kernel, - stream, - grid, - block, - ptx_jit, - watcher: on_compile, - }) - } -} - -impl< - M: MathsCore, - H: Habitat + RustToCuda, - G: PrimeableRng + RustToCuda, - S: LineageStore + RustToCuda, - X: EmigrationExit + RustToCuda, - D: DispersalSampler + RustToCuda, - C: CoalescenceSampler + RustToCuda, - T: TurnoverRate + RustToCuda, - N: SpeciationProbability + RustToCuda, - E: MinSpeciationTrackingEventSampler + RustToCuda, - I: ImmigrationEntry + RustToCuda, - A: SingularActiveLineageSampler + RustToCuda, - ReportSpeciation: Boolean, - ReportDispersal: Boolean, - > Launcher - for SimulationKernel -{ - type CompilationWatcher = Box; - type KernelTraitObject = dyn SimulatableKernel< +>( + std::marker::PhantomData<( M, H, G, @@ -169,25 +52,5 @@ impl< A, ReportSpeciation, ReportDispersal, - 
>; - - fn get_launch_package(&mut self) -> LaunchPackage { - LaunchPackage { - config: LaunchConfig { - grid: self.grid.clone(), - block: self.block.clone(), - shared_memory_size: 0_u32, - ptx_jit: self.ptx_jit, - }, - - kernel: &mut self.kernel, - stream: &mut self.stream, - - watcher: &mut self.watcher, - } - } - - fn on_compile(kernel: &Function, watcher: &mut Self::CompilationWatcher) -> CudaResult<()> { - (watcher)(kernel) - } -} + )>, +); diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/link.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/link.rs index ee77a5d11..c4a99ac6f 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/src/link.rs +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/src/link.rs @@ -11,34 +11,26 @@ use necsim_impls_no_std::cogs::{ event_sampler::tracking::MinSpeciationTrackingEventSampler, }; -use rust_cuda::{ - common::RustToCuda, - host::{LaunchConfig, LaunchPackage, Launcher}, - rustacuda::{error::CudaResult, function::Function}, -}; - -#[allow(unused_imports)] -use rustcoalescence_algorithms_cuda_gpu_kernel::{SimulatableKernel, SimulationKernelArgs}; +use rust_cuda::lend::RustToCuda; -#[repr(transparent)] -pub struct SimulationKernel< - M: MathsCore, - H: Habitat + RustToCuda, - G: PrimeableRng + RustToCuda, - S: LineageStore + RustToCuda, - X: EmigrationExit + RustToCuda, - D: DispersalSampler + RustToCuda, - C: CoalescenceSampler + RustToCuda, - T: TurnoverRate + RustToCuda, - N: SpeciationProbability + RustToCuda, - E: MinSpeciationTrackingEventSampler + RustToCuda, - I: ImmigrationEntry + RustToCuda, - A: SingularActiveLineageSampler + RustToCuda, +#[allow(clippy::type_complexity)] +pub struct SimulationKernelPtx< + M: MathsCore + Sync, + H: Habitat + RustToCuda + Sync, + G: PrimeableRng + RustToCuda + Sync, + S: LineageStore + RustToCuda + Sync, + X: EmigrationExit + RustToCuda + Sync, + D: DispersalSampler + RustToCuda + Sync, + C: CoalescenceSampler + RustToCuda + Sync, + T: TurnoverRate + RustToCuda + Sync, + N: 
SpeciationProbability + RustToCuda + Sync, + E: MinSpeciationTrackingEventSampler + RustToCuda + Sync, + I: ImmigrationEntry + RustToCuda + Sync, + A: SingularActiveLineageSampler + RustToCuda + Sync, ReportSpeciation: Boolean, ReportDispersal: Boolean, >( - #[allow(clippy::type_complexity)] - pub(crate) crate::SimulationKernel< + std::marker::PhantomData<( M, H, G, @@ -53,66 +45,9 @@ pub struct SimulationKernel< A, ReportSpeciation, ReportDispersal, - >, + )>, ); -impl< - M: MathsCore, - H: Habitat + RustToCuda, - G: PrimeableRng + RustToCuda, - S: LineageStore + RustToCuda, - X: EmigrationExit + RustToCuda, - D: DispersalSampler + RustToCuda, - C: CoalescenceSampler + RustToCuda, - T: TurnoverRate + RustToCuda, - N: SpeciationProbability + RustToCuda, - E: MinSpeciationTrackingEventSampler + RustToCuda, - I: ImmigrationEntry + RustToCuda, - A: SingularActiveLineageSampler + RustToCuda, - ReportSpeciation: Boolean, - ReportDispersal: Boolean, - > Launcher - for SimulationKernel -{ - type CompilationWatcher = Box; - type KernelTraitObject = dyn SimulatableKernel< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - ReportSpeciation, - ReportDispersal, - >; - - fn get_launch_package(&mut self) -> LaunchPackage { - LaunchPackage { - config: LaunchConfig { - grid: self.0.grid.clone(), - block: self.0.block.clone(), - shared_memory_size: 0_u32, - ptx_jit: self.0.ptx_jit, - }, - - kernel: &mut self.0.kernel, - stream: &mut self.0.stream, - - watcher: &mut self.0.watcher, - } - } - - fn on_compile(kernel: &Function, watcher: &mut Self::CompilationWatcher) -> CudaResult<()> { - (watcher)(kernel) - } -} - #[allow(unused_macros)] macro_rules! link_kernel { ($habitat:ty, $dispersal:ty, $turnover:ty, $speciation:ty) => { @@ -141,7 +76,7 @@ macro_rules! 
link_kernel { $habitat:ty, $dispersal:ty, $turnover:ty, $speciation:ty, $report_speciation:ty, $report_dispersal:ty ) => { - rustcoalescence_algorithms_cuda_gpu_kernel::link_kernel!( + rustcoalescence_algorithms_cuda_gpu_kernel::link! { impl simulate< necsim_impls_cuda::cogs::maths::NvptxMathsCore, $habitat, necsim_impls_cuda::cogs::rng::CudaRng< @@ -194,9 +129,9 @@ macro_rules! link_kernel { >, $report_speciation, $report_dispersal, - ); + > for SimulationKernelPtx } - rustcoalescence_algorithms_cuda_gpu_kernel::link_kernel!( + rustcoalescence_algorithms_cuda_gpu_kernel::link! { impl simulate< necsim_impls_cuda::cogs::maths::NvptxMathsCore, $habitat, necsim_impls_cuda::cogs::rng::CudaRng< @@ -309,7 +244,7 @@ macro_rules! link_kernel { >, $report_speciation, $report_dispersal, - ); + > for SimulationKernelPtx } }; } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs index e9bb646b6..04404ad9b 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs @@ -1,239 +1,61 @@ -use std::sync::atomic::AtomicU64; +use std::ffi::CStr; use necsim_core::{ cogs::{ CoalescenceSampler, DispersalSampler, EmigrationExit, Habitat, ImmigrationEntry, LineageStore, MathsCore, PrimeableRng, SpeciationProbability, TurnoverRate, }, - lineage::Lineage, reporter::boolean::{Boolean, False, True}, - simulation::Simulation, }; -use necsim_core_bond::{NonNegativeF64, PositiveF64}; -use necsim_impls_cuda::{event_buffer::EventBuffer, value_buffer::ValueBuffer}; use necsim_impls_no_std::cogs::{ active_lineage_sampler::singular::SingularActiveLineageSampler, - event_sampler::tracking::{MinSpeciationTrackingEventSampler, SpeciationSample}, + event_sampler::tracking::MinSpeciationTrackingEventSampler, }; -use rust_cuda::{ - common::{DeviceAccessible, RustToCuda}, - host::{HostAndDeviceConstRef, HostAndDeviceMutRef, TypedKernel}, - 
rustacuda::error::CudaResult, - utils::device_copy::SafeDeviceCopyWrapper, -}; +use rust_cuda::{kernel::CompiledKernelPtx, lend::RustToCuda}; + +use rustcoalescence_algorithms_cuda_gpu_kernel::simulate; -use rustcoalescence_algorithms_cuda_gpu_kernel::SimulatableKernel; +use crate::SimulationKernelPtx; -use crate::SimulationKernel; +// If `Kernel` is implemented for `ReportSpeciation` x `ReportDispersal`, i.e. +// for {`False`, `True`} x {`False`, `True`} then it is implemented for all +// `Boolean`s. However, Rust does not recognise that `Boolean` is closed over +// {`False`, `True`}. This explicit impl provides the necessary coersion. -#[allow(clippy::missing_transmute_annotations, clippy::too_many_lines)] unsafe impl< - M: MathsCore, - H: Habitat + RustToCuda, - G: PrimeableRng + RustToCuda, - S: LineageStore + RustToCuda, - X: EmigrationExit + RustToCuda, - D: DispersalSampler + RustToCuda, - C: CoalescenceSampler + RustToCuda, - T: TurnoverRate + RustToCuda, - N: SpeciationProbability + RustToCuda, - E: MinSpeciationTrackingEventSampler + RustToCuda, - I: ImmigrationEntry + RustToCuda, - A: SingularActiveLineageSampler + RustToCuda, + M: MathsCore + Sync, + H: Habitat + RustToCuda + Sync, + G: PrimeableRng + RustToCuda + Sync, + S: LineageStore + RustToCuda + Sync, + X: EmigrationExit + RustToCuda + Sync, + D: DispersalSampler + RustToCuda + Sync, + C: CoalescenceSampler + RustToCuda + Sync, + T: TurnoverRate + RustToCuda + Sync, + N: SpeciationProbability + RustToCuda + Sync, + E: MinSpeciationTrackingEventSampler + RustToCuda + Sync, + I: ImmigrationEntry + RustToCuda + Sync, + A: SingularActiveLineageSampler + RustToCuda + Sync, ReportSpeciation: Boolean, ReportDispersal: Boolean, - > SimulatableKernel - for SimulationKernel + > + CompiledKernelPtx< + simulate, + > for SimulationKernelPtx where - crate::link::SimulationKernel: - SimulatableKernel, - crate::link::SimulationKernel: - SimulatableKernel, - crate::link::SimulationKernel: - SimulatableKernel, - 
crate::link::SimulationKernel: - SimulatableKernel, + crate::link::SimulationKernelPtx: + CompiledKernelPtx>, + crate::link::SimulationKernelPtx: + CompiledKernelPtx>, + crate::link::SimulationKernelPtx: + CompiledKernelPtx>, + crate::link::SimulationKernelPtx: + CompiledKernelPtx>, { - fn get_ptx_str() -> &'static str { - match (ReportSpeciation::VALUE, ReportDispersal::VALUE) { - (false, false) => crate::link::SimulationKernel::< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - False, - False, - >::get_ptx_str(), - (false, true) => crate::link::SimulationKernel::< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - False, - True, - >::get_ptx_str(), - (true, false) => crate::link::SimulationKernel::< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - True, - False, - >::get_ptx_str(), - (true, true) => crate::link::SimulationKernel::< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - True, - True, - >::get_ptx_str(), - } - } - - fn new_kernel() -> CudaResult< - TypedKernel< - dyn SimulatableKernel< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - ReportSpeciation, - ReportDispersal, - >, - >, - > { - match (ReportSpeciation::VALUE, ReportDispersal::VALUE) { - (false, false) => unsafe { - std::mem::transmute(crate::link::SimulationKernel::< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - False, - False, - >::new_kernel()) - }, - (false, true) => unsafe { - std::mem::transmute(crate::link::SimulationKernel::< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - False, - True, - >::new_kernel()) - }, - (true, false) => unsafe { - std::mem::transmute(crate::link::SimulationKernel::< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - True, - False, - >::new_kernel()) - }, - (true, true) => unsafe { - std::mem::transmute(crate::link::SimulationKernel::< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - True, - True, - >::new_kernel()) - }, - } - } - - fn simulate( - 
&mut self, - simulation: &mut Simulation, - task_list: &mut ValueBuffer, - event_buffer_reporter: &mut EventBuffer, - min_spec_sample_buffer: &mut ValueBuffer, - next_event_time_buffer: &mut ValueBuffer, - total_time_max: &AtomicU64, - total_steps_sum: &AtomicU64, - max_steps: u64, - max_next_event_time: NonNegativeF64, - ) -> CudaResult<()> { + #[inline] + fn get_ptx() -> &'static CStr { match (ReportSpeciation::VALUE, ReportDispersal::VALUE) { - (false, false) => crate::link::SimulationKernel::< + (false, false) => crate::link::SimulationKernelPtx::< M, H, G, @@ -248,19 +70,8 @@ where A, False, False, - >::simulate( - unsafe { &mut *std::ptr::from_mut(self).cast() }, - simulation, - task_list, - unsafe { &mut *std::ptr::from_mut(event_buffer_reporter).cast() }, - min_spec_sample_buffer, - next_event_time_buffer, - total_time_max, - total_steps_sum, - max_steps, - max_next_event_time, - ), - (false, true) => crate::link::SimulationKernel::< + >::get_ptx(), + (false, true) => crate::link::SimulationKernelPtx::< M, H, G, @@ -275,19 +86,8 @@ where A, False, True, - >::simulate( - unsafe { &mut *std::ptr::from_mut(self).cast() }, - simulation, - task_list, - unsafe { &mut *std::ptr::from_mut(event_buffer_reporter).cast() }, - min_spec_sample_buffer, - next_event_time_buffer, - total_time_max, - total_steps_sum, - max_steps, - max_next_event_time, - ), - (true, false) => crate::link::SimulationKernel::< + >::get_ptx(), + (true, false) => crate::link::SimulationKernelPtx::< M, H, G, @@ -302,19 +102,8 @@ where A, True, False, - >::simulate( - unsafe { &mut *std::ptr::from_mut(self).cast() }, - simulation, - task_list, - unsafe { &mut *std::ptr::from_mut(event_buffer_reporter).cast() }, - min_spec_sample_buffer, - next_event_time_buffer, - total_time_max, - total_steps_sum, - max_steps, - max_next_event_time, - ), - (true, true) => crate::link::SimulationKernel::< + >::get_ptx(), + (true, true) => crate::link::SimulationKernelPtx::< M, H, G, @@ -329,53 +118,14 @@ where A, 
True, True, - >::simulate( - unsafe { &mut *std::ptr::from_mut(self).cast() }, - simulation, - task_list, - unsafe { &mut *std::ptr::from_mut(event_buffer_reporter).cast() }, - min_spec_sample_buffer, - next_event_time_buffer, - total_time_max, - total_steps_sum, - max_steps, - max_next_event_time, - ), + >::get_ptx(), } } - fn simulate_raw( - &mut self, - simulation: HostAndDeviceMutRef< - DeviceAccessible< - as RustToCuda>::CudaRepresentation, - >, - >, - task_list: HostAndDeviceMutRef< - DeviceAccessible< as RustToCuda>::CudaRepresentation>, - >, - event_buffer_reporter: HostAndDeviceMutRef< - DeviceAccessible< - as RustToCuda>::CudaRepresentation, - >, - >, - min_spec_sample_buffer: HostAndDeviceMutRef< - DeviceAccessible< - as RustToCuda>::CudaRepresentation, - >, - >, - next_event_time_buffer: HostAndDeviceMutRef< - DeviceAccessible< - as RustToCuda>::CudaRepresentation, - >, - >, - total_time_max: HostAndDeviceConstRef>, - total_steps_sum: HostAndDeviceConstRef>, - max_steps: SafeDeviceCopyWrapper, - max_next_event_time: SafeDeviceCopyWrapper, - ) -> CudaResult<()> { + #[inline] + fn get_entry_point() -> &'static CStr { match (ReportSpeciation::VALUE, ReportDispersal::VALUE) { - (false, false) => crate::link::SimulationKernel::< + (false, false) => crate::link::SimulationKernelPtx::< M, H, G, @@ -390,19 +140,8 @@ where A, False, False, - >::simulate_raw( - unsafe { &mut *std::ptr::from_mut(self).cast() }, - simulation, - task_list, - unsafe { std::mem::transmute(event_buffer_reporter) }, - min_spec_sample_buffer, - next_event_time_buffer, - total_time_max, - total_steps_sum, - max_steps, - max_next_event_time, - ), - (false, true) => crate::link::SimulationKernel::< + >::get_entry_point(), + (false, true) => crate::link::SimulationKernelPtx::< M, H, G, @@ -417,19 +156,8 @@ where A, False, True, - >::simulate_raw( - unsafe { &mut *std::ptr::from_mut(self).cast() }, - simulation, - task_list, - unsafe { std::mem::transmute(event_buffer_reporter) }, - 
min_spec_sample_buffer, - next_event_time_buffer, - total_time_max, - total_steps_sum, - max_steps, - max_next_event_time, - ), - (true, false) => crate::link::SimulationKernel::< + >::get_entry_point(), + (true, false) => crate::link::SimulationKernelPtx::< M, H, G, @@ -444,19 +172,8 @@ where A, True, False, - >::simulate_raw( - unsafe { &mut *std::ptr::from_mut(self).cast() }, - simulation, - task_list, - unsafe { std::mem::transmute(event_buffer_reporter) }, - min_spec_sample_buffer, - next_event_time_buffer, - total_time_max, - total_steps_sum, - max_steps, - max_next_event_time, - ), - (true, true) => crate::link::SimulationKernel::< + >::get_entry_point(), + (true, true) => crate::link::SimulationKernelPtx::< M, H, G, @@ -471,18 +188,7 @@ where A, True, True, - >::simulate_raw( - unsafe { &mut *std::ptr::from_mut(self).cast() }, - simulation, - task_list, - unsafe { std::mem::transmute(event_buffer_reporter) }, - min_spec_sample_buffer, - next_event_time_buffer, - total_time_max, - total_steps_sum, - max_steps, - max_next_event_time, - ), + >::get_entry_point(), } } } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/.cargo/config.toml b/rustcoalescence/algorithms/cuda/gpu-kernel/.cargo/config.toml index e310e544a..20ad0d47d 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/.cargo/config.toml +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/.cargo/config.toml @@ -2,7 +2,7 @@ pipelining = false [target.nvptx64-nvidia-cuda] -rustflags = ["-Clink-args=--arch=sm_35", "-Cpanic=abort", "-Clinker-plugin-lto", "-Ccodegen-units=1", "-Clink-arg=-O3", "-Clink-arg=--lto"] +rustflags = ["-Zunstable-options", "-Clinker-flavor=llbc", "-Ctarget-cpu=sm_35", "-Cpanic=abort", "-Ccodegen-units=1", "-Clink-arg=-O3"] [unstable] build-std = ["core", "alloc"] diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml index a45a7c862..dce06b130 100644 --- 
a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml @@ -16,4 +16,8 @@ necsim-core-bond = { path = "../../../../necsim/core/bond" } necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["cuda"] } necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "f395253", features = ["derive"] } +[target.'cfg(target_os = "cuda")'.dependencies] +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "697dcf5", features = ["derive", "device", "kernel"] } + +[target.'cfg(not(target_os = "cuda"))'.dependencies] +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "697dcf5", features = ["derive", "kernel"] } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs b/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs index 3365963fd..afb17efe7 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs @@ -1,12 +1,10 @@ #![deny(clippy::pedantic)] #![no_std] +#![feature(type_alias_impl_trait)] +#![feature(decl_macro)] #![cfg_attr(target_os = "cuda", feature(abi_ptx))] -#![cfg_attr(target_os = "cuda", feature(alloc_error_handler))] -#![cfg_attr(target_os = "cuda", feature(panic_info_message))] -#![cfg_attr(target_os = "cuda", feature(atomic_from_mut))] #![cfg_attr(target_os = "cuda", feature(asm_experimental_arch))] -#![cfg_attr(target_os = "cuda", feature(stdarch_nvptx))] -#![cfg_attr(target_os = "cuda", feature(control_flow_enum))] +#![cfg_attr(target_os = "cuda", feature(alloc_error_handler))] #![allow(long_running_const_eval)] #![recursion_limit = "1024"] @@ -14,81 +12,71 @@ extern crate alloc; #[cfg(target_os = "cuda")] use core::ops::ControlFlow; +use core::sync::atomic::AtomicU64; use necsim_core::{ cogs::{ CoalescenceSampler, DispersalSampler, EmigrationExit, Habitat, ImmigrationEntry, LineageStore, MathsCore, 
PrimeableRng, SpeciationProbability, TurnoverRate, }, + lineage::Lineage, reporter::boolean::Boolean, + simulation::Simulation, }; +use necsim_core_bond::{NonNegativeF64, PositiveF64}; +use necsim_impls_cuda::{event_buffer::EventBuffer, value_buffer::ValueBuffer}; use necsim_impls_no_std::cogs::{ active_lineage_sampler::singular::SingularActiveLineageSampler, event_sampler::tracking::{MinSpeciationTrackingEventSampler, SpeciationSample}, }; -use rust_cuda::common::RustToCuda; +use rust_cuda::{ + kernel::param::{DeepPerThreadBorrow, PerThreadShallowCopy, PtxJit, ShallowInteriorMutable}, + lend::RustToCuda, +}; -#[rust_cuda::common::kernel( - pub use link_kernel! as impl SimulatableKernel for SimulationKernel +#[rust_cuda::kernel::kernel(pub use link! for impl)] +#[kernel( + allow(ptx::double_precision_use), + allow(ptx::local_memory_use), // FIXME + forbid(ptx::register_spills), )] #[allow(clippy::too_many_arguments)] #[allow(clippy::type_complexity)] pub fn simulate< - M: MathsCore, - H: Habitat + RustToCuda, - G: PrimeableRng + RustToCuda, - S: LineageStore + RustToCuda, - X: EmigrationExit + RustToCuda, - D: DispersalSampler + RustToCuda, - C: CoalescenceSampler + RustToCuda, - T: TurnoverRate + RustToCuda, - N: SpeciationProbability + RustToCuda, - E: MinSpeciationTrackingEventSampler + RustToCuda, - I: ImmigrationEntry + RustToCuda, - A: SingularActiveLineageSampler + RustToCuda, + M: MathsCore + Sync, + H: Habitat + RustToCuda + Sync, + G: PrimeableRng + RustToCuda + Sync, + S: LineageStore + RustToCuda + Sync, + X: EmigrationExit + RustToCuda + Sync, + D: DispersalSampler + RustToCuda + Sync, + C: CoalescenceSampler + RustToCuda + Sync, + T: TurnoverRate + RustToCuda + Sync, + N: SpeciationProbability + RustToCuda + Sync, + E: MinSpeciationTrackingEventSampler + RustToCuda + Sync, + I: ImmigrationEntry + RustToCuda + Sync, + A: SingularActiveLineageSampler + RustToCuda + Sync, ReportSpeciation: Boolean, ReportDispersal: Boolean, >( - #[rustfmt::skip] - 
#[kernel(pass = LendRustToCuda, jit)] - simulation: &mut ShallowCopy< - necsim_core::simulation::Simulation, - >, - #[rustfmt::skip] - #[kernel(pass = LendRustToCuda, jit)] - task_list: &mut ShallowCopy< - necsim_impls_cuda::value_buffer::ValueBuffer, - >, - #[rustfmt::skip] - #[kernel(pass = LendRustToCuda, jit)] - event_buffer_reporter: &mut ShallowCopy< - necsim_impls_cuda::event_buffer::EventBuffer, + simulation: &PtxJit>>, + task_list: &mut PtxJit>>, + event_buffer_reporter: &mut PtxJit< + DeepPerThreadBorrow>, >, - #[rustfmt::skip] - #[kernel(pass = LendRustToCuda, jit)] - min_spec_sample_buffer: &mut ShallowCopy< - necsim_impls_cuda::value_buffer::ValueBuffer, + min_spec_sample_buffer: &mut PtxJit< + DeepPerThreadBorrow>, >, - #[rustfmt::skip] - #[kernel(pass = LendRustToCuda, jit)] - next_event_time_buffer: &mut ShallowCopy< - necsim_impls_cuda::value_buffer::ValueBuffer, - >, - #[rustfmt::skip] - #[kernel(pass = SafeDeviceCopy)] - total_time_max: &core::sync::atomic::AtomicU64, - #[rustfmt::skip] - #[kernel(pass = SafeDeviceCopy)] - total_steps_sum: &core::sync::atomic::AtomicU64, - #[rustfmt::skip] - #[kernel(pass = SafeDeviceCopy)] - max_steps: u64, - #[rustfmt::skip] - #[kernel(pass = SafeDeviceCopy)] - max_next_event_time: necsim_core_bond::NonNegativeF64, + next_event_time_buffer: &mut PtxJit>>, + total_time_max: &ShallowInteriorMutable, + total_steps_sum: &ShallowInteriorMutable, + max_steps: PerThreadShallowCopy, + max_next_event_time: PerThreadShallowCopy, ) { + // TODO: use simulation with non-allocating clone + let mut simulation = unsafe { core::mem::ManuallyDrop::new(core::ptr::read(simulation)) }; + task_list.with_value_for_core(|task| { // Discard the prior task (the simulation is just a temporary local copy) core::mem::drop( @@ -103,13 +91,16 @@ pub fn simulate< let mut final_next_event_time = None; let (time, steps) = simulation.simulate_incremental_early_stop( - |_, steps, next_event_time| { + |_, steps, next_event_time, reporter| { 
final_next_event_time = Some(next_event_time); - if steps >= max_steps || next_event_time >= max_next_event_time { - ControlFlow::Break(()) - } else { + if steps < max_steps + && next_event_time < max_next_event_time + && reporter.can_buffer_next_event() + { ControlFlow::Continue(()) + } else { + ControlFlow::Break(()) } }, event_buffer_reporter, @@ -133,37 +124,34 @@ pub fn simulate< #[cfg(target_os = "cuda")] mod cuda_prelude { - use core::arch::nvptx; - - use rust_cuda::device::utils; + use rust_cuda::device::alloc::PTXAllocator; #[global_allocator] - static _GLOBAL_ALLOCATOR: utils::PTXAllocator = utils::PTXAllocator; + static _GLOBAL_ALLOCATOR: PTXAllocator = PTXAllocator; #[cfg(not(debug_assertions))] #[panic_handler] fn panic(_panic_info: &::core::panic::PanicInfo) -> ! { - unsafe { nvptx::trap() } + rust_cuda::device::utils::abort() } #[cfg(debug_assertions)] #[panic_handler] - fn panic(panic_info: &::core::panic::PanicInfo) -> ! { - use rust_cuda::println; - - println!( - "Panic occurred at {:?}: {:?}!", - panic_info.location(), - panic_info - .message() - .unwrap_or(&format_args!("unknown reason")) - ); - - unsafe { nvptx::trap() } + fn panic(info: &::core::panic::PanicInfo) -> ! { + rust_cuda::device::utils::pretty_print_panic_info(info, true, true); + rust_cuda::device::utils::abort() } + #[cfg(not(debug_assertions))] #[alloc_error_handler] fn alloc_error_handler(_: core::alloc::Layout) -> ! { - unsafe { nvptx::trap() } + rust_cuda::device::utils::abort() + } + + #[cfg(debug_assertions)] + #[alloc_error_handler] + fn alloc_error_handler(layout: core::alloc::Layout) -> ! 
{ + rust_cuda::device::utils::pretty_print_alloc_error(layout); + rust_cuda::device::utils::abort() } } diff --git a/rustcoalescence/algorithms/cuda/src/cuda.rs b/rustcoalescence/algorithms/cuda/src/cuda.rs index c523bf2d2..d8222ebb1 100644 --- a/rustcoalescence/algorithms/cuda/src/cuda.rs +++ b/rustcoalescence/algorithms/cuda/src/cuda.rs @@ -1,4 +1,4 @@ -use rust_cuda::rustacuda::{ +use rust_cuda::deps::rustacuda::{ context::{Context, CurrentContext, ResourceLimit}, prelude::*, }; @@ -13,7 +13,7 @@ pub fn with_initialised_cuda, F: FnOnce() -> Result> inner: F, ) -> Result { // Initialize the CUDA API - rust_cuda::rustacuda::init(CudaFlags::empty())?; + rust_cuda::deps::rustacuda::init(CudaFlags::empty())?; // Get the first device let device = Device::get_device(device)?; diff --git a/rustcoalescence/algorithms/cuda/src/error.rs b/rustcoalescence/algorithms/cuda/src/error.rs index e69898247..f81a9e3c1 100644 --- a/rustcoalescence/algorithms/cuda/src/error.rs +++ b/rustcoalescence/algorithms/cuda/src/error.rs @@ -1,4 +1,4 @@ -use rust_cuda::rustacuda::error::CudaError as RustaCudaError; +use rust_cuda::deps::rustacuda::error::CudaError as RustaCudaError; use serde::{Deserialize, Serialize}; #[derive(thiserror::Error, Debug, Clone, Serialize, Deserialize)] diff --git a/rustcoalescence/algorithms/cuda/src/info.rs b/rustcoalescence/algorithms/cuda/src/info.rs index 1abf4ec07..78a5452ea 100644 --- a/rustcoalescence/algorithms/cuda/src/info.rs +++ b/rustcoalescence/algorithms/cuda/src/info.rs @@ -1,4 +1,4 @@ -use rust_cuda::rustacuda::{ +use rust_cuda::deps::rustacuda::{ context::{CurrentContext, ResourceLimit}, function::{Function, FunctionAttribute}, }; diff --git a/rustcoalescence/algorithms/cuda/src/initialiser/fixup.rs b/rustcoalescence/algorithms/cuda/src/initialiser/fixup.rs index 06401c685..6c8dee90a 100644 --- a/rustcoalescence/algorithms/cuda/src/initialiser/fixup.rs +++ b/rustcoalescence/algorithms/cuda/src/initialiser/fixup.rs @@ -28,7 +28,7 @@ use 
rustcoalescence_algorithms::{ }; use rustcoalescence_scenarios::Scenario; -use rust_cuda::common::RustToCuda; +use rust_cuda::lend::RustToCuda; use crate::CudaError; @@ -42,19 +42,21 @@ pub struct FixUpInitialiser> { impl< L: ExactSizeIterator, - M: MathsCore, - G: PrimeableRng + RustToCuda, + M: MathsCore + Sync, + G: PrimeableRng + RustToCuda + Sync, O: Scenario, > CudaLineageStoreSampleInitialiser> for FixUpInitialiser where - O::Habitat: RustToCuda, - O::DispersalSampler>: RustToCuda, - O::TurnoverRate: RustToCuda, - O::SpeciationProbability: RustToCuda, + O::Habitat: RustToCuda + Sync, + O::DispersalSampler>: RustToCuda + Sync, + O::TurnoverRate: RustToCuda + Sync, + O::SpeciationProbability: RustToCuda + Sync, { type ActiveLineageSampler< - X: EmigrationExit> + RustToCuda, - J: EventTimeSampler + RustToCuda, + X: EmigrationExit> + + RustToCuda + + Sync, + J: EventTimeSampler + RustToCuda + Sync, > = IndependentActiveLineageSampler< M, O::Habitat, @@ -76,8 +78,10 @@ where fn init< 'h, T: TrustedOriginSampler<'h, M, Habitat = O::Habitat>, - J: EventTimeSampler + RustToCuda, - X: EmigrationExit> + RustToCuda, + J: EventTimeSampler + RustToCuda + Sync, + X: EmigrationExit> + + RustToCuda + + Sync, >( self, origin_sampler: T, diff --git a/rustcoalescence/algorithms/cuda/src/initialiser/genesis.rs b/rustcoalescence/algorithms/cuda/src/initialiser/genesis.rs index 5f851c286..72b836902 100644 --- a/rustcoalescence/algorithms/cuda/src/initialiser/genesis.rs +++ b/rustcoalescence/algorithms/cuda/src/initialiser/genesis.rs @@ -14,7 +14,7 @@ use necsim_impls_no_std::cogs::{ use rustcoalescence_scenarios::Scenario; -use rust_cuda::common::RustToCuda; +use rust_cuda::lend::RustToCuda; use crate::CudaError; @@ -23,17 +23,19 @@ use super::CudaLineageStoreSampleInitialiser; #[allow(clippy::module_name_repetitions)] pub struct GenesisInitialiser; -impl + RustToCuda, O: Scenario> +impl + RustToCuda + Sync, O: Scenario> CudaLineageStoreSampleInitialiser for GenesisInitialiser 
where - O::Habitat: RustToCuda, - O::DispersalSampler>: RustToCuda, - O::TurnoverRate: RustToCuda, - O::SpeciationProbability: RustToCuda, + O::Habitat: RustToCuda + Sync, + O::DispersalSampler>: RustToCuda + Sync, + O::TurnoverRate: RustToCuda + Sync, + O::SpeciationProbability: RustToCuda + Sync, { type ActiveLineageSampler< - X: EmigrationExit> + RustToCuda, - J: EventTimeSampler + RustToCuda, + X: EmigrationExit> + + RustToCuda + + Sync, + J: EventTimeSampler + RustToCuda + Sync, > = IndependentActiveLineageSampler< M, O::Habitat, @@ -50,8 +52,10 @@ where fn init< 'h, T: TrustedOriginSampler<'h, M, Habitat = O::Habitat>, - J: EventTimeSampler + RustToCuda, - X: EmigrationExit> + RustToCuda, + J: EventTimeSampler + RustToCuda + Sync, + X: EmigrationExit> + + RustToCuda + + Sync, >( self, origin_sampler: T, diff --git a/rustcoalescence/algorithms/cuda/src/initialiser/mod.rs b/rustcoalescence/algorithms/cuda/src/initialiser/mod.rs index a1a39e87e..8a0d9a27c 100644 --- a/rustcoalescence/algorithms/cuda/src/initialiser/mod.rs +++ b/rustcoalescence/algorithms/cuda/src/initialiser/mod.rs @@ -17,7 +17,7 @@ use necsim_impls_no_std::cogs::{ use rustcoalescence_scenarios::Scenario; -use rust_cuda::common::RustToCuda; +use rust_cuda::lend::RustToCuda; use crate::CudaError; @@ -28,38 +28,40 @@ pub mod resume; #[allow(clippy::module_name_repetitions)] pub trait CudaLineageStoreSampleInitialiser< M: MathsCore, - G: PrimeableRng + RustToCuda, + G: PrimeableRng + RustToCuda + Sync, O: Scenario, Error: From, > where - O::Habitat: RustToCuda, - O::DispersalSampler>: RustToCuda, - O::TurnoverRate: RustToCuda, - O::SpeciationProbability: RustToCuda, + O::Habitat: RustToCuda + Sync, + O::DispersalSampler>: RustToCuda + Sync, + O::TurnoverRate: RustToCuda + Sync, + O::SpeciationProbability: RustToCuda + Sync, { - type DispersalSampler: DispersalSampler + RustToCuda; + type DispersalSampler: DispersalSampler + RustToCuda + Sync; type ActiveLineageSampler< X: EmigrationExit< M, 
O::Habitat, G, IndependentLineageStore, - > + RustToCuda, - J: EventTimeSampler + RustToCuda, + > + RustToCuda + Sync, + J: EventTimeSampler + RustToCuda + Sync, >: SingularActiveLineageSampler< M, O::Habitat, G, IndependentLineageStore, X, Self::DispersalSampler, IndependentCoalescenceSampler, O::TurnoverRate, O::SpeciationProbability, IndependentEventSampler< M, O::Habitat, G, X, Self::DispersalSampler, O::TurnoverRate, O::SpeciationProbability >, NeverImmigrationEntry, - > + RustToCuda; + > + RustToCuda + Sync; #[allow(clippy::type_complexity)] fn init< 'h, T: TrustedOriginSampler<'h, M, Habitat = O::Habitat>, - J: EventTimeSampler + RustToCuda, - X: EmigrationExit> + RustToCuda, + J: EventTimeSampler + RustToCuda + Sync, + X: EmigrationExit> + + RustToCuda + + Sync, >( self, origin_sampler: T, diff --git a/rustcoalescence/algorithms/cuda/src/initialiser/resume.rs b/rustcoalescence/algorithms/cuda/src/initialiser/resume.rs index 2cba7640b..478690d96 100644 --- a/rustcoalescence/algorithms/cuda/src/initialiser/resume.rs +++ b/rustcoalescence/algorithms/cuda/src/initialiser/resume.rs @@ -17,7 +17,7 @@ use necsim_impls_no_std::cogs::{ use rustcoalescence_algorithms::result::ResumeError; use rustcoalescence_scenarios::Scenario; -use rust_cuda::common::RustToCuda; +use rust_cuda::lend::RustToCuda; use crate::CudaError; @@ -31,19 +31,21 @@ pub struct ResumeInitialiser> { impl< L: ExactSizeIterator, - M: MathsCore, - G: PrimeableRng + RustToCuda, + M: MathsCore + Sync, + G: PrimeableRng + RustToCuda + Sync, O: Scenario, > CudaLineageStoreSampleInitialiser> for ResumeInitialiser where - O::Habitat: RustToCuda, - O::DispersalSampler>: RustToCuda, - O::TurnoverRate: RustToCuda, - O::SpeciationProbability: RustToCuda, + O::Habitat: RustToCuda + Sync, + O::DispersalSampler>: RustToCuda + Sync, + O::TurnoverRate: RustToCuda + Sync, + O::SpeciationProbability: RustToCuda + Sync, { type ActiveLineageSampler< - X: EmigrationExit> + RustToCuda, - J: EventTimeSampler + 
RustToCuda, + X: EmigrationExit> + + RustToCuda + + Sync, + J: EventTimeSampler + RustToCuda + Sync, > = IndependentActiveLineageSampler< M, O::Habitat, @@ -60,8 +62,10 @@ where fn init< 'h, T: TrustedOriginSampler<'h, M, Habitat = O::Habitat>, - J: EventTimeSampler + RustToCuda, - X: EmigrationExit> + RustToCuda, + J: EventTimeSampler + RustToCuda + Sync, + X: EmigrationExit> + + RustToCuda + + Sync, >( self, origin_sampler: T, diff --git a/rustcoalescence/algorithms/cuda/src/launch.rs b/rustcoalescence/algorithms/cuda/src/launch.rs index 12589699a..44e0e66f6 100644 --- a/rustcoalescence/algorithms/cuda/src/launch.rs +++ b/rustcoalescence/algorithms/cuda/src/launch.rs @@ -1,9 +1,12 @@ use std::marker::PhantomData; -use necsim_core::{cogs::MathsCore, reporter::Reporter, simulation::SimulationBuilder}; +use necsim_core::{ + cogs::{MathsCore, PrimeableRng}, + reporter::Reporter, + simulation::SimulationBuilder, +}; use necsim_core_bond::NonNegativeF64; -use necsim_impls_cuda::cogs::rng::CudaRng; use necsim_impls_no_std::{ cogs::{ active_lineage_sampler::independent::event_time_sampler::exp::ExpEventTimeSampler, @@ -16,7 +19,6 @@ use necsim_impls_no_std::{ origin_sampler::{ decomposition::DecompositionOriginSampler, pre_sampler::OriginPreSampler, }, - rng::wyhash::WyHash, }, parallelisation::Status, }; @@ -25,15 +27,16 @@ use necsim_partitioning_core::LocalPartition; use rustcoalescence_algorithms::result::SimulationOutcome; use rustcoalescence_scenarios::Scenario; -use rustcoalescence_algorithms_cuda_cpu_kernel::SimulationKernel; -use rustcoalescence_algorithms_cuda_gpu_kernel::SimulatableKernel; +use rustcoalescence_algorithms_cuda_gpu_kernel::simulate; use rust_cuda::{ - common::RustToCuda, - rustacuda::{ + deps::rustacuda::{ function::{BlockSize, GridSize}, prelude::{Stream, StreamFlags}, }, + host::CudaDropWrapper, + kernel::{CompiledKernelPtx, LaunchConfig, Launcher, TypedPtxKernel}, + lend::RustToCuda, }; use crate::{ @@ -49,75 +52,54 @@ use crate::{ 
#[allow(clippy::too_many_lines)] pub fn initialise_and_simulate< 'p, - M: MathsCore, - O: Scenario>>, + M: MathsCore + Sync, + G: PrimeableRng + RustToCuda + Sync, + O: Scenario, R: Reporter, P: LocalPartition<'p, R>, I: Iterator, - L: CudaLineageStoreSampleInitialiser>, O, Error>, + L: CudaLineageStoreSampleInitialiser, Error: From, ->( - args: &CudaArguments, - rng: CudaRng>, - scenario: O, - pre_sampler: OriginPreSampler, - pause_before: Option, - local_partition: &mut P, - lineage_store_sampler_initialiser: L, -) -> Result>>, Error> -where - O::Habitat: RustToCuda, - O::DispersalSampler>>>: - RustToCuda, - O::TurnoverRate: RustToCuda, - O::SpeciationProbability: RustToCuda, - SimulationKernel< - M, - O::Habitat, - CudaRng>, - IndependentLineageStore, - NeverEmigrationExit, - L::DispersalSampler, - IndependentCoalescenceSampler, - O::TurnoverRate, - O::SpeciationProbability, - IndependentEventSampler< - M, - O::Habitat, - CudaRng>, - NeverEmigrationExit, - L::DispersalSampler, - O::TurnoverRate, - O::SpeciationProbability, - >, - NeverImmigrationEntry, - L::ActiveLineageSampler, - R::ReportSpeciation, - R::ReportDispersal, - >: SimulatableKernel< - M, - O::Habitat, - CudaRng>, - IndependentLineageStore, - NeverEmigrationExit, - L::DispersalSampler, - IndependentCoalescenceSampler, - O::TurnoverRate, - O::SpeciationProbability, - IndependentEventSampler< + Ptx: CompiledKernelPtx< + simulate< M, O::Habitat, - CudaRng>, + G, + IndependentLineageStore, NeverEmigrationExit, L::DispersalSampler, + IndependentCoalescenceSampler, O::TurnoverRate, O::SpeciationProbability, + IndependentEventSampler< + M, + O::Habitat, + G, + NeverEmigrationExit, + L::DispersalSampler, + O::TurnoverRate, + O::SpeciationProbability, + >, + NeverImmigrationEntry, + L::ActiveLineageSampler, + R::ReportSpeciation, + R::ReportDispersal, >, - NeverImmigrationEntry, - L::ActiveLineageSampler, - R::ReportSpeciation, - R::ReportDispersal, >, +>( + args: &CudaArguments, + rng: G, + scenario: O, + 
pre_sampler: OriginPreSampler, + pause_before: Option, + local_partition: &mut P, + lineage_store_sampler_initialiser: L, +) -> Result, Error> +where + O::Habitat: RustToCuda + Sync, + O::DispersalSampler>: RustToCuda + Sync, + O::TurnoverRate: RustToCuda + Sync, + O::SpeciationProbability: RustToCuda + Sync, { let ( habitat, @@ -126,8 +108,7 @@ where speciation_probability, origin_sampler_auxiliary, decomposition_auxiliary, - ) = scenario - .build::>>>(); + ) = scenario.build::>(); let coalescence_sampler = IndependentCoalescenceSampler::default(); let event_sampler = IndependentEventSampler::default(); @@ -196,26 +177,36 @@ where }; let (mut status, time, steps, lineages) = with_initialised_cuda(args.device, || { - let kernel = SimulationKernel::try_new( - Stream::new(StreamFlags::NON_BLOCKING, None)?, - grid_size.clone(), - block_size.clone(), - args.ptx_jit, - Box::new(|kernel| { - crate::info::print_kernel_function_attributes("simulate", kernel); - Ok(()) - }), - )?; - - parallelisation::monolithic::simulate( - &mut simulation, - kernel, - (grid_size, block_size, args.dedup_cache, args.step_slice), - lineages, - event_slice, - pause_before, - local_partition, - ) + let mut stream = CudaDropWrapper::from(Stream::new(StreamFlags::NON_BLOCKING, None)?); + + let mut kernel = TypedPtxKernel::new::(Some(Box::new(|kernel| { + crate::info::print_kernel_function_attributes("simulate", kernel); + Ok(()) + }))); + + let config = LaunchConfig { + grid: grid_size, + block: block_size, + ptx_jit: args.ptx_jit, + }; + + rust_cuda::host::Stream::with(&mut stream, |stream| { + let launcher = Launcher { + stream, + kernel: &mut kernel, + config, + }; + + parallelisation::monolithic::simulate( + &mut simulation, + launcher, + (args.dedup_cache, args.step_slice), + lineages, + event_slice, + pause_before, + local_partition, + ) + }) }) .map_err(CudaError::from)?; diff --git a/rustcoalescence/algorithms/cuda/src/lib.rs b/rustcoalescence/algorithms/cuda/src/lib.rs index 
e2c221dca..8aa09353f 100644 --- a/rustcoalescence/algorithms/cuda/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/src/lib.rs @@ -5,7 +5,12 @@ #[macro_use] extern crate serde_derive_state; -use necsim_core::{cogs::MathsCore, lineage::Lineage, reporter::Reporter}; +use initialiser::CudaLineageStoreSampleInitialiser; +use necsim_core::{ + cogs::{MathsCore, PrimeableRng}, + lineage::Lineage, + reporter::Reporter, +}; use necsim_core_bond::{NonNegativeF64, PositiveF64}; use necsim_impls_cuda::cogs::{maths::NvptxMathsCore, rng::CudaRng}; @@ -37,10 +42,10 @@ use rustcoalescence_algorithms::{ }; use rustcoalescence_scenarios::Scenario; -use rustcoalescence_algorithms_cuda_cpu_kernel::SimulationKernel; -use rustcoalescence_algorithms_cuda_gpu_kernel::SimulatableKernel; +use rustcoalescence_algorithms_cuda_cpu_kernel::SimulationKernelPtx; +use rustcoalescence_algorithms_cuda_gpu_kernel::simulate; -use rust_cuda::common::RustToCuda; +use rust_cuda::{kernel::CompiledKernelPtx, lend::RustToCuda}; mod arguments; mod cuda; @@ -68,42 +73,38 @@ impl AlgorithmParamters for CudaAlgorithm { impl AlgorithmDefaults for CudaAlgorithm { type MathsCore = NvptxMathsCore; + type Rng = CudaRng>; } -#[allow(clippy::trait_duplication_in_bounds)] impl< 'p, - M: MathsCore, - O: Scenario>>, + M: MathsCore + Sync, + G: PrimeableRng + RustToCuda + Sync, + O: Scenario, R: Reporter, P: LocalPartition<'p, R>, - > Algorithm<'p, M, O, R, P> for CudaAlgorithm + > Algorithm<'p, M, G, O, R, P> for CudaAlgorithm where - O::Habitat: RustToCuda, - O::DispersalSampler>>>: - RustToCuda, - O::TurnoverRate: RustToCuda, - O::SpeciationProbability: RustToCuda, - SimulationKernel< + O::Habitat: RustToCuda + Sync, + O::DispersalSampler>: RustToCuda + Sync, + O::TurnoverRate: RustToCuda + Sync, + O::SpeciationProbability: RustToCuda + Sync, + SimulationKernelPtx< M, O::Habitat, - CudaRng>, + G, IndependentLineageStore, NeverEmigrationExit, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, + 
O::DispersalSampler>, IndependentCoalescenceSampler, O::TurnoverRate, O::SpeciationProbability, IndependentEventSampler< M, O::Habitat, - CudaRng>, + G, NeverEmigrationExit, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, + O::DispersalSampler>, O::TurnoverRate, O::SpeciationProbability, >, @@ -111,70 +112,62 @@ where IndependentActiveLineageSampler< M, O::Habitat, - CudaRng>, + G, NeverEmigrationExit, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, + O::DispersalSampler>, O::TurnoverRate, O::SpeciationProbability, ExpEventTimeSampler, >, R::ReportSpeciation, R::ReportDispersal, - >: SimulatableKernel< - M, - O::Habitat, - CudaRng>, - IndependentLineageStore, - NeverEmigrationExit, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, - IndependentCoalescenceSampler, - O::TurnoverRate, - O::SpeciationProbability, - IndependentEventSampler< + >: CompiledKernelPtx< + simulate< M, O::Habitat, - CudaRng>, + G, + IndependentLineageStore, NeverEmigrationExit, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, + O::DispersalSampler>, + IndependentCoalescenceSampler, O::TurnoverRate, O::SpeciationProbability, - >, - NeverImmigrationEntry, - IndependentActiveLineageSampler< - M, - O::Habitat, - CudaRng>, - NeverEmigrationExit, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, + IndependentEventSampler< + M, + O::Habitat, + G, + NeverEmigrationExit, + O::DispersalSampler>, + O::TurnoverRate, + O::SpeciationProbability, >, - O::TurnoverRate, - O::SpeciationProbability, - ExpEventTimeSampler, + NeverImmigrationEntry, + IndependentActiveLineageSampler< + M, + O::Habitat, + G, + NeverEmigrationExit, + O::DispersalSampler>, + O::TurnoverRate, + O::SpeciationProbability, + ExpEventTimeSampler, + >, + R::ReportSpeciation, + R::ReportDispersal, >, - R::ReportSpeciation, - R::ReportDispersal, >, - SimulationKernel< + SimulationKernelPtx< M, O::Habitat, - CudaRng>, + G, IndependentLineageStore, 
NeverEmigrationExit, TrespassingDispersalSampler< M, O::Habitat, - CudaRng>, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, - UniformAntiTrespassingDispersalSampler>>, + G, + O::DispersalSampler>, + UniformAntiTrespassingDispersalSampler, >, IndependentCoalescenceSampler, O::TurnoverRate, @@ -182,16 +175,14 @@ where IndependentEventSampler< M, O::Habitat, - CudaRng>, + G, NeverEmigrationExit, TrespassingDispersalSampler< M, O::Habitat, - CudaRng>, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, - UniformAntiTrespassingDispersalSampler>>, + G, + O::DispersalSampler>, + UniformAntiTrespassingDispersalSampler, >, O::TurnoverRate, O::SpeciationProbability, @@ -200,16 +191,14 @@ where IndependentActiveLineageSampler< M, O::Habitat, - CudaRng>, + G, NeverEmigrationExit, TrespassingDispersalSampler< M, O::Habitat, - CudaRng>, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, - UniformAntiTrespassingDispersalSampler>>, + G, + O::DispersalSampler>, + UniformAntiTrespassingDispersalSampler, >, O::TurnoverRate, O::SpeciationProbability, @@ -217,66 +206,61 @@ where >, R::ReportSpeciation, R::ReportDispersal, - >: SimulatableKernel< - M, - O::Habitat, - CudaRng>, - IndependentLineageStore, - NeverEmigrationExit, - TrespassingDispersalSampler< - M, - O::Habitat, - CudaRng>, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, - UniformAntiTrespassingDispersalSampler>>, - >, - IndependentCoalescenceSampler, - O::TurnoverRate, - O::SpeciationProbability, - IndependentEventSampler< + >: CompiledKernelPtx< + simulate< M, O::Habitat, - CudaRng>, + G, + IndependentLineageStore, NeverEmigrationExit, TrespassingDispersalSampler< M, O::Habitat, - CudaRng>, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, - UniformAntiTrespassingDispersalSampler>>, + G, + O::DispersalSampler>, + UniformAntiTrespassingDispersalSampler, >, + IndependentCoalescenceSampler, O::TurnoverRate, 
O::SpeciationProbability, - >, - NeverImmigrationEntry, - IndependentActiveLineageSampler< - M, - O::Habitat, - CudaRng>, - NeverEmigrationExit, - TrespassingDispersalSampler< + IndependentEventSampler< M, O::Habitat, - CudaRng>, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, + G, + NeverEmigrationExit, + TrespassingDispersalSampler< + M, + O::Habitat, + G, + O::DispersalSampler>, + UniformAntiTrespassingDispersalSampler, >, - UniformAntiTrespassingDispersalSampler>>, + O::TurnoverRate, + O::SpeciationProbability, >, - O::TurnoverRate, - O::SpeciationProbability, - ConstEventTimeSampler, + NeverImmigrationEntry, + IndependentActiveLineageSampler< + M, + O::Habitat, + G, + NeverEmigrationExit, + TrespassingDispersalSampler< + M, + O::Habitat, + G, + O::DispersalSampler>, + UniformAntiTrespassingDispersalSampler, + >, + O::TurnoverRate, + O::SpeciationProbability, + ConstEventTimeSampler, + >, + R::ReportSpeciation, + R::ReportDispersal, >, - R::ReportSpeciation, - R::ReportDispersal, >, { type LineageStore = IndependentLineageStore; - type Rng = CudaRng>; fn get_logical_partition(args: &Self::Arguments, _local_partition: &P) -> Partition { match &args.parallelism_mode { @@ -290,13 +274,28 @@ where fn initialise_and_simulate>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, pause_before: Option, local_partition: &mut P, - ) -> Result, Self::Error> { - launch::initialise_and_simulate( + ) -> Result, Self::Error> { + launch::initialise_and_simulate::<_, _, _, _, _, _, _, _, SimulationKernelPtx< + _, + _, + _, + _, + _, + >::DispersalSampler, + _, + _, + _, + _, + _, + >::ActiveLineageSampler<_, _>, + _, + _, + >>( &args, rng, scenario, @@ -311,18 +310,32 @@ where /// /// Returns a `ContinueError::Sample` if initialising the resuming /// simulation failed - #[allow(clippy::too_many_lines)] fn resume_and_simulate, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, 
pre_sampler: OriginPreSampler, lineages: L, resume_after: Option, pause_before: Option, local_partition: &mut P, - ) -> Result, ResumeError> { - launch::initialise_and_simulate( + ) -> Result, ResumeError> { + launch::initialise_and_simulate::<_, _, _, _, _, _, _, _, SimulationKernelPtx< + _, + _, + _, + _, + _, + as CudaLineageStoreSampleInitialiser<_, _, O, _>>::DispersalSampler, + _, + _, + _, + _, + _, + as CudaLineageStoreSampleInitialiser<_, _, O, _>>::ActiveLineageSampler<_, _>, + _, + _, + >>( &args, rng, scenario, @@ -340,24 +353,38 @@ where /// /// Returns a `ContinueError` if fixing up the restarting /// simulation (incl. running the algorithm) failed - #[allow(clippy::too_many_lines)] fn fixup_for_restart, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, restart_at: PositiveF64, fixup_strategy: RestartFixUpStrategy, local_partition: &mut P, - ) -> Result, ResumeError> { - launch::initialise_and_simulate( + ) -> Result, ResumeError> { + launch::initialise_and_simulate::<_, _, _, _, _, _, _, _, SimulationKernelPtx< + _, + _, + _, + _, + _, + as CudaLineageStoreSampleInitialiser<_, _, O, _>>::DispersalSampler, + _, + _, + _, + _, + _, + as CudaLineageStoreSampleInitialiser<_, _, O, _>>::ActiveLineageSampler<_, ConstEventTimeSampler>, + _, + _, + >>( &args, rng, scenario, pre_sampler, - Some(PositiveF64::max_after(restart_at.into(), restart_at.into()).into()), - local_partition, + Some(PositiveF64::max_after(restart_at.into(), + restart_at.into()).into()), local_partition, FixUpInitialiser { lineages, restart_at, diff --git a/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs b/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs index 66e1ff479..213f6aa11 100644 --- a/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs +++ b/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs @@ -1,9 +1,9 @@ use 
std::{collections::VecDeque, convert::TryInto, num::NonZeroU64, sync::atomic::AtomicU64}; use rust_cuda::{ - common::RustToCuda, - host::{HostAndDeviceMutRef, LendToCuda}, - rustacuda::function::{BlockSize, GridSize}, + host::HostAndDeviceMutRef, + kernel::Launcher, + lend::{LendToCuda, RustToCuda}, utils::exchange::wrapper::ExchangeWrapperOnHost, }; @@ -37,8 +37,7 @@ use necsim_partitioning_core::LocalPartition; use necsim_impls_cuda::{event_buffer::EventBuffer, value_buffer::ValueBuffer}; -use rustcoalescence_algorithms_cuda_cpu_kernel::SimulationKernel; -use rustcoalescence_algorithms_cuda_gpu_kernel::SimulatableKernel; +use rustcoalescence_algorithms_cuda_gpu_kernel::simulate; use crate::error::CudaError; @@ -48,25 +47,24 @@ type Result = std::result::Result; pub fn simulate< 'l, 'p, - M: MathsCore, - H: Habitat + RustToCuda, - G: PrimeableRng + RustToCuda, - S: LineageStore + RustToCuda, - X: EmigrationExit + RustToCuda, - D: DispersalSampler + RustToCuda, - C: CoalescenceSampler + RustToCuda, - T: TurnoverRate + RustToCuda, - N: SpeciationProbability + RustToCuda, - E: MinSpeciationTrackingEventSampler + RustToCuda, - I: ImmigrationEntry + RustToCuda, - A: SingularActiveLineageSampler - + RustToCuda, + M: MathsCore + Sync, + H: Habitat + RustToCuda + Sync, + G: PrimeableRng + RustToCuda + Sync, + S: LineageStore + RustToCuda + Sync, + X: EmigrationExit + RustToCuda + Sync, + D: DispersalSampler + RustToCuda + Sync, + C: CoalescenceSampler + RustToCuda + Sync, + T: TurnoverRate + RustToCuda + Sync, + N: SpeciationProbability + RustToCuda + Sync, + E: MinSpeciationTrackingEventSampler + RustToCuda + Sync, + I: ImmigrationEntry + RustToCuda + Sync, + A: SingularActiveLineageSampler + RustToCuda + Sync, P: Reporter, L: LocalPartition<'p, P>, LI: IntoIterator, >( simulation: &mut Simulation, - mut kernel: SimulationKernel< + mut launcher: Launcher>::WaterLevelReporter as Reporter>::ReportSpeciation, <>::WaterLevelReporter as Reporter>::ReportDispersal, - >, - 
config: (GridSize, BlockSize, DedupCache, NonZeroU64), + >>, + config: (DedupCache, NonZeroU64), lineages: LI, event_slice: EventSlice, pause_before: Option, local_partition: &'l mut L, -) -> Result<(Status, NonNegativeF64, u64, impl IntoIterator)> - where SimulationKernel< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - <>::WaterLevelReporter as Reporter>::ReportSpeciation, - <>::WaterLevelReporter as Reporter>::ReportDispersal, - >: SimulatableKernel< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - <>::WaterLevelReporter as Reporter>::ReportSpeciation, - <>::WaterLevelReporter as Reporter>::ReportDispersal, - >, -{ +) -> Result<( + Status, + NonNegativeF64, + u64, + impl IntoIterator, +)> { let mut slow_lineages = lineages .into_iter() .map(|lineage| { @@ -143,7 +114,7 @@ pub fn simulate< L, >>::WaterLevelReporter::new(event_slice.get(), local_partition); - let (grid_size, block_size, dedup_cache, step_slice) = config; + let (dedup_cache, step_slice) = config; #[allow(clippy::or_fun_call)] let intial_max_time = slow_lineages @@ -153,10 +124,13 @@ pub fn simulate< .unwrap_or(NonNegativeF64::zero()); // Initialise the total_time_max and total_steps_sum atomics - let mut total_time_max = AtomicU64::new(intial_max_time.get().to_bits()).into(); - let mut total_steps_sum = AtomicU64::new(0_u64).into(); + let mut total_time_max = AtomicU64::new(intial_max_time.get().to_bits()); + let mut total_steps_sum = AtomicU64::new(0_u64); - let mut task_list = ExchangeWrapperOnHost::new(ValueBuffer::new(&block_size, &grid_size)?)?; + let mut task_list = ExchangeWrapperOnHost::new(ValueBuffer::new( + &launcher.config.block, + &launcher.config.grid, + )?)?; let mut event_buffer: ExchangeWrapperOnHost< EventBuffer< <>::WaterLevelReporter as Reporter>::ReportDispersal, >, > = ExchangeWrapperOnHost::new(EventBuffer::new( - &block_size, - &grid_size, + &launcher.config.block, &launcher.config.grid, step_slice.get().try_into().unwrap_or(usize::MAX), )?)?; - 
let mut min_spec_sample_buffer = - ExchangeWrapperOnHost::new(ValueBuffer::new(&block_size, &grid_size)?)?; - let mut next_event_time_buffer = - ExchangeWrapperOnHost::new(ValueBuffer::new(&block_size, &grid_size)?)?; + let mut min_spec_sample_buffer = ExchangeWrapperOnHost::new(ValueBuffer::new( + &launcher.config.block, + &launcher.config.grid, + )?)?; + let mut next_event_time_buffer = ExchangeWrapperOnHost::new(ValueBuffer::new( + &launcher.config.block, + &launcher.config.grid, + )?)?; let mut min_spec_samples = dedup_cache.construct(slow_lineages.len()); @@ -195,8 +172,7 @@ pub fn simulate< HostAndDeviceMutRef::with_new(&mut total_time_max, |total_time_max| -> Result<()> { HostAndDeviceMutRef::with_new(&mut total_steps_sum, |total_steps_sum| -> Result<()> { - // TODO: Pipeline async launches and callbacks of simulation/event analysis - simulation.lend_to_cuda_mut(|mut simulation_cuda_repr| -> Result<()> { + simulation.lend_to_cuda(|simulation_cuda_repr| -> Result<()> { while !slow_lineages.is_empty() && pause_before.map_or(true, |pause_before| level_time < pause_before) { @@ -242,8 +218,16 @@ pub fn simulate< proxy.advance_water_level(level_time); // Simulate all slow lineages until they have finished or exceeded the - // new water level + // new water level while !slow_lineages.is_empty() { + // Move the event buffer and min speciation sample buffer to CUDA + let mut event_buffer_cuda_async = + event_buffer.move_to_device_async(launcher.stream)?; + let mut min_spec_sample_buffer_cuda_async = + min_spec_sample_buffer.move_to_device_async(launcher.stream)?; + let mut next_event_time_buffer_cuda_async = + next_event_time_buffer.move_to_device_async(launcher.stream)?; + // Upload the new tasks from the front of the task queue for mut task in task_list.iter_mut() { let next_slow_lineage = loop { @@ -261,31 +245,44 @@ pub fn simulate< task.replace(next_slow_lineage); } - // Move the task list, event buffer and min speciation sample buffer - // to CUDA - let mut 
event_buffer_cuda = event_buffer.move_to_device()?; - let mut min_spec_sample_buffer_cuda = - min_spec_sample_buffer.move_to_device()?; - let mut next_event_time_buffer_cuda = - next_event_time_buffer.move_to_device()?; - let mut task_list_cuda = task_list.move_to_device()?; - - kernel.simulate_raw( - simulation_cuda_repr.as_mut(), - task_list_cuda.as_mut(), - event_buffer_cuda.as_mut(), - min_spec_sample_buffer_cuda.as_mut(), - next_event_time_buffer_cuda.as_mut(), - total_time_max.as_ref(), - total_steps_sum.as_ref(), - step_slice.get().into(), - level_time.into(), + // Move the task list to CUDA + let mut task_list_cuda_async = + task_list.move_to_device_async(launcher.stream)?; + + let launch = launcher.launch9_async( + simulation_cuda_repr.as_async(launcher.stream).extract_ref(), + task_list_cuda_async.as_mut_async(), + event_buffer_cuda_async.as_mut_async(), + min_spec_sample_buffer_cuda_async.as_mut_async(), + next_event_time_buffer_cuda_async.as_mut_async(), + total_time_max + .as_ref() + .as_async(launcher.stream) + .extract_ref(), + total_steps_sum + .as_ref() + .as_async(launcher.stream) + .extract_ref(), + step_slice.get(), + level_time, )?; - min_spec_sample_buffer = min_spec_sample_buffer_cuda.move_to_host()?; - next_event_time_buffer = next_event_time_buffer_cuda.move_to_host()?; - task_list = task_list_cuda.move_to_host()?; - event_buffer = event_buffer_cuda.move_to_host()?; + let min_spec_sample_buffer_host_async = + min_spec_sample_buffer_cuda_async + .move_to_host_async(launcher.stream)?; + let next_event_time_buffer_host_async = + next_event_time_buffer_cuda_async + .move_to_host_async(launcher.stream)?; + let task_list_host_async = + task_list_cuda_async.move_to_host_async(launcher.stream)?; + let event_buffer_host_async = + event_buffer_cuda_async.move_to_host_async(launcher.stream)?; + + task_list = task_list_host_async.synchronize()?; + next_event_time_buffer = next_event_time_buffer_host_async.synchronize()?; + min_spec_sample_buffer = 
min_spec_sample_buffer_host_async.synchronize()?; + + launch.synchronize()?; // Fetch the completion of the tasks for ((mut spec_sample, mut next_event_time), mut task) in @@ -303,8 +300,7 @@ pub fn simulate< { if !duplicate_individual { // Reclassify lineages as either slow (still below - // water) or - // fast + // the metaphorical water level) or fast if next_event_time < level_time { slow_lineages.push_back((task, next_event_time.into())); } else { @@ -314,6 +310,8 @@ pub fn simulate< } } + event_buffer = event_buffer_host_async.synchronize()?; + // TODO: explore partial sorting on the GPU event_buffer.report_events_unordered(&mut proxy); proxy.local_partition().get_reporter().report_progress( @@ -336,10 +334,9 @@ pub fn simulate< })?; // Safety: Max of NonNegativeF64 values from the GPU - let total_time_max = unsafe { - NonNegativeF64::new_unchecked(f64::from_bits(total_time_max.into_inner().into_inner())) - }; - let total_steps_sum = total_steps_sum.into_inner().into_inner(); + let total_time_max = + unsafe { NonNegativeF64::new_unchecked(f64::from_bits(total_time_max.into_inner())) }; + let total_steps_sum = total_steps_sum.into_inner(); local_partition.report_progress_sync(slow_lineages.len() as u64); @@ -348,5 +345,9 @@ pub fn simulate< local_partition.reduce_global_time_steps(total_time_max, total_steps_sum); let lineages = slow_lineages.into_iter().map(|(lineage, _)| lineage); + // Note: The simulation requires no mutation, since all components are + // either immutable or have singular swap states, and the list + // of all lineages (which does change) is returned separately + Ok((status, global_time, global_steps, lineages)) } diff --git a/rustcoalescence/algorithms/gillespie/src/event_skipping/mod.rs b/rustcoalescence/algorithms/gillespie/src/event_skipping/mod.rs index 4dc2bfa4e..1c37f0db9 100644 --- a/rustcoalescence/algorithms/gillespie/src/event_skipping/mod.rs +++ b/rustcoalescence/algorithms/gillespie/src/event_skipping/mod.rs @@ -1,5 +1,5 @@ use 
necsim_core::{ - cogs::{GloballyCoherentLineageStore, MathsCore, SeparableDispersalSampler}, + cogs::{GloballyCoherentLineageStore, MathsCore, SeparableDispersalSampler, SplittableRng}, lineage::Lineage, reporter::Reporter, }; @@ -39,18 +39,24 @@ impl AlgorithmParamters for EventSkippingAlgorithm { impl AlgorithmDefaults for EventSkippingAlgorithm { type MathsCore = IntrinsicsMathsCore; + type Rng = Pcg; } -impl<'p, O: Scenario>, R: Reporter, P: LocalPartition<'p, R>, M: MathsCore> - Algorithm<'p, M, O, R, P> for EventSkippingAlgorithm +impl< + 'p, + O: Scenario, + R: Reporter, + P: LocalPartition<'p, R>, + M: MathsCore, + G: SplittableRng, + > Algorithm<'p, M, G, O, R, P> for EventSkippingAlgorithm where O::LineageStore>: GloballyCoherentLineageStore, - O::DispersalSampler>>: - SeparableDispersalSampler>, + O::DispersalSampler>: + SeparableDispersalSampler, { type LineageStore = O::LineageStore>; - type Rng = Pcg; fn get_logical_partition(args: &Self::Arguments, local_partition: &P) -> Partition { get_gillespie_logical_partition(args, local_partition) @@ -58,12 +64,12 @@ where fn initialise_and_simulate>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, pause_before: Option, local_partition: &mut P, - ) -> Result, Self::Error> { + ) -> Result, Self::Error> { launch::initialise_and_simulate( args, rng, @@ -81,14 +87,14 @@ where /// simulation failed fn resume_and_simulate, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, resume_after: Option, pause_before: Option, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( args, rng, @@ -109,14 +115,14 @@ where /// simulation (incl. 
running the algorithm) failed fn fixup_for_restart, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, restart_at: PositiveF64, fixup_strategy: RestartFixUpStrategy, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( args, rng, diff --git a/rustcoalescence/algorithms/gillespie/src/gillespie/classical/mod.rs b/rustcoalescence/algorithms/gillespie/src/gillespie/classical/mod.rs index 06114bca7..892b7e285 100644 --- a/rustcoalescence/algorithms/gillespie/src/gillespie/classical/mod.rs +++ b/rustcoalescence/algorithms/gillespie/src/gillespie/classical/mod.rs @@ -1,5 +1,5 @@ use necsim_core::{ - cogs::{LocallyCoherentLineageStore, MathsCore}, + cogs::{LocallyCoherentLineageStore, MathsCore, SplittableRng}, lineage::Lineage, reporter::Reporter, }; @@ -9,7 +9,6 @@ use necsim_impls_no_std::cogs::{ lineage_store::coherent::locally::classical::ClassicalLineageStore, origin_sampler::pre_sampler::OriginPreSampler, turnover_rate::uniform::UniformTurnoverRate, }; -use necsim_impls_std::cogs::rng::pcg::Pcg; use necsim_partitioning_core::LocalPartition; use rustcoalescence_algorithms::{ @@ -31,24 +30,24 @@ use initialiser::{ // Optimised 'Classical' implementation for the `UniformTurnoverSampler` impl< 'p, - O: Scenario, TurnoverRate = UniformTurnoverRate>, + O: Scenario, R: Reporter, P: LocalPartition<'p, R>, M: MathsCore, - > Algorithm<'p, M, O, R, P> for GillespieAlgorithm + G: SplittableRng, + > Algorithm<'p, M, G, O, R, P> for GillespieAlgorithm where O::LineageStore>: LocallyCoherentLineageStore, { - #[allow(clippy::too_many_lines)] fn initialise_and_simulate>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, pause_before: Option, local_partition: &mut P, - ) -> Result, Self::Error> { + ) -> Result, Self::Error> { launch::initialise_and_simulate( args, rng, @@ -66,14 +65,14 @@ where 
/// simulation failed fn resume_and_simulate, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, resume_after: Option, pause_before: Option, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( args, rng, @@ -92,17 +91,16 @@ where /// /// Returns a `ContinueError` if fixing up the restarting /// simulation (incl. running the algorithm) failed - #[allow(clippy::too_many_lines)] fn fixup_for_restart, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, restart_at: PositiveF64, fixup_strategy: RestartFixUpStrategy, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( args, rng, diff --git a/rustcoalescence/algorithms/gillespie/src/gillespie/mod.rs b/rustcoalescence/algorithms/gillespie/src/gillespie/mod.rs index f485eb6a6..c1f775555 100644 --- a/rustcoalescence/algorithms/gillespie/src/gillespie/mod.rs +++ b/rustcoalescence/algorithms/gillespie/src/gillespie/mod.rs @@ -1,4 +1,6 @@ +use necsim_core::cogs::MathsCore; use necsim_impls_no_std::cogs::maths::intrinsics::IntrinsicsMathsCore; +use necsim_impls_std::cogs::rng::pcg::Pcg; use rustcoalescence_algorithms::{AlgorithmDefaults, AlgorithmParamters}; @@ -17,4 +19,5 @@ impl AlgorithmParamters for GillespieAlgorithm { impl AlgorithmDefaults for GillespieAlgorithm { type MathsCore = IntrinsicsMathsCore; + type Rng = Pcg; } diff --git a/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/mod.rs b/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/mod.rs index 7f44e6280..a08985da7 100644 --- a/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/mod.rs +++ b/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/mod.rs @@ -1,5 +1,5 @@ use necsim_core::{ - cogs::{LocallyCoherentLineageStore, MathsCore}, + 
cogs::{LocallyCoherentLineageStore, MathsCore, SplittableRng}, lineage::Lineage, reporter::Reporter, }; @@ -9,7 +9,6 @@ use necsim_impls_no_std::cogs::{ lineage_store::coherent::locally::classical::ClassicalLineageStore, origin_sampler::pre_sampler::OriginPreSampler, }; -use necsim_impls_std::cogs::rng::pcg::Pcg; use necsim_partitioning_core::{partition::Partition, LocalPartition}; use rustcoalescence_algorithms::{ @@ -31,28 +30,32 @@ use initialiser::{ }; // Default 'Gillespie' implementation for any turnover sampler -impl<'p, O: Scenario>, R: Reporter, P: LocalPartition<'p, R>, M: MathsCore> - Algorithm<'p, M, O, R, P> for GillespieAlgorithm +impl< + 'p, + O: Scenario, + R: Reporter, + P: LocalPartition<'p, R>, + M: MathsCore, + G: SplittableRng, + > Algorithm<'p, M, G, O, R, P> for GillespieAlgorithm where O::LineageStore>: LocallyCoherentLineageStore, { type LineageStore = O::LineageStore>; - type Rng = Pcg; default fn get_logical_partition(args: &Self::Arguments, local_partition: &P) -> Partition { get_gillespie_logical_partition(args, local_partition) } - #[allow(clippy::shadow_unrelated, clippy::too_many_lines)] default fn initialise_and_simulate>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, pause_before: Option, local_partition: &mut P, - ) -> Result, Self::Error> { + ) -> Result, Self::Error> { launch::initialise_and_simulate( args, rng, @@ -68,20 +71,19 @@ where /// /// Returns a `ContinueError::Sample` if initialising the resuming /// simulation failed - #[allow(clippy::too_many_lines)] default fn resume_and_simulate< I: Iterator, L: ExactSizeIterator, >( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, resume_after: Option, pause_before: Option, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( args, rng, @@ -100,17 +102,16 @@ where /// /// Returns a `ContinueError` if fixing up 
the restarting /// simulation (incl. running the algorithm) failed - #[allow(clippy::too_many_lines)] default fn fixup_for_restart, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, restart_at: PositiveF64, fixup_strategy: RestartFixUpStrategy, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( args, rng, diff --git a/rustcoalescence/algorithms/independent/src/lib.rs b/rustcoalescence/algorithms/independent/src/lib.rs index 8a7d0473d..7550642c9 100644 --- a/rustcoalescence/algorithms/independent/src/lib.rs +++ b/rustcoalescence/algorithms/independent/src/lib.rs @@ -4,7 +4,11 @@ #[macro_use] extern crate serde_derive_state; -use necsim_core::{cogs::MathsCore, lineage::Lineage, reporter::Reporter}; +use necsim_core::{ + cogs::{MathsCore, PrimeableRng}, + lineage::Lineage, + reporter::Reporter, +}; use necsim_core_bond::{NonNegativeF64, PositiveF64}; use necsim_impls_no_std::cogs::{ @@ -39,13 +43,19 @@ impl AlgorithmParamters for IndependentAlgorithm { impl AlgorithmDefaults for IndependentAlgorithm { type MathsCore = IntrinsicsMathsCore; + type Rng = WyHash; } -impl<'p, O: Scenario>, R: Reporter, P: LocalPartition<'p, R>, M: MathsCore> - Algorithm<'p, M, O, R, P> for IndependentAlgorithm +impl< + 'p, + O: Scenario, + R: Reporter, + P: LocalPartition<'p, R>, + M: MathsCore, + G: PrimeableRng, + > Algorithm<'p, M, G, O, R, P> for IndependentAlgorithm { type LineageStore = IndependentLineageStore; - type Rng = WyHash; fn get_logical_partition(args: &Self::Arguments, local_partition: &P) -> Partition { match &args.parallelism_mode { @@ -62,12 +72,12 @@ impl<'p, O: Scenario>, R: Reporter, P: LocalPartition<'p, R>, M: Ma fn initialise_and_simulate>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, pause_before: Option, local_partition: &mut P, - ) -> Result, Self::Error> { + ) -> 
Result, Self::Error> { launch::initialise_and_simulate( &args, rng, @@ -85,14 +95,14 @@ impl<'p, O: Scenario>, R: Reporter, P: LocalPartition<'p, R>, M: Ma /// simulation failed fn resume_and_simulate, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, resume_after: Option, pause_before: Option, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( &args, rng, @@ -111,17 +121,16 @@ impl<'p, O: Scenario>, R: Reporter, P: LocalPartition<'p, R>, M: Ma /// /// Returns a `ContinueError` if fixing up the restarting /// simulation (incl. running the algorithm) failed - #[allow(clippy::too_many_lines)] fn fixup_for_restart, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, restart_at: PositiveF64, fixup_strategy: RestartFixUpStrategy, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( &args, rng, diff --git a/rustcoalescence/algorithms/src/lib.rs b/rustcoalescence/algorithms/src/lib.rs index 7ed7ce88a..da7ada445 100644 --- a/rustcoalescence/algorithms/src/lib.rs +++ b/rustcoalescence/algorithms/src/lib.rs @@ -27,17 +27,18 @@ pub trait AlgorithmParamters { pub trait AlgorithmDefaults { type MathsCore: MathsCore; + type Rng: RngCore; } pub trait Algorithm< 'p, M: MathsCore, - O: Scenario, + G: RngCore, + O: Scenario, R: Reporter, P: LocalPartition<'p, R>, >: Sized + AlgorithmParamters + AlgorithmDefaults { - type Rng: RngCore; type LineageStore: LineageStore; fn get_logical_partition(args: &Self::Arguments, local_partition: &P) -> Partition; @@ -48,12 +49,12 @@ pub trait Algorithm< /// the algorithm failed fn initialise_and_simulate>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, pause_before: Option, local_partition: &mut P, - ) -> Result, 
Self::Error>; + ) -> Result, Self::Error>; /// # Errors /// @@ -62,14 +63,14 @@ pub trait Algorithm< #[allow(clippy::type_complexity, clippy::too_many_arguments)] fn resume_and_simulate, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, resume_after: Option, pause_before: Option, local_partition: &mut P, - ) -> Result, ResumeError>; + ) -> Result, ResumeError>; /// # Errors /// @@ -78,12 +79,12 @@ pub trait Algorithm< #[allow(clippy::type_complexity, clippy::too_many_arguments)] fn fixup_for_restart, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, restart_at: PositiveF64, fixup_strategy: RestartFixUpStrategy, local_partition: &mut P, - ) -> Result, ResumeError>; + ) -> Result, ResumeError>; } diff --git a/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs b/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs index 22d818fd6..33d35657e 100644 --- a/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs +++ b/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs @@ -41,41 +41,42 @@ use super::{super::super::BufferingSimulateArgsBuilder, rng}; macro_rules! match_scenario_algorithm { ( - ($algorithm:expr, $scenario:expr => $algscen:ident) { + ($algorithm:expr, $scenario:expr => $algscen:ident : $algscenty:ident) { $($(#[$algmeta:meta])* $algpat:pat => $algcode:block),* <=> - $($(#[$scenmeta:meta])* $scenpat:pat => $scencode:block),* + $($(#[$scenmeta:meta])* $scenpat:pat => $scencode:block => $scenty:ident),* } ) => { match_scenario_algorithm! 
{ - impl ($algorithm, $scenario => $algscen) { + impl ($algorithm, $scenario => $algscen : $algscenty) { $($(#[$algmeta])* $algpat => $algcode),* <=> - $($(#[$scenmeta])* $scenpat => $scencode),* + $($(#[$scenmeta])* $scenpat => $scencode => $scenty),* <=> } } }; ( - impl ($algorithm:expr, $scenario:expr => $algscen:ident) { + impl ($algorithm:expr, $scenario:expr => $algscen:ident : $algscenty:ident) { $(#[$algmeta:meta])* $algpat:pat => $algcode:block, $($(#[$algmetarem:meta])* $algpatrem:pat => $algcoderem:block),+ <=> - $($(#[$scenmeta:meta])* $scenpat:pat => $scencode:block),* + $($(#[$scenmeta:meta])* $scenpat:pat => $scencode:block => $scenty:ident),* <=> $($tail:tt)* } ) => { match_scenario_algorithm! { - impl ($algorithm, $scenario => $algscen) { + impl ($algorithm, $scenario => $algscen : $algscenty) { $($(#[$algmetarem])* $algpatrem => $algcoderem),+ <=> - $($(#[$scenmeta])* $scenpat => $scencode),* + $($(#[$scenmeta])* $scenpat => $scencode => $scenty),* <=> $($tail)* $(#[$algmeta])* $algpat => { match $scenario { $($(#[$scenmeta])* $scenpat => { + type $algscenty = $scenty; let $algscen = $scencode; $algcode }),* @@ -85,10 +86,10 @@ macro_rules! match_scenario_algorithm { } }; ( - impl ($algorithm:expr, $scenario:expr => $algscen:ident) { + impl ($algorithm:expr, $scenario:expr => $algscen:ident : $algscenty:ident) { $(#[$algmeta:meta])* $algpat:pat => $algcode:block <=> - $($(#[$scenmeta:meta])* $scenpat:pat => $scencode:block),* + $($(#[$scenmeta:meta])* $scenpat:pat => $scencode:block => $scenty:ident),* <=> $($tail:tt)* } @@ -98,6 +99,7 @@ macro_rules! match_scenario_algorithm { $(#[$algmeta])* $algpat => { match $scenario { $($(#[$scenmeta])* $scenpat => { + type $algscenty = $scenty; let $algscen = $scencode; $algcode }),* @@ -107,7 +109,7 @@ macro_rules! 
match_scenario_algorithm { }; } -#[allow(clippy::too_many_arguments)] +#[allow(clippy::too_many_arguments, clippy::too_many_lines)] pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( local_partition: P, @@ -121,13 +123,14 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( normalised_args: &mut BufferingSimulateArgsBuilder, ) -> anyhow::Result { match_scenario_algorithm!( - (algorithm, scenario => scenario) + (algorithm, scenario => scenario: ScenarioTy) { #[cfg(feature = "gillespie-algorithms")] AlgorithmArgs::Gillespie(algorithm_args) => { rng::dispatch::< ::MathsCore, - GillespieAlgorithm, _, R, P, + ::Rng<_>, + GillespieAlgorithm, ScenarioTy<_, _>, R, P, >( local_partition, sample, algorithm_args, scenario, pause_before, ron_args, normalised_args, @@ -137,7 +140,8 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( AlgorithmArgs::EventSkipping(algorithm_args) => { rng::dispatch::< ::MathsCore, - EventSkippingAlgorithm, _, R, P, + ::Rng<_>, + EventSkippingAlgorithm, ScenarioTy<_, _>, R, P, >( local_partition, sample, algorithm_args, scenario, pause_before, ron_args, normalised_args, @@ -147,7 +151,8 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( AlgorithmArgs::Independent(algorithm_args) => { rng::dispatch::< ::MathsCore, - IndependentAlgorithm, _, R, P, + ::Rng<_>, + IndependentAlgorithm, ScenarioTy<_, _>, R, P, >( local_partition, sample, algorithm_args, scenario, pause_before, ron_args, normalised_args, @@ -157,7 +162,8 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( AlgorithmArgs::Cuda(algorithm_args) => { rng::dispatch::< ::MathsCore, - CudaAlgorithm, _, R, P, + ::Rng<_>, + CudaAlgorithm, ScenarioTy<_, _>, R, P, >( local_partition, sample, algorithm_args, scenario, pause_before, ron_args, normalised_args, @@ -170,14 +176,14 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( scenario_args, speciation_probability_per_generation, )? 
- }, + } => SpatiallyExplicitUniformTurnoverScenario, #[cfg(feature = "spatially-explicit-turnover-map-scenario")] ScenarioArgs::SpatiallyExplicitTurnoverMap(scenario_args) => { SpatiallyExplicitTurnoverMapScenario::initialise( scenario_args, speciation_probability_per_generation, )? - }, + } => SpatiallyExplicitTurnoverMapScenario, #[cfg(feature = "non-spatial-scenario")] ScenarioArgs::NonSpatial(scenario_args) => { NonSpatialScenario::initialise( @@ -185,7 +191,7 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( speciation_probability_per_generation, ) .into_ok() - }, + } => NonSpatialScenario, #[cfg(feature = "almost-infinite-normal-dispersal-scenario")] ScenarioArgs::AlmostInfiniteNormalDispersal(scenario_args) => { AlmostInfiniteNormalDispersalScenario::initialise( @@ -193,7 +199,7 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( speciation_probability_per_generation, ) .into_ok() - }, + } => AlmostInfiniteNormalDispersalScenario, #[cfg(feature = "almost-infinite-clark2dt-dispersal-scenario")] ScenarioArgs::AlmostInfiniteClark2DtDispersal(scenario_args) => { AlmostInfiniteClark2DtDispersalScenario::initialise( @@ -201,7 +207,7 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( speciation_probability_per_generation, ) .into_ok() - }, + } => AlmostInfiniteClark2DtDispersalScenario, #[cfg(feature = "spatially-implicit-scenario")] ScenarioArgs::SpatiallyImplicit(scenario_args) => { SpatiallyImplicitScenario::initialise( @@ -209,7 +215,7 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( speciation_probability_per_generation, ) .into_ok() - }, + } => SpatiallyImplicitScenario, #[cfg(feature = "wrapping-noise-scenario")] ScenarioArgs::WrappingNoise(scenario_args) => { WrappingNoiseScenario::initialise( @@ -217,6 +223,6 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( speciation_probability_per_generation, ) .into_ok() - } + } => WrappingNoiseScenario }) } diff --git 
a/rustcoalescence/src/cli/simulate/dispatch/valid/info.rs b/rustcoalescence/src/cli/simulate/dispatch/valid/info.rs index aac4223c9..1b390136c 100644 --- a/rustcoalescence/src/cli/simulate/dispatch/valid/info.rs +++ b/rustcoalescence/src/cli/simulate/dispatch/valid/info.rs @@ -5,7 +5,7 @@ use anyhow::{Context, Result}; use rustcoalescence_algorithms::{result::SimulationOutcome, Algorithm}; use necsim_core::{ - cogs::MathsCore, + cogs::{MathsCore, RngCore}, reporter::{boolean::Boolean, Reporter}, }; use necsim_core_bond::NonNegativeF64; @@ -25,23 +25,23 @@ use super::{super::super::BufferingSimulateArgsBuilder, launch}; pub(super) fn dispatch< 'p, M: MathsCore, - A: Algorithm<'p, M, O, R, P>, - O: Scenario, + G: RngCore, + A: Algorithm<'p, M, G, O, R, P>, + O: Scenario, R: Reporter, P: LocalPartition<'p, R>, >( algorithm_args: A::Arguments, - rng: A::Rng, + rng: G, scenario: O, sample: Sample, pause_before: Option, mut local_partition: P, normalised_args: &BufferingSimulateArgsBuilder, -) -> anyhow::Result> +) -> anyhow::Result> where - Result, A::Error>: - anyhow::Context, A::Error>, + Result, A::Error>: anyhow::Context, A::Error>, { let config_str = normalised_args .build() @@ -118,7 +118,7 @@ where warn!("The simulation will report no events."); } - let result = launch::simulate::( + let result = launch::simulate::( algorithm_args, rng, scenario, diff --git a/rustcoalescence/src/cli/simulate/dispatch/valid/launch.rs b/rustcoalescence/src/cli/simulate/dispatch/valid/launch.rs index e070202d6..ceb5ff4c8 100644 --- a/rustcoalescence/src/cli/simulate/dispatch/valid/launch.rs +++ b/rustcoalescence/src/cli/simulate/dispatch/valid/launch.rs @@ -2,7 +2,10 @@ use anyhow::Context; use rustcoalescence_algorithms::{result::SimulationOutcome, Algorithm}; -use necsim_core::{cogs::MathsCore, reporter::Reporter}; +use necsim_core::{ + cogs::{MathsCore, RngCore}, + reporter::Reporter, +}; use necsim_core_bond::{NonNegativeF64, PositiveF64}; use 
necsim_impls_no_std::cogs::origin_sampler::pre_sampler::OriginPreSampler; use necsim_partitioning_core::LocalPartition; @@ -14,18 +17,19 @@ use crate::args::config::sample::{Sample, SampleMode, SampleModeRestart, SampleO pub(super) fn simulate< 'p, M: MathsCore, - A: Algorithm<'p, M, O, R, P>, - O: Scenario, + G: RngCore, + A: Algorithm<'p, M, G, O, R, P>, + O: Scenario, R: Reporter, P: LocalPartition<'p, R>, >( algorithm_args: A::Arguments, - rng: A::Rng, + rng: G, scenario: O, sample: Sample, pause_before: Option, local_partition: &mut P, -) -> anyhow::Result> { +) -> anyhow::Result> { let lineages = match sample.origin { SampleOrigin::Habitat => { return A::initialise_and_simulate( diff --git a/rustcoalescence/src/cli/simulate/dispatch/valid/rng.rs b/rustcoalescence/src/cli/simulate/dispatch/valid/rng.rs index 929556339..2907eddc8 100644 --- a/rustcoalescence/src/cli/simulate/dispatch/valid/rng.rs +++ b/rustcoalescence/src/cli/simulate/dispatch/valid/rng.rs @@ -27,8 +27,9 @@ use super::{ pub(super) fn dispatch< 'p, M: MathsCore, - A: Algorithm<'p, M, O, R, P>, - O: Scenario, + G: RngCore, + A: Algorithm<'p, M, G, O, R, P>, + O: Scenario, R: Reporter, P: LocalPartition<'p, R>, >( @@ -43,17 +44,16 @@ pub(super) fn dispatch< normalised_args: &mut BufferingSimulateArgsBuilder, ) -> anyhow::Result where - Result, A::Error>: - anyhow::Context, A::Error>, + Result, A::Error>: anyhow::Context, A::Error>, { - let rng: A::Rng = match parse::rng::parse_and_normalise( + let rng: G = match parse::rng::parse_and_normalise( ron_args, normalised_args, &mut A::get_logical_partition(&algorithm_args, &local_partition), )? 
{ RngArgs::Seed(seed) => SeedableRng::seed_from_u64(seed), RngArgs::Sponge(bytes) => { - let mut seed = >::Seed::default(); + let mut seed = G::Seed::default(); let mut sponge = Keccak::v256(); sponge.update(&bytes); @@ -64,7 +64,7 @@ where RngArgs::State(state) => state.into(), }; - let result = info::dispatch::( + let result = info::dispatch::( algorithm_args, rng, scenario,