diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 68dbe7cb..0ccc9968 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -87,3 +87,20 @@ steps: slurm_ntasks: 1 slurm_gpus: 1 timeout_in_minutes: 60 + + - label: "KernelAbstraction GPU kernel benchmarks" + command: + - "julia --project=gpuenv perf/kernel_bm.jl CuArray" + agents: + slurm_ntasks: 1 + slurm_gpus: 1 + timeout_in_minutes: 60 + + - label: "KernelAbstraction CPU kernel benchmarks" + command: + - "julia --project=gpuenv perf/kernel_bm.jl" + agents: + slurm_ntasks: 1 + slurm_gpus: 1 + timeout_in_minutes: 60 + diff --git a/gpuenv/Project.toml b/gpuenv/Project.toml index 13287bb8..9523d868 100644 --- a/gpuenv/Project.toml +++ b/gpuenv/Project.toml @@ -1,4 +1,5 @@ [deps] +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" CLIMAParameters = "6eacf6c3-8458-43b9-ae03-caf5306d3d53" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" diff --git a/perf/Project.toml b/perf/Project.toml index 0cceb92b..f873269e 100644 --- a/perf/Project.toml +++ b/perf/Project.toml @@ -2,6 +2,7 @@ BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" CLIMAParameters = "6eacf6c3-8458-43b9-ae03-caf5306d3d53" JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b" +KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" @@ -12,3 +13,9 @@ RootSolvers = "7181ea78-2dcb-4de3-ab41-2b8ab5a31e74" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Thermodynamics = "b60c26fb-14c3-4610-9d3e-2d17fe7ff00c" + +[compat] +CLIMAParameters = "0.8" +KernelAbstractions = "0.9" +RootSolvers = "0.4" +julia = "1.7" diff --git a/perf/kernel_bm.jl b/perf/kernel_bm.jl new file mode 100644 index 00000000..bb019906 --- /dev/null +++ b/perf/kernel_bm.jl @@ -0,0 +1,171 @@ +#= 
+using Revise; "CuArray" in ARGS || push!(ARGS, "CuArray"); include("perf/kernel_bm.jl")
+using Revise; include("perf/kernel_bm.jl")
+=#
+using Test
+
+using KernelAbstractions
+using BenchmarkTools
+import KernelAbstractions as KA
+using Random
+using LinearAlgebra
+import RootSolvers as RS
+
+import Thermodynamics as TD
+import Thermodynamics.Parameters as TP
+import CLIMAParameters as CP
+
+# Select the array type from the first command-line argument: "CuArray" picks
+# `CUDA.CuArray`; any other value, or no argument at all, picks `Array`.
+# CUDA is only imported on the "CuArray" path.
+if get(ARGS, 1, "Array") == "CuArray"
+    import CUDA
+    ArrayType = CUDA.CuArray
+    # NOTE(review): presumably turns accidental scalar indexing of device
+    # arrays into an error -- confirm against the CUDA.jl documentation.
+    CUDA.allowscalar(false)
+else
+    ArrayType = Array
+end
+
+# One parameter set per float type, built once when the script is loaded.
+# `parameter_set(FT)` returns the constant that matches `FT`.
+const param_set_Float64 = TP.ThermodynamicsParameters(Float64)
+const param_set_Float32 = TP.ThermodynamicsParameters(Float32)
+parameter_set(::Type{Float64}) = param_set_Float64
+parameter_set(::Type{Float32}) = param_set_Float32
+
+# Echo the selected array type.
+@show ArrayType
+
+# Kernel: for each global index `i`, build a thermodynamic state from `ρ[i]`,
+# `e_int[i]` and `q_tot[i]` (each converted to `FT`, the element type of `dst`)
+# with `TD.PhaseEquil_ρeq`, then store one result per `TD` function in column
+# `i` of `dst`.
+#
+# - Rows 1 through 51 of `dst` are addressed; rows 20, 26 and 38 are never
+#   written because their calls are commented out below.
+# - `p` is accepted but not read inside the kernel.
+# - All indexing is under `@inbounds`, so the launch range must not exceed the
+#   length of the inputs or the second dimension of `dst`, and `dst` needs at
+#   least 51 rows.
+@kernel function test_thermo_kernel!(
+    param_set,
+    dst::AbstractArray{FT},
+    e_int,
+    ρ,
+    p,
+    q_tot,
+) where {FT}
+    i = @index(Global)
+    @inbounds begin
+
+        # NOTE(review): this rebinds `param_set`, so the kernel argument of the
+        # same name is never read; the module-level constant for `FT` is used
+        # instead. Confirm that this is intentional.
+        param_set = parameter_set(FT)
+        ts = TD.PhaseEquil_ρeq(param_set, FT(ρ[i]), FT(e_int[i]), FT(q_tot[i]))
+
+        dst[1, i] = TD.air_pressure(param_set, ts)
+        dst[2, i] = TD.air_temperature(param_set, ts)
+        dst[3, i] = TD.air_density(param_set, ts)
+        dst[4, i] = TD.specific_volume(param_set, ts)
+        dst[5, i] = TD.soundspeed_air(param_set, ts)
+        dst[6, i] = TD.total_specific_humidity(param_set, ts)
+        dst[7, i] = TD.liquid_specific_humidity(param_set, ts)
+        dst[8, i] = TD.ice_specific_humidity(param_set, ts)
+        dst[9, i] = TD.vapor_specific_humidity(param_set, ts)
+        # The two trailing `FT(0)` arguments are passed as zeros; their meaning
+        # is defined by `TD.total_energy` (not visible here).
+        dst[10, i] = TD.total_energy(param_set, ts, FT(0), FT(0))
+        dst[11, i] = TD.internal_energy(param_set, ts)
+        dst[12, i] = TD.internal_energy_sat(param_set, ts)
+        dst[13, i] = TD.internal_energy_dry(param_set, ts)
+        dst[14, i] = TD.internal_energy_vapor(param_set, ts)
+        dst[15, i] = TD.internal_energy_liquid(param_set, ts)
+        dst[16, i] = TD.internal_energy_ice(param_set, ts)
+        dst[17, i] = TD.cp_m(param_set, ts)
+        dst[18, i] = TD.cv_m(param_set, ts)
+        dst[19, i] = TD.gas_constant_air(param_set, ts)
+        # Row 20 is intentionally skipped (call disabled).
+        # dst[20, i] = TD.gas_constants(param_set, ts)
+        dst[21, i] = TD.latent_heat_vapor(param_set, ts)
+        dst[22, i] = TD.latent_heat_sublim(param_set, ts)
+        dst[23, i] = TD.latent_heat_fusion(param_set, ts)
+        dst[24, i] = TD.latent_heat_liq_ice(param_set, ts)
+        dst[25, i] = TD.saturation_vapor_pressure(param_set, ts, TD.Liquid())
+        # Row 26 is intentionally skipped (call disabled).
+        # dst[26, i] = TD.q_vap_saturation_generic(param_set, ts)
+        dst[27, i] = TD.q_vap_saturation(param_set, ts)
+        dst[28, i] = TD.q_vap_saturation_liquid(param_set, ts)
+        dst[29, i] = TD.q_vap_saturation_ice(param_set, ts)
+        dst[30, i] = TD.saturation_excess(param_set, ts)
+        dst[31, i] = TD.supersaturation(param_set, ts, TD.Liquid())
+        dst[32, i] = TD.liquid_fraction(param_set, ts)
+        # Only the `.tot` field of the returned value is stored.
+        dst[33, i] = TD.PhasePartition_equil(param_set, ts).tot
+        dst[34, i] = TD.dry_pottemp(param_set, ts)
+        dst[35, i] = TD.virtual_pottemp(param_set, ts)
+        dst[36, i] = TD.virtual_dry_static_energy(param_set, ts, FT(0))
+        dst[37, i] = TD.exner(param_set, ts)
+        # Row 38 is intentionally skipped (call disabled).
+        # dst[38, i] = TD.shum_to_mixing_ratio(param_set, ts)
+        # Only the `.tot` field of the returned value is stored.
+        dst[39, i] = TD.mixing_ratios(param_set, ts).tot
+        dst[40, i] = TD.vol_vapor_mixing_ratio(param_set, ts)
+        dst[41, i] = TD.liquid_ice_pottemp(param_set, ts)
+        dst[42, i] = TD.liquid_ice_pottemp_sat(param_set, ts)
+        dst[43, i] = TD.relative_humidity(param_set, ts)
+        dst[44, i] = TD.virtual_temperature(param_set, ts)
+        dst[45, i] = TD.condensate(param_set, ts)
+        dst[46, i] = TD.has_condensate(param_set, ts)
+        dst[47, i] = TD.specific_enthalpy(param_set, ts)
+        dst[48, i] = TD.total_specific_enthalpy(param_set, ts, FT(0))
+        dst[49, i] = TD.moist_static_energy(param_set, ts, FT(0))
+        dst[50, i] = TD.specific_entropy(param_set, ts)
+        dst[51, i] = TD.saturated(param_set, ts)
+
+    end
+end
+
+# Since we use `rand` to generate the ProfileSet,
+# just initialize on the CPU, and provide convert
+# function to move arrays to the GPU.
+"""
+    convert_profile_set(ps, ArrayType, slice)
+
+Return a new `TD.TestedProfiles.ProfileSet` in which every per-profile array
+of `ps` is indexed with `slice` and then passed through `ArrayType`.
+
+The twelfth positional argument is rebuilt by broadcasting `TD.PhasePartition`
+over the sliced `q_tot`, `q_liq` and `q_ice`; it is not wrapped in `ArrayType`.
+`ps.phase_type` is forwarded unchanged.
+
+NOTE(review): the positional order presumably has to match the field order of
+`ProfileSet`, which is not visible here -- verify before reordering.
+"""
+convert_profile_set(ps::TD.TestedProfiles.ProfileSet, ArrayType, slice) =
+    TD.TestedProfiles.ProfileSet(
+        ArrayType(ps.z[slice]),
+        ArrayType(ps.T[slice]),
+        ArrayType(ps.p[slice]),
+        ArrayType(ps.RS[slice]),
+        ArrayType(ps.e_int[slice]),
+        ArrayType(ps.h[slice]),
+        ArrayType(ps.ρ[slice]),
+        ArrayType(ps.θ_liq_ice[slice]),
+        ArrayType(ps.q_tot[slice]),
+        ArrayType(ps.q_liq[slice]),
+        ArrayType(ps.q_ice[slice]),
+        TD.PhasePartition.(ps.q_tot[slice], ps.q_liq[slice], ps.q_ice[slice]),
+        ArrayType(ps.RH[slice]),
+        ArrayType(ps.e_pot[slice]),
+        ArrayType(ps.u[slice]),
+        ArrayType(ps.v[slice]),
+        ArrayType(ps.w[slice]),
+        ArrayType(ps.e_kin[slice]),
+        ps.phase_type,
+    )
+
+"""
+    test_thermo!(param_set, d_dst, profiles)
+
+Launch `test_thermo_kernel!` with one work-item per entry of `profiles.z`, on
+the backend reported by `KA.get_backend(d_dst)`, then call `KA.synchronize` on
+that backend before returning.
+
+# Arguments
+- `param_set`: forwarded to the kernel as its first argument.
+- `d_dst`: output matrix written by the kernel; it also selects the backend.
+- `profiles`: object with fields `e_int`, `ρ`, `p`, `q_tot` and `z`.
+
+# Returns
+`nothing`; results are written into `d_dst`.
+"""
+function test_thermo!(param_set, d_dst, profiles)
+    (; e_int, ρ, p, q_tot) = profiles
+    n_profiles = length(profiles.z)
+    ndrange = (n_profiles,)
+    backend = KA.get_backend(d_dst)
+    kernel! = test_thermo_kernel!(backend)
+    kernel!(param_set, d_dst, e_int, ρ, p, q_tot; ndrange = ndrange)
+    # Synchronize inside the function so that benchmarking it times the
+    # finished kernel, not just the launch.
+    KA.synchronize(backend)
+    return nothing
+end
+
+@testset "Thermodynamics - kernels" begin
+    FT = Float32
+    param_set = parameter_set(FT)
+    # Profiles are generated as host `Array`s, then converted to `ArrayType`.
+    profiles = TD.TestedProfiles.PhaseEquilProfiles(param_set, Array)
+    slice = Colon()
+    profiles = convert_profile_set(profiles, ArrayType, slice)
+
+    n_profiles = length(profiles.z)
+    # Highest row index written by `test_thermo_kernel!`.
+    n_outputs = 51
+    # Zero-initialised so that rows the kernel never writes hold a defined value.
+    d_dst = ArrayType(zeros(FT, n_outputs, n_profiles))
+
+    test_thermo!(param_set, d_dst, profiles) # compile first
+    trial =
+        BenchmarkTools.@benchmark test_thermo!($param_set, $d_dst, $profiles)
+    show(stdout, MIME("text/plain"), trial)
+
+    # Reference states are computed on the host from the same inputs.
+    (; e_int, ρ, q_tot) = profiles
+    ts_cpu =
+        TD.PhaseEquil_ρeq.(
+            param_set,
+            Array{FT}(ρ),
+            Array{FT}(e_int),
+            Array{FT}(q_tot),
+        )
+    # Copy the result to the host once and reuse it for every comparison.
+    h_dst = Array(d_dst)
+    @test all(h_dst[1, :] .≈ TD.air_pressure.(param_set, ts_cpu))
+    @test all(h_dst[2, :] .≈ TD.air_temperature.(param_set, ts_cpu))
+
+end
+
+# Remove the `logfilepath_*.toml` files next to this script; `force = true`
+# makes a missing file a no-op instead of an error.
+# NOTE(review): presumably these are written when the parameter sets are
+# constructed -- confirm where they come from.
+rm(joinpath(@__DIR__, "logfilepath_Float32.toml"); force = true)
+rm(joinpath(@__DIR__,
"logfilepath_Float64.toml"); force = true) diff --git a/src/relations.jl b/src/relations.jl index 7a1855ad..f4b83205 100644 --- a/src/relations.jl +++ b/src/relations.jl @@ -1385,7 +1385,7 @@ function PhasePartition_equil( return PhasePartition_equil(param_set, T, ρ, q_tot, p_vap_sat, λ) end -PhasePartition_equil(param_set::APS, ts::AbstractPhaseNonEquil) = +PhasePartition_equil(param_set::APS, ts::ThermodynamicState) = PhasePartition_equil( param_set, air_temperature(param_set, ts), @@ -2179,6 +2179,8 @@ function latent_heat_liq_ice( LH_s0::FT = TP.LH_s0(param_set) return LH_v0 * q.liq + LH_s0 * q.ice end +latent_heat_liq_ice(param_set::APS, ts::ThermodynamicState) = + latent_heat_liq_ice(param_set, PhasePartition(param_set, ts)) """ @@ -2735,6 +2737,8 @@ function vol_vapor_mixing_ratio( q_vap = vapor_specific_humidity(q) return molmass_ratio * shum_to_mixing_ratio(q_vap, q.tot) end +vol_vapor_mixing_ratio(param_set, ts::ThermodynamicState) = + vol_vapor_mixing_ratio(param_set, PhasePartition(param_set, ts)) """ relative_humidity(param_set, T, p, phase_type, q::PhasePartition) diff --git a/test/runtests_gpu.jl b/test/runtests_gpu.jl index 65c25cd7..ba47da66 100644 --- a/test/runtests_gpu.jl +++ b/test/runtests_gpu.jl @@ -102,7 +102,7 @@ convert_profile_set(ps::TD.TestedProfiles.ProfileSet, ArrayType, slice) = ndrange = (n_profiles,) backend = KA.get_backend(d_dst) kernel! = test_thermo_kernel!(backend) - event = kernel!(param_set, d_dst, e_int, ρ, p, q_tot, ndrange = ndrange) + kernel!(param_set, d_dst, e_int, ρ, p, q_tot; ndrange = ndrange) KA.synchronize(backend) ts_cpu =