diff --git a/Project.toml b/Project.toml
index ac7502d..ea269e0 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "ProxAL"
 uuid = "12c3852d-bf95-4e7b-be60-68937c3c927b"
 authors = ["Anirudh Subramanyam ", "Youngdae Kim ", "Francois Pacaud ", "Michel Schanen "]
-version = "0.8.0"
+version = "0.9.0"
 
 [deps]
 AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
@@ -23,23 +23,14 @@ TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
 
 [compat]
 AMDGPU = "0.4"
-CUDA = "3.4"
+CUDA = "4.1"
 CatViews = "1"
-ExaAdmm = "0.3"
-ExaPF = "0.8"
-ExaTron = "2.1"
+ExaAdmm = "0.4"
+ExaPF = "0.9"
+ExaTron = "3"
 HDF5 = "0.16"
 Ipopt = "1"
 JuMP = "1"
-KernelAbstractions = "0.8"
-MPI = "0.19"
+KernelAbstractions = "0.9"
+MPI = "0.20"
 julia = "1.8"
-
-[extras]
-CUDAKernels = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57"
-LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
-ROCKernels = "7eb9e9f0-4bd3-4c4c-8bef-26bd9629d9b9"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-
-[targets]
-test = ["CUDAKernels", "LazyArtifacts", "ROCKernels", "Test"]
diff --git a/docs/src/man/usage.md b/docs/src/man/usage.md
index d883b58..0444ff3 100644
--- a/docs/src/man/usage.md
+++ b/docs/src/man/usage.md
@@ -60,4 +60,4 @@ To disable MPI, simply pass `nothing` as the last argument to `ProxALEvaluator`
 $ julia --project example.jl
 ```
 
-An example using the `ExaTron` backend with `ProxAL.CUDADevice` (GPU) can be found in `examples/exatron.jl`.
+An example using the `ExaTron` backend with `ProxAL.CUDABackend` (GPU) can be found in `examples/exatron.jl`.
diff --git a/examples/exatron.jl b/examples/exatron.jl
index ef365ce..2825d00 100644
--- a/examples/exatron.jl
+++ b/examples/exatron.jl
@@ -46,7 +46,7 @@ end
 
 # choose backend
 backend = ProxAL.JuMPBackend()
-# With ExaAdmmBackend(), CUDADevice will used
+# With ExaAdmmBackend(), CUDABackend will be used
 # backend = ProxAL.AdmmBackend()
 
 # Model/formulation settings
@@ -71,7 +71,7 @@ algparams.tol = 1e-3
 algparams.decompCtgs = (K > 0)
 algparams.iterlim = 10000
 if isa(backend, ProxAL.AdmmBackend)
-    algparams.device = ProxAL.CUDADevice
+    algparams.device = ProxAL.CUDABackend
 end
 algparams.optimizer = optimizer_with_attributes(Ipopt.Optimizer, "print_level" => 0) #, "tol" => 1e-1*algparams.tol)
 algparams.tron_rho_pq=3e3
diff --git a/src/ExaAdmmBackend/proxal_admm_gpu.jl b/src/ExaAdmmBackend/proxal_admm_gpu.jl
index 91da414..2a815ae 100644
--- a/src/ExaAdmmBackend/proxal_admm_gpu.jl
+++ b/src/ExaAdmmBackend/proxal_admm_gpu.jl
@@ -155,15 +155,15 @@ function generator_kernel_two_level(
     #wait(driver_kernel_test(device,n)(Val{n}(),max_feval,max_minor,dx,dxl,dxu,dA,dc,d_out,ndrange=(n,nblk),dependencies=Event(device)))
     # tgpu = CUDA.@timed @cuda threads=32 blocks=ngen shmem=shmem_size generator_kernel_two_level_proxal(
     # @kernel function generator_kernel_two_level_proxal(ngen::Int, gen_start::Int,
-    ev = generator_kernel_two_level_proxal_ka(device, 32)(
+    generator_kernel_two_level_proxal_ka(device, 32)(
         32, model.gen_start,
         u, xbar, zu, lu, rho_u,
         model.grid_data.pgmin, model.grid_data.pgmax,
         model.grid_data.qgmin, model.grid_data.qgmax,
         model.smin, model.smax, model.s_curr,
         model.Q_ref, model.c_ref,
-        ndrange=(ngen,ngen), dependencies=Event(device)
+        ndrange=(ngen,ngen)
     )
-    wait(ev)
+    KA.synchronize(device)
     return 0.0
 end
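The hunk above follows the KernelAbstractions.jl 0.8 → 0.9 migration required by the `[compat]` bump: the `Event`/`wait` dependency mechanism was removed, kernel launches now enqueue asynchronously on a backend, and completion is awaited with `KA.synchronize(backend)`. A minimal, self-contained sketch of the 0.9 launch pattern; the `axpy_ka!` kernel and its data are illustrative stand-ins, not code from this patch:

```julia
using KernelAbstractions
const KA = KernelAbstractions

# Toy kernel: y[i] += a * x[i], one work-item per element.
@kernel function axpy_ka!(y, a, @Const(x))
    i = @index(Global, Linear)
    @inbounds y[i] += a * x[i]
end

backend = CPU()                        # CUDABackend()/ROCBackend() on a GPU
x = ones(32); y = zeros(32)
kernel! = axpy_ka!(backend, 32)        # instantiate with a workgroup size of 32
kernel!(y, 2.0, x, ndrange=length(x))  # 0.8-style `dependencies=Event(device)` is gone
KA.synchronize(backend)                # replaces `wait(ev)`
```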
diff --git a/src/backends.jl b/src/backends.jl
index 7ecb5bc..c8c7732 100644
--- a/src/backends.jl
+++ b/src/backends.jl
@@ -363,8 +363,8 @@ of the structure `OPFBlocks`, used for decomposition purpose.
 # # Instantiate model in memory
 # target = if algparams.device == CPU
 #     ExaPF.CPU()
-# elseif algparams.device == CUDADevice
-#     ExaPF.CUDADevice()
+# elseif algparams.device == CUDABackend
+#     ExaPF.CUDABackend()
 # end
 # model = ExaPF.ProxALEvaluator(power_network, time;
 #     device=target)
diff --git a/src/communication.jl b/src/communication.jl
index 2afb88d..eaca6a3 100644
--- a/src/communication.jl
+++ b/src/communication.jl
@@ -92,8 +92,8 @@ function comm_neighbors!(data::AbstractArray{T,2}, blocks::AbstractBlocks, runin
                 sbuf = @view data[:,t]
                 rbuf = @view data[:,tn]
             end
-            push!(requests, MPI.Isend(sbuf, remote, t, comm))
-            push!(requests, MPI.Irecv!(rbuf, remote, tn, comm))
+            push!(requests, MPI.Isend(sbuf, comm; dest=remote, tag=t))
+            push!(requests, MPI.Irecv!(rbuf, comm; source=remote, tag=tn))
         end
     end
 end
@@ -121,8 +121,8 @@ function comm_neighbors!(data::AbstractArray{T,3}, blocks::AbstractBlocks, runin
                 sbuf = @view data[:,k,t]
                 rbuf = @view data[:,kn,tn]
             end
-            push!(requests, MPI.Isend(sbuf, remote, k, comm))
-            push!(requests, MPI.Irecv!(rbuf, remote, kn, comm))
+            push!(requests, MPI.Isend(sbuf, comm; dest=remote, tag=k))
+            push!(requests, MPI.Irecv!(rbuf, comm; source=remote, tag=kn))
         end
     end
 end
@@ -141,7 +141,7 @@
 Wait until the communciation requests `requests` have been fulfilled.
 """
 function comm_wait!(requests::Vector{MPI.Request})
-    return MPI.Waitall!(requests)
+    return MPI.Waitall(requests)
 end
 
 function comm_wait!(requests)
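These `communication.jl` hunks track the MPI.jl 0.19 → 0.20 API change: `MPI.Isend` and `MPI.Irecv!` now take the communicator positionally, with `dest`/`source` and `tag` passed as keywords, and `MPI.Waitall` (no `!`) supersedes `MPI.Waitall!`. A sketch of the new calling convention, assuming exactly two ranks exchanging a small buffer; all names here are illustrative:

```julia
using MPI

MPI.Init()
comm  = MPI.COMM_WORLD
rank  = MPI.Comm_rank(comm)   # run with: mpiexec -n 2 julia demo.jl
other = 1 - rank              # partner rank: 0 <-> 1

sbuf = fill(Float64(rank), 4)
rbuf = similar(sbuf)

requests = MPI.Request[]
push!(requests, MPI.Isend(sbuf, comm; dest=other, tag=0))    # was MPI.Isend(sbuf, other, 0, comm)
push!(requests, MPI.Irecv!(rbuf, comm; source=other, tag=0)) # was MPI.Irecv!(rbuf, other, 0, comm)
MPI.Waitall(requests)         # was MPI.Waitall!(requests)

@assert all(rbuf .== other)
MPI.Finalize()
```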
diff --git a/test/Artifacts.toml b/test/Artifacts.toml
new file mode 100644
index 0000000..0cb15d6
--- /dev/null
+++ b/test/Artifacts.toml
@@ -0,0 +1,6 @@
+[ExaData]
+git-tree-sha1 = "882112a441efaa242000181b177c4005b190a4c1"
+lazy = true
+    [[ExaData.download]]
+    url = "https://web.cels.anl.gov/~mschanen/ExaData-4747d0f.tar.gz"
+    sha256 = "6d929aa12f3841c0fb14100effff42bc5edffd7602afb4844e80996f033c3fc8"
diff --git a/test/Project.toml b/test/Project.toml
new file mode 100644
index 0000000..e13e2f9
--- /dev/null
+++ b/test/Project.toml
@@ -0,0 +1,19 @@
+[deps]
+AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+CatViews = "81a5f4ea-a946-549a-aa7e-2a7f63a27d31"
+DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
+ExaAdmm = "4d6a948c-1075-4240-a564-361a5d4e22a2"
+ExaPF = "0cf0e50c-a82e-488f-ac7e-41ffdff1b8aa"
+ExaTron = "28b18bf8-76f9-41ea-81fa-0f922810b349"
+HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
+Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9"
+JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
+KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
+LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
+Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
\ No newline at end of file
diff --git a/test/blockmodel.jl b/test/blockmodel.jl
index af86ad0..098bb7c 100644
--- a/test/blockmodel.jl
+++ b/test/blockmodel.jl
@@ -24,26 +24,22 @@
 load_file = joinpath(DATA_DIR, "mp_demand", "$(case)_oneweek_168")
 
 solver_list = ["ExaAdmmCPU"]
 if CUDA.has_cuda_gpu()
-    using CUDAKernels
-    function ProxAL.ExaAdmm.KAArray{T}(n::Int, device::CUDADevice) where {T}
+    function ProxAL.ExaAdmm.KAArray{T}(n::Int, device::CUDABackend) where {T}
         return CuArray{T}(undef, n)
     end
-    function ProxAL.ExaAdmm.KAArray{T}(n1::Int, n2::Int, device::CUDADevice) where {T}
+    function ProxAL.ExaAdmm.KAArray{T}(n1::Int, n2::Int, device::CUDABackend) where {T}
         return CuArray{T}(undef, n1, n2)
     end
-    gpu_device = CUDADevice()
+    gpu_device = CUDABackend()
     push!(solver_list, "ExaAdmmGPUKA")
 elseif AMDGPU.has_rocm_gpu()
-    using ROCKernels
-    # Set for crusher login node to avoid other users
-    AMDGPU.default_device!(AMDGPU.devices()[2])
-    function ProxAL.ExaAdmm.KAArray{T}(n::Int, device::ROCDevice) where {T}
+    function ProxAL.ExaAdmm.KAArray{T}(n::Int, device::ROCBackend) where {T}
         return ROCArray{T}(undef, n)
     end
-    function ProxAL.ExaAdmm.KAArray{T}(n1::Int, n2::Int, device::ROCDevice) where {T}
+    function ProxAL.ExaAdmm.KAArray{T}(n1::Int, n2::Int, device::ROCBackend) where {T}
         return ROCArray{T}(undef, n1, n2)
     end
-    gpu_device = ROCDevice()
+    gpu_device = ROCBackend()
     push!(solver_list, "ExaAdmmGPUKA")
 end
diff --git a/test/convergence.jl b/test/convergence.jl
index c83fed0..331c050 100644
--- a/test/convergence.jl
+++ b/test/convergence.jl
@@ -39,26 +39,22 @@ algparams.verbose = 0
 solver_list = ["Ipopt", "ExaAdmmCPU"]
 if CUDA.has_cuda_gpu()
     push!(solver_list, "ExaAdmmGPU")
-    using CUDAKernels
-    function ProxAL.ExaAdmm.KAArray{T}(n::Int, device::CUDADevice) where {T}
+    function ProxAL.ExaAdmm.KAArray{T}(n::Int, device::CUDABackend) where {T}
         return CuArray{T}(undef, n)
     end
-    function ProxAL.ExaAdmm.KAArray{T}(n1::Int, n2::Int, device::CUDADevice) where {T}
+    function ProxAL.ExaAdmm.KAArray{T}(n1::Int, n2::Int, device::CUDABackend) where {T}
         return CuArray{T}(undef, n1, n2)
     end
-    gpu_device = CUDADevice()
+    gpu_device = CUDABackend()
     push!(solver_list, "ExaAdmmGPUKA")
 elseif AMDGPU.has_rocm_gpu()
-    using ROCKernels
-    # Set for crusher login node to avoid other users
-    AMDGPU.default_device!(AMDGPU.devices()[2])
-    function ProxAL.ExaAdmm.KAArray{T}(n::Int, device::ROCDevice) where {T}
+    function ProxAL.ExaAdmm.KAArray{T}(n::Int, device::ROCBackend) where {T}
         return ROCArray{T}(undef, n)
     end
-    function ProxAL.ExaAdmm.KAArray{T}(n1::Int, n2::Int, device::ROCDevice) where {T}
+    function ProxAL.ExaAdmm.KAArray{T}(n1::Int, n2::Int, device::ROCBackend) where {T}
         return ROCArray{T}(undef, n1, n2)
     end
-    gpu_device = ROCDevice()
+    gpu_device = ROCBackend()
     push!(solver_list, "ExaAdmmGPUKA")
 end
 if isfile(joinpath(dirname(@__FILE__), "..", "build/libhiop.so"))
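The new `test/Artifacts.toml` declares `ExaData` as a lazy artifact, complementing the new `test/Project.toml` (which carries `LazyArtifacts` as a test dependency): the test data tarball is downloaded on first use rather than vendored with the repository. A sketch of how test code can resolve it; the subdirectory layout and case file name are assumptions for illustration, not taken from this patch:

```julia
using LazyArtifacts   # enables on-demand download of artifacts marked `lazy = true`

# `artifact"ExaData"` looks up the [ExaData] entry in the active Artifacts.toml,
# fetches the tarball from its `url` on first use (verifying `sha256`), and
# returns the local installation path keyed by `git-tree-sha1`.
root = artifact"ExaData"

DATA_DIR  = joinpath(root, "ExaData")      # assumed directory inside the tarball
case_file = joinpath(DATA_DIR, "case9.m")  # illustrative case file name
@assert isdir(root)
```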