diff --git a/Project.toml b/Project.toml
index ac7502d..ea269e0 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "ProxAL"
 uuid = "12c3852d-bf95-4e7b-be60-68937c3c927b"
 authors = ["Anirudh Subramanyam ", "Youngdae Kim ", "Francois Pacaud ", "Michel Schanen "]
-version = "0.8.0"
+version = "0.9.0"
 
 [deps]
 AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
@@ -23,23 +23,14 @@ TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
 
 [compat]
 AMDGPU = "0.4"
-CUDA = "3.4"
+CUDA = "4.1"
 CatViews = "1"
-ExaAdmm = "0.3"
-ExaPF = "0.8"
-ExaTron = "2.1"
+ExaAdmm = "0.4"
+ExaPF = "0.9"
+ExaTron = "3"
 HDF5 = "0.16"
 Ipopt = "1"
 JuMP = "1"
-KernelAbstractions = "0.8"
-MPI = "0.19"
+KernelAbstractions = "0.9"
+MPI = "0.20"
 julia = "1.8"
-
-[extras]
-CUDAKernels = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57"
-LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
-ROCKernels = "7eb9e9f0-4bd3-4c4c-8bef-26bd9629d9b9"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-
-[targets]
-test = ["CUDAKernels", "LazyArtifacts", "ROCKernels", "Test"]
diff --git a/docs/src/man/usage.md b/docs/src/man/usage.md
index d883b58..0444ff3 100644
--- a/docs/src/man/usage.md
+++ b/docs/src/man/usage.md
@@ -60,4 +60,4 @@ To disable MPI, simply pass `nothing` as the last argument to `ProxALEvaluator`
 $ julia --project example.jl
 ```
 
-An example using the `ExaTron` backend with `ProxAL.CUDADevice` (GPU) can be found in `examples/exatron.jl`.
+An example using the `ExaTron` backend with `ProxAL.CUDABackend` (GPU) can be found in `examples/exatron.jl`.
diff --git a/examples/exatron.jl b/examples/exatron.jl
index ef365ce..2825d00 100644
--- a/examples/exatron.jl
+++ b/examples/exatron.jl
@@ -46,7 +46,7 @@ end
 
 # choose backend
 backend = ProxAL.JuMPBackend()
-# With ExaAdmmBackend(), CUDADevice will used
+# With ExaAdmmBackend(), CUDABackend will be used
 # backend = ProxAL.AdmmBackend()
 
 # Model/formulation settings
@@ -71,7 +71,7 @@ algparams.tol = 1e-3
 algparams.decompCtgs = (K > 0)
 algparams.iterlim = 10000
 if isa(backend, ProxAL.AdmmBackend)
-    algparams.device = ProxAL.CUDADevice
+    algparams.device = ProxAL.CUDABackend
 end
 algparams.optimizer = optimizer_with_attributes(Ipopt.Optimizer, "print_level" => 0) #, "tol" => 1e-1*algparams.tol)
 algparams.tron_rho_pq=3e3
diff --git a/src/ExaAdmmBackend/proxal_admm_gpu.jl b/src/ExaAdmmBackend/proxal_admm_gpu.jl
index 91da414..2a815ae 100644
--- a/src/ExaAdmmBackend/proxal_admm_gpu.jl
+++ b/src/ExaAdmmBackend/proxal_admm_gpu.jl
@@ -155,15 +155,15 @@ function generator_kernel_two_level(
     #wait(driver_kernel_test(device,n)(Val{n}(),max_feval,max_minor,dx,dxl,dxu,dA,dc,d_out,ndrange=(n,nblk),dependencies=Event(device)))
     # tgpu = CUDA.@timed @cuda threads=32 blocks=ngen shmem=shmem_size generator_kernel_two_level_proxal(
     # @kernel function generator_kernel_two_level_proxal(ngen::Int, gen_start::Int,
-    ev = generator_kernel_two_level_proxal_ka(device, 32)(
+    generator_kernel_two_level_proxal_ka(device, 32)(
         32, model.gen_start,
         u, xbar, zu, lu, rho_u,
         model.grid_data.pgmin, model.grid_data.pgmax,
         model.grid_data.qgmin, model.grid_data.qgmax,
         model.smin, model.smax, model.s_curr,
         model.Q_ref, model.c_ref,
-        ndrange=(ngen,ngen), dependencies=Event(device)
+        ndrange=(ngen,ngen)
     )
-    wait(ev)
+    KA.synchronize(device)
     return 0.0
 end
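The hunk above follows the KernelAbstractions.jl 0.8 → 0.9 migration required by the `[compat]` bump: the `Event`/`wait` dependency mechanism was removed, kernel launches now enqueue asynchronously on a backend, and completion is awaited with `KA.synchronize(backend)`. A minimal, self-contained sketch of the 0.9 launch pattern; the `axpy_ka!` kernel and its data are illustrative stand-ins, not code from this patch:

```julia
using KernelAbstractions
const KA = KernelAbstractions

# Toy kernel: y[i] += a * x[i], one work-item per element.
@kernel function axpy_ka!(y, a, @Const(x))
    i = @index(Global, Linear)
    @inbounds y[i] += a * x[i]
end

backend = CPU()                        # CUDABackend()/ROCBackend() on a GPU
x = ones(32); y = zeros(32)
kernel! = axpy_ka!(backend, 32)        # instantiate with a workgroup size of 32
kernel!(y, 2.0, x, ndrange=length(x))  # 0.8-style `dependencies=Event(device)` is gone
KA.synchronize(backend)                # replaces `wait(ev)`
```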
diff --git a/src/backends.jl b/src/backends.jl
index 7ecb5bc..c8c7732 100644
--- a/src/backends.jl
+++ b/src/backends.jl
@@ -363,8 +363,8 @@ of the structure `OPFBlocks`, used for decomposition purpose.
 # # Instantiate model in memory
 # target = if algparams.device == CPU
 #     ExaPF.CPU()
-# elseif algparams.device == CUDADevice
-#     ExaPF.CUDADevice()
+# elseif algparams.device == CUDABackend
+#     ExaPF.CUDABackend()
 # end
 # model = ExaPF.ProxALEvaluator(power_network, time;
 #     device=target)
diff --git a/src/communication.jl b/src/communication.jl
index 2afb88d..eaca6a3 100644
--- a/src/communication.jl
+++ b/src/communication.jl
@@ -92,8 +92,8 @@ function comm_neighbors!(data::AbstractArray{T,2}, blocks::AbstractBlocks, runin
                 sbuf = @view data[:,t]
                 rbuf = @view data[:,tn]
             end
-            push!(requests, MPI.Isend(sbuf, remote, t, comm))
-            push!(requests, MPI.Irecv!(rbuf, remote, tn, comm))
+            push!(requests, MPI.Isend(sbuf, comm; dest=remote, tag=t))
+            push!(requests, MPI.Irecv!(rbuf, comm; source=remote, tag=tn))
         end
     end
 end
@@ -121,8 +121,8 @@ function comm_neighbors!(data::AbstractArray{T,3}, blocks::AbstractBlocks, runin
                 sbuf = @view data[:,k,t]
                 rbuf = @view data[:,kn,tn]
             end
-            push!(requests, MPI.Isend(sbuf, remote, k, comm))
-            push!(requests, MPI.Irecv!(rbuf, remote, kn, comm))
+            push!(requests, MPI.Isend(sbuf, comm; dest=remote, tag=k))
+            push!(requests, MPI.Irecv!(rbuf, comm; source=remote, tag=kn))
         end
     end
 end
@@ -141,7 +141,7 @@
 Wait until the communciation requests `requests` have been fulfilled.
 """
 function comm_wait!(requests::Vector{MPI.Request})
-    return MPI.Waitall!(requests)
+    return MPI.Waitall(requests)
 end
 
 function comm_wait!(requests)
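These `communication.jl` hunks track the MPI.jl 0.19 → 0.20 API change: `MPI.Isend` and `MPI.Irecv!` now take the communicator positionally, with `dest`/`source` and `tag` passed as keywords, and `MPI.Waitall` (no `!`) supersedes `MPI.Waitall!`. A sketch of the new calling convention, assuming exactly two ranks exchanging a small buffer; all names here are illustrative:

```julia
using MPI

MPI.Init()
comm  = MPI.COMM_WORLD
rank  = MPI.Comm_rank(comm)   # run with: mpiexec -n 2 julia demo.jl
other = 1 - rank              # partner rank: 0 <-> 1

sbuf = fill(Float64(rank), 4)
rbuf = similar(sbuf)

requests = MPI.Request[]
push!(requests, MPI.Isend(sbuf, comm; dest=other, tag=0))    # was MPI.Isend(sbuf, other, 0, comm)
push!(requests, MPI.Irecv!(rbuf, comm; source=other, tag=0)) # was MPI.Irecv!(rbuf, other, 0, comm)
MPI.Waitall(requests)         # was MPI.Waitall!(requests)

@assert all(rbuf .== other)
MPI.Finalize()
```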
diff --git a/test/Artifacts.toml b/test/Artifacts.toml
new file mode 100644
index 0000000..0cb15d6
--- /dev/null
+++ b/test/Artifacts.toml
@@ -0,0 +1,6 @@
+[ExaData]
+git-tree-sha1 = "882112a441efaa242000181b177c4005b190a4c1"
+lazy = true
+    [[ExaData.download]]
+    url = "https://web.cels.anl.gov/~mschanen/ExaData-4747d0f.tar.gz"
+    sha256 = "6d929aa12f3841c0fb14100effff42bc5edffd7602afb4844e80996f033c3fc8"
diff --git a/test/Project.toml b/test/Project.toml
new file mode 100644
index 0000000..e13e2f9
--- /dev/null
+++ b/test/Project.toml
@@ -0,0 +1,19 @@
+[deps]
+AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+CatViews = "81a5f4ea-a946-549a-aa7e-2a7f63a27d31"
+DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
+ExaAdmm = "4d6a948c-1075-4240-a564-361a5d4e22a2"
+ExaPF = "0cf0e50c-a82e-488f-ac7e-41ffdff1b8aa"
+ExaTron = "28b18bf8-76f9-41ea-81fa-0f922810b349"
+HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
+Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9"
+JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
+KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
+LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
+Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
\ No newline at end of file
diff --git a/test/blockmodel.jl b/test/blockmodel.jl
index af86ad0..098bb7c 100644
--- a/test/blockmodel.jl
+++ b/test/blockmodel.jl
@@ -24,26 +24,22 @@
 load_file = joinpath(DATA_DIR, "mp_demand", "$(case)_oneweek_168")
 
 solver_list = ["ExaAdmmCPU"]
 if CUDA.has_cuda_gpu()
-    using CUDAKernels
-    function ProxAL.ExaAdmm.KAArray{T}(n::Int, device::CUDADevice) where {T}
+    function ProxAL.ExaAdmm.KAArray{T}(n::Int, device::CUDABackend) where {T}
         return CuArray{T}(undef, n)
     end
-    function ProxAL.ExaAdmm.KAArray{T}(n1::Int, n2::Int, device::CUDADevice) where {T}
+    function ProxAL.ExaAdmm.KAArray{T}(n1::Int, n2::Int, device::CUDABackend) where {T}
         return CuArray{T}(undef, n1, n2)
     end
-    gpu_device = CUDADevice()
+    gpu_device = CUDABackend()
     push!(solver_list, "ExaAdmmGPUKA")
 elseif AMDGPU.has_rocm_gpu()
-    using ROCKernels
-    # Set for crusher login node to avoid other users
-    AMDGPU.default_device!(AMDGPU.devices()[2])
-    function ProxAL.ExaAdmm.KAArray{T}(n::Int, device::ROCDevice) where {T}
+    function ProxAL.ExaAdmm.KAArray{T}(n::Int, device::ROCBackend) where {T}
         return ROCArray{T}(undef, n)
     end
-    function ProxAL.ExaAdmm.KAArray{T}(n1::Int, n2::Int, device::ROCDevice) where {T}
+    function ProxAL.ExaAdmm.KAArray{T}(n1::Int, n2::Int, device::ROCBackend) where {T}
         return ROCArray{T}(undef, n1, n2)
     end
-    gpu_device = ROCDevice()
+    gpu_device = ROCBackend()
     push!(solver_list, "ExaAdmmGPUKA")
 end
diff --git a/test/convergence.jl b/test/convergence.jl
index c83fed0..331c050 100644
--- a/test/convergence.jl
+++ b/test/convergence.jl
@@ -39,26 +39,22 @@ algparams.verbose = 0
 solver_list = ["Ipopt", "ExaAdmmCPU"]
 if CUDA.has_cuda_gpu()
     push!(solver_list, "ExaAdmmGPU")
-    using CUDAKernels
-    function ProxAL.ExaAdmm.KAArray{T}(n::Int, device::CUDADevice) where {T}
+    function ProxAL.ExaAdmm.KAArray{T}(n::Int, device::CUDABackend) where {T}
         return CuArray{T}(undef, n)
     end
-    function ProxAL.ExaAdmm.KAArray{T}(n1::Int, n2::Int, device::CUDADevice) where {T}
+    function ProxAL.ExaAdmm.KAArray{T}(n1::Int, n2::Int, device::CUDABackend) where {T}
         return CuArray{T}(undef, n1, n2)
     end
-    gpu_device = CUDADevice()
+    gpu_device = CUDABackend()
     push!(solver_list, "ExaAdmmGPUKA")
 elseif AMDGPU.has_rocm_gpu()
-    using ROCKernels
-    # Set for crusher login node to avoid other users
-    AMDGPU.default_device!(AMDGPU.devices()[2])
-    function ProxAL.ExaAdmm.KAArray{T}(n::Int, device::ROCDevice) where {T}
+    function ProxAL.ExaAdmm.KAArray{T}(n::Int, device::ROCBackend) where {T}
         return ROCArray{T}(undef, n)
     end
-    function ProxAL.ExaAdmm.KAArray{T}(n1::Int, n2::Int, device::ROCDevice) where {T}
+    function ProxAL.ExaAdmm.KAArray{T}(n1::Int, n2::Int, device::ROCBackend) where {T}
         return ROCArray{T}(undef, n1, n2)
     end
-    gpu_device = ROCDevice()
+    gpu_device = ROCBackend()
     push!(solver_list, "ExaAdmmGPUKA")
 end
 if isfile(joinpath(dirname(@__FILE__), "..", "build/libhiop.so"))
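The new `test/Artifacts.toml` declares `ExaData` as a lazy artifact, complementing the new `test/Project.toml` (which carries `LazyArtifacts` as a test dependency): the test data tarball is downloaded on first use rather than vendored with the repository. A sketch of how test code can resolve it; the subdirectory layout and case file name are assumptions for illustration, not taken from this patch:

```julia
using LazyArtifacts   # enables on-demand download of artifacts marked `lazy = true`

# `artifact"ExaData"` looks up the [ExaData] entry in the active Artifacts.toml,
# fetches the tarball from its `url` on first use (verifying `sha256`), and
# returns the local installation path keyed by `git-tree-sha1`.
root = artifact"ExaData"

DATA_DIR  = joinpath(root, "ExaData")      # assumed directory inside the tarball
case_file = joinpath(DATA_DIR, "case9.m")  # illustrative case file name
@assert isdir(root)
```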