From 7c6cb202b55edce8b0de4e1f37c9cf5abd4b0612 Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Thu, 29 Jun 2023 10:51:25 -0400 Subject: [PATCH] Add @inbounds fixes hang --- Project.toml | 2 +- src/ExaAdmmBackend/proxal_admm_ka.jl | 54 +++++++++++++++------------- 2 files changed, 30 insertions(+), 26 deletions(-) diff --git a/Project.toml b/Project.toml index 44ab1f2..8f8bc16 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "ProxAL" uuid = "12c3852d-bf95-4e7b-be60-68937c3c927b" authors = ["Anirudh Subramanyam ", "Youngdae Kim ", "Francois Pacaud ", "Michel Schanen "] -version = "0.9.2" +version = "0.9.3" [deps] AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" diff --git a/src/ExaAdmmBackend/proxal_admm_ka.jl b/src/ExaAdmmBackend/proxal_admm_ka.jl index b5b3eb9..b147e1f 100644 --- a/src/ExaAdmmBackend/proxal_admm_ka.jl +++ b/src/ExaAdmmBackend/proxal_admm_ka.jl @@ -1,37 +1,37 @@ @kernel function generator_kernel_two_level_proxal_ka(ngen::Int, gen_start::Int, u, xbar, z, l, rho, - pgmin, pgmax, - qgmin, qgmax, - smin, smax, s, + @Const(pgmin), @Const(pgmax), + @Const(qgmin), @Const(qgmax), + @Const(smin), @Const(smax), s, _A, _c) tx = @index(Local, Linear) I = @index(Group, Linear) - n = 2 + x = @localmem Float64 (2,) + xl = @localmem Float64 (2,) + xu = @localmem Float64 (2,) - if I <= ngen - x = @localmem Float64 (n,) - xl = @localmem Float64 (n,) - xu = @localmem Float64 (n,) + A = @localmem Float64 (2,2) + c = @localmem Float64 (2,) - A = @localmem Float64 (n,n) - c = @localmem Float64 (n,) + @synchronize + if I <= ngen pg_idx = gen_start + 2*(I-1) qg_idx = gen_start + 2*(I-1) + 1 - u[qg_idx] = max(qgmin[I], + @inbounds u[qg_idx] = max(qgmin[I], min(qgmax[I], (-(l[qg_idx] + rho[qg_idx]*(-xbar[qg_idx] + z[qg_idx]))) / rho[qg_idx])) A_start = 4*(I-1) c_start = 2*(I-1) - if tx <= n + if tx <= 2 @inbounds begin - for j=1:n - A[tx,j] = _A[n*(j-1)+tx + A_start] + for j=1:2 + A[tx,j] = _A[2*(j-1)+tx + A_start] end c[tx] = _c[tx + c_start] @@ -44,18 +44,22 @@ @synchronize @inbounds begin - xl[1] = pgmin[I] - xu[1] = pgmax[I] - xl[2] = smin[I] - xu[2] = smax[I] - x[1] = min(xu[1], max(xl[1], u[pg_idx])) - x[2] = min(xu[2], max(xl[2], s[I])) + if tx == 1 + xl[1] = pgmin[I] + xu[1] = pgmax[I] + xl[2] = smin[I] + xu[2] = smax[I] + x[1] = min(xu[1], max(xl[1], u[pg_idx])) + x[2] = min(xu[2], max(xl[2], s[I])) + end @synchronize - status, minor_iter = ExaTron.ExaTronKAKernels.tron_qp_kernel(n, 500, 200, 1e-6, 1.0, x, xl, xu, A, c, tx) - - u[pg_idx] = x[1] - s[I] = x[2] + status, minor_iter = ExaAdmm.ExaTron.ExaTronKAKernels.tron_qp_kernel(2, 500, 200, 1e-6, 1.0, x, xl, xu, A, c, tx) + @synchronize + if tx == 1 + u[pg_idx] = x[1] + s[I] = x[2] + end end end end @@ -68,7 +72,7 @@ function generator_kernel_two_level( ngen = model.grid_data.ngen - generator_kernel_two_level_proxal_ka(device, 32, 32*ngen)( + generator_kernel_two_level_proxal_ka(device, 2, 2*ngen)( ngen, model.gen_start, u, xbar, zu, lu, rho_u, model.grid_data.pgmin, model.grid_data.pgmax,