From 26fce1410b8bd4f82fc72cb48b60967f7b67136d Mon Sep 17 00:00:00 2001 From: Kibaek Kim Date: Fri, 21 May 2021 20:30:25 -0500 Subject: [PATCH 1/6] resolving #21; tested on cpu for now --- Project.toml | 2 +- src/admm/acopf_admm_gpu.jl | 16 +- src/admm/acopf_admm_gpu_two_level.jl | 474 ++++++++++++--------------- src/admm/environment.jl | 98 +++++- 4 files changed, 301 insertions(+), 289 deletions(-) diff --git a/Project.toml b/Project.toml index 559b47b..bcc2cfe 100644 --- a/Project.toml +++ b/Project.toml @@ -10,4 +10,4 @@ DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" -Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" \ No newline at end of file +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" diff --git a/src/admm/acopf_admm_gpu.jl b/src/admm/acopf_admm_gpu.jl index 7ea9623..be94bbd 100644 --- a/src/admm/acopf_admm_gpu.jl +++ b/src/admm/acopf_admm_gpu.jl @@ -120,8 +120,9 @@ function get_branch_data(data; use_gpu=false) end end -function init_solution!(sol, data, ybus, gen_start, line_start, - rho_pq, rho_va, wRIij) +function init_solution!(env, ybus, rho_pq, rho_va) + sol, data, model = env.solution, env.data, env.model + lines = data.lines buses = data.buses BusIdx = data.BusIdx @@ -132,10 +133,9 @@ function init_solution!(sol, data, ybus, gen_start, line_start, YttR = ybus.YttR; YttI = ybus.YttI YftR = ybus.YftR; YftI = ybus.YftI YtfR = ybus.YtfR; YtfI = ybus.YtfI - YshR = ybus.YshR; YshI = ybus.YshI for g=1:ngen - pg_idx = gen_start + 2*(g-1) + pg_idx = model.gen_start + 2*(g-1) #u_curr[pg_idx] = 0.5*(data.genvec.Pmin[g] + data.genvec.Pmax[g]) #u_curr[pg_idx+1] = 0.5*(data.genvec.Qmin[g] + data.genvec.Qmax[g]) sol.v_curr[pg_idx] = 0.5*(data.generators[g].Pmin + data.generators[g].Pmax) @@ -151,7 +151,7 @@ function init_solution!(sol, data, ybus, gen_start, line_start, wji0 = (buses[BusIdx[lines[l].to]].Vmax^2 + buses[BusIdx[lines[l].to]].Vmin^2) / 2 wR0 = sqrt(wij0 * wji0) - pij_idx = line_start + 8*(l-1) + pij_idx = model.line_start + 8*(l-1) sol.u_curr[pij_idx] = YffR[l] * wij0 + YftR[l] * wR0 sol.u_curr[pij_idx+1] = -YffI[l] * wij0 - YftI[l] * wR0 sol.u_curr[pij_idx+2] = YttR[l] * wji0 + YtfR[l] * wR0 @@ -162,8 +162,8 @@ function init_solution!(sol, data, ybus, gen_start, line_start, u_curr[pij_idx+6] = 0.0 u_curr[pij_idx+7] = 0.0 =# - wRIij[2*(l-1)+1] = wR0 - wRIij[2*l] = 0.0 + # wRIij[2*(l-1)+1] = wR0 + # wRIij[2*l] = 0.0 sol.v_curr[pij_idx+4] = wij0 sol.v_curr[pij_idx+5] = wji0 @@ -223,7 +223,7 @@ function dual_residual_kernel(n::Int, rd::CuDeviceArray{Float64,1}, return end -function check_linelimit_violation(data, u::Array{Float64}) +function check_linelimit_violation(data, u) lines = data.lines nline = length(data.lines) line_start = 2*length(data.generators) + 1 diff --git a/src/admm/acopf_admm_gpu_two_level.jl b/src/admm/acopf_admm_gpu_two_level.jl index 0c6d1f2..a3a678e 100644 --- a/src/admm/acopf_admm_gpu_two_level.jl +++ b/src/admm/acopf_admm_gpu_two_level.jl @@ -1,9 +1,26 @@ -function check_generator_bounds(data, gen_start, xbar) +function check_generator_bounds(data, gen_start, xbar; use_gpu=false) ngen = length(data.generators) - pgmin = data.genvec.Pmin - pgmax = data.genvec.Pmax - qgmin = data.genvec.Qmin - qgmax = data.genvec.Qmax + + if use_gpu + pgmin = CuArray{Float64}(undef, ngen) + pgmax = CuArray{Float64}(undef, ngen) + qgmin = CuArray{Float64}(undef, ngen) + qgmax = CuArray{Float64}(undef, ngen) + else + 
pgmin = Array{Float64}(undef, ngen) + pgmax = Array{Float64}(undef, ngen) + qgmin = Array{Float64}(undef, ngen) + qgmax = Array{Float64}(undef, ngen) + end + + Pmin = Float64[data.generators[g].Pmin for g in 1:ngen] + Pmax = Float64[data.generators[g].Pmax for g in 1:ngen] + Qmin = Float64[data.generators[g].Qmin for g in 1:ngen] + Qmax = Float64[data.generators[g].Qmax for g in 1:ngen] + copyto!(pgmin, Pmin) + copyto!(pgmax, Pmax) + copyto!(qgmin, Qmin) + copyto!(qgmax, Qmax) max_viol_real = 0.0 max_viol_reactive = 0.0 @@ -91,8 +108,10 @@ function get_branch_bus_index(data; use_gpu=false) end end -function init_values_two_level(data, ybus, gen_start, line_start, bus_start, - rho_pq, rho_va, u, v, xbar, lu, lv, rho_u, rho_v, wRIij) +function init_values_two_level!(env, ybus, rho_pq, rho_va) + sol, data, model = env.solution, env.data, env.model + gen_start, line_start, bus_start = model.gen_start, model.line_start, model.bus_start + lines = data.lines buses = data.buses BusIdx = data.BusIdx @@ -104,7 +123,11 @@ function init_values_two_level(data, ybus, gen_start, line_start, bus_start, YttR = ybus.YttR; YttI = ybus.YttI YftR = ybus.YftR; YftI = ybus.YftI YtfR = ybus.YtfR; YtfI = ybus.YtfI - YshR = ybus.YshR; YshI = ybus.YshI + + u_curr = view(sol.x_curr, 1:model.nvar_u) + v_curr = view(sol.x_curr, model.nvar_u+1:model.nvar) + rho_u = view(sol.rho, 1:model.nvar_u) + rho_v = view(sol.rho, model.nvar_u+1:model.nvar) rho_u .= rho_pq rho_v .= rho_pq @@ -112,10 +135,11 @@ function init_values_two_level(data, ybus, gen_start, line_start, bus_start, for g=1:ngen pg_idx = gen_start + 2*(g-1) - xbar[pg_idx] = 0.5*(data.genvec.Pmin[g] + data.genvec.Pmax[g]) - xbar[pg_idx+1] = 0.5*(data.genvec.Qmin[g] + data.genvec.Qmax[g]) + sol.xbar_curr[pg_idx] = 0.5*(data.generators[g].Pmin + data.generators[g].Pmax) + sol.xbar_curr[pg_idx+1] = 0.5*(data.generators[g].Qmin + data.generators[g].Qmax) end + fill!(sol.wRIij, 0.0) for l=1:nline fr_idx = BusIdx[lines[l].from] to_idx = BusIdx[lines[l].to] @@ -126,24 +150,23 @@ function init_values_two_level(data, ybus, gen_start, line_start, bus_start, u_pij_idx = line_start + 8*(l-1) v_pij_idx = line_start + 4*(l-1) - v[v_pij_idx] = u[u_pij_idx] = YffR[l] * wij0 + YftR[l] * wR0 - v[v_pij_idx+1] = u[u_pij_idx+1] = -YffI[l] * wij0 - YftI[l] * wR0 - v[v_pij_idx+2] = u[u_pij_idx+2] = YttR[l] * wji0 + YtfR[l] * wR0 - v[v_pij_idx+3] = u[u_pij_idx+3] = -YttI[l] * wji0 - YtfI[l] * wR0 + v_curr[v_pij_idx] = u_curr[u_pij_idx] = YffR[l] * wij0 + YftR[l] * wR0 + v_curr[v_pij_idx+1] = u_curr[u_pij_idx+1] = -YffI[l] * wij0 - YftI[l] * wR0 + v_curr[v_pij_idx+2] = u_curr[u_pij_idx+2] = YttR[l] * wji0 + YtfR[l] * wR0 + v_curr[v_pij_idx+3] = u_curr[u_pij_idx+3] = -YttI[l] * wji0 - YtfI[l] * wR0 rho_u[u_pij_idx+4:u_pij_idx+7] .= rho_va - wRIij[2*(l-1)+1] = wR0 - wRIij[2*l] = 0.0 + sol.wRIij[2*(l-1)+1] = wR0 + sol.wRIij[2*l] = 0.0 end for b=1:nbus - xbar[bus_start + 2*(b-1)] = (buses[b].Vmax^2 + buses[b].Vmin^2) / 2 - xbar[bus_start + 2*(b-1)+1] = 0.0 + sol.xbar_curr[bus_start + 2*(b-1)] = (buses[b].Vmax^2 + buses[b].Vmin^2) / 2 + sol.xbar_curr[bus_start + 2*(b-1)+1] = 0.0 end - lu .= 0.0 - lv .= 0.0 + sol.l_curr .= 0.0 return end @@ -492,13 +515,12 @@ function update_rho(rho, rp, rp_old, theta, gamma) end end -function admm_rect_gpu_two_level(case; outer_iterlim=10, inner_iterlim=800, rho_pq=400.0, rho_va=40000.0, scale=1e-4, - use_gpu=false, use_polar=true, gpu_no=1, outer_eps=2*1e-4) - data = opf_loaddata(case) +function admm_restart_two_level(env::AdmmEnv; outer_iterlim=10, 
inner_iterlim=800, scale=1e-4) + if env.use_gpu + CUDA.device!(env.gpu_no) + end - ngen = length(data.generators) - nline = length(data.lines) - nbus = length(data.buses) + data, par, mod, sol = env.data, env.params, env.model, env.solution # ------------------------------------------------------------------- # Variables are of two types: u and v @@ -511,146 +533,50 @@ function admm_rect_gpu_two_level(case; outer_iterlim=10, inner_iterlim=800, rho_ # xbar: same as v # ------------------------------------------------------------------- - nvar_u = 2*ngen + 8*nline - nvar_v = 2*ngen + 4*nline + 2*nbus - nvar = nvar_u + nvar_v - gen_start = 1 - line_start = 2*ngen + 1 - bus_start = 2*ngen + 4*nline + 1 # this is for varibles of type v. - - baseMVA = data.baseMVA - n = (use_polar == true) ? 4 : 10 - mu_max = 1e8 - rho_max = 1e6 - rho_min_pq = 5.0 - rho_min_w = 5.0 - eps_rp = 1e-4 - eps_rp_min = 1e-5 - rt_inc = 2.0 - rt_dec = 2.0 - eta = 0.99 - Kf = 100 - Kf_mean = 10 - - if use_gpu - CUDA.device!(gpu_no) - end - - ybus = Ybus{Array{Float64}}(computeAdmitances(data.lines, data.buses, data.baseMVA; VI=Array{Int}, VD=Array{Float64})...) - - pgmin, pgmax, qgmin, qgmax, c2, c1, c0 = get_generator_data(data) - YshR, YshI, YffR, YffI, YftR, YftI, YttR, YttI, YtfR, YtfI, FrBound, ToBound = get_branch_data(data) - FrStart, FrIdx, ToStart, ToIdx, GenStart, GenIdx, Pd, Qd = get_bus_data(data) - brBusIdx = get_branch_bus_index(data) - - cu_pgmin, cu_pgmax, cu_qgmin, cu_qgmax, cu_c2, cu_c1, cu_c0 = get_generator_data(data; use_gpu=use_gpu) - cuYshR, cuYshI, cuYffR, cuYffI, cuYftR, cuYftI, cuYttR, cuYttI, cuYtfR, cuYtfI, cuFrBound, cuToBound = get_branch_data(data; use_gpu=use_gpu) - cu_FrStart, cu_FrIdx, cu_ToStart, cu_ToIdx, cu_GenStart, cu_GenIdx, cu_Pd, cu_Qd = get_bus_data(data; use_gpu=use_gpu) - cu_brBusIdx = get_branch_bus_index(data; use_gpu=use_gpu) - - x_curr = zeros(nvar) - xbar_curr = zeros(nvar_v) - z_outer = zeros(nvar) - z_curr = zeros(nvar) - z_prev = zeros(nvar) - l_curr = zeros(nvar) - lz = zeros(nvar) - rho = zeros(nvar) - rd = zeros(nvar) - rp = zeros(nvar) - rp_old = zeros(nvar) - Ax_plus_By = zeros(nvar) - param = zeros(31, nline) - wRIij = zeros(2*nline) - - u_curr = view(x_curr, 1:nvar_u) - v_curr = view(x_curr, nvar_u+1:nvar) - zu_outer = view(z_outer, 1:nvar_u) - zv_outer = view(z_outer, nvar_u+1:nvar) - zu_curr = view(z_curr, 1:nvar_u) - zv_curr = view(z_curr, nvar_u+1:nvar) - zu_prev = view(z_prev, 1:nvar_u) - zv_prev = view(z_curr, nvar_u+1:nvar) - lu_curr = view(l_curr, 1:nvar_u) - lv_curr = view(l_curr, nvar_u+1:nvar) - lz_u = view(lz, 1:nvar_u) - lz_v = view(lz, nvar_u+1:nvar) - rho_u = view(rho, 1:nvar_u) - rho_v = view(rho, nvar_u+1:nvar) - rp_u = view(rp, 1:nvar_u) - rp_v = view(rp, nvar_u+1:nvar) - rd_u = view(rd, 1:nvar_u) - rd_v = view(rd, nvar_u+1:nvar) - - init_values_two_level(data, ybus, gen_start, line_start, bus_start, - rho_pq, rho_va, u_curr, v_curr, xbar_curr, lu_curr, lv_curr, rho_u, rho_v, wRIij) - - if use_gpu - cu_x_curr = CuArray{Float64}(undef, nvar) - cu_xbar_curr = CuArray{Float64}(undef, nvar_v) - cu_z_outer = CuArray{Float64}(undef, nvar) - cu_z_curr = CuArray{Float64}(undef, nvar) - cu_z_prev = CuArray{Float64}(undef, nvar) - cu_l_curr = CuArray{Float64}(undef, nvar) - cu_lz = CuArray{Float64}(undef, nvar) - cu_rho = CuArray{Float64}(undef, nvar) - cu_rd = CuArray{Float64}(undef, nvar) - cu_rp = CuArray{Float64}(undef, nvar) - cu_rp_old = CuArray{Float64}(undef, nvar) - cu_Ax_plus_By = CuArray{Float64}(undef, nvar) - cuParam = CuArray{Float64}(undef, 
(31, nline)) - cuWRIij = CuArray{Float64}(undef, 2*nline) - - cu_u_curr = view(cu_x_curr, 1:nvar_u) - cu_v_curr = view(cu_x_curr, nvar_u+1:nvar) - cu_zu_curr = view(cu_z_curr, 1:nvar_u) - cu_zv_curr = view(cu_z_curr, nvar_u+1:nvar) - cu_lu_curr = view(cu_l_curr, 1:nvar_u) - cu_lv_curr = view(cu_l_curr, nvar_u+1:nvar) - cu_lz_u = view(cu_lz, 1:nvar_u) - cu_lz_v = view(cu_lz, nvar_u+1:nvar) - cu_rho_u = view(cu_rho, 1:nvar_u) - cu_rho_v = view(cu_rho, nvar_u+1:nvar) - cu_rp_u = view(cu_rp, 1:nvar_u) - cu_rp_v = view(cu_rp, nvar_u+1:nvar) - - copyto!(cu_x_curr, x_curr) - copyto!(cu_xbar_curr, xbar_curr) - copyto!(cu_z_outer, z_outer) - copyto!(cu_z_curr, z_curr) - copyto!(cu_l_curr, l_curr) - copyto!(cu_lz, lz) - copyto!(cu_rho, rho) - copyto!(cu_Ax_plus_By, Ax_plus_By) - copyto!(cuParam, param) - copyto!(cuWRIij, wRIij) - end - - max_auglag = 50 - - nblk_gen = div(ngen, 32, RoundUp) - nblk_br = nline - nblk_bus = div(nbus, 32, RoundUp) - - ABSTOL = 1e-6 - RELTOL = 1e-5 + x_curr = sol.x_curr + xbar_curr = sol.xbar_curr + z_outer = sol.z_outer + z_curr = sol.z_curr + z_prev = sol.z_prev + l_curr = sol.l_curr + lz = sol.lz + rho = sol.rho + rp = sol.rp + rd = sol.rd + rp_old = sol.rp_old + Ax_plus_By = sol.Ax_plus_By + wRIij = sol.wRIij + + u_curr = view(x_curr, 1:mod.nvar_u) + v_curr = view(x_curr, mod.nvar_u+1:mod.nvar) + zu_curr = view(z_curr, 1:mod.nvar_u) + zv_curr = view(z_curr, mod.nvar_u+1:mod.nvar) + lu_curr = view(l_curr, 1:mod.nvar_u) + lv_curr = view(l_curr, mod.nvar_u+1:mod.nvar) + lz_u = view(lz, 1:mod.nvar_u) + lz_v = view(lz, mod.nvar_u+1:mod.nvar) + rho_u = view(rho, 1:mod.nvar_u) + rho_v = view(rho, mod.nvar_u+1:mod.nvar) + rp_u = view(rp, 1:mod.nvar_u) + rp_v = view(rp, mod.nvar_u+1:mod.nvar) + + nblk_gen = div(mod.ngen, 32, RoundUp) + nblk_br = mod.nline + nblk_bus = div(mod.nbus, 32, RoundUp) beta = 1e3 - gamma = 6.0 + gamma = 6.0 # TODO: not used c = 6.0 theta = 0.8 - sqrt_d = sqrt(nvar_u + nvar_v) - - MAX_MULTIPLIER = 1e12 - DUAL_TOL = 1e-8 - OUTER_TOL = sqrt_d*(outer_eps) + sqrt_d = sqrt(mod.nvar_u + mod.nvar_v) + OUTER_TOL = sqrt_d*(par.outer_eps) outer = 0 inner = 0 time_gen = time_br = time_bus = 0 shift_lines = 0 - shmem_size = sizeof(Float64)*(14*n+3*n^2) + sizeof(Int)*(4*n) + shmem_size = sizeof(Float64)*(14*mod.n+3*mod.n^2) + sizeof(Int)*(4*mod.n) mismatch = Inf z_prev_norm = z_curr_norm = Inf @@ -659,12 +585,12 @@ function admm_rect_gpu_two_level(case; outer_iterlim=10, inner_iterlim=800, rho_ while outer < outer_iterlim outer += 1 - if !use_gpu + if !env.use_gpu z_outer .= z_curr z_prev_norm = norm(z_outer) else - @cuda threads=64 blocks=(div(nvar-1, 64)+1) copy_data_kernel(nvar, cu_z_outer, cu_z_curr) - z_prev_norm = CUDA.norm(cu_z_curr) + @cuda threads=64 blocks=(div(nvar-1, 64)+1) copy_data_kernel(mod.nvar, z_outer, z_curr) + z_prev_norm = CUDA.norm(z_curr) CUDA.synchronize() end @@ -672,42 +598,42 @@ function admm_rect_gpu_two_level(case; outer_iterlim=10, inner_iterlim=800, rho_ while inner < inner_iterlim inner += 1 - if !use_gpu + if !env.use_gpu z_prev .= z_curr rp_old .= rp - tcpu = @timed generator_kernel_two_level_cpu(baseMVA, ngen, gen_start, u_curr, xbar_curr, zu_curr, lu_curr, rho_u, - pgmin, pgmax, qgmin, qgmax, c2, c1) + tcpu = @timed generator_kernel_two_level_cpu(data.baseMVA, mod.ngen, mod.gen_start, u_curr, xbar_curr, zu_curr, lu_curr, rho_u, + mod.pgmin, mod.pgmax, mod.qgmin, mod.qgmax, mod.c2, mod.c1) time_gen += tcpu.time #scale = min(scale, (2*1e4) / maximum(abs.(rho_u))) - if use_polar - tcpu = @timed auglag_it, tron_it = 
polar_kernel_two_level_cpu(n, nline, line_start, bus_start, scale, + if env.use_polar + tcpu = @timed auglag_it, tron_it = polar_kernel_two_level_cpu(mod.n, mod.nline, mod.line_start, mod.bus_start, scale, u_curr, xbar_curr, zu_curr, lu_curr, rho_u, - shift_lines, param, YffR, YffI, YftR, YftI, - YttR, YttI, YtfR, YtfI, FrBound, ToBound, brBusIdx) + shift_lines, env.membuf, mod.YffR, mod.YffI, mod.YftR, mod.YftI, + mod.YttR, mod.YttI, mod.YtfR, mod.YtfI, mod.FrBound, mod.ToBound, mod.brBusIdx) else - tcpu = @timed auglag_it, tron_it = auglag_kernel_cpu(n, nline, inner, max_auglag, line_start, mu_max, + tcpu = @timed auglag_it, tron_it = auglag_kernel_cpu(mod.n, mod.nline, inner, par.max_auglag, mod.line_start, par.mu_max, u_curr, v_curr, l_curr, rho, - wRIij, param, YffR, YffI, YftR, YftI, - YttR, YttI, YtfR, YtfI, FrBound, ToBound) + wRIij, env.membuf, mod.YffR, mod.YffI, mod.YftR, mod.YftI, + mod.YttR, mod.YttI, mod.YtfR, mod.YtfI, mod.FrBound, mod.ToBound) end time_br += tcpu.time - tcpu = @timed bus_kernel_two_level_cpu(baseMVA, nbus, gen_start, line_start, bus_start, - FrStart, FrIdx, ToStart, ToIdx, GenStart, GenIdx, - Pd, Qd, v_curr, xbar_curr, zv_curr, lv_curr, rho_v, YshR, YshI) + tcpu = @timed bus_kernel_two_level_cpu(data.baseMVA, mod.nbus, mod.gen_start, mod.line_start, mod.bus_start, + mod.FrStart, mod.FrIdx, mod.ToStart, mod.ToIdx, mod.GenStart, mod.GenIdx, + mod.Pd, mod.Qd, v_curr, xbar_curr, zv_curr, lv_curr, rho_v, mod.YshR, mod.YshI) time_bus += tcpu.time - update_xbar(data, gen_start, line_start, bus_start, FrStart, ToStart, FrIdx, ToIdx, + update_xbar(data, mod.gen_start, mod.line_start, mod.bus_start, mod.FrStart, mod.ToStart, mod.FrIdx, mod.ToIdx, u_curr, v_curr, xbar_curr, zu_curr, zv_curr, lu_curr, lv_curr, rho_u, rho_v) - update_zu(data, gen_start, line_start, bus_start, u_curr, xbar_curr, zu_curr, lu_curr, rho_u, lz_u, beta) + update_zu(data, mod.gen_start, mod.line_start, mod.bus_start, u_curr, xbar_curr, zu_curr, lu_curr, rho_u, lz_u, beta) zv_curr .= (-(lz_v .+ lv_curr .+ rho_v.*(v_curr .- xbar_curr))) ./ (beta .+ rho_v) l_curr .= -(lz .+ beta.*z_curr) - compute_primal_residual_u(data, gen_start, line_start, bus_start, rp_u, u_curr, xbar_curr, zu_curr) + compute_primal_residual_u(data, mod.gen_start, mod.line_start, mod.bus_start, rp_u, u_curr, xbar_curr, zu_curr) rp_v .= v_curr .- xbar_curr .+ zv_curr #= @@ -725,92 +651,96 @@ function admm_rect_gpu_two_level(case; outer_iterlim=10, inner_iterlim=800, rho_ mismatch = norm(Ax_plus_By) eps_pri = sqrt_d / (2500*outer) - if inner == 1 || (inner % 50) == 0 - @printf("%8s %8s %10s %10s %10s %10s %10s %10s\n", - "Outer", "Inner", "PrimRes", "EpsPrimRes", "DualRes", "||z||", "||Ax+By||", "Beta") + if par.verbose > 0 + if inner == 1 || (inner % 50) == 0 + @printf("%8s %8s %10s %10s %10s %10s %10s %10s\n", + "Outer", "Inner", "PrimRes", "EpsPrimRes", "DualRes", "||z||", "||Ax+By||", "Beta") + end + + @printf("%8d %8d %10.3e %10.3e %10.3e %10.3e %10.3e %10.3e\n", + outer, inner, primres, eps_pri, dualres, z_curr_norm, mismatch, beta) end - @printf("%8d %8d %10.3e %10.3e %10.3e %10.3e %10.3e %10.3e\n", - outer, inner, primres, eps_pri, dualres, z_curr_norm, mismatch, beta) - - if primres <= eps_pri || dualres <= DUAL_TOL + if primres <= eps_pri || dualres <= par.DUAL_TOL break end else - @cuda threads=64 blocks=(div(nvar-1, 64)+1) copy_data_kernel(nvar, cu_z_prev, cu_z_curr) - @cuda threads=64 blocks=(div(nvar-1, 64)+1) copy_data_kernel(nvar, cu_rp_old, cu_rp) + @cuda threads=64 blocks=(div(mod.nvar-1, 64)+1) 
copy_data_kernel(mod.nvar, z_prev, z_curr) + @cuda threads=64 blocks=(div(mod.nvar-1, 64)+1) copy_data_kernel(mod.nvar, rp_old, rp) CUDA.synchronize() - tgpu = CUDA.@timed @cuda threads=32 blocks=nblk_gen generator_kernel_two_level(baseMVA, ngen, gen_start, - cu_u_curr, cu_xbar_curr, cu_zu_curr, cu_lu_curr, cu_rho_u, - cu_pgmin, cu_pgmax, cu_qgmin, cu_qgmax, cu_c2, cu_c1) + tgpu = CUDA.@timed @cuda threads=32 blocks=nblk_gen generator_kernel_two_level(data.baseMVA, mod.ngen, mod.gen_start, + u_curr, xbar_curr, zu_curr, lu_curr, rho_u, + mod.pgmin, mod.pgmax, mod.qgmin, mod.qgmax, mod.c2, mod.c1) # MPI routines to be implemented: - # - Broadcast cu_v_curr and cu_l_curr to GPUs. - # - Collect cu_u_curr. + # - Broadcast v_curr and l_curr to GPUs. + # - Collect u_curr. # - div(nblk_br / number of GPUs, RoundUp) time_gen += tgpu.time - if use_polar - tgpu = CUDA.@timed @cuda threads=32 blocks=nblk_br shmem=shmem_size polar_kernel_two_level(n, nline, line_start, bus_start, scale, - cu_u_curr, cu_xbar_curr, cu_zu_curr, cu_lu_curr, cu_rho_u, - shift_lines, cuParam, cuYffR, cuYffI, cuYftR, cuYftI, - cuYttR, cuYttI, cuYtfR, cuYtfI, cuFrBound, cuToBound, cu_brBusIdx) + if env.use_polar + tgpu = CUDA.@timed @cuda threads=32 blocks=nblk_br shmem=shmem_size polar_kernel_two_level(mod.n, mod.nline, mod.line_start, mod.bus_start, scale, + u_curr, xbar_curr, zu_curr, lu_curr, rho_u, + shift_lines, env.membuf, mod.YffR, mod.YffI, mod.YftR, mod.YftI, + mod.YttR, mod.YttI, mod.YtfR, mod.YtfI, mod.FrBound, mod.ToBound, mod.brBusIdx) else - tgpu = CUDA.@timed @cuda threads=32 blocks=nblk_br shmem=shmem_size auglag_kernel(n, inner, max_auglag, line_start, scale, mu_max, - cu_u_curr, cu_v_curr, cu_l_curr, cu_rho, - cuWRIij, cuParam, cuYffR, cuYffI, cuYftR, cuYftI, - cuYttR, cuYttI, cuYtfR, cuYtfI, cuFrBound, cuToBound) + tgpu = CUDA.@timed @cuda threads=32 blocks=nblk_br shmem=shmem_size auglag_kernel(mod.n, inner, par.max_auglag, mod.line_start, scale, par.mu_max, + u_curr, v_curr, l_curr, rho, + wRIij, env.membuf, mod.YffR, mod.YffI, mod.YftR, mod.YftI, + mod.YttR, mod.YttI, mod.YtfR, mod.YtfI, mod.FrBound, mod.ToBound) end time_br += tgpu.time - tgpu = CUDA.@timed @cuda threads=32 blocks=nblk_bus bus_kernel_two_level(baseMVA, nbus, gen_start, line_start, bus_start, - cu_FrStart, cu_FrIdx, cu_ToStart, cu_ToIdx, cu_GenStart, cu_GenIdx, - cu_Pd, cu_Qd, cu_v_curr, cu_xbar_curr, cu_zv_curr, cu_lv_curr, - cu_rho_v, cuYshR, cuYshI) + tgpu = CUDA.@timed @cuda threads=32 blocks=nblk_bus bus_kernel_two_level(data.baseMVA, mod.nbus, mod.gen_start, mod.line_start, mod.bus_start, + mod.FrStart, mod.FrIdx, mod.ToStart, mod.ToIdx, mod.GenStart, mod.GenIdx, + mod.Pd, mod.Qd, v_curr, xbar_curr, zv_curr, lv_curr, + rho_v, mod.YshR, mod.YshI) time_bus += tgpu.time # Update xbar. 
- @cuda threads=64 blocks=(div(ngen-1, 64)+1) update_xbar_generator_kernel(ngen, gen_start, cu_u_curr, cu_v_curr, - cu_xbar_curr, cu_zu_curr, cu_zv_curr, cu_lu_curr, cu_lv_curr, cu_rho_u, cu_rho_v) - @cuda threads=64 blocks=(div(nline-1, 64)+1) update_xbar_branch_kernel(nline, line_start, cu_u_curr, cu_v_curr, - cu_xbar_curr, cu_zu_curr, cu_zv_curr, cu_lu_curr, cu_lv_curr, cu_rho_u, cu_rho_v) - @cuda threads=64 blocks=(div(nbus-1, 64)+1) update_xbar_bus_kernel(nbus, line_start, bus_start, cu_FrStart, cu_FrIdx, cu_ToStart, cu_ToIdx, - cu_u_curr, cu_v_curr, cu_xbar_curr, cu_zu_curr, cu_zv_curr, cu_lu_curr, cu_lv_curr, cu_rho_u, cu_rho_v) + @cuda threads=64 blocks=(div(mod.ngen-1, 64)+1) update_xbar_generator_kernel(mod.ngen, mod.gen_start, u_curr, v_curr, + xbar_curr, zu_curr, zv_curr, lu_curr, lv_curr, rho_u, rho_v) + @cuda threads=64 blocks=(div(mod.nline-1, 64)+1) update_xbar_branch_kernel(mod.nline, mod.line_start, u_curr, v_curr, + xbar_curr, zu_curr, zv_curr, lu_curr, lv_curr, rho_u, rho_v) + @cuda threads=64 blocks=(div(mod.nbus-1, 64)+1) update_xbar_bus_kernel(mod.nbus, mod.line_start, mod.bus_start, mod.FrStart, mod.FrIdx, mod.ToStart, mod.ToIdx, + u_curr, v_curr, xbar_curr, zu_curr, zv_curr, lu_curr, lv_curr, rho_u, rho_v) CUDA.synchronize() # Update z. - @cuda threads=64 blocks=(div(ngen-1, 64)+1) update_zu_generator_kernel(ngen, gen_start, cu_u_curr, - cu_xbar_curr, cu_zu_curr, cu_lu_curr, cu_rho_u, cu_lz_u, beta) - @cuda threads=64 blocks=(div(nline-1, 64)+1) update_zu_branch_kernel(nline, line_start, bus_start, cu_brBusIdx, - cu_u_curr, cu_xbar_curr, cu_zu_curr, cu_lu_curr, cu_rho_u, cu_lz_u, beta) - @cuda threads=64 blocks=(div(nvar_v-1, 64)+1) update_zv_kernel(nvar_v, cu_v_curr, cu_xbar_curr, cu_zv_curr, - cu_lv_curr, cu_rho_v, cu_lz_v, beta) + @cuda threads=64 blocks=(div(mod.ngen-1, 64)+1) update_zu_generator_kernel(mod.ngen, mod.gen_start, u_curr, + xbar_curr, zu_curr, lu_curr, rho_u, lz_u, beta) + @cuda threads=64 blocks=(div(mod.nline-1, 64)+1) update_zu_branch_kernel(mod.nline, mod.line_start, mod.bus_start, mod.brBusIdx, + u_curr, xbar_curr, zu_curr, lu_curr, rho_u, lz_u, beta) + @cuda threads=64 blocks=(div(mod.nvar_v-1, 64)+1) update_zv_kernel(mod.nvar_v, v_curr, xbar_curr, zv_curr, + lv_curr, rho_v, lz_v, beta) CUDA.synchronize() # Update multiiplier and residuals. 
- @cuda threads=64 blocks=(div(nvar-1, 64)+1) update_l_kernel(nvar, cu_l_curr, cu_z_curr, cu_lz, beta) - @cuda threads=64 blocks=(div(ngen-1, 64)+1) compute_primal_residual_u_generator_kernel(ngen, gen_start, cu_rp_u, cu_u_curr, cu_xbar_curr, cu_zu_curr) - @cuda threads=64 blocks=(div(nline-1, 64)+1) compute_primal_residual_u_branch_kernel(nline, line_start, bus_start, cu_brBusIdx, cu_rp_u, cu_u_curr, cu_xbar_curr, cu_zu_curr) - @cuda threads=64 blocks=(div(nvar_v-1, 64)+1) compute_primal_residual_v_kernel(nvar_v, cu_rp_v, cu_v_curr, cu_xbar_curr, cu_zv_curr) - @cuda threads=64 blocks=(div(nvar-1, 64)+1) vector_difference(nvar, cu_rd, cu_z_curr, cu_z_prev) + @cuda threads=64 blocks=(div(mod.nvar-1, 64)+1) update_l_kernel(mod.nvar, l_curr, z_curr, lz, beta) + @cuda threads=64 blocks=(div(mod.ngen-1, 64)+1) compute_primal_residual_u_generator_kernel(mod.ngen, mod.gen_start, rp_u, u_curr, xbar_curr, zu_curr) + @cuda threads=64 blocks=(div(mod.nline-1, 64)+1) compute_primal_residual_u_branch_kernel(mod.nline, mod.line_start, mod.bus_start, mod.brBusIdx, rp_u, u_curr, xbar_curr, zu_curr) + @cuda threads=64 blocks=(div(mod.nvar_v-1, 64)+1) compute_primal_residual_v_kernel(mod.nvar_v, rp_v, v_curr, xbar_curr, zv_curr) + @cuda threads=64 blocks=(div(mod.nvar-1, 64)+1) vector_difference(mod.nvar, rd, z_curr, z_prev) CUDA.synchronize() - CUDA.@sync @cuda threads=64 blocks=(div(nvar-1, 64)+1) vector_difference(nvar, cu_Ax_plus_By, cu_rp, cu_z_curr) - mismatch = CUDA.norm(cu_Ax_plus_By) + CUDA.@sync @cuda threads=64 blocks=(div(mod.nvar-1, 64)+1) vector_difference(mod.nvar, Ax_plus_By, rp, z_curr) + mismatch = CUDA.norm(Ax_plus_By) - primres = CUDA.norm(cu_rp) - dualres = CUDA.norm(cu_rd) - z_curr_norm = CUDA.norm(cu_z_curr) + primres = CUDA.norm(rp) + dualres = CUDA.norm(rd) + z_curr_norm = CUDA.norm(z_curr) eps_pri = sqrt_d / (2500*outer) - if inner == 1 || (inner % 50) == 0 - @printf("%8s %8s %10s %10s %10s %10s %10s %10s %10s\n", - "Outer", "Inner", "PrimRes", "EpsPrimRes", "DualRes", "||z||", "||Ax+By||", "OuterTol", "Beta") - end + if par.verbose > 0 + if inner == 1 || (inner % 50) == 0 + @printf("%8s %8s %10s %10s %10s %10s %10s %10s %10s\n", + "Outer", "Inner", "PrimRes", "EpsPrimRes", "DualRes", "||z||", "||Ax+By||", "OuterTol", "Beta") + end - @printf("%8d %8d %10.3e %10.3e %10.3e %10.3e %10.3e %10.3e %10.3e\n", - outer, inner, primres, eps_pri, dualres, z_curr_norm, mismatch, OUTER_TOL, beta) + @printf("%8d %8d %10.3e %10.3e %10.3e %10.3e %10.3e %10.3e %10.3e\n", + outer, inner, primres, eps_pri, dualres, z_curr_norm, mismatch, OUTER_TOL, beta) + end - if primres <= eps_pri || dualres <= DUAL_TOL + if primres <= eps_pri || dualres <= par.DUAL_TOL break end end @@ -820,10 +750,10 @@ function admm_rect_gpu_two_level(case; outer_iterlim=10, inner_iterlim=800, rho_ break end - if !use_gpu - lz .+= max.(-MAX_MULTIPLIER, min.(MAX_MULTIPLIER, beta .* z_curr)) + if !env.use_gpu + lz .+= max.(-par.MAX_MULTIPLIER, min.(par.MAX_MULTIPLIER, beta .* z_curr)) else - CUDA.@sync @cuda threads=64 blocks=(div(nvar-1, 64)+1) update_lz_kernel(nvar, MAX_MULTIPLIER, cu_z_curr, cu_lz, beta) + CUDA.@sync @cuda threads=64 blocks=(div(mod.nvar-1, 64)+1) update_lz_kernel(mod.nvar, par.MAX_MULTIPLIER, z_curr, lz, beta) end if z_curr_norm > theta*z_prev_norm @@ -832,43 +762,51 @@ function admm_rect_gpu_two_level(case; outer_iterlim=10, inner_iterlim=800, rho_ end end - if use_gpu - # Copying from view to view generates "Warning: Performing scalar operations on GPU arrays." 
-        # We ignore this seemingly wrong message for now.
-        copyto!(x_curr, 1, cu_x_curr, 1, nvar_u)
-        copyto!(x_curr, nvar_u+1, cu_x_curr, nvar_u+1, nvar_v)
-        copyto!(xbar_curr, cu_xbar_curr)
+    if par.verbose > 0
+        # Test feasibility of global variable xbar:
+        pg_err, qg_err = check_generator_bounds(data, mod.gen_start, xbar_curr; use_gpu = env.use_gpu)
+        vm_err = check_voltage_bounds(data, mod.bus_start, xbar_curr)
+        real_err, reactive_err = check_power_balance_violation(data, mod.gen_start, mod.line_start, mod.bus_start, xbar_curr, mod.YshR, mod.YshI)
+        @printf(" ** Violations of global variable xbar\n")
+        @printf("Real power generator bounds = %.6e\n", pg_err)
+        @printf("Reactive power generator bounds = %.6e\n", qg_err)
+        @printf("Voltage bounds = %.6e\n", vm_err)
+        @printf("Real power balance = %.6e\n", real_err)
+        @printf("Reactive power balance = %.6e\n", reactive_err)
+
+        rateA_nviols, rateA_maxviol, rateC_nviols, rateC_maxviol = check_linelimit_violation(data, u_curr)
+        @printf(" ** Line limit violations\n")
+        @printf("RateA number of violations = %d (%d)\n", rateA_nviols, mod.nline)
+        @printf("RateA maximum violation = %.2f\n", rateA_maxviol)
+        @printf("RateC number of violations = %d (%d)\n", rateC_nviols, mod.nline)
+        @printf("RateC maximum violation = %.2f\n", rateC_maxviol)
+
+        @printf(" ** Time\n")
+        @printf("Generator = %.2f\n", time_gen)
+        @printf("Branch = %.2f\n", time_br)
+        @printf("Bus = %.2f\n", time_bus)
+        @printf("Total = %.2f\n", time_gen + time_br + time_bus)
     end
 
-    # Test feasibility of global variable xbar:
-    pg_err, qg_err = check_generator_bounds(data, gen_start, xbar_curr)
-    vm_err = check_voltage_bounds(data, bus_start, xbar_curr)
-    real_err, reactive_err = check_power_balance_violation(data, gen_start, line_start, bus_start, xbar_curr, YshR, YshI)
-    @printf(" ** Violations of global variable xbar\n")
-    @printf("Real power generator bounds = %.6e\n", pg_err)
-    @printf("Reactive power generator bounds = %.6e\n", qg_err)
-    @printf("Voltage bounds = %.6e\n", vm_err)
-    @printf("Real power balance = %.6e\n", real_err)
-    @printf("Reaactive power balance = %.6e\n", reactive_err)
-
-    rateA_nviols, rateA_maxviol, rateC_nviols, rateC_maxviol = check_linelimit_violation(data, u_curr)
-    @printf(" ** Line limit violations\n")
-    @printf("RateA number of violations = %d (%d)\n", rateA_nviols, nline)
-    @printf("RateA maximum violation = %.2f\n", rateA_maxviol)
-    @printf("RateC number of violations = %d (%d)\n", rateC_nviols, nline)
-    @printf("RateC maximum violation = %.2f\n", rateC_maxviol)
-
-    @printf(" ** Time\n")
-    @printf("Generator = %.2f\n", time_gen)
-    @printf("Branch = %.2f\n", time_br)
-    @printf("Bus = %.2f\n", time_bus)
-    @printf("Total = %.2f\n", time_gen + time_br + time_bus)
-
-    objval = sum(data.generators[g].coeff[data.generators[g].n-2]*(baseMVA*u_curr[gen_start+2*(g-1)])^2 +
-                 data.generators[g].coeff[data.generators[g].n-1]*(baseMVA*u_curr[gen_start+2*(g-1)]) +
+
+    sol.objval = sum(data.generators[g].coeff[data.generators[g].n-2]*(data.baseMVA*u_curr[mod.gen_start+2*(g-1)])^2 +
+                 data.generators[g].coeff[data.generators[g].n-1]*(data.baseMVA*u_curr[mod.gen_start+2*(g-1)]) +
                  data.generators[g].coeff[data.generators[g].n]
-                 for g in 1:ngen)
-    @printf("Objective value = %.6e\n", objval)
+                 for g in 1:mod.ngen)
+    @printf("Objective value = %.6e\n", sol.objval)
 
     return
+end
+
+function admm_rect_gpu_two_level(case, ::Type{VT};
+    outer_iterlim=10, inner_iterlim=800, rho_pq=400.0, rho_va=40000.0, scale=1e-4,
+    use_gpu=false, use_polar=true, gpu_no=1, verbose=1) where VT
+
+    if use_gpu
+        CUDA.device!(gpu_no)
+    end
+    env = AdmmEnv{Float64, VT{Float64, 1}, VT{Int, 1}, VT{Float64, 2}}(
+        case, rho_pq, rho_va; use_gpu=use_gpu, use_polar=use_polar, use_twolevel=true, gpu_no=gpu_no, verbose=verbose,
+    )
+    admm_restart_two_level(env, outer_iterlim=outer_iterlim, inner_iterlim=inner_iterlim, scale=scale)
+    return env
 end
\ No newline at end of file
diff --git a/src/admm/environment.jl b/src/admm/environment.jl
index e2088de..532c549 100644
--- a/src/admm/environment.jl
+++ b/src/admm/environment.jl
@@ -19,6 +19,13 @@ mutable struct Parameters
     eta::Float64 # TODO: not used
     verbose::Int
 
+    # Two-Level ADMM
+    outer_eps::Float64
+    Kf::Int # TODO: not used
+    Kf_mean::Int # TODO: not used
+    MAX_MULTIPLIER::Float64
+    DUAL_TOL::Float64
+
     function Parameters()
         par = new()
         par.mu_max = 1e8
@@ -34,6 +41,11 @@ mutable struct Parameters
         par.ABSTOL = 1e-6
         par.RELTOL = 1e-5
         par.verbose = 1
+        par.outer_eps = 2*1e-4
+        par.Kf = 100
+        par.Kf_mean = 10
+        par.MAX_MULTIPLIER = 1e12
+        par.DUAL_TOL = 1e-8
         return par
     end
 end
@@ -81,7 +93,13 @@ mutable struct Model{T,TD,TI}
     Pd::TD
     Qd::TD
 
-    function Model{T,TD,TI}(data::OPFData, use_gpu::Bool, use_polar::Bool) where {T, TD<:AbstractArray{T}, TI<:AbstractArray{Int}}
+    # Two-Level ADMM
+    nvar_u::Int
+    nvar_v::Int
+    bus_start::Int # this is for variables of type v.
+    brBusIdx::TI
+
+    function Model{T,TD,TI}(data::OPFData, use_gpu::Bool, use_polar::Bool, use_twolevel::Bool) where {T, TD<:AbstractArray{T}, TI<:AbstractArray{Int}}
         model = new{T,TD,TI}()
 
         model.n = (use_polar == true) ? 4 : 10
@@ -95,16 +113,27 @@ mutable struct Model{T,TD,TI}
         model.YshR, model.YshI, model.YffR, model.YffI, model.YftR, model.YftI,
             model.YttR, model.YttI, model.YtfR, model.YtfI, model.FrBound, model.ToBound = get_branch_data(data; use_gpu=use_gpu)
         model.FrStart, model.FrIdx, model.ToStart, model.ToIdx, model.GenStart, model.GenIdx, model.Pd, model.Qd = get_bus_data(data; use_gpu=use_gpu)
 
+        # These are only for two-level ADMM.
+        model.nvar_u = 2*model.ngen + 8*model.nline
+        model.nvar_v = 2*model.ngen + 4*model.nline + 2*model.nbus
+        model.bus_start = 2*model.ngen + 4*model.nline + 1
+        if use_twolevel
+            model.nvar = model.nvar_u + model.nvar_v
+            model.brBusIdx = get_branch_bus_index(data; use_gpu=use_gpu)
+        end
+
         return model
     end
 end
 
+abstract type AbstractSolution{T,TD} end
+
 """
     Solution{T,TD}
 
 This contains the solutions of ACOPF model instance, including the ADMM parameter rho.
 """
-mutable struct Solution{T,TD}
+mutable struct Solution{T,TD} <: AbstractSolution{T,TD}
     u_curr::TD
     v_curr::TD
     l_curr::TD
@@ -116,7 +145,7 @@ mutable struct Solution{T,TD} <: AbstractSolution{T,TD}
     rp::TD
     objval::T
 
-    function Solution{T,TD}(data::OPFData, model::Model, rho_pq, rho_va) where {T, TD<:AbstractArray{T}}
+    function Solution{T,TD}(model::Model) where {T, TD<:AbstractArray{T}}
         return new{T,TD}(
             TD(undef, model.nvar),
             TD(undef, model.nvar),
@@ -132,6 +161,48 @@ mutable struct Solution{T,TD} <: AbstractSolution{T,TD}
     end
 end
 
+"""
+    Solution2{T,TD}
+
+This contains the solutions of ACOPF model instance for two-level ADMM algorithm,
+    including the ADMM parameter rho.
+""" +mutable struct Solution2{T,TD} <: AbstractSolution{T,TD} + x_curr::TD + xbar_curr::TD + z_outer::TD + z_curr::TD + z_prev::TD + l_curr::TD + lz::TD + rho::TD + rp::TD + rd::TD + rp_old::TD + Ax_plus_By::TD + wRIij::TD + objval::T + + function Solution2{T,TD}(model::Model) where {T, TD<:AbstractArray{T}} + return new{T,TD}( + TD(undef, model.nvar), # x_curr + TD(undef, model.nvar_v), # xbar_curr + TD(undef, model.nvar), # z_outer + TD(undef, model.nvar), # z_curr + TD(undef, model.nvar), # z_prev + TD(undef, model.nvar), # l_curr + TD(undef, model.nvar), # lz + TD(undef, model.nvar), # rho + TD(undef, model.nvar), # rp + TD(undef, model.nvar), # rd + TD(undef, model.nvar), # rp_old + TD(undef, model.nvar), # Ax_plus_By + TD(undef, 2*model.nline), # wRIij + Inf, + ) + end +end + """ AdmmEnv{T,TD,TI} @@ -142,16 +213,17 @@ mutable struct AdmmEnv{T,TD,TI,TM} data::OPFData use_gpu::Bool use_polar::Bool + use_twolevel::Bool gpu_no::Int params::Parameters model::Model{T,TD,TI} - solution::Solution{T,TD} + solution::AbstractSolution{T,TD} membuf::TM # was param function AdmmEnv{T,TD,TI,TM}( - case, rho_pq, rho_va; use_gpu=false, use_polar=true, gpu_no=1, verbose=1, + case, rho_pq, rho_va; use_gpu=false, use_polar=true, use_twolevel=false, gpu_no=1, verbose=1, ) where {T, TD<:AbstractArray{T}, TI<:AbstractArray{Int}, TM<:AbstractArray{T,2}} env = new{T,TD,TI,TM}() @@ -164,16 +236,18 @@ mutable struct AdmmEnv{T,TD,TI,TM} env.params = Parameters() env.params.verbose = verbose - env.model = Model{T,TD,TI}(env.data, use_gpu, use_polar) - env.solution = Solution{T,TD}(env.data, env.model, rho_pq, rho_va) - - wRIij = zeros(2*env.model.nline) # TODO: Not used - + env.model = Model{T,TD,TI}(env.data, use_gpu, use_polar, use_twolevel) ybus = Ybus{Array{T}}(computeAdmitances( env.data.lines, env.data.buses, env.data.baseMVA; VI=Array{Int}, VD=Array{T})...) 
- init_solution!(env.solution, env.data, ybus, env.model.gen_start, env.model.line_start, - rho_pq, rho_va, wRIij) + if !use_twolevel + env.solution = Solution{T,TD}(env.model) + init_solution!(env, ybus, rho_pq, rho_va) + else + env.solution = Solution2{T,TD}(env.model) + init_values_two_level!(env, ybus, rho_pq, rho_va) + end + env.membuf = TM(undef, (31, env.model.nline)) fill!(env.membuf, 0.0) From e1fb0bf8ea6a9b73092bd6cc0d87f0a8b0462eb0 Mon Sep 17 00:00:00 2001 From: Kibaek Kim Date: Fri, 21 May 2021 20:41:51 -0500 Subject: [PATCH 2/6] fixed and tested the gpu part of the two-level admm --- src/admm/acopf_admm_gpu_two_level.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/admm/acopf_admm_gpu_two_level.jl b/src/admm/acopf_admm_gpu_two_level.jl index a3a678e..84e131b 100644 --- a/src/admm/acopf_admm_gpu_two_level.jl +++ b/src/admm/acopf_admm_gpu_two_level.jl @@ -589,7 +589,7 @@ function admm_restart_two_level(env::AdmmEnv; outer_iterlim=10, inner_iterlim=80 z_outer .= z_curr z_prev_norm = norm(z_outer) else - @cuda threads=64 blocks=(div(nvar-1, 64)+1) copy_data_kernel(mod.nvar, z_outer, z_curr) + @cuda threads=64 blocks=(div(mod.nvar-1, 64)+1) copy_data_kernel(mod.nvar, z_outer, z_curr) z_prev_norm = CUDA.norm(z_curr) CUDA.synchronize() end From 553fa261f43206ffd99b359a96404d9c76b7d3c5 Mon Sep 17 00:00:00 2001 From: Kibaek Kim Date: Mon, 24 May 2021 10:48:16 -0500 Subject: [PATCH 3/6] refactoring to use `struct` as arguments with explicit types --- src/admm/acopf_admm_gpu.jl | 16 ++- src/admm/acopf_admm_gpu_two_level.jl | 142 +++++++++++++++++---------- 2 files changed, 95 insertions(+), 63 deletions(-) diff --git a/src/admm/acopf_admm_gpu.jl b/src/admm/acopf_admm_gpu.jl index be94bbd..f9f881c 100644 --- a/src/admm/acopf_admm_gpu.jl +++ b/src/admm/acopf_admm_gpu.jl @@ -1,4 +1,4 @@ -function get_generator_data(data; use_gpu=false) +function get_generator_data(data::OPFData; use_gpu=false) ngen = length(data.generators) if use_gpu @@ -37,10 +37,8 @@ function get_generator_data(data; use_gpu=false) return pgmin,pgmax,qgmin,qgmax,c2,c1,c0 end -function get_bus_data(data; use_gpu=false) - ngen = length(data.generators) +function get_bus_data(data::OPFData; use_gpu=false) nbus = length(data.buses) - nline = length(data.lines) FrIdx = [l for b=1:nbus for l in data.FromLines[b]] ToIdx = [l for b=1:nbus for l in data.ToLines[b]] @@ -77,7 +75,7 @@ function get_bus_data(data; use_gpu=false) end end -function get_branch_data(data; use_gpu=false) +function get_branch_data(data::OPFData; use_gpu=false) buses = data.buses lines = data.lines BusIdx = data.BusIdx @@ -120,7 +118,7 @@ function get_branch_data(data; use_gpu=false) end end -function init_solution!(env, ybus, rho_pq, rho_va) +function init_solution!(env::AdmmEnv, ybus::Ybus, rho_pq, rho_va) sol, data, model = env.solution, env.data, env.model lines = data.lines @@ -223,7 +221,7 @@ function dual_residual_kernel(n::Int, rd::CuDeviceArray{Float64,1}, return end -function check_linelimit_violation(data, u) +function check_linelimit_violation(data::OPFData, u) lines = data.lines nline = length(data.lines) line_start = 2*length(data.generators) + 1 @@ -407,7 +405,7 @@ function admm_restart(env::AdmmEnv; iterlim=800, scale=1e-4) return end -function admm_rect_gpu(case, ::Type{VT}; iterlim=800, rho_pq=400.0, rho_va=40000.0, scale=1e-4, +function admm_rect_gpu(case::String, ::Type{VT}; iterlim=800, rho_pq=400.0, rho_va=40000.0, scale=1e-4, use_gpu=false, use_polar=true, gpu_no=1, verbose=1) where VT if use_gpu 
CUDA.device!(gpu_no) @@ -421,7 +419,7 @@ end # TODO: This needs revised to use AdmmEnv. function admm_rect_gpu_mpi( - case; + case::String; iterlim=800, rho_pq=400.0, rho_va=40000.0, scale=1e-4, use_gpu=false, use_polar=true, gpu_no=1, comm::MPI.Comm=MPI.COMM_WORLD, ) diff --git a/src/admm/acopf_admm_gpu_two_level.jl b/src/admm/acopf_admm_gpu_two_level.jl index 84e131b..93c561f 100644 --- a/src/admm/acopf_admm_gpu_two_level.jl +++ b/src/admm/acopf_admm_gpu_two_level.jl @@ -1,7 +1,10 @@ -function check_generator_bounds(data, gen_start, xbar; use_gpu=false) - ngen = length(data.generators) +function check_generator_bounds(env::AdmmEnv, xbar) + generators = env.data.generators + gen_start = env.model.gen_start - if use_gpu + ngen = length(generators) + + if env.use_gpu pgmin = CuArray{Float64}(undef, ngen) pgmax = CuArray{Float64}(undef, ngen) qgmin = CuArray{Float64}(undef, ngen) @@ -13,10 +16,10 @@ function check_generator_bounds(data, gen_start, xbar; use_gpu=false) qgmax = Array{Float64}(undef, ngen) end - Pmin = Float64[data.generators[g].Pmin for g in 1:ngen] - Pmax = Float64[data.generators[g].Pmax for g in 1:ngen] - Qmin = Float64[data.generators[g].Qmin for g in 1:ngen] - Qmax = Float64[data.generators[g].Qmax for g in 1:ngen] + Pmin = Float64[generators[g].Pmin for g in 1:ngen] + Pmax = Float64[generators[g].Pmax for g in 1:ngen] + Qmin = Float64[generators[g].Qmin for g in 1:ngen] + Qmax = Float64[generators[g].Qmax for g in 1:ngen] copyto!(pgmin, Pmin) copyto!(pgmax, Pmax) copyto!(qgmin, Qmin) @@ -39,9 +42,11 @@ function check_generator_bounds(data, gen_start, xbar; use_gpu=false) return max_viol_real, max_viol_reactive end -function check_voltage_bounds(data, bus_start, xbar) - nbus = length(data.buses) - buses = data.buses +function check_voltage_bounds(env::AdmmEnv, xbar) + + buses = env.data.buses + nbus = length(buses) + bus_start = env.model.bus_start max_viol = 0.0 @@ -54,7 +59,11 @@ function check_voltage_bounds(data, bus_start, xbar) return max_viol end -function check_power_balance_violation(data, gen_start, line_start, bus_start, xbar, YshR, YshI) +function check_power_balance_violation(env::AdmmEnv, xbar) + data = env.data + model = env.model + gen_start, line_start, bus_start, YshR, YshI = model.gen_start, model.line_start, model.bus_start, model.YshR, model.YshI + baseMVA = data.baseMVA nbus = length(data.buses) @@ -91,8 +100,7 @@ function check_power_balance_violation(data, gen_start, line_start, bus_start, x return max_viol_real, max_viol_reactive end -function get_branch_bus_index(data; use_gpu=false) - buses = data.buses +function get_branch_bus_index(data::OPFData; use_gpu=false) lines = data.lines BusIdx = data.BusIdx nline = length(lines) @@ -108,7 +116,7 @@ function get_branch_bus_index(data; use_gpu=false) end end -function init_values_two_level!(env, ybus, rho_pq, rho_va) +function init_values_two_level!(env::AdmmEnv, ybus::Ybus, rho_pq, rho_va) sol, data, model = env.solution, env.data, env.model gen_start, line_start, bus_start = model.gen_start, model.line_start, model.bus_start @@ -171,14 +179,21 @@ function init_values_two_level!(env, ybus, rho_pq, rho_va) return end -function update_xbar(data, gen_start, line_start, bus_start, FrStart, ToStart, FrIdx, ToIdx, - u, v, xbar, zu, zv, lu, lv, rho_u, rho_v) - lines = data.lines - BusIdx = data.BusIdx +function update_xbar(env::AdmmEnv, u, v, xbar, zu, zv, lu, lv, rho_u, rho_v) + data = env.data ngen = length(data.generators) nline = length(data.lines) nbus = length(data.buses) + model = env.model + 
gen_start = model.gen_start + line_start = model.line_start + bus_start = model.bus_start + FrStart = model.FrStart + ToStart = model.ToStart + FrIdx = model.FrIdx + ToIdx = model.ToIdx + gen_end = gen_start + 2*ngen - 1 xbar[gen_start:gen_end] .= (lu[gen_start:gen_end] .+ rho_u[gen_start:gen_end].*(u[gen_start:gen_end] .+ zu[gen_start:gen_end]) .+ lv[gen_start:gen_end] .+ rho_v[gen_start:gen_end].*(v[gen_start:gen_end] .+ zv[gen_start:gen_end])) ./ @@ -225,7 +240,8 @@ function update_xbar(data, gen_start, line_start, bus_start, FrStart, ToStart, F end end -function update_xbar_generator_kernel(n, gen_start, u, v, xbar, zu, zv, lu, lv, rho_u, rho_v) +function update_xbar_generator_kernel(model::Model, u, v, xbar, zu, zv, lu, lv, rho_u, rho_v) + n, gen_start = model.n, model.gen_start tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) if tx <= n @@ -239,7 +255,8 @@ function update_xbar_generator_kernel(n, gen_start, u, v, xbar, zu, zv, lu, lv, return end -function update_xbar_branch_kernel(n, line_start, u, v, xbar, zu, zv, lu, lv, rho_u, rho_v) +function update_xbar_branch_kernel(model::Model, u, v, xbar, zu, zv, lu, lv, rho_u, rho_v) + n, line_start = model.n, model.line_start tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) if tx <= n @@ -264,7 +281,9 @@ function update_xbar_branch_kernel(n, line_start, u, v, xbar, zu, zv, lu, lv, rh return end -function update_xbar_bus_kernel(n, line_start, bus_start, FrStart, FrIdx, ToStart, ToIdx, u, v, xbar, zu, zv, lu, lv, rho_u, rho_v) +function update_xbar_bus_kernel(model::Model, u, v, xbar, zu, zv, lu, lv, rho_u, rho_v) + n, line_start, bus_start = model.n, model.line_start, model.bus_start + FrStart, FrIdx, ToStart, ToIdx = model.FrStart, model.FrIdx, model.ToStart, model.ToIdx b = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) if b <= n @@ -302,7 +321,8 @@ function update_xbar_bus_kernel(n, line_start, bus_start, FrStart, FrIdx, ToStar return end -function update_lu(data, gen_start, line_start, bus_start, u, xbar, zu, l, rho) +# TODO: This function is not used. 
+function update_lu(data::OPFData, gen_start, line_start, bus_start, u, xbar, zu, l, rho) lines = data.lines BusIdx = data.BusIdx ngen = length(data.generators) @@ -327,12 +347,16 @@ function update_lu(data, gen_start, line_start, bus_start, u, xbar, zu, l, rho) end end -function update_zu(data, gen_start, line_start, bus_start, u, xbar, z, l, rho, lz, beta) +function update_zu(env::AdmmEnv, u, xbar, z, l, rho, lz, beta) + data = env.data lines = data.lines BusIdx = data.BusIdx ngen = length(data.generators) nline = length(data.lines) + model = env.model + gen_start, line_start, bus_start = model.gen_start, model.line_start, model.bus_start + gen_end = gen_start + 2*ngen - 1 z[gen_start:gen_end] .= (-(lz[gen_start:gen_end] .+ l[gen_start:gen_end] .+ rho[gen_start:gen_end].*(u[gen_start:gen_end] .- xbar[gen_start:gen_end]))) ./ (beta .+ rho[gen_start:gen_end]) @@ -352,7 +376,8 @@ function update_zu(data, gen_start, line_start, bus_start, u, xbar, z, l, rho, l end end -function update_zu_generator_kernel(n, gen_start, u, xbar, z, l, rho, lz, beta) +function update_zu_generator_kernel(model::Model, u, xbar, z, l, rho, lz, beta) + n, gen_start = model.n, model.gen_start tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) if tx <= n @@ -366,7 +391,8 @@ function update_zu_generator_kernel(n, gen_start, u, xbar, z, l, rho, lz, beta) return end -function update_zu_branch_kernel(n, line_start, bus_start, brBusIdx, u, xbar, z, l, rho, lz, beta) +function update_zu_branch_kernel(model::Model, u, xbar, z, l, rho, lz, beta) + n, line_start, bus_start, brBusIdx = model.n, model.line_start, model.bus_start, model.brBusIdx tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) if tx <= n @@ -391,7 +417,8 @@ function update_zu_branch_kernel(n, line_start, bus_start, brBusIdx, u, xbar, z, return end -function update_zv_kernel(n, v, xbar, z, l, rho, lz, beta) +function update_zv_kernel(model::Model, v, xbar, z, l, rho, lz, beta) + n = model.n tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) if tx <= n @@ -403,7 +430,8 @@ function update_zv_kernel(n, v, xbar, z, l, rho, lz, beta) return end -function update_l_kernel(n, l, z, lz, beta) +function update_l_kernel(model::Model, l, z, lz, beta) + n = model.n tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) if tx <= n @@ -415,12 +443,16 @@ function update_l_kernel(n, l, z, lz, beta) return end -function compute_primal_residual_u(data, gen_start, line_start, bus_start, rp_u, u, xbar, z) +function compute_primal_residual_u(env::AdmmEnv, rp_u, u, xbar, z) + data = env.data lines = data.lines BusIdx = data.BusIdx ngen = length(data.generators) nline = length(data.lines) + model = env.model + gen_start, line_start, bus_start = model.gen_start, model.line_start, model.bus_start + gen_end = gen_start + 2*ngen - 1 rp_u[gen_start:gen_end] .= u[gen_start:gen_end] .- xbar[gen_start:gen_end] .+ z[gen_start:gen_end] @@ -441,7 +473,8 @@ function compute_primal_residual_u(data, gen_start, line_start, bus_start, rp_u, end end -function compute_primal_residual_u_generator_kernel(n, gen_start, rp, u, xbar, z) +function compute_primal_residual_u_generator_kernel(model::Model, rp, u, xbar, z) + n, gen_start = model.n, model.gen_start tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) if tx <= n @@ -455,7 +488,8 @@ function compute_primal_residual_u_generator_kernel(n, gen_start, rp, u, xbar, z return end -function compute_primal_residual_u_branch_kernel(n, line_start, bus_start, brBusIdx, rp, u, xbar, z) +function 
compute_primal_residual_u_branch_kernel(model::Model, rp, u, xbar, z) + n, line_start, bus_start, brBusIdx = model.n, model.line_start, model.bus_start, model.brBusIdx tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) if tx <= n @@ -477,7 +511,8 @@ function compute_primal_residual_u_branch_kernel(n, line_start, bus_start, brBus return end -function compute_primal_residual_v_kernel(n, rp, v, xbar, z) +function compute_primal_residual_v_kernel(model::Model, rp, v, xbar, z) + n = model.n tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) if tx <= n @@ -487,7 +522,7 @@ function compute_primal_residual_v_kernel(n, rp, v, xbar, z) return end -function vector_difference(n, c, a, b) +function vector_difference(n::Int, c, a, b) tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) if tx <= n @@ -497,7 +532,8 @@ function vector_difference(n, c, a, b) return end -function update_lz_kernel(n, max_limit, z, lz, beta) +function update_lz_kernel(env::AdmmEnv, z, lz, beta) + n, max_limit = env.model.n, env.params.max_limit tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) if tx <= n @@ -507,6 +543,7 @@ function update_lz_kernel(n, max_limit, z, lz, beta) return end +# TODO: Not used function update_rho(rho, rp, rp_old, theta, gamma) for i=1:length(rho) if abs(rp[i]) > theta*abs(rp_old[i]) @@ -625,15 +662,14 @@ function admm_restart_two_level(env::AdmmEnv; outer_iterlim=10, inner_iterlim=80 mod.Pd, mod.Qd, v_curr, xbar_curr, zv_curr, lv_curr, rho_v, mod.YshR, mod.YshI) time_bus += tcpu.time - update_xbar(data, mod.gen_start, mod.line_start, mod.bus_start, mod.FrStart, mod.ToStart, mod.FrIdx, mod.ToIdx, - u_curr, v_curr, xbar_curr, zu_curr, zv_curr, lu_curr, lv_curr, rho_u, rho_v) + update_xbar(env, u_curr, v_curr, xbar_curr, zu_curr, zv_curr, lu_curr, lv_curr, rho_u, rho_v) - update_zu(data, mod.gen_start, mod.line_start, mod.bus_start, u_curr, xbar_curr, zu_curr, lu_curr, rho_u, lz_u, beta) + update_zu(env, u_curr, xbar_curr, zu_curr, lu_curr, rho_u, lz_u, beta) zv_curr .= (-(lz_v .+ lv_curr .+ rho_v.*(v_curr .- xbar_curr))) ./ (beta .+ rho_v) l_curr .= -(lz .+ beta.*z_curr) - compute_primal_residual_u(data, mod.gen_start, mod.line_start, mod.bus_start, rp_u, u_curr, xbar_curr, zu_curr) + compute_primal_residual_u(env, rp_u, u_curr, xbar_curr, zu_curr) rp_v .= v_curr .- xbar_curr .+ zv_curr #= @@ -697,28 +733,26 @@ function admm_restart_two_level(env::AdmmEnv; outer_iterlim=10, inner_iterlim=80 time_bus += tgpu.time # Update xbar. - @cuda threads=64 blocks=(div(mod.ngen-1, 64)+1) update_xbar_generator_kernel(mod.ngen, mod.gen_start, u_curr, v_curr, + @cuda threads=64 blocks=(div(mod.ngen-1, 64)+1) update_xbar_generator_kernel(mod, u_curr, v_curr, xbar_curr, zu_curr, zv_curr, lu_curr, lv_curr, rho_u, rho_v) - @cuda threads=64 blocks=(div(mod.nline-1, 64)+1) update_xbar_branch_kernel(mod.nline, mod.line_start, u_curr, v_curr, + @cuda threads=64 blocks=(div(mod.nline-1, 64)+1) update_xbar_branch_kernel(mod, u_curr, v_curr, xbar_curr, zu_curr, zv_curr, lu_curr, lv_curr, rho_u, rho_v) - @cuda threads=64 blocks=(div(mod.nbus-1, 64)+1) update_xbar_bus_kernel(mod.nbus, mod.line_start, mod.bus_start, mod.FrStart, mod.FrIdx, mod.ToStart, mod.ToIdx, - u_curr, v_curr, xbar_curr, zu_curr, zv_curr, lu_curr, lv_curr, rho_u, rho_v) + @cuda threads=64 blocks=(div(mod.nbus-1, 64)+1) update_xbar_bus_kernel(mod, u_curr, v_curr, xbar_curr, zu_curr, zv_curr, lu_curr, lv_curr, rho_u, rho_v) CUDA.synchronize() # Update z. 
- @cuda threads=64 blocks=(div(mod.ngen-1, 64)+1) update_zu_generator_kernel(mod.ngen, mod.gen_start, u_curr, + @cuda threads=64 blocks=(div(mod.ngen-1, 64)+1) update_zu_generator_kernel(mod, u_curr, xbar_curr, zu_curr, lu_curr, rho_u, lz_u, beta) - @cuda threads=64 blocks=(div(mod.nline-1, 64)+1) update_zu_branch_kernel(mod.nline, mod.line_start, mod.bus_start, mod.brBusIdx, - u_curr, xbar_curr, zu_curr, lu_curr, rho_u, lz_u, beta) - @cuda threads=64 blocks=(div(mod.nvar_v-1, 64)+1) update_zv_kernel(mod.nvar_v, v_curr, xbar_curr, zv_curr, + @cuda threads=64 blocks=(div(mod.nline-1, 64)+1) update_zu_branch_kernel(mod, u_curr, xbar_curr, zu_curr, lu_curr, rho_u, lz_u, beta) + @cuda threads=64 blocks=(div(mod.nvar_v-1, 64)+1) update_zv_kernel(mod, v_curr, xbar_curr, zv_curr, lv_curr, rho_v, lz_v, beta) CUDA.synchronize() # Update multiiplier and residuals. - @cuda threads=64 blocks=(div(mod.nvar-1, 64)+1) update_l_kernel(mod.nvar, l_curr, z_curr, lz, beta) - @cuda threads=64 blocks=(div(mod.ngen-1, 64)+1) compute_primal_residual_u_generator_kernel(mod.ngen, mod.gen_start, rp_u, u_curr, xbar_curr, zu_curr) - @cuda threads=64 blocks=(div(mod.nline-1, 64)+1) compute_primal_residual_u_branch_kernel(mod.nline, mod.line_start, mod.bus_start, mod.brBusIdx, rp_u, u_curr, xbar_curr, zu_curr) - @cuda threads=64 blocks=(div(mod.nvar_v-1, 64)+1) compute_primal_residual_v_kernel(mod.nvar_v, rp_v, v_curr, xbar_curr, zv_curr) + @cuda threads=64 blocks=(div(mod.nvar-1, 64)+1) update_l_kernel(mod, l_curr, z_curr, lz, beta) + @cuda threads=64 blocks=(div(mod.ngen-1, 64)+1) compute_primal_residual_u_generator_kernel(mod, rp_u, u_curr, xbar_curr, zu_curr) + @cuda threads=64 blocks=(div(mod.nline-1, 64)+1) compute_primal_residual_u_branch_kernel(mod, rp_u, u_curr, xbar_curr, zu_curr) + @cuda threads=64 blocks=(div(mod.nvar_v-1, 64)+1) compute_primal_residual_v_kernel(mod, rp_v, v_curr, xbar_curr, zv_curr) @cuda threads=64 blocks=(div(mod.nvar-1, 64)+1) vector_difference(mod.nvar, rd, z_curr, z_prev) CUDA.synchronize() @@ -753,7 +787,7 @@ function admm_restart_two_level(env::AdmmEnv; outer_iterlim=10, inner_iterlim=80 if !env.use_gpu lz .+= max.(-par.MAX_MULTIPLIER, min.(par.MAX_MULTIPLIER, beta .* z_curr)) else - CUDA.@sync @cuda threads=64 blocks=(div(mod.nvar-1, 64)+1) update_lz_kernel(mod.nvar, par.MAX_MULTIPLIER, z_curr, lz, beta) + CUDA.@sync @cuda threads=64 blocks=(div(mod.nvar-1, 64)+1) update_lz_kernel(env, z_curr, lz, beta) end if z_curr_norm > theta*z_prev_norm @@ -764,9 +798,9 @@ function admm_restart_two_level(env::AdmmEnv; outer_iterlim=10, inner_iterlim=80 if par.verbose > 0 # Test feasibility of global variable xbar: - pg_err, qg_err = check_generator_bounds(data, mod.gen_start, xbar_curr; use_gpu = env.use_gpu) - vm_err = check_voltage_bounds(data, mod.bus_start, xbar_curr) - real_err, reactive_err = check_power_balance_violation(data, mod.gen_start, mod.line_start, mod.bus_start, xbar_curr, mod.YshR, mod.YshI) + pg_err, qg_err = check_generator_bounds(env, xbar_curr) + vm_err = check_voltage_bounds(env, xbar_curr) + real_err, reactive_err = check_power_balance_violation(env, xbar_curr) @printf(" ** Violations of global variable xbar\n") @printf("Real power generator bounds = %.6e\n", pg_err) @printf("Reactive power generator bounds = %.6e\n", qg_err) @@ -797,7 +831,7 @@ function admm_restart_two_level(env::AdmmEnv; outer_iterlim=10, inner_iterlim=80 return end -function admm_rect_gpu_two_level(case, ::Type{VT}; +function admm_rect_gpu_two_level(case::String, ::Type{VT}; outer_iterlim=10, 
inner_iterlim=800, rho_pq=400.0, rho_va=40000.0, scale=1e-4, use_gpu=false, use_polar=true, gpu_no=1, verbose=1) where VT From 1a329ee3b4b379f6157aea6b452433a068933d1a Mon Sep 17 00:00:00 2001 From: Kibaek Kim Date: Mon, 24 May 2021 10:50:33 -0500 Subject: [PATCH 4/6] rename structs --- src/admm/environment.jl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/admm/environment.jl b/src/admm/environment.jl index 532c549..d751920 100644 --- a/src/admm/environment.jl +++ b/src/admm/environment.jl @@ -129,11 +129,11 @@ end abstract type AbstractSolution{T,TD} end """ - Solution{T,TD} + SolutionOneLevel{T,TD} This contains the solutions of ACOPF model instance, including the ADMM parameter rho. """ -mutable struct Solution{T,TD} <: AbstractSolution{T,TD} +mutable struct SolutionOneLevel{T,TD} <: AbstractSolution{T,TD} u_curr::TD v_curr::TD l_curr::TD @@ -145,7 +145,7 @@ mutable struct Solution{T,TD} <: AbstractSolution{T,TD} rp::TD objval::T - function Solution{T,TD}(model::Model) where {T, TD<:AbstractArray{T}} + function SolutionOneLevel{T,TD}(model::Model) where {T, TD<:AbstractArray{T}} return new{T,TD}( TD(undef, model.nvar), TD(undef, model.nvar), @@ -162,12 +162,12 @@ mutable struct Solution{T,TD} <: AbstractSolution{T,TD} end """ - Solution2{T,TD} + SolutionTwoLevel{T,TD} This contains the solutions of ACOPF model instance for two-level ADMM algorithm, including the ADMM parameter rho. """ -mutable struct Solution2{T,TD} <: AbstractSolution{T,TD} +mutable struct SolutionTwoLevel{T,TD} <: AbstractSolution{T,TD} x_curr::TD xbar_curr::TD z_outer::TD @@ -183,7 +183,7 @@ mutable struct Solution2{T,TD} <: AbstractSolution{T,TD} wRIij::TD objval::T - function Solution2{T,TD}(model::Model) where {T, TD<:AbstractArray{T}} + function SolutionTwoLevel{T,TD}(model::Model) where {T, TD<:AbstractArray{T}} return new{T,TD}( TD(undef, model.nvar), # x_curr TD(undef, model.nvar_v), # xbar_curr @@ -241,10 +241,10 @@ mutable struct AdmmEnv{T,TD,TI,TM} env.data.lines, env.data.buses, env.data.baseMVA; VI=Array{Int}, VD=Array{T})...) if !use_twolevel - env.solution = Solution{T,TD}(env.model) + env.solution = SolutionOneLevel{T,TD}(env.model) init_solution!(env, ybus, rho_pq, rho_va) else - env.solution = Solution2{T,TD}(env.model) + env.solution = SolutionTwoLevel{T,TD}(env.model) init_values_two_level!(env, ybus, rho_pq, rho_va) end From 64916403694bbc64d3ec4012519152b9f7deb4c1 Mon Sep 17 00:00:00 2001 From: Kibaek Kim Date: Mon, 24 May 2021 10:57:05 -0500 Subject: [PATCH 5/6] multiple dispatch to init_solution! 
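
Both solution types are now initialized through the single generic
function init_solution!, and Julia's dispatch on the second argument
selects the one- or two-level method. A self-contained toy of the
pattern (init! here is only a stand-in for init_solution!, which takes
the env, ybus, and rho arguments):

    abstract type AbstractSolution end
    struct SolutionOneLevel <: AbstractSolution end
    struct SolutionTwoLevel <: AbstractSolution end

    # One entry point; the method is chosen from the runtime type of
    # `sol`, so callers no longer branch on use_twolevel.
    init!(sol::SolutionOneLevel) = println("one-level init")
    init!(sol::SolutionTwoLevel) = println("two-level init")

    for sol in (SolutionOneLevel(), SolutionTwoLevel())
        init!(sol)
    end

One caveat in the hunk below: ifelse is an ordinary function, so both
SolutionTwoLevel and SolutionOneLevel objects are constructed before
one is discarded; a ternary (use_twolevel ? ... : ...) would allocate
only the solution that is kept.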
--- src/admm/acopf_admm_gpu.jl | 4 ++-- src/admm/acopf_admm_gpu_two_level.jl | 4 ++-- src/admm/environment.jl | 11 ++++------- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/admm/acopf_admm_gpu.jl b/src/admm/acopf_admm_gpu.jl index f9f881c..6404e2f 100644 --- a/src/admm/acopf_admm_gpu.jl +++ b/src/admm/acopf_admm_gpu.jl @@ -118,8 +118,8 @@ function get_branch_data(data::OPFData; use_gpu=false) end end -function init_solution!(env::AdmmEnv, ybus::Ybus, rho_pq, rho_va) - sol, data, model = env.solution, env.data, env.model +function init_solution!(env::AdmmEnv, sol::SolutionOneLevel, ybus::Ybus, rho_pq, rho_va) + data, model = env.data, env.model lines = data.lines buses = data.buses diff --git a/src/admm/acopf_admm_gpu_two_level.jl b/src/admm/acopf_admm_gpu_two_level.jl index 93c561f..22291e8 100644 --- a/src/admm/acopf_admm_gpu_two_level.jl +++ b/src/admm/acopf_admm_gpu_two_level.jl @@ -116,8 +116,8 @@ function get_branch_bus_index(data::OPFData; use_gpu=false) end end -function init_values_two_level!(env::AdmmEnv, ybus::Ybus, rho_pq, rho_va) - sol, data, model = env.solution, env.data, env.model +function init_solution!(env::AdmmEnv, sol::SolutionTwoLevel, ybus::Ybus, rho_pq, rho_va) + data, model = env.data, env.model gen_start, line_start, bus_start = model.gen_start, model.line_start, model.bus_start lines = data.lines diff --git a/src/admm/environment.jl b/src/admm/environment.jl index d751920..6f13512 100644 --- a/src/admm/environment.jl +++ b/src/admm/environment.jl @@ -240,13 +240,10 @@ mutable struct AdmmEnv{T,TD,TI,TM} ybus = Ybus{Array{T}}(computeAdmitances( env.data.lines, env.data.buses, env.data.baseMVA; VI=Array{Int}, VD=Array{T})...) - if !use_twolevel - env.solution = SolutionOneLevel{T,TD}(env.model) - init_solution!(env, ybus, rho_pq, rho_va) - else - env.solution = SolutionTwoLevel{T,TD}(env.model) - init_values_two_level!(env, ybus, rho_pq, rho_va) - end + env.solution = ifelse(use_twolevel, + SolutionTwoLevel{T,TD}(env.model), + SolutionOneLevel{T,TD}(env.model)) + init_solution!(env, env.solution, ybus, rho_pq, rho_va) env.membuf = TM(undef, (31, env.model.nline)) fill!(env.membuf, 0.0) From 17a83caa90af2d12c88efeb467924ee56b46f4ab Mon Sep 17 00:00:00 2001 From: Kibaek Kim Date: Mon, 24 May 2021 12:00:01 -0500 Subject: [PATCH 6/6] should not use struct in cuda kernel functions --- src/admm/acopf_admm_gpu_two_level.jl | 56 +++++++++++----------------- 1 file changed, 22 insertions(+), 34 deletions(-) diff --git a/src/admm/acopf_admm_gpu_two_level.jl b/src/admm/acopf_admm_gpu_two_level.jl index 22291e8..4b06977 100644 --- a/src/admm/acopf_admm_gpu_two_level.jl +++ b/src/admm/acopf_admm_gpu_two_level.jl @@ -240,8 +240,7 @@ function update_xbar(env::AdmmEnv, u, v, xbar, zu, zv, lu, lv, rho_u, rho_v) end end -function update_xbar_generator_kernel(model::Model, u, v, xbar, zu, zv, lu, lv, rho_u, rho_v) - n, gen_start = model.n, model.gen_start +function update_xbar_generator_kernel(n::Int, gen_start::Int, u, v, xbar, zu, zv, lu, lv, rho_u, rho_v) tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) if tx <= n @@ -255,8 +254,7 @@ function update_xbar_generator_kernel(model::Model, u, v, xbar, zu, zv, lu, lv, return end -function update_xbar_branch_kernel(model::Model, u, v, xbar, zu, zv, lu, lv, rho_u, rho_v) - n, line_start = model.n, model.line_start +function update_xbar_branch_kernel(n::Int, line_start::Int, u, v, xbar, zu, zv, lu, lv, rho_u, rho_v) tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) if tx <= n @@ -281,9 +279,7 @@ function 
update_xbar_branch_kernel(model::Model, u, v, xbar, zu, zv, lu, lv, rho return end -function update_xbar_bus_kernel(model::Model, u, v, xbar, zu, zv, lu, lv, rho_u, rho_v) - n, line_start, bus_start = model.n, model.line_start, model.bus_start - FrStart, FrIdx, ToStart, ToIdx = model.FrStart, model.FrIdx, model.ToStart, model.ToIdx +function update_xbar_bus_kernel(n::Int, line_start::Int, bus_start::Int, FrStart, FrIdx, ToStart, ToIdx, u, v, xbar, zu, zv, lu, lv, rho_u, rho_v) b = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) if b <= n @@ -376,8 +372,7 @@ function update_zu(env::AdmmEnv, u, xbar, z, l, rho, lz, beta) end end -function update_zu_generator_kernel(model::Model, u, xbar, z, l, rho, lz, beta) - n, gen_start = model.n, model.gen_start +function update_zu_generator_kernel(n::Int, gen_start::Int, u, xbar, z, l, rho, lz, beta) tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) if tx <= n @@ -391,8 +386,7 @@ function update_zu_generator_kernel(model::Model, u, xbar, z, l, rho, lz, beta) return end -function update_zu_branch_kernel(model::Model, u, xbar, z, l, rho, lz, beta) - n, line_start, bus_start, brBusIdx = model.n, model.line_start, model.bus_start, model.brBusIdx +function update_zu_branch_kernel(n::Int, line_start::Int, bus_start::Int, brBusIdx, u, xbar, z, l, rho, lz, beta) tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) if tx <= n @@ -417,8 +411,7 @@ function update_zu_branch_kernel(model::Model, u, xbar, z, l, rho, lz, beta) return end -function update_zv_kernel(model::Model, v, xbar, z, l, rho, lz, beta) - n = model.n +function update_zv_kernel(n::Int, v, xbar, z, l, rho, lz, beta) tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) if tx <= n @@ -430,8 +423,7 @@ function update_zv_kernel(model::Model, v, xbar, z, l, rho, lz, beta) return end -function update_l_kernel(model::Model, l, z, lz, beta) - n = model.n +function update_l_kernel(n::Int, l, z, lz, beta) tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) if tx <= n @@ -473,8 +465,7 @@ function compute_primal_residual_u(env::AdmmEnv, rp_u, u, xbar, z) end end -function compute_primal_residual_u_generator_kernel(model::Model, rp, u, xbar, z) - n, gen_start = model.n, model.gen_start +function compute_primal_residual_u_generator_kernel(n::Int, gen_start::Int, rp, u, xbar, z) tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) if tx <= n @@ -488,8 +479,7 @@ function compute_primal_residual_u_generator_kernel(model::Model, rp, u, xbar, z return end -function compute_primal_residual_u_branch_kernel(model::Model, rp, u, xbar, z) - n, line_start, bus_start, brBusIdx = model.n, model.line_start, model.bus_start, model.brBusIdx +function compute_primal_residual_u_branch_kernel(n::Int, line_start::Int, bus_start::Int, brBusIdx, rp, u, xbar, z) tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) if tx <= n @@ -511,8 +501,7 @@ function compute_primal_residual_u_branch_kernel(model::Model, rp, u, xbar, z) return end -function compute_primal_residual_v_kernel(model::Model, rp, v, xbar, z) - n = model.n +function compute_primal_residual_v_kernel(n::Int, rp, v, xbar, z) tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) if tx <= n @@ -532,8 +521,7 @@ function vector_difference(n::Int, c, a, b) return end -function update_lz_kernel(env::AdmmEnv, z, lz, beta) - n, max_limit = env.model.n, env.params.max_limit +function update_lz_kernel(n::Int, max_limit::Float64, z, lz, beta) tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) if tx <= n @@ -733,26 +721,26 @@ function 
admm_restart_two_level(env::AdmmEnv; outer_iterlim=10, inner_iterlim=80
         time_bus += tgpu.time

         # Update xbar.
-        @cuda threads=64 blocks=(div(mod.ngen-1, 64)+1) update_xbar_generator_kernel(mod, u_curr, v_curr,
+        @cuda threads=64 blocks=(div(mod.ngen-1, 64)+1) update_xbar_generator_kernel(mod.n, mod.gen_start, u_curr, v_curr,
                                                                                      xbar_curr, zu_curr, zv_curr, lu_curr, lv_curr, rho_u, rho_v)
-        @cuda threads=64 blocks=(div(mod.nline-1, 64)+1) update_xbar_branch_kernel(mod, u_curr, v_curr,
+        @cuda threads=64 blocks=(div(mod.nline-1, 64)+1) update_xbar_branch_kernel(mod.n, mod.line_start, u_curr, v_curr,
                                                                                    xbar_curr, zu_curr, zv_curr, lu_curr, lv_curr, rho_u, rho_v)
-        @cuda threads=64 blocks=(div(mod.nbus-1, 64)+1) update_xbar_bus_kernel(mod, u_curr, v_curr, xbar_curr, zu_curr, zv_curr, lu_curr, lv_curr, rho_u, rho_v)
+        @cuda threads=64 blocks=(div(mod.nbus-1, 64)+1) update_xbar_bus_kernel(mod.n, mod.line_start, mod.bus_start, mod.FrStart, mod.FrIdx, mod.ToStart, mod.ToIdx, u_curr, v_curr, xbar_curr, zu_curr, zv_curr, lu_curr, lv_curr, rho_u, rho_v)
         CUDA.synchronize()

         # Update z.
-        @cuda threads=64 blocks=(div(mod.ngen-1, 64)+1) update_zu_generator_kernel(mod, u_curr,
+        @cuda threads=64 blocks=(div(mod.ngen-1, 64)+1) update_zu_generator_kernel(mod.n, mod.gen_start, u_curr,
                                                                                    xbar_curr, zu_curr, lu_curr, rho_u, lz_u, beta)
-        @cuda threads=64 blocks=(div(mod.nline-1, 64)+1) update_zu_branch_kernel(mod, u_curr, xbar_curr, zu_curr, lu_curr, rho_u, lz_u, beta)
-        @cuda threads=64 blocks=(div(mod.nvar_v-1, 64)+1) update_zv_kernel(mod, v_curr, xbar_curr, zv_curr,
+        @cuda threads=64 blocks=(div(mod.nline-1, 64)+1) update_zu_branch_kernel(mod.n, mod.line_start, mod.bus_start, mod.brBusIdx, u_curr, xbar_curr, zu_curr, lu_curr, rho_u, lz_u, beta)
+        @cuda threads=64 blocks=(div(mod.nvar_v-1, 64)+1) update_zv_kernel(mod.n, v_curr, xbar_curr, zv_curr,
                                                                            lv_curr, rho_v, lz_v, beta)
         CUDA.synchronize()

         # Update multiplier and residuals.
-        @cuda threads=64 blocks=(div(mod.nvar-1, 64)+1) update_l_kernel(mod, l_curr, z_curr, lz, beta)
-        @cuda threads=64 blocks=(div(mod.ngen-1, 64)+1) compute_primal_residual_u_generator_kernel(mod, rp_u, u_curr, xbar_curr, zu_curr)
-        @cuda threads=64 blocks=(div(mod.nline-1, 64)+1) compute_primal_residual_u_branch_kernel(mod, rp_u, u_curr, xbar_curr, zu_curr)
-        @cuda threads=64 blocks=(div(mod.nvar_v-1, 64)+1) compute_primal_residual_v_kernel(mod, rp_v, v_curr, xbar_curr, zv_curr)
+        @cuda threads=64 blocks=(div(mod.nvar-1, 64)+1) update_l_kernel(mod.n, l_curr, z_curr, lz, beta)
+        @cuda threads=64 blocks=(div(mod.ngen-1, 64)+1) compute_primal_residual_u_generator_kernel(mod.n, mod.gen_start, rp_u, u_curr, xbar_curr, zu_curr)
+        @cuda threads=64 blocks=(div(mod.nline-1, 64)+1) compute_primal_residual_u_branch_kernel(mod.n, mod.line_start, mod.bus_start, mod.brBusIdx, rp_u, u_curr, xbar_curr, zu_curr)
+        @cuda threads=64 blocks=(div(mod.nvar_v-1, 64)+1) compute_primal_residual_v_kernel(mod.n, rp_v, v_curr, xbar_curr, zv_curr)
         @cuda threads=64 blocks=(div(mod.nvar-1, 64)+1) vector_difference(mod.nvar, rd, z_curr, z_prev)
         CUDA.synchronize()
@@ -787,7 +775,7 @@ function admm_restart_two_level(env::AdmmEnv; outer_iterlim=10, inner_iterlim=80
         if !env.use_gpu
             lz .+= max.(-par.MAX_MULTIPLIER, min.(par.MAX_MULTIPLIER, beta .* z_curr))
         else
-            CUDA.@sync @cuda threads=64 blocks=(div(mod.nvar-1, 64)+1) update_lz_kernel(env, z_curr, lz, beta)
+            CUDA.@sync @cuda threads=64 blocks=(div(mod.nvar-1, 64)+1) update_lz_kernel(mod.nvar, par.MAX_MULTIPLIER, z_curr, lz, beta)
         end

         if z_curr_norm > theta*z_prev_norm
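A note on the rationale behind [PATCH 6/6]: CUDA.jl requires kernel
arguments to be bitstypes (or convertible to one via cudaconvert), and a
mutable struct such as Model does not qualify, so the kernels revert to
taking plain integers and arrays unpacked at the launch site. A minimal
sketch of the pattern (Params and add_shift_kernel are hypothetical names
for illustration, not part of the patch):

    using CUDA

    mutable struct Params   # mutable => not isbits => cannot be a kernel argument
        n::Int
        shift::Float64
    end

    function add_shift_kernel(n::Int, shift::Float64, x)
        tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1))
        if tx <= n
            x[tx] += shift
        end
        return
    end

    p = Params(4, 1.5)
    x = CUDA.zeros(Float64, p.n)
    # Launching some_kernel(p, x) would fail to compile; instead unpack the
    # bits fields at the call site, as the patch does with mod.n, mod.gen_start, etc.
    @cuda threads=64 blocks=(div(p.n-1, 64)+1) add_shift_kernel(p.n, p.shift, x)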