
dgpnorm for CPU vs. GPU #51

Closed · kibaekkim opened this issue Apr 28, 2023 · 1 comment

@kibaekkim (Member)
I think the CPU and GPU codes are not using the same norm.

function dgpnorm(n::Int, x::Array{Float64}, xl::Array{Float64},
                 xu::Array{Float64}, g::Array{Float64})
    inf_norm = 0.0
    for i = 1:n
        if xl[i] != xu[i]
            if x[i] == xl[i]
                v = (min(g[i], 0.0))^2
            elseif x[i] == xu[i]
                v = (max(g[i], 0.0))^2
            else
                v = g[i]^2
            end
            v = sqrt(v)
            inf_norm = (inf_norm > v) ? inf_norm : v
        end
    end
    return inf_norm
end
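
For reference, a small usage sketch of the CPU version above (the values are made up for illustration, not taken from ExaTron's tests): at a variable sitting on its lower bound only the negative part of the gradient counts, at an upper bound only the positive part, so the result below comes from the free variable alone.

# Hypothetical data: x[1] is at its lower bound, x[3] at its upper bound.
x  = [0.0, 0.5, 1.0]
xl = [0.0, 0.0, 0.0]
xu = [1.0, 1.0, 1.0]
g  = [2.0, -0.3, -4.0]

# g[1] = 2.0 points into the feasible set at the lower bound and g[3] = -4.0
# points into it at the upper bound, so both are clipped to zero; only the
# free component g[2] contributes.
dgpnorm(3, x, xl, xu, g)   # == 0.3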

@inline function ExaTron.dgpnorm(n::Int, x::CuDeviceArray{Float64,1}, xl::CuDeviceArray{Float64,1},
                                 xu::CuDeviceArray{Float64,1}, g::CuDeviceArray{Float64,1})
    tx = threadIdx().x
    v = 0.0
    if tx <= n
        @inbounds begin
            if xl[tx] != xu[tx]
                if x[tx] == xl[tx]
                    v = min(g[tx], 0.0)
                elseif x[tx] == xu[tx]
                    v = max(g[tx], 0.0)
                else
                    v = g[tx]
                end
                v = abs(v)
            end
        end
    end

    # shfl_down_sync() will automatically sync threads in a warp.
    offset = 16
    while offset > 0
        v = max(v, CUDA.shfl_down_sync(0xffffffff, v, offset))
        offset >>= 1
    end
    v = CUDA.shfl_sync(0xffffffff, v, 1)

    return v
end

@inline function ExaTron.dgpnorm(n::Int, x, xl,
                                 xu, g,
                                 tx)
    @synchronize
    res = 0.0
    inf_norm = @localmem Float64 (1,)
    v = 0.0
    if tx == 1
        inf_norm[1] = 0.0
        for i in 1:n
            @inbounds begin
                if xl[i] != xu[i]
                    if x[i] == xl[i]
                        v = min(g[i], 0.0)
                        v = v*v
                    elseif x[i] == xu[i]
                        v = max(g[i], 0.0)
                        v = v*v
                    else
                        v = g[i]*g[i]
                    end
                    v = sqrt(v)
                    if inf_norm[1] > v
                        inf_norm[1] = inf_norm[1]
                    else
                        inf_norm[1] = v
                    end
                end
            end
        end
    end
    @synchronize
    res = inf_norm[1]
    return res
end

ExaTron.jl/src/driver.jl, lines 42 to 67 at 3ebc6bc:

function gpnorm(n, x, x_l, x_u, g)
    two_norm = 0.0
    inf_norm = 0.0
    for i = 1:n
        if x_l[i] != x_u[i]
            if x[i] == x_l[i]
                val = (min(g[i], 0.0))^2
            elseif x[i] == x_u[i]
                val = (max(g[i], 0.0))^2
            else
                val = g[i]^2
            end
            two_norm += val
            val = sqrt(val)
            if inf_norm < val
                inf_norm = val
            end
        end
    end
    two_norm = sqrt(two_norm)
    return two_norm, inf_norm
end

I am not sure if this is intended. We need @youngdae to confirm this.
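
One quick way to check numerically whether the two formulas agree is to compare dgpnorm against the inf_norm component returned by gpnorm on random bound-constrained data. A minimal sketch, assuming both functions quoted above are in scope:

# Sketch: dgpnorm should match the second return value of gpnorm.
using Random
Random.seed!(1)

n  = 100
xl = -rand(n)
xu =  rand(n)
x  = clamp.(randn(n), xl, xu)   # some components land exactly on a bound
g  = randn(n)

two_norm, inf_norm = gpnorm(n, x, xl, xu, g)
@assert dgpnorm(n, x, xl, xu, g) ≈ inf_norm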

@kibaekkim (Member, Author)

Never mind, they are the same; they just compute it in different ways.
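
Concretely, the CPU path squares the clipped gradient component and then takes sqrt, while the GPU kernel takes abs of the clipped component directly; for finite values whose square neither overflows nor underflows, sqrt(v^2) equals abs(v), so both return the same infinity norm. A tiny check of that identity (values chosen arbitrarily):

# The CPU code computes sqrt(v^2); the GPU kernel computes abs(v).
# For these finite values the two agree exactly.
for v in (-2.5, 0.0, 0.75, 3.0)
    @assert sqrt(v^2) == abs(v)
end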
