
dgpnorm for CPU vs. GPU #51

Closed · kibaekkim opened this issue Apr 28, 2023 · 1 comment

@kibaekkim (Member)
I think the CPU and GPU codes are not using the same norm.

function dgpnorm(n::Int, x::Array{Float64}, xl::Array{Float64},
                 xu::Array{Float64}, g::Array{Float64})
    inf_norm = 0.0
    for i = 1:n
        if xl[i] != xu[i]
            if x[i] == xl[i]
                v = (min(g[i], 0.0))^2
            elseif x[i] == xu[i]
                v = (max(g[i], 0.0))^2
            else
                v = g[i]^2
            end
            v = sqrt(v)
            inf_norm = (inf_norm > v) ? inf_norm : v
        end
    end
    return inf_norm
end
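
For reference, a small usage sketch of the CPU version above (the values are made up for illustration, not taken from ExaTron's tests): at a variable sitting on its lower bound only the negative part of the gradient counts, at an upper bound only the positive part, so the result below comes from the free variable alone.

# Hypothetical data: x[1] is at its lower bound, x[3] at its upper bound.
x  = [0.0, 0.5, 1.0]
xl = [0.0, 0.0, 0.0]
xu = [1.0, 1.0, 1.0]
g  = [2.0, -0.3, -4.0]

# g[1] = 2.0 points into the feasible set at the lower bound and g[3] = -4.0
# points into it at the upper bound, so both are clipped to zero; only the
# free component g[2] contributes.
dgpnorm(3, x, xl, xu, g)   # == 0.3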

@inline function ExaTron.dgpnorm(n::Int, x::CuDeviceArray{Float64,1}, xl::CuDeviceArray{Float64,1},
                                 xu::CuDeviceArray{Float64,1}, g::CuDeviceArray{Float64,1})
    tx = threadIdx().x
    v = 0.0
    if tx <= n
        @inbounds begin
            if xl[tx] != xu[tx]
                if x[tx] == xl[tx]
                    v = min(g[tx], 0.0)
                elseif x[tx] == xu[tx]
                    v = max(g[tx], 0.0)
                else
                    v = g[tx]
                end
                v = abs(v)
            end
        end
    end

    # shfl_down_sync() will automatically sync threads in a warp.
    offset = 16
    while offset > 0
        v = max(v, CUDA.shfl_down_sync(0xffffffff, v, offset))
        offset >>= 1
    end
    v = CUDA.shfl_sync(0xffffffff, v, 1)

    return v
end

@inline function ExaTron.dgpnorm(n::Int, x, xl,
                                 xu, g,
                                 tx)
    @synchronize
    res = 0.0
    inf_norm = @localmem Float64 (1,)
    v = 0.0
    if tx == 1
        inf_norm[1] = 0.0
        for i in 1:n
            @inbounds begin
                if xl[i] != xu[i]
                    if x[i] == xl[i]
                        v = min(g[i], 0.0)
                        v = v*v
                    elseif x[i] == xu[i]
                        v = max(g[i], 0.0)
                        v = v*v
                    else
                        v = g[i]*g[i]
                    end
                    v = sqrt(v)
                    if inf_norm[1] > v
                        inf_norm[1] = inf_norm[1]
                    else
                        inf_norm[1] = v
                    end
                end
            end
        end
    end
    @synchronize
    res = inf_norm[1]
    return res
end

ExaTron.jl/src/driver.jl, lines 42 to 67 at 3ebc6bc:

function gpnorm(n, x, x_l, x_u, g)
    two_norm = 0.0
    inf_norm = 0.0
    for i = 1:n
        if x_l[i] != x_u[i]
            if x[i] == x_l[i]
                val = (min(g[i], 0.0))^2
            elseif x[i] == x_u[i]
                val = (max(g[i], 0.0))^2
            else
                val = g[i]^2
            end
            two_norm += val
            val = sqrt(val)
            if inf_norm < val
                inf_norm = val
            end
        end
    end
    two_norm = sqrt(two_norm)
    return two_norm, inf_norm
end

I am not sure if this is intended. We need @youngdae to confirm this.
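
One quick way to check numerically whether the two formulas agree is to compare dgpnorm against the inf_norm component returned by gpnorm on random bound-constrained data. A minimal sketch, assuming both functions quoted above are in scope:

# Sketch: dgpnorm should match the second return value of gpnorm.
using Random
Random.seed!(1)

n  = 100
xl = -rand(n)
xu =  rand(n)
x  = clamp.(randn(n), xl, xu)   # some components land exactly on a bound
g  = randn(n)

two_norm, inf_norm = gpnorm(n, x, xl, xu, g)
@assert dgpnorm(n, x, xl, xu, g) ≈ inf_norm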

@kibaekkim (Member, Author)

Never mind, they are the same; they just compute it in different ways.
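
Concretely, the CPU path squares the clipped gradient component and then takes sqrt, while the GPU kernel takes abs of the clipped component directly; for finite values whose square neither overflows nor underflows, sqrt(v^2) equals abs(v), so both return the same infinity norm. A tiny check of that identity (values chosen arbitrarily):

# The CPU code computes sqrt(v^2); the GPU kernel computes abs(v).
# For these finite values the two agree exactly.
for v in (-2.5, 0.0, 0.75, 3.0)
    @assert sqrt(v^2) == abs(v)
end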
