Deprecate bang! variants towards a cleaner API #133

Merged · merged 1 commit · May 28, 2020
20 changes: 0 additions & 20 deletions docs/src/introduction/gettingstarted.md
@@ -198,26 +198,6 @@ julia> deriv2(L2DistLoss(), true_targets, pred_outputs)
2.0
```

Additionally, we provide mutating versions for the subset of
methods that return an array. These have the same function
signatures with the only difference of requiring an additional
parameter as the first argument. This variable should always be
the preallocated array that is to be used as storage.

```julia-repl
julia> buffer = zeros(3)
3-element Array{Float64,1}:
0.0
0.0
0.0

julia> deriv!(buffer, L2DistLoss(), true_targets, pred_outputs)
3-element Array{Float64,1}:
-1.0
4.0
2.0
```

## Getting Help

To get help on specific functionality you can either look up the
24 changes: 0 additions & 24 deletions docs/src/user/interface.md
@@ -157,18 +157,6 @@ julia> buffer .= value.(L1DistLoss(), [1.,2,3], [2,5,-2]) .* [2,1,0.5]
2.5
```

Even though broadcasting is supported, we do expose a vectorized
method natively. This is done mainly for API consistency reasons.
Internally it even uses broadcast itself, but it does provide the
additional benefit of a more reliable type-inference.

We also provide a mutating version for the same reasons. It
even utilizes `broadcast!` underneath.

```@docs
value!
```

## Computing the 1st Derivatives

Maybe the more interesting aspect of loss functions are their
@@ -221,18 +209,6 @@ julia> buffer .= deriv.(L2DistLoss(), [1.,2,3], [2,5,-2]) .* [2,1,0.5]
-5.0
```

While broadcast is supported, we do expose a vectorized method
natively. This is done mainly for API consistency reasons.
Internally it even uses broadcast itself, but it does provide the
additional benefit of a more reliable type-inference.

We also provide a mutating version for the same reasons. It
even utilizes ``broadcast!`` underneath.

```@docs
deriv!
```

## Computing the 2nd Derivatives

Additionally to the first derivative, we also provide the
11 changes: 5 additions & 6 deletions src/LossFunctions.jl
@@ -12,9 +12,7 @@ import LearnBase:
AggregateMode,
Loss, SupervisedLoss,
DistanceLoss, MarginLoss,
value, value!,
deriv, deriv!,
deriv2, deriv2!,
value, deriv, deriv2,
isdistancebased, ismarginbased,
isminimizable, isdifferentiable,
istwicedifferentiable,
@@ -40,16 +38,17 @@ include("plotrecipes.jl")
@deprecate weightedloss(l, w) WeightedMarginLoss(l, w)
@deprecate scaled(l, λ) ScaledLoss(l, λ)
@deprecate value_deriv(l,y,ŷ) (value(l,y,ŷ), deriv(l,y,ŷ))
@deprecate value!(b,l,y,ŷ,args...) (b .= value(l,y,ŷ,args...))
@deprecate deriv!(b,l,y,ŷ,args...) (b .= deriv(l,y,ŷ,args...))
@deprecate deriv2!(b,l,y,ŷ,args...) (b .= deriv2(l,y,ŷ,args...))

export
# loss API
Loss,
SupervisedLoss,
MarginLoss,
DistanceLoss,
value, value!,
deriv, deriv!,
deriv2, deriv2!,
value, deriv, deriv2,
isdistancebased, ismarginbased,
isminimizable, isdifferentiable,
istwicedifferentiable,
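For reference, a minimal migration sketch implied by the `@deprecate` rules above: the old mutating calls become broadcast assignments into a user-owned buffer. The loss and sample values below are hypothetical; only the call pattern mirrors the deprecation definitions in this diff.

```julia
using LossFunctions

loss    = L2DistLoss()        # hypothetical choice of loss
targets = [1.0, 2.0, 3.0]     # hypothetical data
outputs = [2.0, 5.0, -2.0]
buffer  = zeros(3)

# before this PR (now deprecated):
#   deriv!(buffer, loss, targets, outputs)

# after this PR, written the way the @deprecate rule rewrites it:
buffer .= deriv(loss, targets, outputs)
```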
23 changes: 8 additions & 15 deletions src/sparse.jl
@@ -1,30 +1,23 @@
@inline function value(loss::MarginLoss, target::AbstractSparseArray, output::AbstractArray)
buffer = similar(output)
value!(buffer, loss, target, output)
end

@generated function value!(
buffer::AbstractArray,
loss::MarginLoss,
target::AbstractSparseArray{Q,Ti,M},
output::AbstractArray{T,N}
) where {T,N,Q,Ti,M}
@generated function value(
loss::MarginLoss,
target::AbstractSparseArray{Q,Ti,M},
output::AbstractArray{T,N}) where {T,N,Q,Ti,M}
M > N && throw(ArgumentError("target has more dimensions than output; broadcasting not supported in this direction."))
quote
@dimcheck size(buffer) == size(output)
@nexprs $M (n)->@dimcheck(size(target,n) == size(output,n))
out = similar(output)
zeroQ = zero(Q)
negQ = Q(-1)
@simd for I in CartesianIndices(size(output))
@nexprs $N n->(i_n = I[n])
tgt = @nref($M,target,i)
if tgt == zeroQ
# convention is that zeros in a sparse array are interpreted as negative one
@inbounds @nref($N,buffer,i) = value(loss, negQ, @nref($N,output,i))
@inbounds @nref($N,out,i) = value(loss, negQ, @nref($N,output,i))
else
@inbounds @nref($N,buffer,i) = value(loss, tgt, @nref($N,output,i))
@inbounds @nref($N,out,i) = value(loss, tgt, @nref($N,output,i))
end
end
buffer
out
end
end
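A small usage sketch of the rewritten sparse method, assuming `SparseArrays` is loaded and `L1HingeLoss` is the margin loss; the data is made up, but the zero-means-negative convention is the one stated in the comment above.

```julia
using SparseArrays, LossFunctions

t = sparsevec([1, 3], [1.0, 1.0], 4)  # stored entries are +1; structural zeros are read as -1
ŷ = [0.5, -0.2, 0.8, 1.0]

# the non-mutating method now allocates its own result array (out = similar(output))
v = value(L1HingeLoss(), t, ŷ)
```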
72 changes: 9 additions & 63 deletions src/supervised.jl
@@ -43,15 +43,6 @@ for FUN in (:value, :deriv, :deriv2)
($FUN)(loss, targets, outputs, AggMode.None())
end

# (mutating) by default compute the element-wise result
@inline function ($(Symbol(FUN,:!)))(
buffer::AbstractArray,
loss::SupervisedLoss,
targets::AbstractArray,
outputs::AbstractArray)
($(Symbol(FUN,:!)))(buffer, loss, targets, outputs, AggMode.None())
end

# translate ObsDim.Last to the correct ObsDim.Constant (for code reduction)
@inline function ($FUN)(
loss::SupervisedLoss,
@@ -62,17 +53,6 @@ for FUN in (:value, :deriv, :deriv2)
($FUN)(loss, targets, outputs, agg, ObsDim.Constant{N}())
end

# (mutating) translate ObsDim.Last to the correct ObsDim.Constant (for code reduction)
@inline function ($(Symbol(FUN,:!)))(
buffer::AbstractArray,
loss::SupervisedLoss,
targets::AbstractArray,
outputs::AbstractArray{T,N},
agg::AggregateMode,
::ObsDim.Last = ObsDim.Last()) where {T,N}
($(Symbol(FUN,:!)))(buffer, loss, targets, outputs, agg, ObsDim.Constant{N}())
end

# -------------------
# AGGREGATION: NONE
# -------------------
@@ -87,16 +67,6 @@
end
end

function ($(Symbol(FUN,:!)))(
buffer::AbstractArray,
loss::SupervisedLoss,
target::AbstractArray{Q,M},
output::AbstractArray{T,N},
::AggMode.None) where {Q,M,T,N}
buffer .= ($FUN).(loss, target, output)
buffer
end

# ------------------
# AGGREGATION: SUM
# ------------------
@@ -124,27 +94,15 @@
output::AbstractArray{T,N},
avg::AggMode.Sum,
obsdim::ObsDim.Constant{O}) where {Q,T,N,O}
S = result_type(loss, Q, T)
buffer = zeros(S, size(output, O))
($(Symbol(FUN,:!)))(buffer, loss, target, output, avg, obsdim)
end

function ($(Symbol(FUN,:!)))(
buffer::AbstractVector{B},
loss::SupervisedLoss,
target::AbstractArray{Q,N},
output::AbstractArray{T,N},
::AggMode.Sum,
::ObsDim.Constant{O}) where {B,Q,T,N,O}
N == 1 && throw(ArgumentError("Sum per observation non sensible for two Vectors. Try omitting the obsdim"))
O > N && throw(ArgumentError("The specified obsdim is larger as the available dimensions."))
@dimcheck size(target) == size(output)
@dimcheck length(buffer) == size(output, O)
fill!(buffer, zero(B))
S = result_type(loss, Q, T)
out = zeros(S, size(output, O))
@inbounds @simd for I in CartesianIndices(size(output))
buffer[I[O]] += ($FUN)(loss, target[I], output[I])
out[I[O]] += ($FUN)(loss, target[I], output[I])
end
buffer
out
end

# -------------------
@@ -175,28 +133,16 @@
output::AbstractArray{T,N},
avg::AggMode.Mean,
obsdim::ObsDim.Constant{O}) where {Q,T,N,O}
S = result_type(loss, Q, T)
buffer = zeros(S, size(output, O))
($(Symbol(FUN,:!)))(buffer, loss, target, output, avg, obsdim)
end

function ($(Symbol(FUN,:!)))(
buffer::AbstractVector{B},
loss::SupervisedLoss,
target::AbstractArray{Q,N},
output::AbstractArray{T,N},
::AggMode.Mean,
::ObsDim.Constant{O}) where {B,Q,T,N,O}
N == 1 && throw(ArgumentError("Mean per observation non sensible for two Vectors. Try omitting the obsdim"))
O > N && throw(ArgumentError("The specified obsdim is larger as the available dimensions."))
@dimcheck size(target) == size(output)
@dimcheck length(buffer) == size(output, O)
fill!(buffer, zero(B))
nrm = 1 / B(prod(size(output,n) for n in 1:N if n != O))
S = result_type(loss, Q, T)
out = zeros(S, size(output, O))
nrm = 1 / S(prod(size(output,n) for n in 1:N if n != O))
@inbounds @simd for I in CartesianIndices(size(output))
buffer[I[O]] += ($FUN)(loss, target[I], output[I]) * nrm
out[I[O]] += ($FUN)(loss, target[I], output[I]) * nrm
end
buffer
out
end

# ---------------------------
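A sketch of how the rewritten per-observation aggregation is called after this change; `AggMode` and `ObsDim` are assumed to be in scope (they are used unqualified in the tests below), and the data is arbitrary.

```julia
using LossFunctions

targets = rand(2, 5)
outputs = rand(2, 5)

# one value per observation (here: per column); the result vector is
# allocated internally now that the buffer-taking variants are gone
s = value(L2DistLoss(), targets, outputs, AggMode.Sum(),  ObsDim.Last())
m = value(L2DistLoss(), targets, outputs, AggMode.Mean(), ObsDim.Last())
```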
1 change: 0 additions & 1 deletion test/runtests.jl
@@ -15,7 +15,6 @@ tests = [
]

perf = [
#"bm_datasource.jl"
]

# for deterministic testing
25 changes: 0 additions & 25 deletions test/tst_api.jl
@@ -1,11 +1,5 @@
function test_vector_value(l::MarginLoss, t, y)
ref = [value(l,t[i],y[i]) for i in CartesianIndices(size(y))]
buf = fill!(similar(ref), 0)
@test @inferred(value!(buf, l, t, y)) == ref
@test buf == ref
buf2 = fill!(similar(ref), 0)
@test @inferred(value!(buf2, l, t, y, AggMode.None())) == ref
@test buf2 == ref
@test @inferred(value(l, t, y, AggMode.None())) == ref
@test @inferred(value(l, t, y)) == ref
@test value.(Ref(l), t, y) == ref
@@ -35,33 +29,20 @@ function test_vector_value(l::MarginLoss, t, y)
# Sum per obs
sv = vec(sum(ref, dims=1:(ndims(ref)-1)))
@test @inferred(value(l, t, y, AggMode.Sum(), ObsDim.Last())) ≈ sv
buffer1 = fill!(similar(sv), 0)
@test @inferred(value!(buffer1, l, t, y, AggMode.Sum(), ObsDim.Last())) ≈ sv
@test buffer1 ≈ sv
buffer2 = fill!(similar(sv), 0)
# Weighted sum compare
@test @inferred(value(l, t, y, AggMode.WeightedSum(1:k), ObsDim.Last())) ≈ sum(sv .* (1:k))
@test round(@inferred(value(l, t, y, AggMode.WeightedSum(1:k,normalize=true), ObsDim.Last())), digits=3) ≈
round(sum(sv .* ((1:k)/(sum(1:k)))), digits=3)
# Mean per obs
mv = vec(mean(ref, dims=1:(ndims(ref)-1)))
@test @inferred(value(l, t, y, AggMode.Mean(), ObsDim.Last())) ≈ mv
buffer3 = fill!(similar(mv), 0)
@test @inferred(value!(buffer3, l, t, y, AggMode.Mean(), ObsDim.Last())) ≈ mv
@test buffer3 ≈ mv
# Weighted mean compare
@test @inferred(value(l, t, y, AggMode.WeightedMean(1:k,normalize=false), ObsDim.Last())) ≈ sum(mv .* (1:k))
end
end

function test_vector_value(l::DistanceLoss, t, y)
ref = [value(l,t[i],y[i]) for i in CartesianIndices(size(y))]
buf = fill!(similar(ref), 0)
@test @inferred(value!(buf, l, t, y)) == ref
@test buf == ref
buf2 = fill!(similar(ref), 0)
@test @inferred(value!(buf2, l, t, y, AggMode.None())) == ref
@test buf2 == ref
@test @inferred(value(l, t, y, AggMode.None())) == ref
@test @inferred(value(l, t, y)) == ref
@test value.(Ref(l), t, y) == ref
@@ -89,17 +70,11 @@ function test_vector_value(l::DistanceLoss, t, y)
# Sum per obs
sv = vec(sum(ref, dims=1:(ndims(ref)-1)))
@test @inferred(value(l, t, y, AggMode.Sum(), ObsDim.Last())) ≈ sv
buffer1 = fill!(similar(sv), 0)
@test @inferred(value!(buffer1, l, t, y, AggMode.Sum(), ObsDim.Last())) ≈ sv
@test buffer1 ≈ sv
# Weighted sum compare
@test @inferred(value(l, t, y, AggMode.WeightedSum(1:k,normalize=false), ObsDim.Last())) ≈ sum(sv .* (1:k))
# Mean per obs
mv = vec(mean(ref, dims=1:(ndims(ref)-1)))
@test @inferred(value(l, t, y, AggMode.Mean(), ObsDim.Last())) ≈ mv
buffer3 = fill!(copy(mv), 0)
@test @inferred(value!(buffer3, l, t, y, AggMode.Mean(), ObsDim.Last())) ≈ mv
@test buffer3 ≈ mv
# Weighted mean compare
@test @inferred(value(l, t, y, AggMode.WeightedMean(1:k,normalize=false), ObsDim.Last())) ≈ sum(mv .* (1:k))
end