From 3d62956c82051f9d6b36cb1bd4c9c1af4f8eda09 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?=
Date: Mon, 24 Apr 2023 16:10:30 -0300
Subject: [PATCH 1/9] Get rid of AggMode.None

---
 src/aggmode.jl  |  9 ---------
 src/losses.jl   | 24 +----------------------
 test/aggmode.jl | 15 ++++-----------
 test/core.jl    |  4 ++--
 4 files changed, 7 insertions(+), 45 deletions(-)

diff --git a/src/aggmode.jl b/src/aggmode.jl
index eb66e19..728d95e 100644
--- a/src/aggmode.jl
+++ b/src/aggmode.jl
@@ -8,7 +8,6 @@ abstract type AggregateMode end
 
 Types for aggregation of multiple observations.
 
-- `AggMode.None()`
 - `AggMode.Sum()`
 - `AggMode.Mean()`
 - `AggMode.WeightedSum(weights)`
@@ -17,14 +16,6 @@ Types for aggregation of multiple observations.
 module AggMode
     using ..LossFunctions: AggregateMode
 
-    """
-        AggMode.None()
-
-    Opt-out of aggregation. This is usually the default value.
-    Using `None` will cause the element-wise results to be returned.
-    """
-    struct None <: AggregateMode end
-
     """
         AggMode.Sum()
 
diff --git a/src/losses.jl b/src/losses.jl
index 8f28245..447de9a 100644
--- a/src/losses.jl
+++ b/src/losses.jl
@@ -31,28 +31,6 @@ end
 # ------------------------------
 for FUN in (:value, :deriv, :deriv2)
     @eval begin
-        # by default compute the element-wise result
-        @inline function ($FUN)(
-            loss::SupervisedLoss,
-            outputs::AbstractVector,
-            targets::AbstractVector)
-            ($FUN)(loss, outputs, targets, AggMode.None())
-        end
-
-        # -------------------
-        # AGGREGATION: NONE
-        # -------------------
-        @generated function ($FUN)(
-            loss::SupervisedLoss,
-            outputs::AbstractVector,
-            targets::AbstractVector,
-            ::AggMode.None)
-            quote
-                $(Expr(:meta, :inline))
-                ($($FUN)).(loss, outputs, targets)
-            end
-        end
-
         # ------------------
         # AGGREGATION: SUM
        # ------------------
@@ -118,4 +96,4 @@ for FUN in (:value, :deriv, :deriv2)
 end
 
 # convenient functor interface
-(loss::SupervisedLoss)(outputs::AbstractVector, targets::AbstractVector) = value(loss, outputs, targets)
+(loss::SupervisedLoss)(outputs::AbstractVector, targets::AbstractVector) = value.(loss, outputs, targets)
diff --git a/test/aggmode.jl b/test/aggmode.jl
index d0e5325..d0860bf 100644
--- a/test/aggmode.jl
+++ b/test/aggmode.jl
@@ -1,33 +1,26 @@
 function test_vector_value(l, o, t)
     ref = [value(l, o[i], t[i]) for i in 1:length(o)]
-    @test @inferred(value(l, o, t, AggMode.None())) == ref
-    @test @inferred(value(l, o, t)) == ref
-    @test value.(l, o, t) == ref
     @test @inferred(l(o, t)) == ref
     n = length(ref)
     s = sum(ref)
     @test @inferred(value(l, o, t, AggMode.Sum())) ≈ s
     @test @inferred(value(l, o, t, AggMode.Mean())) ≈ s / n
-    ## Weighted Sum
     @test @inferred(value(l, o, t, AggMode.WeightedSum(ones(n)))) ≈ s
     @test @inferred(value(l, o, t, AggMode.WeightedSum(ones(n),normalize=true))) ≈ s / n
-    ## Weighted Mean
     @test @inferred(value(l, o, t, AggMode.WeightedMean(ones(n)))) ≈ (s / n) / n
     @test @inferred(value(l, o, t, AggMode.WeightedMean(ones(n),normalize=false))) ≈ s / n
 end
 
 function test_vector_deriv(l, o, t)
     ref = [deriv(l, o[i], t[i]) for i in 1:length(o)]
-    @test @inferred(deriv(l, o, t, AggMode.None())) == ref
-    @test @inferred(deriv(l, o, t)) == ref
-    @test deriv.(Ref(l), o, t) == ref
+    d(l, o, t) = deriv.(l, o, t)
+    @test @inferred(d(l, o, t)) == ref
 end
 
 function test_vector_deriv2(l, o, t)
     ref = [deriv2(l, o[i], t[i]) for i in 1:length(o)]
-    @test @inferred(deriv2(l, o, t, AggMode.None())) == ref
-    @test @inferred(deriv2(l, o, t)) == ref
-    @test deriv2.(Ref(l), o, t) == ref
+    d(l, o, t) = deriv2.(l, o, t)
+    @test @inferred(d(l, o, t)) == ref
 end
 
 @testset "Vectorized API" begin
diff --git a/test/core.jl b/test/core.jl
index fdf1e51..fa18199 100644
--- a/test/core.jl
+++ b/test/core.jl
@@ -427,7 +427,7 @@ end
     l = MisclassLoss()
     @test value(l, c[1], c[1]) == 0.0
     @test value(l, c[1], c[2]) == 1.0
-    @test value(l, c, reverse(c)) == [0.0, 1.0, 1.0, 0.0]
+    @test value.(l, c, reverse(c)) == [0.0, 1.0, 1.0, 0.0]
     @test value(l, c, reverse(c), AggMode.Sum()) == 2.0
     @test value(l, c, reverse(c), AggMode.Mean()) == 0.5
     @test value(l, c, reverse(c), AggMode.WeightedSum(2*ones(4))) == 4.0
@@ -436,5 +436,5 @@ end
 
     l = MisclassLoss{Float32}()
     @test value(l, c[1], c[1]) isa Float32
-    @test value(l, c, c) isa Vector{Float32}
+    @test value.(l, c, c) isa Vector{Float32}
 end

From c7545f20f112fe430598541c3dae3985e684492f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?=
Date: Mon, 24 Apr 2023 17:51:21 -0300
Subject: [PATCH 2/9] Formatting issues

---
 src/losses/scaled.jl   | 12 ++++++------
 src/losses/weighted.jl | 24 +++++++++++-------------
 2 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/src/losses/scaled.jl b/src/losses/scaled.jl
index 6ffd373..10f45c6 100644
--- a/src/losses/scaled.jl
+++ b/src/losses/scaled.jl
@@ -27,12 +27,12 @@ for FUN in (:value, :deriv, :deriv2)
     end
 end
 
-for FUN in [:isminimizable, :isdifferentiable, :istwicedifferentiable,
-            :isconvex, :isstrictlyconvex, :isstronglyconvex,
-            :isnemitski, :isunivfishercons, :isfishercons,
-            :islipschitzcont, :islocallylipschitzcont,
-            :isclipable, :ismarginbased, :isclasscalibrated,
-            :isdistancebased, :issymmetric]
+for FUN in (:isminimizable, :isdifferentiable, :istwicedifferentiable,
+            :isconvex, :isstrictlyconvex, :isstronglyconvex,
+            :isnemitski, :isunivfishercons, :isfishercons,
+            :islipschitzcont, :islocallylipschitzcont,
+            :isclipable, :ismarginbased, :isclasscalibrated,
+            :isdistancebased, :issymmetric)
   @eval ($FUN)(l::ScaledLoss) = ($FUN)(l.loss)
 end
diff --git a/src/losses/weighted.jl b/src/losses/weighted.jl
index 3cb5232..4a78f17 100644
--- a/src/losses/weighted.jl
+++ b/src/losses/weighted.jl
@@ -47,17 +47,15 @@ isclasscalibrated(l::WeightedMarginLoss{T,W}) where {T,W} = W == 0.5 && isclassc
 # TODO: Think about this semantic
 issymmetric(::WeightedMarginLoss) = false
 
-for prop in [:isminimizable, :isdifferentiable,
-             :istwicedifferentiable,
-             :isconvex, :isstrictlyconvex,
-             :isstronglyconvex, :isnemitski,
-             :isunivfishercons, :isfishercons,
-             :islipschitzcont, :islocallylipschitzcont,
-             :isclipable, :ismarginbased,
-             :isdistancebased]
-  @eval ($prop)(l::WeightedMarginLoss) = ($prop)(l.loss)
-end
-
-for prop_param in (:isdifferentiable, :istwicedifferentiable)
-  @eval ($prop_param)(l::WeightedMarginLoss, at) = ($prop_param)(l.loss, at)
+for FUN in (:isminimizable, :isdifferentiable, :istwicedifferentiable,
+            :isconvex, :isstrictlyconvex, :isstronglyconvex,
+            :isnemitski, :isunivfishercons, :isfishercons,
+            :islipschitzcont, :islocallylipschitzcont,
+            :isclipable, :ismarginbased,
+            :isdistancebased)
+  @eval ($FUN)(l::WeightedMarginLoss) = ($FUN)(l.loss)
+end
+
+for FUN in (:isdifferentiable, :istwicedifferentiable)
+  @eval ($FUN)(l::WeightedMarginLoss, at) = ($FUN)(l.loss, at)
 end

From 276c75dfad9810318b42e1a6b191d3a4ecaa57b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?=
Date: Mon, 24 Apr 2023 17:57:40 -0300
Subject: [PATCH 3/9] Get rid of dimcheck

---
 src/losses.jl | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/src/losses.jl b/src/losses.jl
index 447de9a..c770964 100644
--- a/src/losses.jl
+++ b/src/losses.jl
@@ -21,14 +21,9 @@ include("losses/other.jl")
 include("losses/scaled.jl")
 include("losses/weighted.jl")
 
-# helper macro (for devs)
-macro dimcheck(condition)
-  :(($(esc(condition))) || throw(DimensionMismatch("Dimensions of the parameters don't match: $($(string(condition)))")))
-end
-
-# ------------------------------
-# DEFAULT AGGREGATION BEHAVIOR
-# ------------------------------
+# ----------------------
+# AGGREGATION BEHAVIOR
+# ----------------------
 for FUN in (:value, :deriv, :deriv2)
     @eval begin
         # ------------------
@@ -39,7 +34,6 @@ for FUN in (:value, :deriv, :deriv2)
             outputs::AbstractVector,
             targets::AbstractVector,
             ::AggMode.Sum)
-            @dimcheck length(outputs) == length(targets)
             nobs = length(outputs)
             f(i) = ($FUN)(loss, outputs[i], targets[i])
             sum(f, 1:nobs)
@@ -53,7 +47,6 @@ for FUN in (:value, :deriv, :deriv2)
             outputs::AbstractVector,
             targets::AbstractVector,
             ::AggMode.Mean)
-            @dimcheck length(outputs) == length(targets)
             nobs = length(outputs)
             f(i) = ($FUN)(loss, outputs[i], targets[i])
             sum(f, 1:nobs) / nobs
@@ -67,8 +60,6 @@ for FUN in (:value, :deriv, :deriv2)
             outputs::AbstractVector,
             targets::AbstractVector,
             agg::AggMode.WeightedSum)
-            @dimcheck length(outputs) == length(targets)
-            @dimcheck length(outputs) == length(agg.weights)
             nobs = length(outputs)
             wsum = sum(agg.weights)
             denom = agg.normalize ? wsum : one(wsum)
@@ -84,8 +75,6 @@ for FUN in (:value, :deriv, :deriv2)
             outputs::AbstractVector,
             targets::AbstractVector,
             agg::AggMode.WeightedMean)
-            @dimcheck length(outputs) == length(targets)
-            @dimcheck length(outputs) == length(agg.weights)
             nobs = length(outputs)
             wsum = sum(agg.weights)
             denom = agg.normalize ? nobs * wsum : nobs * one(wsum)

From e4fdae7ee432dfe6e487f2f6b3b934b5239dca46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?=
Date: Mon, 24 Apr 2023 18:01:52 -0300
Subject: [PATCH 4/9] Simplify signatures in losses.jl

---
 src/losses.jl | 26 +++++---------------------
 1 file changed, 5 insertions(+), 21 deletions(-)

diff --git a/src/losses.jl b/src/losses.jl
index c770964..6a1f5bf 100644
--- a/src/losses.jl
+++ b/src/losses.jl
@@ -29,11 +29,7 @@ for FUN in (:value, :deriv, :deriv2)
         # ------------------
         # AGGREGATION: SUM
         # ------------------
-        function ($FUN)(
-            loss::SupervisedLoss,
-            outputs::AbstractVector,
-            targets::AbstractVector,
-            ::AggMode.Sum)
+        function ($FUN)(loss::SupervisedLoss, outputs, targets, ::AggMode.Sum)
             nobs = length(outputs)
             f(i) = ($FUN)(loss, outputs[i], targets[i])
             sum(f, 1:nobs)
@@ -42,11 +38,7 @@ for FUN in (:value, :deriv, :deriv2)
         # -------------------
         # AGGREGATION: MEAN
         # -------------------
-        function ($FUN)(
-            loss::SupervisedLoss,
-            outputs::AbstractVector,
-            targets::AbstractVector,
-            ::AggMode.Mean)
+        function ($FUN)(loss::SupervisedLoss, outputs, targets, ::AggMode.Mean)
             nobs = length(outputs)
             f(i) = ($FUN)(loss, outputs[i], targets[i])
             sum(f, 1:nobs) / nobs
@@ -55,11 +47,7 @@ for FUN in (:value, :deriv, :deriv2)
         # ---------------------------
         # AGGREGATION: WEIGHTED SUM
         # ---------------------------
-        function ($FUN)(
-            loss::SupervisedLoss,
-            outputs::AbstractVector,
-            targets::AbstractVector,
-            agg::AggMode.WeightedSum)
+        function ($FUN)(loss::SupervisedLoss, outputs, targets, agg::AggMode.WeightedSum)
             nobs = length(outputs)
             wsum = sum(agg.weights)
             denom = agg.normalize ? wsum : one(wsum)
             f(i) = agg.weights[i] * ($FUN)(loss, outputs[i], targets[i])
             sum(f, 1:nobs) / denom
@@ -68,11 +56,7 @@ for FUN in (:value, :deriv, :deriv2)
         # ----------------------------
         # AGGREGATION: WEIGHTED MEAN
         # ----------------------------
-        function ($FUN)(
-            loss::SupervisedLoss,
-            outputs::AbstractVector,
-            targets::AbstractVector,
-            agg::AggMode.WeightedMean)
+        function ($FUN)(loss::SupervisedLoss, outputs, targets, agg::AggMode.WeightedMean)
             nobs = length(outputs)
             wsum = sum(agg.weights)
             denom = agg.normalize ? nobs * wsum : nobs * one(wsum)
@@ -85,4 +69,4 @@ end
 end
 
 # convenient functor interface
-(loss::SupervisedLoss)(outputs::AbstractVector, targets::AbstractVector) = value.(loss, outputs, targets)
+(loss::SupervisedLoss)(outputs, targets) = value.(loss, outputs, targets)

From 7e300a69b8ccdd53605a3d7c34f6a8e2dea2757c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?=
Date: Mon, 24 Apr 2023 18:27:17 -0300
Subject: [PATCH 5/9] Accept iterables in vectorized loss

---
 src/losses.jl | 51 +++++++++++++++++++++++----------------------------
 1 file changed, 23 insertions(+), 28 deletions(-)

diff --git a/src/losses.jl b/src/losses.jl
index 6a1f5bf..9fc73e7 100644
--- a/src/losses.jl
+++ b/src/losses.jl
@@ -26,44 +26,39 @@ include("losses/weighted.jl")
 # ----------------------
 for FUN in (:value, :deriv, :deriv2)
     @eval begin
-        # ------------------
-        # AGGREGATION: SUM
-        # ------------------
         function ($FUN)(loss::SupervisedLoss, outputs, targets, ::AggMode.Sum)
-            nobs = length(outputs)
-            f(i) = ($FUN)(loss, outputs[i], targets[i])
-            sum(f, 1:nobs)
+            sum(($FUN)(loss, ŷ, y) for (ŷ, y) in zip(outputs, targets))
         end
 
-        # -------------------
-        # AGGREGATION: MEAN
-        # -------------------
         function ($FUN)(loss::SupervisedLoss, outputs, targets, ::AggMode.Mean)
-            nobs = length(outputs)
-            f(i) = ($FUN)(loss, outputs[i], targets[i])
-            sum(f, 1:nobs) / nobs
+            T = typeof(($FUN)(loss, first(outputs), first(targets)))
+            l = zero(T)
+            n = 0
+            for (ŷ, y) in zip(outputs, targets)
+                l += ($FUN)(loss, ŷ, y)
+                n += 1
+            end
+            l / n
        end
 
-        # ---------------------------
-        # AGGREGATION: WEIGHTED SUM
-        # ---------------------------
         function ($FUN)(loss::SupervisedLoss, outputs, targets, agg::AggMode.WeightedSum)
-            nobs = length(outputs)
-            wsum = sum(agg.weights)
-            denom = agg.normalize ? wsum : one(wsum)
-            f(i) = agg.weights[i] * ($FUN)(loss, outputs[i], targets[i])
-            sum(f, 1:nobs) / denom
+            l = sum(w * ($FUN)(loss, ŷ, y) for (ŷ, y, w) in zip(outputs, targets, agg.weights))
+            w = sum(agg.weights)
+            d = agg.normalize ? w : one(w)
+            l / d
         end
 
-        # ----------------------------
-        # AGGREGATION: WEIGHTED MEAN
-        # ----------------------------
         function ($FUN)(loss::SupervisedLoss, outputs, targets, agg::AggMode.WeightedMean)
-            nobs = length(outputs)
-            wsum = sum(agg.weights)
-            denom = agg.normalize ? nobs * wsum : nobs * one(wsum)
-            f(i) = agg.weights[i] * ($FUN)(loss, outputs[i], targets[i])
-            sum(f, 1:nobs) / denom
+            T = typeof(($FUN)(loss, first(outputs), first(targets)))
+            l = zero(T)
+            n = 0
+            for (ŷ, y, w) in zip(outputs, targets, agg.weights)
+                l += w * ($FUN)(loss, ŷ, y)
+                n += 1
+            end
+            w = sum(agg.weights)
+            d = agg.normalize ? n * w : n * one(w)
+            l / d
         end
     end
 end

From 6a2331171feafcc63473280aebd7ac9f70f96820 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?=
Date: Mon, 24 Apr 2023 21:36:41 -0300
Subject: [PATCH 6/9] Refactor functor interface

---
 src/losses.jl       | 15 +++++++++------
 src/losses/other.jl |  9 +++------
 test/aggmode.jl     |  3 ++-
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/src/losses.jl b/src/losses.jl
index 9fc73e7..34d28eb 100644
--- a/src/losses.jl
+++ b/src/losses.jl
@@ -1,5 +1,8 @@
-# broadcasting behavior
-Broadcast.broadcastable(loss::SupervisedLoss) = Ref(loss)
+# type alias to make code more readable
+Scalar = Union{Number,CategoricalValue}
+
+# convenient functor interface
+(loss::SupervisedLoss)(output::Scalar, target::Scalar) = value(loss, output, target)
 
 # fallback to unary evaluation
 value(loss::DistanceLoss, output::Number, target::Number) = value(loss, output - target)
@@ -10,6 +13,9 @@ value(loss::MarginLoss, output::Number, target::Number) = value(loss, target *
 deriv(loss::MarginLoss, output::Number, target::Number) = target * deriv(loss, target * output)
 deriv2(loss::MarginLoss, output::Number, target::Number) = deriv2(loss, target * output)
 
+# broadcasting behavior
+Broadcast.broadcastable(loss::SupervisedLoss) = Ref(loss)
+
 # ------------------
 # AVAILABLE LOSSES
 # ------------------
@@ -61,7 +67,4 @@ for FUN in (:value, :deriv, :deriv2)
             l / d
         end
     end
-end
-
-# convenient functor interface
-(loss::SupervisedLoss)(outputs, targets) = value.(loss, outputs, targets)
+end
\ No newline at end of file
diff --git a/src/losses/other.jl b/src/losses/other.jl
index 6d4cad4..0c7bf73 100644
--- a/src/losses/other.jl
+++ b/src/losses/other.jl
@@ -12,17 +12,14 @@ struct MisclassLoss{R<:AbstractFloat} <: SupervisedLoss end
 
 MisclassLoss() = MisclassLoss{Float64}()
 
-# type alias to make code more readable
-NumberOrValue = Union{Number,CategoricalValue}
-
 # return floating point to avoid big integers in aggregations
 value(::MisclassLoss{R}, agreement::Bool) where R = ifelse(agreement, zero(R), one(R))
 deriv(::MisclassLoss{R}, agreement::Bool) where R = zero(R)
 deriv2(::MisclassLoss{R}, agreement::Bool) where R = zero(R)
 
-value(loss::MisclassLoss, output::NumberOrValue, target::NumberOrValue) = value(loss, target == output)
-deriv(loss::MisclassLoss, output::NumberOrValue, target::NumberOrValue) = deriv(loss, target == output)
-deriv2(loss::MisclassLoss, output::NumberOrValue, target::NumberOrValue) = deriv2(loss, target == output)
+value(loss::MisclassLoss, output::Scalar, target::Scalar) = value(loss, target == output)
+deriv(loss::MisclassLoss, output::Scalar, target::Scalar) = deriv(loss, target == output)
+deriv2(loss::MisclassLoss, output::Scalar, target::Scalar) = deriv2(loss, target == output)
 
 isminimizable(::MisclassLoss) = false
 isdifferentiable(::MisclassLoss) = false
diff --git a/test/aggmode.jl b/test/aggmode.jl
index d0860bf..ba47214 100644
--- a/test/aggmode.jl
+++ b/test/aggmode.jl
@@ -1,6 +1,7 @@
 function test_vector_value(l, o, t)
     ref = [value(l, o[i], t[i]) for i in 1:length(o)]
-    @test @inferred(l(o, t)) == ref
+    v(l, o, t) = value.(l, o, t)
+    @test @inferred(v(l, o, t)) == ref
     n = length(ref)
     s = sum(ref)
     @test @inferred(value(l, o, t, AggMode.Sum())) ≈ s

From 98b5bc3fcd0cb8c5c2e90c38f40c2e47a09904ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?=
Date: Tue, 25 Apr 2023 15:46:02 -0300
Subject: [PATCH 7/9] Remove AggMode submodule

---
 Project.toml         |   1 +
 src/LossFunctions.jl |  12 ++---
 src/aggmode.jl       | 102 -------------------------------------------
 src/losses.jl        |  51 ++++++++++-----------------
 test/aggmode.jl      |  22 ++++++++---
 test/core.jl         |   5 ---
 6 files changed, 40 insertions(+), 153 deletions(-)
 delete mode 100644 src/aggmode.jl

diff --git a/Project.toml b/Project.toml
index 99d4dc8..9985bbc 100644
--- a/Project.toml
+++ b/Project.toml
@@ -5,6 +5,7 @@ version = "0.9.0"
 [deps]
 CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
 Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 
 [compat]
 CategoricalArrays = "0.10"
diff --git a/src/LossFunctions.jl b/src/LossFunctions.jl
index 57e7de3..d86d1d4 100644
--- a/src/LossFunctions.jl
+++ b/src/LossFunctions.jl
@@ -3,8 +3,8 @@ module LossFunctions
 using Markdown
 using CategoricalArrays: CategoricalValue
 
-# aggregation mode
-include("aggmode.jl")
+import Base: sum
+import Statistics: mean
 
 # trait functions
 include("traits.jl")
@@ -31,9 +31,6 @@ export
   islipschitzcont, islocallylipschitzcont,
   isclipable, isclasscalibrated,
   issymmetric,
 
-  # relevant submodules
-  AggMode,
-
   # margin-based losses
   ZeroOneLoss,
   LogitMarginLoss,
@@ -68,6 +65,9 @@ export
 
   # meta losses
   ScaledLoss,
-  WeightedMarginLoss
+  WeightedMarginLoss,
+
+  # reexport mean
+  mean
 
 end # module
diff --git a/src/aggmode.jl b/src/aggmode.jl
deleted file mode 100644
index 728d95e..0000000
--- a/src/aggmode.jl
+++ /dev/null
@@ -1,102 +0,0 @@
-"""
-Baseclass for all aggregation modes.
-"""
-abstract type AggregateMode end
-
-"""
-    module AggMode
-
-Types for aggregation of multiple observations.
-
-- `AggMode.Sum()`
-- `AggMode.Mean()`
-- `AggMode.WeightedSum(weights)`
-- `AggMode.WeightedMean(weights)`
-"""
-module AggMode
-    using ..LossFunctions: AggregateMode
-
-    """
-        AggMode.Sum()
-
-    Causes the method to return the unweighted sum of the
-    elements instead of the individual elements. Can be used in
-    combination with `ObsDim`, in which case a vector will be
-    returned containing the sum for each observation (useful
-    mainly for multivariable regression).
-    """
-    struct Sum <: AggregateMode end
-
-    """
-        AggMode.Mean()
-
-    Causes the method to return the unweighted mean of the
-    elements instead of the individual elements. Can be used in
-    combination with `ObsDim`, in which case a vector will be
-    returned containing the mean for each observation (useful
-    mainly for multivariable regression).
-    """
-    struct Mean <: AggregateMode end
-
-    """
-        AggMode.WeightedSum(weights; [normalize = false])
-
-    Causes the method to return the weighted sum of all
-    observations. The variable `weights` has to be a vector of
-    the same length as the number of observations.
-    If `normalize = true`, the values of the weight vector will
-    be normalized in such as way that they sum to one.
-
-    # Arguments
-
-    - `weights::AbstractVector`: Vector of weight values that
-      can be used to give certain observations a stronger
-      influence on the sum.
-
-    - `normalize::Bool`: Boolean that specifies if the weight
-      vector should be transformed in such a way that it sums to
-      one (i.e. normalized). This will not mutate the weight
-      vector but instead happen on the fly during the
-      accumulation.
-
-      Defaults to `false`. Setting it to `true` only really
-      makes sense in multivalue-regression, otherwise the result
-      will be the same as for [`WeightedMean`](@ref).
- """ - struct WeightedSum{W<:AbstractVector} <: AggregateMode - weights::W - normalize::Bool - end - WeightedSum(weights::AbstractVector; normalize::Bool = false) = WeightedSum(weights, normalize) - - """ - AggMode.WeightedMean(weights; [normalize = true]) - - Causes the method to return the weighted mean of all - observations. The variable `weights` has to be a vector of - the same length as the number of observations. - If `normalize = true`, the values of the weight vector will - be normalized in such as way that they sum to one. - - # Arguments - - - `weights::AbstractVector`: Vector of weight values that can - be used to give certain observations a stronger influence - on the mean. - - - `normalize::Bool`: Boolean that specifies if the weight - vector should be transformed in such a way that it sums to - one (i.e. normalized). This will not mutate the weight - vector but instead happen on the fly during the - accumulation. - - Defaults to `true`. Setting it to `false` only really makes - sense in multivalue-regression, otherwise the result will - be the same as for [`WeightedSum`](@ref). - """ - struct WeightedMean{W<:AbstractVector} <: AggregateMode - weights::W - normalize::Bool - end - WeightedMean(weights::AbstractVector; normalize::Bool = true) = WeightedMean(weights, normalize) -end diff --git a/src/losses.jl b/src/losses.jl index 34d28eb..f32d6ad 100644 --- a/src/losses.jl +++ b/src/losses.jl @@ -19,6 +19,7 @@ Broadcast.broadcastable(loss::SupervisedLoss) = Ref(loss) # ------------------ # AVAILABLE LOSSES # ------------------ + include("losses/distance.jl") include("losses/margin.jl") include("losses/other.jl") @@ -30,41 +31,23 @@ include("losses/weighted.jl") # ---------------------- # AGGREGATION BEHAVIOR # ---------------------- -for FUN in (:value, :deriv, :deriv2) - @eval begin - function ($FUN)(loss::SupervisedLoss, outputs, targets, ::AggMode.Sum) - sum(loss(ŷ, y) for (ŷ, y) in zip(outputs, targets)) - end - function ($FUN)(loss::SupervisedLoss, outputs, targets, ::AggMode.Mean) - T = typeof(loss(first(outputs), first(targets))) - l = zero(T) - n = 0 - for (ŷ, y) in zip(outputs, targets) - l += loss(ŷ, y) - n += 1 - end - l / n - end +function sum(loss::SupervisedLoss, outputs, targets) + sum(loss(ŷ, y) for (ŷ, y) in zip(outputs, targets)) +end + +function sum(loss::SupervisedLoss, outputs, targets, weights; normalize=true) + s = sum(w * loss(ŷ, y) for (ŷ, y, w) in zip(outputs, targets, weights)) + n = normalize ? sum(weights) : one(first(weights)) + s / n +end - function ($FUN)(loss::SupervisedLoss, outputs, targets, agg::AggMode.WeightedSum) - l = sum(w * loss(ŷ, y) for (ŷ, y, w) in zip(outputs, targets, agg.weights)) - w = sum(agg.weights) - d = agg.normalize ? w : one(w) - l / d - end +function mean(loss::SupervisedLoss, outputs, targets) + mean(loss(ŷ, y) for (ŷ, y) in zip(outputs, targets)) +end - function ($FUN)(loss::SupervisedLoss, outputs, targets, agg::AggMode.WeightedMean) - T = typeof(loss(first(outputs), first(targets))) - l = zero(T) - n = 0 - for (ŷ, y, w) in zip(outputs, targets, agg.weights) - l += w * loss(ŷ, y) - n += 1 - end - w = sum(agg.weights) - d = agg.normalize ? n * w : n * one(w) - l / d - end - end +function mean(loss::SupervisedLoss, outputs, targets, weights; normalize=true) + m = mean(w * loss(ŷ, y) for (ŷ, y, w) in zip(outputs, targets, weights)) + n = normalize ? 
+    m / n
+end
\ No newline at end of file
diff --git a/test/aggmode.jl b/test/aggmode.jl
index ba47214..9e54838 100644
--- a/test/aggmode.jl
+++ b/test/aggmode.jl
@@ -4,12 +4,12 @@ function test_vector_value(l, o, t)
     @test @inferred(v(l, o, t)) == ref
     n = length(ref)
     s = sum(ref)
-    @test @inferred(value(l, o, t, AggMode.Sum())) ≈ s
-    @test @inferred(value(l, o, t, AggMode.Mean())) ≈ s / n
-    @test @inferred(value(l, o, t, AggMode.WeightedSum(ones(n)))) ≈ s
-    @test @inferred(value(l, o, t, AggMode.WeightedSum(ones(n),normalize=true))) ≈ s / n
-    @test @inferred(value(l, o, t, AggMode.WeightedMean(ones(n)))) ≈ (s / n) / n
-    @test @inferred(value(l, o, t, AggMode.WeightedMean(ones(n),normalize=false))) ≈ s / n
+    @test @inferred(sum(l, o, t)) ≈ s
+    @test @inferred(mean(l, o, t)) ≈ s / n
+    @test @inferred(sum(l, o, t, ones(n), normalize=false)) ≈ s
+    @test @inferred(sum(l, o, t, ones(n), normalize=true)) ≈ s / n
+    @test @inferred(mean(l, o, t, ones(n), normalize=false)) ≈ s / n
+    @test @inferred(mean(l, o, t, ones(n), normalize=true)) ≈ (s / n) / n
 end
 
 function test_vector_deriv(l, o, t)
@@ -57,4 +57,14 @@ end
             end
        end
     end
-end
\ No newline at end of file
+end
+
+@testset "Aggregation with categorical values" begin
+    c = categorical(["Foo","Bar","Baz","Foo"])
+    l = MisclassLoss()
+    @test sum(l, c, reverse(c)) == 2.0
+    @test mean(l, c, reverse(c)) == 0.5
+    @test sum(l, c, reverse(c), 2*ones(4), normalize=false) == 4.0
+    @test mean(l, c, reverse(c), 2*ones(4), normalize=false) == 1.0
+    @test mean(l, c, reverse(c), 2*ones(4), normalize=true) == 0.125
+end
\ No newline at end of file
diff --git a/test/core.jl b/test/core.jl
index fa18199..3dd83bf 100644
--- a/test/core.jl
+++ b/test/core.jl
@@ -428,11 +428,6 @@ end
     l = MisclassLoss()
     @test value(l, c[1], c[1]) == 0.0
     @test value(l, c[1], c[2]) == 1.0
     @test value.(l, c, reverse(c)) == [0.0, 1.0, 1.0, 0.0]
-    @test value(l, c, reverse(c), AggMode.Sum()) == 2.0
-    @test value(l, c, reverse(c), AggMode.Mean()) == 0.5
-    @test value(l, c, reverse(c), AggMode.WeightedSum(2*ones(4))) == 4.0
-    @test value(l, c, reverse(c), AggMode.WeightedMean(2*ones(4),false)) == 1.0
-    @test value(l, c, reverse(c), AggMode.WeightedMean(2*ones(4),true)) == 0.125
 
     l = MisclassLoss{Float32}()
     @test value(l, c[1], c[1]) isa Float32

From bb898b00aac7c4bd92684e069500e74dc1a5b8a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?=
Date: Tue, 25 Apr 2023 15:46:47 -0300
Subject: [PATCH 8/9] Rename test/aggmode.jl to test/agg.jl

---
 test/{aggmode.jl => agg.jl} | 0
 test/runtests.jl            | 2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/aggmode.jl b/test/agg.jl
similarity index 100%
rename from test/aggmode.jl
rename to test/agg.jl
diff --git a/test/runtests.jl b/test/runtests.jl
index e9f4fd6..3e07aa6 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -10,7 +10,7 @@ using Test
 tests = [
   "core.jl",
   "props.jl",
-  "aggmode.jl"
+  "agg.jl"
 ]
 
 # for deterministic testing

From addc7272ff13e08a8f9644d7f983a87689a48b65 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?=
Date: Thu, 27 Apr 2023 15:56:52 -0300
Subject: [PATCH 9/9] Update docs

---
 docs/src/introduction/gettingstarted.md |  36 ++-----
 docs/src/user/aggregate.md              | 135 +++---------------------
 src/losses.jl                           |  24 +++++
 3 files changed, 46 insertions(+), 149 deletions(-)

diff --git a/docs/src/introduction/gettingstarted.md b/docs/src/introduction/gettingstarted.md
index 62e1a7e..af485fa 100644
--- a/docs/src/introduction/gettingstarted.md
+++ b/docs/src/introduction/gettingstarted.md
@@ -78,8 +78,8 @@ julia> true_targets = [  1,  0, -2];
 
 julia> pred_outputs = [0.5, 2, -1];
 
-julia> value(L2DistLoss(), pred_outputs, true_targets)
-3-element Array{Float64,1}:
+julia> value.(L2DistLoss(), pred_outputs, true_targets)
+3-element Vector{Float64}:
  0.25
  4.0
  1.0
@@ -92,10 +92,10 @@ This will avoid allocating a temporary array and directly compute the result.
 
 ```julia-repl
-julia> value(L2DistLoss(), pred_outputs, true_targets, AggMode.Sum())
+julia> sum(L2DistLoss(), pred_outputs, true_targets)
 5.25
 
-julia> value(L2DistLoss(), pred_outputs, true_targets, AggMode.Mean())
+julia> mean(L2DistLoss(), pred_outputs, true_targets)
 1.75
 ```
 
@@ -105,33 +105,11 @@ each observation in the predicted outputs and so allow to give certain
 observations a stronger influence over the result.
 
 ```julia-repl
-julia> value(L2DistLoss(), pred_outputs, true_targets, AggMode.WeightedSum([2,1,1]))
+julia> sum(L2DistLoss(), pred_outputs, true_targets, [2,1,1], normalize=false)
 5.5
 
-julia> value(L2DistLoss(), pred_outputs, true_targets, AggMode.WeightedMean([2,1,1]))
-1.375
-```
-
-All these function signatures of [`value`](@ref) also apply for
-computing the derivatives using [`deriv`](@ref) and the second
-derivatives using [`deriv2`](@ref).
-
-```julia-repl
-julia> true_targets = [ 1, 0, -2];
-
-julia> pred_outputs = [0.5, 2, -1];
-
-julia> deriv(L2DistLoss(), pred_outputs, true_targets)
-3-element Array{Float64,1}:
- -1.0
-  4.0
-  2.0
-
-julia> deriv2(L2DistLoss(), pred_outputs, true_targets)
-3-element Array{Float64,1}:
- 2.0
- 2.0
- 2.0
+julia> mean(L2DistLoss(), pred_outputs, true_targets, [2,1,1], normalize=false)
+1.8333333333333333
 ```
 
 ## Getting Help
diff --git a/docs/src/user/aggregate.md b/docs/src/user/aggregate.md
index a0c46fe..dc035fe 100644
--- a/docs/src/user/aggregate.md
+++ b/docs/src/user/aggregate.md
@@ -34,13 +34,13 @@ say "naive", because it will not give us an acceptable performance.
 
 ```jldoctest
-julia> value(L1DistLoss(), [2,5,-2], [1.,2,3])
-3-element Vector{Float64}:
- 1.0
- 3.0
- 5.0
-
-julia> sum(value(L1DistLoss(), [2,5,-2], [1.,2,3])) # WARNING: Bad code
+julia> value.(L1DistLoss(), [2,5,-2], [1.,2,3])
+3-element Vector{Float64}:
+ 1.0
+ 3.0
+ 5.0
+
+julia> sum(value.(L1DistLoss(), [2,5,-2], [1.,2,3])) # WARNING: Bad code
 9.0
 ```
 
@@ -53,52 +53,25 @@ that we don't need in the end and could avoid. For that reason we provide
 special methods that compute the common accumulations
 efficiently without allocating temporary
-arrays. These methods can be invoked using an additional
-parameter which specifies how the values should be accumulated /
-averaged. The type of this parameter has to be a subtype of
-`AggregateMode`.
-
-## Aggregation Modes
-
-Before we discuss these memory-efficient methods, let us briefly
-introduce the available aggregation mode types. We provide a number
-of different aggregation modes, all of which are contained within
-the namespace `AggMode`. An instance of such type can then be
-used as additional parameter to [`value`](@ref), [`deriv`](@ref),
-and [`deriv2`](@ref), as we will see further down.
-
-It follows a list of available aggregation modes. Each of which with
-a short description of what their effect would be when used as an
-additional parameter to the functions mentioned above.
-
-```@docs
-AggMode.None
-AggMode.Sum
-AggMode.Mean
-AggMode.WeightedSum
-AggMode.WeightedMean
-```
-
-## Unweighted Sum and Mean
-
-As hinted before, we provide special memory efficient methods for -computing the **sum** or the **mean** of the element-wise (or -broadcasted) results of [`value`](@ref), [`deriv`](@ref), and -[`deriv2`](@ref). These methods avoid the allocation of a -temporary array and instead compute the result directly. +```jldoctest +julia> sum(L1DistLoss(), [2,5,-2], [1.,2,3]) +9.0 -## Weighted Sum and Mean +julia> mean(L1DistLoss(), [2,5,-2], [1.,2,3]) +3.0 +``` Up to this point, all the averaging was performed in an unweighted manner. That means that each observation was treated as equal and had thus the same potential influence on the result. -In this sub-section we will consider the situations in which we +In the following we will consider situations in which we do want to explicitly specify the influence of each observation (i.e. we want to weigh them). When we say we "weigh" an observation, what it effectively boils down to is multiplying the -result for that observation (i.e. the computed loss or -derivative) with some number. This is done for every observation -individually. +result for that observation (i.e. the computed loss) with some number. +This is done for every observation individually. To get a better understand of what we are talking about, let us consider performing a weighting scheme manually. The following @@ -127,88 +100,10 @@ between the different weights. In the example above the second observation was thus considered twice as important as any of the other two observations. -In the case of multi-dimensional arrays the process isn't that -simple anymore. In such a scenario, computing the weighted sum -(or weighted mean) can be thought of as having an additional -step. First we either compute the sum or (unweighted) average for -each observation (which results in a vector), and then we compute -the weighted sum of all observations. - -The following code snipped demonstrates how to compute the -`AggMode.WeightedSum([2,1])` manually. This is **not** meant as -an example of how to do it, but simply to show what is happening -qualitatively. In this example we assume that we are working in a -multi-variable regression setting, in which our data set has four -observations with two target-variables each. - -```jldoctest weight -julia> targets = reshape(1:8, (2, 4)) ./ 8 -2×4 Matrix{Float64}: - 0.125 0.375 0.625 0.875 - 0.25 0.5 0.75 1.0 - -julia> outputs = reshape(1:2:16, (2, 4)) ./ 8 -2×4 Matrix{Float64}: - 0.125 0.625 1.125 1.625 - 0.375 0.875 1.375 1.875 - -julia> # WARNING: BAD CODE - ONLY FOR ILLUSTRATION - -julia> tmp = sum(value.(L1DistLoss(), outputs, targets), dims=2) -2×1 Matrix{Float64}: - 1.5 - 2.0 - -julia> sum(tmp .* [2, 1]) # weigh 1st observation twice as high -5.0 -``` - -To manually compute the result for `AggMode.WeightedMean([2,1])` -we follow a similar approach, but use the normalized weight -vector in the last step. - ```jldoctest weight -julia> using Statistics # for access to "mean" - -julia> # WARNING: BAD CODE - ONLY FOR ILLUSTRATION - -julia> tmp = mean(value.(L1DistLoss(), outputs, targets), dims=2) -2×1 Matrix{Float64}: - 0.375 - 0.5 - -julia> sum(tmp .* [0.6666, 0.3333]) # weigh 1st observation twice as high -0.416625 -``` - -Note that you can specify explicitly if you want to normalize the -weight vector. That option is supported for computing the -weighted sum, as well as for computing the weighted mean. See the -documentation for [`AggMode.WeightedSum`](@ref) and -[`AggMode.WeightedMean`](@ref) for more information. 
-
-The code-snippets above are of course very inefficient, because
-they allocate (multiple) temporary arrays. We only included them
-to demonstrate what is happening in terms of desired result /
-effect. For doing those computations efficiently we provide
-special methods for [`value`](@ref), [`deriv`](@ref),
-[`deriv2`](@ref) and their mutating counterparts.
-
-```jldoctest weight
-julia> value(L1DistLoss(), [2,5,-2], [1.,2,3], AggMode.WeightedSum([1,2,1]))
+```jldoctest weight
+julia> sum(L1DistLoss(), [2,5,-2], [1.,2,3], [1,2,1], normalize=false)
 12.0
 
-julia> value(L1DistLoss(), [2,5,-2], [1.,2,3], AggMode.WeightedMean([1,2,1]))
+julia> mean(L1DistLoss(), [2,5,-2], [1.,2,3], [1,2,1])
 1.0
-```
-
-We also provide this functionality for [`deriv`](@ref) and
-[`deriv2`](@ref) respectively.
-
-```jldoctest weight
-julia> deriv(L2DistLoss(), [2,5,-2], [1.,2,3], AggMode.WeightedSum([1,2,1]))
-4.0
-
-julia> deriv(L2DistLoss(), [2,5,-2], [1.,2,3], AggMode.WeightedMean([1,2,1]))
-0.3333333333333333
-```
+```
\ No newline at end of file
diff --git a/src/losses.jl b/src/losses.jl
index f32d6ad..b5350c7 100644
--- a/src/losses.jl
+++ b/src/losses.jl
@@ -32,20 +32,44 @@ include("losses/weighted.jl")
 # AGGREGATION BEHAVIOR
 # ----------------------
 
+"""
+    sum(loss, outputs, targets)
+
+Return sum of `loss` values over the iterables `outputs` and `targets`.
+"""
 function sum(loss::SupervisedLoss, outputs, targets)
     sum(loss(ŷ, y) for (ŷ, y) in zip(outputs, targets))
 end
 
+"""
+    sum(loss, outputs, targets, weights; normalize=true)
+
+Return sum of `loss` values over the iterables `outputs` and `targets`.
+The `weights` determine the importance of each observation. The option
+`normalize` divides the result by the sum of the weights.
+"""
 function sum(loss::SupervisedLoss, outputs, targets, weights; normalize=true)
     s = sum(w * loss(ŷ, y) for (ŷ, y, w) in zip(outputs, targets, weights))
     n = normalize ? sum(weights) : one(first(weights))
     s / n
 end
 
+"""
+    mean(loss, outputs, targets)
+
+Return mean of `loss` values over the iterables `outputs` and `targets`.
+"""
 function mean(loss::SupervisedLoss, outputs, targets)
     mean(loss(ŷ, y) for (ŷ, y) in zip(outputs, targets))
 end
 
+"""
+    mean(loss, outputs, targets, weights; normalize=true)
+
+Return mean of `loss` values over the iterables `outputs` and `targets`.
+The `weights` determine the importance of each observation. The option
+`normalize` divides the result by the sum of the weights.
+"""
 function mean(loss::SupervisedLoss, outputs, targets, weights; normalize=true)
     m = mean(w * loss(ŷ, y) for (ŷ, y, w) in zip(outputs, targets, weights))
     n = normalize ? sum(weights) : one(first(weights))
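
Series recap: taken together, the nine patches drop the `AggMode` submodule in favor of plain broadcasting for element-wise results and `sum`/`mean` methods for aggregation. The snippet below is an illustrative sketch assembled from the doctests updated in PATCH 9/9, not part of the patches themselves; the variable names `loss`, `ŷ`, and `y` are illustrative.

```julia
using LossFunctions  # after PATCH 7/9, `mean` is also reexported

loss = L2DistLoss()
ŷ = [0.5, 2, -1]  # predicted outputs
y = [1., 0, -2]   # true targets

# element-wise losses (formerly AggMode.None, now plain broadcasting)
loss.(ŷ, y)  # == value.(loss, ŷ, y) == [0.25, 4.0, 1.0]

# unweighted aggregation (formerly AggMode.Sum and AggMode.Mean)
sum(loss, ŷ, y)   # 5.25
mean(loss, ŷ, y)  # 1.75

# weighted aggregation (formerly AggMode.WeightedSum and AggMode.WeightedMean)
sum(loss, ŷ, y, [2, 1, 1], normalize=false)   # 5.5
mean(loss, ŷ, y, [2, 1, 1], normalize=false)  # 1.8333333333333333
```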