From 2a7941f7a871aa0af9f89c9ad6c61590b4fba5b9 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Thu, 25 May 2023 14:10:42 +1200 Subject: [PATCH 01/13] jettison built-in measures in favour of StatisticalMeasures.jl --- Project.toml | 6 + src/MLJBase.jl | 82 +- src/composition/models/stacking.jl | 40 +- src/measures.jl | 58 + src/measures/README.md | 117 -- src/measures/confusion_matrix.jl | 270 ----- src/measures/continuous.jl | 315 ------ src/measures/doc_strings.jl | 12 - src/measures/finite.jl | 1247 --------------------- src/measures/loss_functions_interface.jl | 208 ---- src/measures/measure_search.jl | 65 -- src/measures/measures.jl | 302 ----- src/measures/meta_utilities.jl | 233 ---- src/measures/probabilistic.jl | 423 ------- src/measures/roc.jl | 91 -- src/resampling.jl | 327 +++--- src/utilities.jl | 40 + test/composition/models/stacking.jl | 2 +- test/interface/model_api.jl | 2 +- test/measures.jl | 40 + test/measures/confusion_matrix.jl | 116 -- test/measures/continuous.jl | 31 - test/measures/doc_strings.jl | 9 - test/measures/finite.jl | 609 ---------- test/measures/loss_functions_interface.jl | 68 -- test/measures/measure_search.jl | 42 - test/measures/measures.jl | 134 --- test/measures/probabilistic.jl | 174 --- test/measures/roc.jl | 13 - test/preliminaries.jl | 9 +- test/resampling.jl | 141 ++- test/runtests.jl | 4 +- test/utilities.jl | 12 + 33 files changed, 448 insertions(+), 4794 deletions(-) create mode 100644 src/measures.jl delete mode 100644 src/measures/README.md delete mode 100644 src/measures/confusion_matrix.jl delete mode 100644 src/measures/continuous.jl delete mode 100644 src/measures/doc_strings.jl delete mode 100644 src/measures/finite.jl delete mode 100644 src/measures/loss_functions_interface.jl delete mode 100644 src/measures/measure_search.jl delete mode 100644 src/measures/measures.jl delete mode 100644 src/measures/meta_utilities.jl delete mode 100644 src/measures/probabilistic.jl delete mode 100644 src/measures/roc.jl create mode 100644 test/measures.jl delete mode 100644 test/measures/confusion_matrix.jl delete mode 100644 test/measures/continuous.jl delete mode 100644 test/measures/doc_strings.jl delete mode 100644 test/measures/finite.jl delete mode 100644 test/measures/loss_functions_interface.jl delete mode 100644 test/measures/measure_search.jl delete mode 100644 test/measures/measures.jl delete mode 100644 test/measures/probabilistic.jl delete mode 100644 test/measures/roc.jl diff --git a/Project.toml b/Project.toml index bbe431ac..3b5c52f0 100644 --- a/Project.toml +++ b/Project.toml @@ -13,6 +13,7 @@ Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" InvertedIndices = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" +LearnAPI = "92ad9a40-7767-427a-9ee6-6e577f1266cb" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7" MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" @@ -22,8 +23,10 @@ Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a" PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +Reexport = "189a3867-3050-52da-a836-e630ba90ab69" ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +StatisticalMeasures = "a19d573c-0a75-4610-95b3-7071388c7541" StatisticalTraits = 
"64bff920-2084-43da-a3e6-9bb72801c0c9" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" @@ -38,11 +41,14 @@ InvertedIndices = "1" LossFunctions = "0.10" MLJModelInterface = "1.7" Missings = "0.4, 1" +LearnAPI = "0.1" OrderedCollections = "1.1" Parameters = "0.12" PrettyTables = "1, 2" ProgressMeter = "1.7.1" +Reexport = "1.2" ScientificTypes = "3" +StatisticalMeasures = "0.1" StatisticalTraits = "3.2" StatsBase = "0.32, 0.33, 0.34" Tables = "0.2, 1.0" diff --git a/src/MLJBase.jl b/src/MLJBase.jl index f7d95499..17d13308 100644 --- a/src/MLJBase.jl +++ b/src/MLJBase.jl @@ -3,6 +3,7 @@ module MLJBase # =================================================================== # IMPORTS +using Reexport import Base: ==, precision, getindex, setindex! import Base.+, Base.*, Base./ @@ -16,7 +17,7 @@ for trait in StatisticalTraits.TRAITS eval(:(import StatisticalTraits.$trait)) end -import Base.instances # considered a trait for measures +import LearnAPI import StatisticalTraits.snakecase import StatisticalTraits.info @@ -88,6 +89,10 @@ using CategoricalDistributions import Distributions: pdf, logpdf, sampler const Dist = Distributions +# Measures +@reexport using StatisticalMeasures +import StatisticalMeasures.StatisticalMeasuresBase + # from Standard Library: using Statistics, LinearAlgebra, Random, InteractiveUtils @@ -127,57 +132,6 @@ const CatArrMissing{T,N} = ArrMissing{CategoricalValue{T},N} const MMI = MLJModelInterface const FI = MLJModelInterface.FullInterface -const MARGIN_LOSSES = [ - :DWDMarginLoss, - :ExpLoss, - :L1HingeLoss, - :L2HingeLoss, - :L2MarginLoss, - :LogitMarginLoss, - :ModifiedHuberLoss, - :PerceptronLoss, - :SigmoidLoss, - :SmoothedL1HingeLoss, - :ZeroOneLoss -] - -const DISTANCE_LOSSES = [ - :HuberLoss, - :L1EpsilonInsLoss, - :L2EpsilonInsLoss, - :LPDistLoss, - :LogitDistLoss, - :PeriodicLoss, - :QuantileLoss -] - -const WITH_PARAMETERS = [ - :DWDMarginLoss, - :SmoothedL1HingeLoss, - :HuberLoss, - :L1EpsilonInsLoss, - :L2EpsilonInsLoss, - :LPDistLoss, - :QuantileLoss, -] - -const MEASURE_TYPE_ALIASES = [ - :FPR, :FNR, :TPR, :TNR, - :FDR, :PPV, :NPV, :Recall, :Specificity, - :MFPR, :MFNR, :MTPR, :MTNR, - :MFDR, :MPPV, :MNPV, :MulticlassRecall, :MulticlassSpecificity, - :MCR, - :MCC, - :BAC, :BACC, - :RMS, :RMSPV, :RMSL, :RMSLP, :RMSP, - :MAV, :MAE, :MAPE, - :RSQ, :LogCosh, - :CrossEntropy, - :AUC -] - -const LOSS_FUNCTIONS = vcat(MARGIN_LOSSES, DISTANCE_LOSSES) - # =================================================================== # Computational Resource # default_resource allows to switch the mode of parallelization @@ -224,15 +178,10 @@ include("data/data.jl") include("data/datasets.jl") include("data/datasets_synthetic.jl") -include("measures/measures.jl") -include("measures/measure_search.jl") -include("measures/doc_strings.jl") +include("measures.jl") include("composition/models/stacking.jl") -# function on the right-hand side is defined in src/measures/meta_utilities.jl: -const MEASURE_TYPES_ALIASES_AND_INSTANCES = measures_for_export() - const EXTENDED_ABSTRACT_MODEL_TYPES = vcat( MLJBase.MLJModelInterface.ABSTRACT_MODEL_SUBTYPES, MLJBase.NETWORK_COMPOSITE_TYPES, # src/composition/models/network_composite_types.jl @@ -355,23 +304,8 @@ export ResamplingStrategy, Holdout, CV, StratifiedCV, TimeSeriesCV, # ------------------------------------------------------------------- # exports from MLJBase specific to measures -# measure names: -for m in MEASURE_TYPES_ALIASES_AND_INSTANCES - :(export $m) |> eval -end - -# 
measures/registry.jl: -export measures, metadata_measure - # measure/measures.jl (excluding traits): -export aggregate, default_measure, value, skipinvalid - -# measures/probabilistic: -export roc_curve, roc - -# measures/finite.jl (averaging modes for multiclass scores) -export no_avg, macro_avg, micro_avg - +export default_measure # ------------------------------------------------------------------- # re-export from Random, StatsBase, Statistics, Distributions, diff --git a/src/composition/models/stacking.jl b/src/composition/models/stacking.jl index 3a5cb6aa..474ef920 100644 --- a/src/composition/models/stacking.jl +++ b/src/composition/models/stacking.jl @@ -378,21 +378,30 @@ model_2, ...), ...) function internal_stack_report( stack::Stack{modelnames,}, verbosity::Int, - tt_pairs, + tt_pairs, # train_test_pairs folds_evaluations... ) where modelnames n_measures = length(stack.measures) nfolds = length(tt_pairs) - # For each model we record the results mimicking the fields PerformanceEvaluation + test_fold_sizes = map(tt_pairs) do train_test_pair + test = last(train_test_pair) + length(test) + end + + # weights to be used to aggregate per-fold measurements (averaging to 1): + fold_weights(mode) = nfolds .* test_fold_sizes ./ sum(test_fold_sizes) + fold_weights(::StatisticalMeasuresBase.Sum) = nothing + + # For each model we record the results mimicking the fields of PerformanceEvaluation results = NamedTuple{modelnames}( [( measure = stack.measures, measurement = Vector{Any}(undef, n_measures), operation = _actual_operations(nothing, stack.measures, model, verbosity), per_fold = [Vector{Any}(undef, nfolds) for _ in 1:n_measures], - per_observation = Vector{Union{Missing, Vector{Any}}}(missing, n_measures), + per_observation = [Vector{Vector{Any}}(undef, nfolds) for _ in 1:n_measures], fitted_params_per_fold = [], report_per_fold = [], train_test_pairs = tt_pairs @@ -416,30 +425,29 @@ function internal_stack_report( model_results.operation, )) ypred = operation(mach, Xtest) - loss = measure(ypred, ytest) - # Update per_observation - if reports_each_observation(measure) - if model_results.per_observation[i] === missing - model_results.per_observation[i] = Vector{Any}(undef, nfolds) - end - model_results.per_observation[i][foldid] = loss - end + measurements = StatisticalMeasures.measurements(measure, ypred, ytest) + + # Update per observation: + model_results.per_observation[i][foldid] = measurements # Update per_fold - model_results.per_fold[i][foldid] = - reports_each_observation(measure) ? 
- MLJBase.aggregate(loss, measure) : loss + model_results.per_fold[i][foldid] = measure(ypred, ytest) end index += 1 end end - # Update measurement field by aggregation + # Update measurement field by aggregating per-fold measurements for modelname in modelnames for (i, measure) in enumerate(stack.measures) model_results = results[modelname] + mode = StatisticalMeasuresBase.external_aggregation_mode(measure) model_results.measurement[i] = - MLJBase.aggregate(model_results.per_fold[i], measure) + StatisticalMeasuresBase.aggregate( + model_results.per_fold[i]; + mode, + weights=fold_weights(mode), + ) end end diff --git a/src/measures.jl b/src/measures.jl new file mode 100644 index 00000000..c0a2ae7b --- /dev/null +++ b/src/measures.jl @@ -0,0 +1,58 @@ +# # DEFAULT MEASURES + +default_measure(T, S) = _default_measure(T, nonmissingtype(S)) + +_default_measure(T, S) = nothing + +# Deterministic + Continuous / Count ==> RMS +function _default_measure( + ::Type{<:Deterministic}, + ::Type{<:Union{AbstractVector{<:Continuous}, AbstractVector{<:Count}}}, +) + return rms +end + +# Deterministic + Finite ==> Misclassification rate +function _default_measure( + ::Type{<:Deterministic}, + ::Type{<:AbstractVector{<:Finite}}, +) + return misclassification_rate +end + +# Probabilistic + Finite / Count ==> log loss +function _default_measure( + ::Type{<:Probabilistic}, + ::Type{<:Union{AbstractVector{<:Finite},AbstractVector{<:Count}}}, +) + return log_loss +end + +# Probabilistic + Continuous ==> Log loss +function _default_measure( + ::Type{<:Probabilistic}, + ::Type{<:AbstractVector{<:Continuous}}, +) + return log_loss +end + +function _default_measure( + ::Type{<:MMI.ProbabilisticDetector}, + ::Type{<:AbstractVector{<:OrderedFactor{2}}}, +) + return area_under_curve +end + +function _default_measure( + ::Type{<:MMI.DeterministicDetector}, + ::Type{<:AbstractVector{<:OrderedFactor{2}}}, +) + return balanced_accuracy +end + +# Fallbacks +default_measure(M::Type{<:Supervised}) = default_measure(M, target_scitype(M)) +default_measure(::M) where M <: Supervised = default_measure(M) + +default_measure(M::Type{<:Annotator}) = _default_measure(M, target_scitype(M)) +default_measure(::M) where M <: Annotator = default_measure(M) diff --git a/src/measures/README.md b/src/measures/README.md deleted file mode 100644 index 0097d2f7..00000000 --- a/src/measures/README.md +++ /dev/null @@ -1,117 +0,0 @@ -## Adding new measures - -This document assumes familiarity with the traits provided for -measures. For a summary, query the docstring for -`MLJBase.metadata_measures`. - -A measure is ordinarily called on data directly, as in - -```julia -ŷ = rand(3) # predictions -y = rand(3) # ground truth observations - -m = LPLoss(p=3) - -julia> m(ŷ, y) -3-element Vector{Float64}: - 0.07060087052171798 - 0.003020044780949528 - 0.019067038457889922 -``` - -To call a measure without performing dimension or pool checks, one -uses `MLJBase.call` instead: - -```julia -MLJBase.call(m, ŷ, y) -``` - -A new measure reporting an aggregate measurement, such as -`AreaUnderCurve`, will subtype `Aggregate`, and only needs to -implement `call`. A measure that reports a measurement for each -observation , such as `LPLoss`, subtypes `Unaggregated` and only needs -to implement an evaluation method for single observations called -`single`. 
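-
-For example, here is a minimal sketch of an `Unaggregated` measure
-(the type `MyAbsoluteLoss` is hypothetical and shown only to
-illustrate the contract, which is detailed further below):
-
-```julia
-# an unaggregated measure reports one measurement per observation:
-struct MyAbsoluteLoss <: Unaggregated end
-
-# measurement for a single example (η̂, η); `missing` values are
-# handled by the internal `robust_single` wrapper, not here:
-single(::MyAbsoluteLoss, η̂, η) = abs(η̂ - η)
-```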
-
-Recall also that if a measure reports each observation, it does so
-even in the case that weights are additionally specified:
-
-```julia
-w = rand(3) # per-observation weights
-
-julia> m(ŷ, y, w)
-3-element Vector{Float64}:
- 0.049333392516241206
- 0.0017612002314472718
- 0.003157450446692638
-```
-
-This behaviour differs from other places where weights can only be
-specified as part of an aggregation of multi-observation measurements.
-
-
-### Unaggregated measures implement `single`
-
-To implement an `Unaggregated` measure, it suffices to implement `single(measure, η̂, η)`,
-which should return a measurement (e.g., a float) for a single example `(η̂, η)` (e.g., a
-pair of floats). There is no need for `single` to handle `missing` values. (Internally, a
-wrapper function `robust_single` handles these.)
-
-If only `single` is implemented, then the measure will automatically
-support per-observation weights and, where that makes sense, per-class
-weights. However, `supports_class_weights` may need to be overloaded,
-as this defaults to `false`.
-
-#### Special cases
-
-If `single` is *not* implemented, then `call(measure, ŷ, y)`, and optionally
-`call(measure, ŷ, y, w)`, must be implemented (the fallbacks call `robust_single`, a
-wrapped version of `single` that handles `missing` values). In this case `y` and `ŷ` are
-arrays of matching size and the method should return an array of that size *without
-performing size or pool checks*. The method should handle `missing` and `NaN` values if
-possible, which should be propagated to relevant elements of the returned array.
-
-The `supports_weights` trait, which defaults to `true`, will need to
-be overloaded to return `false` if neither `single(::MyMeasure,
-args...)` nor `call(::MyMeasure, ŷ, y, w::AbstractArray)` is
-overloaded.
-
-### Aggregated measures implement `call`
-
-To implement an `Aggregated` measure, implement
-`call(measure::MyMeasure, ŷ, y)`. Optionally implement
-`call(measure::MyMeasure, ŷ, y, w)`.
-
-
-### Trait declarations
-
-Measure traits can be set using the `metadata_measure`
-function (query the doc-string) or individually, as in
-
-```julia
-supports_weights(::Type{<:MyMeasure}) = false
-```
-
-A fuller sketch is given at the end of this document. Defaults are
-shown below:
-
-trait                    | allowed values                 | default
--------------------------|--------------------------------|--------------
-`target_scitype`         | some scientific type           | `Unknown`
-`human_name`             | any `String`                   | string version of type name
-`instances`              | any `Vector{String}`           | empty
-`prediction_type`        | `:deterministic`, `:probabilistic`, `:interval`, `:unknown` | `:unknown`
-`orientation`            | `:score`, `:loss`, `:unknown`  | `:unknown`
-`aggregation`            | `Mean()`, `Sum()`, `RootMeanSquare()` | `Mean()`
-`supports_weights`       | `true` or `false`              | `true`
-`supports_class_weights` | `true` or `false`              | `false`
-`docstring`              | any `String`                   | includes `name`, `human_name` and `instances`
-`distribution_type`      | any `Distribution` subtype or `Unknown` | `Unknown`
-
-### Exporting the measure and its aliases
-
-If you create a type alias, as in `const MAE = MeanAbsoluteError`,
-then you must add this alias to the constant
-`MEASURE_TYPE_ALIASES`. That is the only step needed, as the macro
-`@export_measures` programmatically exports all measure types and
-their instances, and those aliases listed in `MEASURE_TYPE_ALIASES`.
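-
-Putting this together, a complete trait declaration for the
-hypothetical `MyAbsoluteLoss` measure sketched earlier might read as
-follows (a sketch only, modeled on `metadata_measure` calls found
-elsewhere in this package; the alias `"my_absolute_loss"` is likewise
-hypothetical):
-
-```julia
-metadata_measure(MyAbsoluteLoss;
-                 instances       = ["my_absolute_loss",],
-                 target_scitype  = AbstractVector{<:Union{Missing,Continuous}},
-                 prediction_type = :deterministic,
-                 orientation     = :loss)
-```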
diff --git a/src/measures/confusion_matrix.jl b/src/measures/confusion_matrix.jl deleted file mode 100644 index a835bddb..00000000 --- a/src/measures/confusion_matrix.jl +++ /dev/null @@ -1,270 +0,0 @@ -## CONFUSION MATRIX OBJECT - -""" - ConfusionMatrixObject{C} - -Confusion matrix with `C ≥ 2` classes. Rows correspond to predicted values -and columns to the ground truth. -""" -struct ConfusionMatrixObject{C} - mat::Matrix - labels::Vector{String} -end - -""" - ConfusionMatrixObject(m, labels) - -Instantiates a confusion matrix out of a square integer matrix `m`. -Rows are the predicted class, columns the ground truth. See also the -[wikipedia article](https://en.wikipedia.org/wiki/Confusion_matrix). - -""" -function ConfusionMatrixObject(m::Matrix{Int}, labels::Vector{String}) - s = size(m) - s[1] == s[2] || throw(ArgumentError("Expected a square matrix.")) - s[1] > 1 || throw(ArgumentError("Expected a matrix of size ≥ 2x2.")) - length(labels) == s[1] || - throw(ArgumentError("As many labels as classes must be provided.")) - ConfusionMatrixObject{s[1]}(m, labels) -end - -# allow to access cm[i,j] but not set (it's immutable) -Base.getindex(cm::ConfusionMatrixObject, inds...) = getindex(cm.mat, inds...) - -_levels(y1, y2) = vcat(levels(y1), levels(y2)) |> unique - -# simultaneous coercion of two vectors into categorical vectors having -# the same pool: -function _categorical(y1, y2) - L = _levels(y1, y2) - return categorical(y1, levels=L), categorical(y2, levels=L) -end -_categorical(y1::CategoricalArray{V1,N}, - y2::CategoricalArray{V2,N}) where - {V, V1<:Union{Missing,V}, V2<:Union{Missing,V}, N} = - y1, y2 -_categorical(y1::AbstractArray{<:CategoricalArrays.CategoricalValue}, - y2::AbstractArray{<:CategoricalArrays.CategoricalValue}) = - broadcast(identity, y1), broadcast(identity, y2) - - -""" - _confmat(ŷ, y; rev=false) - -A private method. General users should use `confmat` or other instances -of the measure type [`ConfusionMatrix`](@ref). - -Computes the confusion matrix given a predicted `ŷ` with categorical elements -and the actual `y`. Rows are the predicted class, columns the ground truth. -The ordering follows that of `levels(y)`. - -## Keywords - -* `rev=false`: in the binary case, this keyword allows to swap the ordering of - classes. -* `perm=[]`: in the general case, this keyword allows to specify a permutation - re-ordering the classes. -* `warn=true`: whether to show a warning in case `y` does not have scientific - type `OrderedFactor{2}` (see note below). - -## Note - -To decrease the risk of unexpected errors, if `y` does not have -scientific type `OrderedFactor{2}` (and so does not have a "natural -ordering" negative-positive), a warning is shown indicating the -current order unless the user explicitly specifies either `rev` or -`perm` in which case it's assumed the user is aware of the class -ordering. - -The `confusion_matrix` is a measure (although neither a score nor a -loss) and so may be specified as such in calls to `evaluate`, -`evaluate!`, although not in `TunedModel`s. In this case, however, -there no way to specify an ordering different from `levels(y)`, where -`y` is the target. 
- -""" -function _confmat(ŷraw::Union{Arr{V1,N}, CategoricalArray{V1,N}}, - yraw::Union{Arr{V2,N}, CategoricalArray{V2,N}}; - rev::Union{Nothing,Bool}=nothing, - perm::Union{Nothing,Vector{<:Integer}}=nothing, - warn::Bool=true) where - {V,V1<:Union{Missing,V}, V2<:Union{Missing,V},N} - - # no-op if vectors already categorical arrays: - ŷ, y = _categorical(ŷraw, yraw) - - levels_ = levels(y) - nc = length(levels_) - if rev !== nothing && rev && nc > 2 - throw(ArgumentError("Keyword `rev` can only be used in binary case.")) - end - if perm !== nothing && !isempty(perm) - length(perm) == nc || - throw(ArgumentError("`perm` must be of length matching the "* - "number of classes.")) - Set(perm) == Set(collect(1:nc)) || - throw(ArgumentError("`perm` must specify a valid permutation of "* - "`[1, 2, ..., c]`, where `c` is "* - "number of classes.")) - end - - # warning - if rev === nothing && perm === nothing - S = nonmissingtype(elscitype(y)) - if warn - if nc==2 && !(S <: OrderedFactor) - @warn "The classes are un-ordered,\n" * - "using: negative='$(levels_[1])' "* - "and positive='$(levels_[2])'.\n" * - "To suppress this warning, consider coercing "* - "to OrderedFactor." - elseif !(S <: OrderedFactor) - @warn "The classes are un-ordered,\n" * - "using order: $([l for l in levels_]).\n" * - "To suppress this warning, consider "* - "coercing to OrderedFactor." - end - end - rev = false - perm = Int[] - elseif rev !== nothing && nc == 2 - # rev takes precedence in binary case - if rev - perm = [2, 1] - else - perm = Int[] - end - end - - # No permutation - if isempty(perm) - cmat = zeros(Int, nc, nc) - @inbounds for i in eachindex(y) - (isinvalid(y[i]) || isinvalid(ŷ[i])) && continue - cmat[int(ŷ[i]), int(y[i])] += 1 - end - return ConfusionMatrixObject(cmat, string.(levels_)) - end - - # With permutation - cmat = zeros(Int, nc, nc) - iperm = invperm(perm) - @inbounds for i in eachindex(y) - (isinvalid(y[i]) || isinvalid(ŷ[i])) && continue - cmat[iperm[int(ŷ[i])], iperm[int(y[i])]] += 1 - end - return ConfusionMatrixObject(cmat, string.(levels_[perm])) -end - - -# Machinery to display the confusion matrix in a non-confusing way -# (provided the REPL is wide enough) - -splitw(w::Int) = (sp1 = div(w, 2); sp2 = w - sp1; (sp1, sp2)) - -function Base.show(stream::IO, m::MIME"text/plain", cm::ConfusionMatrixObject{C} - ) where C - width = displaysize(stream)[2] - cw = 13 - textlim = 9 - totalwidth = cw * (C+1) + C + 2 - width < totalwidth && (show(stream, m, cm.mat); return) - - iob = IOBuffer() - wline = s -> write(iob, s * "\n") - splitcw = s -> (w = cw - length(s); splitw(w)) - cropw = s -> length(s) > textlim ? 
s[1:prevind(s, textlim)] * "…" : s - - # 1.a top box - " "^(cw+1) * "┌" * "─"^((cw + 1) * C - 1) * "┐" |> wline - gt = "Ground Truth" - w = (cw + 1) * C - 1 - length(gt) - sp1, sp2 = splitw(w) - " "^(cw+1) * "│" * " "^sp1 * gt * " "^sp2 * "│" |> wline - # 1.b separator - "┌" * "─"^cw * "┼" * ("─"^cw * "┬")^(C-1) * "─"^cw * "┤" |> wline - # 2.a description line - pr = "Predicted" - sp1, sp2 = splitcw(pr) - partial = "│" * " "^sp1 * pr * " "^sp2 * "│" - for c in 1:C - # max = 10 - s = cm.labels[c] |> cropw - sp1, sp2 = splitcw(s) - partial *= " "^sp1 * s * " "^sp2 * "│" - end - partial |> wline - # 2.b separating line - "├" * "─"^cw * "┼" * ("─"^cw * "┼")^(C-1) * ("─"^cw * "┤") |> wline - # 2.c line by line - for c in 1:C - # line - s = cm.labels[c] |> cropw - sp1, sp2 = splitcw(s) - partial = "│" * " "^sp1 * s * " "^sp2 * "│" - for r in 1:C - e = string(cm[c, r]) - sp1, sp2 = splitcw(e) - partial *= " "^sp1 * e * " "^sp2 * "│" - end - partial |> wline - # separator - if c < C - "├" * "─"^cw * "┼" * ("─"^cw * "┼")^(C-1) * ("─"^cw * "┤") |> wline - end - end - # 2.d final line - "└" * "─"^cw * "┴" * ("─"^cw * "┴")^(C-1) * ("─"^cw * "┘") |> wline - write(stream, take!(iob)) -end - - -## CONFUSION MATRIX AS MEASURE - -struct ConfusionMatrix <: Aggregated - perm::Union{Nothing,Vector{<:Integer}} -end - -ConfusionMatrix(; perm=nothing) = ConfusionMatrix(perm) - -is_measure(::ConfusionMatrix) = true -is_measure_type(::Type{ConfusionMatrix}) = true -human_name(::Type{<:ConfusionMatrix}) = "confusion matrix" -target_scitype(::Type{ConfusionMatrix}) = - Union{AbstractVector{<:Union{Missing,OrderedFactor}}, - AbstractVector{<:Union{Missing,OrderedFactor}}} -supports_weights(::Type{ConfusionMatrix}) = false -prediction_type(::Type{ConfusionMatrix}) = :deterministic -instances(::Type{<:ConfusionMatrix}) = ["confusion_matrix", "confmat"] -orientation(::Type{ConfusionMatrix}) = :other -reports_each_observation(::Type{ConfusionMatrix}) = false -is_feature_dependent(::Type{ConfusionMatrix}) = false -aggregation(::Type{ConfusionMatrix}) = Sum() - -@create_aliases ConfusionMatrix - -@create_docs(ConfusionMatrix, -body= -""" -If `r` is the return value, then the raw confusion matrix is `r.mat`, -whose rows correspond to predictions, and columns to ground truth. -The ordering follows that of `levels(y)`. - -Use `ConfusionMatrix(perm=[2, 1])` to reverse the class order for binary -data. For more than two classes, specify an appropriate permutation, as in -`ConfusionMatrix(perm=[2, 3, 1])`. - -""", -scitype=DOC_ORDERED_FACTOR_BINARY) - -# calling behaviour: -call(m::ConfusionMatrix, ŷ, y) = _confmat(ŷ, y, perm=m.perm) - -# overloading addition to make aggregation work: -Base.round(m::MLJBase.ConfusionMatrixObject; kws...) 
= m -function Base.:+(m1::ConfusionMatrixObject, m2::ConfusionMatrixObject) - if m1.labels != m2.labels - throw(ArgumentError("Confusion matrix labels must agree")) - end - ConfusionMatrixObject(m1.mat + m2.mat, m1.labels) -end diff --git a/src/measures/continuous.jl b/src/measures/continuous.jl deleted file mode 100644 index 33670216..00000000 --- a/src/measures/continuous.jl +++ /dev/null @@ -1,315 +0,0 @@ -const InfiniteArrMissing = Union{ - AbstractArray{<:Union{Missing,Continuous}}, - AbstractArray{<:Union{Missing,Count}}} - -# ----------------------------------------------------------- -# MeanAbsoluteError - -struct MeanAbsoluteError <: Aggregated end - -metadata_measure(MeanAbsoluteError; - instances = ["mae", "mav", "mean_absolute_error", - "mean_absolute_value"], - target_scitype = InfiniteArrMissing, - prediction_type = :deterministic, - orientation = :loss), - -const MAE = MeanAbsoluteError -const MAV = MeanAbsoluteError -@create_aliases MeanAbsoluteError - -@create_docs(MeanAbsoluteError, -body= -""" -``\\text{mean absolute error} = n^{-1}∑ᵢ|yᵢ-ŷᵢ|`` or -``\\text{mean absolute error} = n^{-1}∑ᵢwᵢ|yᵢ-ŷᵢ|`` -""", -scitype=DOC_INFINITE) - -call(::MeanAbsoluteError, ŷ, y) = abs.(ŷ .- y) |> skipinvalid |> mean -call(::MeanAbsoluteError, ŷ, y, w) = abs.(ŷ .- y) .* w |> skipinvalid |> mean - -# ---------------------------------------------------------------- -# RootMeanSquaredError - -struct RootMeanSquaredError <: Aggregated end - -metadata_measure(RootMeanSquaredError; - instances = ["rms", "rmse", - "root_mean_squared_error"], - target_scitype = InfiniteArrMissing, - prediction_type = :deterministic, - orientation = :loss, - aggregation = RootMeanSquare()) - -const RMS = RootMeanSquaredError -@create_aliases RootMeanSquaredError - -@create_docs(RootMeanSquaredError, -body= -""" -``\\text{root mean squared error} = \\sqrt{n^{-1}∑ᵢ|yᵢ-ŷᵢ|^2}`` or -``\\text{root mean squared error} = \\sqrt{\\frac{∑ᵢwᵢ|yᵢ-ŷᵢ|^2}{∑ᵢwᵢ}}`` -""", -scitype=DOC_INFINITE) - -call(::RootMeanSquaredError, ŷ, y) = (y .- ŷ).^2 |> skipinvalid |> mean |> sqrt -call(::RootMeanSquaredError, ŷ, y, w) = (y .- ŷ).^2 .* w |> skipinvalid |> mean |> sqrt - -# ------------------------------------------------------------------------- -# R-squared (coefficient of determination) - -struct RSquared <: Aggregated end - -metadata_measure(RSquared; - instances = ["rsq", "rsquared"], - target_scitype = InfiniteArrMissing, - prediction_type = :deterministic, - orientation = :score, - supports_weights = false) - -const RSQ = RSquared -@create_aliases RSquared - -@create_docs(RSquared, -body= -""" -The R² (also known as R-squared or coefficient of determination) is suitable for -interpreting linear regression analysis (Chicco et al., [2021](https://doi.org/10.7717/peerj-cs.623)). 
-
-Let ``\\overline{y}`` denote the mean of ``y``, then
-
-``\\text{R}^2 = 1 - \\frac{∑ (\\hat{y} - y)^2}{∑ (\\overline{y} - y)^2}.``
-""",
-scitype=DOC_INFINITE)
-
-function call(::RSquared, ŷ, y)
-    num = (ŷ .- y).^2 |> skipinvalid |> sum
-    mean_y = mean(y)
-    denom = (mean_y .- y).^2 |> skipinvalid |> sum
-    return 1 - (num / denom)
-end
-
-# -------------------------------------------------------------------
-# LP
-
-struct LPLoss{T<:Real} <: Unaggregated
-    p::T
-end
-
-LPLoss(; p=2.0) = LPLoss(p)
-
-metadata_measure(LPLoss;
-                 instances = ["l1", "l2"],
-                 target_scitype = InfiniteArrMissing,
-                 prediction_type = :deterministic,
-                 orientation = :loss)
-
-const l1 = LPLoss(1)
-const l2 = LPLoss(2)
-
-@create_docs(LPLoss,
-body=
-"""
-Constructor signature: `LPLoss(p=2)`. Reports
-`|ŷ[i] - y[i]|^p` for every index `i`.
-""",
-scitype=DOC_INFINITE)
-
-single(m::LPLoss, ŷ, y) = abs(y - ŷ)^(m.p)
-
-# ----------------------------------------------------------------------------
-# RootMeanSquaredLogError
-
-struct RootMeanSquaredLogError <: Aggregated end
-
-metadata_measure(RootMeanSquaredLogError;
-                 instances = ["rmsl", "rmsle", "root_mean_squared_log_error"],
-                 target_scitype = InfiniteArrMissing,
-                 prediction_type = :deterministic,
-                 orientation = :loss,
-                 aggregation = RootMeanSquare())
-
-const RMSL = RootMeanSquaredLogError
-@create_aliases RootMeanSquaredLogError
-
-@create_docs(RootMeanSquaredLogError,
-body=
-"""
-``\\text{root mean squared log error} =
-\\sqrt{n^{-1}∑ᵢ\\log\\left({yᵢ \\over ŷᵢ}\\right)^2}``
-""",
-footer="See also [`rmslp1`](@ref).",
-scitype=DOC_INFINITE)
-
-call(::RootMeanSquaredLogError, ŷ, y) =
-    (log.(y) - log.(ŷ)).^2 |> skipinvalid |> mean |> sqrt
-call(::RootMeanSquaredLogError, ŷ, y, w) =
-    (log.(y) - log.(ŷ)).^2 .* w |> skipinvalid |> mean |> sqrt
-
-# ---------------------------------------------------------------------------
-# RootMeanSquaredLogProportionalError
-
-struct RootMeanSquaredLogProportionalError{T<:Real} <: Aggregated
-    offset::T
-end
-
-RootMeanSquaredLogProportionalError(; offset=1.0) =
-    RootMeanSquaredLogProportionalError(offset)
-
-metadata_measure(RootMeanSquaredLogProportionalError;
-                 instances = ["rmslp1", ],
-                 target_scitype = InfiniteArrMissing,
-                 prediction_type = :deterministic,
-                 orientation = :loss,
-                 aggregation = RootMeanSquare())
-
-const RMSLP = RootMeanSquaredLogProportionalError
-@create_aliases RootMeanSquaredLogProportionalError
-
-@create_docs(RootMeanSquaredLogProportionalError,
-body=
-"""
-Constructor signature: `RootMeanSquaredLogProportionalError(; offset = 1.0)`.
-
-``\\text{root mean squared log proportional error} =
-\\sqrt{n^{-1}∑ᵢ\\log\\left({yᵢ + \\text{offset} \\over ŷᵢ + \\text{offset}}\\right)^2}``
-""",
-footer="See also [`rmsl`](@ref). 
", -scitype=DOC_INFINITE) - -call(m::RMSLP, ŷ, y) = - (log.(y .+ m.offset) - log.(ŷ .+ m.offset)).^2 |> - skipinvalid |> mean |> sqrt - -call(m::RMSLP, ŷ, y, w) = - (log.(y .+ m.offset) - log.(ŷ .+ m.offset)).^2 .* w |> - skipinvalid |> mean |> sqrt - -# -------------------------------------------------------------------------- -# RootMeanSquaredProportionalError - -struct RootMeanSquaredProportionalError{T<:Real} <: Aggregated - tol::T -end - -RootMeanSquaredProportionalError(; tol=eps()) = - RootMeanSquaredProportionalError(tol) - -metadata_measure(RootMeanSquaredProportionalError; - instances = ["rmsp", ], - target_scitype = InfiniteArrMissing, - prediction_type = :deterministic, - orientation = :loss, - aggregation = RootMeanSquare()) - -const RMSP = RootMeanSquaredProportionalError -@create_aliases RMSP - -@create_docs(RootMeanSquaredProportionalError, -body= -""" -Constructor keyword arguments: `tol` (default = `eps()`). - -``\\text{root mean squared proportional error} = -\\sqrt{m^{-1}∑ᵢ \\left({yᵢ-ŷᵢ \\over yᵢ}\\right)^2}`` - -where the sum is over indices such that `abs(yᵢ) > tol` and `m` is the number -of such indices. - -""", scitype=DOC_INFINITE) - -function call( - m::RootMeanSquaredProportionalError, - ŷ, - y, - w=nothing, - ) - ret = 0 - count = 0 - @inbounds for i in eachindex(y) - (isinvalid(y[i]) || isinvalid(ŷ[i])) && continue - ayi = abs(y[i]) - if ayi > m.tol - dev = ((y[i] - ŷ[i]) / ayi)^2 - ret += dev - ret = _scale(ret, w, i) - count += 1 - end - end - return sqrt(ret / count) -end - -# ----------------------------------------------------------------------- -# MeanAbsoluteProportionalError - -struct MeanAbsoluteProportionalError{T} <: Aggregated - tol::T -end - -MeanAbsoluteProportionalError(; tol=eps()) = MeanAbsoluteProportionalError(tol) - -metadata_measure(MeanAbsoluteProportionalError; - instances = ["mape", ], - target_scitype = InfiniteArrMissing, - prediction_type = :deterministic, - orientation = :loss) - -const MAPE = MeanAbsoluteProportionalError -@create_aliases MAPE - -@create_docs(MeanAbsoluteProportionalError, -body= -""" -Constructor key-word arguments: `tol` (default = `eps()`). - -``\\text{mean absolute proportional error} = m^{-1}∑ᵢ|{(yᵢ-ŷᵢ) \\over yᵢ}|`` - -where the sum is over indices such that `abs(yᵢ) > tol` and `m` is the number -of such indices. -""", scitype=DOC_INFINITE) - -function call( - m::MeanAbsoluteProportionalError, - ŷ, - y, - w=nothing, - ) - ret = 0 - count = 0 - @inbounds for i in eachindex(y) - (isinvalid(y[i]) || isinvalid(ŷ[i])) && continue - ayi = abs(y[i]) - if ayi > m.tol - #if y[i] != zero(eltype(y)) - dev = abs((y[i] - ŷ[i]) / ayi) - ret += dev - ret =_scale(ret, w, i) - count += 1 - end - end - return ret / count -end - -# ------------------------------------------------------------------------- -# LogCoshLoss - -struct LogCoshLoss <: Unaggregated end - -metadata_measure(LogCoshLoss; - instances = ["log_cosh", "log_cosh_loss"], - target_scitype = InfiniteArrMissing, - prediction_type = :deterministic, - orientation = :loss) - -const LogCosh = LogCoshLoss -@create_aliases LogCoshLoss - -@create_docs(LogCoshLoss, - body="Reports ``\\log(\\cosh(ŷᵢ-yᵢ))`` for each index `i`. ", - scitype=DOC_INFINITE) - -_softplus(x::T) where T<:Real = x > zero(T) ? 
x + log1p(exp(-x)) : log1p(exp(x)) -_log_cosh(x::T) where T<:Real = x + _softplus(-2x) - log(convert(T, 2)) - -single(::LogCoshLoss, ŷ, y) = _log_cosh(ŷ - y) diff --git a/src/measures/doc_strings.jl b/src/measures/doc_strings.jl deleted file mode 100644 index 03ed76df..00000000 --- a/src/measures/doc_strings.jl +++ /dev/null @@ -1,12 +0,0 @@ -# the following creates doc-strings for the aliases (`instances`) of each measure: - -for m in measures() - name = m.name - for instance in m.instances - alias = Symbol(instance) - quote - @doc "An instance of type [`$($name)`](@ref). "* - "Query the [`$($name)`](@ref) doc-string for details. " $alias - end |> eval - end -end diff --git a/src/measures/finite.jl b/src/measures/finite.jl deleted file mode 100644 index 908525ab..00000000 --- a/src/measures/finite.jl +++ /dev/null @@ -1,1247 +0,0 @@ -const FiniteArrMissing{N} = Union{ - AbstractArray{<:Union{Missing,Multiclass{N}}}, - AbstractArray{<:Union{Missing,OrderedFactor{N}}}} - -# --------------------------------------------------- -# misclassification rate - -struct MisclassificationRate <: Aggregated end - -metadata_measure(MisclassificationRate; - instances = ["misclassification_rate", "mcr"], - target_scitype = FiniteArrMissing, - prediction_type = :deterministic, - orientation = :loss) - -const MCR = MisclassificationRate -@create_aliases MCR - -@create_docs(MisclassificationRate, -body= -""" -A confusion matrix can also be passed as argument. -$INVARIANT_LABEL -""", -scitype=DOC_FINITE) - -# calling behaviour: -call(::MCR, ŷ, y) = (y .!= ŷ) |> Mean() -call(::MCR, ŷ, y, w) = (y .!= ŷ) .* w |> Mean() -(::MCR)(cm::ConfusionMatrixObject) = 1.0 - sum(diag(cm.mat)) / sum(cm.mat) - -# ------------------------------------------------------------- -# accuracy - -struct Accuracy <: Aggregated end - -metadata_measure(Accuracy; - instances = ["accuracy",], - target_scitype = FiniteArrMissing, - prediction_type = :deterministic, - orientation = :score) - -@create_aliases Accuracy - -@create_docs(Accuracy, -body= -""" -Accuracy is proportion of correct predictions `ŷ[i]` that match the -ground truth `y[i]` observations. $INVARIANT_LABEL -""", -scitype=DOC_FINITE) - -# calling behaviour: -call(::Accuracy, args...) = 1.0 - call(misclassification_rate, args...) -(::Accuracy)(m::ConfusionMatrixObject) = sum(diag(m.mat)) / sum(m.mat) - -# ----------------------------------------------------------- -# balanced accuracy - -struct BalancedAccuracy <: Aggregated - adjusted::Bool -end -BalancedAccuracy(; adjusted=false) = BalancedAccuracy(adjusted) - -metadata_measure(BalancedAccuracy; - instances = ["balanced_accuracy", "bacc", "bac"], - target_scitype = FiniteArrMissing, - prediction_type = :deterministic, - orientation = :score) - -const BACC = BalancedAccuracy -@create_aliases BACC - -@create_docs(BalancedAccuracy, -body= -""" -Balanced accuracy compensates standard [`Accuracy`](@ref) for class imbalance. -See [https://en.wikipedia.org/wiki/Precision_and_recall#Imbalanced_data](https://en.wikipedia.org/wiki/Precision_and_recall#Imbalanced_data). - -Setting `adjusted=true` rescales the score in the way prescribed in -[L. Mosley (2013): A balanced approach to the multi-class imbalance -problem. PhD thesis, Iowa State -University](https://lib.dr.iastate.edu/etd/13537/). In the binary -case, the adjusted balanced accuracy is also known as *Youden’s J -statistic*, or *informedness*. 
- -$INVARIANT_LABEL -""", -scitype=DOC_FINITE) - -function call(m::BACC, ŷm, ym, wm=nothing) - - ŷ, y, w = _skipinvalid(ŷm, ym, wm) - - if w === nothing - n_given_class = StatsBase.countmap(y) - freq(i) = @inbounds n_given_class[y[i]] - ŵ = 1 ./ freq.(eachindex(y)) - else # following sklearn, which is non-linear - ŵ = similar(w) - @inbounds for i in eachindex(w) - ŵ[i] = w[i] / sum(w .* (y .== y[i])) - end - end - s = sum(ŵ) - score = sum((ŷ .== y) .* ŵ) / sum(ŵ) - if m.adjusted - n_classes = length(levels(y)) - chance = 1 / n_classes - score -= chance - score /= 1 - chance - end - return score -end - -# --------------------------------------------------- -# kappa - -struct Kappa <: Aggregated end - -metadata_measure(Kappa; - instances = ["kappa"], - target_scitype = FiniteArrMissing, - prediction_type = :deterministic, - orientation = :score, - supports_weights = false) - -@create_aliases Kappa - -@create_docs(Kappa, -body= -""" -A metric to measure agreement between predicted labels and the ground truth. -See [https://en.wikipedia.org/wiki/Cohen%27s_kappa](https://en.wikipedia.org/wiki/Cohen%27s_kappa) - -$INVARIANT_LABEL -""", -scitype=DOC_FINITE) - -# calling behaviour: -function (::Kappa)(cm::ConfusionMatrixObject{C}) where C - # relative observed agreement - same as accuracy - p₀ = sum(diag(cm.mat))/sum(cm.mat) - - # probability of agreement due to chance - for each class cᵢ, this - # would be: (#predicted=cᵢ)/(#instances) x (#observed=cᵢ)/(#instances) - rows_sum = sum!(similar(cm.mat, 1, C), cm.mat) # 1 x C matrix - cols_sum = sum!(similar(cm.mat, C, 1), cm.mat) # C X 1 matrix - pₑ = first(rows_sum*cols_sum)/sum(rows_sum)^2 - - # Kappa calculation - κ = (p₀ - pₑ)/(1 - pₑ) - - return κ -end - -call(k::Kappa, ŷ, y) = _confmat(ŷ, y, warn=false) |> k - - -# ================================================================== -## DETERMINISTIC BINARY PREDICTIONS - ORDER-INDEPENDENT - -# ------------------------------------------------------------------ -# Matthew's correlation - -struct MatthewsCorrelation <: Aggregated end - -metadata_measure(MatthewsCorrelation; - instances = ["matthews_correlation", "mcc"], - target_scitype = FiniteArrMissing{2}, - prediction_type = :deterministic, - orientation = :score, - supports_weights = false) -const MCC = MatthewsCorrelation -@create_aliases MCC - -@create_docs(MatthewsCorrelation, -body= -""" -[https://en.wikipedia.org/wiki/Matthews_correlation_coefficient](https://en.wikipedia.org/wiki/Matthews_correlation_coefficient) -$INVARIANT_LABEL -""", -scitype=DOC_FINITE_BINARY) - -# calling behaviour: -function (::MCC)(cm::ConfusionMatrixObject{C}) where C - # http://rk.kvl.dk/introduction/index.html - # NOTE: this is O(C^3), there may be a clever way to - # speed this up though in general this is only used for low C - num = 0 - @inbounds for k in 1:C, l in 1:C, m in 1:C - num += cm[k,k] * cm[l,m] - cm[k,l] * cm[m,k] - end - den1 = 0 - den2 = 0 - @inbounds for k in 1:C - a = sum(cm[k, :]) - b = sum(cm[setdiff(1:C, k), :]) - den1 += a * b - a = sum(cm[:, k]) - b = sum(cm[:, setdiff(1:C, k)]) - den2 += a * b - end - mcc = num / sqrt(float(den1) * float(den2)) - - isnan(mcc) && return 0 - return mcc -end - -call(m::MCC, ŷ, y) = _confmat(ŷ, y, warn=false) |> m - - -# ========================================================================== -# DETERMINISTIC BINARY PREDICTIONS - ORDER DEPENDENT - -const CM2 = ConfusionMatrixObject{2} - -# -------------------------------------------------------------------------- -# FScore - -struct FScore{T<:Real} <: 
Aggregated - β::T - rev::Union{Nothing,Bool} -end - -FScore(; β=1.0, rev=nothing) = FScore(β, rev) - -metadata_measure(FScore; - human_name = "F-Score", - instances = ["f1score",], - target_scitype = FiniteArrMissing{2}, - prediction_type = :deterministic, - orientation = :score, - supports_weights = false) - -@create_aliases FScore - -@create_docs(FScore, -body= -""" -This is the one-parameter generalization, ``F_β``, of the F-measure or -balanced F-score. - -[https://en.wikipedia.org/wiki/F1_score](https://en.wikipedia.org/wiki/F1_score) - -Constructor signature: `FScore(; β=1.0, rev=true)`. - -By default, the second element of `levels(y)` is designated as -`true`. To reverse roles, specify `rev=true`. -""", -scitype=DOC_ORDERED_FACTOR_BINARY, -footer="Constructor signature: `FScore(β=1.0, rev=false)`. ") - -# calling on conf matrix: -function (score::FScore)(m::CM2) - β = score.β - β2 = β^2 - tp = _tp(m) - fn = _fn(m) - fp = _fp(m) - return (1 + β2) * tp / ((1 + β2)*tp + β2*fn + fp) -end - -# calling on arrays: -call(m::FScore, ŷ, y) = _confmat(ŷ, y; rev=m.rev) |> m - -# ------------------------------------------------------------------------- -# TruePositive and its cousins - struct and metadata declerations - -const TRUE_POSITIVE_AND_COUSINS = - (:TruePositive, :TrueNegative, :FalsePositive, :FalseNegative, - :TruePositiveRate, :TrueNegativeRate, :FalsePositiveRate, - :FalseNegativeRate, :FalseDiscoveryRate, :Precision, - :NegativePredictiveValue) - -for M in TRUE_POSITIVE_AND_COUSINS - ex = quote - struct $M <: Aggregated rev::Union{Nothing,Bool} end - $M(; rev=nothing) = $M(rev) - end - eval(ex) -end - -metadata_measure.((FalsePositive, FalseNegative); - target_scitype = FiniteArrMissing{2}, - prediction_type = :deterministic, - orientation = :loss, - aggregation = Sum(), - supports_weights = false) - -metadata_measure.((FalsePositiveRate, FalseNegativeRate, FalseDiscoveryRate); - target_scitype = FiniteArrMissing{2}, - prediction_type = :deterministic, - orientation = :loss, - supports_weights = false) - -metadata_measure.((TruePositive, TrueNegative); - target_scitype = FiniteArrMissing{2}, - prediction_type = :deterministic, - orientation = :score, - aggregation = Sum(), - supports_weights = false) - -metadata_measure.((TruePositiveRate, TrueNegativeRate, Precision, - NegativePredictiveValue); - target_scitype = FiniteArrMissing{2}, - prediction_type = :deterministic, - orientation = :score, - supports_weights = false) - -# adjustments: -instances(::Type{<:TruePositive}) = ["true_positive", "truepositive"] -human_name(::Type{<:TruePositive}) = "number of true positives" - -instances(::Type{<:TrueNegative}) = ["true_negative", "truenegative"] -human_name(::Type{<:TrueNegative}) = "number of true negatives" - -instances(::Type{<:FalsePositive}) = ["false_positive", "falsepositive"] -human_name(::Type{<:FalsePositive}) = "number of false positives" - -instances(::Type{<:FalseNegative}) = ["false_negative", "falsenegative"] -human_name(::Type{<:FalseNegative}) = "number of false negatives" - -instances(::Type{<:TruePositiveRate}) = - ["true_positive_rate", "truepositive_rate", - "tpr", "sensitivity", "recall", "hit_rate"] -human_name(::Type{<:TruePositiveRate}) = - "true positive rate (a.k.a recall)" - -instances(::Type{<:TrueNegativeRate}) = - ["true_negative_rate", "truenegative_rate", "tnr", - "specificity", "selectivity"] - -instances(::Type{<:FalsePositiveRate}) = - ["false_positive_rate", "falsepositive_rate", - "fpr", "fallout"] - "." 
-instances(::Type{<:FalseNegativeRate}) = - ["false_negative_rate", "falsenegative_rate", "fnr", "miss_rate"] - "." -instances(::Type{<:FalseDiscoveryRate}) = - ["false_discovery_rate", "falsediscovery_rate", "fdr"] - -instances(::Type{<:NegativePredictiveValue}) = - ["negative_predictive_value", "negativepredictive_value", "npv"] - -instances(::Type{<:Precision}) = - ["positive_predictive_value", "ppv", "positivepredictive_value", "precision"] -human_name(::Type{<:Precision}) = - "precision (a.k.a. positive predictive value)" - - -# --------------------------------------------------------------------- -# TruePositive and its cousins - doc-string building and alias creation - -for M in TRUE_POSITIVE_AND_COUSINS - eval(quote - $M == Precision || @create_aliases $M # precision handled separately - - @create_docs($M, - body= - """ - Assigns `false` to first element of `levels(y)`. To reverse roles, - use `$(name($M))(rev=true)`. - """, - scitype=DOC_ORDERED_FACTOR_BINARY) - end) -end - -# type aliases: -const TNR = TrueNegativeRate -const Specificity = TrueNegativeRate -const TPR = TruePositiveRate -const Recall = TPR -const FPR = FalsePositiveRate -const FNR = FalseNegativeRate -const FDR = FalseDiscoveryRate -const NPV = NegativePredictiveValue -const PPV = Precision - -# special case of precision; cannot generate alias's automatically due -# to conflict with Base.precision: -const positive_predictive_value = Precision() -const ppv = Precision() -const positivepredictive_value = Precision() - -# ---------------------------------------------------------------------- -# TruePositive and its cousins - helper functions for confusion matrices - -_tp(m::CM2) = m[2,2] -_tn(m::CM2) = m[1,1] -_fp(m::CM2) = m[2,1] -_fn(m::CM2) = m[1,2] - -_tpr(m::CM2) = _tp(m) / (_tp(m) + _fn(m)) -_tnr(m::CM2) = _tn(m) / (_tn(m) + _fp(m)) -_fpr(m::CM2) = 1 - _tnr(m) -_fnr(m::CM2) = 1 - _tpr(m) - -_fdr(m::CM2) = _fp(m) / (_tp(m) + _fp(m)) -_npv(m::CM2) = _tn(m) / (_tn(m) + _fn(m)) - -# ---------------------------------------------------------------------- -# TruePositive and its cousins - calling behaviour - -# NOTE: here we assume the CM was constructed a priori with the -# proper ordering so the field `rev` in the measure is ignored - -# on confusion matrices: -(::TruePositive)(m::CM2) = _tp(m) -(::TrueNegative)(m::CM2) = _tn(m) -(::FalsePositive)(m::CM2) = _fp(m) -(::FalseNegative)(m::CM2) = _fn(m) -(::TPR)(m::CM2) = _tpr(m) -(::TNR)(m::CM2) = _tnr(m) -(::FPR)(m::CM2) = _fpr(m) -(::FNR)(m::CM2) = _fnr(m) -(::FDR)(m::CM2) = _fdr(m) -(::NPV)(m::CM2) = _npv(m) -(::Precision)(m::CM2) = 1.0 - _fdr(m) - -# on arrays (ŷ, y): -for M_ex in TRUE_POSITIVE_AND_COUSINS - @eval call(m::$M_ex, ŷ, y) = _confmat(ŷ, y; rev=m.rev) |> m -end - -# since Base.precision exists (as single argument function) we -# manually overload Base.precision: -Base.precision(m::CM2) = m |> Precision() -function Base.precision(ŷ, y) - _check(Precision(), ŷ, y) - call(Precision(), ŷ, y) -end - - -# ================================================================= -# MULTICLASS AND ORDER INDEPENDENT - -const CM = ConfusionMatrixObject{N} where N - -abstract type MulticlassAvg end -struct MacroAvg <: MulticlassAvg end -struct MicroAvg <: MulticlassAvg end -struct NoAvg <: MulticlassAvg end - -const macro_avg = MacroAvg() -const micro_avg = MicroAvg() -const no_avg = NoAvg() - -const DS_AVG_RET = "Options for `average` are: `no_avg`, `macro_avg` "* - "(default) and `micro_avg`. 
Options for `return_type`, "* - "applying in the `no_avg` case, are: `LittleDict` (default) or "* - "`Vector`. " - -const DS_RET = "Options for `return_type` are: "* - "`LittleDict`(default) or "* - "`Vector`. " - -const CLASS_W = "An optional `AbstractDict`, denoted `class_w` above, "* - "keyed on `levels(y)`, specifies class weights. It applies if "* - "`average=macro_avg` or `average=no_avg`." - -""" - MulticlassFScore(; β=1.0, average=macro_avg, return_type=LittleDict) - -One-parameter generalization, ``F_β``, of the F-measure or balanced F-score for -multiclass observations. - - MulticlassFScore()(ŷ, y) - MulticlassFScore()(ŷ, y, class_w) - -Evaluate the default score on multiclass observations, `ŷ`, given -ground truth values, `y`. $DS_AVG_RET $CLASS_W - -For more information, run `info(MulticlassFScore)`. - -""" -struct MulticlassFScore{T<:Real, - M<:MulticlassAvg, - U<:Union{Vector, LittleDict}} <:Aggregated - β::T - average::M - return_type::Type{U} -end - -MulticlassFScore(; β=1.0, average=macro_avg, return_type=LittleDict) = - MulticlassFScore(β, average, return_type) - -metadata_measure(MulticlassFScore; - instances = ["macro_f1score", "micro_f1score", - "multiclass_f1score"], - target_scitype = FiniteArrMissing, - prediction_type = :deterministic, - orientation = :score, - supports_weights = false, - supports_class_weights = true) - -MLJModelInterface.docstring(::Type{<:MulticlassFScore}) = - "Multiclass F_β score; aliases: " * - "`macro_f1score=MulticlassFScore()`, "* - "`multiclass_f1score=MulticlassFScore()` " * - "`micro_f1score=MulticlassFScore(average=micro_avg)`." - -const micro_f1score = MulticlassFScore(average=micro_avg) -const macro_f1score = MulticlassFScore(average=macro_avg) -const multiclass_f1score = MulticlassFScore(average=macro_avg) - -for M in (:MulticlassTruePositive, :MulticlassTrueNegative, - :MulticlassFalsePositive, :MulticlassFalseNegative) - ex = quote - struct $M{U<:Union{Vector, LittleDict}} <: Aggregated - return_type::Type{U} - end -# $M(return_type::Type{U}) where {U} = $M(return_type) - $M(; return_type=LittleDict) = $M(return_type) - end - eval(ex) -end - -const _mtp_vec = MulticlassTruePositive(return_type=Vector) -const _mfn_vec = MulticlassFalseNegative(return_type=Vector) -const _mfp_vec = MulticlassFalsePositive(return_type=Vector) -const _mtn_vec = MulticlassTrueNegative(return_type=Vector) - -for M in (:MulticlassTruePositiveRate, :MulticlassTrueNegativeRate, - :MulticlassFalsePositiveRate, :MulticlassFalseNegativeRate, - :MulticlassFalseDiscoveryRate, :MulticlassPrecision, - :MulticlassNegativePredictiveValue) - ex = quote - struct $M{T<:MulticlassAvg, U<:Union{Vector, LittleDict}} <: Aggregated - average::T - return_type::Type{U} - end - $M(; average=macro_avg, return_type=LittleDict) = $M(average, return_type) - end - eval(ex) -end - -metadata_measure.((MulticlassFalsePositive, MulticlassFalseNegative); - target_scitype = FiniteArrMissing, - prediction_type = :deterministic, - orientation = :loss, - aggregation = Sum(), - is_feature_dependent = false, - supports_weights = false, - supports_class_weights = false) - -metadata_measure.((MulticlassFalsePositiveRate, MulticlassFalseNegativeRate, - MulticlassFalseDiscoveryRate); - target_scitype = FiniteArrMissing, - prediction_type = :deterministic, - orientation = :loss, - is_feature_dependent = false, - supports_weights = false, - supports_class_weights = true) - -metadata_measure.((MulticlassTruePositive, MulticlassTrueNegative); - target_scitype = FiniteArrMissing, - prediction_type = 
:deterministic, - orientation = :score, - aggregation = Sum(), - is_feature_dependent = false, - supports_weights = false, - supports_class_weights = false) - -metadata_measure.((MulticlassTrueNegativeRate, MulticlassNegativePredictiveValue); - target_scitype = FiniteArrMissing, - prediction_type = :deterministic, - orientation = :score, - is_feature_dependent = false, - supports_weights = false, - supports_class_weights = true) - -metadata_measure.((MulticlassTruePositiveRate, MulticlassPrecision); - target_scitype = FiniteArrMissing, - prediction_type = :deterministic, - orientation = :score, - is_feature_dependent = false, - supports_weights = false, - supports_class_weights = true) - -MMI.docstring(::Type{<:MulticlassTruePositive}) = - "Number of true positives; " * - "aliases: `multiclass_true_positive`, `multiclass_truepositive`." -instances(::Type{<:MulticlassTruePositive}) = - ["multiclass_true_positive", "multiclass_truepositive"] -MMI.docstring(::Type{<:MulticlassTrueNegative}) = - "Number of true negatives; " * - "aliases: `multiclass_true_negative`, `multiclass_truenegative`." -instances(::Type{<:MulticlassTrueNegative}) = - ["multiclass_true_negative", "multiclass_truenegative"] -MMI.docstring(::Type{<:MulticlassFalsePositive}) = - "Number of false positives; " * - "aliases: `multiclass_false_positive`, `multiclass_falsepositive`." -instances(::Type{<:MulticlassFalsePositive}) = - ["multiclass_false_positive", "multiclass_falsepositive"] -MMI.docstring(::Type{<:MulticlassFalseNegative}) = - "Number of false negatives; " * - "aliases: `multiclass_false_negative`, `multiclass_falsenegative`." -instances(::Type{<:MulticlassFalseNegative}) = - ["multiclass_false_negative", "multiclass_falsenegative"] - -MMI.docstring(::Type{<:MulticlassTruePositiveRate}) = - "multiclass true positive rate; aliases: " * - "`multiclass_true_positive_rate`, `multiclass_tpr`, " * - "`multiclass_sensitivity`, `multiclass_recall`, " * - "`multiclass_hit_rate`, `multiclass_truepositive_rate`, " -instances(::Type{<:MulticlassTruePositiveRate}) = - ["multiclass_true_positive_rate", "multiclass_tpr", - "multiclass_sensitivity", "multiclass_recall", - "multiclass_hit_rate", "multiclass_truepositive_rate"] -MMI.docstring(::Type{<:MulticlassTrueNegativeRate}) = - "multiclass true negative rate; aliases: " * - "`multiclass_true_negative_rate`, `multiclass_tnr` " * - " `multiclass_specificity`, `multiclass_selectivity`, " * - "`multiclass_truenegative_rate`." -instances(::Type{<:MulticlassTrueNegativeRate}) = - ["multiclass_true_negative_rate", "multiclass_tnr", - "multiclass_specificity", "multiclass_selectivity", - "multiclass_truenegative_rate"] -MMI.docstring(::Type{<:MulticlassFalsePositiveRate}) = - "multiclass false positive rate; aliases: " * - "`multiclass_false_positive_rate`, `multiclass_fpr` " * - "`multiclass_fallout`, `multiclass_falsepositive_rate`." -instances(::Type{<:MulticlassFalsePositiveRate}) = - ["multiclass_false_positive_rate", "multiclass_fpr", - "multiclass_fallout", "multiclass_falsepositive_rate"] -MMI.docstring(::Type{<:MulticlassFalseNegativeRate}) = - "multiclass false negative rate; aliases: " * - "`multiclass_false_negative_rate`, `multiclass_fnr`, " * - "`multiclass_miss_rate`, `multiclass_falsenegative_rate`." 
-instances(::Type{<:MulticlassFalseNegativeRate}) = - ["multiclass_false_negative_rate", "multiclass_fnr", - "multiclass_miss_rate", "multiclass_falsenegative_rate"] -MMI.docstring(::Type{<:MulticlassFalseDiscoveryRate}) = - "multiclass false discovery rate; "* - "aliases: `multiclass_false_discovery_rate`, " * - "`multiclass_falsediscovery_rate`, `multiclass_fdr`." -instances(::Type{<:MulticlassFalseDiscoveryRate}) = - ["multiclass_falsediscovery_rate", "multiclass_fdr", - "multiclass_false_discovery_rate"] -MMI.docstring(::Type{<:MulticlassNegativePredictiveValue}) = - "multiclass negative predictive value; aliases: " * - "`multiclass_negative_predictive_value`, " * - "`multiclass_negativepredictive_value`, `multiclass_npv`." -instances(::Type{<:MulticlassNegativePredictiveValue}) = - ["multiclass_negative_predictive_value", - "multiclass_negativepredictive_value", "multiclass_npv"] -MMI.docstring(::Type{<:MulticlassPrecision}) = - "multiclass positive predictive value (aka precision);"* - " aliases: `multiclass_positive_predictive_value`, `multiclass_ppv`, " * - "`multiclass_positivepredictive_value`, " * - "`multiclass_precision`." -instances(::Type{<:MulticlassPrecision}) = - ["multiclass_positive_predictive_value", "multiclass_ppv", - "multiclass_positivepredictive_value", "multiclass_precision"] - -const W_KEY_MISMATCH = "Encountered target with levels different from the " * - "keys of user-specified dictionary of class weights." -const W_PROMOTE_WARN = "Using macro averaging instead of micro averaging, as "* - "class weights specified. " - - -# ---------------------------------------------------- -# MulticlassTruePositive - -""" - MulticlassTruePositive(; return_type=LittleDict) - -$(docstring(MulticlassTruePositive())) - - MulticlassTruePositive()(ŷ, y) - -Number of true positives for multiclass observations `ŷ` and ground -truth `y`, using default return type. $DS_RET - -For more information, run `info(MulticlassTruePositive)`. - -""" -function MulticlassTruePositive end -const multiclass_true_positive = MulticlassTruePositive() -const multiclass_truepositive = MulticlassTruePositive() -const mtp = MulticlassTruePositive() - - -# ---------------------------------------------------- -# MulticlassTrueNegative - -""" - MulticlassTrueNegative(; return_type=LittleDict) - -$(docstring(MulticlassTrueNegative())) - - MulticlassTrueNegative()(ŷ, y) - -Number of true negatives for multiclass observations `ŷ` and ground truth -`y`, using default return type. $DS_RET - -For more information, run `info(MulticlassTrueNegative)`. - -""" -function MulticlassTrueNegative end -const multiclass_true_negative = MulticlassTrueNegative() -const multiclass_truenegative = MulticlassTrueNegative() -const mtn = MulticlassTrueNegative() - - -# ---------------------------------------------------- -# MulticlassFalsePositive - -""" - MulticlassFalsePositive(; return_type=LittleDict) - -$(docstring(MulticlassFalsePositive())) - - MulticlassFalsePositive()(ŷ, y) - -Number of false positives for multiclass observations `ŷ` and ground -truth `y`, using default return type. $DS_RET - -For more information, run `info(MulticlassFalsePositive)`. 
- -""" -function MulticlassPositive end -const multiclass_false_positive = MulticlassFalsePositive() -const multiclass_falsepositive = MulticlassFalsePositive() -const mfp = MulticlassFalsePositive() - - -# ---------------------------------------------------- -# MulticlassFalseNegative - -""" - MulticlassFalseNegative(; return_type=LittleDict) - -$(docstring(MulticlassFalseNegative())) - - MulticlassFalseNegative()(ŷ, y) - -Number of false negatives for multiclass observations `ŷ` and ground -truth `y`, using default return type. $DS_RET - -For more information, run `info(MulticlassFalseNegative)`. - -""" -function MulticlassNegative end -const multiclass_false_negative = MulticlassFalseNegative() -const multiclass_falsenegative = MulticlassFalseNegative() -const mfn = MulticlassFalseNegative() - - -# ---------------------------------------------------- -# MulticlassTruePositiveRate - -""" - MulticlassTruePositiveRate(; average=macro_avg, return_type=LittleDict) - -$(docstring(MulticlassTruePositiveRate())) - - MulticlassTruePositiveRate(ŷ, y) - MulticlassTruePositiveRate(ŷ, y, class_w) - -True positive rate (a.k.a. sensitivity, recall, hit rate) for -multiclass observations `ŷ` and ground truth `y`, using default -averaging and return type. $DS_AVG_RET $CLASS_W - -For more information, run `info(MulticlassTruePositiveRate)`. - -""" -function MulticlassTruePositiveRate end -const multiclass_true_positive_rate = MulticlassTruePositiveRate() -const multiclass_truepositive_rate = MulticlassTruePositiveRate() -const multiclass_tpr = MulticlassTruePositiveRate() -const multiclass_sensitivity = MulticlassTruePositiveRate() -const multiclass_hit_rate = MulticlassTruePositiveRate() -const MTPR = MulticlassTruePositiveRate -const multiclass_recall = MulticlassTruePositiveRate() -const MulticlassRecall = MulticlassTruePositiveRate - - -# ---------------------------------------------------- -# MulticlassTrueNegativeRate - -""" - MulticlassTrueNegativeRate(; average=macro_avg, return_type=LittleDict) - -$(docstring(MulticlassTrueNegativeRate())) - - MulticlassTrueNegativeRate()(ŷ, y) - MulticlassTrueNegativeRate()(ŷ, y, class_w) - -True negative rate for multiclass observations `ŷ` and ground truth -`y`, using default averaging and return type. $DS_AVG_RET $CLASS_W - -For more information, run `info(MulticlassTrueNegativeRate)`. - -""" -function MulticlassTrueNegativeRate end -const multiclass_true_negative_rate = MulticlassTrueNegativeRate() -const multiclass_truenegative_rate = MulticlassTrueNegativeRate() -const multiclass_tnr = MulticlassTrueNegativeRate() -const multiclass_specificity = MulticlassTrueNegativeRate() -const multiclass_selectivity = MulticlassTrueNegativeRate() -const MulticlassSpecificity = MulticlassTrueNegativeRate -const MTNR = MulticlassTrueNegativeRate - - -# ---------------------------------------------------- -# MulticlassFalsePositiveRate - -""" - MulticlassFalsePositiveRate(; average=macro_avg, return_type=LittleDict) - -$(docstring(MulticlassFalsePositiveRate())) - - MulticlassFalsePositiveRate()(ŷ, y) - MulticlassFalsePositiveRate()(ŷ, y, class_w) - -False positive rate for multiclass observations `ŷ` and ground truth -`y`, using default averaging and return type. $DS_AVG_RET $CLASS_W - -For more information, run `info(MulticlassFalsePositiveRate)`. 
- -""" -function MulticlassFalsePositiveRate end -const multiclass_false_positive_rate = MulticlassFalsePositiveRate() -const multiclass_falsepositive_rate = MulticlassFalsePositiveRate() -const multiclass_fpr = MulticlassFalsePositiveRate() -const MFPR = MulticlassFalsePositiveRate -const multiclass_fallout = MFPR() - - -# ---------------------------------------------------- -# MulticlassFalseNegativeRate - -""" - MulticlassFalseNegativeRate(; average=macro_avg, return_type=LittleDict) - -$(docstring(MulticlassFalseNegativeRate())) - - MulticlassFalseNegativeRate()(ŷ, y) - MulticlassFalseNegativeRate()(ŷ, y, class_w) - -False negative rate for multiclass observations `ŷ` and ground truth -`y`, using default averaging and return type. $DS_AVG_RET $CLASS_W - -For more information, run `info(MulticlassFalseNegativeRate)`. - -""" -function MulticlassFalseNegativeRate end -const multiclass_false_negative_rate = MulticlassFalseNegativeRate() -const multiclass_falsenegative_rate = MulticlassFalseNegativeRate() -const multiclass_fnr = MulticlassFalseNegativeRate() -const MFNR = MulticlassFalseNegativeRate -const multiclass_miss_rate = MFNR() - - -# ---------------------------------------------------- -# MulticlassFalseDiscoveryRate - -""" - MulticlassFalseDiscoveryRate(; average=macro_avg, return_type=LittleDict) - -$(docstring(MulticlassFalseDiscoveryRate())) - - MulticlassFalseDiscoveryRate()(ŷ, y) - MulticlassFalseDiscoveryRate()(ŷ, y, class_w) - -False discovery rate for multiclass observations `ŷ` and ground truth -`y`, using default averaging and return type. $DS_AVG_RET $CLASS_W - -For more information, run `info(MulticlassFalseDiscoveryRate)`. - -""" -function MulticlassFalseDiscoveryRate end -const multiclass_false_discovery_rate = MulticlassFalseDiscoveryRate() -const multiclass_falsediscovery_rate = MulticlassFalseDiscoveryRate() -const multiclass_fdr = MulticlassFalseDiscoveryRate() -const MFDR = MulticlassFalseDiscoveryRate - - -# ---------------------------------------------------- -# MulticlassPrecision - -""" - MulticlassPrecision(; average=macro_avg, return_type=LittleDict) - -$(docstring(MulticlassPrecision())) - - MulticlassPrecision()(ŷ, y) - MulticlassPrecision()(ŷ, y, class_w) - -Precision for multiclass observations `ŷ` and ground truth `y`, using -default averaging and return type. $DS_AVG_RET $CLASS_W - -For more information, run `info(MulticlassPrecision)`. - -""" -function MulticlassPrecision end -const multiclass_precision = MulticlassPrecision() -const multiclass_ppv = MulticlassPrecision() -const multiclass_positive_predictive_value = MulticlassPrecision() -const multiclass_positivepredictive_value = MulticlassPrecision() -const MPPV = MulticlassPrecision - - -# ---------------------------------------------------- -# MulticlassNegativePredictiveValue - -""" - MulticlassNegativePredictiveValue(; average=macro_avg, return_type=LittleDict) - -$(docstring(MulticlassNegativePredictiveValue())) - - MulticlassNegativePredictiveValue()(ŷ, y) - MulticlassNegativePredictiveValue()(ŷ, y, class_w) - -Negative predictive value for multiclass observations `ŷ` and ground truth -`y`, using default averaging and return type. $DS_AVG_RET $CLASS_W - -For more information, run `info(MulticlassNegativePredictiveValue)`. 
- -""" -function MulticlassNegativePredictiveValue end -const multiclass_npv = MulticlassNegativePredictiveValue() -const multiclass_negative_predictive_value = MulticlassNegativePredictiveValue() -const multiclass_negativepredictive_value = MulticlassNegativePredictiveValue() -const MNPV = MulticlassNegativePredictiveValue - - -# ----------------------------------------------------- -## INTERNAL FUNCTIONS ON MULTICLASS CONFUSION MATRIX - -_mtp(m::CM, return_type::Type{Vector}) = diag(m.mat) -_mtp(m::CM, return_type::Type{LittleDict}) = - LittleDict(m.labels, diag(m.mat)) - -_mfp(m::CM, return_type::Type{Vector}) = - (col_sum = vec(sum(m.mat, dims=2)); col_sum .-= diag(m.mat)) - -_mfp(m::CM, return_type::Type{LittleDict}) = - (col_sum = vec(sum(m.mat, dims=2)); col_sum .-= diag(m.mat); - LittleDict(m.labels, col_sum)) - -_mfn(m::CM, return_type::Type{Vector}) = - (row_sum = vec(collect(transpose(sum(m.mat, dims=1)))); - row_sum .-= diag(m.mat)) - -_mfn(m::CM, return_type::Type{LittleDict}) = - (row_sum = vec(collect(transpose(sum(m.mat, dims=1)))); - row_sum .-= diag(m.mat); LittleDict(m.labels, row_sum)) - -function _mtn(m::CM, return_type::Type{Vector}) - _sum = sum(m.mat, dims=2) - _sum .= sum(m.mat) .- (_sum .+= sum(m.mat, dims=1)'.- diag(m.mat)) - return vec(_sum) -end - -function _mtn(m::CM, return_type::Type{LittleDict}) - _sum = sum(m.mat, dims=2) - _sum .= sum(m.mat) .- (_sum .+= sum(m.mat, dims=1)'.- diag(m.mat)) - return LittleDict(m.labels, vec(_sum)) -end - -@inline _mean(x::Arr{<:Real}) = mean(skipnan(x)) # defined in src/data/data.jl - -@inline function _class_w(level_m::Arr{<:String}, - class_w::AbstractDict{<:Any, <:Real}) - class_w_labels = levels(keys(class_w)) - string.(class_w_labels) == level_m || throw(ArgumentError(W_KEY_MISMATCH)) - return [class_w[l] for l in class_w_labels] -end - -@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real}, - average::NoAvg, return_type::Type{Vector}) - return vec(a ./ (a + b)) -end - -@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real}, - average::NoAvg, return_type::Type{LittleDict}) - return LittleDict(m.labels, _mc_helper(m, a, b, average, Vector)) -end - -@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real}, - average::MacroAvg, return_type) - return _mean(_mc_helper(m, a, b, no_avg, Vector)) -end - -@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real}, - average::MicroAvg, return_type) - a_sum, b_sum = sum(a), sum(b) - return a_sum / (a_sum + b_sum) -end - -@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real}, - class_w::AbstractDict{<:Any, <:Real}, - average::NoAvg, return_type::Type{Vector}) - level_w = _class_w(m.labels, class_w) - return _mc_helper(m, a, b, no_avg, return_type) .* level_w -end - -@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real}, - class_w::AbstractDict{<:Any, <:Real}, - average::MacroAvg, return_type::Type{Vector}) - return _mean(_mc_helper(m, a, b, class_w, no_avg, return_type)) -end - -@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real}, - class_w::AbstractDict{<:Any, <:Real}, - average::MicroAvg, return_type) - @warn W_PROMOTE_WARN - return _mc_helper(m, a, b, class_w, macro_avg, Vector) -end - -@inline function _mc_helper_b(m::CM, helper_name, - class_w::AbstractDict{<:Any, <:Real}, - average::NoAvg, return_type::Type{Vector}) - level_w = _class_w(m.labels, class_w) - return (1 .- helper_name(m, no_avg, return_type)) .* level_w -end - -@inline function _mc_helper_b(m::CM, helper_name, - 
class_w::AbstractDict{<:Any, <:Real}, - average::NoAvg, return_type::Type{LittleDict}) - level_w = _class_w(m.labels, class_w) - return LittleDict(m.labels, ((1 .- helper_name(m, no_avg, Vector)) .* level_w)) -end - -@inline function _mc_helper_b(m::CM, helper_name, - class_w::AbstractDict{<:Any, <:Real}, - average::MacroAvg, return_type) - return _mean(_mc_helper_b(m, helper_name, class_w, no_avg, Vector)) -end - -@inline function _mc_helper_b(m::CM, helper_name, - class_w::AbstractDict{<:Any, <:Real}, - average::MicroAvg, return_type) - @warn W_PROMOTE_WARN - return _mc_helper_b(m, helper_name, class_w, macro_avg, Vector) -end - -@inline function _mc_helper_b(m::CM, helper_name, average::NoAvg, - return_type::Type{LittleDict}) - return LittleDict(m.labels, 1.0 .- helper_name(m, average, Vector)) -end - -@inline function _mc_helper_b(m::CM, helper_name, average::NoAvg, - return_type::Type{Vector}) - return 1.0 .- helper_name(m, average, Vector) -end - -@inline function _mc_helper_b(m::CM, helper_name, average::MacroAvg, - return_type) - return 1.0 .- helper_name(m, average, Vector) -end - -@inline function _mc_helper_b(m::CM, helper_name, average::MicroAvg, - return_type) - return 1.0 .- helper_name(m, average, Vector) -end - -@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real}, - class_w::AbstractDict{<:Any, <:Real}, - average::NoAvg, return_type::Type{LittleDict}) - level_w = _class_w(m.labels, class_w) - return LittleDict(m.labels, _mc_helper(m, a, b, class_w, no_avg, Vector)) -end - -@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real}, - class_w::AbstractDict{<:Any, <:Real}, - average::MacroAvg, return_type::Type{U}) where U - return _mean(_mc_helper(m, a, b, class_w, no_avg, Vector)) -end - -@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real}, - class_w::AbstractDict{<:Any, <:Real}, - average::MicroAvg, return_type::Type{U}) where U - @warn W_PROMOTE_WARN - return _mc_helper(m, a, b, class_w, macro_avg, return_type) -end - -function _mtpr(m::CM, average::A, return_type::Type{U}) where {A, U} - mtp_val, mfn_val = _mtp_vec(m), _mfn_vec(m) - return _mc_helper(m, mtp_val, mfn_val, average, return_type) -end - -function _mtpr(m::CM, class_w::AbstractDict{<:Any, <:Real}, average::A, - return_type::Type{U}) where {A, U} - mtp_val, mfn_val = _mtp_vec(m), _mfn_vec(m) - return _mc_helper(m, mtp_val, mfn_val, class_w, average, return_type) -end - -function _mtnr(m::CM, average::A, return_type::Type{U}) where {A, U} - mtn_val, mfp_val = _mtn_vec(m), _mfp_vec(m) - return _mc_helper(m, mtn_val, mfp_val, average, return_type) -end - -function _mtnr(m::CM, class_w::AbstractDict{<:Any, <:Real}, average::A, - return_type::Type{U}) where {A, U} - mtn_val, mfp_val = _mtn_vec(m), _mfp_vec(m) - return _mc_helper(m, mtn_val, mfp_val, class_w, average, return_type) -end - -_mfpr(m::CM, average::A, return_type::Type{U}) where {A, U} = - _mc_helper_b(m, _mtnr, average, return_type) - -function _mfpr(m::CM, class_w::AbstractDict{<:Any, <:Real}, average::A, - return_type::Type{U}) where {A, U} - return _mc_helper_b(m, _mtnr, class_w, average, return_type) -end - -_mfnr(m::CM, average::A, return_type::Type{U}) where {A, U} = - _mc_helper_b(m, _mtpr, average, return_type) - -function _mfnr(m::CM, class_w::AbstractDict{<:Any, <:Real}, average::A, - return_type::Type{U}) where {A, U} - return _mc_helper_b(m, _mtpr, class_w, average, return_type) -end - -function _mfdr(m::CM, average::A, return_type::Type{U}) where {A, U} - mfp_val, mtp_val = _mfp_vec(m), _mtp_vec(m) - 
return _mc_helper(m, mfp_val, mtp_val, average, return_type) -end - -function _mfdr(m::CM, class_w::AbstractDict{<:Any, <:Real}, average::A, - return_type::Type{U}) where {A, U} - mfp_val, mtp_val = _mfp_vec(m), _mtp_vec(m) - return _mc_helper(m, mfp_val, mtp_val, class_w, average, return_type) -end - -function _mnpv(m::CM, average::A, return_type::Type{U}) where {A, U} - mtn_val, mfn_val = _mtn_vec(m), _mfn_vec(m) - return _mc_helper(m, mtn_val, mfn_val, average, return_type) -end - -function _mnpv(m::CM, class_w::AbstractDict{<:Any, <:Real}, average::A, - return_type::Type{U}) where {A, U} - mtn_val, mfn_val = _mtn_vec(m), _mfn_vec(m) - return _mc_helper(m, mtn_val, mfn_val, class_w, average, return_type) -end - -## CALLABLES ON MULTICLASS CONFUSION MATRIX - -(p::MulticlassTruePositive)(m::CM) = _mtp(m, p.return_type) -(n::MulticlassTrueNegative)(m::CM) = _mtn(m, n.return_type) -(p::MulticlassFalsePositive)(m::CM) = _mfp(m, p.return_type) -(n::MulticlassFalseNegative)(m::CM) = _mfn(m, n.return_type) - -(r::MTPR)(m::CM) = _mtpr(m, r.average, r.return_type) -(r::MTPR)(m::CM, w::AbstractDict{<:Any, <:Real}) = - _mtpr(m, w, r.average, r.return_type) - -(r::MTNR)(m::CM) = _mtnr(m, r.average, r.return_type) -(r::MTNR)(m::CM, w::AbstractDict{<:Any, <:Real}) = - _mtnr(m, w, r.average, r.return_type) - -(r::MFPR)(m::CM) = _mfpr(m, r.average, r.return_type) -(r::MFPR)(m::CM, w::AbstractDict{<:Any, <:Real}) = - _mfpr(m, w, r.average, r.return_type) - -(r::MFNR)(m::CM) = _mfnr(m, r.average, r.return_type) -(r::MFNR)(m::CM, w::AbstractDict{<:Any, <:Real}) = - _mfnr(m, w, r.average, r.return_type) - -(r::MFDR)(m::CM) = _mfdr(m, r.average, r.return_type) -(r::MFDR)(m::CM, w::AbstractDict{<:Any, <:Real}) = - _mfdr(m, w, r.average, r.return_type) - -(v::MNPV)(m::CM) = _mnpv(m, v.average, v.return_type) -(v::MNPV)(m::CM, w::AbstractDict{<:Any, <:Real}) = - _mnpv(m, w, v.average, v.return_type) - -(p::MulticlassPrecision)(m::CM) = - _mc_helper_b(m, _mfdr, p.average, p.return_type) -(p::MulticlassPrecision)(m::CM, class_w::AbstractDict{<:Any, <:Real}) = - _mc_helper_b(m, _mfdr, class_w, p.average, p.return_type) - -@inline function _fs_helper(m::CM, β::Real, mtp_val::Arr{<:Real}, mfp_val::Arr{<:Real}, mfn_val::Arr{<:Real}, - average::NoAvg, return_type::Type{LittleDict}) - β2 = β^2 - return LittleDict(m.labels, (1 + β2) * mtp_val ./ ((1 + β2) * mtp_val + β2 * mfn_val + mfp_val)) -end - -@inline function _fs_helper(m::CM, β::Real, mtp_val::Arr{<:Real}, mfp_val::Arr{<:Real}, mfn_val::Arr{<:Real}, - average::NoAvg, return_type::Type{Vector}) - β2 = β^2 - return (1 + β2) * mtp_val ./ ((1 + β2) * mtp_val + β2 * mfn_val + mfp_val) -end - -@inline function _fs_helper(m::CM, β::Real, mtp_val::Arr{<:Real}, mfp_val::Arr{<:Real}, mfn_val::Arr{<:Real}, - average::MacroAvg, return_type::Type{U}) where U - return _mean(_fs_helper(m, β, mtp_val, mfp_val, mfn_val, no_avg, Vector)) -end - -function (f::MulticlassFScore)(m::CM) - f.average == micro_avg && return MulticlassRecall(; average=micro_avg, return_type=f.return_type)(m) - mtp_val = _mtp(m, Vector) - mfp_val = _mfp(m, Vector) - mfn_val = _mfn(m, Vector) - return _fs_helper(m, f.β, mtp_val, mfp_val, mfn_val, f.average, f.return_type) -end - -@inline function _fs_helper(m::CM, w::AbstractDict{<:Any, <:Real}, β::Real, - average::NoAvg, return_type::Type{LittleDict}) - level_w = _class_w(m.labels, w) - return LittleDict(m.labels, - MulticlassFScore(β=β, - average=no_avg, - return_type=Vector)(m) .* level_w) -end - -@inline function _fs_helper(m::CM, 
w::AbstractDict{<:Any, <:Real}, β::Real, - average::NoAvg, return_type::Type{Vector}) - level_w = _class_w(m.labels, w) - return MulticlassFScore(β=β, - average=no_avg, - return_type=Vector)(m) .* level_w -end - -@inline function _fs_helper(m::CM, w::AbstractDict{<:Any, <:Real}, β::Real, - average::MacroAvg, return_type::Type{U}) where U - return _mean(_fs_helper(m, w, β, no_avg, Vector)) -end - -@inline function _fs_helper(m::CM, w::AbstractDict{<:Any, <:Real}, β::Real, - average::MicroAvg, return_type::Type{U}) where U - @warn W_PROMOTE_WARN - return _fs_helper(m, w, β, macro_avg, return_type) -end - -function (f::MulticlassFScore)(m::CM, class_w::AbstractDict{<:Any, <:Real}) - return _fs_helper(m, class_w, f.β, f.average, f.return_type) -end - -## Callables on arrays - -for M_ex in (:MulticlassTruePositive, :MulticlassTrueNegative, - :MulticlassFalsePositive, :MulticlassFalseNegative) - @eval call(m::$M_ex, ŷ, y) = m(_confmat(ŷ, y, warn=false)) -end - -for M_ex in (:MTPR, :MTNR, :MFPR, :MFNR, :MFDR, :MulticlassPrecision, :MNPV, - :MulticlassFScore) - @eval call(m::$M_ex, ŷ, y) = m(_confmat(ŷ, y, warn=false)) - @eval call(m::$M_ex, ŷ, y, class_w::AbstractDict{<:Any, <:Real}) = - m(_confmat(ŷ, y, warn=false), class_w) -end diff --git a/src/measures/loss_functions_interface.jl b/src/measures/loss_functions_interface.jl deleted file mode 100644 index 5d7d6125..00000000 --- a/src/measures/loss_functions_interface.jl +++ /dev/null @@ -1,208 +0,0 @@ -# implementation of MLJ measure interface for LossFunctions.jl - -function naked(T::Type) - without_module_name = split(string(T), '.') |> last - without_type_parameters = split(without_module_name, '{') |> first - return Symbol(without_type_parameters) -end - -const WITHOUT_PARAMETERS = - setdiff(LOSS_FUNCTIONS, WITH_PARAMETERS) - -## WRAPPER - -abstract type SupervisedLoss <: Unaggregated end - - -struct MarginLoss{L<:LossFunctions.MarginLoss} <: SupervisedLoss - loss::L -end - -struct DistanceLoss{L<:LossFunctions.DistanceLoss} <: SupervisedLoss - loss::L -end - -# INTERFACE FOR EXTRACTING PARAMETERS - -# LossFunctions.jl does not have a uniform interface for extracting -# parameters, and hence: - -_parameter(loss::LossFunctions.DWDMarginLoss) = loss.q -_parameter(loss::LossFunctions.SmoothedL1HingeLoss) = loss.gamma -_parameter(loss::LossFunctions.HuberLoss) = loss.d -_parameter(loss::LossFunctions.L1EpsilonInsLoss) = loss.ε -_parameter(loss::LossFunctions.L2EpsilonInsLoss) = loss.ε -_parameter(::LossFunctions.LPDistLoss{P}) where P = P -_parameter(::LossFunctions.L1DistLoss) = 1 -_parameter(::LossFunctions.L2DistLoss) = 2 -_parameter(loss::LossFunctions.QuantileLoss) = loss.τ - - -## CONSTRUCTORS AND CALLING BEHAVIOUR - -err_wrap(n) = ArgumentError("Bad @wrap syntax: $n. 
") - -# We define amacro to wrap a concrete `LossFunctions.SupervisedLoss` -# type and define its constructor, and to define property access in -# case of parameters; the macro also defines calling behaviour: -macro wrap_loss(ex) - ex.head == :call || throw(err_wrap(1)) - Loss_ex = ex.args[1] - Loss_str = string(Loss_ex) - if Loss_ex in MARGIN_LOSSES - T = :MarginLoss - else - T = :DistanceLoss - end - - # bind name to wrapped version of LossFunctions loss: - program = quote - const $Loss_ex = $T{<:LossFunctions.$Loss_ex} - name(M::Type{<:$Loss_ex}) = $Loss_str - end - - # defined instances - alias = snakecase(string(Loss_ex)) - push!(program.args, quote - instances(::Type{<:$Loss_ex}) = [$alias, ] - end) - - # define kw constructor and expose any parameter as a property: - if length(ex.args) == 1 - push!(program.args, quote - $Loss_ex() = $T(LossFunctions.$Loss_ex()) - Base.propertynames(::$Loss_ex) = () - end) - elseif length(ex.args) > 1 - sub_ex = ex.args[2] - sub_ex.head == :parameters || throw(err_wrap(2)) - length(sub_ex.args) == 1 || throw(err_wrap("Only 1 kwarg supported")) - sub_ex.args[1].head == :kw || throw(err_wrap(3)) - var_ex = sub_ex.args[1].args[1] - var_str = string(var_ex) - val_ex = sub_ex.args[1].args[2] - push!(program.args, quote - $Loss_ex(; $var_ex=$val_ex) = - $T(LossFunctions.$Loss_ex($var_ex)) - $Loss_ex(p) = $Loss_ex($var_ex=p) - Base.propertynames(::$Loss_ex) = (Symbol($var_str), ) - function Base.getproperty(wrapper::$Loss_ex, name::Symbol) - if name === Symbol($var_str) - return _parameter(getfield(wrapper, :loss)) # see below - end - error("type $($Loss_ex) has no property $name") - end - end) - else - throw(err_wrap(4)) - end - - esc(program) -end - -for Loss in WITHOUT_PARAMETERS - eval(:(@wrap_loss $Loss())) -end - -@wrap_loss DWDMarginLoss(; q=1.0) -@wrap_loss SmoothedL1HingeLoss(; gamma=1.0) -@wrap_loss HuberLoss(; d=1.0) -@wrap_loss L1EpsilonInsLoss(; ε=1.0) -@wrap_loss L2EpsilonInsLoss(; ε=1.0) -@wrap_loss LPDistLoss(; P=2) -@wrap_loss QuantileLoss(; τ=0.7) - - -## GENERIC TRAITS - -const LossFunctions = LossFunctions -is_measure_type(::Type{<:SupervisedLoss}) = true -orientation(::Type{<:SupervisedLoss}) = :loss -reports_each_observation(::Type{<:SupervisedLoss}) = true -is_feature_dependent(::Type{<:SupervisedLoss}) = false -supports_weights(::Type{<:SupervisedLoss}) = true -docstring(M::Type{<:SupervisedLoss}) = name(M) - - -## CALLING - DISTANCE BASED LOSS FUNCTIONS - -MMI.prediction_type(::Type{<:DistanceLoss}) = :deterministic -MMI.target_scitype(::Type{<:DistanceLoss}) = Union{Vec{Continuous},Vec{Count}} - -call(measure::DistanceLoss, yhat, y) = - (getfield(measure, :loss)).(yhat, y) - -function call(measure::DistanceLoss, yhat, y, w::AbstractArray) - return w .* call(measure, yhat, y) -end - - -## CALLING - MARGIN BASED LOSS FUNCTIONS - -MMI.prediction_type(::Type{<:MarginLoss}) = :probabilistic -MMI.target_scitype(::Type{<:MarginLoss}) = AbstractArray{<:Finite{2}} - -# rescale [0, 1] -> [-1, 1]: -_scale(p) = 2p - 1 - -function call(measure::MarginLoss, yhat, y) - probs_of_observed = broadcast(pdf, yhat, y) - loss = getfield(measure, :loss) - return loss.(_scale.(probs_of_observed), 1) -end - -call(measure::MarginLoss, yhat, y, w::AbstractArray) = - w .* call(measure, yhat, y) - - -## ADJUSTMENTS - -human_name(::Type{<:L1EpsilonInsLoss}) = "l1 ϵ-insensitive loss" -human_name(::Type{<:L2EpsilonInsLoss}) = "l2 ϵ-insensitive loss" -human_name(::Type{<:DWDMarginLoss}) = "distance weighted discrimination loss" - -_signature(::Any) = "" 
-_signature(::Type{<:HuberLoss}) = "`HuberLoss(; d=1.0)`" -_signature(::Type{<:DWDMarginLoss}) = "`DWDMarginLoss(; q=1.0)`" -_signature(::Type{<:SmoothedL1HingeLoss}) = "`SmoothedL1HingeLoss(; gamma=1.0)`" -_signature(::Type{<:L1EpsilonInsLoss}) = "`L1EpsilonInsLoss(; ε=1.0)`" -_signature(::Type{<:L2EpsilonInsLoss}) = "`L2EpsilonInsLoss(; ε=1.0)`" -_signature(::Type{<:LPDistLoss}) = "`LPDistLoss(; P=2)`" -_signature(::Type{<:QuantileLoss}) = "`QuantileLoss(; τ=0.7)`" - - -## ALIASES AND DOCSTRINGS - -const DOC_LOSS_FUNCTIONS = -""" -For more detail, see the original LossFunctions.jl documentation *but -note differences in the signature.* - -Losses from LossFunctions.jl do not support `missing` values. To use -with `missing` values, replace `(ŷ, y)` with `skipinvalid(ŷ, y)`. -""" - -for Loss_ex in DISTANCE_LOSSES - eval(quote - sig = _signature($Loss_ex) - isempty(sig) || (sig = "Constructor signature: "*sig) - @create_aliases $Loss_ex - @create_docs($Loss_ex, - typename = name($Loss_ex), - body=DOC_LOSS_FUNCTIONS, - footer=sig) - end) -end - -for Loss_ex in MARGIN_LOSSES - eval(quote - sig = _signature($Loss_ex) - isempty(sig) || (sig = "Constructor signature: "*sig) - @create_aliases $Loss_ex - @create_docs($Loss_ex, - typename = name($Loss_ex), - body=DOC_LOSS_FUNCTIONS, - scitype=DOC_FINITE_BINARY, - footer=sig) - end) -end diff --git a/src/measures/measure_search.jl b/src/measures/measure_search.jl deleted file mode 100644 index bd813009..00000000 --- a/src/measures/measure_search.jl +++ /dev/null @@ -1,65 +0,0 @@ -const LOCAL_MEASURE_TYPES = filter(x->x != SupervisedLoss, - vcat(subtypes(MLJBase.Unaggregated), - subtypes(MLJBase.Aggregated))) - -const LOSS_FUNCTIONS_MEASURE_TYPES = - [eval(:($Loss)) for Loss in LOSS_FUNCTIONS] - -const MEASURE_TYPES = vcat(LOCAL_MEASURE_TYPES, LOSS_FUNCTIONS_MEASURE_TYPES) - -const MeasureProxy = NamedTuple{Tuple(MEASURE_TRAITS)} - -function Base.show(stream::IO, p::MeasureProxy) - instances = "["*join(p.instances, ", ")*"]" - print(stream, "(name = $(p.name), instances = $instances, ...)") -end - -function Base.show(stream::IO, ::MIME"text/plain", p::MeasureProxy) - printstyled(IOContext(stream, :color=> MLJBase.SHOW_COLOR[]), - p.docstring, bold=false, color=:magenta) - println(stream) - MLJBase.fancy_nt(stream, p) -end - -""" - measures() - -List all measures as named-tuples keyed on measure traits. - - measures(filters...) - -List all measures `m` for which `filter(m)` is true, for each `filter` -in `filters`. - - measures(needle::Union{AbstractString,Regex}) - -List all measures with `needle` in a measure's `name`, `instances`, or -`docstring`. - - -### Example - -Find all classification measures supporting sample weights: - - measures(m -> m.target_scitype <: AbstractVector{<:Finite} && - m.supports_weights) - -Find all measures in the "rms" family: - - measures("rms") - -""" -function measures(conditions...)
- all_measures = map(info, MEASURE_TYPES) - return filter(all_measures) do measure - all(c(measure) for c in conditions) - end -end - -function measures(needle::Union{AbstractString,Regex}) - f = m -> occursin(needle, m.name) || - occursin(needle, m.docstring) || - occursin(needle, join(m.instances, " ")) - return MLJBase.measures(f) -end - -measures() = measures(x->true) diff --git a/src/measures/measures.jl b/src/measures/measures.jl deleted file mode 100644 index 3c23a4f9..00000000 --- a/src/measures/measures.jl +++ /dev/null @@ -1,302 +0,0 @@ -const PROPER_SCORING_RULES = "[Gneiting and Raftery (2007), \"Strictly "* - "Proper Scoring Rules, Prediction, and Estimation\""* - "](https://doi.org/10.1198/016214506000001437)" -const DOC_FINITE = - "`AbstractArray{<:Union{Finite,Missing}}` (multiclass classification)" -const DOC_FINITE_BINARY = - "`AbstractArray{<:Union{Finite{2},Missing}}` (binary classification)" -const DOC_ORDERED_FACTOR = - "`AbstractArray{<:Union{OrderedFactor,Missing}}` (classification of ordered target)" -const DOC_ORDERED_FACTOR_BINARY = - "`AbstractArray{<:Union{OrderedFactor{2},Missing}}` "* - "(binary classification where choice of \"true\" affects the measure)" -const DOC_CONTINUOUS = "`AbstractArray{<:Union{Continuous,Missing}}` (regression)" -const DOC_COUNT = "`AbstractArray{<:Union{Count,Missing}}`" -const DOC_MULTI = "`AbstractArray{<:Union{Missing,T}}` where `T` is `Continuous` "* - "or `Count` (for respectively continuous or discrete Distributions.jl objects in "* - "`ŷ`) or `OrderedFactor` or `Multiclass` "* - "(for `UnivariateFinite` distributions in `ŷ`)" - -const DOC_INFINITE = "`AbstractArray{<:Union{Infinite,Missing}}`" -const INVARIANT_LABEL = - "This metric is invariant to class reordering." -const VARIANT_LABEL = - "This metric is *not* invariant to class re-ordering." - -is_measure_type(::Any) = false - -# Each of the following traits, with fallbacks defined in -# StatisticalTraits.jl, make sense for some or all measures: - -const MEASURE_TRAITS = [ - :name, - :instances, - :human_name, - :target_scitype, - :supports_weights, - :supports_class_weights, - :prediction_type, - :orientation, - :reports_each_observation, - :aggregation, - :is_feature_dependent, - :docstring, - :distribution_type -] - -# # FOR BUILT-IN MEASURES (subtyping Measure) - -abstract type Measure <: MLJType end -abstract type Aggregated <: Measure end -abstract type Unaggregated <: Measure end - -StatisticalTraits.reports_each_observation(::Type{<:Aggregated}) = false -StatisticalTraits.reports_each_observation(::Type{<:Unaggregated}) = true - - -# # FALLBACK CHECKS -extra_check(::Measure, args...)
= nothing -function _check(measure::Measure, yhat, y) - check_dimensions(yhat, y) - extra_check(measure, yhat, y) -end -function _check(measure::Measure, yhat, y, w) - check_dimensions(yhat, y) - extra_check(measure, yhat, y, w) -end -function _check(measure::Measure, yhat, y, w::Arr) - check_dimensions(yhat, y) - check_dimensions(y, w) - extra_check(measure, yhat, y, w) -end -function _check(measure::Measure, yhat::Arr{<:UnivariateFinite}, y) - check_dimensions(yhat, y) - check_pools(yhat, y) - extra_check(measure, yhat, y) -end - -function _check( - measure::Measure, - yhat::Arr{<:UnivariateFinite}, - y, - w::Arr -) - check_dimensions(yhat, y) - check_pools(yhat, y) - extra_check(measure, yhat, y, w) -end - -function _check( - measure::Measure, - yhat::Arr{<:UnivariateFinite}, - y, - w::AbstractDict -) - check_dimensions(yhat, y) - check_pools(yhat, y) - check_pools(yhat, w) - extra_check(measure, yhat, y, w) -end - -# # METHODS TO EVALUATE MEASURES - -# See measures/README.md for details - -# `robust_single` can accept `missing` observations/predictions but is never overloaded; -# `single` is overloaded but does not need to handle missings. This factoring allows us -# to avoid method ambiguities which are cumbersome to avoid with only one function. - -robust_single(args...) = single(args...) -robust_single(m, ::Missing, ::Missing) = missing -robust_single(m, ::Missing, η) = missing -robust_single(m, η̂, ::Missing) = missing - -const Label = Union{CategoricalValue, Number, AbstractString, Symbol, AbstractChar} - -# closure for broadcasting: -robust_single(measure::Measure) = (ηhat, η) -> robust_single(measure, ηhat, η) - -call(measure::Unaggregated, yhat, y) = broadcast(robust_single(measure), yhat, y) -function call(measure::Unaggregated, yhat, y, w::AbstractArray) - unweighted = broadcast(robust_single(measure), yhat, y) - return w .* unweighted -end -function call(measure::Unaggregated, yhat, y, weight_given_class::AbstractDict) - unweighted = broadcast(robust_single(measure), yhat, y) - w = @inbounds broadcast(η -> weight_given_class[η], y) - return w .* unweighted -end - -# ## Top level -function (measure::Measure)(args...) - _check(measure, args...) - call(measure, args...) -end - -# # TRAITS - -# user-bespoke measures will subtype `Measure` directly and the -# following will therefore not apply: -StatisticalTraits.supports_weights(::Type{<:Union{Aggregated, Unaggregated}}) = true - -is_measure_type(::Type{<:Measure}) = true -is_measure(m) = is_measure_type(typeof(m)) - -# docstring fall-back: -_decorate(s::AbstractString) = "`$s`" -_decorate(v::Vector{<:AbstractString}) = join(_decorate.(v), ", ") -function MMI.docstring(M::Type{<:Measure}) - list = _decorate(instances(M)) - ret = "`$(name(M))` - $(human_name(M)) type" - isempty(list) || (ret *= " with instances $list") - ret *= ". 
" - return ret -end - -# display: -show_as_constructed(::Type{<:Measure}) = true - -# info -function StatisticalTraits.info(M::Type{<:Measure}) - values = Tuple(@eval($trait($M)) for trait in MEASURE_TRAITS) - return NamedTuple{Tuple(MEASURE_TRAITS)}(values) -end - -StatisticalTraits.info(m::Measure) = StatisticalTraits.info(typeof(m)) - - -# # AGGREGATION - -(::Sum)(v) = sum(skipinvalid(v)) -(::Sum)(v::LittleDict) = sum(values(v)) - -(::Mean)(v) = mean(skipinvalid(v)) -(::Mean)(v::LittleDict) = mean(values(v)) - -(::RootMeanSquare)(v) = sqrt(mean(skipinvalid(v).^2)) - -aggregate(v, measure) = aggregation(measure)(v) - -# aggregation is no-op on scalars: -const MeasureValue = Union{Real,Tuple{<:Real,<:Real}} # number or interval -aggregate(x::MeasureValue, measure) = x - - -# # UNIVERSAL CALLING SYNTAX - -# yhat - predictions (point or probabilisitic) -# X - features -# y - target observations -# w - per-observation weights - -function value(measure, yhat, X, y, w) - vfdep = Val(is_feature_dependent(measure)) - vsweights = Val(supports_weights(measure) || - supports_class_weights(measure)) - return value(measure, yhat, X, y, w, vfdep, vsweights) -end - -# # UNIVERSAL CALLING INTERFACE - -# is feature independent, weights not supported: -value(m, yhat, X, y, w, ::Val{false}, ::Val{false}) = m(yhat, y) - -# is feature dependent:, weights not supported: -value(m, yhat, X, y, w, ::Val{true}, ::Val{false}) = m(yhat, X, y) - -# is feature independent, weights supported: -value(m, yhat, X, y, w, ::Val{false}, ::Val{true}) = m(yhat, y, w) -value(m, yhat, X, y, ::Nothing, ::Val{false}, ::Val{true}) = m(yhat, y) - -# is feature dependent, weights supported: -value(m, yhat, X, y, w, ::Val{true}, ::Val{true}) = m(yhat, X, y, w) -value(m, yhat, X, y, ::Nothing, ::Val{true}, ::Val{true}) = m(yhat, X, y) - -# # helpers - -_scale(x, w::Arr, i) = x*w[i] -_scale(x, ::Nothing, i::Any) = x - -function check_pools(ŷ, y) - levels(y) == levels(ŷ[1]) || - error("Conflicting categorical pools found "* - "in observations and predictions. ") - return nothing -end - -function check_pools(ŷ, w::AbstractDict) - Set(levels(ŷ[1])) == Set(keys(w)) || - error("Conflicting categorical pools found "* - "in class weights and predictions. 
") - return nothing -end - -# # INCLUDE SPECIFIC MEASURES AND TOOLS - -include("meta_utilities.jl") -include("roc.jl") -include("confusion_matrix.jl") -include("continuous.jl") -include("finite.jl") -include("probabilistic.jl") -include("loss_functions_interface.jl") - - -# # DEFAULT MEASURES - -default_measure(T, S) = _default_measure(T, nonmissingtype(S)) - -_default_measure(T, S) = nothing - -# Deterministic + Continuous / Count ==> RMS -function _default_measure( - ::Type{<:Deterministic}, - ::Type{<:Union{Vec{<:Continuous}, Vec{<:Count}}}, -) - return rms -end - -# Deterministic + Finite ==> Misclassification rate -function _default_measure( - ::Type{<:Deterministic}, - ::Type{<:Vec{<:Finite}}, -) - return misclassification_rate -end - -# Probabilistic + Finite / Count ==> log loss -function _default_measure( - ::Type{<:Probabilistic}, - ::Type{<:Union{Vec{<:Finite},Vec{<:Count}}}, -) - return log_loss -end - -# Probabilistic + Continuous ==> Log loss -function _default_measure( - ::Type{<:Probabilistic}, - ::Type{<:Vec{<:Continuous}}, -) - return log_loss -end - -function _default_measure( - ::Type{<:MMI.ProbabilisticDetector}, - ::Type{<:Vec{<:OrderedFactor{2}}}, -) - return area_under_curve -end - -function _default_measure( - ::Type{<:MMI.DeterministicDetector}, - ::Type{<:Vec{<:OrderedFactor{2}}}, -) - return balanced_accuracy -end - -# Fallbacks -default_measure(M::Type{<:Supervised}) = default_measure(M, target_scitype(M)) -default_measure(::M) where M <: Supervised = default_measure(M) - -default_measure(M::Type{<:Annotator}) = _default_measure(M, target_scitype(M)) -default_measure(::M) where M <: Annotator = default_measure(M) diff --git a/src/measures/meta_utilities.jl b/src/measures/meta_utilities.jl deleted file mode 100644 index 3b0de197..00000000 --- a/src/measures/meta_utilities.jl +++ /dev/null @@ -1,233 +0,0 @@ -const DOC_OBSERVATIONS = - "on predictions `ŷ`, "* - "given ground truth observations `y`. " -const DOC_WEIGHTS = - "Optionally specify per-sample weights, `w`. " -const DOC_CLASS_WEIGHTS = - "An optional `AbstractDict`, denoted `class_w` above, "* - "keyed on `levels(y)`, specifies class weights. " - -macro create_aliases(M_ex) - esc(quote - M = $M_ex - for alias in Symbol.(instances(M)) - # isdefined(parentmodule(M), alias) || eval(:(const $alias = $M())) - eval(:(const $alias = $M())) - end - end) -end - -function detailed_doc_string(M; typename="", body="", footer="", scitype="") - - _instances = _decorate(instances(M)) - human_name = MLJBase.human_name(M) - if isempty(scitype) - scitype = "`$(target_scitype(M))`" - end - - if isempty(typename) - ret = " $M\n\n" - else - ret = " MLJBase.$typename\n\n" - end - - ret *= "A measure type for $(human_name)" - isempty(_instances) || - (ret *= ", which includes the instance(s): "* - "$_instances") - ret *= ".\n\n" - ret *= " $(name(M))()(ŷ, y)\n" - supports_weights(M) && - (ret *= " $(name(M))()(ŷ, y, w)\n") - supports_class_weights(M) && - (ret *= " $(name(M))()(ŷ, y, class_w)\n") - ret *= "\n" - if isempty(fieldnames(M)) - ret *= "Evaluate the $(human_name) " - else - ret *= "Evaluate the default instance of $(name(M)) " - end - ret *= "$DOC_OBSERVATIONS" - supports_weights(M) && - (ret *= DOC_WEIGHTS) - supports_class_weights(M) && - (ret *= DOC_CLASS_WEIGHTS) - ret *= "\n\n" - isempty(body) || (ret *= "$body\n\n") - ret *= "Requires `scitype(y)` to be a subtype of $scitype; " - ret *= "`ŷ` must be an array of `$(prediction_type(M))` predictions. 
" - isempty(footer) ||(ret *= "\n\n$footer") - ret *= "\n\n" - ret *= "For more information, run `info($(name(M)))`. " - return ret -end - - -_err_create_docs() = error( - "@create_docs syntax error. Usage: \n"* - "@create_docs(MeasureType, typename=..., body=..., scitype=..., footer=...") -macro create_docs(M_ex, exs...) - M_ex isa Symbol || _err_create_docs() - t = "" - b = "" - s = "" - f = "" - for ex in exs - ex.head == :(=) || _err_create_docs() - ex.args[1] == :typename && (t = ex.args[2]) - ex.args[1] == :body && (b = ex.args[2]) - ex.args[1] == :scitype && (s = ex.args[2]) - ex.args[1] == :footer && (f = ex.args[2]) - end - esc(quote - "$(detailed_doc_string($M_ex, typename=$t, body=$b, scitype=$s, footer=$f))" - function $M_ex end - end) -end - -# TODO: I wonder why this is not a macro? - -""" - metadata_measure(T; kw...) - -Helper function to write the metadata (trait definitions) for a single -measure. - -### Compulsory keyword arguments - -- `target_scitype`: The allowed scientific type of `y` in `measure(ŷ, - y, ...)`. This is typically some abstract array. E.g, in single - target variable regression this is typically - `AbstractArray{<:Union{Missing,Continuous}}`. For a binary - classification metric insensitive to class order, this would - typically be `Union{AbstractArray{<:Union{Missing,Multiclass{2}}}, - AbstractArray{<:Union{Missing,OrderedFactor{2}}}}`, which has the - alias `FiniteArrMissing`. - -- `orientation`: Orientation of the measure. Use `:loss` when lower is - better and `:score` when higher is better. For example, set - `:loss` for root mean square and `:score` for area under the ROC - curve. - -- `prediction_type`: Refers to `ŷ` in `measure(ŷ, y, ...)` and should - be one of: `:deterministic` (`ŷ` has same type as `y`), - `:probabilistic` or `:interval`. - - -#### Optional keyword arguments - -The following have meaningful defaults but may still require -overloading: - -- `instances`: A vector of strings naming the built-in instances of - the measurement type provided by the implementation, which are - usually just common aliases for the default instance. E.g., for - `RSquared` has the `instances = ["rsq", "rsquared"]` which are both - defined as `RSquared()` in the implementation. `MulticlassFScore` - has the `instances = ["macro_f1score", "micro_f1score", - "multiclass_f1score"]`, where `micro_f1score = - MulticlassFScore(average=micro_avg)`, etc. Default is `String[]`. - -- `aggregation`: Aggregation method for measurements, typically - `Mean()` (for, e.g., mean absolute error) or `Sum()` (for number - of true positives). Default is `Mean()`. Must subtype - `StatisticalTraits.AggregationMode`. It is used to: - - - aggregate measurements in resampling (e.g., cross-validation) - - - aggregating per-observation measurements returned by `single` in - the fallback definition of `call` for `Unaggregated` measures - (such as area under the ROC curve). - -- `supports_weights`: Whether the measure can be called with - per-observation weights `w`, as in `l2(ŷ, y, w)`. Default is `true`. - -- `supports_class_weights`: Whether the measure can be called with a - class weight dictionary `w`, as in `micro_f1score(ŷ, y, w)`. Default - is `true`. Default is `false`. - -- `human_name`: Ordinary name of measure. Used in the full - auto-generated docstring, which begins "A measure type for - \$human_name ...". Eg, the `human_name` for `TruePositive` is `number - of true positives. 
Default is the snake-case version of the type name, with - underscores replaced by spaces; so `MeanAbsoluteError` becomes "mean - absolute error". - -- `docstring`: An abbreviated docstring, displayed by - `info(measure)`. Fallback uses `human_name` and lists the - `instances`. - -""" -function metadata_measure(T; name::String="", - human_name="", - instances::Vector{String}=String[], - target_scitype=Unknown, - prediction_type::Symbol=:unknown, - orientation::Symbol=:unknown, - aggregation=Mean(), - is_feature_dependent::Bool=false, - supports_weights::Bool=true, - supports_class_weights::Bool=false, - docstring::String="", - distribution_type=Unknown) - pred_str = "$prediction_type" - orientation_str = "$orientation" -# dist = ifelse(ismissing(distribution_type), missing, "$distribution_type") - ex = quote - - # traits common with models: - if !isempty($name) - StatisticalTraits.name(::Type{<:$T}) = $name - end - if !isempty($docstring) - StatisticalTraits.docstring(::Type{<:$T}) = $docstring - end - StatisticalTraits.target_scitype(::Type{<:$T}) = $target_scitype - StatisticalTraits.prediction_type(::Type{<:$T}) = Symbol($pred_str) - StatisticalTraits.supports_weights(::Type{<:$T}) = $supports_weights - - # traits specific to measures: - if !isempty($instances) - StatisticalTraits.instances(::Type{<:$T}) = $instances - end - if !isempty($human_name) - StatisticalTraits.human_name(::Type{<:$T}) = $human_name - end - StatisticalTraits.orientation(::Type{<:$T}) = Symbol($orientation_str) - StatisticalTraits.aggregation(::Type{<:$T}) = $aggregation - StatisticalTraits.is_feature_dependent(::Type{<:$T}) = - $is_feature_dependent - StatisticalTraits.supports_class_weights(::Type{<:$T}) = - $supports_class_weights - StatisticalTraits.distribution_type(::Type{<:$T}) = $distribution_type - - end - parentmodule(T).eval(ex) -end - -""" - - measures_for_export() - -Return a list of the symbolic representation of all: - -- measure types (subtypes of `Aggregated` and `Unaggregated`) - -- measure type aliases (as defined by the constant - `MLJBase.MEASURE_TYPE_ALIASES`) - -- all built-in measure instances (as declared by the `instances` trait) - -""" -function measures_for_export() - ret = MLJBase.MEASURE_TYPE_ALIASES - for m in measures() - name = m.name |> Symbol - push!(ret, name) - for instance in m.instances - alias = Symbol(instance) - push!(ret, alias) - end - end - return ret -end diff --git a/src/measures/probabilistic.jl b/src/measures/probabilistic.jl deleted file mode 100644 index 11c3bcdf..00000000 --- a/src/measures/probabilistic.jl +++ /dev/null @@ -1,423 +0,0 @@ -const DOC_DISTRIBUTIONS = -""" -In the case that the predictions `ŷ` are continuous probability -distributions, such as `Distributions.Normal`, replace the above sum -with an integral, and interpret `p` as the probability density -function. In case of discrete distributions over the integers, such as -`Distributions.Poisson`, sum over all integers instead of `C`. 
-""" -const WITH_L2NORM_CONTINUOUS = - [@eval(Distributions.$d) for d in [ - :Chisq, - :Gamma, - :Beta, - :Chi, - :Cauchy, - :Normal, - :Uniform, - :Logistic, - :Exponential]] - -const WITH_L2NORM_COUNT = - [@eval(Distributions.$d) for d in [ - :Poisson, - :DiscreteUniform, - :DiscreteNonParametric]] - -const WITH_L2NORM = vcat([UnivariateFinite, ], - WITH_L2NORM_CONTINUOUS, - WITH_L2NORM_COUNT) - -const UD = Distributions.UnivariateDistribution - -# ======================================================== -# AGGREGATED MEASURES - -# --------------------------------------------------------- -# AreaUnderCurve - -# Implementation based on the Mann-Whitney U statistic. -# see https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve -# and https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test#Area_under_curve_(AUC)_statistic_for_ROC_curves - - -struct AreaUnderCurve <: Aggregated end - -metadata_measure(AreaUnderCurve; - human_name = "area under the ROC", - instances = ["area_under_curve", "auc"], - target_scitype = FiniteArrMissing{2}, - prediction_type = :probabilistic, - orientation = :score, - supports_weights = false, - distribution_type = UnivariateFinite) - -const AUC = AreaUnderCurve -@create_aliases AreaUnderCurve - -@create_docs(AreaUnderCurve, -body= -""" -Returns the area under the ROC ([receiver operator -characteristic](https://en.wikipedia.org/wiki/Receiver_operating_characteristic)) - -If `missing` or `NaN` values are present, use `auc(skipinvalid(yhat, y)...)`. - -$INVARIANT_LABEL -""", -scitpye = DOC_FINITE_BINARY) - -# core algorithm: -function _auc(ŷ, y) - lab_pos = classes(ŷ)[2] # 'positive' label - scores = pdf.(ŷ, lab_pos) # associated scores - ranks = StatsBase.tiedrank(scores) - n = length(y) - n_neg = 0 # to keep of the number of negative preds - T = eltype(ranks) - R_pos = zero(T) # sum of positive ranks - @inbounds for (i,j) in zip(eachindex(y), eachindex(ranks)) - if y[i] == lab_pos - R_pos += ranks[j] - else - n_neg += 1 - end - end - n_pos = n - n_neg # number of positive predictions - U = R_pos - T(0.5)*n_pos*(n_pos + 1) # Mann-Whitney U statistic - return U / (n_neg * n_pos) -end - -# Missing values not supported, but allow `Missing` in eltype, because -# `skipinvalid(yhat, y)` does not tighten the type. See doc string above. - -call(::AUC, ŷ, y) = _auc(ŷ, y) - -# ======================================================== -# UNAGGREGATED MEASURES - -# --------------------------------------------------------------------- -# LogScore - -struct LogScore{R <: Real} <: Unaggregated - tol::R -end -LogScore(;eps=eps(), tol=eps) = LogScore(tol) - -metadata_measure(LogScore; - instances = ["log_score", ], - target_scitype = Union{ - Arr{<:Union{Missing,Multiclass}}, - Arr{<:Union{Missing,OrderedFactor}}, - Arr{<:Union{Missing,Continuous}}, - Arr{<:Union{Missing,Count}}}, - prediction_type = :probabilistic, - orientation = :score, - distribution_type = Union{WITH_L2NORM...}) - -@create_aliases LogScore - -@create_docs(LogScore, -body= -""" -Since the score is undefined in the case that the true observation is -predicted to occur with probability zero, probablities are clamped -between `tol` and `1-tol`, where `tol` is a constructor key-word -argument. 
- -If `p` is the predicted probability mass or density function -corresponding to a *single* ground truth observation `η`, then the -score for that example is - - log(clamp(p(η), tol, 1 - tol)) - -For example, for a binary target with "yes"/"no" labels, and -predicted probability of "yes" equal to 0.8, an observation of "no" -scores `log(0.2)`. - -The predictions `ŷ` should be an array of `UnivariateFinite` -distributions in the case of `Finite` target `y`, and otherwise a -supported `Distributions.UnivariateDistribution` such as `Normal` or -`Poisson`. - -See also [`LogLoss`](@ref), which differs only in sign. -""", -scitype=DOC_MULTI) - -# for single finite observation: -single(c::LogScore, d::UnivariateFinite, η) = - log(clamp(pdf(d, η), c.tol, 1 - c.tol)) - -# for a single infinite observation: -single(c::LogScore, d::Distributions.UnivariateDistribution, η) = - log(clamp(pdf(d, η), c.tol, 1 - c.tol)) - -# to resolve method ambiguities: -single(::LogScore, ::UnivariateFinite, ::Missing) = missing -single(::LogScore, ::Distributions.UnivariateDistribution, ::Missing) = missing -single(::LogScore, ::Missing, ::Missing) = missing - -# performant broadcasting in case of UnivariateFiniteArray: -call(c::LogScore, ŷ::UnivariateFiniteArray, y) = - log.(clamp.(broadcast(pdf, ŷ, y), c.tol, 1 - c.tol)) -call(c::LogScore, ŷ::UnivariateFiniteArray, y, w::AbstractArray) = call(c, ŷ, y) .* w - -# --------------------------------------------------------------------- -# LogLoss - -struct LogLoss{R <: Real} <: Unaggregated - tol::R -end -LogLoss(;eps=eps(), tol=eps) = LogLoss(tol) - -metadata_measure(LogLoss; - instances = ["log_loss", "cross_entropy"], - target_scitype = Union{ - Arr{<:Union{Missing,Multiclass}}, - Arr{<:Union{Missing,OrderedFactor}}, - Arr{<:Union{Missing,Continuous}}, - Arr{<:Union{Missing,Count}}}, - prediction_type = :probabilistic, - orientation = :loss, - distribution_type = Union{WITH_L2NORM...}) - -const CrossEntropy = LogLoss -@create_aliases LogLoss - -@create_docs(LogLoss, -body= -""" -For details, see [`LogScore`](@ref), which differs only by a sign. -""", -scitype=DOC_MULTI) - -# for single observation: -single(c::LogLoss, d, η) = -single(LogScore(tol=c.tol), d, η) - -# to get performant broadcasting in case of UnivariateFiniteArray: -call(c::LogLoss, ŷ::UnivariateFiniteArray, y) = - -call(LogScore(tol=c.tol), ŷ, y) -call(c::LogLoss, ŷ::UnivariateFiniteArray, y, w::AbstractArray) = - -call(LogScore(tol=c.tol), ŷ, y, w) - - -# ----------------------------------------------------- -# BrierScore - -struct BrierScore <: Unaggregated end - -metadata_measure(BrierScore; - human_name = "Brier score (a.k.a. quadratic score)", - instances = ["brier_score",], - target_scitype = Union{ - Arr{<:Union{Missing,Multiclass}}, - Arr{<:Union{Missing,OrderedFactor}}, - Arr{<:Union{Missing,Continuous}}, - Arr{<:Union{Missing,Count}}}, - prediction_type = :probabilistic, - orientation = :score, - distribution_type = Union{WITH_L2NORM...}) - -@create_aliases BrierScore - -@create_docs(BrierScore, -body= -""" -Convention as in $PROPER_SCORING_RULES. - -*Finite case.* If `p` is the predicted probability mass function for a -*single* observation `η`, and `C` all possible classes, then the -corresponding score for that observation is given by - -``2p(η) - \\left(\\sum_{c ∈ C} p(c)^2\\right) - 1`` - -*Warning.* `BrierScore()` is a "score" in the sense that bigger is -better (with `0` optimal, and all other values negative). 
In Brier's -original 1950 paper, and many other places, it has the opposite sign, -despite the name. Moreover, the present implementation does not treat -the binary case as special, so that the score may differ in the binary -case by a factor of two from usage elsewhere. - -*Infinite case.* Replacing the sum above with an integral does *not* -lead to the formula adopted here in the case of `Continuous` or -`Count` target `y`. Rather the convention in the paper cited above is -adopted, which means returning a score of - -``2p(η) - ∫ p(t)^2 dt`` - -in the `Continuous` case (`p` the probability density function) or - -``2p(η) - ∑_t p(t)^2`` - -in the `Count` case (`p` the probability mass function). -""", -scitype=DOC_MULTI) - -# calling on single finite observation: -function single(::BrierScore, - d::UnivariateFinite, - η) - levels = classes(d) - pvec = broadcast(pdf, d, levels) - offset = 1 + sum(pvec.^2) - return 2 * pdf(d, η) - offset -end - -# calling on a single infinite observation: -single(::BrierScore, d::Distributions.UnivariateDistribution, η) = - 2*pdf(d, η) - Distributions.pdfsquaredL2norm(d) - -# To get performant broadcasted version in case of UnivariateFiniteArray: -function call( - ::BrierScore, - ŷ::UnivariateFiniteArray, - y - ) - - probs = pdf(ŷ, classes(first(ŷ))) - offset = 1 .+ vec(sum(probs.^2, dims=2)) - - 2 .* broadcast(pdf, ŷ, y) .- offset -end -call(m::BrierScore, ŷ::UnivariateFiniteArray, y, w::AbstractArray) = call(m, ŷ, y) .* w - - -# ----------------------------------------------------- -# BrierLoss - -struct BrierLoss <: Unaggregated end - -metadata_measure(BrierLoss; - human_name = "Brier loss (a.k.a. quadratic loss)", - instances = ["brier_loss",], - target_scitype = Union{ - Arr{<:Union{Missing,Multiclass}}, - Arr{<:Union{Missing,OrderedFactor}}, - Arr{<:Union{Missing,Continuous}}, - Arr{<:Union{Missing,Count}}}, - prediction_type = :probabilistic, - orientation = :loss, - distribution_type = Union{WITH_L2NORM...}) - -@create_aliases BrierLoss - -@create_docs(BrierLoss, -body= -""" -For details, see [`BrierScore`](@ref), which differs only by a sign. -""", -scitype=DOC_MULTI) - -# calling on single observation: -single(::BrierLoss, d, η) = - single(BrierScore(), d, η) - -# to get performant broadcasting in case of UnivariateFiniteArray: -call(m::BrierLoss, ŷ::UnivariateFiniteArray, y) = - -call(BrierScore(), ŷ, y) -call(m::BrierLoss, ŷ::UnivariateFiniteArray, y, w::AbstractArray) = - -call(BrierScore(), ŷ, y, w) - - -# ----------------------------------------------------- -# SphericalScore - -struct SphericalScore{T<:Real} <: Unaggregated - alpha::T -end -SphericalScore(; alpha=2) = SphericalScore(alpha) - -metadata_measure(SphericalScore; - human_name = "Spherical score", - instances = ["spherical_score",], - target_scitype = Union{ - Arr{<:Union{Missing,Multiclass}}, - Arr{<:Union{Missing,OrderedFactor}}, - Arr{<:Union{Missing,Continuous}}, - Arr{<:Union{Missing,Count}}}, - prediction_type = :probabilistic, - orientation = :score, - distribution_type = Union{WITH_L2NORM...}) - -@create_aliases SphericalScore - -@create_docs(SphericalScore, -body= -""" -Convention as in $PROPER_SCORING_RULES: If the target takes values in a -finite set of classes `C` and `p(η)` is the predicted probability of a -*single* observation `η`, then the corresponding score for that -observation is given by - -``\\left(p(η) / \\|p\\|_α\\right)^{α - 1}, \\qquad \\|p\\|_α = \\left(\\sum_{c ∈ C} p(c)^α\\right)^{1/α}`` - -where `α` is the measure parameter `alpha`. 
- -$DOC_DISTRIBUTIONS - -""", -scitype=DOC_MULTI) - -# calling on single observations: -function single(s::SphericalScore, d::UnivariateFinite, η) - α = s.alpha - levels = classes(d) - pvec = broadcast(pdf, d, levels) - return (pdf(d, η)/norm(pvec, α))^(α - 1) -end - -single(s::SphericalScore, d::Distributions.UnivariateDistribution, η) = - pdf(d, η)/sqrt(Distributions.pdfsquaredL2norm(d)) - -# to compute the α-norm along last dimension: -_norm(A::AbstractArray{<:Any,N}, α) where N = - sum(x -> x^α, A, dims=N).^(1/α) - -# To get performant version in case of UnivariateFiniteArray: -function call( - s::SphericalScore, - ŷ::UnivariateFiniteArray, - y - ) - α = s.alpha - alphanorm(A) = _norm(A, α) - - predicted_probs = pdf(ŷ, classes(first(ŷ))) - - (broadcast(pdf, ŷ, y) ./ alphanorm(predicted_probs)).^(α - 1) -end -call(s::SphericalScore, ŷ::UnivariateFiniteArray, y, w::AbstractArray) = - call(s, ŷ, y) .* w - - -# --------------------------------------------------------------------------- -# Extra check for L2 norm based proper scoring rules - -err_l2_norm(m) = ArgumentError( - "Distribution not supported by $m. "* - "Supported distributions are "* - join(string.(map(s->"`$s`", WITH_L2NORM)), ", ", ", and ")) - -const ERR_UNSUPPORTED_ALPHA = ArgumentError( - "Only `alpha = 2` is supported, unless scoring a `Finite` target. ") - -# not for export: -const L2ProperScoringRules = Union{LogScore, - LogLoss, - BrierScore, - BrierLoss, - SphericalScore} - -function extra_check(measure::L2ProperScoringRules, yhat, args...) - - D = nonmissing(eltype(yhat)) - D <: Distributions.Distribution || D <: UnivariateFinite || - (D = typeof(findfirst(x->!isinvalid(x), yhat))) - D <: Union{Nothing, WITH_L2NORM...} || - throw(err_l2_norm(measure)) - - if measure isa SphericalScore - measure.alpha == 2 || throw(ERR_UNSUPPORTED_ALPHA) - end - - return nothing -end diff --git a/src/measures/roc.jl b/src/measures/roc.jl deleted file mode 100644 index 8614b00e..00000000 --- a/src/measures/roc.jl +++ /dev/null @@ -1,91 +0,0 @@ -## ROC COMPUTATION - -""" - _idx_unique_sorted(v) - -Internal function to return the indices of unique elements in `v` under the -assumption that the vector `v` is sorted in decreasing order. -""" -function _idx_unique_sorted(v::Vec{<:Real}) - n = length(v) - idx = ones(Int, n) - p, h = 1, 1 - cur = v[1] - @inbounds while h < n - h += 1 # head position - cand = v[h] # candidate value - cand < cur || continue # is it new? otherwise skip - p += 1 # if new store it - idx[p] = h - cur = cand # and update the last seen value - end - p < n && deleteat!(idx, p+1:n) - return idx -end - -""" - fprs, tprs, ts = roc_curve(ŷ, y) = roc(ŷ, y) - -Return the ROC curve for a two-class probabilistic prediction `ŷ` given the -ground truth `y`. The false positive rates `fprs`, the true positive rates -`tprs`, and the thresholds `ts` are returned. Note that if there are `k` unique -scores, there are correspondingly `k` thresholds and `k+1` "bins" over which -the FPR and TPR are constant: - -* `[0.0 - thresh[1]]` -* `[thresh[1] - thresh[2]]` -* ... -* `[thresh[k] - 1]` - -consequently, `tprs` and `fprs` are of length `k+1` if `ts` is of length `k`. - -To draw the curve using your favorite plotting backend, do `plot(fprs, tprs)`. 
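For example (hypothetical values; assumes the `categorical` and `UnivariateFinite` constructors re-exported by MLJBase):

    y = categorical([0, 0, 1, 1])
    ŷ = UnivariateFinite(levels(y), [0.2, 0.4, 0.6, 0.8], augment=true, pool=y)
    fprs, tprs, ts = roc_curve(ŷ, y)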
-""" -function roc_curve(ŷm, ym) - ŷ, y = skipinvalid(ŷm, ym) - length(classes(ŷ)) == 2 || throw( - ArgumentError("`ŷ` must be a two-class probabilistic prediction") - ) - length(levels(y)) == 2 || throw( - ArgumentError("`y` must be a categorical vector with two-levels.") - ) - n = length(y) - lab_pos = levels(y)[2] - scores = pdf.(ŷ, lab_pos) - ranking = sortperm(scores, rev=true) - - scores_sort = scores[ranking] - y_sort_bin = (y[ranking] .== lab_pos) - - idx_unique = _idx_unique_sorted(scores_sort) - thresholds = scores_sort[idx_unique] - - # detailed computations with example: - # y = [ 1 0 0 1 0 0 1] - # s = [0.5 0.5 0.2 0.2 0.1 0.1 0.1] thresh are 0.5 0.2, 0.1 // idx [1, 3, 5] - # ŷ = [ 0 0 0 0 0 0 0] (0.5 - 1.0] # no pos pred - # ŷ = [ 1 1 0 0 0 0 0] (0.2 - 0.5] # 2 pos pred - # ŷ = [ 1 1 1 1 0 0 0] (0.1 - 0.2] # 4 pos pred - # ŷ = [ 1 1 1 1 1 1 1] [0.0 - 0.1] # all pos pre - - idx_unique_2 = idx_unique[2:end] # [3, 5] - n_ŷ_pos = idx_unique_2 .- 1 # [2, 4] implicit [0, 2, 4, 7] - - cs = cumsum(y_sort_bin) # [1, 1, 1, 2, 2, 2, 3] - n_tp = cs[n_ŷ_pos] # [1, 2] implicit [0, 1, 2, 3] - n_fp = n_ŷ_pos .- n_tp # [1, 2] implicit [0, 1, 2, 4] - - # add end points - P = sum(y_sort_bin) # total number of true positives - N = n - P # total number of true negatives - - n_tp = [0, n_tp..., P] # [0, 1, 2, 3] - n_fp = [0, n_fp..., N] # [0, 1, 2, 4] - - tprs = n_tp ./ P # [0/3, 1/3, 2/3, 1] - fprs = n_fp ./ N # [0/4, 1/4, 2/4, 1] - - return fprs, tprs, thresholds -end - -const roc = roc_curve diff --git a/src/resampling.jl b/src/resampling.jl index 1b74e042..d0591325 100644 --- a/src/resampling.jl +++ b/src/resampling.jl @@ -14,8 +14,6 @@ const PREDICT_OPERATIONS_STRING = begin join(strings, ", ", ", or ") end const PROG_METER_DT = 0.1 -const ERR_WEIGHTS_REAL = - ArgumentError("`weights` must be a `Real` vector. ") const ERR_WEIGHTS_LENGTH = DimensionMismatch("`weights` and target "* "have different lengths. ") @@ -32,19 +30,41 @@ const ERR_INVALID_OPERATION = ArgumentError( "Invalid `operation` or `operations`. "* "An operation must be one of these: $PREDICT_OPERATIONS_STRING. ") _ambiguous_operation(model, measure) = - "`prediction_type($measure) == $(prediction_type(measure))` but "* - "`prediction_type($model) == $(prediction_type(model))`." + "`$measure` does not support a `model` with "* + "`prediction_type(model) == :$(prediction_type(model))`. " err_ambiguous_operation(model, measure) = ArgumentError( _ambiguous_operation(model, measure)* - "\nUnable to deduce an appropriate operation for $measure. "* + "\nUnable to infer an appropriate operation for `$measure`. "* "Explicitly specify `operation=...` or `operations=...`. ") err_incompatible_prediction_types(model, measure) = ArgumentError( _ambiguous_operation(model, measure)* - "If your model really is making probabilistic predictions, try explicitly "* + "If your model is truly making probabilistic predictions, try explicitly "* "specifiying operations. For example, for "* "`measures = [area_under_curve, accuracy]`, try "* "`operations=[predict, predict_mode]`. ") - +const LOG_AVOID = "\nTo override measure checks, set check_measure=false. " +const LOG_SUGGESTION1 = + "\nPerhaps you want to set `operation="* + "predict_mode` or need to "* + "specify multiple operations, "* + "one for each measure. " +const LOG_SUGGESTION2 = + "\nPerhaps you want to set `operation="* + "predict_mean` or `operation=predict_median`, or "* + "specify multiple operations, "* + "one for each measure. 
" +ERR_MEASURES_OBSERVATION_SCITYPE(measure, T_measure, T) = ArgumentError( + "\nobservation scitype of target = `$T` but ($measure) only supports "* + "`$T_measure`."*LOG_AVOID +) +ERR_MEASURES_PROBABILISTIC(measure, suggestion) = ArgumentError( + "The model subtypes `Probabilistic`, and so is not supported by "* + "`$measure`. $suggestion"*LOG_AVOID +) +ERR_MEASURES_DETERMINISTIC(measure) = ArgumentError( + "The model subtypes `Deterministic`, "* + "and so is not supported by `$measure`. "*LOG_AVOID +) # ================================================================== ## MODEL TYPES THAT CAN BE EVALUATED @@ -345,7 +365,7 @@ For example, if you run `replace!(y, 'a' => 'b', 'b' => 'a')` and then re-run `train_test_pairs`, the returned `(train, test)` pairs will be the same. Pre-shuffling of `rows` is controlled by `rng` and `shuffle`. If `rng` -is an integer, then the `StratifedCV` keyword constructor resets it to +is an integer, then the `StratifedCV` keywod constructor resets it to `MersenneTwister(rng)`. Otherwise some `AbstractRNG` object is expected. @@ -448,65 +468,58 @@ end """ PerformanceEvaluation -Type of object returned by [`evaluate`](@ref) (for models plus data) -or [`evaluate!`](@ref) (for machines). Such objects encode estimates -of the performance (generalization error) of a supervised model or -outlier detection model. - -When `evaluate`/`evaluate!` is called, a number of train/test pairs -("folds") of row indices are generated, according to the options -provided, which are discussed in the [`evaluate!`](@ref) -doc-string. Rows correspond to observations. The generated train/test -pairs are recorded in the `train_test_rows` field of the -`PerformanceEvaluation` struct, and the corresponding estimates, -aggregated over all train/test pairs, are recorded in `measurement`, a -vector with one entry for each measure (metric) recorded in `measure`. - -When displayed, a `PerformanceEvalution` object includes a value under -the heading `1.96*SE`, derived from the standard error of the `per_fold` -entries. This value is suitable for constructing a formal 95% -confidence interval for the given `measurement`. Such intervals should -be interpreted with caution. See, for example, Bates et al. -[(2021)](https://arxiv.org/abs/2104.00673). +Type of object returned by [`evaluate`](@ref) (for models plus data) or +[`evaluate!`](@ref) (for machines). Such objects encode estimates of the performance +(generalization error) of a supervised model or outlier detection model. + +When `evaluate`/`evaluate!` is called, a number of train/test pairs ("folds") of row +indices are generated, according to the options provided, which are discussed in the +[`evaluate!`](@ref) doc-string. Rows correspond to observations. The generated train/test +pairs are recorded in the `train_test_rows` field of the `PerformanceEvaluation` struct, +and the corresponding estimates, aggregated over all train/test pairs, are recorded in +`measurement`, a vector with one entry for each measure (metric) recorded in `measure`. + +When displayed, a `PerformanceEvalution` object includes a value under the heading +`1.96*SE`, derived from the standard error of the `per_fold` entries. This value is +suitable for constructing a formal 95% confidence interval for the given +`measurement`. Such intervals should be interpreted with caution. See, for example, Bates +et al. [(2021)](https://arxiv.org/abs/2104.00673). ### Fields -These fields are part of the public API of the `PerformanceEvaluation` -struct. 
+These fields are part of the public API of the `PerformanceEvaluation` struct.
 
 - `measure`: vector of measures (metrics) used to evaluate performance
 
-- `measurement`: vector of measurements - one for each element of
-  `measure` - aggregating the performance measurements over all
-  train/test pairs (folds). The aggregation method applied for a given
-  measure `m` is `aggregation(m)` (commonly `Mean` or `Sum`)
-
-- `operation` (e.g., `predict_mode`): the operations applied for each
-  measure to generate predictions to be evaluated. Possibilities are:
-  $PREDICT_OPERATIONS_STRING.
-
-- `per_fold`: a vector of vectors of individual test fold evaluations
-  (one vector per measure). Useful for obtaining a rough estimate of
-  the variance of the performance estimate.
-
-- `per_observation`: a vector of vectors of individual observation
-  evaluations of those measures for which
-  `reports_each_observation(measure)` is true, which is otherwise
-  reported `missing`. Useful for some forms of hyper-parameter
-  optimization.
-
-- `fitted_params_per_fold`: a vector containing `fitted params(mach)`
-  for each machine `mach` trained during resampling - one machine per
-  train/test pair. Use this to extract the learned parameters for each
-  individual training event.
-
-- `report_per_fold`: a vector containing `report(mach)` for each
-  machine `mach` training in resampling - one machine per train/test
-  pair.
-
-- `train_test_rows`: a vector of tuples, each of the form `(train, test)`,
-  where `train` and `test` are vectors of row (observation) indices for
-  training and evaluation respectively.
+- `measurement`: vector of measurements - one for each element of `measure` - aggregating
+  the performance measurements over all train/test pairs (folds). The aggregation method
+  applied for a given measure `m` is `aggregation(m)` (commonly `Mean` or `Sum`)
+
+- `operation` (e.g., `predict_mode`): the operations applied for each measure to generate
+  predictions to be evaluated. Possibilities are: $PREDICT_OPERATIONS_STRING.
+
+- `per_fold`: a vector of vectors of individual test fold evaluations (one vector per
+  measure). Useful for obtaining a rough estimate of the variance of the performance
+  estimate.
+
+- `per_observation`: a vector of vectors of vectors containing individual per-observation
+  measurements: for an evaluation `e`, `e.per_observation[m][f][i]` is the measurement for
+  the `i`th observation in the `f`th test fold, evaluated using the `m`th measure. Useful
+  for some forms of hyper-parameter optimization. Note that an aggregated measurement
+  for some measure `measure` is repeated across all observations in a fold if
+  `StatisticalMeasures.can_report_unaggregated(measure) == false`.
+
+- `fitted_params_per_fold`: a vector containing `fitted_params(mach)` for each machine
+  `mach` trained during resampling - one machine per train/test pair. Use this to extract
+  the learned parameters for each individual training event.
+
+- `report_per_fold`: a vector containing `report(mach)` for each machine `mach` trained
+  in resampling - one machine per train/test pair.
+
+- `train_test_rows`: a vector of tuples, each of the form `(train, test)`, where `train`
+  and `test` are vectors of row (observation) indices for training and evaluation
+  respectively. 
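+
+For illustration only, a sketch (here `model`, `X` and `y` are placeholders for some
+supervised regressor and compatible data; they are not defined by this package):
+
+```
+e = evaluate(model, X, y, resampling=CV(nfolds=3), measures=[l2, rms])
+e.measurement[2]         # `rms` aggregated over all three folds
+e.per_fold[2]            # vector of the three per-fold `rms` values
+e.per_observation[1][2]  # `l2` values for each observation in the 2nd test fold
+e.train_test_rows[1]     # `(train, test)` row indices for the 1st fold
+```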
+ """ struct PerformanceEvaluation{M, Measurement, @@ -605,48 +618,37 @@ end function _check_measure(measure, operation, model, y) - T = scitype(y) + # get observation scitype: + T = MLJBase.guess_observation_scitype(y) + # get type supported by measure: + T_measure = StatisticalMeasuresBase.observation_scitype(measure) + T == Unknown && (return true) - target_scitype(measure) == Unknown && (return true) - prediction_type(measure) == :unknown && (return true) + T_measure == Union{} && (return true) + isnothing(StatisticalMeasuresBase.kind_of_proxy(measure)) && (return true) - avoid = "\nTo override measure checks, set check_measure=false. " - T <: target_scitype(measure) || - throw(ArgumentError( - "\nscitype of target = $T but target_scitype($measure) = "* - "$(target_scitype(measure))."*avoid)) + T <: T_measure || throw(ERR_MEASURES_OBSERVATION_SCITYPE(measure, T_measure, T)) incompatible = model isa Probabilistic && operation == predict && - prediction_type(measure) != :probabilistic + StatisticalMeasuresBase.kind_of_proxy(measure) != LearnAPI.Distribution() if incompatible - if target_scitype(measure) <: - AbstractVector{<:Union{Missing,Finite}} - suggestion = "\nPerhaps you want to set `operation="* - "predict_mode` or need to "* - "specify multiple operations, "* - "one for each measure. " - elseif target_scitype(measure) <: - AbstractVector{<:Union{Missing,Continuous}} - suggestion = "\nPerhaps you want to set `operation="* - "predict_mean` or `operation=predict_median`, or "* - "specify multiple operations, "* - "one for each measure. " + if T <: Union{Missing,Finite} + suggestion = LOG_SUGGESTION1 + elseif T <: Union{Missing,Infinite} + suggestion = LOG_SUGGESTION2 else suggestion = "" end - throw(ArgumentError( - "\n$model <: Probabilistic but prediction_type($measure) = "* - ":$(prediction_type(measure)). "*suggestion*avoid)) + throw(ERR_MEASURES_PROBABILISTIC(measure, suggestion)) end - model isa Deterministic && prediction_type(measure) != :deterministic && - throw(ArgumentError("$model <: Deterministic but "* - "prediction_type($measure) ="* - ":$(prediction_type(measure))."*avoid)) + model isa Deterministic && + StatisticalMeasuresBase.kind_of_proxy(measure) != LearnAPI.LiteralTarget() && + throw(ERR_MEASURES_DETERMINISTIC(measure)) return true @@ -670,13 +672,14 @@ function _actual_measures(measures, model) _measures = measures end - return _measures + # wrap in `robust_measure` to allow unsupported weights to be silently treated as + # uniform when invoked; `_check_measure` will throw appropriate warnings unless + # explicitly suppressed. + return StatisticalMeasuresBase.robust_measure.(_measures) end function _check_weights(weights, nrows) - weights isa AbstractVector{<:Real} || - throw(ERR_WEIGHTS_REAL) length(weights) == nrows || throw(ERR_WEIGHTS_LENGTH) return true @@ -729,21 +732,35 @@ function _actual_operations(operation::Nothing, verbosity) map(measures) do m - prediction_type = MLJBase.prediction_type(m) - target_scitype = MLJBase.target_scitype(m) + # `kind_of_proxy` is the measure trait corresponding to `prediction_type` model + # trait. 
But its values are instances of `LearnAPI.KindOfProxy`, instead of
+            # symbols:
+            #
+            # `LearnAPI.LiteralTarget()` ~ `:deterministic` (`model isa Deterministic`)
+            # `LearnAPI.Distribution()` ~ `:probabilistic` (`model isa Probabilistic`)
+            #
+            kind_of_proxy = StatisticalMeasuresBase.kind_of_proxy(m)
 
-            if prediction_type === :unknown
-                return predict
-            end
+            # `observation_scitype` is the measure trait which we need to match the model
+            # `target_scitype`, but the latter refers to the whole target `y`, not a single
+            # observation.
+            #
+            # One day, models will have their own `observation_scitype`.
+            observation_scitype = StatisticalMeasuresBase.observation_scitype(m)
+
+            # One day, models will implement LearnAPI and will get their own `kind_of_proxy`
+            # trait replacing `prediction_type` and `observation_scitype` trait replacing
+            # `target_scitype`.
+
+            isnothing(kind_of_proxy) && (return predict)
 
             if MLJBase.prediction_type(model) === :probabilistic
-                if prediction_type === :probabilistic
+                if kind_of_proxy === LearnAPI.Distribution()
                     return predict
-                elseif prediction_type === :deterministic
-                    if target_scitype <: AbstractArray{<:Union{Missing,Finite}}
+                elseif kind_of_proxy === LearnAPI.LiteralTarget()
+                    if observation_scitype <: Union{Missing,Finite}
                         return predict_mode
-                    elseif target_scitype <:
-                        AbstractArray{<:Union{Missing,Continuous,Count}}
+                    elseif observation_scitype <: Union{Missing,Infinite}
                         return predict_mean
                     else
                         throw(err_ambiguous_operation(model, m))
@@ -752,19 +769,21 @@ function _actual_operations(operation::Nothing,
                     throw(err_ambiguous_operation(model, m))
                 end
             elseif MLJBase.prediction_type(model) === :deterministic
-                if prediction_type === :probabilistic
+                if kind_of_proxy === LearnAPI.Distribution()
                     throw(err_incompatible_prediction_types(model, m))
-                elseif prediction_type === :deterministic
+                elseif kind_of_proxy === LearnAPI.LiteralTarget()
                     return predict
                 else
                     throw(err_ambiguous_operation(model, m))
                 end
-            else
-                if prediction_type === :interval
+            elseif MLJBase.prediction_type(model) === :interval
+                if kind_of_proxy === LearnAPI.ConfidenceInterval()
                     return predict
                 else
                     throw(err_ambiguous_operation(model, m))
                 end
+            else
+                throw(err_ambiguous_operation(model, m))
             end
         end
 end
@@ -856,12 +875,11 @@ pairs for evaluation and subsequent aggregation. If `resampling isa
 MLJ.ResamplingStrategy` then one may optionally restrict the
 data used in evaluation by specifying `rows`.
 
-An optional `weights` vector may be passed for measures that support
-sample weights (`MLJ.supports_weights(measure) == true`), which is
-ignored by those that don't. These weights are not to be confused with
-any weights `w` bound to `mach` (as in `mach = machine(model, X,
-y, w)`). To pass these to the performance evaluation measures you must
-explictly specify `weights=w` in the `evaluate!` call.
+An optional `weights` vector may be passed for measures that support sample weights
+(`StatisticalMeasuresBase.supports_weights(measure) == true`), which is ignored by those
+that don't. These weights are not to be confused with any weights `w` bound to `mach` (as
+in `mach = machine(model, X, y, w)`). To pass these to the performance evaluation measures
+you must explicitly specify `weights=w` in the `evaluate!` call. 
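+
+For example, a sketch only (`model`, `X` and `y` below stand for some supervised model
+and compatible data; the weight values are purely illustrative):
+
+```
+w = abs.(randn(length(y)))  # per-observation weights (illustrative)
+mach = machine(model, X, y)
+evaluate!(mach, resampling=CV(nfolds=5), measures=[l2], weights=w)
+```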
Additionally, optional `class_weights` dictionary may be passed
 for measures that support class weights
 
@@ -973,10 +991,20 @@ function evaluate!(mach::Machine{<:Measurable};
                          verbosity,
                          check_measure)
 
-    _warn_about_unsupported(supports_weights,
-                            "Sample", _measures, weights, verbosity)
-    _warn_about_unsupported(supports_class_weights,
-                            "Class", _measures, class_weights, verbosity)
+    _warn_about_unsupported(
+        StatisticalMeasuresBase.supports_weights,
+        "Sample",
+        _measures,
+        weights,
+        verbosity,
+    )
+    _warn_about_unsupported(
+        StatisticalMeasuresBase.supports_class_weights,
+        "Class",
+        _measures,
+        class_weights,
+        verbosity,
+    )
 
     _acceleration= _process_accel_settings(acceleration)
 
@@ -1140,22 +1168,8 @@ const AbstractRow = Union{AbstractVector{<:Integer}, Colon}
 const TrainTestPair = Tuple{AbstractRow, AbstractRow}
 const TrainTestPairs = AbstractVector{<:TrainTestPair}
 
-# helper:
-_feature_dependencies_exist(measures) =
-    !all(m->!(is_feature_dependent(m)), measures)
-
-# helper:
-function measure_specific_weights(measure, weights, class_weights, test)
-    supports_weights(measure) && supports_class_weights(measure) &&
-        error("Encountered a measure that simultaneously supports "*
-              "(per-sample) weights and class weights. ")
-    if supports_weights(measure)
-        weights === nothing && return nothing
-        return weights[test]
-    end
-    supports_class_weights(measure) && return class_weights
-    return nothing
-end
+_view(::Nothing, rows) = nothing
+_view(weights, rows) = view(weights, rows)
 
 # Evaluation when `resampling` is a TrainTestPairs (CORE EVALUATOR):
 function evaluate!(mach::Machine, resampling, weights,
@@ -1172,12 +1186,21 @@ function evaluate!(mach::Machine, resampling, weights,
 
     X = mach.args[1]()
     y = mach.args[2]()
+    nrows = MLJBase.nrows(y)
 
     nfolds = length(resampling)
+    test_fold_sizes = map(resampling) do train_test_pair
+        test = last(train_test_pair)
+        test isa Colon && (return nrows)
+        length(test)
+    end
 
-    nmeasures = length(measures)
+    # weights used to aggregate per-fold measurements, which depend on a measure's
+    # external mode of aggregation:
+    fold_weights(mode) = nfolds .* test_fold_sizes ./ sum(test_fold_sizes)
+    fold_weights(::StatisticalMeasuresBase.Sum) = nothing
 
-    feature_dependencies_exist = _feature_dependencies_exist(measures)
+    nmeasures = length(measures)
 
     function fit_and_extract_on_fold(mach, k)
         train, test = resampling[k]
@@ -1186,21 +1209,17 @@ function evaluate!(mach::Machine, resampling, weights,
         # that appear (`predict`, `predict_mode`, etc):
         yhat_given_operation = Dict(op=>op(mach, rows=test) for op in unique(operations))
-        if feature_dependencies_exist
-            Xtest = selectrows(X, test)
-        else
-            Xtest = nothing
-        end
+
         ytest = selectrows(y, test)
 
         measurements = map(measures, operations) do m, op
-            wtest = measure_specific_weights(
+            StatisticalMeasuresBase.measurements(
                 m,
-                weights,
+                yhat_given_operation[op],
+                ytest,
+                _view(weights, test),
                 class_weights,
-                test
             )
-            value(m, yhat_given_operation[op], Xtest, ytest, wtest)
         end
 
         fp = fitted_params(mach)
@@ -1233,35 +1252,35 @@ function evaluate!(mach::Machine, resampling, weights,
 
     measurements_flat = vcat(measurements_vector_of_vectors...)
 
-    # in the following rows=folds, columns=measures:
+    # in the following rows=folds, columns=measures; each element of the matrix is a
+    # vector of measurements, one per observation, over a fold for a particular metric. 
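+    #
+    # Schematic example (not computed here): with 2 measures and 3 folds of sizes
+    # 2, 1 and 2, the matrix constructed below has one row per fold and one column
+    # per measure, each entry being a vector of per-observation measurements:
+    #
+    #     [ [0.10, 0.20]  [1.0, 0.0]
+    #       [0.30]        [1.0]
+    #       [0.50, 0.40]  [0.0, 1.0] ]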
measurements_matrix = permutedims(
         reshape(collect(measurements_flat), (nmeasures, nfolds))
     )
 
     # measurements for each observation:
     per_observation = map(1:nmeasures) do k
-        m = measures[k]
-        if reports_each_observation(m)
             measurements_matrix[:,k]
-        else
-            missing
-        end
     end
 
     # measurements for each fold:
     per_fold = map(1:nmeasures) do k
         m = measures[k]
-        if reports_each_observation(m)
-            broadcast(MLJBase.aggregate, per_observation[k], [m,])
-        else
-            measurements_matrix[:,k]
+        mode = StatisticalMeasuresBase.external_aggregation_mode(m)
+        map(per_observation[k]) do v
+            StatisticalMeasuresBase.aggregate(v; mode)
         end
     end
 
     # overall aggregates:
     per_measure = map(1:nmeasures) do k
         m = measures[k]
-        MLJBase.aggregate(per_fold[k], m)
+        mode = StatisticalMeasuresBase.external_aggregation_mode(m)
+        StatisticalMeasuresBase.aggregate(
+            per_fold[k];
+            mode,
+            weights=fold_weights(mode),
+        )
     end
 
     return PerformanceEvaluation(
diff --git a/src/utilities.jl b/src/utilities.jl
index 66dd62b7..e9775e93 100644
--- a/src/utilities.jl
+++ b/src/utilities.jl
@@ -469,3 +469,43 @@ end
 
 generate_name!(model, existing_names; kwargs...) =
     generate_name!(typeof(model), existing_names; kwargs...)
+
+# This is a bit of a hack in the case of tables: it is based on the first row. If some
+# rows have `missing`s, they may not be accounted for in the type. If the first row has
+# `missing`s the "regular" type will not be accounted for. So use with caution.
+"""
+    guess_observation_scitype(y)
+
+*Private method.*
+
+If `y` is an `AbstractArray`, return the scitype of `y[:, :, ..., :, 1]`. If `y` is a
+table, return the scitype of the first row, converted to a vector, unless this row has
+`missing` elements, in which case return `Unknown`.
+
+In all other cases, `Unknown`.
+
+```
+julia> guess_observation_scitype([missing, 1, 2, 3])
+Union{Missing, Count}
+
+julia> guess_observation_scitype(rand(3, 2))
+AbstractVector{Continuous}
+
+julia> guess_observation_scitype((x=rand(3), y=rand(Bool, 3)))
+AbstractVector{Union{Continuous, Count}}
+
+julia> guess_observation_scitype((x=[missing, 1, 2], y=[1, 2, 3]))
+Unknown
+```
+"""
+guess_observation_scitype(y) = guess_observation_scitype(y, Val(Tables.istable(y)))
+guess_observation_scitype(y, ::Any) = Unknown
+guess_observation_scitype(y::AbstractArray, ::Val{false}) = _observation(scitype(y))
+_observation(::Type{AbstractVector{S}}) where S = S
+_observation(::Type{AbstractArray{S,N}}) where {S,N} = AbstractArray{S,N-1}
+function guess_observation_scitype(table, ::Val{true})
+    row = Tables.subset(table, 1, viewhint=false) |> collect
+    E = eltype(row)
+    nonmissingtype(E) == E || return Unknown
+    scitype(row)
+end
diff --git a/test/composition/models/stacking.jl b/test/composition/models/stacking.jl
index 6cbe6588..8bc62043 100644
--- a/test/composition/models/stacking.jl
+++ b/test/composition/models/stacking.jl
@@ -31,7 +31,7 @@ function test_internal_evaluation(internalreport, std_evaluation, modelnames)
         @test model_ev isa PerformanceEvaluation
         @test model_ev.per_fold == std_ev.per_fold
         @test model_ev.measurement == std_ev.measurement
-        @test model_ev.per_observation[1] === std_ev.per_observation[1] === missing
+        @test model_ev.per_observation[1] == std_ev.per_observation[1]
         @test model_ev.per_observation[2] == std_ev.per_observation[2]
         @test model_ev.operation == std_ev.operation
         @test model_ev.report_per_fold == std_ev.report_per_fold
diff --git a/test/interface/model_api.jl b/test/interface/model_api.jl
index 9bf3e0bf..6fd553f8 100644
--- a/test/interface/model_api.jl
+++ 
b/test/interface/model_api.jl @@ -77,7 +77,7 @@ UnivariateFiniteFitter(;alpha=1.0) = UnivariateFiniteFitter(alpha) yhat = predict(mach, nothing) # single UnivariateFinite distribution @test cross_entropy(fill(yhat, 3), ytest) ≈ - [-log(1/2), -log(1/2), -log(1/4)] + mean([-log(1/2), -log(1/2), -log(1/4)]) end diff --git a/test/measures.jl b/test/measures.jl new file mode 100644 index 00000000..34502fb1 --- /dev/null +++ b/test/measures.jl @@ -0,0 +1,40 @@ +mutable struct DRegressor <: Deterministic end +MLJBase.target_scitype(::Type{<:DRegressor}) = + AbstractVector{<:Continuous} + +mutable struct D2Regressor <: Deterministic end +MLJBase.target_scitype(::Type{<:D2Regressor}) = + AbstractVector{Continuous} + +mutable struct DClassifier <: Deterministic end +MLJBase.target_scitype(::Type{<:DClassifier}) = + AbstractVector{<:Finite} + +mutable struct PClassifier <: Probabilistic end +MLJBase.target_scitype(::Type{<:PClassifier}) = + AbstractVector{<:Finite} + +mutable struct PRegressor <: Probabilistic end +MLJBase.target_scitype(::Type{<:PRegressor}) = + AbstractVector{<:Continuous} + +mutable struct PCountRegressor <: Probabilistic end +MLJBase.target_scitype(::Type{<:PCountRegressor}) = + AbstractVector{<:Count} + +@testset "default_measure" begin + @test MLJBase.default_measure(DRegressor()) == rms + @test MLJBase.default_measure(D2Regressor()) == rms + @test MLJBase.default_measure(DClassifier()) == misclassification_rate + @test MLJBase.default_measure(PClassifier()) == log_loss + + @test MLJBase.default_measure(DRegressor) == rms + @test MLJBase.default_measure(D2Regressor) == rms + @test MLJBase.default_measure(DClassifier) == misclassification_rate + @test MLJBase.default_measure(PClassifier) == log_loss + + @test MLJBase.default_measure(PRegressor) == log_loss + @test MLJBase.default_measure(PCountRegressor) == log_loss +end + +true diff --git a/test/measures/confusion_matrix.jl b/test/measures/confusion_matrix.jl deleted file mode 100644 index ce8911fb..00000000 --- a/test/measures/confusion_matrix.jl +++ /dev/null @@ -1,116 +0,0 @@ -using Test -using MLJBase -include(joinpath("..", "..", "test", "_models", "models.jl")) -using .Models - -@testset "_categorical" begin - a = [1, 1, 2, 3] - b = [3, 3, 4, 5] - c = [missing, a...] - d = [missing, b...] 
- e = categorical(a) - f = categorical(b) - g = categorical(c) - h = categorical(d) - j = CategoricalArrays.CategoricalValue{Int64, UInt32}[e[1], e[1], e[1], e[1]] - k = CategoricalArrays.CategoricalValue{Int64, UInt32}[e[4], e[4], e[4], e[4]] - rhs = (Set(1:5), Set(1:5)) - @test Set.(levels.(MLJBase._categorical(a, b))) == rhs - @test Set.(levels.(MLJBase._categorical(a, d))) == rhs - @test Set.(levels.(MLJBase._categorical(c, b))) == rhs - @test Set.(levels.(MLJBase._categorical(c, d))) == rhs - @test Set.(levels.(MLJBase._categorical(a, f))) == rhs - @test Set.(levels.(MLJBase._categorical(a, h))) == rhs - @test Set.(levels.(MLJBase._categorical(b, a))) == rhs - @test Set.(levels.(MLJBase._categorical(d, a))) == rhs - @test Set.(levels.(MLJBase._categorical(b, c))) == rhs - @test Set.(levels.(MLJBase._categorical(d, c))) == rhs - @test Set.(levels.(MLJBase._categorical(f, a))) == rhs - @test Set.(levels.(MLJBase._categorical(h, a))) == rhs - - @test Set.(levels.(MLJBase._categorical(j, k))) == (Set(1:3), Set(1:3)) - - # case of ordinary vector with CategoricalValue eltype: - acv = CategoricalArrays.CategoricalVector -end - -@testset "basics" begin - yraw = ['m', 'm', 'f', 'n', missing, 'f', 'm', 'n', 'n', 'm', 'f'] - ŷraw = [missing, 'f', 'f', 'm', 'f', 'f', 'n', 'm', 'n', 'm', 'f'] - y = categorical(yraw) - ŷ = categorical(ŷraw) - l = levels(y) # f, m, n - cm = MLJBase._confmat(ŷ, y; warn=false) - ŷ_clean, y_clean = MLJBase.skipinvalid(ŷ, y) - ee(l,i,j) = sum((ŷ_clean .== l[i]) .& (y_clean .== l[j])) - for i in 1:3, j in 1:3 - @test cm[i,j] == ee(l,i,j) - end - - cm2 = @test_logs (:warn, r"The classes are") MLJBase._confmat(ŷraw, yraw) - @test cm2.mat == cm.mat - - perm = [3, 1, 2] - l2 = l[perm] - cm2 = @test_logs MLJBase._confmat(ŷ, y; perm=perm) - m = ConfusionMatrix(perm=perm) - for i in 1:3, j in 1:3 - @test cm2[i,j] == ee(l2,i,j) - end - @test_logs (:warn, r"The classes are un") MLJBase._confmat(ŷ, y) - ŷc = coerce(ŷ, Union{Missing,OrderedFactor}) - yc = coerce(y, Union{Missing,OrderedFactor}) - @test MLJBase._confmat(ŷc, yc).mat == cm.mat - - y = categorical(['a','b','a','b']) - ŷ = categorical(['b','b','a','a']) - @test_logs (:warn, r"The classes are un") MLJBase._confmat(ŷ, y) - - # more tests for coverage - y = categorical([1,2,3,1,2,3,1,2,3]) - ŷ = categorical([1,2,3,1,2,3,1,2,3]) - @test_throws ArgumentError MLJBase._confmat(ŷ, y, rev=true) - - # silly test for display - ŷ = coerce(y, OrderedFactor) - y = coerce(y, OrderedFactor) - iob = IOBuffer() - Base.show(iob, MIME("text/plain"), MLJBase._confmat(ŷ, y)) - siob = String(take!(iob)) - @test strip(siob) == strip(""" - ┌─────────────────────────────────────────┐ - │ Ground Truth │ - ┌─────────────┼─────────────┬─────────────┬─────────────┤ - │ Predicted │ 1 │ 2 │ 3 │ - ├─────────────┼─────────────┼─────────────┼─────────────┤ - │ 1 │ 3 │ 0 │ 0 │ - ├─────────────┼─────────────┼─────────────┼─────────────┤ - │ 2 │ 0 │ 3 │ 0 │ - ├─────────────┼─────────────┼─────────────┼─────────────┤ - │ 3 │ 0 │ 0 │ 3 │ - └─────────────┴─────────────┴─────────────┴─────────────┘""") -end - -@testset "ConfusionMatrix measure" begin - - @test info(confmat).orientation == :other - model = DeterministicConstantClassifier() - - X = (x=rand(10),) - long = categorical(collect("abbaacaabbbbababcbac"), ordered=true) - y = long[1:10] - yhat =long[11:20] - - @test confmat(yhat, y).mat == [1 2 0; 3 1 1; 1 1 0] - @test ConfusionMatrix(perm=[2, 1, 3])(yhat, y).mat == - MLJBase._confmat(yhat, y, perm=[2, 1, 3]).mat - - MLJBase.value(confmat, yhat, X, y, 
nothing) - - e = evaluate(model, X, y, - measures=[misclassification_rate, confmat], - resampling=Holdout(fraction_train=0.5)) - cm = e.measurement[2] - @test cm.labels == ["a", "b", "c"] - @test cm.mat == [2 2 1; 0 0 0; 0 0 0] -end diff --git a/test/measures/continuous.jl b/test/measures/continuous.jl deleted file mode 100644 index 3e645845..00000000 --- a/test/measures/continuous.jl +++ /dev/null @@ -1,31 +0,0 @@ -rng = StableRNG(666899) - -@testset "regressor measures" begin - y = [1, 42, 2, 3, missing, 4] - yhat = [4, NaN, 3, 2, 42, 1] - w = [1, 42, 2, 4, 42, 3] - y = [1, 2, 3, 4] - yhat = [4, 3, 2, 1] - w = [1, 2, 4, 3] - @test isapprox(mae(yhat, y), 2) - @test isapprox(mae(yhat, y, w), (1*3 + 2*1 + 4*1 + 3*3)/4) - @test isapprox(rms(yhat, y), sqrt(5)) - @test isapprox(rms(yhat, y, w), sqrt((1*3^2 + 2*1^2 + 4*1^2 + 3*3^2)/4)) - @test rsq(yhat, y) == -3 - @test isapprox(mean(skipinvalid(l1(yhat, y))), 2) - @test isapprox(mean(skipinvalid(l1(yhat, y, w))), mae(yhat, y, w)) - @test isapprox(mean(skipinvalid(l2(yhat, y))), 5) - @test isapprox(mean(skipinvalid(l2(yhat, y, w))), rms(yhat, y, w)^2) - @test isapprox(mean(skipinvalid(log_cosh(yhat, y))), 1.3715546675) - - y = [1, 42, 2, 3, missing, 4] - yhat = [2, NaN, 3, 4, 42, 5] - @test isapprox(rmsl(yhat, y), - sqrt((log(1/2)^2 + log(2/3)^2 + log(3/4)^2 + log(4/5)^2)/4)) - @test isapprox(rmslp1(yhat, y), - sqrt((log(2/3)^2 + log(3/4)^2 + log(4/5)^2 + log(5/6)^2)/4)) - @test isapprox(rmsp(yhat, y), sqrt((1 + 1/4 + 1/9 + 1/16)/4)) - @test isapprox(mape(yhat, y), (1/1 + 1/2 + 1/3 + 1/4)/4) -end - -true diff --git a/test/measures/doc_strings.jl b/test/measures/doc_strings.jl deleted file mode 100644 index 1cbf96c4..00000000 --- a/test/measures/doc_strings.jl +++ /dev/null @@ -1,9 +0,0 @@ -using MLJBase - -docstring = (Base.Docs.doc)((Base.Docs.Binding)(Main, :multiclass_recall)) - -@test string(docstring) == "An instance of type "* - "[`MulticlassTruePositiveRate`](@ref). Query the "* - "[`MulticlassTruePositiveRate`](@ref) doc-string for details. 
\n" - -true diff --git a/test/measures/finite.jl b/test/measures/finite.jl deleted file mode 100644 index f06266c3..00000000 --- a/test/measures/finite.jl +++ /dev/null @@ -1,609 +0,0 @@ -rng = StableRNG(51803) - -const Vec = AbstractVector - -@testset "misclassification_rate" begin - y = categorical(collect("asdfasdfaaassdd")) - yhat = categorical(collect("asdfaadfaasssdf")) - w = 1:15 - ym = vcat(y, [missing,]) - yhatm = vcat(yhat, [missing,]) - wm = 1:16 - @test misclassification_rate(yhat, y) ≈ 0.2 - @test misclassification_rate(yhatm, ym) ≈ 0.2 - @test misclassification_rate(yhat, y, w) ≈ (6*1 + 11*1 + 15*1) / 15 - @test misclassification_rate(yhatm, ym, wm) ≈ (6*1 + 11*1 + 15*1) / 15 -end - -@testset "mcr, acc, bacc, mcc" begin - y = categorical(['m', 'f', 'n', 'f', 'm', 'n', 'n', 'm', 'f']) - ŷ = categorical(['f', 'f', 'm', 'f', 'n', 'm', 'n', 'm', 'f']) - @test accuracy(ŷ, y) == 1-mcr(ŷ,y) == - accuracy(MLJBase._confmat(ŷ, y, warn=false)) == - 1-mcr(MLJBase._confmat(ŷ, y, warn=false)) - w = randn(rng,length(y)) - @test accuracy(ŷ, y, w) == 1-mcr(ŷ,y,w) - - ## balanced accuracy - y = categorical([ - 3, 4, 1, 1, 1, 4, 1, 3, 3, 1, 2, 3, 1, 3, 3, 3, 2, 4, 3, 2, 1, 3, - 3, 1, 1, 1, 2, 4, 1, 4, 4, 4, 1, 1, 4, 4, 3, 1, 2, 2, 3, 4, 2, 1, - 2, 2, 3, 2, 2, 3, 1, 2, 3, 4, 1, 2, 4, 2, 1, 4, 3, 2, 3, 3, 3, 1, - 3, 1, 4, 3, 1, 2, 3, 1, 2, 2, 4, 4, 1, 3, 2, 1, 4, 3, 3, 1, 3, 1, - 2, 2, 2, 2, 2, 3, 2, 1, 1, 4, 2, 2]) - ŷ = categorical([ - 2, 3, 2, 1, 2, 2, 3, 3, 2, 4, 2, 3, 2, 4, 3, 4, 4, 2, 1, 3, 3, 3, - 3, 3, 2, 4, 4, 3, 4, 4, 1, 2, 3, 2, 4, 1, 2, 3, 1, 4, 2, 2, 1, 2, - 3, 2, 2, 4, 3, 2, 2, 2, 1, 2, 2, 1, 3, 1, 4, 1, 2, 1, 2, 4, 3, 2, - 4, 3, 2, 4, 4, 2, 4, 3, 2, 3, 1, 2, 1, 2, 1, 2, 3, 1, 1, 3, 4, 2, - 4, 4, 2, 1, 3, 2, 2, 4, 1, 1, 4, 1]) - w = [ - 0.5, 1.4, 0.6, 1. , 0.1, 0.5, 1.2, 0.2, 1.8, 0.3, 0.6, 2.2, 0.1, - 1.4, 0.2, 0.4, 0.6, 2.1, 0.7, 0.2, 0.9, 0.4, 0.7, 0.3, 0.1, 1.7, - 0.2, 0.7, 1.2, 1. , 0.9, 0.4, 0.5, 0.5, 0.5, 1. , 0.3, 0.1, 0.2, - 0. , 2.2, 0.8, 0.9, 0.8, 1.3, 0.2, 0.4, 0.7, 1. , 0.7, 1.7, 0.7, - 1.1, 1.8, 0.1, 1.2, 1.8, 1. , 0.1, 0.5, 0.6, 0.7, 0.6, 1.2, 0.6, - 1.2, 0.5, 0.5, 0.8, 0.2, 0.6, 1. , 0.3, 1. , 0.2, 1.1, 1.1, 1.1, - 0.6, 1.4, 1.2, 0.3, 1.1, 0.2, 0.5, 1.6, 0.3, 1. , 0.3, 0.9, 0.9, - 0. , 0.6, 0.6, 0.4, 0.5, 0.4, 0.2, 0.9, 0.4] - sk_bacc = 0.17493386243386244 # note: sk-learn reverses ŷ and y - @test bacc(ŷ, y) ≈ sk_bacc - sk_adjusted_bacc = -0.10008818342151675 - @test BalancedAccuracy(adjusted=true)(ŷ, y) ≈ sk_adjusted_bacc - sk_bacc_w = 0.1581913163016446 - @test bacc(ŷ, y, w) ≈ sk_bacc_w - sk_adjusted_bacc_w = -0.1224115782644738 - @test BalancedAccuracy(adjusted=true)(ŷ, y, w) ≈ sk_adjusted_bacc_w - - ## matthews correlation - sk_mcc = -0.09759509982785947 - @test mcc(ŷ, y) == matthews_correlation(ŷ, y) ≈ sk_mcc - # invariance with respect to permutation ? 
- cm = MLJBase._confmat(ŷ, y, perm=[3, 1, 2, 4]) - @test mcc(cm) ≈ sk_mcc - - # Issue #381 - cm = MLJBase.ConfusionMatrixObject([29488 13017; 12790 29753], ["0.0", "1.0"]) - @test mcc(cm) ≈ 0.39312321239417797 -end - -@testset "kappa" begin - # Binary case - y_b = categorical([2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 2, 2, 1, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2]) - ŷ_b = categorical([1, 1, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2]) - cm_b = MLJBase._confmat(y_b, ŷ_b, warn=false) - p0_b = (4+10)/30 - pe_b = (13*11 + 17*19)/(30*30) - - # Multiclass case - y_m = categorical([5, 5, 3, 5, 4, 4, 2, 2, 3, 2, 5, 2, 4, 3, 2, 1, 1, 5, 1, 4, 2, 5, 4, 5, 2, 3, 3, 4, 2, 4]) - ŷ_m = categorical([1, 1, 1, 5, 4, 2, 1, 3, 4, 4, 2, 5, 4, 4, 1, 5, 5, 2, 3, 3, 1, 3, 2, 5, 5, 2, 3, 2, 5, 3]) - cm_m = MLJBase._confmat(ŷ_m, y_m, warn=false) - p0_m = 5/30 - pe_m = (3*6 + 8*6 + 5*6 + 7*5 + 7*7)/(30*30) - - # Tests - @test kappa(y_m, ŷ_m) ≈ (p0_m - pe_m)/(1 - pe_m) - @test kappa(y_b, ŷ_b) ≈ (p0_b - pe_b)/(1 - pe_b) - @test kappa(cm_m) == kappa(y_m, ŷ_m) - @test kappa(cm_b) == kappa(y_b, ŷ_b) - @test kappa(ŷ_m, y_m) == kappa(y_m, ŷ_m) - @test kappa(ŷ_b, y_b) == kappa(y_b, ŷ_b) - @test kappa(y_m, y_m) == 1.0 - @test kappa(y_b, y_b) == 1.0 -end - -@testset "confusion matrix {2}" begin - # first class is 1 is assumed negative, second positive - y = categorical([1, 2, 1, 2, 1, 1, 2]) - ŷ = categorical([1, 2, 2, 2, 2, 1, 2]) - cm = MLJBase._confmat(ŷ, y, warn=false) - TN = sum(ŷ .== y .== 1) # pred and true = - (1) - TP = sum(ŷ .== y .== 2) # pred and true = + (2) - FP = sum(ŷ .!= y .== 1) # pred + (2) and true - (1) - FN = sum(ŷ .!= y .== 2) # pred - (1) and true + (2) - @test cm[1,1] == TN - @test cm[2,2] == TP - @test cm[1,2] == FN - @test cm[2,1] == FP - - ym = categorical([1, missing, 2, 1, 2, 1, 1, 1, 2]) - ŷm = categorical([1, 2, 2, 2, 2, missing, 2, 1, 2]) - cm = MLJBase._confmat(ŷ, y, warn=false) - TN = sum(skipmissing(ŷ .== y .== 1)) # pred and true = - (1) - TP = sum(skipmissing(ŷ .== y .== 2)) # pred and true = + (2) - FP = sum(skipmissing(ŷ .!= y .== 1)) # pred + (2) and true - (1) - FN = sum(skipmissing(ŷ .!= y .== 2)) # pred - (1) and true + (2) - @test cm[1,1] == TN - @test cm[2,2] == TP - @test cm[1,2] == FN - @test cm[2,1] == FP - - cm2 = MLJBase._confmat(ŷ, y; rev=true) - @test cm2[1,1] == cm[2,2] - @test cm2[1,2] == cm[2,1] - @test cm2[2,2] == cm[1,1] - @test cm2[2,1] == cm[1,2] - - @test accuracy(ŷ, y) == accuracy(cm) == sum(y .== ŷ) / length(y) - - @test @test_logs((:warn, r"The classes are un-ordered"), - recall(ŷ, y) == TP / (TP + FN)) - - ŷ = coerce(ŷ, Union{Missing,OrderedFactor}) - y = coerce(y, Union{Missing,OrderedFactor}) - - @test precision(ŷ, y) == TP / (TP + FP) - @test specificity(ŷ, y) == TN / (TN + FP) - @test f1score(ŷ, y) ≈ - 2.0 / (1.0 / recall(ŷ, y) + 1.0 / precision(ŷ, y)) - - recall_rev = Recall(rev=true) - @test recall_rev(ŷ, y) == - TN / (TN + FP) # no warning because rev is specified - precision_rev = Precision(rev=true) - @test precision_rev(ŷ, y) == TN / (TN + FN) - specificity_rev = Specificity(rev=true) - @test specificity_rev(ŷ, y) == TP / (TP + FN) - f1score_rev = FScore(rev=true) - @test f1score_rev(ŷ, y) ≈ - 2.0 / (1.0 / recall_rev(ŷ, y) + 1.0 / precision_rev(ŷ, y)) -end - -@testset "confusion matrix {n}" begin - y = coerce([1, 2, 0, 2, 1, 0, 0, 1, 2, 2, 2, 1, 2, - 2, 1, 0, 1, 1, 1, 2, 1, 2, 2, 1, 2, 1, - 2, 2, 2], Multiclass) - ŷ = coerce([2, 0, 2, 2, 2, 0, 1, 2, 1, 2, 0, 1, 2, - 1, 1, 1, 2, 0, 1, 2, 1, 2, 2, 2, 1, 2, - 1, 
2, 2], Multiclass) - class_w = Dict(0=>0,2=>2,1=>1) - cm = MLJBase._confmat(ŷ, y, warn=false) - - # ┌─────────────────────────────────────────┐ - # │ Ground Truth │ - # ┌─────────────┼─────────────┬─────────────┬─────────────┤ - # │ Predicted │ 0 │ 1 │ 2 │ - # ├─────────────┼─────────────┼─────────────┼─────────────┤ - # │ 0 │ 1 │ 1 │ 2 │ - # ├─────────────┼─────────────┼─────────────┼─────────────┤ - # │ 1 │ 2 │ 4 │ 4 │ - # ├─────────────┼─────────────┼─────────────┼─────────────┤ - # │ 2 │ 1 │ 6 │ 8 │ - # └─────────────┴─────────────┴─────────────┴─────────────┘ - - cm_tp = [1; 4; 8] - cm_tn = [22; 12; 8] - cm_fp = [1+2; 2+4; 1+6] - cm_fn = [2+1; 1+6; 2+4] - cm_prec = cm_tp ./ ( cm_tp + cm_fp ) - cm_rec = cm_tp ./ ( cm_tp + cm_fn ) - - # Check if is positive - m = MulticlassTruePositive(;return_type=Vector) - @test [0; 0; 0] <= m(ŷ, y) == cm_tp - m = MulticlassTrueNegative(;return_type=Vector) - @test [0; 0; 0] <= m(ŷ, y) == cm_tn - m = MulticlassFalsePositive(;return_type=Vector) - @test [0; 0; 0] <= m(ŷ, y) == cm_fp - m = MulticlassFalseNegative(;return_type=Vector) - @test [0; 0; 0] <= m(ŷ, y) == cm_fn - - # Check if is in [0,1] - m = MulticlassTruePositiveRate(average=no_avg;return_type=Vector) - @test [0; 0; 0] <= m(ŷ, y) == cm_tp ./ (cm_fn.+cm_tp) <= [1; 1; 1] - m = MulticlassTrueNegativeRate(average=no_avg;return_type=Vector) - @test [0; 0; 0] <= m(ŷ, y) == cm_tn ./ (cm_tn.+cm_fp) <= [1; 1; 1] - m = MulticlassFalsePositiveRate(average=no_avg;return_type=Vector) - @test [0; 0; 0] <= m(ŷ, y) == 1 .- cm_tn ./ (cm_tn.+cm_fp) <= [1; 1; 1] - m = MulticlassFalseNegativeRate(average=no_avg;return_type=Vector) - @test [0; 0; 0] <= m(ŷ, y) == 1 .- cm_tp ./ (cm_fn.+cm_tp) <= [1; 1; 1] - - #`no_avg` and `LittleDict` - @test collect(values(MulticlassPrecision(average=no_avg)(cm))) ≈ - collect(values(MulticlassPrecision(average=no_avg)(ŷ, y))) ≈ - cm_prec - @test MulticlassPrecision(average=macro_avg)(cm) ≈ - MulticlassPrecision(average=macro_avg)(ŷ, y) ≈ mean(cm_prec) - @test collect(keys(MulticlassPrecision(average=no_avg)(cm))) == - collect(keys(MulticlassPrecision(average=no_avg)(ŷ, y))) == - ["0"; "1"; "2"] - @test collect(values(MulticlassRecall(average=no_avg)(cm))) ≈ - collect(values(MulticlassRecall(average=no_avg)(ŷ, y))) ≈ - cm_rec - @test collect(values(MulticlassFScore(average=no_avg)(cm))) ≈ - collect(values(MulticlassFScore(average=no_avg)(ŷ, y))) ≈ - 2 ./ ( 1 ./ cm_prec + 1 ./ cm_rec ) - - #`no_avg` and `LittleDict` with class weights - @test collect(values(MulticlassPrecision(average=no_avg)(cm, class_w))) ≈ - collect(values(MulticlassPrecision(average=no_avg)(ŷ, y, class_w))) ≈ - cm_prec .* [0; 1; 2] - @test collect(values(MulticlassRecall(average=no_avg)(cm, class_w))) ≈ - collect(values(MulticlassRecall(average=no_avg)(ŷ, y, class_w))) ≈ - cm_rec .* [0; 1; 2] - @test collect(values(MulticlassFScore(average=no_avg)(cm, class_w))) ≈ - collect(values(MulticlassFScore(average=no_avg)(ŷ, y, class_w))) ≈ - 2 ./ ( 1 ./ cm_prec + 1 ./ cm_rec ) .* [0; 1; 2] - - #`macro_avg` and `LittleDict` - macro_prec = MulticlassPrecision(average=macro_avg) - macro_rec = MulticlassRecall(average=macro_avg) - - @test macro_prec(cm) ≈ macro_prec(ŷ, y) ≈ mean(cm_prec) - @test macro_rec(cm) ≈ macro_rec(ŷ, y) ≈ mean(cm_rec) - @test macro_f1score(cm) ≈ macro_f1score(ŷ, y) ≈ mean(2 ./ ( 1 ./ cm_prec + 1 ./ cm_rec )) - - #`micro_avg` and `LittleDict` - micro_prec = MulticlassPrecision(average=micro_avg) - micro_rec = MulticlassRecall(average=micro_avg) - - @test micro_prec(cm) == micro_prec(ŷ, y) == 
sum(cm_tp) ./ sum(cm_fp.+cm_tp) - @test micro_rec(cm) == micro_rec(ŷ, y) == sum(cm_tp) ./ sum(cm_fn.+cm_tp) - @test micro_f1score(cm) == micro_f1score(ŷ, y) == - 2 ./ ( 1 ./ ( sum(cm_tp) ./ sum(cm_fp.+cm_tp) ) + 1 ./ ( sum(cm_tp) ./ sum(cm_fn.+cm_tp) ) ) - - #`no_avg` and `Vector` with class weights - vec_precision = MulticlassPrecision(return_type=Vector) - vec_recall = MulticlassRecall(return_type=Vector) - vec_f1score = MulticlassFScore(return_type=Vector) - - @test vec_precision(cm, class_w) ≈ vec_precision(ŷ, y, class_w) ≈ - mean(cm_prec .* [0; 1; 2]) - @test vec_recall(cm, class_w) ≈ vec_recall(ŷ, y, class_w) ≈ - mean(cm_rec .* [0; 1; 2]) - @test vec_f1score(cm, class_w) ≈ vec_f1score(ŷ, y, class_w) ≈ - mean(2 ./ ( 1 ./ cm_prec + 1 ./ cm_rec ) .* [0; 1; 2]) - - #`macro_avg` and `Vector` - v_ma_prec = MulticlassPrecision(average=macro_avg, - return_type=Vector) - v_ma_rec = MulticlassRecall(average=macro_avg, return_type=Vector) - v_ma_f1 = MulticlassFScore(average=macro_avg, return_type=Vector) - - @test v_ma_prec(cm) ≈ v_ma_prec(ŷ, y) ≈ mean(cm_prec) - @test v_ma_rec(cm) ≈ v_ma_rec(ŷ, y) ≈ mean(cm_rec) - @test v_ma_f1(cm) ≈ v_ma_f1(ŷ, y) ≈ mean(2 ./ ( 1 ./ cm_prec + 1 ./ cm_rec )) - - #`macro_avg` and `Vector` with class weights - @test v_ma_prec(cm, class_w) ≈ v_ma_prec(ŷ, y, class_w) ≈ - mean(cm_prec .* [0, 1, 2]) - @test v_ma_rec(cm, class_w) ≈ v_ma_rec(ŷ, y, class_w) ≈ - mean(cm_rec .* [0, 1, 2]) - @test v_ma_f1(cm, class_w) ≈ v_ma_f1(ŷ, y, class_w) ≈ - mean(2 ./ ( 1 ./ cm_prec + 1 ./ cm_rec ) .* [0, 1, 2]) - - #`micro_avg` and `Vector` - v_mi_prec = MulticlassPrecision(average=micro_avg, return_type=Vector) - v_mi_rec = MulticlassRecall(average=micro_avg, return_type=Vector) - v_mi_f1 = MulticlassFScore(average=micro_avg, return_type=Vector) - - @test v_mi_prec(cm) == v_mi_prec(ŷ, y) == sum(cm_tp) ./ sum(cm_fp.+cm_tp) - @test v_mi_rec(cm) == v_mi_rec(ŷ, y) == sum(cm_tp) ./ sum(cm_fn.+cm_tp) - @test v_mi_f1(cm) == v_mi_f1(ŷ, y) == - 2 ./ ( 1 ./ ( sum(cm_tp) ./ sum(cm_fp.+cm_tp) ) + 1 ./ ( sum(cm_tp) ./ sum(cm_fn.+cm_tp) ) ) -end - -@testset "issue #630" begin - # multiclass fscore corner case of absent class - - y = coerce([1, 2, 2, 2, 3], OrderedFactor)[1:4] - # [1, 2, 2, 2] # but 3 is in the pool - yhat = reverse(y) - # [2, 2, 2, 1] - - # In this case, assigning "3" as "positive" gives all true negative, - # and so NaN for that class's contribution to the average F1Score, - # which should accordingly be skipped. 
- - # postive class | TP | FP | FN | score for that class - # --------------|----|----|----|--------------------- - # 1 | 0 | 1 | 2 | 0 - # 2 | 2 | 1 | 1 | 2/3 - # 3 | 0 | 0 | 0 | NaN - - # mean score with skippin NaN is 1/3 - @test MulticlassFScore()(yhat, y) ≈ 1/3 -end - -@testset "Metadata binary" begin - for m in (accuracy, recall, Precision(), f1score, specificity) - e = info(m) - m == accuracy && (@test e.name == "Accuracy") - m == recall && (@test e.name == "TruePositiveRate") - m isa Precision && (@test e.name == "Precision") - m == f1score && (@test e.name == "FScore") - m == specificity && (@test e.name == "TrueNegativeRate") - @test e.target_scitype <: AbstractArray{<:Union{Missing,Finite}} - @test e.prediction_type == :deterministic - @test e.orientation == :score - @test e.reports_each_observation == false - @test e.is_feature_dependent == false - if m == accuracy - @test e.supports_weights - else - @test !e.supports_weights - end - end - e = info(auc) - @test e.name == "AreaUnderCurve" - @test e.target_scitype == - Union{AbstractArray{<:Union{Missing,Multiclass{2}}}, - AbstractArray{<:Union{Missing,OrderedFactor{2}}}} - @test e.prediction_type == :probabilistic - @test e.reports_each_observation == false - @test e.is_feature_dependent == false - @test e.supports_weights == false -end - -@testset "Metadata multiclass" begin - for m in (MulticlassRecall(), MulticlassPrecision(), - MulticlassFScore(), MulticlassTrueNegativeRate()) - e = info(m) - m isa MulticlassRecall && - (@test e.name == "MulticlassTruePositiveRate") - m isa MulticlassPrecision && - (@test e.name == "MulticlassPrecision") - m isa MulticlassFScore && - (@test e.name == "MulticlassFScore") - m isa MulticlassTrueNegativeRate && - (@test e.name == "MulticlassTrueNegativeRate") - @test e.target_scitype <: AbstractArray{<:Union{Missing,Finite}} - @test e.prediction_type == :deterministic - @test e.orientation == :score - @test e.reports_each_observation == false - @test e.is_feature_dependent == false - @test e.supports_weights == false - @test e.supports_class_weights == true - end -end - -@testset "More binary metrics" begin - y = coerce([missing, 1, 2, 1, 2, 1, 1, 2, 1, 2, 2, 2, 1, 2, - 2, 1, 2, 1, 1, 1, 2, 1, 2, 2, 1, 2, 1, - 2, 2, 2, 1], Union{Missing,OrderedFactor}) - ŷ = coerce([1, 1, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2, - 1, 1, 1, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, - 1, 2, 2, missing], Union{Missing,OrderedFactor}) - - # check all constructors - m = TruePositive() - @test m(ŷ, y) == truepositive(ŷ, y) - m = TruePositive(rev=true) - @test m(ŷ, y) == truenegative(ŷ, y) - m = TrueNegative() - @test m(ŷ, y) == truenegative(ŷ, y) - m = FalsePositive() - @test m(ŷ, y) == falsepositive(ŷ, y) - m = FalseNegative() - @test m(ŷ, y) == falsenegative(ŷ, y) - m = TruePositiveRate() - @test m(ŷ, y) == tpr(ŷ, y) == truepositive_rate(ŷ, y) - m = TrueNegativeRate() - @test m(ŷ, y) == tnr(ŷ, y) == truenegative_rate(ŷ, y) - m = FalsePositiveRate() - @test m(ŷ, y) == fpr(ŷ, y) == falsepositive_rate(ŷ, y) - m = FalseNegativeRate() - @test m(ŷ, y) == fnr(ŷ, y) == falsenegative_rate(ŷ, y) - m = FalseDiscoveryRate() - @test m(ŷ, y) == fdr(ŷ, y) == falsediscovery_rate(ŷ, y) - m = Precision() - @test m(ŷ, y) == precision(ŷ, y) - m = NPV() - @test m(ŷ, y) == npv(ŷ, y) - m = FScore() - @test m(ŷ, y) == f1score(ŷ, y) - # check synonyms - m = TPR() - @test m(ŷ, y) == tpr(ŷ, y) - m = TNR() - @test m(ŷ, y) == tnr(ŷ, y) - m = FPR() - @test m(ŷ, y) == fpr(ŷ, y) == fallout(ŷ, y) - m = FNR() - @test m(ŷ, y) == fnr(ŷ, y) == miss_rate(ŷ, y) - m = 
FDR() - @test m(ŷ, y) == fdr(ŷ, y) - m = PPV() - @test m(ŷ, y) == precision(ŷ, y) == ppv(ŷ, y) - m = Recall() - @test m(ŷ, y) == tpr(ŷ, y) == recall(ŷ, y) == - sensitivity(ŷ, y) == hit_rate(ŷ, y) - m = Specificity() - @test m(ŷ, y) == tnr(ŷ, y) == specificity(ŷ, y) == selectivity(ŷ, y) - # 'higher order' - m = BACC() - @test m(ŷ, y) == bacc(ŷ, y) == (tpr(ŷ, y) + tnr(ŷ, y))/2 - - ### External comparisons - sk_prec = 0.6111111111111112 # m.precision_score(y, yhat, pos_label=2) - @test precision(ŷ, y) ≈ sk_prec - sk_rec = 0.6875 - @test recall(ŷ, y) == sk_rec # m.recall_score(y, yhat, pos_label=2) - sk_f05 = 0.625 - f05 = FScore(β=0.5) - @test f05(ŷ, y) ≈ sk_f05 # m.fbeta_score(y, yhat, 0.5, pos_label=2) - - # reversion mechanism - sk_prec_rev = 0.5454545454545454 - prec_rev = Precision(rev=true) - @test prec_rev(ŷ, y) ≈ sk_prec_rev - sk_rec_rev = 0.46153846153846156 - rec_rev = Recall(rev=true) - @test rec_rev(ŷ, y) ≈ sk_rec_rev -end - -@testset "More multiclass metrics" begin - y = coerce(categorical([missing, 1, 2, 0, 2, 1, 0, 0, 1, 2, 2, 2, 1, 2, - 2, 1, 0, 1, 1, 1, 2, 1, 2, 2, 1, 2, 1, - 2, 2, 2, 0]), Union{Missing,Multiclass}) - ŷ = coerce(categorical([0, 2, 0, 2, 2, 2, 0, 1, 2, 1, 2, 0, 1, 2, - 1, 1, 1, 2, 0, 1, 2, 1, 2, 2, 2, 1, 2, - 1, 2, 2, missing]), Union{Missing,Multiclass}) - w = Dict(0=>1, 1=>2, 2=>3) #class_w - # check all constructors - m = MulticlassTruePositive() - @test m(ŷ, y) == multiclass_truepositive(ŷ, y) - m = MulticlassTrueNegative() - @test m(ŷ, y) == multiclass_truenegative(ŷ, y) - m = MulticlassFalsePositive() - @test m(ŷ, y) == multiclass_falsepositive(ŷ, y) - m = MulticlassFalseNegative() - @test m(ŷ, y) == multiclass_falsenegative(ŷ, y) - m = MulticlassTruePositiveRate() - @test m(ŷ, y) == multiclass_tpr(ŷ, y) == - multiclass_truepositive_rate(ŷ, y) - @test m(ŷ, y, w) == multiclass_tpr(ŷ, y, w) == - multiclass_truepositive_rate(ŷ, y, w) - m = MulticlassTrueNegativeRate() - @test m(ŷ, y) == multiclass_tnr(ŷ, y) == - multiclass_truenegative_rate(ŷ, y) - @test m(ŷ, y, w) == multiclass_tnr(ŷ, y, w) == - multiclass_truenegative_rate(ŷ, y, w) - m = MulticlassFalsePositiveRate() - @test m(ŷ, y) == multiclass_fpr(ŷ, y) == - multiclass_falsepositive_rate(ŷ, y) - @test m(ŷ, y, w) == multiclass_fpr(ŷ, y, w) == - multiclass_falsepositive_rate(ŷ, y, w) - m = MulticlassFalseNegativeRate() - @test m(ŷ, y) == multiclass_fnr(ŷ, y) == - multiclass_falsenegative_rate(ŷ, y) - @test m(ŷ, y, w) == multiclass_fnr(ŷ, y, w) == - multiclass_falsenegative_rate(ŷ, y, w) - m = MulticlassFalseDiscoveryRate() - @test m(ŷ, y) == multiclass_fdr(ŷ, y) == - multiclass_falsediscovery_rate(ŷ, y) - @test m(ŷ, y, w) == multiclass_fdr(ŷ, y, w) == - multiclass_falsediscovery_rate(ŷ, y, w) - m = MulticlassPrecision() - @test m(ŷ, y) == multiclass_precision(ŷ, y) - @test m(ŷ, y, w) == multiclass_precision(ŷ, y, w) - m = MulticlassNegativePredictiveValue() - @test m(ŷ, y) == multiclass_npv(ŷ, y) - @test m(ŷ, y, w) == multiclass_npv(ŷ, y, w) - m = MulticlassFScore() - @test m(ŷ, y) == macro_f1score(ŷ, y) - @test m(ŷ, y, w) == macro_f1score(ŷ, y, w) - # check synonyms - m = MTPR(return_type=Vector) - @test m(ŷ, y) == multiclass_tpr(ŷ, y) - @test m(ŷ, y, w) == multiclass_tpr(ŷ, y, w) - m = MTNR(return_type=Vector) - @test m(ŷ, y) == multiclass_tnr(ŷ, y) - @test m(ŷ, y, w) == multiclass_tnr(ŷ, y, w) - m = MFPR() - @test m(ŷ, y) == multiclass_fpr(ŷ, y) == multiclass_fallout(ŷ, y) - @test m(ŷ, y, w) == multiclass_fpr(ŷ, y, w) == - multiclass_fallout(ŷ, y, w) - m = MFNR() - @test m(ŷ, y) == multiclass_fnr(ŷ, 
y) == - multiclass_miss_rate(ŷ, y) - @test m(ŷ, y, w) == multiclass_fnr(ŷ, y, w) == - multiclass_miss_rate(ŷ, y, w) - m = MFDR() - @test m(ŷ, y) == multiclass_fdr(ŷ, y) - @test m(ŷ, y, w) == multiclass_fdr(ŷ, y, w) - m = MPPV() - @test m(ŷ, y) == MulticlassPrecision()(ŷ, y) == - multiclass_ppv(ŷ, y) - @test m(ŷ, y, w) == MulticlassPrecision()(ŷ, y, w) == - multiclass_ppv(ŷ, y, w) - m = MulticlassRecall() - @test m(ŷ, y) == multiclass_tpr(ŷ, y) - @test m(ŷ, y, w) == multiclass_tpr(ŷ, y, w) - @test m(ŷ, y) == multiclass_sensitivity(ŷ, y) == - multiclass_hit_rate(ŷ, y) - @test m(ŷ, y, w) == multiclass_sensitivity(ŷ, y, w) == - multiclass_hit_rate(ŷ, y, w) - m = MulticlassSpecificity() - @test m(ŷ, y) == multiclass_tnr(ŷ, y) == multiclass_specificity(ŷ, y) == - multiclass_selectivity(ŷ, y) - @test m(ŷ, y, w) == multiclass_tnr(ŷ, y, w) == - multiclass_specificity(ŷ, y, w) == multiclass_selectivity(ŷ, y, w) -end - - -@testset "Additional multiclass tests" begin - table = reshape(collect("aabbbccccddbabccbacccd"), 11, 2) - table = coerce(table, Multiclass); - yhat = table[:,1] # ['a', 'a', 'b', 'b', 'b', 'c', 'c', 'c', 'c', 'd', 'd'] - y = table[:,2] # ['b', 'a', 'b', 'c', 'c', 'b', 'a', 'c', 'c', 'c', 'd'] - class_w = Dict('a'=>7, 'b'=>5, 'c'=>2, 'd'=> 0) - - # class | TP | FP | TP + FP | precision | FN | TP + FN | recall - # ------|----|----|------------------------------------|------- - # a | 1 | 1 | 2 | 1/2 | 1 | 2 | 1/2 - # b | 1 | 2 | 3 | 1/3 | 2 | 3 | 1/3 - # c | 2 | 2 | 4 | 1/2 | 3 | 5 | 2/5 - # d | 1 | 1 | 2 | 1/2 | 0 | 1 | 1 - - # helper: - inverse(x) = 1/x - harmonic_mean(x, y; beta=1.0) = - (1 + inverse(beta^2))*inverse(mean(inverse(beta^2*x)+ inverse(y))) - - # precision: - p_macro = mean([1/2, 1/3, 1/2, 1/2]) - @test MulticlassPrecision()(yhat, y) ≈ p_macro - p_macro_w = mean([7/2, 5/3, 2/2, 0/2]) - @test MulticlassPrecision()(yhat, y, class_w) ≈ p_macro_w - @test p_macro_w ≈ - @test_logs((:warn, r"Using macro"), - MulticlassPrecision(average=micro_avg)(yhat, y, class_w)) - p_micro = (1 + 1 + 2 + 1)/(2 + 3 + 4 + 2) - @test MulticlassPrecision(average=micro_avg)(yhat, y) ≈ p_micro - - # recall: - r_macro = mean([1/2, 1/3, 2/5, 1]) - @test MulticlassRecall(average=macro_avg)(yhat, y) ≈ r_macro - r_macro_w = mean([7/2, 5/3, 4/5, 0/1]) - @test MulticlassRecall(average=macro_avg)(yhat, y, class_w) ≈ r_macro_w - @test r_macro_w ≈ - @test_logs((:warn, r"Using macro"), - MulticlassRecall(average=micro_avg)(yhat, y, class_w)) - r_micro = (1 + 1 + 2 + 1)/(2 + 3 + 5 + 1) - @test MulticlassPrecision(average=micro_avg)(yhat, y) ≈ r_micro - - # fscore: - harm_means = [harmonic_mean(1/2, 1/2), - harmonic_mean(1/3, 1/3), - harmonic_mean(1/2, 2/5), - harmonic_mean(1/2, 1)] - f1_macro = mean(harm_means) - @test MulticlassFScore(average=macro_avg)(yhat, y) ≈ f1_macro - @test MulticlassFScore(average=no_avg, - return_type=Vector)(yhat, y, class_w) ≈ - [7, 5, 2, 0] .* harm_means - f1_macro_w = mean([7, 5, 2, 0] .* harm_means) - @test MulticlassFScore(average=macro_avg)(yhat, y, class_w) ≈ f1_macro_w - @test f1_macro_w ≈ - @test_logs((:warn, r"Using macro"), - MulticlassFScore(average=micro_avg)(yhat, y, class_w)) - f1_micro = harmonic_mean(p_micro, r_micro) - @test MulticlassFScore(average=micro_avg)(yhat, y) ≈ f1_micro - - # fscore, β=1/3: - harm_means = [harmonic_mean(1/2, 1/2, beta=1/3), - harmonic_mean(1/3, 1/3, beta=1/3), - harmonic_mean(1/2, 2/5, beta=1/3), - harmonic_mean(1/2, 1, beta=1/3)] - f1_macro = mean(harm_means) - @test MulticlassFScore(β=1/3, average=macro_avg)(yhat, y) ≈ f1_macro - 
@test MulticlassFScore(β=1/3, - average=no_avg, - return_type=Vector)(yhat, y, class_w) ≈ - [7, 5, 2, 0] .* harm_means - f1_macro_w = mean([7, 5, 2, 0] .* harm_means) - @test MulticlassFScore(β=1/3, - average=macro_avg)(yhat, y, class_w) ≈ f1_macro_w - @test f1_macro_w ≈ - @test_logs((:warn, r"Using macro"), - MulticlassFScore(β=1/3, - average=micro_avg)(yhat, y, class_w)) - f1_micro = harmonic_mean(p_micro, r_micro, beta=1/3) - @test MulticlassFScore(β=1/3, average=micro_avg)(yhat, y) ≈ f1_micro -end - -@testset "docstrings coverage" begin - @test startswith(info(BrierScore()).docstring, "`BrierScore`") -end diff --git a/test/measures/loss_functions_interface.jl b/test/measures/loss_functions_interface.jl deleted file mode 100644 index 8c59945b..00000000 --- a/test/measures/loss_functions_interface.jl +++ /dev/null @@ -1,68 +0,0 @@ -rng = StableRNG(614) - -# convert a Binary vector into vector of +1 or -1 values -# (for testing only): -pm1(y) = Int8(2) .* (Int8.(MLJBase.int(y))) .- Int8(3) - -const MARGIN_LOSSES = MLJBase.MARGIN_LOSSES -const DISTANCE_LOSSES = MLJBase.DISTANCE_LOSSES - -# using `WeightedSum` instead of `WeightedMean`; see -# https://github.com/JuliaML/LossFunctions.jl/issues/149 -WeightedSum(w) = LossFunctions.AggMode.WeightedMean(w, normalize=false) - -@testset "naked" begin - @test MLJBase.naked(MLJBase.LossFunctions.PeriodicLoss{Float64}) == - :PeriodicLoss - @test MLJBase.naked(MLJBase.LossFunctions.PeriodicLoss) == - :PeriodicLoss -end - -@testset "LossFunctions.jl - binary" begin - y = categorical(["yes", "yes", "no", "yes"]) - yes, no = y[1], y[3] - dyes = MLJBase.UnivariateFinite([yes, no], [0.6, 0.4]) - dno = MLJBase.UnivariateFinite([yes, no], [0.3, 0.7]) - yhat = [dno, dno, dyes, dyes] - w = [1, 2, 3, 4] - - @test MLJBase.ZeroOneLoss()(yhat, y) ≈ [1, 1, 1, 0] - @test MLJBase.zero_one_loss(yhat,y, w) ≈ [1, 2, 3, 0] - - N = 10 - y = categorical(rand(rng, ["yes", "no"], N), ordered=true) - levels!(y, ["no", "yes"]) - no, yes = MLJBase.classes(y[1]) - @test pm1([yes, no]) in [[+1, -1], [-1, +1]] - ym = pm1(y) # observations for raw LossFunctions measure - p_vec = rand(N) - yhat = MLJBase.UnivariateFinite([no, yes], p_vec, augment=true) - yhatm = MLJBase._scale.(p_vec) # predictions for raw LossFunctions measure - w = rand(rng, N) - - for M_ex in MARGIN_LOSSES - m = eval(:(MLJBase.$M_ex())) - @test m(yhat, y) ≈ (getfield(m, :loss)).(yhatm, ym) - @test m(yhat, y, w) ≈ - w .* (getfield(m, :loss)).(yhatm, ym) - end -end - -@testset "LossFunctions.jl - continuous" begin - # losses for continuous targets: - N = 10 - y = randn(rng, N) - yhat = randn(rng, N) - X = nothing - w = rand(rng, N) - - for M_ex in DISTANCE_LOSSES - m = eval(:(MLJBase.$M_ex())) - m_ex = MLJBase.snakecase(M_ex) - @test m == eval(:(MLJBase.$m_ex)) - @test m(yhat, y) ≈ - (getfield(m, :loss)).(yhat, y) - @test m(yhat ,y, w) ≈ - w .* (getfield(m, :loss)).(yhat, y) - end -end diff --git a/test/measures/measure_search.jl b/test/measures/measure_search.jl deleted file mode 100644 index f8aa5e4d..00000000 --- a/test/measures/measure_search.jl +++ /dev/null @@ -1,42 +0,0 @@ -ms = map(measures()) do m - m.name -end -@test "LogLoss" in ms -@test "RootMeanSquaredError" in ms - -# test `M()` makes sense for all measure types `M` extracted from `name`, -@test all(Symbol.(ms)) do ex - try - eval(:($ex())) - true - catch - false - end -end - -S = AbstractVector{Union{Missing,Multiclass{3}}} -task(m) = S <: m.target_scitype - -ms = map(measures(task)) do m - m.name -end - -@test "LogLoss" in ms -@test 
!("RootMeanSquaredError" in ms) - -task(m) = AbstractVector{Continuous} <: m.target_scitype - -ms = map(measures(task)) do m - m.name -end - -@test !("Accuracy" in ms) -@test "RootMeanSquaredError" in ms - -ms = map(measures("Brier")) do m - m.name -end - -@test Set(ms) == Set(["BrierLoss", "BrierScore"]) - -true diff --git a/test/measures/measures.jl b/test/measures/measures.jl deleted file mode 100644 index 602c3e78..00000000 --- a/test/measures/measures.jl +++ /dev/null @@ -1,134 +0,0 @@ -module TestMeasures - -using MLJBase, Test -import Distributions -using CategoricalArrays -using Statistics -import LossFunctions -using StableRNGs -using OrderedCollections: LittleDict - -rng = StableRNGs.StableRNG(123) - -@testset "aggregation" begin - v = rand(5) - @test aggregate(v, mae) ≈ mean(v) - @test aggregate(v, TruePositive()) ≈ sum(v) - @test aggregate(v, rms) ≈ sqrt(mean(v.^2)) - λ = rand() - @test aggregate(λ, rms) === λ - @test aggregate(aggregate(v, l2), l2) == aggregate(v, l2) - m = LittleDict([0, 1, 2, 3, 4], v) - @test aggregate(m, MTPR()) == mean(v) -end - -@testset "metadata" begin - measures() - measures(m -> m.target_scitype <: AbstractVector{<:Finite} && - m.supports_weights) - info(rms) - @test true -end - -@testset "coverage" begin - # just checking that the traits work not that they're correct - @test orientation(BrierScore()) == :score - @test orientation(auc) == :score - @test orientation(rms) == :loss - - @test reports_each_observation(auc) == false - @test is_feature_dependent(auc) == false - - @test MLJBase.distribution_type(auc) == MLJBase.UnivariateFinite -end - -@testset "MLJBase.value" begin - yhat = randn(rng,5) - X = (weight=randn(rng,5), x1 = randn(rng,5)) - y = randn(rng,5) - w = randn(rng,5) - - @test MLJBase.value(mae, yhat, nothing, y, nothing) ≈ mae(yhat, y) - @test MLJBase.value(mae, yhat, nothing, y, w) ≈ mae(yhat, y, w) - - spooky(yhat, y) = abs.(yhat - y) |> mean - @test MLJBase.value(spooky, yhat, nothing, y, nothing) ≈ mae(yhat, y) - - cool(yhat, y, w) = abs.(yhat - y) .* w |> mean - MLJBase.supports_weights(::Type{typeof(cool)}) = true - @test MLJBase.value(cool, yhat, nothing, y, w) ≈ mae(yhat, y, w) - - funky(yhat, X, y) = X.weight .* abs.(yhat - y) |> mean - MLJBase.is_feature_dependent(::Type{typeof(funky)}) = true - @test MLJBase.value(funky, yhat, X, y, nothing) ≈ mae(yhat, y, X.weight) - - weird(yhat, X, y, w) = w .* X.weight .* abs.(yhat - y) |> mean - MLJBase.is_feature_dependent(::Type{typeof(weird)}) = true - MLJBase.supports_weights(::Type{typeof(weird)}) = true - @test MLJBase.value(weird, yhat, X, y, w) ≈ mae(yhat, y, X.weight .* w) -end - -mutable struct DRegressor <: Deterministic end -MLJBase.target_scitype(::Type{<:DRegressor}) = - AbstractVector{<:Continuous} - -mutable struct D2Regressor <: Deterministic end -MLJBase.target_scitype(::Type{<:D2Regressor}) = - AbstractVector{Continuous} - -mutable struct DClassifier <: Deterministic end -MLJBase.target_scitype(::Type{<:DClassifier}) = - AbstractVector{<:Finite} - -mutable struct PClassifier <: Probabilistic end -MLJBase.target_scitype(::Type{<:PClassifier}) = - AbstractVector{<:Finite} - -mutable struct PRegressor <: Probabilistic end -MLJBase.target_scitype(::Type{<:PRegressor}) = - AbstractVector{<:Continuous} - -mutable struct PCountRegressor <: Probabilistic end -MLJBase.target_scitype(::Type{<:PCountRegressor}) = - AbstractVector{<:Count} - -@testset "default_measure" begin - @test MLJBase.default_measure(DRegressor()) == rms - @test MLJBase.default_measure(D2Regressor()) == rms 
- @test MLJBase.default_measure(DClassifier()) == misclassification_rate - @test MLJBase.default_measure(PClassifier()) == log_loss - - @test MLJBase.default_measure(DRegressor) == rms - @test MLJBase.default_measure(D2Regressor) == rms - @test MLJBase.default_measure(DClassifier) == misclassification_rate - @test MLJBase.default_measure(PClassifier) == log_loss - - @test MLJBase.default_measure(PRegressor) == log_loss - @test MLJBase.default_measure(PCountRegressor) == log_loss -end - -include("confusion_matrix.jl") -include("roc.jl") -include("continuous.jl") -include("finite.jl") -include("probabilistic.jl") -include("loss_functions_interface.jl") - -@testset "show method for measures" begin - io = IOBuffer() - for meta in measures() - m = eval(Meta.parse("$(meta.name)()")) - show(io, MIME("text/plain"), m) - show(io, m) - end -end - -@testset "missing and NaN values in aggregation" begin - v =[1, 2, missing, 5, NaN] - @test MLJBase.Sum()(v) == 8 - @test MLJBase.RootMeanSquare()(v) ≈ sqrt((1 + 4 + 25)/3) - @test MLJBase.Mean()(Union{Missing,Float32}[]) |> isnan -end - -end -true diff --git a/test/measures/probabilistic.jl b/test/measures/probabilistic.jl deleted file mode 100644 index 733c0d20..00000000 --- a/test/measures/probabilistic.jl +++ /dev/null @@ -1,174 +0,0 @@ -rng = StableRNG(51803) -using LinearAlgebra - -const Vec = AbstractVector - -@testset "AUC" begin - # this is random binary and random scores generated with numpy - # then using roc_auc_score from sklearn to get the AUC - # we check that we recover a comparable AUC and that it's invariant - # to ordering. - c = ["neg", "pos"] - y = categorical(c[[0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, - 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, - 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, - 1, 0] .+ 1]) - probs = [ - 0.90237535, 0.41276349, 0.94511611, 0.08390761, 0.55847392, - 0.26043136, 0.78565351, 0.20133953, 0.7404382 , 0.15307601, - 0.59596716, 0.8169512 , 0.88200483, 0.23321489, 0.94050483, - 0.27593662, 0.60702176, 0.36427036, 0.35481784, 0.06416543, - 0.45576954, 0.12354048, 0.79830435, 0.15799818, 0.20981099, - 0.43451663, 0.24020098, 0.11401055, 0.25785748, 0.86490263, - 0.75715379, 0.06550534, 0.12628999, 0.18878245, 0.1283757 , - 0.76542903, 0.8780248 , 0.86891113, 0.24835709, 0.06528076, - 0.72061354, 0.89451634, 0.95634394, 0.07555979, 0.16345437, - 0.43498831, 0.37774708, 0.31608861, 0.41369339, 0.95691113] - - ŷ = UnivariateFinite(y[1:2], probs, augment=true) - # ŷ = [UnivariateFinite(y[1:2], [1.0 - p, p]) for p in [ - # 0.90237535, 0.41276349, 0.94511611, 0.08390761, 0.55847392, - # 0.26043136, 0.78565351, 0.20133953, 0.7404382 , 0.15307601, - # 0.59596716, 0.8169512 , 0.88200483, 0.23321489, 0.94050483, - # 0.27593662, 0.60702176, 0.36427036, 0.35481784, 0.06416543, - # 0.45576954, 0.12354048, 0.79830435, 0.15799818, 0.20981099, - # 0.43451663, 0.24020098, 0.11401055, 0.25785748, 0.86490263, - # 0.75715379, 0.06550534, 0.12628999, 0.18878245, 0.1283757 , - # 0.76542903, 0.8780248 , 0.86891113, 0.24835709, 0.06528076, - # 0.72061354, 0.89451634, 0.95634394, 0.07555979, 0.16345437, - # 0.43498831, 0.37774708, 0.31608861, 0.41369339, 0.95691113]] - @test isapprox(auc(ŷ, y), 0.455716, rtol=1e-4) - ŷ_unwrapped = [ŷ...] 
- @test isapprox(auc(ŷ_unwrapped, y), 0.455716, rtol=1e-4) - - # reversing the roles of positive and negative should return very - # similar score - y2 = deepcopy(y); - levels!(y2, reverse(levels(y2))); - @test y == y2 - @test levels(y) != levels(y2) - ŷ2 = UnivariateFinite(y2[1:2], probs, augment=true) # same probs - @test isapprox(auc(ŷ2, y2), auc(ŷ, y), rtol=1e-4) - - # The auc algorithm should be able to handle the case where two or more - # samples in the prediction vector has the same UnivariateFinite distribution - # We check this by comparing our auc with that gotten from roc_auc_score from sklearn. - y = categorical(["class_1","class_1","class_0","class_0","class_1","class_1","class_0"]) - ŷ = UnivariateFinite(levels(y), [0.8,0.7,0.5,0.5,0.5,0.5,0.3], augment=true, pool=y) - # We can see that ŷ[3] ≈ ŷ[4] ≈ ŷ[5] ≈ ŷ[6] - @test isapprox(auc(ŷ, y), 0.8333333333333334, rtol=1e-16) -end - -@testset "Log, Brier, Spherical - finite case" begin - y = categorical(collect("abb")) - L = [y[1], y[2]] - d1 = UnivariateFinite(L, [0.1, 0.9]) # a - d2 = UnivariateFinite(L, Float32[0.4, 0.6]) # b - d3 = UnivariateFinite(L, [0.2, 0.8]) # b - yhat = [d1, d2, d3] - ym = vcat(y, [missing,]) - yhatm = vcat(yhat, [d3, ]) - - @test mean(log_loss(yhat, y)) ≈ - Float32(-(log(0.1) + log(0.6) + log(0.8))/3) - @test mean(skipmissing(log_loss(yhatm, ym))) ≈ - Float32(-(log(0.1) + log(0.6) + log(0.8))/3) - yhat = UnivariateFinite(L, [0.1 0.9; - 0.4 0.6; - 0.2 0.8]) - @test isapprox(mean(log_loss(yhat, y)), - -(log(0.1) + log(0.6) + log(0.8))/3, atol=eps(Float32)) - - @test log_score(yhat, y) ≈ -log_loss(yhat, y) - - # sklearn test - # >>> from sklearn.metrics import log_loss - # >>> log_loss(["spam", "ham", "ham", "spam","ham","ham"], - # [[.1, .9], [.9, .1], [.8, .2], [.35, .65], [0.2, 0.8], [0.3,0.7]]) - # 0.6130097025803921 - y2 = categorical(["spam", "ham", "ham", "spam", "ham", "ham"]) - L2 = classes(y2[1]) - probs = vcat([.1 .9], [.9 .1], [.8 .2], [.35 .65], [0.2 0.8], [0.3 0.7]) - yhat2 = UnivariateFinite(L2, probs) - y2m = vcat(y2, [missing,]) - yhat2m = UnivariateFinite(L2, vcat(probs, [0.1 0.9])) - @test mean(log_loss(yhat2, y2)) ≈ 0.6130097025803921 - @test mean(skipmissing(log_loss(yhat2, y2))) ≈ 0.6130097025803921 - - ## Brier - scores = BrierScore()(yhat, y) - @test size(scores) == size(y) - @test Float32.(scores) ≈ [-1.62, -0.32, -0.08] - scoresm = BrierScore()(yhatm, ym) - @test Float32.((scoresm)[1:3]) ≈ [-1.62, -0.32, -0.08] - @test ismissing(scoresm[end]) - # test specialized broadcasting on brierloss - @test BrierLoss()(yhat, y) == -BrierScore()(yhat, y) - # sklearn test - # >>> from sklearn.metrics import brier_score_loss - # >>> brier_score_loss([1, 0, 0, 1, 0, 0], [.9, .1, .2, .65, 0.8, 0.7]) - # 0.21875 NOTE: opposite orientation - @test -mean(BrierScore()(yhat2, y2)) / 2 ≈ 0.21875 - probs2 = [[.1, .9], [Float32(0.9), Float32(1) - Float32(0.9)], [.8, .2], - [.35, .65], [0.2, 0.8], [0.3, 0.7]] - yhat3 = [UnivariateFinite(L2, prob) for prob in probs2] - @test -mean(BrierScore()(yhat3, y2) / 2) ≈ 0.21875 - @test mean(BrierLoss()(yhat3, y2) / 2) ≈ -mean(BrierScore()(yhat3, y2) / 2) - - # Spherical - s = SphericalScore() # SphericalScore(2) - norms = [norm(probs[i,:]) for i in 1:size(probs, 1)] - @test (pdf.(yhat2, y2) ./ norms) ≈ s(yhat2, y2) - # non-performant version: - yhat4 = [yhat2...] 
- @test (pdf.(yhat2, y2) ./ norms) ≈ s(yhat4, y2) -end - -@testset "LogScore, BrierScore, SphericalScore - infinite case" begin - uniform = Distributions.Uniform(2, 5) - betaprime = Distributions.BetaPrime() - discrete_uniform = Distributions.DiscreteUniform(2, 5) - w = [2, 3] - - # brier - yhat = [missing, uniform] - @test isapprox(brier_score(yhat, [1.0, 1.0]) |> last, -1/3) - @test isapprox(brier_score(yhat, [NaN, 4.0]) |> last, 1/3) - @test isapprox(brier_score(yhat, [1.0, 1.0], w) |> last, -1) - yhat = [missing, uniform] - # issue https://github.com/JuliaStats/Distributions.jl/issues/1392 - @test_broken isapprox(brier_score(yhat, [missing, 4.0], w), [1,]) - yhat = [discrete_uniform, discrete_uniform] - @test isapprox(brier_score(yhat, [NaN, 1.0]), [-1/4, -1/4,]) - @test isapprox(brier_score(yhat, [4.0, 4.0]), [1/4, 1/4,]) - - # spherical - yhat = [uniform, uniform] - @test isapprox(spherical_score(yhat, [1.0, 1.0]), [0, 0]) - @test isapprox(spherical_score(yhat, [NaN, 4.0]), [0, 1/sqrt(3),]) - # issue https://github.com/JuliaStats/Distributions.jl/issues/1392 - @test_broken isapprox(spherical_score(yhat, [missing, 4.0], w), [sqrt(3),]) - @test isapprox(spherical_score(yhat, [4.0, 4.0], w), [2/sqrt(3), sqrt(3),]) - yhat = [discrete_uniform, discrete_uniform] - @test isapprox(spherical_score(yhat, [NaN, 1.0]), [0, 0]) - @test isapprox(spherical_score(yhat, [4.0, 4.0]), [1/2, 1/2]) - - # log - yhat = [uniform, uniform] - @test isapprox(log_score(yhat, [4.0, 4.0]), [-log(3), -log(3),]) - @test isapprox(log_score(yhat, [4.0, 4.0], w), [-2*log(27)/3, -log(27)]) - yhat = [discrete_uniform, discrete_uniform] - # issue https://github.com/JuliaStats/Distributions.jl/issues/1392 - @test_broken isapprox(log_score(yhat, [missing, 4.0]), [-log(4),]) - - log_score([missing, uniform], [4.0, 4.0]) - - # errors - @test_throws(MLJBase.err_l2_norm(brier_score), - brier_score([betaprime, betaprime], [1.0, 1.0])) - s = SphericalScore(alpha=1) - @test_throws MLJBase.ERR_UNSUPPORTED_ALPHA s(yhat, [1.0, 1.0]) -end - -true diff --git a/test/measures/roc.jl b/test/measures/roc.jl deleted file mode 100644 index aaaed8b7..00000000 --- a/test/measures/roc.jl +++ /dev/null @@ -1,13 +0,0 @@ -@testset "ROC" begin - y = [ 0 0 0 1 0 1 1 0] |> vec |> categorical - s = [0.0 0.1 0.1 0.1 0.2 0.2 0.5 0.5] |> vec - ŷ = UnivariateFinite([0, 1], s, augment=true, pool=y) - - fprs, tprs, ts = roc(ŷ, y) - - sk_fprs = [0. , 0.2, 0.4, 0.8, 1. ] - sk_tprs = [0. , 0.33333333, 0.66666667, 1., 1.] - - @test fprs ≈ sk_fprs - @test tprs ≈ sk_tprs -end diff --git a/test/preliminaries.jl b/test/preliminaries.jl index b806a840..bffc1f4e 100644 --- a/test/preliminaries.jl +++ b/test/preliminaries.jl @@ -12,12 +12,8 @@ using Distributed addprocs(; exeflags="--project=$(Base.active_project())") @info "nprocs() = $(nprocs())" -@static if VERSION >= v"1.3.0-DEV.573" - import .Threads - @info "nthreads() = $(Threads.nthreads())" -else - @info "Running julia $(VERSION). Multithreading tests excluded. 
" -end +import .Threads +@info "nthreads() = $(Threads.nthreads())" @everywhere begin using MLJModelInterface @@ -27,6 +23,7 @@ end using Logging using ComputationalResources using StableRNGs + using StatisticalMeasures end import TypedTables diff --git a/test/resampling.jl b/test/resampling.jl index c170039a..ef565f72 100644 --- a/test/resampling.jl +++ b/test/resampling.jl @@ -5,6 +5,9 @@ import ComputationalResources: CPU1, CPUProcesses, CPUThreads using .TestUtilities using ProgressMeter import Tables +@everywhere import StatisticalMeasures.StatisticalMeasuresBase as API +using StatisticalMeasures +import LearnAPI @everywhere begin using .Models @@ -25,13 +28,18 @@ struct DummyInterval <: Interval end dummy_interval=DummyInterval() dummy_measure_det(yhat, y) = 42 -MLJBase.target_scitype(::typeof(dummy_measure_det)) = Table(MLJBase.Textual) -MLJBase.prediction_type(::typeof(dummy_measure_det)) = :deterministic - -dummy_measure_interval(yhat, y) = [123, 456] -MLJBase.target_scitype(::typeof(dummy_measure_interval)) = - Table(MLJBase.Textual) -MLJBase.prediction_type(::typeof(dummy_measure_interval)) = :interval +API.@trait( + typeof(dummy_measure_det), + observation_scitype = MLJBase.Textual, + kind_of_proxy = LearnAPI.LiteralTarget(), +) + +dummy_measure_interval(yhat, y) = 42 +API.@trait( + typeof(dummy_measure_interval), + observation_scitype = MLJBase.Textual, + kind_of_proxy = LearnAPI.ConfidenceInterval(), +) @testset "_actual_operations" begin clf = ConstantClassifier() @@ -49,7 +57,7 @@ MLJBase.prediction_type(::typeof(dummy_measure_interval)) = :interval 1) == [predict_mean, predict_mean] - # handling of a measure with `:unknown` `prediction_type` (eg, + # handling of a measure with `nothing` `kind_of_proxy` (eg, # custom measure): my_mae(yhat, y) = abs.(yhat - y) @test( @@ -71,21 +79,29 @@ MLJBase.prediction_type(::typeof(dummy_measure_interval)) = :interval [predict_mode]) @test MLJBase._actual_operations(nothing, [l2,], rgs, 1) == [predict_mean, ] - @test_throws(MLJBase.err_incompatible_prediction_types(clf_det, LogLoss()), - MLJBase._actual_operations(nothing, [LogLoss(),], clf_det, 1)) + @test_throws( + MLJBase.err_incompatible_prediction_types(clf_det, LogLoss()), + MLJBase._actual_operations(nothing, [LogLoss(),], clf_det, 1), + ) @test MLJBase._actual_operations(nothing, measures_det, clf_det, 1) == [predict, predict] - # measure/model differ in prediction type but weird target_scitype: + # measure/model differ in prediction type: @test_throws( MLJBase.err_ambiguous_operation(clf, dummy_measure_det), - MLJBase._actual_operations(nothing, [dummy_measure_det, ], clf, 1)) + MLJBase._actual_operations(nothing, [dummy_measure_det, ], clf, 1), + ) # measure has :interval prediction type but model does not (2 cases): @test_throws( MLJBase.err_ambiguous_operation(clf, dummy_measure_interval), - MLJBase._actual_operations(nothing, - [dummy_measure_interval, ], clf, 1)) + MLJBase._actual_operations( + nothing, + [dummy_measure_interval, ], + clf, + 1, + ), + ) @test_throws( MLJBase.err_ambiguous_operation(clf_det, dummy_measure_interval), MLJBase._actual_operations(nothing, @@ -103,16 +119,6 @@ MLJBase.prediction_type(::typeof(dummy_measure_interval)) = :interval [LogLoss(), ], dummy_interval, 1)) end -@testset "_feature_dependencies_exist" begin - measures = Any[rms, rsq, log_loss, brier_score] - @test !MLJBase._feature_dependencies_exist(measures) - my_feature_dependent_loss(ŷ, X, y) = - sum(abs.(ŷ - y) .* X.penalty)/sum(X.penalty); - 
MLJBase.is_feature_dependent(::typeof(my_feature_dependent_loss)) = true - push!(measures, my_feature_dependent_loss) - @test MLJBase._feature_dependencies_exist(measures) -end - @testset_accelerated "dispatch of resources and progress meter" accel begin @info "Checking progress bars:" @@ -175,34 +181,50 @@ end y = rand(rng,4) # model prediction type is Probablistic but measure is Deterministic: - @test_throws(ArgumentError, - MLJBase._check_measure(rms, predict, model, y)) + @test_throws( + MLJBase.ERR_MEASURES_PROBABILISTIC(rms, MLJBase.LOG_SUGGESTION2), + MLJBase._check_measure(rms, predict, model, y), + ) @test MLJBase._check_measure(rms, predict_mean, model, y) @test MLJBase._check_measure(rms, predict_median, model, y) - # has `y` `Finite` elscityp but measure `rms` is for `Continuous`: + # has `y` `Finite` elscitype but measure `rms` is for `Continuous`: y=categorical(collect("abc")) - @test_throws(ArgumentError, - MLJBase._check_measure(rms, predict_median, model, y)) + @test_throws( + MLJBase.ERR_MEASURES_OBSERVATION_SCITYPE( + rms, + Union{Missing,Infinite}, + Multiclass{3}, + ), + MLJBase._check_measure(rms, predict_median, model, y), + ) model = ConstantClassifier() # model prediction type is Probablistic but measure is Deterministic: - @test_throws(ArgumentError, - MLJBase._check_measure(mcr, predict, model, y)) + @test_throws( + MLJBase.ERR_MEASURES_PROBABILISTIC(mcr, MLJBase.LOG_SUGGESTION1), + MLJBase._check_measure(mcr, predict, model, y), + ) @test MLJBase._check_measure(mcr, predict_mode, model, y) # `Determistic` model but `Probablistic` measure: model = DeterministicConstantClassifier() - @test_throws(ArgumentError, - MLJBase._check_measure(cross_entropy, predict, model, y)) + @test_throws( + MLJBase.ERR_MEASURES_DETERMINISTIC(cross_entropy), + MLJBase._check_measure(cross_entropy, predict, model, y), + ) # measure with wrong target_scitype: - @test_throws(ArgumentError, - MLJBase._check_measures([brier_score, rms], - [predict_mode, predict_mean], - model, y)) + @test_throws( + MLJBase.ERR_MEASURES_DETERMINISTIC(brier_score), + MLJBase._check_measures( + [brier_score, rms], + [predict_mode, predict_mean], + model, y, + ), + ) model = ConstantClassifier() @test MLJBase._check_measures([brier_score, cross_entropy, accuracy], @@ -211,8 +233,6 @@ end end @testset "check weights" begin - @test_throws(MLJBase.ERR_WEIGHTS_REAL, - MLJBase._check_weights([:junk, :junk], 2)) @test_throws(MLJBase.ERR_WEIGHTS_LENGTH, MLJBase._check_weights([0.5, 0.5], 3)) @test MLJBase._check_weights([0.5, 0.5], 2) @@ -227,18 +247,18 @@ end @test MLJBase._check_class_weights(w, ['b', 'a']) end +@everywhere begin + user_rms(yhat, y) = mean((yhat -y).^2) |> sqrt + # deliberately omitting `consumes_multiple_observations` trait: + API.@trait typeof(user_rms) kind_of_proxy=LearnAPI.LiteralTarget() +end + @testset_accelerated "folds specified" accel begin x1 = ones(10) x2 = ones(10) X = (x1=x1, x2=x2) y = [1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0] - my_rms(yhat, y) = sqrt(mean((yhat -y).^2)) - my_mae(yhat, y) = abs.(yhat - y) - MLJBase.reports_each_observation(::typeof(my_mae)) = true - MLJBase.prediction_type(::typeof(my_rms)) = :deterministic - MLJBase.prediction_type(::typeof(my_mae)) = :deterministic - resampling = [(3:10, 1:2), ([1, 2, 5, 6, 7, 8, 9, 10], 3:4), ([1, 2, 3, 4, 7, 8, 9, 10], 5:6), @@ -251,19 +271,27 @@ end mach = machine(model, X, y, cache=cache) # check detection of incompatible measure (cross_entropy): - @test_throws ArgumentError evaluate!(mach, resampling=resampling, - 
measure=[cross_entropy, rmslp1], - verbosity=verb, - acceleration=accel) + @test_throws( + MLJBase.err_incompatible_prediction_types(model, cross_entropy), + evaluate!( + mach, + resampling=resampling, + measure=[cross_entropy, rmslp1], + verbosity=verb, + acceleration=accel, + ), + ) result = evaluate!(mach, resampling=resampling, verbosity=verb, - measure=[my_rms, my_mae, rmslp1], acceleration=accel) + measure=[user_rms, mae, rmslp1], acceleration=accel) v = [1/2, 3/4, 1/2, 3/4, 1/2] @test result.per_fold[1] ≈ v @test result.per_fold[2] ≈ v @test result.per_fold[3][1] ≈ abs(log(2) - log(2.5)) - @test ismissing(result.per_observation[1]) + @test result.per_observation[1] ≈ map(result.per_fold[1]) do μ + fill(μ, 2) + end @test result.per_observation[2][1] ≈ [1/2, 1/2] @test result.per_observation[2][2] ≈ [3/4, 3/4] @test result.measurement[1] ≈ mean(v) @@ -454,7 +482,7 @@ end d for fold in folds]) end -@testset_accelerated "sample weights in evaluation" accel begin +@testset_accelerated "weights in evaluation" accel begin # cv: x1 = ones(4) x2 = ones(4) @@ -637,13 +665,6 @@ end measure=misclassification_rate, weights = fill(1, 100), acceleration=accel, verbosity=verb)) - - @test_throws(ArgumentError, - evaluate!(mach, resampling=Holdout(fraction_train=0.6), - operation=predict_mode, - measure=misclassification_rate, - weights = fill('a', 5), acceleration=accel, - verbosity=verb)) end # resampling on a subset of all rows: @@ -813,7 +834,7 @@ end operation=predict_mode, measure=ConfusionMatrix(), resampling=CV(), - ) + ); printed_evaluations = sprint(show, "text/plain", evaluations) @test contains(printed_evaluations, "N/A") end diff --git a/test/runtests.jl b/test/runtests.jl index 8b07929e..9db2dbbd 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -26,9 +26,7 @@ end end @conditional_testset "measures" begin - @test include("measures/measures.jl") - @test include("measures/measure_search.jl") - @test include("measures/doc_strings.jl") + @test include("measures.jl") end @conditional_testset "resampling" begin diff --git a/test/utilities.jl b/test/utilities.jl index f9e40580..4ec731b5 100644 --- a/test/utilities.jl +++ b/test/utilities.jl @@ -171,5 +171,17 @@ end "sin, cos, tan, ..." end +@testset "guess_observation_scitype" begin + @test MLJBase.guess_observation_scitype([missing, 1, 2, 3]) == + Union{Missing, Count} + @test MLJBase.guess_observation_scitype(rand(3, 2)) == + AbstractVector{Continuous} + @test MLJBase.guess_observation_scitype((x=rand(3), y=rand(Bool, 3))) == + AbstractVector{Union{Continuous, Count}} + @test MLJBase.guess_observation_scitype((x=[missing, 1, 2], y=[1, 2, 3])) == + Unknown + @test MLJBase.guess_observation_scitype(5) == Unknown +end + end # module true From 1a4e5e1a8f567f47f73c76942daa07b8e2fc6fde Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Thu, 25 May 2023 14:12:10 +1200 Subject: [PATCH 02/13] remove loss functions --- Project.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/Project.toml b/Project.toml index 3b5c52f0..d0be5d1e 100644 --- a/Project.toml +++ b/Project.toml @@ -15,7 +15,6 @@ InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" InvertedIndices = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" LearnAPI = "92ad9a40-7767-427a-9ee6-6e577f1266cb" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7" MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" @@ -38,7 +37,6 @@ CategoricalDistributions = "0.1" ComputationalResources = "0.3" Distributions = "0.25.3" InvertedIndices = "1" -LossFunctions = "0.10" MLJModelInterface = "1.7" Missings = "0.4, 1" LearnAPI = "0.1" From 4c54571f54b3bb36cbadb1e94853dea475286193 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Thu, 25 May 2023 14:39:18 +1200 Subject: [PATCH 03/13] add 0.22 staging branch to ci --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ac1e885e..31959ec6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,7 +4,7 @@ on: branches: - master - dev - - for-a-0-point-21-release + - for-a-0-point-22-release - next-breaking-release push: branches: From b1bfab178ad131e30efb1940f7c3dfb0e82eaa47 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Thu, 25 May 2023 16:17:04 +1200 Subject: [PATCH 04/13] remove LossFunctions import --- src/MLJBase.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/MLJBase.jl b/src/MLJBase.jl index 17d13308..ec5b9eec 100644 --- a/src/MLJBase.jl +++ b/src/MLJBase.jl @@ -79,7 +79,6 @@ using ProgressMeter import .Threads # Operations & extensions -import LossFunctions import StatsBase import StatsBase: fit!, mode, countmap import Missings: levels From da710ad4adff09ea7efb8b28301b30a254c5e128 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Fri, 26 May 2023 11:52:11 +1200 Subject: [PATCH 05/13] cleanup default measures, incl. 
a bug fix; change regressor default; --- src/MLJBase.jl | 2 +- src/measures.jl | 73 +++++++++++----------------------------------- src/utilities.jl | 62 +++++++++++++++++++++++++++++++++++---- test/measures.jl | 34 +++++++++++---------- test/resampling.jl | 5 ++-- test/utilities.jl | 25 +++++++++++++++- 6 files changed, 119 insertions(+), 82 deletions(-) diff --git a/src/MLJBase.jl b/src/MLJBase.jl index ec5b9eec..db0c7562 100644 --- a/src/MLJBase.jl +++ b/src/MLJBase.jl @@ -48,7 +48,7 @@ end ################### # Hack Block ends # ################### - +import MLJModelInterface: ProbabilisticDetector, DeterministicDetector import MLJModelInterface: fit, update, update_data, transform, inverse_transform, fitted_params, predict, predict_mode, predict_mean, predict_median, predict_joint, diff --git a/src/measures.jl b/src/measures.jl index c0a2ae7b..0af91204 100644 --- a/src/measures.jl +++ b/src/measures.jl @@ -1,58 +1,19 @@ # # DEFAULT MEASURES -default_measure(T, S) = _default_measure(T, nonmissingtype(S)) - -_default_measure(T, S) = nothing - -# Deterministic + Continuous / Count ==> RMS -function _default_measure( - ::Type{<:Deterministic}, - ::Type{<:Union{AbstractVector{<:Continuous}, AbstractVector{<:Count}}}, -) - return rms -end - -# Deterministic + Finite ==> Misclassification rate -function _default_measure( - ::Type{<:Deterministic}, - ::Type{<:AbstractVector{<:Finite}}, -) - return misclassification_rate -end - -# Probabilistic + Finite / Count ==> log loss -function _default_measure( - ::Type{<:Probabilistic}, - ::Type{<:Union{AbstractVector{<:Finite},AbstractVector{<:Count}}}, -) - return log_loss -end - -# Probabilistic + Continuous ==> Log loss -function _default_measure( - ::Type{<:Probabilistic}, - ::Type{<:AbstractVector{<:Continuous}}, -) - return log_loss -end - -function _default_measure( - ::Type{<:MMI.ProbabilisticDetector}, - ::Type{<:AbstractVector{<:OrderedFactor{2}}}, -) - return area_under_curve -end - -function _default_measure( - ::Type{<:MMI.DeterministicDetector}, - ::Type{<:AbstractVector{<:OrderedFactor{2}}}, -) - return balanced_accuracy -end - -# Fallbacks -default_measure(M::Type{<:Supervised}) = default_measure(M, target_scitype(M)) -default_measure(::M) where M <: Supervised = default_measure(M) - -default_measure(M::Type{<:Annotator}) = _default_measure(M, target_scitype(M)) -default_measure(::M) where M <: Annotator = default_measure(M) +""" + default_measure(model) + +Return a measure that should work with `model`, or return `nothing` if none can be +reliably inferred. + +""" +default_measure(m) = nothing +default_measure(m::Union{Supervised,Annotator}) = + default_measure(m, nonmissingtype(guess_model_target_observation_scitype(m))) +default_measure(m, S) = nothing +default_measure(::Deterministic, ::Type{<:Union{Continuous,Count}}) = l2 +default_measure(::Deterministic, ::Type{<:Finite}) = misclassification_rate +default_measure(::Probabilistic, ::Type{<:Union{Finite,Count}}) = log_loss +default_measure(::Probabilistic, ::Type{<:Continuous}) = log_loss +default_measure(::ProbabilisticDetector, ::Type{<:OrderedFactor{2}}) = area_under_curve +default_measure(::DeterministicDetector, ::Type{<:OrderedFactor{2}}) = balanced_accuracy diff --git a/src/utilities.jl b/src/utilities.jl index e9775e93..7288c30e 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -470,9 +470,46 @@ end generate_name!(model, existing_names; kwargs...) = generate_name!(typeof(model), existing_names; kwargs...) 
-# This is a bit of hack in the case of tables, it is based on the first row. If some some
-# rows have `missing`s, they may not be accounted for in the type. If the first row has
-# `missing`s the "regular" type will not be accounted for. So use
+
+# # OBSERVATION VS CONTAINER HACKING TOOLS
+
+# The following tools are used to bridge the gap between the old paradigm of prescribing
+# the scitype of containers of observations, and the LearnAPI.jl paradigm of prescribing
+# only the scitype of the observations themselves. This is needed because measures are
+# now taken from StatisticalMeasures.jl, which follows the LearnAPI.jl paradigm, but model
+# `target_scitype` refers to containers.
+
+"""
+    observation(S)
+
+*Private method.*
+
+Tries to infer the per-observation scitype from the scitype of `S`, when `S` is known to
+be the scitype of some container with multiple observations. Return `Unknown` if unable
+to draw a reliable inference.
+
+The observation scitype for a table is here understood as the scitype of a row converted
+to a vector.
+
+"""
+observation(::Type) = Unknown
+observation(::Type{AbstractVector{S}}) where S = S
+observation(::Type{AbstractArray{S,N}}) where {S,N} = AbstractArray{S,N-1}
+for T in [:Continuous, :Count, :Finite, :Infinite, :Multiclass, :OrderedFactor]
+    TM = "Union{Missing,$T}" |> Meta.parse
+    for S in [T, TM]
+        quote
+            observation(::Type{AbstractVector{<:$S}}) = $S
+            observation(::Type{AbstractArray{<:$S,N}}) where N = AbstractArray{<:$S,N-1}
+            observation(::Type{Table{<:AbstractVector{<:$S}}}) = AbstractVector{<:$S}
+        end |> eval
+    end
+end
+# note that in Julia `f(::Type{AbstractVector{<:T}}) where T = T` does not have a
+# well-formed left-hand side
+
 """
     guess_observation_scitype(y)
 
@@ -500,12 +537,25 @@ Unknown
 """
 guess_observation_scitype(y) = guess_observation_scitype(y, Val(Tables.istable(y)))
 guess_observation_scitype(y, ::Any) = Unknown
-guess_observation_scitype(y::AbstractArray, ::Val{false}) = _observation(scitype(y))
-_observation(::Type{AbstractVector{S}}) where S = S
-_observation(::Type{AbstractArray{S,N}}) where {S,N} = AbstractArray{S,N-1}
+guess_observation_scitype(y::AbstractArray, ::Val{false}) = observation(scitype(y))
 function guess_observation_scitype(table, ::Val{true})
     row = Tables.subset(table, 1, viewhint=false) |> collect
     E = eltype(row)
     nonmissingtype(E) == E || return Unknown
     scitype(row)
 end
+
+"""
+    guess_model_target_observation_scitype(model)
+
+*Private method.*
+
+Try to infer a lowest upper bound on the scitype of target observations acceptable to
+`model`, by inspecting `target_scitype(model)`. Return `Unknown` if unable to draw a
+reliable inference.
+
+The observation scitype for a table is here understood as the scitype of a row converted
+to a vector. 
+ +""" +guess_model_target_observation_scitype(model) = observation(target_scitype(model)) diff --git a/test/measures.jl b/test/measures.jl index 34502fb1..28a28b5d 100644 --- a/test/measures.jl +++ b/test/measures.jl @@ -1,40 +1,42 @@ mutable struct DRegressor <: Deterministic end MLJBase.target_scitype(::Type{<:DRegressor}) = - AbstractVector{<:Continuous} + AbstractVector{<:Union{Missing,Continuous}} mutable struct D2Regressor <: Deterministic end MLJBase.target_scitype(::Type{<:D2Regressor}) = - AbstractVector{Continuous} + AbstractVector{<:Union{Missing,Continuous}} mutable struct DClassifier <: Deterministic end MLJBase.target_scitype(::Type{<:DClassifier}) = - AbstractVector{<:Finite} + AbstractVector{<:Union{Missing,Finite}} + +mutable struct DClassifierWeird <: Deterministic end +MLJBase.target_scitype(::Type{<:DClassifierWeird}) = + AbstractVector{<:Textual} mutable struct PClassifier <: Probabilistic end MLJBase.target_scitype(::Type{<:PClassifier}) = - AbstractVector{<:Finite} + AbstractVector{<:Union{Missing,Finite}} mutable struct PRegressor <: Probabilistic end MLJBase.target_scitype(::Type{<:PRegressor}) = - AbstractVector{<:Continuous} + AbstractVector{<:Union{Missing,Continuous}} mutable struct PCountRegressor <: Probabilistic end MLJBase.target_scitype(::Type{<:PCountRegressor}) = - AbstractVector{<:Count} + AbstractVector{<:Union{Missing,Count}} + + @testset "default_measure" begin - @test MLJBase.default_measure(DRegressor()) == rms - @test MLJBase.default_measure(D2Regressor()) == rms + @test MLJBase.default_measure(DRegressor()) == l2 + @test MLJBase.default_measure(D2Regressor()) == l2 @test MLJBase.default_measure(DClassifier()) == misclassification_rate @test MLJBase.default_measure(PClassifier()) == log_loss - - @test MLJBase.default_measure(DRegressor) == rms - @test MLJBase.default_measure(D2Regressor) == rms - @test MLJBase.default_measure(DClassifier) == misclassification_rate - @test MLJBase.default_measure(PClassifier) == log_loss - - @test MLJBase.default_measure(PRegressor) == log_loss - @test MLJBase.default_measure(PCountRegressor) == log_loss + @test MLJBase.default_measure(PRegressor()) == log_loss + @test MLJBase.default_measure(PCountRegressor()) == log_loss + @test isnothing(MLJBase.default_measure(DClassifierWeird())) + @test isnothing(MLJBase.default_measure("junk")) end true diff --git a/test/resampling.jl b/test/resampling.jl index ef565f72..4009bdd5 100644 --- a/test/resampling.jl +++ b/test/resampling.jl @@ -341,10 +341,11 @@ end model = Models.DeterministicConstantRegressor() for cache in [true, false] mach = machine(model, X, y, cache=cache) + # to see if a default measure is found: + evaluate!(mach, resampling=holdout, verbosity=verb, + acceleration=accel) result = evaluate!(mach, resampling=holdout, verbosity=verb, measure=[rms, rmslp1], acceleration=accel) - result = evaluate!(mach, resampling=holdout, verbosity=verb, - acceleration=accel) @test result.measurement[1] ≈ 2/3 # test direct evaluation of a model + data: diff --git a/test/utilities.jl b/test/utilities.jl index 4ec731b5..03be2877 100644 --- a/test/utilities.jl +++ b/test/utilities.jl @@ -171,6 +171,22 @@ end "sin, cos, tan, ..." 
end +@testset "observation" begin + @test MLJBase.observation(AbstractVector{Count}) == + Count + @test MLJBase.observation(AbstractVector{<:Count}) == + Count + @test MLJBase.observation(AbstractVector{<:Union{Missing,Count}}) == + Union{Missing,Count} + @test MLJBase.observation(AbstractMatrix{<:Count}) == + AbstractVector{<:Count} + @test MLJBase.observation(AbstractMatrix{Union{Missing,Count}}) == + AbstractVector{Union{Missing,Count}} + @test MLJBase.observation(AbstractMatrix{<:Union{Missing,Count}}) == + AbstractVector{<:Union{Missing,Count}} + @test MLJBase.observation(Table(Count)) == AbstractVector{<:Count} +end + @testset "guess_observation_scitype" begin @test MLJBase.guess_observation_scitype([missing, 1, 2, 3]) == Union{Missing, Count} @@ -181,7 +197,14 @@ end @test MLJBase.guess_observation_scitype((x=[missing, 1, 2], y=[1, 2, 3])) == Unknown @test MLJBase.guess_observation_scitype(5) == Unknown -end +end + +mutable struct DRegressor2 <: Deterministic end +MLJBase.target_scitype(::Type{<:DRegressor2}) = + AbstractVector{<:Continuous} + +@test MLJBase.guess_model_target_observation_scitype(DRegressor2()) == Continuous + end # module true From 2692ca9b3751f8ed6491ce1ef56977037d45c70b Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Fri, 26 May 2023 12:18:12 +1200 Subject: [PATCH 06/13] rename two files + whitespace changes + docstring addition --- src/MLJBase.jl | 2 +- src/{measures.jl => default_measures.jl} | 3 +++ test/{measures.jl => default_measures.jl} | 0 test/runtests.jl | 4 ++-- 4 files changed, 6 insertions(+), 3 deletions(-) rename src/{measures.jl => default_measures.jl} (90%) rename test/{measures.jl => default_measures.jl} (100%) diff --git a/src/MLJBase.jl b/src/MLJBase.jl index db0c7562..eabb000d 100644 --- a/src/MLJBase.jl +++ b/src/MLJBase.jl @@ -177,7 +177,7 @@ include("data/data.jl") include("data/datasets.jl") include("data/datasets_synthetic.jl") -include("measures.jl") +include("default_measures.jl.jl") include("composition/models/stacking.jl") diff --git a/src/measures.jl b/src/default_measures.jl similarity index 90% rename from src/measures.jl rename to src/default_measures.jl index 0af91204..0dc05ad0 100644 --- a/src/measures.jl +++ b/src/default_measures.jl @@ -6,6 +6,9 @@ Return a measure that should work with `model`, or return `nothing` if none can be reliably inferred. +For Julia 1.9 and higher, `nothing` is returned, unless StatisticalMeasures.jl is +loaded. + """ default_measure(m) = nothing default_measure(m::Union{Supervised,Annotator}) = diff --git a/test/measures.jl b/test/default_measures.jl similarity index 100% rename from test/measures.jl rename to test/default_measures.jl diff --git a/test/runtests.jl b/test/runtests.jl index 9db2dbbd..a9c1a49b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -25,8 +25,8 @@ end @test include("interface/data_utils.jl") end -@conditional_testset "measures" begin - @test include("measures.jl") +@conditional_testset "default_measures.jl" begin + @test include("default_measures.jl.jl") end @conditional_testset "resampling" begin From 1dde0d939d728ca120edb1598e710171d7e022d1 Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Fri, 26 May 2023 12:19:32 +1200 Subject: [PATCH 07/13] oop forgotten commit --- src/MLJBase.jl | 2 +- src/resampling.jl | 73 +++++++++++++++++++++-------------------------- test/runtests.jl | 4 +-- 3 files changed, 35 insertions(+), 44 deletions(-) diff --git a/src/MLJBase.jl b/src/MLJBase.jl index eabb000d..66472830 100644 --- a/src/MLJBase.jl +++ b/src/MLJBase.jl @@ -177,7 +177,7 @@ include("data/data.jl") include("data/datasets.jl") include("data/datasets_synthetic.jl") -include("default_measures.jl.jl") +include("default_measures.jl") include("composition/models/stacking.jl") diff --git a/src/resampling.jl b/src/resampling.jl index d0591325..3f8ff4eb 100644 --- a/src/resampling.jl +++ b/src/resampling.jl @@ -841,39 +841,33 @@ _process_accel_settings(accel) = throw(ArgumentError("unsupported" * verbosity=1, check_measure=true) -Estimate the performance of a machine `mach` wrapping a supervised -model in data, using the specified `resampling` strategy (defaulting -to 6-fold cross-validation) and `measure`, which can be a single -measure or vector. +Estimate the performance of a machine `mach` wrapping a supervised model in data, using +the specified `resampling` strategy (defaulting to 6-fold cross-validation) and `measure`, +which can be a single measure or vector. -Do `subtypes(MLJ.ResamplingStrategy)` to obtain a list of available -resampling strategies. If `resampling` is not an object of type -`MLJ.ResamplingStrategy`, then a vector of tuples (of the form -`(train_rows, test_rows)` is expected. For example, setting +Do `subtypes(MLJ.ResamplingStrategy)` to obtain a list of available resampling +strategies. If `resampling` is not an object of type `MLJ.ResamplingStrategy`, then a +vector of tuples (of the form `(train_rows, test_rows)` is expected. For example, setting resampling = [((1:100), (101:200)), ((101:200), (1:100))] gives two-fold cross-validation using the first 200 rows of data. -The type of operation (`predict`, `predict_mode`, etc) to be -associated with `measure` is automatically inferred from measure -traits where possible. For example, `predict_mode` will be used for a -`Multiclass` target, if `model` is probabilistic but `measure` is -deterministic. The operations applied can be inspected from the -`operation` field of the object returned. Alternatively, operations -can be explicitly specified using `operation=...`. If `measure` is a -vector, then `operation` must be a single operation, which will be -associated with all measures, or a vector of the same length as -`measure`. - -The resampling strategy is applied repeatedly (Monte Carlo resampling) -if `repeats > 1`. For example, if `repeats = 10`, then `resampling = -CV(nfolds=5, shuffle=true)`, generates a total of 50 `(train, test)` -pairs for evaluation and subsequent aggregation. - -If `resampling isa MLJ.ResamplingStrategy` then one may optionally -restrict the data used in evaluation by specifying `rows`. +The type of operation (`predict`, `predict_mode`, etc) to be associated with `measure` is +automatically inferred from measure traits where possible. For example, `predict_mode` +will be used for a `Multiclass` target, if `model` is probabilistic but `measure` is +deterministic. The operations applied can be inspected from the `operation` field of the +object returned. Alternatively, operations can be explicitly specified using +`operation=...`. 
If `measure` is a vector, then `operation` must be a single operation,
+which will be associated with all measures, or a vector of the same length as `measure`.
+
+The resampling strategy is applied repeatedly (Monte Carlo resampling) if `repeats >
+1`. For example, if `repeats = 10`, then `resampling = CV(nfolds=5, shuffle=true)`,
+generates a total of 50 `(train, test)` pairs for evaluation and subsequent aggregation.
+
+If `resampling isa MLJ.ResamplingStrategy` then one may optionally restrict the data used
+in evaluation by specifying `rows`.
 
 An optional `weights` vector may be passed for measures that support sample weights
 (`StatisticalMeasuresBase.supports_weights(measure) == true`), which is ignored by those
 that don't. These weights are not to be confused with any weights `w` bound to `mach` (as
 in `mach = machine(model, X, y, w)`). To pass these to the performance
 evaluation measures you must explicitly specify `weights=w` in the `evaluate!` call.
 
-Additionally, optional `class_weights` dictionary may be passed
-for measures that support class weights
-(`MLJ.supports_class_weights(measure) == true`), which is
-ignored by those that don't. These weights are not to be confused with
-any weights `class_w` bound to `mach` (as in `mach = machine(model, X,
-y, class_w)`). To pass these to the performance evaluation measures you
-must explictly specify `class_weights=w` in the `evaluate!` call.
+Additionally, an optional `class_weights` dictionary may be passed for measures that
+support class weights (`MLJ.supports_class_weights(measure) == true`), which is ignored by
+those that don't. These weights are not to be confused with any weights `class_w` bound to
+`mach` (as in `mach = machine(model, X, y, class_w)`). To pass these to the performance
+evaluation measures you must explicitly specify `class_weights=w` in the `evaluate!` call.
 
 User-defined measures are supported; see the manual for details.
 
-If no measure is specified, then `default_measure(mach.model)` is
-used, unless this default is `nothing` and an error is thrown.
+If no measure is specified, then `default_measure(mach.model)` is used, unless this
+default is `nothing` and an error is thrown.
 
-The `acceleration` keyword argument is used to specify the compute resource (a
-subtype of `ComputationalResources.AbstractResource`) that will be used to
-accelerate/parallelize the resampling operation.
+The `acceleration` keyword argument is used to specify the compute resource (a subtype of
+`ComputationalResources.AbstractResource`) that will be used to accelerate/parallelize the
+resampling operation.
 
-Although `evaluate!` is mutating, `mach.model` and `mach.args` are
-untouched.
+Although `evaluate!` is mutating, `mach.model` and `mach.args` are untouched.
 
 ### Summary of key-word arguments
 
@@ -930,7 +921,7 @@ untouched.
   `CPUThreads` (multi-threaded computation) and `CPUProcesses`
   (multi-process computation); default is `default_resource()`.
 
-- `force` - default is `false`; set to `true` for force cold-restart
+- `force` - default is `false`; set to `true` to force cold-restart
   of each training event
 
 - `verbosity` level, an integer defaulting to 1.
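A minimal usage sketch of the evaluation API documented above (illustrative only, not part
of the patch; `model`, `X`, and `y` are hypothetical stand-ins for a supervised model and
its data, and `l2` and `mae` are measures re-exported from StatisticalMeasures.jl):

```julia
using MLJBase, StatisticalMeasures

# bind the (hypothetical) model to data:
mach = machine(model, X, y)

# 5-fold cross-validation, repeated twice, with two measures; the prediction
# operation appropriate to each measure is inferred automatically:
e = evaluate!(
    mach;
    resampling=CV(nfolds=5, shuffle=true),
    measure=[l2, mae],
    repeats=2,       # Monte Carlo resampling: 10 (train, test) pairs in total
    verbosity=0,
)

e.measurement   # aggregated results, one entry per measure
e.per_fold      # fold-by-fold aggregates
e.operation     # operations actually applied, one per measure
```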
diff --git a/test/runtests.jl b/test/runtests.jl index a9c1a49b..f6076565 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -25,8 +25,8 @@ end @test include("interface/data_utils.jl") end -@conditional_testset "default_measures.jl" begin - @test include("default_measures.jl.jl") +@conditional_testset "default_measures" begin + @test include("default_measures.jl") end @conditional_testset "resampling" begin From a3fca5272402b4769b2dca21fa75040a71a4ae10 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Fri, 26 May 2023 13:19:09 +1200 Subject: [PATCH 08/13] add per_observation flag to evaluate/evaluate! for performance boost --- src/resampling.jl | 100 +++++++++++++++++++++++++++++++-------------- test/resampling.jl | 36 ++++++++++++++++ 2 files changed, 105 insertions(+), 31 deletions(-) diff --git a/src/resampling.jl b/src/resampling.jl index 3f8ff4eb..7ef69f0e 100644 --- a/src/resampling.jl +++ b/src/resampling.jl @@ -623,7 +623,7 @@ function _check_measure(measure, operation, model, y) # get type supported by measure: T_measure = StatisticalMeasuresBase.observation_scitype(measure) - + T == Unknown && (return true) T_measure == Union{} && (return true) isnothing(StatisticalMeasuresBase.kind_of_proxy(measure)) && (return true) @@ -838,8 +838,9 @@ _process_accel_settings(accel) = throw(ArgumentError("unsupported" * repeats=1, acceleration=default_resource(), force=false, - verbosity=1, - check_measure=true) + check_measure=true, + per_observation=true, + verbosity=1) Estimate the performance of a machine `mach` wrapping a supervised model in data, using the specified `resampling` strategy (defaulting to 6-fold cross-validation) and `measure`, @@ -928,6 +929,10 @@ Although `evaluate!` is mutating, `mach.model` and `mach.args` are untouched. - `check_measure` - default is `true` +- `per_observation=true`: whether to calculate estimates for individual observations; if + `false` the `per_observation` field of the returned object is populated with + `missing`s. Setting to `false` may reduce compute time and allocations. 
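To illustrate the new `per_observation` option added in this patch (an illustrative
sketch, not part of the patch; `mach` is any machine wrapping a supervised model, as in
the example further above):

```julia
# skip per-observation estimates for a possible saving in time and allocations:
e = evaluate!(mach; resampling=CV(nfolds=5), measure=l2, per_observation=false)

e.per_observation   # Vector{Missing} — per-observation results are not computed
e.per_fold          # still populated, as is e.measurement
```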
+

### Return value

A [`PerformanceEvaluation`](@ref) object. See
[`PerformanceEvaluation`](@ref) for details.

"""
function evaluate!(mach::Machine{<:Measurable};
@@ -948,7 +953,8 @@ function evaluate!(mach::Machine{<:Measurable};
                    repeats=1,
                    force=false,
                    check_measure=true,
-                   verbosity=1)
+                   per_observation=true,
+                   verbosity=1,)

    # this method just checks validity of options, preprocess the
    # weights, measures, operations, and dispatches a
@@ -1000,7 +1006,7 @@ function evaluate!(mach::Machine{<:Measurable};
    _acceleration= _process_accel_settings(acceleration)

    evaluate!(mach, resampling, weights, class_weights, rows, verbosity,
-              repeats, _measures, _operations, _acceleration, force)
+              repeats, _measures, _operations, _acceleration, force, per_observation)
end
@@ -1165,7 +1171,7 @@ _view(weights, rows) = view(weights, rows)

# Evaluation when `resampling` is a TrainTestPairs (CORE EVALUATOR):
function evaluate!(mach::Machine, resampling, weights,
                   class_weights, rows, verbosity, repeats,
-                   measures, operations, acceleration, force)
+                   measures, operations, acceleration, force, per_observation_flag)

    # Note: `rows` and `repeats` are ignored here
@@ -1202,15 +1208,25 @@ function evaluate!(mach::Machine, resampling, weights,
            Dict(op=>op(mach, rows=test) for op in unique(operations))

        ytest = selectrows(y, test)
-
-        measurements = map(measures, operations) do m, op
-            StatisticalMeasuresBase.measurements(
-                m,
-                yhat_given_operation[op],
-                ytest,
-                _view(weights, test),
-                class_weights,
-            )
+        if per_observation_flag
+            measurements = map(measures, operations) do m, op
+                StatisticalMeasuresBase.measurements(
+                    m,
+                    yhat_given_operation[op],
+                    ytest,
+                    _view(weights, test),
+                    class_weights,
+                )
+            end
+        else
+            measurements = map(measures, operations) do m, op
+                m(
+                    yhat_given_operation[op],
+                    ytest,
+                    _view(weights, test),
+                    class_weights,
+                )
+            end
        end

        fp = fitted_params(mach)
@@ -1243,23 +1259,39 @@ function evaluate!(mach::Machine, resampling, weights,

    measurements_flat = vcat(measurements_vector_of_vectors...)

-    # in the following rows=folds, columns=measures; each element of the matrix is a
-    # vector of meausurements, one per observation, over a fold for a particular metric.
+    # In the `measurements_matrix` below, rows=folds, columns=measures; each element of
+    # the matrix is:
+    #
+    # - a vector of measurements, one per observation within a fold, if
+    #   `per_observation_flag = true`; or
+    #
+    # - a single measurement for the whole fold, if `per_observation_flag = false`.
+ # measurements_matrix = permutedims( reshape(collect(measurements_flat), (nmeasures, nfolds)) ) # measurements for each observation: - per_observation = map(1:nmeasures) do k - measurements_matrix[:,k] + per_observation = if per_observation_flag + map(1:nmeasures) do k + measurements_matrix[:,k] + end + else + fill(missing, nmeasures) end # measurements for each fold: - per_fold = map(1:nmeasures) do k - m = measures[k] - mode = StatisticalMeasuresBase.external_aggregation_mode(m) - map(per_observation[k]) do v - StatisticalMeasuresBase.aggregate(v; mode) + per_fold = if per_observation_flag + map(1:nmeasures) do k + m = measures[k] + mode = StatisticalMeasuresBase.external_aggregation_mode(m) + map(per_observation[k]) do v + StatisticalMeasuresBase.aggregate(v; mode) + end + end + else + map(1:nmeasures) do k + measurements_matrix[:,k] end end @@ -1329,7 +1361,8 @@ end operation=predict, repeats = 1, acceleration=default_resource(), - check_measure=true + check_measure=true, + per_observation=true, ) Resampling model wrapper, used internally by the `fit` method of @@ -1375,6 +1408,7 @@ mutable struct Resampler{S} <: Model check_measure::Bool repeats::Int cache::Bool + per_observation::Bool end # Some traits are markded as `missing` because we cannot determine @@ -1403,8 +1437,8 @@ function MLJModelInterface.clean!(resampler::Resampler) return warning end -function Resampler(; - model=nothing, +function Resampler( + ;model=nothing, resampling=CV(), measure=nothing, weights=nothing, @@ -1413,7 +1447,8 @@ function Resampler(; acceleration=default_resource(), check_measure=true, repeats=1, - cache=true + cache=true, + per_observation=true, ) resampler = Resampler( model, @@ -1425,7 +1460,8 @@ function Resampler(; acceleration, check_measure, repeats, - cache + cache, + per_observation, ) message = MLJModelInterface.clean!(resampler) isempty(message) || @warn message @@ -1470,7 +1506,8 @@ function MLJModelInterface.fit(resampler::Resampler, verbosity::Int, args...) 
_measures,
        _operations,
        _acceleration,
-        false
+        false,
+        resampler.per_observation,
    )

    fitresult = (machine = mach, evaluation = e)
@@ -1533,7 +1570,8 @@ function MLJModelInterface.update(
        measures,
        operations,
        acceleration,
-        false
+        false,
+        resampler.per_observation
    )
    report = (evaluation = e, )
    fitresult = (machine=mach2, evaluation=e)
diff --git a/test/resampling.jl b/test/resampling.jl
index 4009bdd5..18fdead3 100644
--- a/test/resampling.jl
+++ b/test/resampling.jl
@@ -304,6 +304,42 @@ end
    end
end

+@testset "folds specified - per_observation=false" begin
+    accel = CPU1()
+    cache = true
+    x1 = ones(10)
+    x2 = ones(10)
+    X = (x1=x1, x2=x2)
+    y = [1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0]
+
+    resampling = [(3:10, 1:2),
+                  ([1, 2, 5, 6, 7, 8, 9, 10], 3:4),
+                  ([1, 2, 3, 4, 7, 8, 9, 10], 5:6),
+                  ([1, 2, 3, 4, 5, 6, 9, 10], 7:8),
+                  (1:8, 9:10)]
+
+    model = DeterministicConstantRegressor()
+    mach = machine(model, X, y, cache=cache)
+
+    result = evaluate!(mach, resampling=resampling, verbosity=verb,
+                       measure=[user_rms, mae, rmslp1], acceleration=accel,
+                       per_observation=false)
+
+    v = [1/2, 3/4, 1/2, 3/4, 1/2]
+
+    @test result.per_fold[1] ≈ v
+    @test result.per_fold[2] ≈ v
+    @test result.per_fold[3][1] ≈ abs(log(2) - log(2.5))
+    @test result.per_observation isa Vector{Missing}
+    @test result.measurement[1] ≈ mean(v)
+    @test result.measurement[2] ≈ mean(v)
+
+    # fitted_params and report per fold:
+    @test map(fp->fp.fitresult, result.fitted_params_per_fold) ≈
+        [1.5, 1.25, 1.5, 1.25, 1.5]
+    @test all(isnothing, result.report_per_fold)
+end
+
 @testset "repeated resampling" begin
     x1 = ones(20)
     x2 = ones(20)

From 91a6f54c8b1325dd454675b24d83fd664dbc8d Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom"
Date: Fri, 26 May 2023 13:46:37 +1200
Subject: [PATCH 09/13] simplify evaluate!/evaluate docstrings

update `evaluate` docstring
---
 src/resampling.jl | 125 ++++++++++++++++-------------------------------
 1 file changed, 43 insertions(+), 82 deletions(-)

diff --git a/src/resampling.jl b/src/resampling.jl
index 7ef69f0e..3cd4c582 100644
--- a/src/resampling.jl
+++ b/src/resampling.jl
@@ -507,7 +507,9 @@ These fields are part of the public API of the `PerformanceEvaluation` struct.
   the `i`th observation in the `f`th test fold, evaluated using the `m`th measure. Useful
   for some forms of hyper-parameter optimization. Note that an aggregated measurement for
   some measure `measure` is repeated across all observations in a fold if
-  `StatisticalMeasures.can_report_unaggregated(measure) == true`.
+  `StatisticalMeasures.can_report_unaggregated(measure) == false`. If `e` has been computed
+  with the `per_observation=false` option, then `e.per_observation` is a vector of
+  `missings`.

- `fitted_params_per_fold`: a vector containing `fitted_params(mach)` for each machine
  `mach` trained during resampling - one machine per train/test pair. Use this to extract
@@ -828,23 +830,12 @@ _process_accel_settings(accel) = throw(ArgumentError("unsupported" *
# User interface points: `evaluate!` and `evaluate`

"""
-    evaluate!(mach,
-              resampling=CV(),
-              measure=nothing,
-              rows=nothing,
-              weights=nothing,
-              class_weights=nothing,
-              operation=nothing,
-              repeats=1,
-              acceleration=default_resource(),
-              force=false,
-              check_measure=true,
-              per_observation=true,
-              verbosity=1)
+    evaluate!(mach; resampling=CV(), measure=nothing, options...)
Estimate the performance of a machine `mach` wrapping a supervised model in data, using the specified `resampling` strategy (defaulting to 6-fold cross-validation) and `measure`, -which can be a single measure or vector. +which can be a single measure or vector. Returns a [`PerformanceEvaluation`](@ref) +object. Do `subtypes(MLJ.ResamplingStrategy)` to obtain a list of available resampling strategies. If `resampling` is not an object of type `MLJ.ResamplingStrategy`, then a @@ -855,89 +846,55 @@ vector of tuples (of the form `(train_rows, test_rows)` is expected. For example gives two-fold cross-validation using the first 200 rows of data. -The type of operation (`predict`, `predict_mode`, etc) to be associated with `measure` is -automatically inferred from measure traits where possible. For example, `predict_mode` -will be used for a `Multiclass` target, if `model` is probabilistic but `measure` is -deterministic. The operations applied can be inspected from the `operation` field of the -object returned. Alternatively, operations can be explicitly specified using -`operation=...`. If `measure` is a vector, then `operation` must be a single operation, -which will be associated with all measures, or a vector of the same length as `measure`. +Any measure conforming to the +[StatisticalMeasuresBase.jl](https://juliaai.github.io/StatisticalMeasuresBase.jl/dev/) +API can be provided, assuming it can consume multiple observations. -The resampling strategy is applied repeatedly (Monte Carlo resampling) if `repeats > -1`. For example, if `repeats = 10`, then `resampling = CV(nfolds=5, shuffle=true)`, -generates a total of 50 `(train, test)` pairs for evaluation and subsequent aggregation. +Although `evaluate!` is mutating, `mach.model` and `mach.args` are not mutated. -If `resampling isa MLJ.ResamplingStrategy` then one may optionally restrict the data used -in evaluation by specifying `rows`. +# Additional keyword options -An optional `weights` vector may be passed for measures that support sample weights -(`StatisticalMeasuresBase.supports_weights(measure) == true`), which is ignored by those -that don't. These weights are not to be confused with any weights `w` bound to `mach` (as -in `mach = machine(model, X, y, w)`). To pass these to the performance evaluation measures -you must explictly specify `weights=w` in the `evaluate!` call. +- `rows` - vector of observation indices from which both train and test folds are + constructed (default is all observations) -Additionally, optional `class_weights` dictionary may be passed for measures that support -class weights (`MLJ.supports_class_weights(measure) == true`), which is ignored by those -that don't. These weights are not to be confused with any weights `class_w` bound to -`mach` (as in `mach = machine(model, X, y, class_w)`). To pass these to the performance -evaluation measures you must explictly specify `class_weights=w` in the `evaluate!` call. +- `operation`/`operations=nothing` - One of $PREDICT_OPERATIONS_STRING, or a vector of + these of the same length as `measure`/`measures`. Automatically inferred if left + unspecified. For example, `predict_mode` will be used for a `Multiclass` target, if + `model` is a probabilistic predictor, but `measure` is expects literal (point) target + predictions. Operations actually applied can be inspected from the `operation` field of + the object returned. -User-defined measures are supported; see the manual for details. 
+
-If no measure is specified, then `default_measure(mach.model)` is used, unless this
-default is `nothing` and an error is thrown.
+- `weights` - per-sample `Real` weights for measures that support them (not to be confused
+  with weights used in training, such as the `w` in `mach = machine(model, X, y, w)`).

-The `acceleration` keyword argument is used to specify the compute resource (a subtype of
-`ComputationalResources.AbstractResource`) that will be used to accelerate/parallelize the
-resampling operation.
+- `class_weights` - dictionary of `Real` per-class weights for use with measures that
+  support these, in classification problems (not to be confused
+  with weights used in training, such as the `w` in `mach = machine(model, X, y, w)`).

-Although `evaluate!` is mutating, `mach.model` and `mach.args` are untouched.
+- `repeats::Int=1`: set to a higher value for repeated (Monte Carlo)
+  resampling. For example, if `repeats = 10`, then `resampling = CV(nfolds=5,
+  shuffle=true)`, generates a total of 50 `(train, test)` pairs for evaluation and
+  subsequent aggregation.

-### Summary of key-word arguments
-
-- `resampling` - resampling strategy (default is `CV(nfolds=6)`)
-
-- `measure`/`measures` - measure or vector of measures (losses, scores, etc)
-
-- `rows` - vector of observation indices from which both train and
-  test folds are constructed (default is all observations)
-
-- `weights` - per-sample weights for measures that support them (not
-  to be confused with weights used in training)
-
-- `class_weights` - dictionary of per-class weights for use with
-  measures that support these, in classification problems (not to be
-  confused with per-sample `weights` or with class weights used in
-  training)
-
-- `operation`/`operations` - One of $PREDICT_OPERATIONS_STRING, or a
-  vector of these of the same length as
-  `measure`/`measures`. Automatically inferred if left unspecified.
-
-- `repeats` - default is 1; set to a higher value for repeated
-  (Monte Carlo) resampling
-
-- `acceleration` - parallelization option; currently supported
-  options are instances of `CPU1` (single-threaded computation)
-  `CPUThreads` (multi-threaded computation) and `CPUProcesses`
-  (multi-process computation); default is `default_resource()`.
-
-- `force` - default is `false`; set to `true` to force cold-restart
+- `acceleration=CPU1()`: acceleration/parallelization option; can be any instance of
+  `CPU1` (single-threaded computation), `CPUThreads` (multi-threaded computation) or
+  `CPUProcesses` (multi-process computation); default is `default_resource()`. These types
+  are owned by ComputationalResources.jl.

+- `force=false`: set to `true` to force cold-restart
   of each training event

-- `verbosity` level, an integer defaulting to 1.
+- `verbosity::Int=1`: logging level; can be negative

-- `check_measure` - default is `true`
+- `check_measure=true`: whether to screen measures for possible incompatibility with the
+  model. Will not catch all incompatibilities.

- `per_observation=true`: whether to calculate estimates for individual observations; if
  `false` the `per_observation` field of the returned object is populated with
  `missing`s. Setting to `false` may reduce compute time and allocations.

-### Return value
-
-A [`PerformanceEvaluation`](@ref) object. See
-[`PerformanceEvaluation`](@ref) for details.
+See also [`evaluate`](@ref), [`PerformanceEvaluation`](@ref) """ function evaluate!(mach::Machine{<:Measurable}; @@ -1011,12 +968,16 @@ function evaluate!(mach::Machine{<:Measurable}; end """ - evaluate(model, data...; cache=true, kw_options...) + evaluate(model, data...; cache=true, options...) Equivalent to `evaluate!(machine(model, data..., cache=cache); -wk_options...)`. See the machine version `evaluate!` for the complete +options...)`. See the machine version `evaluate!` for the complete list of options. +Returns a [`PerformanceEvaluation`](@ref) object. + +See also [`evaluate!`](@ref). + """ evaluate(model::Measurable, args...; cache=true, kwargs...) = evaluate!(machine(model, args...; cache=cache); kwargs...) From e2d15ddb5050229b814e85b69368f3d04fb9ab6e Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Fri, 26 May 2023 23:14:18 +1200 Subject: [PATCH 10/13] make StatisticalMeasures a [weakdep]; add DefaultMeasuresExt.jl --- Project.toml | 15 ++++++++++++--- ext/DefaultMeasuresExt.jl | 15 +++++++++++++++ src/MLJBase.jl | 9 +++++++-- src/composition/models/stacking.jl | 2 +- src/default_measures.jl | 13 +++++++------ .../learning_networks/deprecated_machines.jl | 1 + test/composition/learning_networks/nodes.jl | 1 + test/composition/learning_networks/signatures.jl | 1 + test/composition/models/network_composite.jl | 3 ++- test/composition/models/stacking.jl | 2 +- test/composition/models/static_transformers.jl | 1 + test/interface/model_api.jl | 1 + test/machines.jl | 1 + test/resampling.jl | 2 +- 14 files changed, 52 insertions(+), 15 deletions(-) create mode 100644 ext/DefaultMeasuresExt.jl diff --git a/Project.toml b/Project.toml index d0be5d1e..dd090b62 100644 --- a/Project.toml +++ b/Project.toml @@ -26,27 +26,35 @@ Reexport = "189a3867-3050-52da-a836-e630ba90ab69" ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" StatisticalMeasures = "a19d573c-0a75-4610-95b3-7071388c7541" +StatisticalMeasuresBase = "c062fc1d-0d66-479b-b6ac-8b44719de4cc" StatisticalTraits = "64bff920-2084-43da-a3e6-9bb72801c0c9" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" +[weakdeps] +StatisticalMeasures = "a19d573c-0a75-4610-95b3-7071388c7541" + +[extensions] +DefaultMeasuresExt = "StatisticalMeasures" + [compat] CategoricalArrays = "0.9, 0.10" CategoricalDistributions = "0.1" ComputationalResources = "0.3" Distributions = "0.25.3" InvertedIndices = "1" +LearnAPI = "0.1" MLJModelInterface = "1.7" Missings = "0.4, 1" -LearnAPI = "0.1" OrderedCollections = "1.1" Parameters = "0.12" PrettyTables = "1, 2" ProgressMeter = "1.7.1" Reexport = "1.2" ScientificTypes = "3" -StatisticalMeasures = "0.1" +StatisticalMeasures = "0.1.1" +StatisticalMeasuresBase = "0.1.1" StatisticalTraits = "3.2" StatsBase = "0.32, 0.33, 0.34" Tables = "0.2, 1.0" @@ -60,8 +68,9 @@ Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" +StatisticalMeasures = "a19d573c-0a75-4610-95b3-7071388c7541" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" [targets] -test = ["DataFrames", "DecisionTree", "Distances", "Logging", "MultivariateStats", "NearestNeighbors", "StableRNGs", "Test", "TypedTables"] +test = ["DataFrames", "DecisionTree", "Distances", "Logging", 
"MultivariateStats", "NearestNeighbors", "StableRNGs", "StatisticalMeasures", "Test", "TypedTables"] diff --git a/ext/DefaultMeasuresExt.jl b/ext/DefaultMeasuresExt.jl new file mode 100644 index 00000000..a06cd00f --- /dev/null +++ b/ext/DefaultMeasuresExt.jl @@ -0,0 +1,15 @@ +module DefaultMeasuresExt + +using MLJBase +import MLJBase:default_measure, ProbabilisticDetector, DeterministicDetector +using StatisticalMeasures +using StatisticalMeasures.ScientificTypesBase + +default_measure(::Deterministic, ::Type{<:Union{Continuous,Count}}) = l2 +default_measure(::Deterministic, ::Type{<:Finite}) = misclassification_rate +default_measure(::Probabilistic, ::Type{<:Union{Finite,Count}}) = log_loss +default_measure(::Probabilistic, ::Type{<:Continuous}) = log_loss +default_measure(::ProbabilisticDetector, ::Type{<:OrderedFactor{2}}) = area_under_curve +default_measure(::DeterministicDetector, ::Type{<:OrderedFactor{2}}) = balanced_accuracy + +end # module diff --git a/src/MLJBase.jl b/src/MLJBase.jl index 66472830..77d58dbe 100644 --- a/src/MLJBase.jl +++ b/src/MLJBase.jl @@ -89,8 +89,7 @@ import Distributions: pdf, logpdf, sampler const Dist = Distributions # Measures -@reexport using StatisticalMeasures -import StatisticalMeasures.StatisticalMeasuresBase +import StatisticalMeasuresBase # from Standard Library: using Statistics, LinearAlgebra, Random, InteractiveUtils @@ -312,4 +311,10 @@ export default_measure export pdf, sampler, mode, median, mean, shuffle!, categorical, shuffle, levels, levels!, std, Not, support, logpdf, LittleDict +# for julia < 1.9 +if !isdefined(Base, :get_extension) + include(joinpath("..","ext", "DefaultMeasuresExt.jl")) + @reexport using .DefaultMeasuresExt.StatisticalMeasures +end + end # module diff --git a/src/composition/models/stacking.jl b/src/composition/models/stacking.jl index 474ef920..14f055e5 100644 --- a/src/composition/models/stacking.jl +++ b/src/composition/models/stacking.jl @@ -425,7 +425,7 @@ function internal_stack_report( model_results.operation, )) ypred = operation(mach, Xtest) - measurements = StatisticalMeasures.measurements(measure, ypred, ytest) + measurements = StatisticalMeasuresBase.measurements(measure, ypred, ytest) # Update per observation: model_results.per_observation[i][foldid] = measurements diff --git a/src/default_measures.jl b/src/default_measures.jl index 0dc05ad0..2488bbf5 100644 --- a/src/default_measures.jl +++ b/src/default_measures.jl @@ -9,14 +9,15 @@ reliably inferred. For Julia 1.9 and higher, `nothing` is returned, unless StatisticalMeasures.jl is loaded. +# New implementations + +This method dispatches `default_measure(model, observation_scitype)`, which has +`nothing` as the fallback return value. Extend `default_measure` by overloading this +version of the method. See for example the MLJBase.jl package extension, +DefaultMeausuresExt.jl. 
+
 """
 default_measure(m) = nothing
 default_measure(m::Union{Supervised,Annotator}) =
     default_measure(m, nonmissingtype(guess_model_target_observation_scitype(m)))
 default_measure(m, S) = nothing
-default_measure(::Deterministic, ::Type{<:Union{Continuous,Count}}) = l2
-default_measure(::Deterministic, ::Type{<:Finite}) = misclassification_rate
-default_measure(::Probabilistic, ::Type{<:Union{Finite,Count}}) = log_loss
-default_measure(::Probabilistic, ::Type{<:Continuous}) = log_loss
-default_measure(::ProbabilisticDetector, ::Type{<:OrderedFactor{2}}) = area_under_curve
-default_measure(::DeterministicDetector, ::Type{<:OrderedFactor{2}}) = balanced_accuracy
diff --git a/test/composition/learning_networks/deprecated_machines.jl b/test/composition/learning_networks/deprecated_machines.jl
index 19b580d6..bad68bd2 100644
--- a/test/composition/learning_networks/deprecated_machines.jl
+++ b/test/composition/learning_networks/deprecated_machines.jl
@@ -9,6 +9,7 @@ using MLJBase
 using Tables
 using StableRNGs
 using Serialization
+using StatisticalMeasures
 rng = StableRNG(616161)

 # A dummy clustering model:
diff --git a/test/composition/learning_networks/nodes.jl b/test/composition/learning_networks/nodes.jl
index 1f175d45..e79cec9d 100644
--- a/test/composition/learning_networks/nodes.jl
+++ b/test/composition/learning_networks/nodes.jl
@@ -6,6 +6,7 @@ using MLJBase
 using ..Models
 using ..TestUtilities
 using CategoricalArrays
+using StatisticalMeasures
 import Random.seed!

 seed!(1234)
diff --git a/test/composition/learning_networks/signatures.jl b/test/composition/learning_networks/signatures.jl
index 08785b40..019a9cd5 100644
--- a/test/composition/learning_networks/signatures.jl
+++ b/test/composition/learning_networks/signatures.jl
@@ -7,6 +7,7 @@ using Tables
 using Test
 using MLJModelInterface
 using OrderedCollections
+using StatisticalMeasures

 @testset "signatures - accessor functions" begin
     a = source(:a)
diff --git a/test/composition/models/network_composite.jl b/test/composition/models/network_composite.jl
index 87e064df..df00f201 100644
--- a/test/composition/models/network_composite.jl
+++ b/test/composition/models/network_composite.jl
@@ -1,4 +1,4 @@
-module TestNetowrkComposite
+module TestNetworkComposite

 using Test
 using MLJBase
@@ -9,6 +9,7 @@ using Tables
 using MLJModelInterface
 using CategoricalArrays
 using OrderedCollections
+using StatisticalMeasures
 using Serialization

 const MMI = MLJModelInterface
diff --git a/test/composition/models/stacking.jl b/test/composition/models/stacking.jl
index 8bc62043..ca973775 100644
--- a/test/composition/models/stacking.jl
+++ b/test/composition/models/stacking.jl
@@ -2,11 +2,11 @@ module TestStacking

 using Test
 using MLJBase
+using StatisticalMeasures
 using MLJModelInterface
 using ..Models
 using Random
 using StableRNGs
-
 import Distributions

 rng = StableRNGs.StableRNG(1234)
diff --git a/test/composition/models/static_transformers.jl b/test/composition/models/static_transformers.jl
index c0162950..072dcbca 100644
--- a/test/composition/models/static_transformers.jl
+++ b/test/composition/models/static_transformers.jl
@@ -5,6 +5,7 @@ using Test
 using MLJBase
 using ..Models
 using CategoricalArrays
+using StatisticalMeasures
 import Random.seed!
seed!(1234) diff --git a/test/interface/model_api.jl b/test/interface/model_api.jl index 6fd553f8..8966f70f 100644 --- a/test/interface/model_api.jl +++ b/test/interface/model_api.jl @@ -2,6 +2,7 @@ module TestModelAPI using Test using MLJBase +using StatisticalMeasures import MLJModelInterface using ..Models using Distributions diff --git a/test/machines.jl b/test/machines.jl index 16655d26..7d0845c2 100644 --- a/test/machines.jl +++ b/test/machines.jl @@ -7,6 +7,7 @@ using ..Models using StableRNGs using Serialization using ..TestUtilities +using StatisticalMeasures const MLJModelInterface = MLJBase.MLJModelInterface const MMI = MLJModelInterface diff --git a/test/resampling.jl b/test/resampling.jl index 18fdead3..27850375 100644 --- a/test/resampling.jl +++ b/test/resampling.jl @@ -548,7 +548,7 @@ end X, y = make_blobs(rng=rng) cv=CV(nfolds = 2) fold1, fold2 = partition(eachindex(y), 0.5) - m = MLJBase.MulticlassFScore() + m = MulticlassFScore() class_w = Dict(1=>1, 2=>2, 3=>3) model = Models.DeterministicConstantClassifier() From 7ae73a5c0becd68feac20253c83b2bc4793180a6 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Sat, 27 May 2023 09:06:31 +1200 Subject: [PATCH 11/13] doc string tweak further tweak --- src/resampling.jl | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/resampling.jl b/src/resampling.jl index 3cd4c582..b66fcf6a 100644 --- a/src/resampling.jl +++ b/src/resampling.jl @@ -829,6 +829,17 @@ _process_accel_settings(accel) = throw(ArgumentError("unsupported" * # -------------------------------------------------------------- # User interface points: `evaluate!` and `evaluate` +const RESAMPLING_STRATEGIES = subtypes(ResamplingStrategy) +const RESAMPLING_STRATEGIES_LIST = + join( + map(RESAMPLING_STRATEGIES) do s + name = split(string(s), ".") |> last + "`$name`" + end, + ", ", + " and ", + ) + """ evaluate!(mach; resampling=CV(), measure=nothing, options...) @@ -837,9 +848,9 @@ the specified `resampling` strategy (defaulting to 6-fold cross-validation) and which can be a single measure or vector. Returns a [`PerformanceEvaluation`](@ref) object. -Do `subtypes(MLJ.ResamplingStrategy)` to obtain a list of available resampling -strategies. If `resampling` is not an object of type `MLJ.ResamplingStrategy`, then a -vector of tuples (of the form `(train_rows, test_rows)` is expected. For example, setting +Available resampling strategies are $RESAMPLING_STRATEGIES_LIST. If `resampling` is not an +instance of one of these, then a vector of tuples of the form `(train_rows, test_rows)` +is expected. For example, setting resampling = [((1:100), (101:200)), ((101:200), (1:100))] From 588e7779073048bf819ef95a13a7f2cb4432fc3b Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Mon, 29 May 2023 08:44:47 +1200 Subject: [PATCH 12/13] doc string typo --- src/resampling.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/resampling.jl b/src/resampling.jl index b66fcf6a..5cc61190 100644 --- a/src/resampling.jl +++ b/src/resampling.jl @@ -1351,7 +1351,7 @@ On subsequent calls to `fit!(mach)` new train/test pairs of row indices are only regenerated if `resampling`, `repeats` or `cache` fields of `resampler` have changed. The evolution of an RNG field of `resampler` does *not* constitute a change (`==` for `MLJType` objects -is not sensitive to such changes; see [`is_same_except'](@ref)). +is not sensitive to such changes; see [`is_same_except`](@ref)). 
If there is a single train/test pair, then warm-restart behavior of
the wrapped model `resampler.model` will extend to warm-restart
behaviour

From 054915034be22bf5c662f30eaa7f6a1d1f0f3cf1 Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom"
Date: Thu, 21 Sep 2023 17:50:52 +1200
Subject: [PATCH 13/13] update readme

---
 README.md | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 9323a9c2..8e9fc1e5 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,11 @@ repository provides core functionality for MLJ, including:
 - basic utilities for **manipulating datasets** and for **synthesizing datasets**
   (src/data)

-- a [small interface](https://alan-turing-institute.github.io/MLJ.jl/dev/evaluating_model_performance/#Custom-resampling-strategies-1) for **resampling strategies** and implementations, including `CV()`, `StratifiedCV` and `Holdout` (src/resampling.jl)
+- a [small
+  interface](https://alan-turing-institute.github.io/MLJ.jl/dev/evaluating_model_performance/#Custom-resampling-strategies-1)
+  for **resampling strategies** and implementations, including `CV()`, `StratifiedCV` and
+  `Holdout` (src/resampling.jl). Actual performance evaluation measures (aka metrics), which previously
+  were provided by MLJBase.jl, now live in [StatisticalMeasures.jl](https://juliaai.github.io/StatisticalMeasures.jl/dev/).

 - methods for **performance evaluation**, based on those resampling strategies
   (src/resampling.jl)

 - an abstract **hyperparameter tuning interface** and associated methods, for use with
   [MLJTuning](https://github.com/JuliaAI/MLJTuning.jl) (src/hyperparam)

-- a [small
-  interface](https://alan-turing-institute.github.io/MLJ.jl/dev/performance_measures/#Traits-and-custom-measures-1)
-  for **performance measures** (losses and scores), implementation of about 60 such measures, including integration of the
-  [LossFunctions.jl](https://github.com/JuliaML/LossFunctions.jl)
-  library (src/measures). To be migrated into separate package in the near future.
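A minimal sketch of the `default_measure` extension point introduced in PATCH 10 (see
the new docstring in src/default_measures.jl and the DefaultMeasuresExt.jl extension):
with StatisticalMeasures.jl loaded explicitly — it is now a weak dependency — third-party
code can declare a default measure for its own model type by overloading the
two-argument method. Here `MyRegressor` is a hypothetical model type and the choice of
`rms` is illustrative only, not part of the patch:

    using MLJBase, StatisticalMeasures
    import MLJBase: default_measure

    # hypothetical deterministic model type, for illustration only:
    struct MyRegressor <: Deterministic end

    # overload the two-argument dispatcher, as the new docstring advises;
    # the second argument is the model's target observation scitype:
    default_measure(::MyRegressor, ::Type{<:Union{Continuous,Count}}) = rms

With this in place, `evaluate`/`evaluate!` calls that omit `measure` would fall back to
`rms` for such models, provided the model also implements the usual MLJ model API
(including a `target_scitype` declaration), which is assumed here.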