From 14441aa04996b00ab2f54974c28b8b7be89286ff Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Thu, 9 May 2024 09:30:17 +1200 Subject: [PATCH 1/5] update urls broken by MLJ transfer --- README.md | 6 +++--- docs/src/index.md | 2 +- src/composition/learning_networks/nodes.jl | 2 +- src/composition/models/pipelines.jl | 2 +- src/composition/models/stacking.jl | 2 +- src/interface/data_utils.jl | 2 +- test/_models/Constant.jl | 2 +- test/_models/simple_composite_model.jl | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index a506c5bab..025a82f1b 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ ## MLJBase Repository for developers that provides core functionality for the -[MLJ](https://github.com/alan-turing-institute/MLJ.jl) machine +[MLJ](https://github.com/JuliaAI/MLJ.jl) machine learning framework. | Branch | Julia | Build | Coverage | @@ -20,7 +20,7 @@ learning framework. [![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://juliaai.github.io/MLJBase.jl/stable/) -[MLJ](https://github.com/alan-turing-institute/MLJ.jl) is a Julia +[MLJ](https://github.com/JuliaAI/MLJ.jl) is a Julia framework for combining and tuning machine learning models. This repository provides core functionality for MLJ, including: @@ -37,7 +37,7 @@ repository provides core functionality for MLJ, including: - basic utilities for **manipulating datasets** and for **synthesizing datasets** (src/data) - a [small - interface](https://alan-turing-institute.github.io/MLJ.jl/dev/evaluating_model_performance/#Custom-resampling-strategies-1) + interface](https://JuliaAI.github.io/MLJ.jl/dev/evaluating_model_performance/#Custom-resampling-strategies-1) for **resampling strategies** and implementations, including `CV()`, `StratifiedCV` and `Holdout` (src/resampling.jl). 
Actual performance evaluation measures (aka metrics), which previously were provided by MLJBase.jl, now live in [StatisticalMeasures.jl](https://juliaai.github.io/StatisticalMeasures.jl/dev/). diff --git a/docs/src/index.md b/docs/src/index.md index 0363f55f3..e514b5321 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -2,7 +2,7 @@ These docs are bare-bones and auto-generated. Complete MLJ documentation is -[here](https://alan-turing-institute.github.io/MLJ.jl/dev/). +[here](https://JuliaAI.github.io/MLJ.jl/dev/). For MLJBase-specific developer information, see also the [README.md file](https://github.com/JuliaAI/MLJBase.jl#readme). diff --git a/src/composition/learning_networks/nodes.jl b/src/composition/learning_networks/nodes.jl index 07bd3ae05..c4508448c 100644 --- a/src/composition/learning_networks/nodes.jl +++ b/src/composition/learning_networks/nodes.jl @@ -182,7 +182,7 @@ function ScientificTypes.elscitype( end # TODO after -# https://github.com/alan-turing-institute/ScientificTypesBase.jl/issues/102 : +# https://github.com/JuliaAI/ScientificTypesBase.jl/issues/102 : # Add Probabilistic case to above ScientificTypes.scitype(N::Node) = CallableReturning{elscitype(N)} diff --git a/src/composition/models/pipelines.jl b/src/composition/models/pipelines.jl index 5fdab66e4..41d165cba 100644 --- a/src/composition/models/pipelines.jl +++ b/src/composition/models/pipelines.jl @@ -397,7 +397,7 @@ end # # LEARNING NETWORK INTERFACE -# https://alan-turing-institute.github.io/MLJ.jl/dev/composing_models/#Learning-network-machines +# https://JuliaAI.github.io/MLJ.jl/dev/composing_models/#Learning-network-machines # ## Methods to extend a pipeline learning network diff --git a/src/composition/models/stacking.jl b/src/composition/models/stacking.jl index 9f6b4121f..7de001120 100644 --- a/src/composition/models/stacking.jl +++ b/src/composition/models/stacking.jl @@ -276,7 +276,7 @@ MLJBase.load_path(::Type{<:ProbabilisticStack}) = "MLJBase.ProbabilisticStack" 
MLJBase.load_path(::Type{<:DeterministicStack}) = "MLJBase.DeterministicStack" MLJBase.package_name(::Type{<:Stack}) = "MLJBase" MLJBase.package_uuid(::Type{<:Stack}) = "a7f614a8-145f-11e9-1d2a-a57a1082229d" -MLJBase.package_url(::Type{<:Stack}) = "https://github.com/alan-turing-institute/MLJBase.jl" +MLJBase.package_url(::Type{<:Stack}) = "https://github.com/JuliaAI/MLJBase.jl" MLJBase.package_license(::Type{<:Stack}) = "MIT" ########################################################### diff --git a/src/interface/data_utils.jl b/src/interface/data_utils.jl index 0362d2505..7b20178c8 100644 --- a/src/interface/data_utils.jl +++ b/src/interface/data_utils.jl @@ -89,7 +89,7 @@ MMI.selectcols(::FI, ::Val{:table}, X, ::Colon) = X function MMI.selectrows(::FI, ::Val{:table}, X, r) r = r isa Integer ? (r:r) : r # next uncommented line is a hack; see - # https://github.com/alan-turing-institute/MLJBase.jl/issues/151 + # https://github.com/JuliaAI/MLJBase.jl/issues/151 isdataframe(X) && return X[r, :] cols = Tables.columntable(X) new_cols = NamedTuple{keys(cols)}(tuple((c[r] for c in values(cols))...)) diff --git a/test/_models/Constant.jl b/test/_models/Constant.jl index 02ec815ed..17b8c4379 100644 --- a/test/_models/Constant.jl +++ b/test/_models/Constant.jl @@ -171,7 +171,7 @@ metadata_pkg.((ConstantRegressor, ConstantClassifier, DeterministicConstantRegressor, DeterministicConstantClassifier), name="MLJModels", uuid="d491faf4-2d78-11e9-2867-c94bc002c0b7", - url="https://github.com/alan-turing-institute/MLJModels.jl", + url="https://github.com/JuliaAI/MLJModels.jl", julia=true, license="MIT", is_wrapper=false) diff --git a/test/_models/simple_composite_model.jl b/test/_models/simple_composite_model.jl index 09951d49f..d16af3081 100644 --- a/test/_models/simple_composite_model.jl +++ b/test/_models/simple_composite_model.jl @@ -54,7 +54,7 @@ for model in COMPOSITE_MODELS MLJBase.metadata_pkg( $(model); - package_url = "https://github.com/alan-turing-institute/MLJBase.jl", 
+ package_url = "https://github.com/JuliaAI/MLJBase.jl", is_pure_julia = true, is_wrapper = true ) From 19fe38b74c51e1d93b4dc1ed006dd3e606498f99 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Fri, 31 May 2024 09:11:34 +1200 Subject: [PATCH 2/5] bump 1.4.0 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 181d50af4..0c339e9e1 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MLJBase" uuid = "a7f614a8-145f-11e9-1d2a-a57a1082229d" authors = ["Anthony D. Blaom "] -version = "1.3" +version = "1.4.0" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" From 4e9d58105a9bdc8c118cfe6292d4a9e60942a836 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Fri, 31 May 2024 09:13:45 +1200 Subject: [PATCH 3/5] bump [compat] MLJModelInterface = "1.10"; StatisticalTraits = "3.4" --- Project.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 0c339e9e1..b2efe10a0 100644 --- a/Project.toml +++ b/Project.toml @@ -47,7 +47,7 @@ DelimitedFiles = "1" Distributions = "0.25.3" InvertedIndices = "1" LearnAPI = "0.1" -MLJModelInterface = "1.7" +MLJModelInterface = "1.10" Missings = "0.4, 1" OrderedCollections = "1.1" Parameters = "0.12" @@ -58,7 +58,7 @@ Reexport = "1.2" ScientificTypes = "3" StatisticalMeasures = "0.1.1" StatisticalMeasuresBase = "0.1.1" -StatisticalTraits = "3.2" +StatisticalTraits = "3.4" Statistics = "1" StatsBase = "0.32, 0.33, 0.34" Tables = "0.2, 1.0" From bdb9cfc4159a0534f4da02ef5ecfaa37e50008f9 Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Mon, 3 Jun 2024 16:20:22 +1200 Subject: [PATCH 4/5] oops; change [compat] StatisticalTraits = "3.3" --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index b2efe10a0..39ebba981 100644 --- a/Project.toml +++ b/Project.toml @@ -58,7 +58,7 @@ Reexport = "1.2" ScientificTypes = "3" StatisticalMeasures = "0.1.1" StatisticalMeasuresBase = "0.1.1" -StatisticalTraits = "3.4" +StatisticalTraits = "3.3" Statistics = "1" StatsBase = "0.32, 0.33, 0.34" Tables = "0.2, 1.0" From 52d16df186282878be8a0c8d8a4d80da7a93dbac Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Mon, 3 Jun 2024 16:21:02 +1200 Subject: [PATCH 5/5] implement constructor trait for wrappers --- src/composition/models/pipelines.jl | 3 +++ src/composition/models/stacking.jl | 7 ++--- .../models/transformed_target_model.jl | 7 ++++- src/resampling.jl | 27 +++++++++---------- test/composition/models/pipelines.jl | 4 ++- test/composition/models/stacking.jl | 3 +++ .../models/transformed_target_model.jl | 1 + test/resampling.jl | 3 +++ 8 files changed, 35 insertions(+), 20 deletions(-) diff --git a/src/composition/models/pipelines.jl b/src/composition/models/pipelines.jl index 41d165cba..0ea85297b 100644 --- a/src/composition/models/pipelines.jl +++ b/src/composition/models/pipelines.jl @@ -599,6 +599,9 @@ end MMI.target_scitype(p::SupervisedPipeline) = target_scitype(supervised_component(p)) +MMI.package_name(::Type{<:SomePipeline}) = "MLJBase" +MMI.load_path(::Type{<:SomePipeline}) = "MLJBase.Pipeline" +MMI.constructor(::Type{<:SomePipeline}) = Pipeline # ## Training losses diff --git a/src/composition/models/stacking.jl b/src/composition/models/stacking.jl index 7de001120..70be50414 100644 --- a/src/composition/models/stacking.jl +++ b/src/composition/models/stacking.jl @@ -264,6 +264,8 @@ function Base.setproperty!(stack::Stack{modelnames}, _name::Symbol, val) where m end +# # TRAITS + MMI.target_scitype(::Type{<:Stack{modelnames, 
input_scitype, target_scitype}}) where {modelnames, input_scitype, target_scitype} = target_scitype @@ -271,9 +273,8 @@ MMI.target_scitype(::Type{<:Stack{modelnames, input_scitype, target_scitype}}) w MMI.input_scitype(::Type{<:Stack{modelnames, input_scitype, target_scitype}}) where {modelnames, input_scitype, target_scitype} = input_scitype - -MLJBase.load_path(::Type{<:ProbabilisticStack}) = "MLJBase.ProbabilisticStack" -MLJBase.load_path(::Type{<:DeterministicStack}) = "MLJBase.DeterministicStack" +MMI.constructor(::Type{<:Stack}) = Stack +MLJBase.load_path(::Type{<:Stack}) = "MLJBase.Stack" MLJBase.package_name(::Type{<:Stack}) = "MLJBase" MLJBase.package_uuid(::Type{<:Stack}) = "a7f614a8-145f-11e9-1d2a-a57a1082229d" MLJBase.package_url(::Type{<:Stack}) = "https://github.com/JuliaAI/MLJBase.jl" diff --git a/src/composition/models/transformed_target_model.jl b/src/composition/models/transformed_target_model.jl index 9304b63ca..272b226e2 100644 --- a/src/composition/models/transformed_target_model.jl +++ b/src/composition/models/transformed_target_model.jl @@ -10,7 +10,8 @@ const TT_SUPPORTED_ATOMS = ( :Deterministic, :DeterministicUnsupervisedDetector, :DeterministicSupervisedDetector, - :Interval) + :Interval, +) # Each supported atomic type gets its own wrapper: @@ -265,6 +266,10 @@ MMI.package_uuid(::Type{<:SomeTT}) = "a7f614a8-145f-11e9-1d2a-a57a1082229d" MMI.is_wrapper(::Type{<:SomeTT}) = true MMI.package_url(::Type{<:SomeTT}) = "https://github.com/JuliaAI/MLJBase.jl" +MMI.load_path(::Type{<:SomeTT}) = "MLJBase.TransformedTargetModel" +MMI.constructor(::Type{<:SomeTT}) = TransformedTargetModel + + for New in TT_TYPE_EXS quote MMI.iteration_parameter(::Type{<:$New{M}}) where M = diff --git a/src/resampling.jl b/src/resampling.jl index 250e3ca0c..68ecc0404 100644 --- a/src/resampling.jl +++ b/src/resampling.jl @@ -1548,9 +1548,11 @@ end compact=false, ) +*Private method.* Use at own risk. 
+ Resampling model wrapper, used internally by the `fit` method of `TunedModel` instances -and `IteratedModel` instances. See [`evaluate!](@ref) for options. Not intended for use by -general user, who will ordinarily use [`evaluate!`](@ref) directly. +and `IteratedModel` instances. See [`evaluate!`](@ref) for meaning of the options. Not +intended for use by general user, who will ordinarily use [`evaluate!`](@ref) directly. Given a machine `mach = machine(resampler, args...)` one obtains a performance evaluation of the specified `model`, performed according to the prescribed `resampling` strategy and @@ -1592,16 +1594,6 @@ mutable struct Resampler{S, L} <: Model compact::Bool end -# Some traits are markded as `missing` because we cannot determine -# them from from the type because we have removed `M` (for "model"} as -# a `Resampler` type parameter. See -# https://github.com/JuliaAI/MLJTuning.jl/issues/141#issue-951221466 - -StatisticalTraits.is_wrapper(::Type{<:Resampler}) = true -StatisticalTraits.supports_weights(::Type{<:Resampler}) = missing -StatisticalTraits.supports_class_weights(::Type{<:Resampler}) = missing -StatisticalTraits.is_pure_julia(::Type{<:Resampler}) = true - function MLJModelInterface.clean!(resampler::Resampler) warning = "" if resampler.measure === nothing && resampler.model !== nothing @@ -1787,11 +1779,16 @@ function MLJModelInterface.update( end -# The input and target scitypes cannot be determined from the type -# because we have removed `M` (for "model") as a `Resampler` type -# parameter. See +# Some traits are marked as `missing` because we cannot determine +# them from the type because we have removed `M` (for "model") as +# a `Resampler` type parameter. 
See # https://github.com/JuliaAI/MLJTuning.jl/issues/141#issue-951221466 +StatisticalTraits.is_wrapper(::Type{<:Resampler}) = true +StatisticalTraits.supports_weights(::Type{<:Resampler}) = missing +StatisticalTraits.supports_class_weights(::Type{<:Resampler}) = missing +StatisticalTraits.is_pure_julia(::Type{<:Resampler}) = true +StatisticalTraits.constructor(::Type{<:Resampler}) = Resampler StatisticalTraits.input_scitype(::Type{<:Resampler}) = Unknown StatisticalTraits.target_scitype(::Type{<:Resampler}) = Unknown StatisticalTraits.package_name(::Type{<:Resampler}) = "MLJBase" diff --git a/test/composition/models/pipelines.jl b/test/composition/models/pipelines.jl index d8790f096..e213cdc25 100644 --- a/test/composition/models/pipelines.jl +++ b/test/composition/models/pipelines.jl @@ -95,7 +95,9 @@ end @testset "public constructor" begin # un-named components: - @test Pipeline(m, t, u) isa UnsupervisedPipeline + flute = Pipeline(m, t, u) + @test flute isa UnsupervisedPipeline + @test MLJBase.constructor(flute) == Pipeline @test Pipeline(m, t, u, p) isa ProbabilisticPipeline @test Pipeline(m, t, u, p, operation=predict_mean) isa DeterministicPipeline @test Pipeline(u, p, u, operation=predict_mean) isa DeterministicPipeline diff --git a/test/composition/models/stacking.jl b/test/composition/models/stacking.jl index ca9737758..110ccdef3 100644 --- a/test/composition/models/stacking.jl +++ b/test/composition/models/stacking.jl @@ -202,6 +202,9 @@ end measures=rmse, resampling=CV(;nfolds=3), models...) 
+ + @test MLJBase.constructor(mystack) == Stack + @test mystack.ridge_lambda.lambda == 0.1 @test mystack.metalearner isa FooBarRegressor @test mystack.resampling isa CV diff --git a/test/composition/models/transformed_target_model.jl b/test/composition/models/transformed_target_model.jl index b992503c1..e34355423 100644 --- a/test/composition/models/transformed_target_model.jl +++ b/test/composition/models/transformed_target_model.jl @@ -86,6 +86,7 @@ avg_nonlinear = g(mean(f(y))) # = g(mean(z)) # Test wrapping using f and g: model = TransformedTargetModel(atom, transformer=f, inverse=g) + @test MLJBase.constructor(model) == TransformedTargetModel fr1, _, _ = MMI.fit(model, 0, X, y) @test first(predict(model, fr1, X)) ≈ fill(avg_nonlinear, 5) diff --git a/test/resampling.jl b/test/resampling.jl index 19b351656..33ee41ec0 100644 --- a/test/resampling.jl +++ b/test/resampling.jl @@ -606,6 +606,9 @@ end holdout = Holdout(fraction_train=0.75) resampler = Resampler(resampling=holdout, model=ridge_model, measure=mae, acceleration=accel) + @test constructor(resampler) == Resampler + @test package_name(resampler) == "MLJBase" + @test load_path(resampler) == "MLJBase.Resampler" resampling_machine = machine(resampler, X, y) @test_logs((:info, r"^Training"), fit!(resampling_machine)) e1=evaluate(resampling_machine).measurement[1]