Skip to content

Commit

Permalink
Merge pull request #68 from alan-turing-institute/dev
Browse files Browse the repository at this point in the history
For a 0.4.2 release
  • Loading branch information
ablaom authored Aug 14, 2020
2 parents f9ec25c + e6d2d98 commit 250c055
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 38 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "MLJTuning"
uuid = "03970b2e-30c4-11ea-3135-d1576263f10f"
authors = ["Anthony D. Blaom <[email protected]>"]
version = "0.4.1"
version = "0.4.2"

[deps]
ComputationalResources = "ed09eef8-17a6-5b46-8889-db040fac31e3"
Expand Down
4 changes: 2 additions & 2 deletions src/plotrecipes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@

xguide --> r.parameter_names[1]
yguide --> r.parameter_names[2]
xscale --> (xsc == :linear ? :identity : xsc)
yscale --> (ysc == :linear ? :identity : ysc)
xscale --> (xsc in [:custom, :linear] ? :identity : xsc)
yscale --> (ysc in [:custom, :linear] ? :identity : ysc)

st = get(plotattributes, :seriestype, :scatter)

Expand Down
97 changes: 62 additions & 35 deletions src/tuned_models.jl
Original file line number Diff line number Diff line change
Expand Up @@ -52,49 +52,61 @@ MLJBase.is_wrapper(::Type{<:EitherTunedModel}) = true
acceleration=default_resource(),
acceleration_resampling=CPU1(),
check_measure=true)
Construct a model wrapper for hyperparameter optimization of a
supervised learner.
Calling `fit!(mach)` on a machine `mach=machine(tuned_model, X, y)` or
`mach=machine(tuned_model, X, y, w)` will:
- Instigate a search, over clones of `model`, with the hyperparameter
mutations specified by `range`, for a model optimizing the specified
`measure`, using performance evaluations carried out using the
specified `tuning` strategy and `resampling` strategy.
- Fit an internal machine, based on the optimal model
`fitted_params(mach).best_model`, wrapping the optimal `model`
object in *all* the provided data `X`, `y`(, `w`). Calling
`predict(mach, Xnew)` then returns predictions on `Xnew` of this
internal machine. The final train can be suppressed by setting
`train_best=false`.
The `range` objects supported depend on the `tuning` strategy
specified. Query the `strategy` docstring for details. To optimize
over an explicit list `v` of models of the same type, use
`strategy=Explicit()` and specify `model=v[1]` and `range=v`.
The number of models searched is specified by `n`. If unspecified,
then `MLJTuning.default_n(tuning, range)` is used. When `n` is
increased and `fit!(mach)` called again, the old search history is
re-instated and the search continues where it left off.
If `measure` supports weights (`supports_weights(measure) == true`)
then any `weights` specified will be passed to the measure. If more
than one `measure` is specified, then only the first is optimized
(unless `strategy` is multi-objective) but the performance against
every measure specified will be computed and reported in
`report(mach).best_performance` and other relevant attributes of the
generated report.
Specify `repeats > 1` for repeated resampling per model evaluation. See
[`evaluate!`](@ref) options for details.
Specify `repeats > 1` for repeated resampling per model
evaluation. See [`evaluate!`](@ref) options for details.
*Important.* If a custom `measure` is used, and the measure is
a score, rather than a loss, be sure to check that
`MLJ.orientation(measure) == :score` to ensure maximization of the
measure, rather than minimization. Override an incorrect value with
`MLJ.orientation(::typeof(measure)) = :score`.
*Important:* If `weights` are left unspecified, and `measure` supports
sample weights, then any weight vector `w` used in constructing a
corresponding tuning machine, as in `tuning_machine =
machine(tuned_model, X, y, w)` (which is then used in *training* each
model in the search) will also be passed to `measure` for evaluation.
In the case of two-parameter tuning, a Plots.jl plot of performance
estimates is returned by `plot(mach)` or `heatmap(mach)`.
Once a tuning machine `mach` has been trained as above, then
`fitted_params(mach)` has these keys/values:
key | value
Expand All @@ -109,35 +121,50 @@ key | value
`best_report` | report generated by fitting the optimal model
`history` | tuning strategy-specific history of all evaluations
plus others specific to the `tuning` strategy, such as `history=...`.
### Summary of keyword arguments
- `model`: `Supervised` model prototype that is cloned and mutated to
generate models for evaluation
- `tuning=Grid()`: tuning strategy to be applied (eg, `RandomSearch()`)
- `resampling=Holdout()`: resampling strategy (eg, `Holdout()`, `CV()`),
`StratifiedCV()`) to be applied in performance evaluations
- `measure`: measure or measures to be applied in performance
evaluations; only the first used in optimization (unless the
strategy is multi-objective) but all reported to the history
- `weights`: sample weights to be passed to the measure(s) in performance
evaluations, if supported (see important note above for behaviour in
the unspecified case)
- `repeats=1`: for generating train/test sets multiple times in
resampling; see [`evaluate!`](@ref) for details
- `operation=predict`: operation to be applied to each fitted model;
usually `predict` but `predict_mean`, `predict_median` or
`predict_mode` can be used for `Probabilistic` models, if
the specified measures are `Deterministic`
- `range`: range object; tuning strategy documentation describes
supported types
- `n`: number of iterations (ie, models to be evaluated); set by
tuning strategy if left unspecified
- `train_best=true`: whether to train the optimal model
- `acceleration=default_resource()`: mode of parallelization for
tuning strategies that support this
- `acceleration_resampling=CPU1()`: mode of parallelization for
resampling
- `check_measure`: whether to check `measure` is compatible with the
specified `model` and `operation`
"""
function TunedModel(; model=nothing,
tuning=Grid(),
Expand All @@ -159,8 +186,8 @@ function TunedModel(; model=nothing,
model == nothing && error("You need to specify model=... .\n"*
"If `tuning=Explicit()`, any model in the "*
"range will do. ")



if model isa Deterministic
tuned_model = DeterministicTunedModel(model, tuning, resampling,
Expand Down Expand Up @@ -200,33 +227,33 @@ function MLJBase.clean!(tuned_model::EitherTunedModel)
"Setting measure=$(tuned_model.measure). "
end
end
if (tuned_model.acceleration isa CPUProcesses &&

if (tuned_model.acceleration isa CPUProcesses &&
tuned_model.acceleration_resampling isa CPUProcesses)
message *=
message *=
"The combination acceleration=$(tuned_model.acceleration) and"*
" acceleration_resampling=$(tuned_model.acceleration_resampling) is"*
" not generally optimal. You may want to consider setting"*
" `acceleration = CPUProcesses()` and"*
" `acceleration_resampling = CPUThreads()`."
end
if (tuned_model.acceleration isa CPUThreads &&

if (tuned_model.acceleration isa CPUThreads &&
tuned_model.acceleration_resampling isa CPUProcesses)
message *=
message *=
"The combination acceleration=$(tuned_model.acceleration) and"*
" acceleration_resampling=$(tuned_model.acceleration_resampling) isn't"*
" supported. \n Resetting to"*
" `acceleration = CPUProcesses()` and"*
" `acceleration_resampling = CPUThreads()`."

tuned_model.acceleration = CPUProcesses()
tuned_model.acceleration_resampling = CPUThreads()
tuned_model.acceleration_resampling = CPUThreads()
end

tuned_model.acceleration =
_process_accel_settings(tuned_model.acceleration)

return message
end

Expand Down Expand Up @@ -277,22 +304,22 @@ function assemble_events(metamodels,
acceleration::CPU1)

n_metamodels = length(metamodels)

p = Progress(n_metamodels,
dt = 0,
desc = "Evaluating over $(n_metamodels) metamodels: ",
barglyphs = BarGlyphs("[=> ]"),
barlen = 25,
color = :yellow)

verbosity <1 || update!(p,0)

results = map(metamodels) do m
r= event(m, resampling_machine, verbosity, tuning, history, state)
verbosity < 1 || begin
p.counter += 1
ProgressMeter.updateProgress!(p)
end
ProgressMeter.updateProgress!(p)
end
r
end

Expand Down Expand Up @@ -320,13 +347,13 @@ results = @sync begin

# printing the progress bar
verbosity < 1 || begin
update!(p,0)
update!(p,0)
@async while take!(channel)
p.counter +=1
ProgressMeter.updateProgress!(p)
end
end


ret = @distributed vcat for m in metamodels
r = event(m, resampling_machine, verbosity, tuning, history, state)
Expand All @@ -338,7 +365,7 @@ results = @sync begin
verbosity < 1 || put!(channel, false)
ret
end

return results
end

Expand All @@ -351,7 +378,7 @@ function assemble_events(metamodels,
history,
state,
acceleration::CPUThreads)

if Threads.nthreads() == 1
return assemble_events(metamodels,
resampling_machine,
Expand All @@ -374,14 +401,14 @@ function assemble_events(metamodels,
barlen = 25,
color = :yellow)
ch = Channel{Bool}(min(1000, length(partitions)) )


@sync begin
# printing the progress bar
verbosity < 1 || begin
update!(p,0)
@async while take!(ch)
p.counter +=1
p.counter +=1
ProgressMeter.updateProgress!(p)
end
end
Expand All @@ -394,20 +421,20 @@ function assemble_events(metamodels,
check_measure = resampling_machine.model.check_measure,
repeats = resampling_machine.model.repeats,
acceleration = resampling_machine.model.acceleration),
resampling_machine.args...) for _ in 2:length(partitions)]...]
@sync for (i, parts) in enumerate(partitions)
Threads.@spawn begin
resampling_machine.args...) for _ in 2:length(partitions)]...]

@sync for (i, parts) in enumerate(partitions)
Threads.@spawn begin
results[i] = map(metamodels[parts]) do m
r = event(m, machs[i],
r = event(m, machs[i],
verbosity, tuning, history, state)
verbosity < 1 || put!(ch, true)
r
end
end
end
verbosity < 1 || put!(ch, false)

end
verbosity < 1 || put!(ch, false)
end
reduce(vcat, results)
end
Expand Down Expand Up @@ -450,7 +477,7 @@ function build(history,
Δj == 0 && break
shortfall < 0 && (metamodels = metamodels[1:n - j])
j += Δj

Δhistory = assemble_events(metamodels,
resampling_machine,
verbosity,
Expand Down Expand Up @@ -560,7 +587,7 @@ function MLJBase.update(tuned_model::EitherTunedModel, verbosity::Integer,
else

return fit(tuned_model, verbosity, data...)

end

end
Expand Down

0 comments on commit 250c055

Please sign in to comment.