Skip to content

Commit

Permalink
Merge pull request #68 from alan-turing-institute/dev
Browse files Browse the repository at this point in the history
For a 0.4.2 release
  • Loading branch information
ablaom authored Aug 14, 2020
2 parents f9ec25c + e6d2d98 commit 250c055
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 38 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "MLJTuning"
uuid = "03970b2e-30c4-11ea-3135-d1576263f10f"
authors = ["Anthony D. Blaom <[email protected]>"]
version = "0.4.1"
version = "0.4.2"

[deps]
ComputationalResources = "ed09eef8-17a6-5b46-8889-db040fac31e3"
Expand Down
4 changes: 2 additions & 2 deletions src/plotrecipes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@

xguide --> r.parameter_names[1]
yguide --> r.parameter_names[2]
xscale --> (xsc == :linear ? :identity : xsc)
yscale --> (ysc == :linear ? :identity : ysc)
xscale --> (xsc in [:custom, :linear] ? :identity : xsc)
yscale --> (ysc in [:custom, :linear] ? :identity : ysc)

st = get(plotattributes, :seriestype, :scatter)

Expand Down
97 changes: 62 additions & 35 deletions src/tuned_models.jl
Original file line number Diff line number Diff line change
Expand Up @@ -52,49 +52,61 @@ MLJBase.is_wrapper(::Type{<:EitherTunedModel}) = true
acceleration=default_resource(),
acceleration_resampling=CPU1(),
check_measure=true)
Construct a model wrapper for hyperparameter optimization of a
supervised learner.
Calling `fit!(mach)` on a machine `mach=machine(tuned_model, X, y)` or
`mach=machine(tuned_model, X, y, w)` will:
- Instigate a search, over clones of `model`, with the hyperparameter
mutations specified by `range`, for a model optimizing the specified
`measure`, using performance evaluations carried out using the
specified `tuning` strategy and `resampling` strategy.
- Fit an internal machine, based on the optimal model
`fitted_params(mach).best_model`, wrapping the optimal `model`
object in *all* the provided data `X`, `y`(, `w`). Calling
`predict(mach, Xnew)` then returns predictions on `Xnew` of this
internal machine. The final train can be suppressed by setting
`train_best=false`.
The `range` objects supported depend on the `tuning` strategy
specified. Query the `strategy` docstring for details. To optimize
over an explicit list `v` of models of the same type, use
`strategy=Explicit()` and specify `model=v[1]` and `range=v`.
The number of models searched is specified by `n`. If unspecified,
then `MLJTuning.default_n(tuning, range)` is used. When `n` is
increased and `fit!(mach)` called again, the old search history is
re-instated and the search continues where it left off.
If `measure` supports weights (`supports_weights(measure) == true`)
then any `weights` specified will be passed to the measure. If more
than one `measure` is specified, then only the first is optimized
(unless `strategy` is multi-objective) but the performance against
every measure specified will be computed and reported in
`report(mach).best_performance` and other relevant attributes of the
generated report.
Specify `repeats > 1` for repeated resampling per model evaluation. See
[`evaluate!`](@ref) options for details.
Specify `repeats > 1` for repeated resampling per model
evaluation. See [`evaluate!`](@ref) options for details.
*Important.* If a custom `measure` is used, and the measure is
a score, rather than a loss, be sure to check that
`MLJ.orientation(measure) == :score` to ensure maximization of the
measure, rather than minimization. Override an incorrect value with
`MLJ.orientation(::typeof(measure)) = :score`.
*Important:* If `weights` are left unspecified, and `measure` supports
sample weights, then any weight vector `w` used in constructing a
corresponding tuning machine, as in `tuning_machine =
machine(tuned_model, X, y, w)` (which is then used in *training* each
model in the search) will also be passed to `measure` for evaluation.
In the case of two-parameter tuning, a Plots.jl plot of performance
estimates is returned by `plot(mach)` or `heatmap(mach)`.
Once a tuning machine `mach` has been trained as above, then
`fitted_params(mach)` has these keys/values:
key | value
Expand All @@ -109,35 +121,50 @@ key | value
`best_report` | report generated by fitting the optimal model
`history` | tuning strategy-specific history of all evaluations
plus others specific to the `tuning` strategy, such as `history=...`.
### Summary of keyword arguments
- `model`: `Supervised` model prototype that is cloned and mutated to
generate models for evaluation
- `tuning=Grid()`: tuning strategy to be applied (eg, `RandomSearch()`)
- `resampling=Holdout()`: resampling strategy (eg, `Holdout()`, `CV()`),
`StratifiedCV()`) to be applied in performance evaluations
- `measure`: measure or measures to be applied in performance
evaluations; only the first used in optimization (unless the
strategy is multi-objective) but all reported to the history
- `weights`: sample weights to be passed to the measure(s) in performance
evaluations, if supported (see important note above for behaviour in
the unspecified case)
- `repeats=1`: for generating train/test sets multiple times in
resampling; see [`evaluate!`](@ref) for details
- `operation=predict`: operation to be applied to each fitted model;
usually `predict` but `predict_mean`, `predict_median` or
`predict_mode` can be used for `Probabilistic` models, if
the specified measures are `Deterministic`
- `range`: range object; tuning strategy documentation describes
supported types
- `n`: number of iterations (ie, models to be evaluated); set by
tuning strategy if left unspecified
- `train_best=true`: whether to train the optimal model
- `acceleration=default_resource()`: mode of parallelization for
tuning strategies that support this
- `acceleration_resampling=CPU1()`: mode of parallelization for
resampling
- `check_measure`: whether to check `measure` is compatible with the
specified `model` and `operation`
"""
function TunedModel(; model=nothing,
tuning=Grid(),
Expand All @@ -159,8 +186,8 @@ function TunedModel(; model=nothing,
model == nothing && error("You need to specify model=... .\n"*
"If `tuning=Explicit()`, any model in the "*
"range will do. ")



if model isa Deterministic
tuned_model = DeterministicTunedModel(model, tuning, resampling,
Expand Down Expand Up @@ -200,33 +227,33 @@ function MLJBase.clean!(tuned_model::EitherTunedModel)
"Setting measure=$(tuned_model.measure). "
end
end
if (tuned_model.acceleration isa CPUProcesses &&

if (tuned_model.acceleration isa CPUProcesses &&
tuned_model.acceleration_resampling isa CPUProcesses)
message *=
message *=
"The combination acceleration=$(tuned_model.acceleration) and"*
" acceleration_resampling=$(tuned_model.acceleration_resampling) is"*
" not generally optimal. You may want to consider setting"*
" `acceleration = CPUProcesses()` and"*
" `acceleration_resampling = CPUThreads()`."
end
if (tuned_model.acceleration isa CPUThreads &&

if (tuned_model.acceleration isa CPUThreads &&
tuned_model.acceleration_resampling isa CPUProcesses)
message *=
message *=
"The combination acceleration=$(tuned_model.acceleration) and"*
" acceleration_resampling=$(tuned_model.acceleration_resampling) isn't"*
" supported. \n Resetting to"*
" `acceleration = CPUProcesses()` and"*
" `acceleration_resampling = CPUThreads()`."

tuned_model.acceleration = CPUProcesses()
tuned_model.acceleration_resampling = CPUThreads()
tuned_model.acceleration_resampling = CPUThreads()
end

tuned_model.acceleration =
_process_accel_settings(tuned_model.acceleration)

return message
end

Expand Down Expand Up @@ -277,22 +304,22 @@ function assemble_events(metamodels,
acceleration::CPU1)

n_metamodels = length(metamodels)

p = Progress(n_metamodels,
dt = 0,
desc = "Evaluating over $(n_metamodels) metamodels: ",
barglyphs = BarGlyphs("[=> ]"),
barlen = 25,
color = :yellow)

verbosity <1 || update!(p,0)

results = map(metamodels) do m
r= event(m, resampling_machine, verbosity, tuning, history, state)
verbosity < 1 || begin
p.counter += 1
ProgressMeter.updateProgress!(p)
end
ProgressMeter.updateProgress!(p)
end
r
end

Expand Down Expand Up @@ -320,13 +347,13 @@ results = @sync begin

# printing the progress bar
verbosity < 1 || begin
update!(p,0)
update!(p,0)
@async while take!(channel)
p.counter +=1
ProgressMeter.updateProgress!(p)
end
end


ret = @distributed vcat for m in metamodels
r = event(m, resampling_machine, verbosity, tuning, history, state)
Expand All @@ -338,7 +365,7 @@ results = @sync begin
verbosity < 1 || put!(channel, false)
ret
end

return results
end

Expand All @@ -351,7 +378,7 @@ function assemble_events(metamodels,
history,
state,
acceleration::CPUThreads)

if Threads.nthreads() == 1
return assemble_events(metamodels,
resampling_machine,
Expand All @@ -374,14 +401,14 @@ function assemble_events(metamodels,
barlen = 25,
color = :yellow)
ch = Channel{Bool}(min(1000, length(partitions)) )


@sync begin
# printing the progress bar
verbosity < 1 || begin
update!(p,0)
@async while take!(ch)
p.counter +=1
p.counter +=1
ProgressMeter.updateProgress!(p)
end
end
Expand All @@ -394,20 +421,20 @@ function assemble_events(metamodels,
check_measure = resampling_machine.model.check_measure,
repeats = resampling_machine.model.repeats,
acceleration = resampling_machine.model.acceleration),
resampling_machine.args...) for _ in 2:length(partitions)]...]
@sync for (i, parts) in enumerate(partitions)
Threads.@spawn begin
resampling_machine.args...) for _ in 2:length(partitions)]...]

@sync for (i, parts) in enumerate(partitions)
Threads.@spawn begin
results[i] = map(metamodels[parts]) do m
r = event(m, machs[i],
r = event(m, machs[i],
verbosity, tuning, history, state)
verbosity < 1 || put!(ch, true)
r
end
end
end
verbosity < 1 || put!(ch, false)

end
verbosity < 1 || put!(ch, false)
end
reduce(vcat, results)
end
Expand Down Expand Up @@ -450,7 +477,7 @@ function build(history,
Δj == 0 && break
shortfall < 0 && (metamodels = metamodels[1:n - j])
j += Δj

Δhistory = assemble_events(metamodels,
resampling_machine,
verbosity,
Expand Down Expand Up @@ -560,7 +587,7 @@ function MLJBase.update(tuned_model::EitherTunedModel, verbosity::Integer,
else

return fit(tuned_model, verbosity, data...)

end

end
Expand Down

0 comments on commit 250c055

Please sign in to comment.