Merge branch 'abhro-docstring-patch-1' into dev

JuliaAI · May 6, 2024 · af10ff2 · af10ff2
2 parents d6b1930 + e0ca155
commit af10ff2
Show file tree

Hide file tree

Showing 13 changed files with 238 additions and 222 deletions.
diff --git a/src/composition/learning_networks/nodes.jl b/src/composition/learning_networks/nodes.jl
@@ -409,14 +409,14 @@ of nodes, sources and other arguments.
 
 ### Examples
 
-```
-X = source(π)
-W = @node sin(X)
+```julia-repl
+julia> X = source(π)
+julia> W = @node sin(X)
 julia> W()
 0
 
-X = source(1:10)
-Y = @node selectrows(X, 3:4)
+julia> X = source(1:10)
+julia> Y = @node selectrows(X, 3:4)
 julia> Y()
 3:4
 
@@ -425,10 +425,10 @@ julia> Y(["one", "two", "three", "four"])
  "three"
  "four"
 
-X1 = source(4)
-X2 = source(5)
-add(a, b, c) = a + b + c
-N = @node add(X1, 1, X2)
+julia> X1 = source(4)
+julia> X2 = source(5)
+julia> add(a, b, c) = a + b + c
+julia> N = @node add(X1, 1, X2)
 julia> N()
 10
 

diff --git a/src/composition/learning_networks/signatures.jl b/src/composition/learning_networks/signatures.jl
@@ -8,10 +8,10 @@
 
 **Private method.**
 
-Return a dictionary of machines, keyed on model, for the all machines in the completed
-learning network for which `node` is the greatest lower bound. Only machines bound to
-symbolic models are included. Values are always vectors, even if they contain only a
-single machine.
+Return a dictionary of machines, keyed on model, for all the machines in the
+completed learning network for which `node` is the greatest lower bound. Only
+machines bound to symbolic models are included. Values are always vectors,
+even if they contain only a single machine.
 
 """
 function machines_given_model(node::AbstractNode)
@@ -35,14 +35,14 @@ attempt_scalarize(v) = length(v) == 1 ? v[1] : v
 
 **Private method.**
 
-Given a dictionary of machine vectors, keyed on model names (symbols), broadcast `f` over
-each vector, and make the result, in the returned named tuple, the value associated with
-the corresponding model name as key.
+Given a dictionary of machine vectors, keyed on model names (symbols), broadcast
+`f` over each vector, and make the result, in the returned named tuple, the
+value associated with the corresponding model name as key.
 
 Singleton vector values are scalarized, unless `scalarize = false`.
 
-If a value in the computed named tuple is `nothing`, or a vector of `nothing`s, then the
-entry is dropped from the tuple, unless `drop_nothings=false`.
+If a value in the computed named tuple is `nothing`, or a vector of `nothing`s,
+then the entry is dropped from the tuple, unless `drop_nothings=false`.
 
 """
 function tuple_keyed_on_model(f, machines_given_model; scalarize=true, drop_nothings=true)

diff --git a/src/composition/models/stacking.jl b/src/composition/models/stacking.jl
@@ -337,12 +337,12 @@ internal_stack_report(
 ) = NamedTuple{}()
 
 """
-internal_stack_report(
-    m::Stack,
-    verbosity::Int,
-    y::AbstractNode,
-    folds_evaluations::Vararg{AbstractNode},
-)
+    internal_stack_report(
+        m::Stack,
+        verbosity::Int,
+        y::AbstractNode,
+        folds_evaluations::Vararg{AbstractNode},
+    )
 
 When measure/measures is provided, the folds_evaluation will have been filled by
 `store_for_evaluation`. This function is not doing any heavy work (not constructing nodes
@@ -518,7 +518,7 @@ function oos_set(m::Stack{modelnames}, Xs::Source, ys::Source, tt_pairs) where m
 end
 
 #######################################
-################# Prefit #################
+################# Prefit ##############
 #######################################
 
 function prefit(m::Stack{modelnames}, verbosity::Int, X, y) where modelnames
@@ -564,8 +564,7 @@ const DOC_STACK =
     Stack(; metalearner=nothing, name1=model1, name2=model2, ..., keyword_options...)
 
 Implements the two-layer generalized stack algorithm introduced by
-[Wolpert
-(1992)](https://www.sciencedirect.com/science/article/abs/pii/S0893608005800231)
+[Wolpert (1992)](https://www.sciencedirect.com/science/article/abs/pii/S0893608005800231)
 and generalized by [Van der Laan et al
 (2007)](https://biostats.bepress.com/ucbbiostat/paper222/). Returns an
 instance of type `ProbabilisticStack` or `DeterministicStack`,

diff --git a/src/composition/models/transformed_target_model.jl b/src/composition/models/transformed_target_model.jl
@@ -61,7 +61,7 @@ const ERR_MODEL_UNSPECIFIED = ArgumentError(
     "Expecting atomic model as argument. None specified. "
 )
 const ERR_TRANSFORMER_UNSPECIFIED = ArgumentError(
-"You must specify `transformer=...`. ."
+    "You must specify `transformer=...`. ."
 )
 const ERR_TOO_MANY_ARGUMENTS = ArgumentError(
     "At most one non-keyword argument, a model, allowed. "
@@ -123,7 +123,7 @@ y -> mode.(y))`.
 A model that normalizes the target before applying ridge regression,
 with predictions returned on the original scale:
 
-```
+```julia
 @load RidgeRegressor pkg=MLJLinearModels
 model = RidgeRegressor()
 tmodel = TransformedTargetModel(model, transformer=Standardizer())
@@ -132,7 +132,7 @@ tmodel = TransformedTargetModel(model, transformer=Standardizer())
 A model that applies a static `log` transformation to the data, again
 returning predictions to the original scale:
 
-```
+```julia
 tmodel2 = TransformedTargetModel(model, transformer=y->log.(y), inverse=z->exp.(z))
 ```
 

diff --git a/src/data/data.jl b/src/data/data.jl
@@ -104,23 +104,28 @@ corresponding `fractions` of `length(nrows(X))`, where valid fractions
 are floats between 0 and 1 whose sum is less than one. The last
 fraction is not provided, as it is inferred from the preceding ones.
 
-For "synchronized" partitioning of multiple objects, use the
-`multi=true` option described below.
+For synchronized partitioning of multiple objects, use the
+`multi=true` option.
 
-    julia> partition(1:1000, 0.8)
-    ([1,...,800], [801,...,1000])
+```julia-repl
+julia> partition(1:1000, 0.8)
+([1,...,800], [801,...,1000])
 
-    julia> partition(1:1000, 0.2, 0.7)
-    ([1,...,200], [201,...,900], [901,...,1000])
+julia> partition(1:1000, 0.2, 0.7)
+([1,...,200], [201,...,900], [901,...,1000])
 
-    julia> partition(reshape(1:10, 5, 2), 0.2, 0.4)
-    ([1 6], [2 7; 3 8], [4 9; 5 10])
+julia> partition(reshape(1:10, 5, 2), 0.2, 0.4)
+([1 6], [2 7; 3 8], [4 9; 5 10])
 
-    X, y = make_blobs() # a table and vector
-    Xtrain, Xtest = partition(X, 0.8, stratify=y)
+julia> X, y = make_blobs() # a table and vector
+julia> Xtrain, Xtest = partition(X, 0.8, stratify=y)
+```
 
-    (Xtrain, Xtest), (ytrain, ytest) = partition((X, y), 0.8, rng=123, multi=true)
+Here's an example of synchronized partitioning of multiple objects:
 
+```julia-repl
+julia> (Xtrain, Xtest), (ytrain, ytest) = partition((X, y), 0.8, rng=123, multi=true)
+```
 
 ## Keywords
 
@@ -209,7 +214,7 @@ Returns a tuple of tables/vectors with length one greater than the
 number of supplied predicates, with the last component including all
 previously unselected columns.
 
-```
+```julia-repl
 julia> table = DataFrame(x=[1,2], y=['a', 'b'], z=[10.0, 20.0], w=["A", "B"])
 2×4 DataFrame
  Row │ x      y     z        w
@@ -300,9 +305,11 @@ The method is curried, so that `restrict(folds, i)` is the operator
 on data defined by `restrict(folds, i)(X) = restrict(X, folds, i)`.
 
 ### Example
-
-    folds = ([1, 2], [3, 4, 5],  [6,])
-    restrict([:x1, :x2, :x3, :x4, :x5, :x6], folds, 2) # [:x3, :x4, :x5]
+
+```julia
+folds = ([1, 2], [3, 4, 5],  [6,])
+restrict([:x1, :x2, :x3, :x4, :x5, :x6], folds, 2) # [:x3, :x4, :x5]
+```
 
 See also [`corestrict`](@ref)
 
@@ -322,7 +329,9 @@ all elements of `folds`. Here `folds` is a vector or tuple of integer
 vectors, typically representing row indices of a vector, matrix or
 table.
 
-    complement(([1,2], [3,], [4, 5]), 2) # [1 ,2, 4, 5]
+```julia
+complement(([1,2], [3,], [4, 5]), 2) # [1, 2, 4, 5]
+```
 
 """
 complement(f, i) = reduce(vcat, collect(f)[Not(i)])
@@ -345,8 +354,10 @@ on data defined by `corestrict(folds, i)(X) = corestrict(X, folds, i)`.
 
 ### Example
 
-    folds = ([1, 2], [3, 4, 5],  [6,])
-    corestrict([:x1, :x2, :x3, :x4, :x5, :x6], folds, 2) # [:x1, :x2, :x6]
+```julia
+folds = ([1, 2], [3, 4, 5],  [6,])
+corestrict([:x1, :x2, :x3, :x4, :x5, :x6], folds, 2) # [:x1, :x2, :x6]
+```
 
 """
 corestrict(f::NTuple{N}, i) where N = FoldComplementRestrictor{i,N}(f)

diff --git a/src/data/datasets.jl b/src/data/datasets.jl
@@ -158,7 +158,7 @@ const COERCE_SUNSPOTS = (
     (:sunspot_number=>Continuous),)
 
 """
-load_dataset(fpath, coercions)
+    load_dataset(fpath, coercions)
 
 Load one of the standard datasets, such as Boston, assuming the file is a
 comma-separated file with a header.

diff --git a/src/data/datasets_synthetic.jl b/src/data/datasets_synthetic.jl
@@ -18,9 +18,6 @@ const EXTRA_CLASSIFICATION =
 Internal function to finalize the `make_*` functions.
 
 """
-x = [1 2 3 ; 4 5 6]
-x
-length(size(collect(1:3))) # (
 function finalize_Xy(X, y, shuffle, as_table, eltype, rng; clf::Bool=true)
     # Shuffle the rows if required
     if shuffle
@@ -78,7 +75,7 @@ By default, a table `X` with `p` columns (features) and `n` rows
 
 ### Example
 
-```
+```julia
 X, y = make_blobs(100, 3; centers=2, cluster_std=[1.0, 3.0])
 ```
 
@@ -95,8 +92,7 @@ function make_blobs(n::Integer=100,
 
     # check arguments make sense
     if n < 1 || p < 1
-        throw(ArgumentError(
-            "Expected `n` and `p` to be at least 1."))
+        throw(ArgumentError("Expected `n` and `p` to be at least 1."))
     end
     if center_box.first >= center_box.second
         throw(ArgumentError(
@@ -181,7 +177,7 @@ $(EXTRA_KW_MAKE*EXTRA_CLASSIFICATION)
 
 ### Example
 
-```
+```julia
 X, y = make_circles(100; noise=0.5, factor=0.3)
 ```
 
@@ -196,12 +192,10 @@ function make_circles(n::Integer=100;
 
     # check arguments make sense
     if n < 1
-        throw(ArgumentError(
-            "Expected `n` to be at least 1."))
+        throw(ArgumentError("Expected `n` to be at least 1."))
     end
     if noise < 0
-        throw(ArgumentError(
-            "Noise argument cannot be negative."))
+        throw(ArgumentError("Noise argument cannot be negative."))
     end
     if !(0 < factor < 1)
         throw(ArgumentError(
@@ -224,12 +218,12 @@ function make_circles(n::Integer=100;
         X .+= noise .* randn(rng, n, 2)
     end
 
-        return finalize_Xy(X, y, shuffle, as_table, eltype, rng)
+    return finalize_Xy(X, y, shuffle, as_table, eltype, rng)
 end
 
 
 """
-        make_moons(n::Int=100; kwargs...)
+    make_moons(n::Int=100; kwargs...)
 
 Generates labeled two-dimensional points lying close to two
 interleaved semi-circles, for use with classification and clustering
@@ -257,7 +251,7 @@ membership to the left or right semi-circle.
 
 ### Example
 
-```
+```julia
 X, y = make_moons(100; noise=0.5)
 ```
 
@@ -273,12 +267,10 @@ function make_moons(n::Int=150;
 
     # check arguments make sense
     if n < 1
-        throw(ArgumentError(
-            "Expected `n` to be at least 1."))
+        throw(ArgumentError("Expected `n` to be at least 1."))
     end
     if noise < 0
-        throw(ArgumentError(
-            "Noise argument cannot be negative."))
+        throw(ArgumentError("Noise argument cannot be negative."))
     end
 
     rng = init_rng(rng)
@@ -324,8 +316,7 @@ end
 Make portion `s` of vector `θ` exactly 0.
 
 """
-sparsify!(rng, θ, s) =
-        (θ .*= (rand(rng, length(θ)) .< s))
+sparsify!(rng, θ, s) = (θ .*= (rand(rng, length(θ)) .< s))
 
 """Add outliers to portion s of vector."""
 outlify!(rng, y, s) =
@@ -338,19 +329,18 @@ const SIGMOID_32 = log(Float32(1)/eps(Float32) - Float32(1))
     sigmoid(x)
 
 Return the sigmoid computed in a numerically stable way:
-
 ``σ(x) = 1/(1+exp(-x))``
 
 """
 function sigmoid(x::Float64)
-        x > SIGMOID_64  && return one(x)
-        x < -SIGMOID_64 && return zero(x)
-        return one(x) / (one(x) + exp(-x))
+    x > SIGMOID_64  && return one(x)
+    x < -SIGMOID_64 && return zero(x)
+    return one(x) / (one(x) + exp(-x))
 end
 function sigmoid(x::Float32)
-        x > SIGMOID_32  && return one(x)
-        x < -SIGMOID_32 && return zero(x)
-        return one(x) / (one(x) + exp(-x))
+    x > SIGMOID_32  && return one(x)
+    x < -SIGMOID_32 && return zero(x)
+    return one(x) / (one(x) + exp(-x))
 end
 sigmoid(x) = sigmoid(float(x))
 
@@ -392,7 +382,7 @@ $EXTRA_KW_MAKE
 
 ### Example
 
-```
+```julia
 X, y = make_regression(100, 5; noise=0.5, sparse=0.2, outliers=0.1)
 ```
 
@@ -411,24 +401,19 @@ function make_regression(n::Int=100,
 
     # check arguments make sense
     if n < 1 || p < 1
-        throw(ArgumentError(
-            "Expected `n` and `p` to be at least 1."))
+        throw(ArgumentError("Expected `n` and `p` to be at least 1."))
     end
     if n_targets < 1
-        throw(ArgumentError(
-            "Expected `n_targets` to be at least 1."))
+        throw(ArgumentError("Expected `n_targets` to be at least 1."))
     end
     if !(0 <= sparse < 1)
-        throw(ArgumentError(
-            "Sparsity argument must be in [0, 1)."))
+        throw(ArgumentError("Sparsity argument must be in [0, 1)."))
     end
     if noise < 0
-        throw(ArgumentError(
-            "Noise argument cannot be negative."))
+        throw(ArgumentError("Noise argument cannot be negative."))
     end
     if !(0 <= outliers <= 1)
-        throw(ArgumentError(
-            "Outliers argument must be in [0, 1]."))
+        throw(ArgumentError("Outliers argument must be in [0, 1]."))
     end
 
     rng = init_rng(rng)