Skip to content

Commit

Permalink
Merge pull request #28 from Yuan-Ru-Lin/add-affinity-propagation
Browse files Browse the repository at this point in the history
Initial commit for implementation of Affinity Propagation
  • Loading branch information
ablaom authored Dec 10, 2024
2 parents 0be6b1a + da6e69c commit 4e3c06f
Show file tree
Hide file tree
Showing 4 changed files with 175 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
fail-fast: false
matrix:
version:
- '1.6'
- '1.10'
- '1'
os:
- ubuntu-latest
Expand Down
6 changes: 5 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,14 @@ version = "0.1.11"
[deps]
Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5"
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"

[compat]
Clustering = "0.15"
Distances = "0.9, 0.10"
LinearAlgebra = "1"
MLJModelInterface = "1.4"
julia = "1.6"
StatsBase = "0.34"
julia = "1.10"
139 changes: 136 additions & 3 deletions src/MLJClusteringInterface.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@ import MLJModelInterface: Continuous, Count, Finite, Multiclass, Table, OrderedF
@mlj_model, metadata_model, metadata_pkg

using Distances
using LinearAlgebra
using StatsBase

# ===================================================================
## EXPORTS
export KMeans, KMedoids, DBSCAN, HierarchicalClustering
export KMeans, KMedoids, AffinityPropagation, DBSCAN, HierarchicalClustering

# ===================================================================
## CONSTANTS
Expand Down Expand Up @@ -95,7 +97,6 @@ function MMI.transform(model::KMedoids, fitresult, X)
return MMI.table(X̃, prototype=X)
end


# # PREDICT FOR K_MEANS AND K_MEDOIDS

function MMI.predict(model::Union{KMeans,KMedoids}, fitresult, Xnew)
Expand Down Expand Up @@ -208,10 +209,66 @@ end

MMI.reporting_operations(::Type{<:HierarchicalClustering}) = (:predict,)

# # AFFINITY_PROPAGATION

# Affinity Propagation clusterer (wraps `Clustering.affinityprop`).
# Declared `MMI.Static`: there is no training step and the model does not
# generalize to new data — clustering happens entirely in `predict`.
# (Julia's parser strips comments before `@mlj_model` sees the expression,
# so the `default::(constraint)` field syntax below is unaffected.)
@mlj_model mutable struct AffinityPropagation <: MMI.Static
damp::Float64 = 0.5::(0.0 ≤ _ < 1.0)  # damping factor for message updates
maxiter::Int = 200::(_ > 0)  # maximum number of message-passing iterations
tol::Float64 = 1e-6::(_ > 0)  # convergence tolerance
preference::Union{Nothing,Float64} = nothing  # diagonal of the similarity matrix; `nothing` => median off-diagonal similarity
metric::SemiMetric = SqEuclidean()  # distance metric; negated to form similarities
end

# Compute affinity-propagation cluster assignments for the observations in
# table `X`. Because `AffinityPropagation` is `Static`, `fitresult` is always
# `nothing` and all work happens here. Returns `(assignments, report)`, where
# the report carries exemplar indices, exemplar positions ("centers"),
# cluster labels, iteration count, and convergence status.
function MMI.predict(model::AffinityPropagation, ::Nothing, X)
    # Observations as columns, the layout `Distances.pairwise` expects.
    data = MMI.matrix(X)'

    # Similarity matrix: negated pairwise distances.
    S = -pairwise(model.metric, data, dims=2)

    # The diagonal of `S` (the "preference") governs how many exemplars
    # emerge. When unspecified, default to the median of the strictly
    # upper-triangular similarities — the standard recommendation for the
    # algorithm (see its Wikipedia article).
    preference = model.preference
    if preference === nothing
        upper = triu!(trues(size(S)), 1)
        preference = median(S[upper])
    end
    fill!(view(S, diagind(S)), preference)

    result = Cl.affinityprop(
        S;
        maxiter=model.maxiter,
        tol=model.tol,
        damp=model.damp,
    )

    exemplars = result.exemplars
    nclusters = length(exemplars)

    report = (
        exemplars=exemplars,
        # Exemplar observations as columns, mirroring KMeans/KMedoids centers.
        centers=view(data, :, exemplars),
        cluster_labels=MMI.categorical(1:nclusters),
        iterations=result.iterations,
        converged=result.converged,
    )

    return MMI.categorical(result.assignments), report
end

MMI.reporting_operations(::Type{<:AffinityPropagation}) = (:predict,)

# # METADATA

metadata_pkg.(
(KMeans, KMedoids, DBSCAN, HierarchicalClustering),
(KMeans, KMedoids, DBSCAN, HierarchicalClustering, AffinityPropagation),
name="Clustering",
uuid="aaaa29a8-35af-508c-8bc3-b662a17a0fe5",
url="https://github.com/JuliaStats/Clustering.jl",
Expand Down Expand Up @@ -251,6 +308,13 @@ metadata_model(
path = "$(PKG).HierarchicalClustering"
)

# Register MLJ trait metadata (human name, input scitype, load path) for
# AffinityPropagation, matching the other clusterers in this file.
metadata_model(
AffinityPropagation,
human_name = "Affinity Propagation clusterer",
input_scitype = MMI.Table(Continuous),
path = "$(PKG).AffinityPropagation"
)

"""
$(MMI.doc_header(KMeans))
Expand Down Expand Up @@ -618,4 +682,73 @@ report(mach).cutter(h = 2.5)
"""
HierarchicalClustering

"""
$(MMI.doc_header(AffinityPropagation))
[Affinity Propagation](https://en.wikipedia.org/wiki/Affinity_propagation) is a clustering algorithm based on the concept of "message passing" between data points. More information is available at the [Clustering.jl documentation](https://juliastats.org/Clustering.jl/stable/index.html). Use `predict` to get cluster assignments. Indices of the exemplars, their values, etc, are accessed from the machine report (see below).
This is a static implementation, i.e., it does not generalize to new data instances, and
there is no training data. For clusterers that do generalize, see [`KMeans`](@ref) or
[`KMedoids`](@ref).
In MLJ or MLJBase, create a machine with
mach = machine(model)
# Hyper-parameters
- `damp = 0.5`: damping factor
- `maxiter = 200`: maximum number of iterations
- `tol = 1e-6`: tolerance for convergence
- `preference = nothing`: the (single float) value of the diagonal elements of the similarity matrix. If unspecified, choose median (negative) similarity of all pairs as mentioned [here](https://en.wikipedia.org/wiki/Affinity_propagation#Algorithm)
- `metric = Distances.SqEuclidean()`: metric (see `Distances.jl` for available metrics)
# Operations
- `predict(mach, X)`: return cluster label assignments, as an unordered
`CategoricalVector`. Here `X` is any table of input features (eg, a `DataFrame`) whose
columns are of scitype `Continuous`; check column scitypes with `schema(X)`.
# Report
After calling `predict(mach)`, the fields of `report(mach)` are:
- exemplars: indices of the data picked as exemplars in `X`
- centers: positions of the exemplars in the feature space
- cluster_labels: labels of clusters given to each datum in `X`
- iterations: the number of iterations run by the algorithm
- converged: whether or not the algorithm converged within the maximum number of iterations
# Examples
```
using MLJ
X, labels = make_moons(400, noise=0.9, rng=1)
AffinityPropagation = @load AffinityPropagation pkg=Clustering
model = AffinityPropagation(preference=-10.0)
mach = machine(model)
# compute and output cluster assignments for observations in `X`:
yhat = predict(mach, X)
# Get the positions of the exemplars
report(mach).centers
# Plot clustering result
using GLMakie
scatter(MLJ.matrix(X)', color=yhat.refs)
```
"""
AffinityPropagation

end # module
34 changes: 33 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,40 @@ end
@test report(mach).dendrogram.heights == dendro.heights
end

# # AffinityPropagation

@testset "AffinityPropagation" begin
# Fixture: 8 observations with 5 features each; values 0.5:0.5:20 partitioned
# into columns of 5, then transposed so rows are observations.
X = table(stack(Iterators.partition(0.5:0.5:20, 5))')

# Test case 1: preference == median (negative) similarity (i.e. unspecified)
mach = machine(AffinityPropagation())

yhat = predict(mach, X)
# With the default (median) preference the data splits into two clusters.
@test yhat == [1, 1, 1, 1, 2, 2, 2, 2]

_report = report(mach)
# NOTE(review): expected exemplars/centers/iterations below are pinned to
# this exact fixture and to Clustering.jl's deterministic behavior — they
# will need updating if either changes.
@test _report.exemplars == [2, 7]
@test _report.centers == [3.0 15.5; 3.5 16.0; 4.0 16.5; 4.5 17.0; 5.0 17.5]
@test _report.cluster_labels == [1, 2]
@test _report.iterations == 50
@test _report.converged == true

# Test case 2: |preference| too large
# A strongly negative preference makes every point its own exemplar.
mach2 = machine(AffinityPropagation(preference=-20.0))

yhat = predict(mach2, X)
@test yhat == [1, 2, 3, 4, 5, 6, 7, 8]

_report = report(mach2)
@test _report.exemplars == [1, 2, 3, 4, 5, 6, 7, 8]
@test _report.centers == matrix(X)'
@test _report.cluster_labels == [1, 2, 3, 4, 5, 6, 7, 8]
@test _report.iterations == 32
@test _report.converged == true
end

@testset "MLJ interface" begin
models = [KMeans, KMedoids, DBSCAN, HierarchicalClustering]
models = [KMeans, KMedoids, DBSCAN, HierarchicalClustering, AffinityPropagation]
failures, summary = MLJTestInterface.test(
models,
X;
Expand Down

0 comments on commit 4e3c06f

Please sign in to comment.