Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Windows compat for nauty #124

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions ext/NautyACSetsExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,12 @@ using ACSets
using nauty_jll

"""Compute CSetNautyRes from an ACSet."""
function ACSets.call_nauty(g::ACSet)::CSetNautyRes
ACSets.NautyInterface.parse_res(nauty_res(g), g)
function ACSets.call_nauty(g::ACSet; use_nauty=true)::CSetNautyRes
  # The pure-Julia implementation is selected on Windows, or on any platform
  # when the caller passes `use_nauty=false`; otherwise the external nauty
  # output is parsed.
  run_native = Sys.iswindows() || !use_nauty
  return run_native ? CSetAutomorphisms.to_nauty_res(g) :
                      ACSets.NautyInterface.parse_res(nauty_res(g), g)
end

"""Make shell command to dreadnaut (nauty) and collect stdout text."""
Expand Down
4 changes: 2 additions & 2 deletions src/ACSets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ include("DenseACSets.jl")
include("intertypes/InterTypes.jl")
include("serialization/Serialization.jl")
include("ADTs.jl")
include("NautyInterface.jl")
include("nauty/Nauty.jl")

@reexport using .ColumnImplementations: AttrVar
@reexport using .Schemas
Expand All @@ -23,6 +23,6 @@ include("NautyInterface.jl")
@reexport using .InterTypes
@reexport using .ACSetSerialization
using .ADTs
@reexport using .NautyInterface
@reexport using .Nauty

end
224 changes: 224 additions & 0 deletions src/nauty/CSetAutomorphisms.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
"""
This is code which is required because nauty_jll is not supported on Windows:
thus we need a makeshift implementation of Nauty within Julia. There is
potential to do this in a much cleaner and more efficient way with a virtual
machine, but for now performance is not a priority.
"""
module CSetAutomorphisms

using ...ACSetInterface, ...DenseACSets, ...Schemas
using ..NautyInterface
using Permutations


# A coloring: maps each component (object) name to a vector holding the color
# assigned to each elem of that component (entry `i` is the color of elem `i`).
const CDict = Dict{Symbol, Vector{Int}}

"""Construct permutation σ⁻¹ such that σσ⁻¹=id"""
function invert_perms(x::CDict)
  # Invert the permutation stored under every key, keeping the keys unchanged
  inverted = [name => Base.invperm(perm) for (name, perm) in pairs(x)]
  return Dict(inverted)
end

Check warning on line 18 in src/nauty/CSetAutomorphisms.jl

View check run for this annotation

Codecov / codecov/patch

src/nauty/CSetAutomorphisms.jl#L18

Added line #L18 was not covered by tests

"""Return `true` iff the color vector of every component is a permutation"""
function check_auto(x::CDict)::Bool
  for colors in values(x)
    Base.isperm(colors) || return false
  end
  return true
end

# Sequence of keys, to index a Tree: each `component => element` pair selects
# one child, so the whole vector is a path starting at the root.
const VPSI = Vector{Pair{Symbol, Int}}

"""Largest entry of `x`, or `0` when `x` is empty"""
function max0(x::Vector{Int})::Int
  isempty(x) && return 0
  return maximum(x)
end

include("ColorRefine.jl")

# CSets with attributes replaced w/ combinatorial representatives
#################################################################

"""
To compute automorphisms of Attributed CSets, we create a pseudo CSet which has
additional components for each data type.

This is inefficient for attributes which have a total order on them
(e.g. integers/strings) since we solve for a canonical permutation of the
attributes. Future work could address this by initializing the coloring with
the 'correct' canonical order.
"""
function pseudo_cset(g::ACSet)::Tuple{ACSet, Dict{Symbol,Vector{Any}}}
  S = acset_schema(g)

  # Copy of the schema in which every attribute type is turned into an object
  # and every attribute into a hom, leaving no attribute data behind
  combinatorial = deepcopy(S)
  append!(combinatorial.obs, combinatorial.attrtypes)
  append!(combinatorial.homs, combinatorial.attrs)
  empty!(combinatorial.attrtypes)
  empty!(combinatorial.attrs)

  # For each attribute type: the concrete values found in `g` (in Julia sort
  # order) followed by one AttrVar per part of that type. A value's position
  # in this vector is the index that stands in for it.
  attrvals = Dict(map(attrtypes(S)) do at
    seen = Set{Any}()
    for a in attrs(S; just_names=true, to=at)
      union!(seen, g[a])
    end
    concrete = sort(collect(filter(v -> !(v isa AttrVar), seen)))
    at => vcat(concrete, AttrVar.(parts(g, at)))
  end)

  # Create the pseudo-cset and fill it with the parts of `g`
  res = AnonACSet(combinatorial, index=arrows(S; just_names=true))
  copy_parts!(res, g)

  for t in attrtypes(S)
    values_t = attrvals[t]
    # Top up component `t` so that it has one part per entry of `values_t`
    add_parts!(res, t, length(values_t) - nparts(g, t))
    # Replace each attribute value by its index within `values_t`
    for (a, d, _) in attrs(S; to=t), p in parts(g, d)
      res[p, a] = findfirst(==(g[p, a]), values_t)
    end
  end

  return (res, attrvals)
end

"""
Inverse of pseudo_cset. Requires mapping (generated by `pseudo_cset`) of indices
for each Data to the actual data values.
"""
function pseudo_cset_inv(g::ACSet, orig::ACSet, attrvals::AbstractDict)
  S = acset_schema(orig)
  # Work on a copy so that the caller's `orig` is left untouched
  restored = deepcopy(orig)

  # Combinatorial data is taken over from `g` unchanged.
  # NOTE(review): this relies on `hom(S)` yielding names usable as column
  # indices — confirm against the Schemas API.
  foreach(hom(S)) do arr
    restored[arr] = g[arr]
  end

  # Attribute columns of `g` hold indices; look the real values up again
  for (darr, _, tgt) in attrs(S)
    restored[darr] = attrvals[tgt][g[darr]]
  end

  return restored
end

# Results
#########

"""Apply a coloring to a C-set to get an isomorphic cset"""
function apply_automorphism(c::ACSet, d::CDict)
  # Refuse anything that is not a permutation on every component
  if !check_auto(d)
    error("received coloring that is not an automorphism: $d")
  end

  relabeled = deepcopy(c)
  for (arr, src, tgt) in homs(acset_schema(c))
    src_perm, tgt_perm = d[src], d[tgt]
    # Part `i` of `src` moves to `src_perm[i]`; its image is renamed via
    # `tgt_perm`
    relabeled[src_perm, arr] = tgt_perm[c[arr]]
  end
  return relabeled
end

"""
Build a `CSetNautyRes` for `g` using the Julia search in this module instead of
nauty. Orbits and generators are not computed yet and are returned empty.
"""
function to_nauty_res(g::ACSet)
  p, avals = pseudo_cset(g)
  colorings = autos(p)[1]

  # Pair every relabeled copy of `g` with the coloring that produced it
  candidates = [pseudo_cset_inv(apply_automorphism(p, Dict(a)), g, avals) => a
                for a in colorings]

  # Keep the candidate that comes first in sort order
  c, m = first(sort(candidates))

  orbits = Dict{Symbol, Vector{Int}}() # todo
  generators = Pair{Int, Vector{Permutation}}[] # todo
  # NOTE(review): the literal 1 in the fourth position is presumably the group
  # size placeholder — confirm against the fields of CSetNautyRes.
  return CSetNautyRes(string(c), orbits, generators, 1, m, c)
end

# Trees
#######

"""
Search tree explored by Nauty. Each node has an input coloring, a refined
coloring, and a set of children indexed by which element (in the smallest
nontrivial orbit) has its symmetry artificially broken.
"""
struct Tree
  # Coloring this node was entered with
  coloring::CDict
  # Coloring after refinement
  saturated::CDict
  # Sub-trees, keyed by the `component => element` that was singled out
  children::Dict{Pair{Symbol, Int}, Tree}

  # The only constructor: an empty node whose dictionaries are filled in later
  function Tree()
    return new(CDict(), CDict(), Dict{Pair{Symbol, Int}, Tree}())
  end
end

"""Get a node via a sequence of edges from the root"""
function Base.getindex(t::Tree, pth::VPSI)::Tree
  # Walk down from `t`, following one child edge per entry of `pth`; an empty
  # path yields `t` itself
  return foldl((node, edge) -> node.children[edge], pth; init=t)
end

"""
Get vector listing nontrivial colors (which component and which color index) as
well as how many elements have that color. E.g. for (V=[1,1,2], E=[1,2,2,2,3,3])
we would get `[2=>(:V,1), 3=>(:E,2), 2=>(:E, 3)]`
"""
function get_colors_by_size(coloring::CDict)::Vector{Pair{Int,Tuple{Symbol, Int}}}
  # Typed accumulator: avoids building a Vector{Any} that the return
  # annotation would then have to convert
  res = Pair{Int,Tuple{Symbol, Int}}[]
  for (k, v) in coloring
    # Tally all colors of this component in one pass instead of rescanning
    # `v` once per color
    tally = zeros(Int, max0(v))
    for color in v
      # Only colors in 1:max0(v) are reported, so anything below 1 is ignored
      color > 0 && (tally[color] += 1)
    end
    for (color, n_c) in enumerate(tally)
      # Keep nontrivial colors only; store which table and which color
      n_c > 1 && push!(res, n_c => (k, color))
    end
  end
  return res
end


"""To reduce branching factor, split on the SMALLEST nontrivial partition"""
function split_data(coloring::CDict)::Tuple{Symbol, Int, Vector{Int}}
  candidates = get_colors_by_size(coloring)
  # No nontrivial color left: signal this with a placeholder table name and an
  # empty (properly typed) index vector
  isempty(candidates) && return (:_nothing, 0, Int[])

  # Entries are `count => (table, color)` pairs, so the least pair is the
  # smallest nontrivial color class; a full sort is not needed to find it
  _, (split_tab, split_color) = minimum(candidates)

  # Indices of all elements of `split_tab` carrying that color
  split_inds = findall(==(split_color), coloring[split_tab])
  return (split_tab, split_color, split_inds)
end

"""
DFS tree of colorings, with edges being choices in how to break symmetry
Goal is to acquire all leaf nodes.

Algorithm from "McKay’s Canonical Graph Labeling Algorithm" by Hartke and
Radcliffe (2009).

McKay's "Practical Graph Isomorphism" (Section 2.29: "storage of identity
nodes") warns that it's not a good idea to check for every possible automorphism
pruning (for memory and time concerns). To do: look into doing this in a more
balanced way. Profiling code will probably reveal that checking for automorphism
pruning is a bottleneck.

Inputs:
- g: our structure that we are computing automorphisms for
- init_coloring: coloring to be refined at the current location in the tree
- split_seq: sequence of edges (our current location in the tree)
- tree: all information known so far - this gets modified
- leafnodes: coordinates (edge sequences from the root) of all leaf nodes found
  so far - this gets modified
"""
function search_tree!(g::ACSet, init_coloring::CDict, split_seq::VPSI,
                      tree::Tree, leafnodes::Set{VPSI})
  curr_tree = tree[split_seq]

  # Perform color saturation and record both colorings on the current node
  coloring = color_saturate(g; init_color=init_coloring)
  for (k, v) in pairs(coloring)
    curr_tree.coloring[k] = init_coloring[k]
    curr_tree.saturated[k] = v
  end

  split_tab, _, split_inds = split_data(coloring)

  if isempty(split_inds)
    # Leaf node: no color is shared by two elements any more. Actually fail if
    # the coloring is not a permutation (the Bool from `check_auto` used to be
    # discarded, so the check had no effect).
    check_auto(coloring) ||
      error("leaf coloring is not a permutation: $coloring")
    push!(leafnodes, split_seq)
  else
    # Fresh color given to the element whose symmetry is broken; it does not
    # depend on which element is chosen
    fresh_color = maximum(coloring[split_tab]) + 1
    for split_ind in split_inds
      # NOTE(review): only the first candidate is ever branched on, so a single
      # path of the tree is explored — confirm this restriction is intended.
      split_ind == split_inds[1] || continue

      # Construct arguments for recursive call to child
      new_coloring = deepcopy(coloring)
      new_seq = vcat(split_seq, [split_tab => split_ind])
      new_coloring[split_tab][split_ind] = fresh_color
      curr_tree.children[split_tab => split_ind] = Tree()
      search_tree!(g, new_coloring, new_seq, tree, leafnodes)
    end
  end
  return nothing
end

"""Compute the automorphisms of a CSet"""
function autos(g::ACSet)::Tuple{Set{CDict}, Tree}
  tree = Tree()
  leafnodes = Set{VPSI}()
  # Start from the uniform coloring at the root (empty path)
  search_tree!(g, nocolor(g), VPSI(), tree, leafnodes)
  # Collect the refined coloring stored at every leaf that was reached
  leaf_colorings = Set(CDict[tree[path].saturated for path in leafnodes])
  return (leaf_colorings, tree)
end

end # module
96 changes: 96 additions & 0 deletions src/nauty/ColorRefine.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@

using StructEquality
using .....ColumnImplementations: AttrVar

"""
Data for an individual component (each vector corresponds to its elements)
1.) how many of each color (for each in-arrow) targets this point
2.) what color this point targets (for each out arrow)

This could be extended to add extra automorphism-invariant properties.
E.g. detecting if src+tgt both point to the same element
"""
# `@struct_hash_equal` (StructEquality) derives `hash` and `==` from the
# fields, which lets data points be hashed and compared by content.
@struct_hash_equal struct CDataPoint
  # One vector per in-arrow: entry `c` counts the source elements of color `c`
  # that the arrow sends to this point
  indata::Vector{Vector{Int}}
  # One entry per out-arrow: the color of the element this point is sent to
  outdata::Vector{Int}
end

"""Data required to color a CSet (each element of each component)"""
const CData = Dict{Symbol, Vector{CDataPoint}}

"""
Computes colors for a CSet, distinguishing nodes by their immediate
connectivity. It is not sufficient to compute the automorphism group, but it is
a good starting point.

This does not generalize to ACSets: we cannot naively throw the attributes into
the color data as raw values, because that would make elements which ought to
be indistinguishable (e.g. two elements that map to different data but can
otherwise be permuted) appear distinguishable.
"""
function compute_color_data(g::ACSet, color::CDict)::CData
  S = acset_schema(g)
  res = CData()
  for tab in ob(S) # compute colordata for each tab
    # One count matrix per arrow INTO `tab`: entry [i, c] is the number of
    # source elements of color `c` that the arrow sends to element `i`
    in_counts = map(homs(S; to=tab)) do (arr, src, _)
      src_colors = color[src]
      counts = zeros(Int, nparts(g, tab), max0(src_colors))
      for (c, target) in zip(src_colors, g[arr])
        counts[target, c] += 1
      end
      counts
    end

    # One vector per arrow OUT OF `tab` (now regarded as a source): the color
    # of the element each part is sent to
    out_colors = [color[tgt][g[oga]] for (oga, _, tgt) in homs(S; from=tab)]

    # Combine the two pieces of data for each element of `tab`, store in res
    res[tab] = CDataPoint[
      CDataPoint([m[i, :] for m in in_counts], [col[i] for col in out_colors])
      for i in parts(g, tab)]
  end
  return res
end

"""Initial state for tree search: every element is symmetric"""
function nocolor(g::ACSet)
  # Every element of every component gets the same color, 1
  uniform = CDict()
  for k in ob(acset_schema(g))
    uniform[k] = ones(Int, nparts(g, k))
  end
  return uniform
end

"""
Iterative color refinement based on the number (and identity) of incoming and
outgoing arrows.
Inputs:
- g: CSet we are color saturating
- init_color: initial coloring, if any (default: uniform)
Returns:
- the refined coloring, reached once the total number of colors stops changing
"""
function color_saturate(g::ACSet; init_color::Union{Nothing,CDict}=nothing)
  # Default: uniform coloring
  new_color = isnothing(init_color) ? nocolor(g) : init_color

  # Sentinel counts that differ, so that at least one refinement pass runs
  prev_n, curr_n = 0, 1
  while prev_n != curr_n
    prev_color = new_color
    # All that matters about newdata's type is that it is hashable
    newdata = compute_color_data(g, prev_color)

    new_color = CDict()
    for (k, data) in newdata
      # Distinguish by both the previous color AND the newly computed data
      hs = map(hash, zip(prev_color[k], data))
      # Sorted distinct hashes; a hash's position in this vector is its color
      palette = sort!(unique(hs))
      # `palette` is sorted and contains every `h`, so a binary search finds
      # the same index a linear scan would
      new_color[k] = [searchsortedfirst(palette, h) for h in hs]
    end

    # Stop once a pass no longer changes the total number of colors
    prev_n = sum(map(max0, values(prev_color)))
    curr_n = sum(map(max0, values(new_color)))
  end
  return new_color
end
11 changes: 11 additions & 0 deletions src/nauty/Nauty.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""
Umbrella module for nauty support: it loads the `NautyInterface` and
`CSetAutomorphisms` submodules and re-exports both of them.
"""
module Nauty

using Reexport

# NautyInterface is included first: CSetAutomorphisms does
# `using ..NautyInterface`, so it needs that module to exist already.
include("NautyInterface.jl")
include("CSetAutomorphisms.jl")

@reexport using .CSetAutomorphisms
@reexport using .NautyInterface

end # module
Loading
Loading