diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3b91e01..60dfa83 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -8,7 +8,7 @@ jobs: strategy: fail-fast: false matrix: - julia-version: ['1.0', '1.3', '1.6'] + julia-version: ['1.6'] os: [ubuntu-latest] steps: diff --git a/Project.toml b/Project.toml index 5349932..9e7632c 100644 --- a/Project.toml +++ b/Project.toml @@ -11,9 +11,9 @@ DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" LibPQ = "194296ae-ab2e-5f79-8cd4-7183a0a5a0d1" [compat] -AlgebraicPetri = "0.6" +AlgebraicPetri = "0.6, 0.7" AutoHashEquals = "0.2.0" -Catlab = "0.11, 0.12" +Catlab = "0.13" DataFrames = "0.21, 0.22, 1.0" LibPQ = "1.4.0" julia = "1.0" diff --git a/src/AlgebraicRelations.jl b/src/AlgebraicRelations.jl index a484c1e..6dd4f06 100644 --- a/src/AlgebraicRelations.jl +++ b/src/AlgebraicRelations.jl @@ -1,5 +1,4 @@ module AlgebraicRelations - include("functor.jl") include("DB.jl") include("Queries.jl") include("Interface.jl") diff --git a/src/DB.jl b/src/DB.jl index 8df5a52..c591575 100644 --- a/src/DB.jl +++ b/src/DB.jl @@ -2,7 +2,8 @@ module DB using Catlab: @present using Catlab.Present using Catlab.CategoricalAlgebra.CSets - export TheorySQL, SchemaType, generate_schema_sql, @present, get_fields, TypeToSQL, typeToSQL + using Catlab.CSetDataStructures: struct_acset + export TheorySQL, generate_schema_sql, @present, get_fields, TypeToSQL, typeToSQL, @db_schema, AbstractSQL TypeToSQL = Dict("String" => "text", "Int" => "int", @@ -12,20 +13,25 @@ module DB typeToSQL(x) = TypeToSQL[string(x)] @present TheorySQL(FreeSchema) begin - Int::Data - Int64::Data - Real::Data - String::Data - Bool::Data + Int::AttrType + Int64::AttrType + Real::AttrType + String::AttrType + Bool::AttrType end; - function SchemaType(present::Presentation) - ACSetType(present){Int, Int64, Real, String, Bool} - end + @abstract_acset_type AbstractSQL - const AbstractSQL = AbstractACSetType(TheorySQL) + # TODO: This should be replacable with a cleaner method + macro db_schema(head) + struct_name = gensym() + quote + $(esc(:eval))(struct_acset($(Meta.quot(struct_name)), AbstractSQL, $(esc(head.args[2])))) + $(esc(head.args[1]))() = $(esc(struct_name)){$(esc(Int)), $(esc(Int)), $(esc(Real)), $(esc(String)), $(esc(Bool))}() + end + end - function generate_schema_sql(schema::AbstractACSet) + function generate_schema_sql(schema::AbstractSQL) queries = map(collect(get_fields(schema))) do (name, col) cols = ["$n $(typeToSQL(t))" for (n,t) in col] "CREATE TABLE $name ($(join(cols, ", ")))" @@ -33,15 +39,16 @@ module DB string(join(queries, ";\n"), ";") end - function get_fields(schema::AbstractACSet) + function get_fields(schema::AbstractSQL) fields = Dict{Symbol, Array{Tuple{Symbol, Type},1}}() - for (name, table) in pairs(schema.tables) + for (name, table) in pairs(tables(schema)) table_name = name # Get the column names and types - col_names, types = eltype(table).parameters + col_names = propertynames(table) + types = eltype.([schema[c] for c in col_names]) col_names = map(x -> Symbol(split(string(x), r"_\d+_")[end]), col_names) - fields[table_name] = map(zip(col_names,types.parameters)) do (n,t) + fields[table_name] = map(zip(col_names,types)) do (n,t) (n, t) end end diff --git a/src/Presentations.jl b/src/Presentations.jl index 615f0fb..ce79ccb 100644 --- a/src/Presentations.jl +++ b/src/Presentations.jl @@ -1,19 +1,20 @@ module Presentations using ..DB + import ..DB: @db_schema using Catlab using Catlab.Theories using Catlab.Graphics using Catlab.Present: Presentation - using Catlab.Theories.FreeSchema: Attr, Data + using Catlab.Theories.FreeSchema: Attr using AlgebraicPetri import Catlab.Theories: FreeSymmetricMonoidalCategory, ⊗ import Catlab.Programs: @program export present_to_schema, @program, draw_workflow, FreeSymmetricMonoidalCategory, add_types!, add_type!, add_process!, add_processes!, Presentation, ⊗, - draw_schema + draw_schema, @db_schema, @present_to_schema hasprop(o, p) = p in propertynames(o) @@ -40,51 +41,46 @@ module Presentations return map(hom->add_process!(p, hom), homs) end - function present_to_schema(wf::Presentation) - gens = Array{GATExpr, 1}() - tables = Dict{Symbol, GATExpr}() - sym_app(s::Symbol, suffix::String) = Symbol(string(s, suffix)) - get_syms(g::Union{GATExpr, Array}) = begin - if g isa Array - if eltype(g) == Symbol - return [g] - end - sym_array = Array{Array{Symbol, 1}, 1}() - for i in g - append!(sym_array, get_syms(i)) + macro present_to_schema(head) + present_to_schema(head.args[1], head.args[2]) + end + + function present_to_schema(sch_name::Symbol, wf::Symbol) + quote + gens = Array{GATExpr, 1}() + tables = Dict{Symbol, GATExpr}() + sym_app(s::Symbol, suffix::String) = Symbol(string(s, suffix)) + get_syms(g::Union{GATExpr, Array}) = begin + if g isa Array + if eltype(g) == Symbol + return [g] + end + sym_array = Array{Array{Symbol, 1}, 1}() + for i in g + append!(sym_array, get_syms(i)) + end + return sym_array + elseif hasprop(g, :args) + return get_syms(g.args) end - return sym_array - elseif hasprop(g, :args) - return get_syms(g.args) end - end - # Evaluate objects to tables with attributes - #for g in generators(wf, :Ob) - # g_name = g.args[1] - # tab_name = sym_app(g_name, "_T") - # table = Ob(FreeSchema, tab_name) - # tables[g_name] = table - # push!(gens, table) - # push!(gens, Attr(sym_app(tab_name, "_1_id"), table, generator(TheorySQL, :Int64))) - # push!(gens, Attr(sym_app(tab_name, "_1_data"), table, generator(TheorySQL, g.args[2]))) - #end - - # Evaluate homs to purely data-connected tables - for g in generators(wf, :Hom) - g_name = g.args[1] - table = Ob(FreeSchema, g_name) - tables[g_name] = table - push!(gens, table) - append!(gens, map(enumerate(get_syms(g.type_args))) do (i, sym) - Attr(sym_app(g_name, "_$(i)_$(sym[1])$i"), - table, generator(TheorySQL, sym[2])) - end) - end + # Evaluate homs to purely data-connected tables + for g in generators($(esc(wf)), :Hom) + g_name = g.args[1] + table = Ob(FreeSchema, g_name) + tables[g_name] = table + push!(gens, table) + append!(gens, map(enumerate(get_syms(g.type_args))) do (i, sym) + Attr(sym_app(g_name, "_$(i)_$(sym[1])$i"), + table, generator(TheorySQL, sym[2])) + end) + end - @present p <: TheorySQL begin end - add_generators!(p, gens) - SchemaType(p) + @present p <: TheorySQL begin end + add_generators!(p, gens) + $(esc(:eval))(:(@db_schema $($(Meta.quot(sch_name)))($(p)))) + end end function draw_schema(p::Presentation; kw...) diff --git a/src/Queries.jl b/src/Queries.jl index f10ca9f..3123de2 100644 --- a/src/Queries.jl +++ b/src/Queries.jl @@ -1,18 +1,11 @@ module Queries using Catlab: @present - import Catlab.Programs.RelationalPrograms: TheoryTypedRelationDiagram - import Catlab.Programs.RelationalPrograms: parse_relation_diagram using Catlab.Programs.RelationalPrograms + using Catlab.Programs.RelationalPrograms: TheoryTypedRelationDiagram, TheoryTypedNamedRelationDiagram, TypedNamedRelationDiagram, parse_relation_diagram using Catlab.Graphics using Catlab.WiringDiagrams using Catlab.CategoricalAlgebra.CSets using ..DB - using ..Functors - - # Used for the redefinition of copy_parts! - using Catlab.Theories: Schema, FreeSchema, dom, codom, - CatDesc, CatDescType, AttrDesc, AttrDescType, SchemaType, - ob_num, hom_num, data_num, attr_num, dom_num, codom_num export TheoryQuery, Query, @query, to_sql, draw_query, to_prepared_sql, infer! @@ -24,8 +17,7 @@ module Queries :(!=) => ("<>", [:first, :second]), ) - @present TheoryQuery <: TheoryTypedRelationDiagram begin - field::Attr(Port, Name) + @present TheoryQuery <: TheoryTypedNamedRelationDiagram begin Comparison::Ob comp_port1::Hom(Comparison, Port) comp_port2::Hom(Comparison, Port) @@ -34,15 +26,13 @@ module Queries # subpart(q, :port_type) == subpart(q, subpart(q, :junction), :junction_type) end - const Query = ACSetType(TheoryQuery, - index=[:box, :junction, :outer_junction, :field], - unique_index=[:variable]) + @acset_type Query(TheoryQuery, + index=[:box, :junction, :outer_junction, :port_name], + unique_index=[:variable]) NullableSym = Union{Symbol, Nothing} Query() = Query{NullableSym, NullableSym, NullableSym}() - - function infer!(wd, rels::Array{Tuple{Array{Symbol,1}, Array{Symbol,1}},1}; max_iter=2*length(rels)) # Perform multiple steps to fill in chains of relations for i in 1:max_iter @@ -101,43 +91,35 @@ module Queries return all(is_defined), changed end - function RelToQuery(schema) + function Query(schema, wd) port_names = get_fields(schema) - function ob_to_sql(rel::UntypedRelationDiagram) - q = Query() - copy_parts!(q, rel) - name = subpart(rel, 1, :name) + q = Query() + copy_parts!(q, wd) + # Add types to each of the junctions by iterating through each box + for b in 1:nparts(q, :Box) # Set junction and outer_port types (these will be inferred from schema types) - set_subpart!(q, :outer_port_type, nothing) - set_subpart!(q, :junction_type, nothing) + q[:outer_port_type] .= nothing + q[:outer_port_name] .= nothing + q[:junction_type] .= nothing + name = q[b, :name] + ports = incident(wd, b, :box) # add comparison references for later type-inference if name in keys(SQLOperators) - ports = incident(rel, 1, :box) add_part!(q, :Comparison, comp_port1=ports[1], comp_port2=ports[2]) - set_subparts!(q, 1:2, field=SQLOperators[name][2][1:2], port_type=[nothing, nothing]) + set_subparts!(q, ports, port_name=SQLOperators[name][2][1:2], port_type=[nothing, nothing]) else - fields = [port_names[name][i][1] for i in 1:nparts(q, :Port)] - types = [Symbol(port_names[name][i][2]) for i in 1:nparts(q, :Port)] - set_subparts!(q, 1:nparts(q, :Port), field=fields, port_type=types) + fields = [port_names[name][i][1] for i in 1:length(ports)] + types = [Symbol(port_names[name][i][2]) for i in 1:length(ports)] + set_subparts!(q, ports, port_name=fields, port_type=types) end - q - end - - toQuery = Functor(ob_to_sql, Query) - - function toSQL(rel::UntypedRelationDiagram) - q = toQuery(rel) - infer!(q, [([:port_type],[:junction, :junction_type]), - ([:outer_junction, :junction_type],[:outer_port_type]), - ([:comp_port1,:port_type],[:comp_port2,:port_type])]); - q end - end - - function Query(schema, wd) - RelToQuery(schema)(wd) + infer!(q, [([:port_type],[:junction, :junction_type]), + ([:outer_junction, :junction_type],[:outer_port_type]), + ([:comp_port1,:port_type],[:comp_port2,:port_type]), + ([:outer_port_name],[:outer_junction, :variable])]); + q end macro query(schema, exprs...) @@ -187,7 +169,7 @@ module Queries op_inds = findall(x -> x in keys(SQLOperators), box_names) outer_juncs = subpart(q, :outer_junction) - port_info = subparts(q, [:box, :junction, :field]) + port_info = subparts(q, [:box, :junction, :port_name]) junctions = zeros(Int, nparts(q, :Junction)) variables = subpart(q, :variable) prepared_junctions = findall(x -> string(x)[1] == '_', variables) @@ -250,7 +232,7 @@ module Queries end function draw_query(q; kw...) - uwd = TypedRelationDiagram{NullableSym, NullableSym, NullableSym}() + uwd = TypedNamedRelationDiagram{NullableSym, NullableSym, NullableSym}() copy_parts!(uwd, q) to_graphviz(uwd; box_labels=:name, junction_labels=:variable, kw...) end diff --git a/src/functor.jl b/src/functor.jl deleted file mode 100644 index 8295bf2..0000000 --- a/src/functor.jl +++ /dev/null @@ -1,213 +0,0 @@ -module Functors - -using Catlab -using Catlab.Theories -using Catlab.Theories: attr -using Catlab.Programs.RelationalPrograms -using Catlab.CategoricalAlgebra -using Catlab.CategoricalAlgebra.FinSets -using Catlab.CategoricalAlgebra.CSetDataStructures -using Catlab.CategoricalAlgebra.StructuredCospans - -export Functor, compose, split! - -struct Functor - ob - comp - split -end - -function (F::Functor)(uwd::RelationDiagram) - _functor(F, copy(uwd)) -end - -function _functor(F::Functor, uwd::RelationDiagram) - # Base Case: UWD of 1 box - if nparts(uwd, :Box) == 1 - return F.ob(uwd) - end - # TODO: Make a more generalized version so that we don't have - # to arbitrarily keep track of OuterPort order - - # Split box, apply functor, then re-compose - op, cosp, l_uwd, r_uwd = F.split(uwd) - f_comp = F.comp(op, cosp, _functor(F, l_uwd), _functor(F,r_uwd)) - return f_comp -end - - -function Functor(ob, OpenType) - Functor(ob, gen_compose(OpenType), split!) -end -function Functor(ob) - Functor(ob, gen_compose(UntypedRelationDiagram), split!) -end - - -# Generic compose function that should -function gen_compose(OpenType) - OpenTOb, OpenT = OpenACSetTypes(OpenType, :Junction) - function typedComp(op_map::Array{Int, 1}, cosp::Cospan, a::T, b::T) where {CD, AD, Ts, T <: ACSet{CD, AD, Ts}} - # Get inverses of the cospan legs to convert it to a span - a_inv = zeros(Int, length(cosp.apex)) - a_inv[cosp.legs[1].func] = 1:length(cosp.legs[1].func) - - b_inv = zeros(Int, length(cosp.apex)) - b_inv[cosp.legs[2].func] = 1:length(cosp.legs[2].func) - - # Generate boundaries, and fill in junctions necessary to convert cospan to span - for i in 1:length(cosp.apex) - if a_inv[i] == 0 && b_inv[i] == 0 - throw(error("Index $i in the apex has no value mapped to it")) - elseif b_inv[i] == 0 - a_attr = a.tables.Junction[a_inv[i]] - b_junc = add_part!(b, :Junction, a_attr) - b_inv[i] = b_junc - elseif a_inv[i] == 0 - b_attr = b.tables.Junction[b_inv[i]] - a_junc = add_part!(a, :Junction, b_attr) - a_inv[i] = a_junc - end - end - - # We now have sufficient data on both ends to use open composition - open_a = OpenT{Ts.parameters...}(a, FinFunction(Array{Int, 1}(), nparts(a, :Junction)), FinFunction(a_inv, nparts(a, :Junction))) - open_b = OpenT{Ts.parameters...}(b, FinFunction(b_inv, nparts(b, :Junction)), FinFunction(Array{Int, 1}(), nparts(b, :Junction))) - - # It might help if this structure stored what objects in a,b map to objects in ab - ab = compose(open_a, open_b).cospan.apex - - # Bring the resulting junctions back to original order - a_completed = zeros(Int,length(a_inv)) - a_completed[a_inv] = 1:length(a_inv) - reorder_part!(ab, :Junction, a_completed) - reorder_part!(ab, :OuterPort, op_map) - return ab - end - return typedComp -end - -# Splits off one box from a RelationDiagram -function split!(left::UntypedRelationDiagram) - njuncs = nparts(left, :Junction) - left_juncs = collect(1:njuncs) - # Create new RelDiag w/ last right - right_ind = nparts(left, :Box) - right = UntypedRelationDiagram{Symbol, Symbol}() - add_part!(right, :Box, name=subpart(left, right_ind, :name)) - - ports = incident(left, right_ind, :box) - right_juncs = unique(subpart(left, ports, :junction)) - left_to_right = zeros(Int, nparts(left, :Junction)) - left_to_right[right_juncs] = 1:length(right_juncs) - add_parts!(right, :Junction, length(right_juncs), - variable=subpart(left, right_juncs, :variable)) - add_parts!(right, :Port, length(ports), - box=fill(1, length(ports)), - junction=left_to_right[subpart(left, ports, :junction)]) - - # Remove last box - rem_parts!(left, :Port, ports) - rem_part!(left, :Box, nparts(left, :Box)) - - # Remove any right_juncs from right1 that have no connections - junctions = filter(j -> isempty(incident(left, j, :junction)), right_juncs) - # Transfer outer_ports with these - outerports = vcat(incident(left, junctions, :outer_junction)...) - left_outer = collect(1:nparts(left, :OuterPort)) - right_outer = Array{Int, 1}() - reverse!(sort!(outerports)) # Delete from back to front - - # Take care of order of OuterPorts - left_op_juncs = left.tables.OuterPort[outerports] - - for (i,op) in enumerate(outerports) - add_part!(right, :OuterPort) - for k in keys(left_op_juncs[i]) - v = left_op_juncs[i][k] - if k == :outer_junction - set_subpart!(right, i, k, left_to_right[v]) - else - set_subpart!(right, i, k, v) - end - end - rem_part!(left, :OuterPort, op) - push!(right_outer, left_outer[op]) - left_outer[op] = left_outer[end] - pop!(left_outer) - end - - # Remove unnecessary junctions - reverse!(sort!(junctions)) - - # NOTE: This section is dependent upon the specific deletion algorithm used by - for j in junctions - rem_part!(left, :Junction, j) - left_juncs[j] = left_juncs[end] - pop!(left_juncs) - end - cosp = Cospan(FinFunction(left_juncs, njuncs), FinFunction(right_juncs, njuncs)) - vcat(left_outer, right_outer), cosp, left, right -end - -# Provide a new order for a given part of the ACSet -# `order` is assumed to be a bijection. Maybe make this into -# a generated function and upstream to Catlab? -reorder_part!(acs::ACSet, type::Symbol, order::Array{Int, 1}) = - _reorder_part!(acs, Val(type), order) - -function _reorder_part!(acs::ACSet{CD,AD,Ts,Idxed}, ::Val{ob}, - order::Array{Int, 1}) where {CD,AD,Ts,Idxed,ob} - in_homs = filter(c_hom -> codom(CD, c_hom) == ob, hom(CD)) - indexed_out_homs = filter(c_hom -> dom(CD, c_hom) == ob && c_hom ∈ Idxed, hom(CD)) - indexed_attrs = filter(c_attr -> dom(AD, c_attr) == ob && c_attr ∈ Idxed, attr(AD)) - last_part = length(acs.tables[ob]) - - - # Check for bijection - @assert length(order) == last_part - @assert maximum(order) == last_part - @assert minimum(order) == 1 - @assert length(unique(order)) == last_part - - order_adj = zeros(Int, last_part) - order_adj[order] = collect(1:last_part) - - # Swap hom references - for hom in Tuple(in_homs) - incident_obs = [incident(acs, i, hom, copy=true) for i in 1:last_part] - for i in order - set_subpart!(acs, incident_obs[i], hom, order[i]) - end - end - - # Swap - junc_attrs = acs.tables[ob] - junc_dict = Dict(map(keys(junc_attrs[1])) do k - (k, [junc_attrs[i][k] for i in order_adj]) - end) - for attr in Tuple(indexed_attrs) - for i in 1:last_part - unset_data_index!(acs.indices[attr], junc_attrs[i][attr], i) - end - end - set_subparts!(acs, 1:last_part, (; junc_dict...)) -end - -# Copied in from CSetDataStructures in Catlab -function unset_data_index!(d::AbstractDict{K,Int}, k::K, v::Int) where K - if haskey(d, k) && d[k] == v - delete!(d, k) - end -end -function unset_data_index!(d::AbstractDict{K,<:AbstractVector{Int}}, - k::K, v::Int) where K - if haskey(d, k) - vs = d[k] - if deletesorted!(vs, v) && isempty(vs) - delete!(d, k) - end - end -end - -end #module diff --git a/test/DB.jl b/test/DB.jl index 3ab46cb..1158392 100644 --- a/test/DB.jl +++ b/test/DB.jl @@ -26,7 +26,7 @@ using SQLite sali::Hom(income, salary) end; -Workplace = SchemaType(WorkplaceSchema) +@db_schema Workplace(WorkplaceSchema) f = Workplace() db = SQLite.DB() diff --git a/test/Presentations.jl b/test/Presentations.jl index b0a711d..59e85b6 100644 --- a/test/Presentations.jl +++ b/test/Presentations.jl @@ -18,9 +18,9 @@ extract, split_im, train, evaluate = add_processes!(wf, [(:extract, Files, Image (:train, NeuralNet⊗Images, NeuralNet⊗Metadata), (:evaluate, NeuralNet⊗Images, Accuracy⊗Metadata)]); # Convert to Schema -TrainDB = present_to_schema(wf); +@present_to_schema TrainDB(wf); g = draw_schema(wf) @test wf isa Catlab.Present.Presentation -@test TrainDB <: Catlab.CategoricalAlgebra.ACSet +@test TrainDB() isa Catlab.CategoricalAlgebra.ACSet @test g isa Catlab.Graphics.Graphviz.Graph diff --git a/test/Queries.jl b/test/Queries.jl index 06c8c19..ad23ff0 100644 --- a/test/Queries.jl +++ b/test/Queries.jl @@ -29,7 +29,7 @@ using Catlab relation_3_relationship::Attr(relation, Real) end; -Workplace = SchemaType(WorkplaceSchema) +@db_schema Workplace(WorkplaceSchema) schema = Workplace() db = SQLite.DB()