AlgebraicJulia · olynch · Sep 20, 2023 · Sep 9, 2023 · Sep 19, 2023 · Sep 19, 2023
diff --git a/.gitignore b/.gitignore
@@ -7,6 +7,8 @@
 # Files generated by invoking Julia with --track-allocation
 *.jl.mem
 
+lcov.info
+
 # System-specific files and directories generated by the BinaryProvider and BinDeps packages
 # They contain absolute paths specific to the host computer, and so should not be committed
 deps/deps.jl

diff --git a/docs/src/concepts/scopes.md b/docs/src/concepts/scopes.md
@@ -2,17 +2,26 @@
 
 Design rationale:
 
-Namespacing should be first class. This doesn't play nicely with deBruijn
-levels; we don't want to do a bunch of math to convert between flattened and
-unflattened versions of contexts. There is an accepted solution to the problem
-of organizing names: use refs! Mathematically speaking, a ref is just a list
+Namespacing, i.e. references which look like `britain.economy.GDP`, should be
+first class. This doesn't play nicely with the internal representation of names
+via [deBruijn levels](https://en.wikipedia.org/wiki/De_Bruijn_index); we don't
+want to do a bunch of math to convert between flattened and nested versions
+of contexts. Conceptually, we should then switch our identifiers to be *lists*
 of deBruijn levels.
 
-However, deBruijn levels are unfriendly for users; we want to be able to give
-names to our variables!
+But there are also significant subtleties in the translation between named and
+nameless representations of identifiers. Also in practice if we are referencing
+something that might change over time, like a C-set, we don't want to use
+deBruijn levels, because we don't want have to change all of our identifiers
+when we delete something, so in the rest of this I will instead say "local
+identifier" instead of deBruijn level to refer to a nameless representation of
+an identifier.
 
-In general, naming is given by a *relation* between symbols and bindings (as
-referred to by deBruijn levels). Each binding may be associated with zero or
+(the name "local identifier" and also some of this philosophy is taken from
+[chit](https://github.com/davidad/chit))
+
+In general, naming is given by a *relation* between symbols and *bindings* (as
+refered to by local identifiers).  Each binding may be associated with zero or
 more symbols, and each symbol might be associated with zero or more bindings.
 
 We name the failure of this relation to be a bijection in several ways.
@@ -55,11 +64,11 @@ scopes, and the most recent scope "wins".
 
 Parsing turns a `Symbol` into an `Ident`. The `Ident` contains the original
 `Symbol` for printing, but it also contains a reference to a scope via ScopeTag,
-and an deBruijn level within that scope. Thus, the name in the `Ident` is never
-needed for later stages of programatic manipulation.
+and an local identifier ("lid") within that scope. Thus, the name in the `Ident`
+is never needed for later stages of programatic manipulation.
 
 Scopes cache the relation between names and bindings, by providing a way to go
-from binding (as reified by deBruijn level) to a set of aliases with
+from binding (as reified by local identifier) to a set of aliases with
 distinguished primary alias, and to go from name to the set of bindings that
 have that name that we need to disambiguate between in the case of an overload.
 

diff --git a/src/models/GATExprUtils.jl b/src/models/GATExprUtils.jl
@@ -0,0 +1,104 @@
+""" Rewriting for GAT expressions.
+
+The current content of this module exists purely for backwards compatibility
+with the existing rewriting in Catlab; the more advanced implementation uses
+e-graphs and works with AlgTerms instead of GATExprs.
+"""
+module GATExprUtils
+export associate, associate_unit_inv, associate_unit,
+  distribute_unary, involute, normalize_zero
+
+using ..SymbolicModels
+
+#Warning: assumes expr has only two args!
+""" Simplify associative binary operation.
+
+Maintains the normal form `op(e1,e2,...)` where `e1`,`e2`,... are expressions
+that are *not* applications of `op()`
+"""
+function associate(expr::E)::E where E <: GATExpr
+  op, e1, e2 = head(expr), first(expr), last(expr)
+  args1 = head(e1) == op ? args(e1) : [e1]
+  args2 = head(e2) == op ? args(e2) : [e2]
+  E([args1; args2], gat_type_args(expr))
+end
+
+""" Simplify associative binary operation with unit.
+
+Reduces a freely generated (typed) monoid to normal form.
+"""
+function associate_unit(expr::GATExpr, unit::Function)::GATExpr
+  e1, e2 = first(expr), last(expr)
+  if (head(e1) == nameof(unit)) e2
+  elseif (head(e2) == nameof(unit)) e1
+  else associate(expr) end
+end
+
+""" Simplify associative binary operation with unit and inverses.
+"""
+function associate_unit_inv(expr::E, unit::Function,
+                            inverse::Function)::GATExpr where E <: GATExpr
+  op, e1, e2 = head(expr), first(expr), last(expr)
+  if (head(e1) == nameof(unit)) e2
+  elseif (head(e2) == nameof(unit)) e1
+  else
+    args1 = head(e1) == op ? args(e1) : [e1]
+    args2 = head(e2) == op ? args(e2) : [e2]
+    while !isempty(args1) && !isempty(args2)
+      l = args1[end]; r = args2[1]
+      if (head(l) == nameof(inverse) && first(l) == r ||
+          head(r) == nameof(inverse) && l == first(r))
+        pop!(args1); popfirst!(args2)
+      else break end
+    end
+    newargs = [args1; args2]
+    # XXX: Assumes that the unit/identity takes exactly one argument, hence this
+    # function will not work for the algebraic theory of groups.
+    if (isempty(newargs)) unit(only(unique(gat_type_args(expr))))
+    elseif (length(newargs) == 1) only(newargs)
+    else E(newargs, gat_type_args(expr)) end
+  end
+end
+
+""" Distribute unary operation over binary operation.
+"""
+function distribute_unary(expr::GATExpr, unary::Function, binary::Function;
+                          unit::Union{Function,Nothing}=nothing,
+                          contravariant::Bool=false)::GATExpr
+  if (head(expr) != nameof(unary)) return expr end
+  @assert length(args(expr)) == 1
+  arg = first(expr)
+  if head(arg) == nameof(binary)
+    binary(map(unary, (contravariant ? reverse : identity)(args(arg)))...)
+  elseif !isnothing(unit) && head(arg) == nameof(unit)
+    arg
+  else
+    expr
+  end
+end
+
+""" Simplify involutive unary operation.
+"""
+function involute(expr::GATExpr)
+  @assert length(args(expr)) == 1
+  arg = first(expr)
+  head(expr) == head(arg) ? first(arg) : expr
+end
+
+"""
+If given GATExpr contains a zero morphism,
+collapse the expression to a single zero morphism.
+"""
+function normalize_zero(expr::E;zname=:zeromap) where E <: GATExpr
+  ztype = E
+  for subexpr in args(expr)
+    if head(subexpr) == zname
+      ztype = typeof(subexpr)
+      s,t = gat_type_args(expr)
+      return ztype([s,t],[s,t])
+    end
+  end
+  expr
+end
+
+end
diff --git a/src/models/ModelInterface.jl b/src/models/ModelInterface.jl
@@ -85,7 +85,7 @@
   all(!isnothing(implements(m, gettag(scope))) for scope in theory_module.THEORY.segments.scopes)
 
 struct TypeCheckFail <: Exception
-  model::Model
+  model::Union{Model, Nothing}
   theory::GAT
   type::Ident
   val::Any
@@ -150,10 +150,13 @@
   # Parse the body into functions defined here and functions defined elsewhere
   functions, ext_functions = parse_instance_body(body)
 
+  oldinstance = isnothing(model)
+
   # Checks that all the functions are defined with the correct types Add default
   # methods for type constructors and type argument accessors if these methods
   # are missing
-  typechecked_functions = typecheck_instance(theory, functions, ext_functions, jltype_by_sort)
+  typechecked_functions =
+    typecheck_instance(theory, functions, ext_functions, jltype_by_sort; oldinstance)
 
   # Adds keyword arguments to the functions, and qualifies them by
   # `theory_module`, i.e. changes `Ob(x) = blah` to `ThCategory.Ob(x; model::M,
@@ -249,11 +252,18 @@
   theory::GAT,
   x::Ident,
   termcon::AlgTermConstructor,
-  jltype_by_sort::Dict{AlgSort}
+  jltype_by_sort::Dict{AlgSort};
+  oldinstance=false
 )
+  sortsig = sortsignature(termcon)
+  args = if oldinstance && isempty(sortsig)
+    Expr0[Expr(:curly, :Type, jltype_by_sort[AlgSort(termcon.type)])]
+  else
+    Expr0[jltype_by_sort[sort] for sort in sortsignature(termcon)]
+  end
   JuliaFunctionSig(
     nameof(x),
-    Expr0[jltype_by_sort[sort] for sort in sortsignature(termcon)]
+    args
   )
 end
 
@@ -306,7 +316,8 @@
   theory::GAT,
   functions::Vector{JuliaFunction},
   ext_functions::Vector{Symbol},
-  jltype_by_sort::Dict{AlgSort}
+  jltype_by_sort::Dict{AlgSort};
+  oldinstance=false,
 )::Vector{JuliaFunction}
   typechecked = JuliaFunction[]
 
@@ -317,7 +328,7 @@
 
   for x in theory.termcons
     if nameof(x) ∉ ext_functions
-      sig = julia_signature(theory, x, getvalue(theory[x]), jltype_by_sort)
+      sig = julia_signature(theory, x, getvalue(theory[x]), jltype_by_sort; oldinstance)
       if haskey(undefined_signatures, sig)
         error(overload_errormsg)
       end
@@ -359,22 +370,34 @@
   for f in functions
     sig = parse_function_sig(f)
 
-    sig ∈ keys(undefined_signatures) ||
-      throw(SignatureMismatchError(f.name, toexpr(sig), expected_signatures[f.name]))
+    if !haskey(undefined_signatures, sig)
+      try
+        x = ident(theory; name=f.name)
+      catch e
+        throw(SignatureMismatchError(f.name, toexpr(sig), expected_signatures[f.name]))
+      end
+      judgment = getvalue(theory, x)
+      # We allow overloading for type constructors
+      if !(judgment isa AlgTypeConstructor)
+        throw(SignatureMismatchError(f.name, toexpr(sig), expected_signatures[f.name]))
+      end
 
-    x = undefined_signatures[sig]
+      push!(typechecked, expand_fail(judgment, theory, x, f))
+    else
+      x = undefined_signatures[sig]
 
-    if x isa Ident
-      judgment = getvalue(theory, x)
+      if x isa Ident
+        judgment = getvalue(theory, x)
 
-      if judgment isa AlgTypeConstructor
-        f = expand_fail(judgment, theory, x, f)
+        if judgment isa AlgTypeConstructor
+          f = expand_fail(judgment, theory, x, f)
+        end
       end
-    end
 
-    delete!(undefined_signatures, sig)
+      delete!(undefined_signatures, sig)
 
-    push!(typechecked, f)
+      push!(typechecked, f)
+    end
   end
 
   for (_, n) in undefined_signatures
@@ -427,7 +450,7 @@
       Expr(:let, Expr(:(=), :model, :($m.model)), fun.impl)
     )
   else
-    (fun.args, fun.impl)
+    (fun.args, Expr(:let, Expr(:(=), :model, nothing), fun.impl))
   end
 
   JuliaFunction(