@symbolic_models replicates most of the functionality of @syntax

AlgebraicJulia · Sep 19, 2023 · 844207b · 844207b
1 parent 3c8ab11
commit 844207b
Show file tree

Hide file tree

Showing 7 changed files with 566 additions and 203 deletions.
diff --git a/docs/src/concepts/scopes.md b/docs/src/concepts/scopes.md
@@ -2,17 +2,26 @@
 
 Design rationale:
 
-Namespacing should be first class. This doesn't play nicely with deBruijn
-levels; we don't want to do a bunch of math to convert between flattened and
-unflattened versions of contexts. There is an accepted solution to the problem
-of organizing names: use refs! Mathematically speaking, a ref is just a list
+Namespacing, i.e. references which look like `britain.economy.GDP`, should be
+first class. This doesn't play nicely with the internal representation of names
+via [deBruijn levels](https://en.wikipedia.org/wiki/De_Bruijn_index); we don't
+want to do a bunch of math to convert between flattened and nested versions
+of contexts. Conceptually, we should then switch our identifiers to be *lists*
 of deBruijn levels.
 
-However, deBruijn levels are unfriendly for users; we want to be able to give
-names to our variables!
+But there are also significant subtleties in the translation between named and
+nameless representations of identifiers. Also in practice if we are referencing
+something that might change over time, like a C-set, we don't want to use
+deBruijn levels, because we don't want have to change all of our identifiers
+when we delete something, so in the rest of this I will instead say "local
+identifier" instead of deBruijn level to refer to a nameless representation of
+an identifier.
 
-In general, naming is given by a *relation* between symbols and bindings (as
-referred to by deBruijn levels). Each binding may be associated with zero or
+(the name "local identifier" and also some of this philosophy is taken from
+[chit](https://github.com/davidad/chit))
+
+In general, naming is given by a *relation* between symbols and *bindings* (as
+refered to by local identifiers).  Each binding may be associated with zero or
 more symbols, and each symbol might be associated with zero or more bindings.
 
 We name the failure of this relation to be a bijection in several ways.
@@ -55,11 +64,11 @@ scopes, and the most recent scope "wins".
 
 Parsing turns a `Symbol` into an `Ident`. The `Ident` contains the original
 `Symbol` for printing, but it also contains a reference to a scope via ScopeTag,
-and an deBruijn level within that scope. Thus, the name in the `Ident` is never
-needed for later stages of programatic manipulation.
+and an local identifier ("lid") within that scope. Thus, the name in the `Ident`
+is never needed for later stages of programatic manipulation.
 
 Scopes cache the relation between names and bindings, by providing a way to go
-from binding (as reified by deBruijn level) to a set of aliases with
+from binding (as reified by local identifier) to a set of aliases with
 distinguished primary alias, and to go from name to the set of bindings that
 have that name that we need to disambiguate between in the case of an overload.
 

diff --git a/src/models/GATExprUtils.jl b/src/models/GATExprUtils.jl
@@ -0,0 +1,101 @@
+""" Rewriting for GAT expressions.
+
+The current content of this module exists purely for backwards compatibility
+with the existing rewriting in Catlab; the more advanced implementation uses
+e-graphs and works with AlgTerms instead of GATExprs.
+"""
+module GATExprUtils
+export associate, associate_unit_inv, associate_unit,
+  distribute_unary, involute, normalize_zero
+
+using ..SymbolicModels
+
+#Warning: assumes expr has only two args!
+""" Simplify associative binary operation.
+
+Maintains the normal form `op(e1,e2,...)` where `e1`,`e2`,... are expressions
+that are *not* applications of `op()`
+"""
+function associate(expr::E)::E where E <: GATExpr
+  op, e1, e2 = head(expr), first(expr), last(expr)
+  args1 = head(e1) == op ? args(e1) : [e1]
+  args2 = head(e2) == op ? args(e2) : [e2]
+  E([args1; args2], gat_type_args(expr))
+end
+
+""" Simplify associative binary operation with unit.
+
+Reduces a freely generated (typed) monoid to normal form.
+"""
+function associate_unit(expr::GATExpr, unit::Function)::GATExpr
+  e1, e2 = first(expr), last(expr)
+  if (head(e1) == nameof(unit)) e2
+  elseif (head(e2) == nameof(unit)) e1
+  else associate(expr) end
+end
+
+""" Simplify associative binary operation with unit and inverses.
+"""
+function associate_unit_inv(expr::E, unit::Function,
+                            inverse::Function)::GATExpr where E <: GATExpr
+  op, e1, e2 = head(expr), first(expr), last(expr)
+  if (head(e1) == nameof(unit)) e2
+  elseif (head(e2) == nameof(unit)) e1
+  else
+    args1 = head(e1) == op ? args(e1) : [e1]
+    args2 = head(e2) == op ? args(e2) : [e2]
+    while !isempty(args1) && !isempty(args2)
+      l = args1[end]; r = args2[1]
+      if (head(l) == nameof(inverse) && first(l) == r ||
+          head(r) == nameof(inverse) && l == first(r))
+        pop!(args1); popfirst!(args2)
+      else break end
+    end
+    newargs = [args1; args2]
+    # XXX: Assumes that the unit/identity takes exactly one argument, hence this
+    # function will not work for the algebraic theory of groups.
+    if (isempty(newargs)) unit(only(unique(gat_type_args(expr))))
+    elseif (length(newargs) == 1) only(newargs)
+    else E(newargs, gat_type_args(expr)) end
+  end
+end
+
+""" Distribute unary operation over binary operation.
+"""
+function distribute_unary(expr::GATExpr, unary::Function, binary::Function;
+                          unit::Union{Function,Nothing}=nothing,
+                          contravariant::Bool=false)::GATExpr
+  if (head(expr) != nameof(unary)) return expr end
+  @assert length(args(expr)) == 1
+  arg = first(expr)
+  if head(arg) == nameof(binary)
+    binary(map(unary, (contravariant ? reverse : identity)(args(arg))))
+  elseif !isnothing(unit) && head(arg) == nameof(unit)
+    arg
+  else
+    expr
+  end
+end
+
+""" Simplify involutive unary operation.
+"""
+function involute(expr::GATExpr)
+  @assert length(args(expr)) == 1
+  arg = first(expr)
+  head(expr) == head(arg) ? first(arg) : expr
+end
+"""
+If given GATExpr contains a zero morphism,
+collapse the expression to a single zero morphism.
+"""
+function normalize_zero(expr::E;zname=:zeromap) where E <: GATExpr
+  ztype = E
+  for subexpr in args(expr)
+    if head(subexpr) == zname
+      ztype = typeof(subexpr)
+      s,t = gat_type_args(expr)
+      return ztype([s,t],[s,t])
+    end end
+  expr
+end
+end