From 02a363e8eea7fefcfe6b8c1b4d6c895000cd639a Mon Sep 17 00:00:00 2001 From: "Patrick S. Kim" Date: Tue, 10 Jan 2023 14:44:34 -0500 Subject: [PATCH 01/41] adding auglagrangian objective and updating optimizer. --- desc/objectives/__init__.py | 1 + desc/objectives/_auglagrangian.py | 43 ++++++ desc/objectives/_equilibrium.py | 2 + desc/objectives/objective_funs.py | 15 +++ desc/optimize/__init__.py | 2 + desc/optimize/aug_lagrangian_ls_stel.py | 171 ++++++++++++++++++++++++ desc/optimize/optimizer.py | 100 +++++++++++++- 7 files changed, 332 insertions(+), 2 deletions(-) create mode 100644 desc/objectives/_auglagrangian.py create mode 100644 desc/optimize/aug_lagrangian_ls_stel.py diff --git a/desc/objectives/__init__.py b/desc/objectives/__init__.py index f250af128c..5cece5a388 100644 --- a/desc/objectives/__init__.py +++ b/desc/objectives/__init__.py @@ -21,5 +21,6 @@ FixPressure, FixPsi, ) +from ._auglagrangian import AugLagrangianLS from .objective_funs import ObjectiveFunction from .utils import get_equilibrium_objective, get_fixed_boundary_constraints diff --git a/desc/objectives/_auglagrangian.py b/desc/objectives/_auglagrangian.py new file mode 100644 index 0000000000..19985f2875 --- /dev/null +++ b/desc/objectives/_auglagrangian.py @@ -0,0 +1,43 @@ +import numpy as np +from desc.backend import jnp +from desc.derivatives import Derivative +from .objective_funs import ObjectiveFunction + +class AugLagrangianLS(ObjectiveFunction): + + def __init__(self, func, constr): + self.func = func + self.constr = constr + + def scalar(self): + return False + + def name(self): + return "least squares augmented lagrangian" + + def derivatives(self): + return + + def compute(self, x, lmbda, mu): + L = self.func(x) + c = self.compute_constraints(x) + for i in range(len(c)): + c = put(c,i,-lmbda[i]*c[i] + mu[i]/2*c[i]**2) + L = jnp.concatenate((L,c),axis=None) + print("L is evaluated") + return L + + def compute_scalar(self,x,lmbda,mu): + return np.linalg.norm(self.compute(x,lmbda,mu)) + + def callback(self, x, lmbda,mu): + L = self.compute(x,lmbda,mu) + print("The Least Squares Lagrangian is " + str(L)) + + def compute_constraints(self,x): + c = jnp.array([]) + for i in range(len(self.constr)): + c = jnp.concatenate((c,self.constr[i](x)),axis=None) + return c + + diff --git a/desc/objectives/_equilibrium.py b/desc/objectives/_equilibrium.py index 15967d28ed..b4d831c754 100644 --- a/desc/objectives/_equilibrium.py +++ b/desc/objectives/_equilibrium.py @@ -61,9 +61,11 @@ def __init__( normalize_target=True, grid=None, name="force", + equality=True, ): self.grid = grid + self.equality = equality super().__init__( eq=eq, target=target, diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py index 407c9407f3..b6b7c9680a 100644 --- a/desc/objectives/objective_funs.py +++ b/desc/objectives/objective_funs.py @@ -53,7 +53,22 @@ def __init__( if eq is not None: self.build(eq, use_jit=self._use_jit, verbose=verbose) + def combine_args(self,objectives): + self._args = np.concatenate((self.args,[obj.args for obj in objectives.objectives][0])) + self._args = [arg for arg in arg_order if arg in self._args] + self._dimensions = self.objectives[0].dimensions + self._dim_x = 0 + self._x_idx = {} + for arg in self.args: + self.x_idx[arg] = np.arange(self._dim_x, self._dim_x + self.dimensions[arg]) + self._dim_x += self.dimensions[arg] + + objectives._args = self._args + objectives._dimensions = self._dimensions + objectives._dim_x = self._dim_x + objectives._x_idx = self._x_idx + def 
_set_state_vector(self): """Set state vector components, dimensions, and indices.""" self._args = np.concatenate([obj.args for obj in self.objectives]) diff --git a/desc/optimize/__init__.py b/desc/optimize/__init__.py index 92f735508c..6b6d10b8d1 100644 --- a/desc/optimize/__init__.py +++ b/desc/optimize/__init__.py @@ -4,3 +4,5 @@ from .least_squares import lsqtr from .optimizer import Optimizer from .stochastic import sgd +from .aug_lagrangian_ls_stel import fmin_lag_ls_stel + diff --git a/desc/optimize/aug_lagrangian_ls_stel.py b/desc/optimize/aug_lagrangian_ls_stel.py new file mode 100644 index 0000000000..fbdc60c508 --- /dev/null +++ b/desc/optimize/aug_lagrangian_ls_stel.py @@ -0,0 +1,171 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed Jul 13 10:39:48 2022 + +@author: pk123 +""" + +import numpy as np +from termcolor import colored +from desc.backend import jnp +from .utils import ( + check_termination, + print_header_nonlinear, + print_iteration_nonlinear, + STATUS_MESSAGES, + compute_jac_scale, + evaluate_quadratic_form_jac, +) + +from .tr_subproblems import ( + solve_trust_region_dogleg, + solve_trust_region_2d_subspace, + update_tr_radius, +) +from scipy.optimize import OptimizeResult, least_squares + +from desc.optimize.fmin_scalar import fmintr +from desc.optimize.least_squares import lsqtr + +from desc.derivatives import Derivative +from desc.objectives._auglagrangian import AugLagrangianLS + + +def conv_test(x,L,gL): + return np.linalg.norm(jnp.dot(gL.T,L)) + +def fmin_lag_ls_stel( + fun, + x0, + grad, + eq_constr, + ineq_constr, + x_scale=1, + ftol=1e-6, + xtol=1e-6, + gtol=1e-6, + verbose=1, + maxiter=None, + tr_method="svd", + callback=None, + options={}, +): + nfev = 0 + ngev = 0 + nhev = 0 + iteration = 0 + + N = x0.size + x = x0.copy() + f = fun(x, *args) + nfev += 1 + g = grad(x, *args) + ngev += 1 + + eq = eq_constr(x) if eq_constr is None else jnp.array([]) + ineq = ineq_constr(x) if ineq_constr is None else jnp.array([]) + eq_dim = len(eq.flatten()) + ineq_dim = len(ineq.flatten()) + x = np.append(x,1.0*np.ones(ineq_dim)) + + if maxiter is None: + maxiter = N * 100 + + mu = options.pop("mu", 10*jnp.ones(eq_dim + ineq_dim)) + lmbda = options.pop("lmbda", 10*jnp.ones(eq_dim + ineq_dim)) + bounds = options.pop("bounds", jnp.zeros(eq_dim + ineq_dim)) + + def recover(x): + return x[0:len(x)-ineq_dim] + + def wrapped_constraint(x): + c = np.array([]) + slack = x[len(recover(x)):]**2 + + eq = eq_constr(x) + ineq = ineq_constr(x) + c = jnp.append(c,eq_constr(x)) + slack = jnp.append(jnp.zeros(len(eq.flatten())),slack) + c = jnp.append(c,ineq) + c = c - bounds + slack + return c + + def wrapped_obj(x): + return fun(recover(x)) + + constr = np.array([wrapped_constraint]) + + L = AugLagrangianLS(wrapped_obj, constr) + gradL = Derivative(L.compute,0,"fwd") + hessL = Derivative(L.compute,argnum=0,mode="hess") + + gtolk = 1/(10*np.linalg.norm(mu0)) + ctolk = 1/(np.linalg.norm(mu0)**(0.1)) + xold = x + f = fun(recover(x)) + fold = f + + while iteration < maxiter: + xk = lsqtr(L.compute,x,gradL,args=(lmbda,mu,),gtol=gtolk,maxiter=10,verbose=2) + x = xk['x'] + f = fun(recover(x)) + cv = L.compute_constraints(x) + c = np.max(cv) + + if np.linalg.norm(xold - x) < xtol: + print("xtol satisfied\n") + break + + if (np.linalg.norm(f) - np.linalg.norm(fold))/np.linalg.norm(fold) > 0.1: + mu = mu / 2 + print("Decreasing mu. 
mu is now " + str(np.mean(mu))) + + elif c < ctolk: + if c < ctol and conv_test(x,L.compute(x,lmbda,mu),gradL(x,lmbda,mu)) < gtol: + break + + else: + print("Updating lambda") + lmbda = lmbda - mu*cv + ctolk = ctolk/(np.max(mu)**(0.9)) + gtolk = gtolk/(np.max(mu)) + else: + mu = 5.0 * mu + ctolk = ctolk/(np.max(mu)**(0.1)) + gtolk = gtolk/np.max(mu) + + + iteration = iteration + 1 + xold = x + fold = f + + print("mu is now " + str(mu)) + print("The average c is " + str(np.mean(cv))) + print("The max c is " + str(np.max(cv))) + print("The objective function is " + str(np.linalg.norm(f))) + print("The constraints are " + str(np.linalg.norm(cv))) + penalty = mu[0]*np.dot(cv,cv)/2 + print("The penalty term is " + str(penalty)) + print("The aspect ratio constraint is " + str(cv[-1])) + print("x is " + str(recover(x))) + print("The iteration is " + str(iteration)) + + g = gradL(x,lmbda,mu) + f = fun(recover(x)) + success = True + message = "successful" + result = OptimizeResult( + x=x, + success=success, + fun=f, + grad=g, + optimality=jnp.linalg.norm(g), + nfev=nfev, + ngev=ngev, + nhev=nhev, + nit=iteration, + message=message, + ) + result["allx"] = [recover(x)] + return result diff --git a/desc/optimize/optimizer.py b/desc/optimize/optimizer.py index a01397c6f5..3cc3a86f46 100644 --- a/desc/optimize/optimizer.py +++ b/desc/optimize/optimizer.py @@ -24,6 +24,7 @@ from .fmin_scalar import fmintr from .least_squares import lsqtr from .stochastic import sgd +from .aug_lagrangian_ls_stel import fmin_lag_ls_stel from .utils import find_matching_inds @@ -222,7 +223,7 @@ def optimize( # noqa: C901 - FIXME: simplify this options.setdefault("initial_trust_radius", 0.5) options.setdefault("max_trust_radius", 1.0) - linear_constraints, nonlinear_constraints = _parse_constraints(constraints) + linear_constraints, nonlinear_constraints, equality_constraints, inequality_constraints = _parse_constraints(constraints) # wrap nonlinear constraints if necessary wrapped = False if len(nonlinear_constraints) > 0 and ( @@ -233,6 +234,19 @@ def optimize( # noqa: C901 - FIXME: simplify this objective, nonlinear_constraints, self.method, options ) + elif self.method in Optimizer._constrained_methods: + + wrapped = False + constraint_objectives, equality_objectives, inequality_objectives = _wrap_constraints(nonlinear_constraints, equality_constraints, inequality_constraints) + + if not constraint_objectives.built: + constraint_objectives.build(eq,verbose=verbose) + if not equality_objectives.built: + equality_objectives.build(eq,verbose=verbose) + if not inequality_objectives.built: + inequality_objectives.build(eq,verbose=verbose) + + if not objective.built: objective.build(eq, verbose=verbose) if not objective.compiled: @@ -242,6 +256,14 @@ def optimize( # noqa: C901 - FIXME: simplify this if not constraint.built: constraint.build(eq, verbose=verbose) + if self.method in Optimizer._constrained_methods: + objective.combine_args(constraint_objectives) + if len(inequality_constraints) != 0: + objective.combine_args(inequality_objectives) + if len(equality_constraints) != 0: + objective.combine_args(equality_objectives) + + if objective.scalar and (self.method in Optimizer._least_squares_methods): warnings.warn( colored( @@ -264,6 +286,13 @@ def optimize( # noqa: C901 - FIXME: simplify this project, recover, ) = _wrap_objective_with_constraints(objective, linear_constraints, self.method) + + if self.method in Optimizer._constrained_methods: + ( + compute_eq_constraints_wrapped, + compute_ineq_constraints_wrapped, + ) = 
_wrap_constraint_objectives(objective, linear_constraints, equality_objectives, inequality_objectives) + timer.stop("linear constraint factorize") if verbose > 1: timer.disp("linear constraint factorize") @@ -286,6 +315,7 @@ def optimize( # noqa: C901 - FIXME: simplify this if verbose > 0: print("Starting optimization") timer.start("Solution time") + print("method is " + str(self.method)) if self.method in Optimizer._scipy_scalar_methods: @@ -386,6 +416,25 @@ def optimize( # noqa: C901 - FIXME: simplify this options=options, ) + elif self.method in Optimizer._desc_constrained_least_squares_methods: + + result = fmin_lag_stel( + compute_wrapped, + x0=x0_reduced, + jac=jac_wrapped, + eq_constr=compute_eq_constraints_wrapped, + ineq_constr=compute_ineq_constraints_wrapped, + args=(), + x_scale=x_scale, + ftol=stoptol["ftol"], + xtol=stoptol["xtol"], + gtol=stoptol["gtol"], + maxiter=stoptol["maxiter"], + verbose=disp, + callback=None, + options=options, + ) + if wrapped: # history from objective includes steps the optimizer didn't accept # need to find where the optimizer actually stepped and only take those @@ -431,6 +480,13 @@ def _parse_constraints(constraints): nonlinear_constraints = tuple( constraint for constraint in constraints if not constraint.linear ) + equality_constraints = tuple( + constraint for constraint in nonlinear_constraints if constraint.equality + ) + inequality_constraints = tuple( + constraint for constraint in nonlinear_constraints if not constraint.equality + ) + if any(isinstance(lc, FixCurrent) for lc in linear_constraints) and any( isinstance(lc, FixIota) for lc in linear_constraints ): @@ -438,7 +494,7 @@ def _parse_constraints(constraints): "Toroidal current and rotational transform cannot be " + "constrained simultaneously." 
) - return linear_constraints, nonlinear_constraints + return linear_constraints, nonlinear_constraints, equality_constraints, inequality_constraints def _wrap_objective_with_constraints(objective, linear_constraints, method): @@ -484,6 +540,25 @@ def jac_wrapped(x_reduced): recover, ) +def _wrap_constraint_objectives(objective, linear_constraints, equality_objectives, inequality_objectives): + _, _, _, _, Z, unfixed_idx, project, recover = factorize_linear_constraints( + linear_constraints, objective.args + ) + + def compute_eq_constraints_wrapped(x_reduced): + x = recover(x_reduced) + c = mu_0*equality_objectives.compute(x) + return c + + def compute_ineq_constraints_wrapped(x_reduced): + x = recover(x_reduced) + c = inequality_objectives.compute(x) + return c + + return ( + compute_eq_constraints_wrapped, + compute_ineq_constraints_wrapped, + ) def _wrap_nonlinear_constraints(objective, nonlinear_constraints, method, options): """Use WrappedEquilibriumObjective to hanle nonlinear equilibrium constraints.""" @@ -513,7 +588,28 @@ def _wrap_nonlinear_constraints(objective, nonlinear_constraints, method, option ) return objective +def _wrap_constraints(nonlinear_constraints, equality_constraints, inequality_constraints): + for constraint in nonlinear_constraints: + if not isinstance( + constraint, + ( + ForceBalance, + RadialForceBalance, + HelicalForceBalance, + CurrentDensity, + ), + ): + raise ValueError( + "optimizer method {} ".format(self.method) + + "cannot handle general nonlinear constraint {}.".format(constraint) + ) + + constraint_objectives = ObjectiveFunction(nonlinear_constraints, eq) + equality_objectives = ObjectiveFunction(equality_constraints,eq) + inequality_objectives = ObjectiveFunction(inequality_constraints,eq) + return constraint_objectives, equality_objectives, inequality_objectives + def _get_default_tols( method, ftol=None, From 315a200d704ca5527b6b644d03ed80d8610c39fc Mon Sep 17 00:00:00 2001 From: "Patrick S. Kim" Date: Fri, 13 Jan 2023 17:10:16 -0500 Subject: [PATCH 02/41] fixing constrained implementation. 
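Fixes from the initial commit: the None-checks guarding the constraint
callables were inverted (eq_constr was called exactly when it was absent),
the tolerance initialization referenced an undefined mu0 instead of mu,
the optimizer dispatched to fmin_lag_stel instead of fmin_lag_ls_stel and
passed jac= where the solver expects grad=, and the stray eq argument to
ObjectiveFunction was dropped so the constraint objectives are no longer
built from an equilibrium at construction time. The corrected guards, as
they appear in the diff below:

    eq = eq_constr(x) if eq_constr is not None else jnp.array([])
    ineq = ineq_constr(x) if ineq_constr is not None else jnp.array([])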
--- desc/objectives/_auglagrangian.py | 1 + desc/optimize/aug_lagrangian_ls_stel.py | 20 +++++++++--------- desc/optimize/optimizer.py | 27 ++++++++++++++----------- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/desc/objectives/_auglagrangian.py b/desc/objectives/_auglagrangian.py index 19985f2875..12846d5d0c 100644 --- a/desc/objectives/_auglagrangian.py +++ b/desc/objectives/_auglagrangian.py @@ -2,6 +2,7 @@ from desc.backend import jnp from desc.derivatives import Derivative from .objective_funs import ObjectiveFunction +from desc.backend import put class AugLagrangianLS(ObjectiveFunction): diff --git a/desc/optimize/aug_lagrangian_ls_stel.py b/desc/optimize/aug_lagrangian_ls_stel.py index fbdc60c508..74cb07d72f 100644 --- a/desc/optimize/aug_lagrangian_ls_stel.py +++ b/desc/optimize/aug_lagrangian_ls_stel.py @@ -41,10 +41,12 @@ def fmin_lag_ls_stel( grad, eq_constr, ineq_constr, + args=(), x_scale=1, ftol=1e-6, xtol=1e-6, gtol=1e-6, + ctol=1e-6, verbose=1, maxiter=None, tr_method="svd", @@ -62,9 +64,9 @@ def fmin_lag_ls_stel( nfev += 1 g = grad(x, *args) ngev += 1 - - eq = eq_constr(x) if eq_constr is None else jnp.array([]) - ineq = ineq_constr(x) if ineq_constr is None else jnp.array([]) + + eq = eq_constr(x) if eq_constr is not None else jnp.array([]) + ineq = ineq_constr(x) if ineq_constr is not None else jnp.array([]) eq_dim = len(eq.flatten()) ineq_dim = len(ineq.flatten()) x = np.append(x,1.0*np.ones(ineq_dim)) @@ -82,9 +84,9 @@ def recover(x): def wrapped_constraint(x): c = np.array([]) slack = x[len(recover(x)):]**2 - - eq = eq_constr(x) - ineq = ineq_constr(x) + eq = eq_constr(x) if eq_constr is not None else jnp.array([]) + ineq = ineq_constr(x) if ineq_constr is not None else jnp.array([]) + c = jnp.append(c,eq_constr(x)) slack = jnp.append(jnp.zeros(len(eq.flatten())),slack) c = jnp.append(c,ineq) @@ -95,13 +97,13 @@ def wrapped_obj(x): return fun(recover(x)) constr = np.array([wrapped_constraint]) - + print("constraint is " + str(np.mean(wrapped_constrained(x)))) L = AugLagrangianLS(wrapped_obj, constr) gradL = Derivative(L.compute,0,"fwd") hessL = Derivative(L.compute,argnum=0,mode="hess") - gtolk = 1/(10*np.linalg.norm(mu0)) - ctolk = 1/(np.linalg.norm(mu0)**(0.1)) + gtolk = 1/(10*np.linalg.norm(mu)) + ctolk = 1/(np.linalg.norm(mu)**(0.1)) xold = x f = fun(recover(x)) fold = f diff --git a/desc/optimize/optimizer.py b/desc/optimize/optimizer.py index 3cc3a86f46..54b7bcedaf 100644 --- a/desc/optimize/optimizer.py +++ b/desc/optimize/optimizer.py @@ -26,7 +26,7 @@ from .stochastic import sgd from .aug_lagrangian_ls_stel import fmin_lag_ls_stel from .utils import find_matching_inds - +from scipy.constants import mu_0 class Optimizer(IOAble): """A helper class to wrap several optimization routines. 
@@ -71,7 +71,7 @@ class Optimizer(IOAble): _scipy_constrained_scalar_methods = ["scipy-trust-constr"] _scipy_constrained_least_squares_methods = [] _desc_constrained_scalar_methods = [] - _desc_constrained_least_squares_methods = [] + _desc_constrained_least_squares_methods = ["lsq-auglag"] _scalar_methods = ( _desc_scalar_methods + _scipy_scalar_methods @@ -110,6 +110,7 @@ class Optimizer(IOAble): + _desc_scalar_methods + _desc_least_squares_methods + _desc_stochastic_methods + + _desc_constrained_least_squares_methods ) def __init__(self, method): @@ -241,9 +242,9 @@ def optimize( # noqa: C901 - FIXME: simplify this if not constraint_objectives.built: constraint_objectives.build(eq,verbose=verbose) - if not equality_objectives.built: + if not equality_objectives.built and len(equality_constraints) != 0: equality_objectives.build(eq,verbose=verbose) - if not inequality_objectives.built: + if not inequality_objectives.built and len(inequality_constraints) != 0: inequality_objectives.build(eq,verbose=verbose) @@ -418,12 +419,14 @@ def optimize( # noqa: C901 - FIXME: simplify this elif self.method in Optimizer._desc_constrained_least_squares_methods: - result = fmin_lag_stel( + eq_constr = compute_eq_constraints_wrapped if len(equality_constraints) != 0 else None + ineq_constr = compute_ineq_constraints_wrapped if len(inequality_constraints) !=0 else None + result = fmin_lag_ls_stel( compute_wrapped, x0=x0_reduced, - jac=jac_wrapped, - eq_constr=compute_eq_constraints_wrapped, - ineq_constr=compute_ineq_constraints_wrapped, + grad=jac_wrapped, + eq_constr=eq_constr, + ineq_constr=ineq_constr, args=(), x_scale=x_scale, ftol=stoptol["ftol"], @@ -547,7 +550,7 @@ def _wrap_constraint_objectives(objective, linear_constraints, equality_objectiv def compute_eq_constraints_wrapped(x_reduced): x = recover(x_reduced) - c = mu_0*equality_objectives.compute(x) + c = equality_objectives.compute(x) return c def compute_ineq_constraints_wrapped(x_reduced): @@ -604,9 +607,9 @@ def _wrap_constraints(nonlinear_constraints, equality_constraints, inequality_co + "cannot handle general nonlinear constraint {}.".format(constraint) ) - constraint_objectives = ObjectiveFunction(nonlinear_constraints, eq) - equality_objectives = ObjectiveFunction(equality_constraints,eq) - inequality_objectives = ObjectiveFunction(inequality_constraints,eq) + constraint_objectives = ObjectiveFunction(nonlinear_constraints) + equality_objectives = ObjectiveFunction(equality_constraints) + inequality_objectives = ObjectiveFunction(inequality_constraints) return constraint_objectives, equality_objectives, inequality_objectives From f54746f94c606cdee68dfd4d39a393785091d715 Mon Sep 17 00:00:00 2001 From: "Patrick S. Kim" Date: Mon, 30 Jan 2023 10:53:29 -0500 Subject: [PATCH 03/41] adding scalar aug lagrangian. 
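The scalar variant implements the classical augmented Lagrangian
L(x, lmbda, mu) = f(x) - lmbda^T c(x) + (mu/2) c(x)^T c(x), in contrast to
the least-squares variant, which appends one penalized residual per
constraint to the objective residual vector. A minimal self-contained
sketch of the scalar form, with f and c as illustrative stand-ins for the
wrapped objective and constraint:

    import jax.numpy as jnp

    def aug_lagrangian(x, lmbda, mu, f, c):
        # f: R^n -> R scalar objective, c: R^n -> R^m equality constraints
        cv = c(x)
        return f(x) - jnp.dot(lmbda, cv) + mu / 2 * jnp.dot(cv, cv)

    # example: minimize x0^2 + x1^2 subject to x0 + x1 - 1 = 0
    f = lambda x: jnp.dot(x, x)
    c = lambda x: jnp.array([x[0] + x[1] - 1.0])
    val = aug_lagrangian(jnp.array([0.5, 0.5]), jnp.ones(1), 10.0, f, c)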
--- desc/objectives/_auglagrangian.py | 37 +++++++++++++++++++++++-- desc/optimize/__init__.py | 1 + desc/optimize/aug_lagrangian_ls_stel.py | 11 +++++--- 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/desc/objectives/_auglagrangian.py b/desc/objectives/_auglagrangian.py index 12846d5d0c..0b9f896d74 100644 --- a/desc/objectives/_auglagrangian.py +++ b/desc/objectives/_auglagrangian.py @@ -22,8 +22,9 @@ def derivatives(self): def compute(self, x, lmbda, mu): L = self.func(x) c = self.compute_constraints(x) - for i in range(len(c)): - c = put(c,i,-lmbda[i]*c[i] + mu[i]/2*c[i]**2) + #for i in range(len(c)): + # c = put(c,i,-lmbda[i]*c[i] + mu[i]/2*c[i]**2) + c = -lmbda*c + mu/2*c**2 L = jnp.concatenate((L,c),axis=None) print("L is evaluated") return L @@ -41,4 +42,36 @@ def compute_constraints(self,x): c = jnp.concatenate((c,self.constr[i](x)),axis=None) return c +class AugLagrangian(ObjectiveFunction): + + def __init__(self, func, constr): + self.func = func + self.constr = constr + + def scalar(self): + return True + + def name(self): + return "augmented lagrangian" + + def derivatives(self): + return + + def compute(self, x, lmbda, mu): + L = self.func(x) + c = self.compute_constraints(x) + return L - jnp.dot(lmbda,c) + mu/2*jnp.dot(c,c) + + def compute_scalar(self,x,lmbda,mu): + return self.compute(x,lmbda,mu) + + def callback(self, x, lmbda, mu): + L = self.compute(x,lmbda,mu) + print("The Lagrangian is " + str(L)) + + def compute_constraints(self,x): + c = jnp.array([]) + for i in range(len(self.constr)): + c = jnp.concatenate((c,self.constr[i](x)),axis=None) + return c diff --git a/desc/optimize/__init__.py b/desc/optimize/__init__.py index 6b6d10b8d1..1d40aca5ba 100644 --- a/desc/optimize/__init__.py +++ b/desc/optimize/__init__.py @@ -5,4 +5,5 @@ from .optimizer import Optimizer from .stochastic import sgd from .aug_lagrangian_ls_stel import fmin_lag_ls_stel +from .aug_lagrangian import fmin_lag_stel diff --git a/desc/optimize/aug_lagrangian_ls_stel.py b/desc/optimize/aug_lagrangian_ls_stel.py index c28c9cc868..cd97b72b78 100644 --- a/desc/optimize/aug_lagrangian_ls_stel.py +++ b/desc/optimize/aug_lagrangian_ls_stel.py @@ -84,10 +84,11 @@ def recover(x): def wrapped_constraint(x): c = np.array([]) slack = x[len(recover(x)):]**2 - eq = eq_constr(x) if eq_constr is not None else jnp.array([]) - ineq = ineq_constr(x) if ineq_constr is not None else jnp.array([]) + x_recov = recover(x) + eq = eq_constr(x_recov) if eq_constr is not None else jnp.array([]) + ineq = ineq_constr(x_recov) if ineq_constr is not None else jnp.array([]) - c = jnp.append(c,eq_constr(x)) + c = jnp.append(c,eq_constr(x_recov)) slack = jnp.append(jnp.zeros(len(eq.flatten())),slack) c = jnp.append(c,ineq) c = c - bounds + slack @@ -109,7 +110,9 @@ def wrapped_obj(x): fold = f while iteration < maxiter: - xk = lsqtr(L.compute,x,gradL,args=(lmbda,mu,),gtol=gtolk,maxiter=10,verbose=2) + #xk = lsqtr(L.compute,x,gradL,args=(lmbda,mu,),gtol=gtolk,maxiter=10,verbose=2) + xk = minimize(L.compute,x,args=(lmbda,mu),method="trust-exact",jac=gradL, hess=hessL, options = {"maxiter": 10.0,"verbose":3}) + x = xk['x'] f = fun(recover(x)) cv = L.compute_constraints(x) From f17d881465ad82356d671d81b2ba5fbf51574bdf Mon Sep 17 00:00:00 2001 From: "Patrick S. Kim" Date: Mon, 30 Jan 2023 16:24:44 -0500 Subject: [PATCH 04/41] adding equality parameters for mercier and asepct ratio constraints. 
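The equality flag added here to AspectRatio and MagneticWell (defaulting to
True, matching the flag already added to ForceBalance) is what
_parse_constraints uses to split the nonlinear constraints into equality
and inequality sets, as introduced in the first patch:

    equality_constraints = tuple(
        constraint for constraint in nonlinear_constraints if constraint.equality
    )
    inequality_constraints = tuple(
        constraint for constraint in nonlinear_constraints if not constraint.equality
    )

Constraints with equality=False are routed through the slack-variable path
in the augmented Lagrangian solvers.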
--- desc/objectives/__init__.py | 2 +- desc/objectives/_geometry.py | 4 ++++ desc/objectives/_stability.py | 4 ++++ desc/optimize/aug_lagrangian_ls_stel.py | 3 +-- desc/optimize/optimizer.py | 31 ++++++++++++++++++++++++- 5 files changed, 40 insertions(+), 4 deletions(-) diff --git a/desc/objectives/__init__.py b/desc/objectives/__init__.py index 23c82200a4..7eb004d9e6 100644 --- a/desc/objectives/__init__.py +++ b/desc/objectives/__init__.py @@ -21,6 +21,6 @@ FixPressure, FixPsi, ) -from ._auglagrangian import AugLagrangianLS +from ._auglagrangian import AugLagrangianLS, AugLagrangian from .objective_funs import ObjectiveFunction from .utils import get_equilibrium_objective, get_fixed_boundary_constraints diff --git a/desc/objectives/_geometry.py b/desc/objectives/_geometry.py index 2990246d0d..d9cb4d996e 100644 --- a/desc/objectives/_geometry.py +++ b/desc/objectives/_geometry.py @@ -4,6 +4,7 @@ from desc.compute import get_params, get_profiles, get_transforms from desc.grid import QuadratureGrid from desc.utils import Timer +from desc.backend import jnp from .normalization import compute_scaling_factors from .objective_funs import _Objective @@ -51,9 +52,12 @@ def __init__( normalize_target=True, grid=None, name="aspect ratio", + equality=True ): self.grid = grid + self.equality=equality + super().__init__( eq=eq, target=target, diff --git a/desc/objectives/_stability.py b/desc/objectives/_stability.py index a2423156d7..052656ff32 100644 --- a/desc/objectives/_stability.py +++ b/desc/objectives/_stability.py @@ -231,8 +231,12 @@ def __init__( normalize_target=False, grid=None, name="Magnetic Well", + equality=True ): + self.grid = grid + self.equality=equality + super().__init__( eq=eq, target=target, diff --git a/desc/optimize/aug_lagrangian_ls_stel.py b/desc/optimize/aug_lagrangian_ls_stel.py index cd97b72b78..8cd911b37c 100644 --- a/desc/optimize/aug_lagrangian_ls_stel.py +++ b/desc/optimize/aug_lagrangian_ls_stel.py @@ -110,8 +110,7 @@ def wrapped_obj(x): fold = f while iteration < maxiter: - #xk = lsqtr(L.compute,x,gradL,args=(lmbda,mu,),gtol=gtolk,maxiter=10,verbose=2) - xk = minimize(L.compute,x,args=(lmbda,mu),method="trust-exact",jac=gradL, hess=hessL, options = {"maxiter": 10.0,"verbose":3}) + xk = lsqtr(L.compute,x,gradL,args=(lmbda,mu,),gtol=gtolk,maxiter=10,verbose=2) x = xk['x'] f = fun(recover(x)) diff --git a/desc/optimize/optimizer.py b/desc/optimize/optimizer.py index 0a8767950d..e648b3b9e3 100644 --- a/desc/optimize/optimizer.py +++ b/desc/optimize/optimizer.py @@ -16,6 +16,8 @@ ObjectiveFunction, RadialForceBalance, WrappedEquilibriumObjective, + AspectRatio, + MagneticWell ) from desc.objectives.utils import factorize_linear_constraints from desc.utils import Timer @@ -25,6 +27,7 @@ from .least_squares import lsqtr from .stochastic import sgd from .aug_lagrangian_ls_stel import fmin_lag_ls_stel +from .aug_lagrangian import fmin_lag_stel class Optimizer(IOAble): """A helper class to wrap several optimization routines. 
@@ -68,7 +71,7 @@ class Optimizer(IOAble): _hessian_free_methods = ["scipy-bfgs", "dogleg-bfgs", "subspace-bfgs"] _scipy_constrained_scalar_methods = ["scipy-trust-constr"] _scipy_constrained_least_squares_methods = [] - _desc_constrained_scalar_methods = [] + _desc_constrained_scalar_methods = ["auglag"] _desc_constrained_least_squares_methods = ["lsq-auglag"] _scalar_methods = ( _desc_scalar_methods @@ -109,6 +112,8 @@ class Optimizer(IOAble): + _desc_least_squares_methods + _desc_stochastic_methods + _desc_constrained_least_squares_methods + + _desc_constrained_scalar_methods + ) def __init__(self, method): @@ -436,6 +441,28 @@ def optimize( # noqa: C901 - FIXME: simplify this options=options, ) + elif self.method in Optimizer._desc_constrained_scalar_methods: + + eq_constr = compute_eq_constraints_wrapped if len(equality_constraints) != 0 else None + ineq_constr = compute_ineq_constraints_wrapped if len(inequality_constraints) !=0 else None + result = fmin_lag_stel( + compute_scalar_wrapped, + x0=x0_reduced, + grad=jac_wrapped, + eq_constr=eq_constr, + ineq_constr=ineq_constr, + args=(), + x_scale=x_scale, + ftol=stoptol["ftol"], + xtol=stoptol["xtol"], + gtol=stoptol["gtol"], + maxiter=stoptol["maxiter"], + verbose=disp, + callback=None, + options=options, + ) + + if wrapped: result["history"] = objective.history else: @@ -589,6 +616,8 @@ def _wrap_constraints(nonlinear_constraints, equality_constraints, inequality_co RadialForceBalance, HelicalForceBalance, CurrentDensity, + MagneticWell, + AspectRatio ), ): raise ValueError( From 979f785973e015531ad7de0a40136eca747ff898 Mon Sep 17 00:00:00 2001 From: "Patrick S. Kim" Date: Mon, 30 Jan 2023 16:28:56 -0500 Subject: [PATCH 05/41] actually adding scalar aug lag this time. --- desc/optimize/aug_lagrangian.py | 179 ++++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 desc/optimize/aug_lagrangian.py diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py new file mode 100644 index 0000000000..c0aec4fabb --- /dev/null +++ b/desc/optimize/aug_lagrangian.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed Jul 13 10:39:48 2022 + +@author: pk123 +""" + +import numpy as np +from termcolor import colored +from desc.backend import jnp +from .utils import ( + check_termination, + print_header_nonlinear, + print_iteration_nonlinear, + STATUS_MESSAGES, + compute_jac_scale, + evaluate_quadratic_form_jac, +) + +from .tr_subproblems import ( + solve_trust_region_dogleg, + solve_trust_region_2d_subspace, + update_tr_radius, +) +from scipy.optimize import OptimizeResult, least_squares, minimize + +from desc.optimize.fmin_scalar import fmintr +from desc.optimize.least_squares import lsqtr + +from desc.derivatives import Derivative +from desc.objectives._auglagrangian import AugLagrangian + + +def conv_test(x,L,gL): + return np.linalg.norm(jnp.dot(gL.T,L)) + +def fmin_lag_stel( + fun, + x0, + grad, + eq_constr, + ineq_constr, + args=(), + x_scale=1, + ftol=1e-6, + xtol=1e-6, + gtol=1e-6, + ctol=1e-6, + verbose=1, + maxiter=None, + tr_method="svd", + callback=None, + options={}, +): + nfev = 0 + ngev = 0 + nhev = 0 + iteration = 0 + + N = x0.size + x = x0.copy() + f = fun(x, *args) + nfev += 1 + g = grad(x, *args) + ngev += 1 + + eq = eq_constr(x) if eq_constr is not None else jnp.array([]) + ineq = ineq_constr(x) if ineq_constr is not None else jnp.array([]) + eq_dim = len(eq.flatten()) + ineq_dim = len(ineq.flatten()) + x = np.append(x,1.0*np.ones(ineq_dim)) + + 
if maxiter is None: + maxiter = N * 100 + + mu = options.pop("mu", 100) + lmbda = options.pop("lmbda", 0.01*jnp.ones(eq_dim + ineq_dim)) + bounds = options.pop("bounds", jnp.zeros(eq_dim + ineq_dim)) + + def recover(x): + return x[0:len(x)-ineq_dim] + + def wrapped_constraint(x): + c = np.array([]) + slack = x[len(recover(x)):]**2 + x_recov = recover(x) + eq = eq_constr(x_recov) if eq_constr is not None else jnp.array([]) + ineq = ineq_constr(x_recov) if ineq_constr is not None else jnp.array([]) + + c = jnp.append(c,eq_constr(x_recov)) + slack = jnp.append(jnp.zeros(len(eq.flatten())),slack) + c = jnp.append(c,ineq) + c = c - bounds + slack + return c + + def wrapped_obj(x): + return fun(recover(x)) + + constr = np.array([wrapped_constraint]) + print("constraint is " + str(np.mean(wrapped_constraint(x)))) + L = AugLagrangian(wrapped_obj, constr) + gradL = Derivative(L.compute,0,"fwd") + hessL = Derivative(L.compute,argnum=0,mode="hess") + + gtolk = 1/(10*mu) + ctolk = 1/(mu**(0.1)) + xold = x + f = fun(recover(x)) + fold = f + + print("The initial obj func is " + str(f)) + cv = L.compute_constraints(x) + print("The constraint term is " + str(-jnp.dot(lmbda,cv)+mu*jnp.dot(cv,cv))) + + while iteration < maxiter: + xk = fmintr(L.compute,x,gradL,args=(lmbda,mu,),gtol=gtolk,maxiter=10,verbose=2) +# xk = minimize(L.compute,x,args=(lmbda,mu),method="trust-exact",jac=gradL, hess=hessL, options = {"maxiter": 10.0,"verbose":3}) + + x = xk['x'] + f = fun(recover(x)) + cv = L.compute_constraints(x) + c = np.max(cv) + + if np.linalg.norm(xold - x) < xtol: + print("xtol satisfied\n") + break + + if (np.linalg.norm(f) - np.linalg.norm(fold))/np.linalg.norm(fold) > 0.1: + mu = mu / 2 + print("Decreasing mu. mu is now " + str(np.mean(mu))) + + elif c < ctolk: + if c < ctol and gradL(x,lmbda,mu) < gtol: + break + + else: + print("Updating lambda") + lmbda = lmbda - mu*cv + ctolk = ctolk/(mu**(0.9)) + gtolk = gtolk/(mu) + else: + mu = 5.0 * mu + ctolk = ctolk/(mu**(0.1)) + gtolk = gtolk/mu + + + iteration = iteration + 1 + xold = x + fold = f + + print("mu is now " + str(mu)) + print("The average c is " + str(np.mean(cv))) + print("The max c is " + str(np.max(cv))) + print("The objective function is " + str(np.linalg.norm(f))) + print("The constraints are " + str(np.linalg.norm(cv))) + penalty = mu*np.dot(cv,cv)/2 + print("The penalty term is " + str(penalty)) + print("The aspect ratio constraint is " + str(cv[-1])) + print("The iteration is " + str(iteration)) + + g = gradL(x,lmbda,mu) + f = fun(recover(x)) + success = True + message = "successful" + result = OptimizeResult( + x=x, + success=success, + fun=f, + grad=g, + optimality=jnp.linalg.norm(g), + nfev=nfev, + ngev=ngev, + nhev=nhev, + nit=iteration, + message=message, + ) + result["allx"] = [recover(x)] + return result From e2494158b9969c41835b98ed8e5f9c6898d4996d Mon Sep 17 00:00:00 2001 From: "Patrick S. Kim" Date: Mon, 30 Jan 2023 23:35:18 -0500 Subject: [PATCH 06/41] formatting issues. 
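Beyond import sorting and line wrapping, this pass removes the debug prints
from the solver loops, drops the commented-out element-wise put() loop in
favor of the vectorized penalty c = -lmbda * c + mu / 2 * c**2, and adds a
docstring to AugLagrangianLS. Each constraint now contributes one residual
to the vector handed to lsqtr; a minimal sketch of that residual form, with
f_res and c as illustrative stand-ins for the wrapped objective residuals
and constraint:

    import jax.numpy as jnp

    def ls_aug_lagrangian(x, lmbda, mu, f_res, c):
        # f_res: vector of objective residuals, c: constraint values
        cv = c(x)
        penalty = -lmbda * cv + mu / 2 * cv**2  # one residual per constraint
        return jnp.concatenate((f_res(x), penalty), axis=None)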
--- desc/objectives/_auglagrangian.py | 75 +++++++------- desc/objectives/_geometry.py | 5 +- desc/optimize/__init__.py | 5 +- desc/optimize/aug_lagrangian.py | 128 ++++++++++------------- desc/optimize/aug_lagrangian_ls_stel.py | 129 ++++++++++-------------- desc/optimize/optimizer.py | 105 +++++++++++++------ 6 files changed, 227 insertions(+), 220 deletions(-) diff --git a/desc/objectives/_auglagrangian.py b/desc/objectives/_auglagrangian.py index 0b9f896d74..d846d61a55 100644 --- a/desc/objectives/_auglagrangian.py +++ b/desc/objectives/_auglagrangian.py @@ -1,77 +1,82 @@ import numpy as np + from desc.backend import jnp -from desc.derivatives import Derivative + from .objective_funs import ObjectiveFunction -from desc.backend import put + class AugLagrangianLS(ObjectiveFunction): - + """Augmented Lagrangian function for least-squares optimization + + Parameters + ---------- + func : objective function + constr : constraint functions + """ + def __init__(self, func, constr): self.func = func self.constr = constr - + def scalar(self): return False - + def name(self): return "least squares augmented lagrangian" - + def derivatives(self): return - + def compute(self, x, lmbda, mu): L = self.func(x) c = self.compute_constraints(x) - #for i in range(len(c)): - # c = put(c,i,-lmbda[i]*c[i] + mu[i]/2*c[i]**2) - c = -lmbda*c + mu/2*c**2 - L = jnp.concatenate((L,c),axis=None) + c = -lmbda * c + mu / 2 * c**2 + L = jnp.concatenate((L, c), axis=None) print("L is evaluated") return L - - def compute_scalar(self,x,lmbda,mu): - return np.linalg.norm(self.compute(x,lmbda,mu)) - - def callback(self, x, lmbda,mu): - L = self.compute(x,lmbda,mu) + + def compute_scalar(self, x, lmbda, mu): + return np.linalg.norm(self.compute(x, lmbda, mu)) + + def callback(self, x, lmbda, mu): + L = self.compute(x, lmbda, mu) print("The Least Squares Lagrangian is " + str(L)) - - def compute_constraints(self,x): + + def compute_constraints(self, x): c = jnp.array([]) for i in range(len(self.constr)): - c = jnp.concatenate((c,self.constr[i](x)),axis=None) + c = jnp.concatenate((c, self.constr[i](x)), axis=None) return c + class AugLagrangian(ObjectiveFunction): - def __init__(self, func, constr): self.func = func self.constr = constr - + def scalar(self): return True - + def name(self): return "augmented lagrangian" - + def derivatives(self): return - + def compute(self, x, lmbda, mu): L = self.func(x) c = self.compute_constraints(x) - return L - jnp.dot(lmbda,c) + mu/2*jnp.dot(c,c) - - def compute_scalar(self,x,lmbda,mu): - return self.compute(x,lmbda,mu) - + return L - jnp.dot(lmbda, c) + mu / 2 * jnp.dot(c, c) + + def compute_scalar(self, x, lmbda, mu): + return self.compute(x, lmbda, mu) + def callback(self, x, lmbda, mu): - L = self.compute(x,lmbda,mu) + L = self.compute(x, lmbda, mu) print("The Lagrangian is " + str(L)) - - def compute_constraints(self,x): + + def compute_constraints(self, x): c = jnp.array([]) for i in range(len(self.constr)): - c = jnp.concatenate((c,self.constr[i](x)),axis=None) + c = jnp.concatenate((c, self.constr[i](x)), axis=None) return c - diff --git a/desc/objectives/_geometry.py b/desc/objectives/_geometry.py index d9cb4d996e..db3d66f472 100644 --- a/desc/objectives/_geometry.py +++ b/desc/objectives/_geometry.py @@ -4,7 +4,6 @@ from desc.compute import get_params, get_profiles, get_transforms from desc.grid import QuadratureGrid from desc.utils import Timer -from desc.backend import jnp from .normalization import compute_scaling_factors from .objective_funs import _Objective @@ -52,11 
+51,11 @@ def __init__( normalize_target=True, grid=None, name="aspect ratio", - equality=True + equality=True, ): self.grid = grid - self.equality=equality + self.equality = equality super().__init__( eq=eq, diff --git a/desc/optimize/__init__.py b/desc/optimize/__init__.py index 1d40aca5ba..ccd9101b46 100644 --- a/desc/optimize/__init__.py +++ b/desc/optimize/__init__.py @@ -1,9 +1,8 @@ """Functions for minimization and wrappers for scipy methods.""" +from .aug_lagrangian import fmin_lag_stel +from .aug_lagrangian_ls_stel import fmin_lag_ls_stel from .fmin_scalar import fmintr from .least_squares import lsqtr from .optimizer import Optimizer from .stochastic import sgd -from .aug_lagrangian_ls_stel import fmin_lag_ls_stel -from .aug_lagrangian import fmin_lag_stel - diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py index c0aec4fabb..17e6cf62eb 100644 --- a/desc/optimize/aug_lagrangian.py +++ b/desc/optimize/aug_lagrangian.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- """ Created on Wed Jul 13 10:39:48 2022 @@ -7,33 +6,17 @@ """ import numpy as np -from termcolor import colored -from desc.backend import jnp -from .utils import ( - check_termination, - print_header_nonlinear, - print_iteration_nonlinear, - STATUS_MESSAGES, - compute_jac_scale, - evaluate_quadratic_form_jac, -) - -from .tr_subproblems import ( - solve_trust_region_dogleg, - solve_trust_region_2d_subspace, - update_tr_radius, -) -from scipy.optimize import OptimizeResult, least_squares, minimize - -from desc.optimize.fmin_scalar import fmintr -from desc.optimize.least_squares import lsqtr +from scipy.optimize import OptimizeResult +from desc.backend import jnp from desc.derivatives import Derivative from desc.objectives._auglagrangian import AugLagrangian - +from desc.optimize.fmin_scalar import fmintr + + +def conv_test(x, L, gL): + return np.linalg.norm(jnp.dot(gL.T, L)) -def conv_test(x,L,gL): - return np.linalg.norm(jnp.dot(gL.T,L)) def fmin_lag_stel( fun, @@ -64,102 +47,95 @@ def fmin_lag_stel( nfev += 1 g = grad(x, *args) ngev += 1 - + eq = eq_constr(x) if eq_constr is not None else jnp.array([]) ineq = ineq_constr(x) if ineq_constr is not None else jnp.array([]) eq_dim = len(eq.flatten()) ineq_dim = len(ineq.flatten()) - x = np.append(x,1.0*np.ones(ineq_dim)) - + x = np.append(x, 1.0 * np.ones(ineq_dim)) + if maxiter is None: maxiter = N * 100 - mu = options.pop("mu", 100) - lmbda = options.pop("lmbda", 0.01*jnp.ones(eq_dim + ineq_dim)) + mu = options.pop("mu", 0.1) + lmbda = options.pop("lmbda", 0.01 * jnp.ones(eq_dim + ineq_dim)) bounds = options.pop("bounds", jnp.zeros(eq_dim + ineq_dim)) - + def recover(x): - return x[0:len(x)-ineq_dim] - + return x[0 : len(x) - ineq_dim] + def wrapped_constraint(x): c = np.array([]) - slack = x[len(recover(x)):]**2 + slack = x[len(recover(x)) :] ** 2 x_recov = recover(x) eq = eq_constr(x_recov) if eq_constr is not None else jnp.array([]) ineq = ineq_constr(x_recov) if ineq_constr is not None else jnp.array([]) - c = jnp.append(c,eq_constr(x_recov)) - slack = jnp.append(jnp.zeros(len(eq.flatten())),slack) - c = jnp.append(c,ineq) + c = jnp.append(c, eq_constr(x_recov)) + slack = jnp.append(jnp.zeros(len(eq.flatten())), slack) + c = jnp.append(c, ineq) c = c - bounds + slack return c - + def wrapped_obj(x): return fun(recover(x)) - - constr = np.array([wrapped_constraint]) - print("constraint is " + str(np.mean(wrapped_constraint(x)))) + + constr = np.array([wrapped_constraint]) L = AugLagrangian(wrapped_obj, constr) - gradL = 
Derivative(L.compute,0,"fwd") - hessL = Derivative(L.compute,argnum=0,mode="hess") - - gtolk = 1/(10*mu) - ctolk = 1/(mu**(0.1)) + gradL = Derivative(L.compute, 0, "fwd") + + gtolk = 1 / (10 * mu) + ctolk = 1 / (mu ** (0.1)) xold = x f = fun(recover(x)) fold = f - - print("The initial obj func is " + str(f)) cv = L.compute_constraints(x) - print("The constraint term is " + str(-jnp.dot(lmbda,cv)+mu*jnp.dot(cv,cv))) while iteration < maxiter: - xk = fmintr(L.compute,x,gradL,args=(lmbda,mu,),gtol=gtolk,maxiter=10,verbose=2) -# xk = minimize(L.compute,x,args=(lmbda,mu),method="trust-exact",jac=gradL, hess=hessL, options = {"maxiter": 10.0,"verbose":3}) - - x = xk['x'] + xk = fmintr( + L.compute, + x, + gradL, + args=( + lmbda, + mu, + ), + gtol=gtolk, + maxiter=20, + verbose=2, + ) + x = xk["x"] f = fun(recover(x)) cv = L.compute_constraints(x) c = np.max(cv) - + if np.linalg.norm(xold - x) < xtol: print("xtol satisfied\n") - break + break - if (np.linalg.norm(f) - np.linalg.norm(fold))/np.linalg.norm(fold) > 0.1: + if (np.linalg.norm(f) - np.linalg.norm(fold)) / np.linalg.norm(fold) > 0.1: mu = mu / 2 print("Decreasing mu. mu is now " + str(np.mean(mu))) - + elif c < ctolk: - if c < ctol and gradL(x,lmbda,mu) < gtol: + if c < ctol and gradL(x, lmbda, mu) < gtol: break else: print("Updating lambda") - lmbda = lmbda - mu*cv - ctolk = ctolk/(mu**(0.9)) - gtolk = gtolk/(mu) + lmbda = lmbda - mu * cv + ctolk = ctolk / (mu ** (0.9)) + gtolk = gtolk / (mu) else: mu = 5.0 * mu - ctolk = ctolk/(mu**(0.1)) - gtolk = gtolk/mu - - + ctolk = ctolk / (mu ** (0.1)) + gtolk = gtolk / mu + iteration = iteration + 1 xold = x fold = f - - print("mu is now " + str(mu)) - print("The average c is " + str(np.mean(cv))) - print("The max c is " + str(np.max(cv))) - print("The objective function is " + str(np.linalg.norm(f))) - print("The constraints are " + str(np.linalg.norm(cv))) - penalty = mu*np.dot(cv,cv)/2 - print("The penalty term is " + str(penalty)) - print("The aspect ratio constraint is " + str(cv[-1])) - print("The iteration is " + str(iteration)) - - g = gradL(x,lmbda,mu) + + g = gradL(x, lmbda, mu) f = fun(recover(x)) success = True message = "successful" diff --git a/desc/optimize/aug_lagrangian_ls_stel.py b/desc/optimize/aug_lagrangian_ls_stel.py index 8cd911b37c..e7054ec009 100644 --- a/desc/optimize/aug_lagrangian_ls_stel.py +++ b/desc/optimize/aug_lagrangian_ls_stel.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- """ Created on Wed Jul 13 10:39:48 2022 @@ -7,33 +6,17 @@ """ import numpy as np -from termcolor import colored -from desc.backend import jnp -from .utils import ( - check_termination, - print_header_nonlinear, - print_iteration_nonlinear, - STATUS_MESSAGES, - compute_jac_scale, - evaluate_quadratic_form_jac, -) - -from .tr_subproblems import ( - solve_trust_region_dogleg, - solve_trust_region_2d_subspace, - update_tr_radius, -) -from scipy.optimize import OptimizeResult, least_squares - -from desc.optimize.fmin_scalar import fmintr -from desc.optimize.least_squares import lsqtr +from scipy.optimize import OptimizeResult +from desc.backend import jnp from desc.derivatives import Derivative from desc.objectives._auglagrangian import AugLagrangianLS - +from desc.optimize.least_squares import lsqtr + + +def conv_test(x, L, gL): + return np.linalg.norm(jnp.dot(gL.T, L)) -def conv_test(x,L,gL): - return np.linalg.norm(jnp.dot(gL.T,L)) def fmin_lag_ls_stel( fun, @@ -64,98 +47,98 @@ def fmin_lag_ls_stel( nfev += 1 g = grad(x, *args) ngev += 1 - + eq = eq_constr(x) if eq_constr 
is not None else jnp.array([]) ineq = ineq_constr(x) if ineq_constr is not None else jnp.array([]) eq_dim = len(eq.flatten()) ineq_dim = len(ineq.flatten()) - x = np.append(x,1.0*np.ones(ineq_dim)) - + x = np.append(x, 1.0 * np.ones(ineq_dim)) + if maxiter is None: maxiter = N * 100 - mu = options.pop("mu", 10*jnp.ones(eq_dim + ineq_dim)) - lmbda = options.pop("lmbda", 10*jnp.ones(eq_dim + ineq_dim)) + mu = options.pop("mu", 10 * jnp.ones(eq_dim + ineq_dim)) + lmbda = options.pop("lmbda", 10 * jnp.ones(eq_dim + ineq_dim)) bounds = options.pop("bounds", jnp.zeros(eq_dim + ineq_dim)) - + def recover(x): - return x[0:len(x)-ineq_dim] - + return x[0 : len(x) - ineq_dim] + def wrapped_constraint(x): c = np.array([]) - slack = x[len(recover(x)):]**2 + slack = x[len(recover(x)) :] ** 2 x_recov = recover(x) eq = eq_constr(x_recov) if eq_constr is not None else jnp.array([]) ineq = ineq_constr(x_recov) if ineq_constr is not None else jnp.array([]) - c = jnp.append(c,eq_constr(x_recov)) - slack = jnp.append(jnp.zeros(len(eq.flatten())),slack) - c = jnp.append(c,ineq) + c = jnp.append(c, eq_constr(x_recov)) + slack = jnp.append(jnp.zeros(len(eq.flatten())), slack) + c = jnp.append(c, ineq) c = c - bounds + slack return c - + def wrapped_obj(x): return fun(recover(x)) - - constr = np.array([wrapped_constraint]) - print("constraint is " + str(np.mean(wrapped_constraint(x)))) + + constr = np.array([wrapped_constraint]) L = AugLagrangianLS(wrapped_obj, constr) - gradL = Derivative(L.compute,0,"fwd") - hessL = Derivative(L.compute,argnum=0,mode="hess") - - gtolk = 1/(10*np.linalg.norm(mu)) - ctolk = 1/(np.linalg.norm(mu)**(0.1)) + gradL = Derivative(L.compute, 0, "fwd") + + gtolk = 1 / (10 * np.linalg.norm(mu)) + ctolk = 1 / (np.linalg.norm(mu) ** (0.1)) xold = x f = fun(recover(x)) fold = f - + while iteration < maxiter: - xk = lsqtr(L.compute,x,gradL,args=(lmbda,mu,),gtol=gtolk,maxiter=10,verbose=2) + xk = lsqtr( + L.compute, + x, + gradL, + args=( + lmbda, + mu, + ), + gtol=gtolk, + maxiter=10, + verbose=2, + ) - x = xk['x'] + x = xk["x"] f = fun(recover(x)) cv = L.compute_constraints(x) c = np.max(cv) - + if np.linalg.norm(xold - x) < xtol: print("xtol satisfied\n") - break + break - if (np.linalg.norm(f) - np.linalg.norm(fold))/np.linalg.norm(fold) > 0.1: + if (np.linalg.norm(f) - np.linalg.norm(fold)) / np.linalg.norm(fold) > 0.1: mu = mu / 2 print("Decreasing mu. 
mu is now " + str(np.mean(mu))) - + elif c < ctolk: - if c < ctol and conv_test(x,L.compute(x,lmbda,mu),gradL(x,lmbda,mu)) < gtol: + if ( + c < ctol + and conv_test(x, L.compute(x, lmbda, mu), gradL(x, lmbda, mu)) < gtol + ): break else: print("Updating lambda") - lmbda = lmbda - mu*cv - ctolk = ctolk/(np.max(mu)**(0.9)) - gtolk = gtolk/(np.max(mu)) + lmbda = lmbda - mu * cv + ctolk = ctolk / (np.max(mu) ** (0.9)) + gtolk = gtolk / (np.max(mu)) else: mu = 5.0 * mu - ctolk = ctolk/(np.max(mu)**(0.1)) - gtolk = gtolk/np.max(mu) - - + ctolk = ctolk / (np.max(mu) ** (0.1)) + gtolk = gtolk / np.max(mu) + iteration = iteration + 1 xold = x fold = f - - print("mu is now " + str(mu)) - print("The average c is " + str(np.mean(cv))) - print("The max c is " + str(np.max(cv))) - print("The objective function is " + str(np.linalg.norm(f))) - print("The constraints are " + str(np.linalg.norm(cv))) - penalty = mu[0]*np.dot(cv,cv)/2 - print("The penalty term is " + str(penalty)) - print("The aspect ratio constraint is " + str(cv[-1])) - print("x is " + str(recover(x))) - print("The iteration is " + str(iteration)) - - g = gradL(x,lmbda,mu) + + g = gradL(x, lmbda, mu) f = fun(recover(x)) success = True message = "successful" diff --git a/desc/optimize/optimizer.py b/desc/optimize/optimizer.py index e648b3b9e3..8978ede6d6 100644 --- a/desc/optimize/optimizer.py +++ b/desc/optimize/optimizer.py @@ -8,26 +8,27 @@ from desc.backend import jnp from desc.io import IOAble from desc.objectives import ( + AspectRatio, CurrentDensity, FixCurrent, FixIota, ForceBalance, HelicalForceBalance, + MagneticWell, ObjectiveFunction, RadialForceBalance, WrappedEquilibriumObjective, - AspectRatio, - MagneticWell ) from desc.objectives.utils import factorize_linear_constraints from desc.utils import Timer from ._scipy_wrappers import _optimize_scipy_least_squares, _optimize_scipy_minimize +from .aug_lagrangian import fmin_lag_stel +from .aug_lagrangian_ls_stel import fmin_lag_ls_stel from .fmin_scalar import fmintr from .least_squares import lsqtr from .stochastic import sgd -from .aug_lagrangian_ls_stel import fmin_lag_ls_stel -from .aug_lagrangian import fmin_lag_stel + class Optimizer(IOAble): """A helper class to wrap several optimization routines. 
@@ -113,7 +114,6 @@ class Optimizer(IOAble): + _desc_stochastic_methods + _desc_constrained_least_squares_methods + _desc_constrained_scalar_methods - ) def __init__(self, method): @@ -227,7 +227,12 @@ def optimize( # noqa: C901 - FIXME: simplify this options.setdefault("initial_trust_radius", 0.5) options.setdefault("max_trust_radius", 1.0) - linear_constraints, nonlinear_constraints, equality_constraints, inequality_constraints = _parse_constraints(constraints) + ( + linear_constraints, + nonlinear_constraints, + equality_constraints, + inequality_constraints, + ) = _parse_constraints(constraints) # wrap nonlinear constraints if necessary wrapped = False if len(nonlinear_constraints) > 0 and ( @@ -239,17 +244,25 @@ def optimize( # noqa: C901 - FIXME: simplify this ) elif self.method in Optimizer._constrained_methods: - + wrapped = False - constraint_objectives, equality_objectives, inequality_objectives = _wrap_constraints(nonlinear_constraints, equality_constraints, inequality_constraints) + ( + constraint_objectives, + equality_objectives, + inequality_objectives, + ) = _wrap_constraints( + self.method, + nonlinear_constraints, + equality_constraints, + inequality_constraints, + ) if not constraint_objectives.built: - constraint_objectives.build(eq,verbose=verbose) + constraint_objectives.build(eq, verbose=verbose) if not equality_objectives.built and len(equality_constraints) != 0: - equality_objectives.build(eq,verbose=verbose) + equality_objectives.build(eq, verbose=verbose) if not inequality_objectives.built and len(inequality_constraints) != 0: - inequality_objectives.build(eq,verbose=verbose) - + inequality_objectives.build(eq, verbose=verbose) if not objective.built: objective.build(eq, verbose=verbose) @@ -267,7 +280,6 @@ def optimize( # noqa: C901 - FIXME: simplify this if len(equality_constraints) != 0: objective.combine_args(equality_objectives) - if objective.scalar and (self.method in Optimizer._least_squares_methods): warnings.warn( colored( @@ -295,7 +307,12 @@ def optimize( # noqa: C901 - FIXME: simplify this ( compute_eq_constraints_wrapped, compute_ineq_constraints_wrapped, - ) = _wrap_constraint_objectives(objective, linear_constraints, equality_objectives, inequality_objectives) + ) = _wrap_constraint_objectives( + objective, + linear_constraints, + equality_objectives, + inequality_objectives, + ) timer.stop("linear constraint factorize") if verbose > 1: @@ -421,9 +438,17 @@ def optimize( # noqa: C901 - FIXME: simplify this ) elif self.method in Optimizer._desc_constrained_least_squares_methods: - - eq_constr = compute_eq_constraints_wrapped if len(equality_constraints) != 0 else None - ineq_constr = compute_ineq_constraints_wrapped if len(inequality_constraints) !=0 else None + + eq_constr = ( + compute_eq_constraints_wrapped + if len(equality_constraints) != 0 + else None + ) + ineq_constr = ( + compute_ineq_constraints_wrapped + if len(inequality_constraints) != 0 + else None + ) result = fmin_lag_ls_stel( compute_wrapped, x0=x0_reduced, @@ -442,9 +467,17 @@ def optimize( # noqa: C901 - FIXME: simplify this ) elif self.method in Optimizer._desc_constrained_scalar_methods: - - eq_constr = compute_eq_constraints_wrapped if len(equality_constraints) != 0 else None - ineq_constr = compute_ineq_constraints_wrapped if len(inequality_constraints) !=0 else None + + eq_constr = ( + compute_eq_constraints_wrapped + if len(equality_constraints) != 0 + else None + ) + ineq_constr = ( + compute_ineq_constraints_wrapped + if len(inequality_constraints) != 0 + else None + ) 
result = fmin_lag_stel( compute_scalar_wrapped, x0=x0_reduced, @@ -462,7 +495,6 @@ def optimize( # noqa: C901 - FIXME: simplify this options=options, ) - if wrapped: result["history"] = objective.history else: @@ -512,7 +544,12 @@ def _parse_constraints(constraints): "Toroidal current and rotational transform cannot be " + "constrained simultaneously." ) - return linear_constraints, nonlinear_constraints, equality_constraints, inequality_constraints + return ( + linear_constraints, + nonlinear_constraints, + equality_constraints, + inequality_constraints, + ) def _wrap_objective_with_constraints(objective, linear_constraints, method): @@ -558,7 +595,10 @@ def jac_wrapped(x_reduced): recover, ) -def _wrap_constraint_objectives(objective, linear_constraints, equality_objectives, inequality_objectives): + +def _wrap_constraint_objectives( + objective, linear_constraints, equality_objectives, inequality_objectives +): _, _, _, _, Z, unfixed_idx, project, recover = factorize_linear_constraints( linear_constraints, objective.args ) @@ -567,17 +607,18 @@ def compute_eq_constraints_wrapped(x_reduced): x = recover(x_reduced) c = equality_objectives.compute(x) return c - + def compute_ineq_constraints_wrapped(x_reduced): x = recover(x_reduced) c = inequality_objectives.compute(x) return c - + return ( compute_eq_constraints_wrapped, compute_ineq_constraints_wrapped, ) + def _wrap_nonlinear_constraints(objective, nonlinear_constraints, method, options): """Use WrappedEquilibriumObjective to hanle nonlinear equilibrium constraints.""" for constraint in nonlinear_constraints: @@ -607,7 +648,10 @@ def _wrap_nonlinear_constraints(objective, nonlinear_constraints, method, option ) return objective -def _wrap_constraints(nonlinear_constraints, equality_constraints, inequality_constraints): + +def _wrap_constraints( + method, nonlinear_constraints, equality_constraints, inequality_constraints +): for constraint in nonlinear_constraints: if not isinstance( constraint, @@ -617,20 +661,21 @@ def _wrap_constraints(nonlinear_constraints, equality_constraints, inequality_co HelicalForceBalance, CurrentDensity, MagneticWell, - AspectRatio + AspectRatio, ), ): raise ValueError( - "optimizer method {} ".format(self.method) + "optimizer method {} ".format(method) + "cannot handle general nonlinear constraint {}.".format(constraint) - ) + ) constraint_objectives = ObjectiveFunction(nonlinear_constraints) equality_objectives = ObjectiveFunction(equality_constraints) inequality_objectives = ObjectiveFunction(inequality_constraints) return constraint_objectives, equality_objectives, inequality_objectives - + + def _get_default_tols( method, ftol=None, From ac44f1826858ffe13d80203b338aa438050352e6 Mon Sep 17 00:00:00 2001 From: "Patrick S. Kim" Date: Tue, 31 Jan 2023 13:18:35 -0500 Subject: [PATCH 07/41] added hessian to subproblem. stalsl immediately though. 
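The exact Hessian is obtained the same way as the gradient, by wrapping
L.compute with desc.derivatives.Derivative:

    gradL = Derivative(L.compute, 0, "fwd")
    hessL = Derivative(L.compute, argnum=0, mode="hess")

Both receive the current multipliers at call time through args=(lmbda, mu),
so each inner fmintr solve uses the true Hessian of the augmented
Lagrangian at the current multiplier and penalty values.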
---
 desc/optimize/aug_lagrangian.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py
index 17e6cf62eb..877fa77eda 100644
--- a/desc/optimize/aug_lagrangian.py
+++ b/desc/optimize/aug_lagrangian.py
@@ -83,6 +83,7 @@ def wrapped_obj(x):
     constr = np.array([wrapped_constraint])
     L = AugLagrangian(wrapped_obj, constr)
     gradL = Derivative(L.compute, 0, "fwd")
+    hessL = Derivative(L.compute, argnum=0, mode="hess")
 
     gtolk = 1 / (10 * mu)
     ctolk = 1 / (mu ** (0.1))
@@ -95,7 +96,8 @@ def wrapped_obj(x):
         xk = fmintr(
             L.compute,
             x,
-            gradL,
+            grad=gradL,
+            hess=hessL,
             args=(
                 lmbda,
                 mu,

From f26c0ba52626918dd4423e9048796a0afdf8b06c Mon Sep 17 00:00:00 2001
From: "Patrick S. Kim"
Date: Mon, 13 Feb 2023 10:14:07 -0500
Subject: [PATCH 08/41] minor tuning of scalar aug lagrangian.
---
 desc/objectives/_auglagrangian.py |  4 +++
 desc/optimize/aug_lagrangian.py   | 55 ++++++++++++++++++++-----------
 2 files changed, 40 insertions(+), 19 deletions(-)

diff --git a/desc/objectives/_auglagrangian.py b/desc/objectives/_auglagrangian.py
index d846d61a55..16770906da 100644
--- a/desc/objectives/_auglagrangian.py
+++ b/desc/objectives/_auglagrangian.py
@@ -1,3 +1,4 @@
+import jax
 import numpy as np
 
 from desc.backend import jnp
@@ -66,6 +67,9 @@ def derivatives(self):
     def compute(self, x, lmbda, mu):
         L = self.func(x)
         c = self.compute_constraints(x)
+        # jax.debug.print("lmbda term is " + str(jnp.dot(lmbda,c)))
+        # jax.debug.print("mu term is " + str(mu/2*jnp.dot(c,c)))
+        # jax.debug.print("L is " + str(L))
         return L - jnp.dot(lmbda, c) + mu / 2 * jnp.dot(c, c)
 
     def compute_scalar(self, x, lmbda, mu):
diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py
index 877fa77eda..a54afa00d6 100644
--- a/desc/optimize/aug_lagrangian.py
+++ b/desc/optimize/aug_lagrangian.py
@@ -6,7 +6,7 @@
 """
 import numpy as np
-from scipy.optimize import OptimizeResult
+from scipy.optimize import OptimizeResult, minimize
 
 from desc.backend import jnp
 from desc.derivatives import Derivative
@@ -57,8 +57,8 @@ def fmin_lag_stel(
     if maxiter is None:
         maxiter = N * 100
 
-    mu = options.pop("mu", 0.1)
-    lmbda = options.pop("lmbda", 0.01 * jnp.ones(eq_dim + ineq_dim))
+    mu = options.pop("mu", 10000.0)
+    lmbda = options.pop("lmbda", 1.0 * jnp.ones(eq_dim + ineq_dim))
     bounds = options.pop("bounds", jnp.zeros(eq_dim + ineq_dim))
 
     def recover(x):
@@ -91,35 +91,52 @@ def wrapped_obj(x):
     f = fun(recover(x))
     fold = f
     cv = L.compute_constraints(x)
+    lmbda = lmbda - mu * cv
+    print("f is " + str(f))
+    print("lmbda term is " + str(np.dot(lmbda, cv)))
+    print("mu term is " + str(mu / 2 * np.dot(cv, cv)))
 
     while iteration < maxiter:
-        xk = fmintr(
+        # xk = fmintr(
+        #     L.compute,
+        #     x,
+        #     grad=gradL,
+        #     hess=hessL,
+        #     args=(
+        #         lmbda,
+        #         mu,
+        #     ),
+        #     gtol=gtolk,
+        #     maxiter=20,
+        #     verbose=2,
+        # )
+        xk = minimize(
             L.compute,
             x,
-            grad=gradL,
+            args=(lmbda, mu),
+            method="trust-exact",
+            jac=gradL,
             hess=hessL,
-            args=(
-                lmbda,
-                mu,
-            ),
-            gtol=gtolk,
-            maxiter=20,
-            verbose=2,
+            options={"maxiter": 20},
         )
 
         x = xk["x"]
         f = fun(recover(x))
+        print("f is " + str(f))
         cv = L.compute_constraints(x)
-        c = np.max(cv)
+        print("lmbda term is " + str(np.dot(lmbda, cv)))
+        print("mu term is " + str(mu / 2 * np.dot(cv, cv)))
+        print("\n")
+        c = np.linalg.norm(cv)
 
         if np.linalg.norm(xold - x) < xtol:
             print("xtol satisfied\n")
             break
 
-        if (np.linalg.norm(f) - np.linalg.norm(fold)) / np.linalg.norm(fold) > 0.1:
-            mu = mu / 2
-            print("Decreasing mu. mu is now " + str(np.mean(mu)))
+        # if (np.linalg.norm(f) - np.linalg.norm(fold)) / np.linalg.norm(fold) > 0.1:
+        #     mu = mu / 2
+        #     print("Decreasing mu. mu is now " + str(np.mean(mu)))
 
-        elif c < ctolk:
+        if c < ctolk:
             if c < ctol and gradL(x, lmbda, mu) < gtol:
                 break
@@ -129,8 +146,8 @@ def wrapped_obj(x):
             ctolk = ctolk / (mu ** (0.9))
             gtolk = gtolk / (mu)
         else:
-            mu = 5.0 * mu
-            ctolk = ctolk / (mu ** (0.1))
+            mu = 10 * mu
+            ctolk = c / (mu ** (0.1))
             gtolk = gtolk / mu
 
         iteration = iteration + 1

From cd87f0131a6ebc773035f15375497b44f559e88f Mon Sep 17 00:00:00 2001
From: "Patrick S. Kim"
Date: Wed, 8 Mar 2023 14:41:21 -0500
Subject: [PATCH 09/41] more tuning of scalar aug lagrangian.
---
 desc/optimize/aug_lagrangian.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py
index a54afa00d6..c2a5cd8be2 100644
--- a/desc/optimize/aug_lagrangian.py
+++ b/desc/optimize/aug_lagrangian.py
@@ -57,8 +57,8 @@ def fmin_lag_stel(
     if maxiter is None:
         maxiter = N * 100
 
-    mu = options.pop("mu", 10000.0)
-    lmbda = options.pop("lmbda", 1.0 * jnp.ones(eq_dim + ineq_dim))
+    mu = options.pop("mu", 1.0)
+    lmbda = options.pop("lmbda", 0.0 * jnp.ones(eq_dim + ineq_dim))
     bounds = options.pop("bounds", jnp.zeros(eq_dim + ineq_dim))
 
     def recover(x):
@@ -82,19 +82,21 @@ def wrapped_obj(x):
 
     constr = np.array([wrapped_constraint])
     L = AugLagrangian(wrapped_obj, constr)
-    gradL = Derivative(L.compute, 0, "fwd")
+    gradL = Derivative(L.compute, 0, "rev")
     hessL = Derivative(L.compute, argnum=0, mode="hess")
 
     gtolk = 1 / (10 * mu)
-    ctolk = 1 / (mu ** (0.1))
+    c = np.linalg.norm(L.compute_constraints(x))
+    ctolk = c / (mu ** (0.1))
     xold = x
     f = fun(recover(x))
     fold = f
     cv = L.compute_constraints(x)
-    lmbda = lmbda - mu * cv
+    # lmbda = lmbda - mu * cv
     print("f is " + str(f))
     print("lmbda term is " + str(np.dot(lmbda, cv)))
     print("mu term is " + str(mu / 2 * np.dot(cv, cv)))
+    print("The constraints are " + str(c))
 
     while iteration < maxiter:
         # xk = fmintr(
@@ -123,6 +125,7 @@ def wrapped_obj(x):
         f = fun(recover(x))
         print("f is " + str(f))
         cv = L.compute_constraints(x)
+        print("The constraints are " + str(np.linalg.norm(cv)))
         print("lmbda term is " + str(np.dot(lmbda, cv)))
         print("mu term is " + str(mu / 2 * np.dot(cv, cv)))
         print("\n")
@@ -146,7 +149,7 @@ def wrapped_obj(x):
             ctolk = ctolk / (mu ** (0.9))
             gtolk = gtolk / (mu)
         else:
-            mu = 10 * mu
+            mu = 2 * mu
             ctolk = c / (mu ** (0.1))
             gtolk = gtolk / mu
 

From 3c51f6981fc12149206cc11a48590777610b6682 Mon Sep 17 00:00:00 2001
From: "Patrick S. Kim"
Date: Sun, 23 Apr 2023 17:34:38 -0400
Subject: [PATCH 10/41] beginning implementation of inequality constraints and
 merging with rc/bounded.
---
 desc/optimize/_desc_wrappers.py         |  34 +++++--
 desc/optimize/aug_lagrangian_ls_stel.py |  14 +--
 desc/optimize/utils.py                  | 120 +++++++++++++++++++++++-
 3 files changed, 153 insertions(+), 15 deletions(-)

diff --git a/desc/optimize/_desc_wrappers.py b/desc/optimize/_desc_wrappers.py
index e755114c24..f265dfdd98 100644
--- a/desc/optimize/_desc_wrappers.py
+++ b/desc/optimize/_desc_wrappers.py
@@ -8,6 +8,7 @@
 from .least_squares import lsqtr
 from .optimizer import register_optimizer
 from .stochastic import sgd
+from .utils import inequality_to_bounds
 
 
 @register_optimizer(
@@ -69,15 +70,31 @@ def _optimize_desc_aug_lagrangian_least_squares(
     x_scale = 1 if x_scale == "auto" else x_scale
     if isinstance(x_scale, str):
         raise ValueError(f"Method {method} does not support x_scale type {x_scale}")
-
+    bounds = (-jnp.inf, jnp.inf)
     if constraint is not None:
-        constraint_wrapped = DESCNonlinearConstraint(
+        (
+            z0,
+            fun_wrapped,
+            grad_wrapped,
+            hess_wrapped,
+            constraint_wrapped,
+            zbounds,
+            z2xs,
+        ) = inequality_to_bounds(
+            x0,
+            objective.compute_scaled,
+            objective.grad,
+            objective.hess,
             constraint,
+            bounds,
         )
-        if constraint_wrapped._equality_constraint:
-            objective.combine_args(constraint_wrapped._equality_constraint)
-        if constraint_wrapped._inequality_constraint:
-            objective.combine_args(constrained_wrapped._inequality_constraint)
+        # constraint_wrapped = DESCNonlinearConstraint(
+        #     constraint,
+        # )
+        # if constraint_wrapped._equality_constraint:
+        #     objective.combine_args(constraint_wrapped._equality_constraint)
+        # if constraint_wrapped._inequality_constraint:
+        #     objective.combine_args(constrained_wrapped._inequality_constraint)
     else:
         constraint_wrapped = None
     # need to use some "global" variables here
@@ -92,9 +109,10 @@ def _optimize_desc_aug_lagrangian_least_squares(
     success = [None]
 
     result = fmin_lag_ls_stel(
-        objective.compute_scaled,
+        fun_wrapped,
         constraint_wrapped,
-        x0=x0,
+        x0=z0,
+        bounds=zbounds,
         args=(),
         x_scale=x_scale,
         ftol=stoptol["ftol"],
diff --git a/desc/optimize/aug_lagrangian_ls_stel.py b/desc/optimize/aug_lagrangian_ls_stel.py
index 6e39baeeba..9136b79c9b 100644
--- a/desc/optimize/aug_lagrangian_ls_stel.py
+++ b/desc/optimize/aug_lagrangian_ls_stel.py
@@ -22,6 +22,7 @@ def fmin_lag_ls_stel(
     fun,
     constraint,
     x0,
+    bounds,
     args=(),
     x_scale=1,
     ftol=1e-6,
@@ -48,14 +49,14 @@ def fmin_lag_ls_stel(
     lmbda = options.pop("lmbda", 10 * jnp.ones(constraint.dim_f()))
     x = np.append(x, 1.0 * np.ones(constraint._ineq_dim))
 
-    def recover(x):
-        return x[0 : len(x) - constraint._ineq_dim]
+    # def recover(x):
+    #     return x[0 : len(x) - constraint._ineq_dim]
 
-    def wrapped_obj(x):
-        return fun(recover(x))
+    # def wrapped_obj(x):
+    #     return fun(recover(x))
 
     constr = np.array([constraint])
-    L = AugLagrangianLS(wrapped_obj, constr)
+    L = AugLagrangianLS(fun, constr)
     gradL = Derivative(L.compute, 0, "fwd")
 
     gtolk = 1 / (10 * np.linalg.norm(mu))
@@ -73,13 +74,14 @@ def fmin_lag_ls_stel(
                 lmbda,
                 mu,
             ),
+            bounds=bounds,
             gtol=gtolk,
             maxiter=10,
             verbose=2,
         )
 
         x = xk["x"]
-        f = fun(recover(x))
+        f = fun(x)
         cv = L.compute_constraints(x)
         c = np.max(cv)
 
diff --git a/desc/optimize/utils.py b/desc/optimize/utils.py
index cb95e1a708..c9e0cfed70 100644
--- a/desc/optimize/utils.py
+++ b/desc/optimize/utils.py
@@ -2,7 +2,125 @@
 
 import numpy as np
 
-from desc.backend import cond, jit, jnp
+from desc.backend import cond, jit, jnp, put
+
+
+def inequality_to_bounds(x0, fun, grad, hess, constraint, bounds):
+    """Convert inequality constraints to bounds using slack variables.
+
+    We do this by introducing slack variables s,
+
+    i.e., lb < con(x) < ub --> con(x) - s == 0, lb < s < ub
+
+    A new state vector z is defined as [x, s] and the problem
+    is transformed into one that has only equality constraints
+    and simple bounds on the variables z.
+
+    Parameters
+    ----------
+    x0 : ndarray
+        Starting point for primal variables
+    fun, grad, hess : callable
+        Functions for computing the objective and derivatives
+    constraint : scipy.optimize.NonlinearConstraint
+        constraint object of both equality and inequality constraints
+    bounds : tuple
+        lower and upper bounds for primal variables x
+
+    Returns
+    -------
+    z0 : ndarray
+        Starting point for primal + slack variables
+    fun, grad, hess : callable
+        functions for computing objective and derivatives wrt z
+    constraint : scipy.optimize.NonlinearConstraint
+        constraint containing just equality constraints
+    bounds : tuple
+        lower and upper bounds on combined variable z
+    z2xs : callable
+        function for splitting combined variable z into primal
+        and slack variables x and s
+
+    """
+
+    c0 = constraint.fun(x0)
+    ncon = c0.size
+    constraint.lb = jnp.broadcast_to(constraint.lb, ncon)
+    constraint.ub = jnp.broadcast_to(constraint.ub, ncon)
+    bounds = tuple(jnp.broadcast_to(bi, x0.shape) for bi in bounds)
+
+    lbs, ubs = constraint.lb, constraint.ub
+    lbx, ubx = bounds
+
+    ineq_mask = lbs == ubs
+    eq_mask = lbs != ubs
+    eq_target = lbs[~ineq_mask]
+    nslack = jnp.sum(ineq_mask)
+    zbounds = (
+        jnp.concatenate([lbx, lbs[ineq_mask]]),
+        jnp.concatenate([ubx, ubs[ineq_mask]]),
+    )
+    s0 = c0[ineq_mask]
+    s0 = jnp.clip(s0, lbs[ineq_mask], ubs[ineq_mask])
+    z0 = jnp.concatenate([x0, s0])
+    target = jnp.zeros(c0.size)
+    target = put(target, eq_mask, eq_target)
+
+    def z2xs(z):
+        return z[:nslack], z[nslack:]
+
+    def fun_wrapped(z):
+        x, s = z2xs(z)
+        return fun(x)
+
+    def grad_wrapped(z):
+        x, s = z2xs(z)
+        g = grad(x)
+        return jnp.concatenate([g, jnp.zeros(nslack)])
+
+    if callable(hess):
+
+        def hess_wrapped(z):
+            x, s = z2xs(z)
+            H = hess(x)
+            return jnp.pad(H, (0, nslack))
+
+    else:  # using BFGS
+        hess_wrapped = hess
+
+    def confun_wrapped(z):
+        x, s = z2xs(z)
+        c = constraint.fun(x)
+        sbig = jnp.zeros(ncon)
+        sbig = put(sbig, ineq_mask, s)
+        return c - sbig - target
+
+    def conjac_wrapped(z):
+        x, s = z2xs(z)
+        J = constraint.jac(x)
+        I = jnp.eye(nslack)
+        Js = jnp.zeros((ncon, nslack))
+        Js = put(Js, Index[ineq_mask, :], -I)
+        return jnp.hstack([J, Js])
+
+    if callable(constraint.hess):
+
+        def conhess_wrapped(z, y):
+            x, s = z2xs(z)
+            H = constraint.hess(x, y)
+            return jnp.pad(H, (0, nslack))
+
+    else:  # using BFGS
+        conhess_wrapped = constraint.hess
+
+    newcon = copy.copy(constraint)
+    newcon.fun = confun_wrapped
+    newcon.jac = conjac_wrapped
+    newcon.hess = conhess_wrapped
+    newcon.lb = target
+    newcon.ub = target
+
+    return z0, fun_wrapped, grad_wrapped, hess_wrapped, newcon, zbounds, z2xs
 
 
 @jit

From 518efc33bb5b3ad1f2f4f46fc2d3319851540c3a Mon Sep 17 00:00:00 2001
From: "Patrick S. Kim"
Date: Tue, 25 Apr 2023 04:37:04 -0400
Subject: [PATCH 11/41] integrated Rory's ineq constraint to bounds function.
 tested for eq constrained qs problem.
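
Roughly how the helper is meant to be called. The sketch below is written against
the original scipy-style interface (a constraint exposing fun/jac/hess and lb/ub);
the version integrated in this commit instead reads compute_scaled and bounds from
a DESC ObjectiveFunction. Names and the toy problem are hypothetical, not one of
the project's tests:

    import numpy as np
    from scipy.optimize import NonlinearConstraint

    # toy problem: minimize sum(x**2) subject to 1 <= x[0] + x[1] <= 2
    fun = lambda x: np.sum(x**2)
    grad = lambda x: 2 * x
    hess = lambda x: 2 * np.eye(x.size)
    constraint = NonlinearConstraint(
        lambda x: np.array([x[0] + x[1]]),
        lb=1.0,
        ub=2.0,
        jac=lambda x: np.array([[1.0, 1.0]]),
    )

    x0 = np.ones(2)
    z0, f_w, g_w, h_w, eqcon, zbounds, z2xs = inequality_to_bounds(
        x0, fun, grad, hess, constraint, bounds=(-np.inf, np.inf)
    )
    # eqcon now contains only equality residuals (zero at feasibility),
    # zbounds are box bounds on z = [x, s], and z2xs splits z back apart.
    x, s = z2xs(z0)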
---
 desc/objectives/_auglagrangian.py       |  2 +-
 desc/optimize/_desc_wrappers.py         |  3 +++
 desc/optimize/aug_lagrangian_ls_stel.py | 14 ++++++++------
 desc/optimize/utils.py                  | 19 +++++++++++--------
 4 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/desc/objectives/_auglagrangian.py b/desc/objectives/_auglagrangian.py
index 3f9e1fe6c3..e42311600b 100644
--- a/desc/objectives/_auglagrangian.py
+++ b/desc/objectives/_auglagrangian.py
@@ -46,7 +46,7 @@ def callback(self, x, lmbda, mu):
     def compute_constraints(self, x):
         c = jnp.array([])
         for i in range(len(self.constr)):
-            c = jnp.concatenate((c, self.constr[i].compute(x)), axis=None)
+            c = jnp.concatenate((c, self.constr[i].fun(x)), axis=None)
         return c
 
diff --git a/desc/optimize/_desc_wrappers.py b/desc/optimize/_desc_wrappers.py
index 661c2b0f48..c9fb197c41 100644
--- a/desc/optimize/_desc_wrappers.py
+++ b/desc/optimize/_desc_wrappers.py
@@ -123,6 +123,9 @@ def _optimize_desc_aug_lagrangian_least_squares(
         callback=None,
         options=options,
     )
+    x, s = z2xs(result["x"])
+    result["x"] = x
+    result["allx"] = [x]
     return result
 
diff --git a/desc/optimize/aug_lagrangian_ls_stel.py b/desc/optimize/aug_lagrangian_ls_stel.py
index 9136b79c9b..93197bd6bb 100644
--- a/desc/optimize/aug_lagrangian_ls_stel.py
+++ b/desc/optimize/aug_lagrangian_ls_stel.py
@@ -43,11 +43,12 @@ def fmin_lag_ls_stel(
     N = x0.size
     x = x0.copy()
     f = fun(x, *args)
+    c = constraint.fun(x)
     nfev += 1
 
-    mu = options.pop("mu", 10 * jnp.ones(constraint.dim_f()))
-    lmbda = options.pop("lmbda", 10 * jnp.ones(constraint.dim_f()))
+    mu = options.pop("mu", 10 * jnp.ones(len(c)))
+    lmbda = options.pop("lmbda", 10 * jnp.ones(len(c)))
+    # x = np.append(x, 1.0 * np.ones(constraint._ineq_dim))
 
     # def recover(x):
     #     return x[0 : len(x) - constraint._ineq_dim]
@@ -62,7 +63,7 @@ def fmin_lag_ls_stel(
     gtolk = 1 / (10 * np.linalg.norm(mu))
     ctolk = 1 / (np.linalg.norm(mu) ** (0.1))
     xold = x
-    f = fun(recover(x))
+    # f = fun(recover(x))
     fold = f
 
     while iteration < maxiter:
@@ -115,7 +116,8 @@ def fmin_lag_ls_stel(
         fold = f
 
     g = gradL(x, lmbda, mu)
-    f = fun(recover(x))
+    # f = fun(recover(x))
+    f = fun(x)
     success = True
     message = "successful"
     result = OptimizeResult(
@@ -130,5 +132,5 @@ def fmin_lag_ls_stel(
         nit=iteration,
         message=message,
     )
-    result["allx"] = [recover(x)]
+    # result["allx"] = [recover(x)]
     return result
diff --git a/desc/optimize/utils.py b/desc/optimize/utils.py
index 989a37c608..08ea4549f4 100644
--- a/desc/optimize/utils.py
+++ b/desc/optimize/utils.py
@@ -1,5 +1,7 @@
 """Utility functions used in optimization problems."""
 
+import copy
+
 import numpy as np
 
 from desc.backend import cond, jit, jnp, put
@@ -43,19 +45,20 @@ def inequality_to_bounds(x0, fun, grad, hess, constraint, bounds):
 
     """
 
-    c0 = constraint.fun(x0)
+    c0 = constraint.compute_scaled(x0)
     ncon = c0.size
-    constraint.lb = jnp.broadcast_to(constraint.lb, ncon)
-    constraint.ub = jnp.broadcast_to(constraint.ub, ncon)
+    # constraint.bounds[0] = jnp.broadcast_to(constraint.bounds[0], ncon)
+    # constraint.bounds[1] = jnp.broadcast_to(constraint.bounds[1], ncon)
     bounds = tuple(jnp.broadcast_to(bi, x0.shape) for bi in bounds)
 
-    lbs, ubs = constraint.lb, constraint.ub
+    lbs, ubs = constraint.bounds[0], constraint.bounds[1]
     lbx, ubx = bounds
 
-    ineq_mask = lbs == ubs
-    eq_mask = lbs != ubs
+    ineq_mask = lbs != ubs
+    eq_mask = lbs == ubs
     eq_target = lbs[~ineq_mask]
     nslack = jnp.sum(ineq_mask)
+    print("nslack is " + str(nslack))
     zbounds = (
         jnp.concatenate([lbx, lbs[ineq_mask]]),
         jnp.concatenate([ubx, ubs[ineq_mask]]),
@@ -67,7 +70,7 @@ def inequality_to_bounds(x0, fun, grad, hess, constraint, bounds):
     target = put(target, eq_mask, eq_target)
 
     def z2xs(z):
-        return z[:nslack], z[nslack:]
+        return z[: len(z) - nslack], z[len(z) - nslack :]
 
     def fun_wrapped(z):
         x, s = z2xs(z)
@@ -90,7 +93,7 @@ def hess_wrapped(z):
 
     def confun_wrapped(z):
         x, s = z2xs(z)
-        c = constraint.fun(x)
+        c = constraint.compute_scaled(x)
         sbig = jnp.zeros(ncon)
         sbig = put(sbig, ineq_mask, s)
         return c - sbig - target

From 2672126b34ce330703dc0f179069396f596f150c Mon Sep 17 00:00:00 2001
From: "Patrick S. Kim"
Date: Thu, 4 May 2023 13:32:50 -0400
Subject: [PATCH 12/41] tuning augmented lagrangian optimizer.
---
 desc/optimize/aug_lagrangian_ls_stel.py | 5 ++++-
 desc/optimize/utils.py                  | 5 +----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/desc/optimize/aug_lagrangian_ls_stel.py b/desc/optimize/aug_lagrangian_ls_stel.py
index 93197bd6bb..b04974dd77 100644
--- a/desc/optimize/aug_lagrangian_ls_stel.py
+++ b/desc/optimize/aug_lagrangian_ls_stel.py
@@ -77,14 +77,17 @@ def fmin_lag_ls_stel(
             ),
             bounds=bounds,
             gtol=gtolk,
-            maxiter=10,
+            maxiter=5,
             verbose=2,
         )
 
         x = xk["x"]
+        print("slack vars are " + str(x[-10:]))
         f = fun(x)
         cv = L.compute_constraints(x)
+        print("mag well constr is " + str(cv[-10:]))
         c = np.max(cv)
+        print("max c is " + str(c))
 
         if np.linalg.norm(xold - x) < xtol:
             print("xtol satisfied\n")
diff --git a/desc/optimize/utils.py b/desc/optimize/utils.py
index 08ea4549f4..be856ea5a6 100644
--- a/desc/optimize/utils.py
+++ b/desc/optimize/utils.py
@@ -47,8 +47,6 @@ def inequality_to_bounds(x0, fun, grad, hess, constraint, bounds):
 
     c0 = constraint.compute_scaled(x0)
     ncon = c0.size
-    # constraint.bounds[0] = jnp.broadcast_to(constraint.bounds[0], ncon)
-    # constraint.bounds[1] = jnp.broadcast_to(constraint.bounds[1], ncon)
     bounds = tuple(jnp.broadcast_to(bi, x0.shape) for bi in bounds)
 
     lbs, ubs = constraint.bounds[0], constraint.bounds[1]
@@ -58,7 +56,6 @@ def inequality_to_bounds(x0, fun, grad, hess, constraint, bounds):
     ineq_mask = lbs != ubs
     eq_mask = lbs == ubs
     eq_target = lbs[~ineq_mask]
     nslack = jnp.sum(ineq_mask)
-    print("nslack is " + str(nslack))
     zbounds = (
         jnp.concatenate([lbx, lbs[ineq_mask]]),
         jnp.concatenate([ubx, ubs[ineq_mask]]),

From 09b6bd1610b71cd3bd197f0dd2924bbb6682d39d Mon Sep 17 00:00:00 2001
From: "Patrick S. Kim"
Date: Thu, 4 May 2023 17:31:14 -0400
Subject: [PATCH 13/41] removing print statements

---
 desc/optimize/aug_lagrangian_ls_stel.py | 13 -------------
 desc/optimize/utils.py                  |  3 ---
 2 files changed, 16 deletions(-)

diff --git a/desc/optimize/aug_lagrangian_ls_stel.py b/desc/optimize/aug_lagrangian_ls_stel.py
index 4f1df15b5d..93bcc0aeae 100644
--- a/desc/optimize/aug_lagrangian_ls_stel.py
+++ b/desc/optimize/aug_lagrangian_ls_stel.py
@@ -48,24 +48,14 @@ def fmin_lag_ls_stel(
 
     mu = options.pop("mu", 10 * jnp.ones(len(c)))
     lmbda = options.pop("lmbda", 10 * jnp.ones(len(c)))
-    # x = np.append(x, 1.0 * np.ones(constraint._ineq_dim))
-
-    # def recover(x):
-    #     return x[0 : len(x) - constraint._ineq_dim]
-
-    # def wrapped_obj(x):
-    #     return fun(recover(x))
 
     constr = np.array([constraint])
-    print("obj is " + str(np.linalg.norm(fun(x))))
-    print("constr norm is " + str(np.linalg.norm(constr[0].fun(x))))
     L = AugLagrangianLS(fun, constr)
     gradL = Derivative(L.compute, 0, "fwd")
 
     gtolk = 1 / (10 * np.linalg.norm(mu))
     ctolk = 1 / (np.linalg.norm(mu) ** (0.1))
     xold = x
-    # f = fun(recover(x))
     fold = f
 
     while iteration < maxiter:
@@ -84,12 +74,9 @@ def fmin_lag_ls_stel(
         )
 
         x = xk["x"]
-        print("slack vars are " + str(x[-10:]))
         f = fun(x)
         cv = L.compute_constraints(x)
-        print("mag well constr is " + str(cv[-10:]))
         c = np.max(cv)
-        print("max c is " + str(c))
 
         if np.linalg.norm(xold - x) < xtol:
             print("xtol satisfied\n")
diff --git a/desc/optimize/utils.py b/desc/optimize/utils.py
index 606e30ee83..c1ae59e4c5 100644
--- a/desc/optimize/utils.py
+++ b/desc/optimize/utils.py
@@ -52,9 +52,6 @@ def inequality_to_bounds(x0, fun, grad, hess, constraint, bounds):
     lbs, ubs = con_bounds[0], con_bounds[1]
     lbx, ubx = bounds
 
-    print("lbs are " + str(lbs))
-    print("ubs are " + str(ubs))
-
     ineq_mask = lbs != ubs
     eq_mask = lbs == ubs
     eq_target = lbs[~ineq_mask]

From cc11a6e8d34719d5b9eb15d1a30e7eb946db3ace Mon Sep 17 00:00:00 2001
From: "Patrick S. Kim"
Date: Sat, 6 May 2023 03:25:45 -0400
Subject: [PATCH 14/41] trying alternative formulation of aug lagrangian.
---
 desc/objectives/_auglagrangian.py       | 4 ++--
 desc/optimize/aug_lagrangian_ls_stel.py | 5 +++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/desc/objectives/_auglagrangian.py b/desc/objectives/_auglagrangian.py
index e42311600b..fac4537d1e 100644
--- a/desc/objectives/_auglagrangian.py
+++ b/desc/objectives/_auglagrangian.py
@@ -31,9 +31,9 @@ def derivatives(self):
     def compute(self, x, lmbda, mu):
         L = self.func(x)
         c = self.compute_constraints(x)
-        c = -lmbda * c + mu / 2 * c**2
+        # c = -lmbda * c + mu / 2 * c**2
+        c = 1 / (np.sqrt(2 * mu)) * (-lmbda + mu * c)
         L = jnp.concatenate((L, c), axis=None)
-        print("L is evaluated")
         return L
 
     def compute_scalar(self, x, lmbda, mu):
diff --git a/desc/optimize/aug_lagrangian_ls_stel.py b/desc/optimize/aug_lagrangian_ls_stel.py
index 93bcc0aeae..2182a4a7c4 100644
--- a/desc/optimize/aug_lagrangian_ls_stel.py
+++ b/desc/optimize/aug_lagrangian_ls_stel.py
@@ -47,7 +47,7 @@ def fmin_lag_ls_stel(
     nfev += 1
 
     mu = options.pop("mu", 10 * jnp.ones(len(c)))
-    lmbda = options.pop("lmbda", 10 * jnp.ones(len(c)))
+    lmbda = options.pop("lmbda", 0.01 * jnp.ones(len(c)))
 
     constr = np.array([constraint])
     L = AugLagrangianLS(fun, constr)
@@ -69,7 +69,7 @@ def fmin_lag_ls_stel(
             ),
             bounds=bounds,
             gtol=gtolk,
-            maxiter=5,
+            maxiter=10,
             verbose=2,
         )
 
@@ -95,6 +95,7 @@ def fmin_lag_ls_stel(
         else:
             print("Updating lambda")
+            # lmbda = lmbda - mu * cv
             lmbda = lmbda - mu * cv
             ctolk = ctolk / (np.max(mu) ** (0.9))
             gtolk = gtolk / (np.max(mu))

From 61808c232be3aa248034a45152b25db47c35b866 Mon Sep 17 00:00:00 2001
From: "Patrick S. Kim"
Date: Mon, 8 May 2023 02:00:30 -0400
Subject: [PATCH 15/41] fixing formatting and scalar augmented lagrangian.
---
 desc/objectives/_auglagrangian.py       |   6 +-
 desc/objectives/_iota_utils.py          |   1 -
 desc/objectives/objective_funs.py       |   4 -
 desc/optimize/__init__.py               |   6 +-
 desc/optimize/_constraint_wrappers.py   |  95 ------------------
 desc/optimize/_desc_wrappers.py         | 124 ++++++++++++++++++++----
 desc/optimize/aug_lagrangian.py         | 115 +++++++---------------
 desc/optimize/aug_lagrangian_ls_stel.py |   2 -
 desc/optimize/optimizer.py              |  27 ------
 9 files changed, 142 insertions(+), 238 deletions(-)

diff --git a/desc/objectives/_auglagrangian.py b/desc/objectives/_auglagrangian.py
index fac4537d1e..4b14c44efc 100644
--- a/desc/objectives/_auglagrangian.py
+++ b/desc/objectives/_auglagrangian.py
@@ -1,4 +1,3 @@
-import jax
 import numpy as np
 
 from desc.backend import jnp
@@ -67,9 +66,6 @@ def derivatives(self):
     def compute(self, x, lmbda, mu):
         L = self.func(x)
         c = self.compute_constraints(x)
-        # jax.debug.print("lmbda term is " + str(jnp.dot(lmbda,c)))
-        # jax.debug.print("mu term is " + str(mu/2*jnp.dot(c,c)))
-        # jax.debug.print("L is " + str(L))
         return L - jnp.dot(lmbda, c) + mu / 2 * jnp.dot(c, c)
 
     def compute_scalar(self, x, lmbda, mu):
@@ -82,5 +78,5 @@ def callback(self, x, lmbda, mu):
     def compute_constraints(self, x):
         c = jnp.array([])
         for i in range(len(self.constr)):
-            c = jnp.concatenate((c, self.constr[i](x)), axis=None)
+            c = jnp.concatenate((c, self.constr[i].fun(x)), axis=None)
         return c
diff --git a/desc/objectives/_iota_utils.py b/desc/objectives/_iota_utils.py
index a78316ae8f..e051e5cc08 100644
--- a/desc/objectives/_iota_utils.py
+++ b/desc/objectives/_iota_utils.py
@@ -6,7 +6,6 @@
 from desc.compute.utils import compress
 from desc.grid import LinearGrid, QuadratureGrid
 from desc.objectives.objective_funs import _Objective
-from desc.transform import Transform
 from desc.utils import Timer
 
diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index 4370914b32..6c450f2210 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -74,10 +74,6 @@ def combine_args(self, objectives):
         objectives._dim_x = self._dim_x
         objectives._x_idx = self._x_idx
 
-    def _set_state_vector(self):
-        """Set state vector components, dimensions, and indices."""
-        self._args = np.concatenate([obj.args for obj in self.objectives])
-
     def set_args(self, *args):
         """Set which arguments the objective should expect.
 
diff --git a/desc/optimize/__init__.py b/desc/optimize/__init__.py
index baf13a62f1..1e4eed0b40 100644
--- a/desc/optimize/__init__.py
+++ b/desc/optimize/__init__.py
@@ -1,11 +1,7 @@
 """Functions for minimization and wrappers for scipy methods."""
 
 from . import _desc_wrappers, _scipy_wrappers
-from ._constraint_wrappers import (
-    DESCNonlinearConstraint,
-    LinearConstraintProjection,
-    ProximalProjection,
-)
+from ._constraint_wrappers import LinearConstraintProjection, ProximalProjection
 from .aug_lagrangian import fmin_lag_stel
 from .aug_lagrangian_ls_stel import fmin_lag_ls_stel
 from .fmin_scalar import fmintr
diff --git a/desc/optimize/_constraint_wrappers.py b/desc/optimize/_constraint_wrappers.py
index 4325a0cc9d..48d50bf2a2 100644
--- a/desc/optimize/_constraint_wrappers.py
+++ b/desc/optimize/_constraint_wrappers.py
@@ -20,101 +20,6 @@
 from .utils import compute_jac_scale, f_where_x
 
 
-class DESCNonlinearConstraint(ObjectiveFunction):
-
-    _print_value_fmt = "Total constraint error: {:10.3e} "
-    _scalar = False
-    _linear = False
-
-    def __init__(self, constraint):
-        print("constraint is " + str(constraint))
-        assert isinstance(constraint, ObjectiveFunction), (
-            "constraint should be instance of ObjectiveFunction." ""
-        )
-        self._constraint = constraint
-        self._built = False
-        # don't want to compile this, just use the compiled objective
-        self._use_jit = False
-        self._compiled = False
-
-        (
-            self._equality_constraint,
-            self._inequality_constraint,
-        ) = self.parse_constraints()
-
-        self._eq_dim, self._ineq_dim = self.dim()
-
-        # if eq is not None:
-        #     self.build(eq, verbose=verbose)
-
-    def parse_constraints(self):
-        # print("constraint objectives are " + str(self._constraint._objectives[0]._objectives))
-        # for constraint in self._constraint._objectives[0]._objectives:
-        #     print("lower bounds are " + str(constraint.bounds[0]))
-        #     print("upper bounds are " + str(constraint.bounds[1]))
-        equality_constraints = tuple(
-            constraint
-            for constraint in self._constraint._objectives
-            if np.array_equal(constraint.bounds[0], constraint.bounds[1])
-        )
-        inequality_constraints = tuple(
-            constraint
-            for constraint in self._constraint._objectives
-            if not np.array_equal(constraint.bounds[0], constraint.bounds[1])
-        )
-        # equality_objective = ObjectiveFunction(equality_constraints)
-        # inequality_objective = ObjectiveFunction(inequality_constraints)
-        if len(equality_constraints):
-            equality_objective = LinearConstraintProjection(
-                equality_constraints[0], self._constraint._constraints
-            )
-            equality_objective.build(self._constraint._eq)
-        else:
-            equality_objective = None
-        if len(inequality_constraints):
-            inequality_objective = LinearConstraintProjection(
-                equality_constraints[0], self._constraint._constraints
-            )
-            inequality_objective.build(self._constraint._eq)
-        else:
-            inequality_objective = None
-        return equality_objective, inequality_objective
-
-    def dim(self):
-        eq_dim = 0
-        ineq_dim = 0
-        if self._inequality_constraint:
-            for constr in self._inequality_constraint._objectives:
-                if constr.bounds[0] != -np.inf:
-                    ineq_dim += constr.dim_f
-                if constr.bounds[1] != np.inf:
-                    ineq_dim += constr.dim_f
-        if self._equality_constraint:
-            for constr in self._equality_constraint._objectives:
-                eq_dim += constr.dim_f
-
-        return eq_dim, ineq_dim
-
-    def dim_f(self):
-        return self._eq_dim + self._ineq_dim
-
-    def recover(self, x):
-        x_reduced = x[0 : len(x) - self._ineq_dim]
-        slack = x[len(x_reduced) :] ** 2
-        return x_reduced, slack
-
-    def compute(self, x):
-        x_reduced, slack = self.recover(x)
-        c = jnp.array([])
-        if self._equality_constraint:
-            c = jnp.append(c, self._equality_constraint.compute_scaled(x_reduced))
-            slack = jnp.append(jnp.zeros(self._equality_constraint.dim_f), slack)
-
-        if self._inequality_constraint:
-            c = jnp.append(c, self._inequality_constraint.compute_scaled(x_reduced))
-
-        return c + slack
-
-
 class LinearConstraintProjection(ObjectiveFunction):
     """Remove linear constraints via orthogonal projection.
 
diff --git a/desc/optimize/_desc_wrappers.py b/desc/optimize/_desc_wrappers.py
index 27b6b282f8..ae07c364a4 100644
--- a/desc/optimize/_desc_wrappers.py
+++ b/desc/optimize/_desc_wrappers.py
@@ -2,7 +2,7 @@
 
 from desc.backend import jnp
 
-from ._constraint_wrappers import DESCNonlinearConstraint
+from .aug_lagrangian import fmin_lag_stel
 from .aug_lagrangian_ls_stel import fmin_lag_ls_stel
 from .fmin_scalar import fmintr
@@ -12,7 +12,7 @@
 
 @register_optimizer(
-    name="auglag-lsq",
+    name="auglag",
     description="Augmented Lagrangian approach to constrained optimization",
     scalar=False,
     equality_constraints=True,
@@ -20,6 +20,109 @@
     stochastic=False,
     hessian=False,
 )
+def _optimize_desc_aug_lagrangian(
+    objective, constraint, x0, method, x_scale, verbose, stoptol, options=None
+):
+    """Wrapper for desc.optimize.fmin_lag_stel.
+
+
+    Parameters
+    ----------
+    objective : ObjectiveFunction
+        Function to minimize.
+    constraint : ObjectiveFunction
+        Constraint to satisfy
+    x0 : ndarray
+        Starting point.
+    method : str
+        Name of the method to use. Any of the options in scipy.optimize.least_squares.
+    x_scale : array_like or ‘jac’, optional
+        Characteristic scale of each variable. Setting x_scale is equivalent to
+        reformulating the problem in scaled variables xs = x / x_scale. An alternative
+        view is that the size of a trust region along jth dimension is proportional to
+        x_scale[j]. Improved convergence may be achieved by setting x_scale such that
+        a step of a given size along any of the scaled variables has a similar effect
+        on the cost function. If set to ‘jac’, the scale is iteratively updated using
+        the inverse norms of the columns of the Jacobian matrix.
+    verbose : int
+        * 0 : work silently.
+        * 1 : display a termination report.
+        * 2 : display progress during iterations
+    stoptol : dict
+        Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol",
+        "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"}
+    options : dict, optional
+        Dictionary of optional keyword arguments to override default solver
+        settings. See the code for more details.
+
+    Returns
+    -------
+    res : OptimizeResult
+        The optimization result represented as a ``OptimizeResult`` object.
+        Important attributes are: ``x`` the solution array, ``success`` a
+        Boolean flag indicating if the optimizer exited successfully and
+        ``message`` which describes the cause of the termination. See
+        `OptimizeResult` for a description of other attributes.
+ + """ + options = {} if options is None else options + options.setdefault("maxiter", np.iinfo(np.int32).max) + options.setdefault("disp", False) + x_scale = 1 if x_scale == "auto" else x_scale + if isinstance(x_scale, str): + raise ValueError(f"Method {method} does not support x_scale type {x_scale}") + bounds = (-jnp.inf, jnp.inf) + if constraint is not None: + ( + z0, + fun_wrapped, + grad_wrapped, + hess_wrapped, + constraint_wrapped, + zbounds, + z2xs, + ) = inequality_to_bounds( + x0, + objective.compute_scalar, + objective.grad, + objective.hess, + constraint, + bounds, + ) + else: + constraint_wrapped = None + + result = fmin_lag_stel( + fun_wrapped, + constraint_wrapped, + x0=z0, + bounds=zbounds, + args=(), + x_scale=x_scale, + ftol=stoptol["ftol"], + xtol=stoptol["xtol"], + gtol=stoptol["gtol"], + maxiter=stoptol["maxiter"], + verbose=verbose, + callback=None, + options=options, + ) + x, s = z2xs(result["x"]) + result["x"] = x + result["allx"] = [x] + return result + + +@register_optimizer( + name="auglag-lsq", + description="Least Squares Augmented Lagrangian approach " + + "to constrained optimization", + scalar=False, + equality_constraints=True, + inequality_constraints=True, + stochastic=False, + hessian=False, +) def _optimize_desc_aug_lagrangian_least_squares( objective, constraint, x0, method, x_scale, verbose, stoptol, options=None ): @@ -89,25 +192,8 @@ def _optimize_desc_aug_lagrangian_least_squares( constraint, bounds, ) - # constraint_wrapped = DESCNonlinearConstraint( - # constraint, - # ) - # if constraint_wrapped._equality_constraint: - # objective.combine_args(constraint_wrapped._equality_constraint) - # if constraint_wrapped._inequality_constraint: - # objective.combine_args(constrained_wrapped._inequality_constraint) else: constraint_wrapped = None - # need to use some "global" variables here - allx = [] - func_allx = [] - func_allf = [] - grad_allx = [] - grad_allf = [] - hess_allx = [] - hess_allf = [] - message = [""] - success = [None] result = fmin_lag_ls_stel( fun_wrapped, diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py index c2a5cd8be2..304dc0708a 100644 --- a/desc/optimize/aug_lagrangian.py +++ b/desc/optimize/aug_lagrangian.py @@ -6,7 +6,7 @@ """ import numpy as np -from scipy.optimize import OptimizeResult, minimize +from scipy.optimize import OptimizeResult from desc.backend import jnp from desc.derivatives import Derivative @@ -20,10 +20,9 @@ def conv_test(x, L, gL): def fmin_lag_stel( fun, + constraint, x0, - grad, - eq_constr, - ineq_constr, + bounds, args=(), x_scale=1, ftol=1e-6, @@ -41,94 +40,50 @@ def fmin_lag_stel( nhev = 0 iteration = 0 - N = x0.size x = x0.copy() f = fun(x, *args) + c = constraint.fun(x) nfev += 1 - g = grad(x, *args) - ngev += 1 - - eq = eq_constr(x) if eq_constr is not None else jnp.array([]) - ineq = ineq_constr(x) if ineq_constr is not None else jnp.array([]) - eq_dim = len(eq.flatten()) - ineq_dim = len(ineq.flatten()) - x = np.append(x, 1.0 * np.ones(ineq_dim)) - - if maxiter is None: - maxiter = N * 100 - - mu = options.pop("mu", 1.0) - lmbda = options.pop("lmbda", 0.0 * jnp.ones(eq_dim + ineq_dim)) - bounds = options.pop("bounds", jnp.zeros(eq_dim + ineq_dim)) - - def recover(x): - return x[0 : len(x) - ineq_dim] - - def wrapped_constraint(x): - c = np.array([]) - slack = x[len(recover(x)) :] ** 2 - x_recov = recover(x) - eq = eq_constr(x_recov) if eq_constr is not None else jnp.array([]) - ineq = ineq_constr(x_recov) if ineq_constr is not None else jnp.array([]) - - c = 
jnp.append(c, eq_constr(x_recov)) - slack = jnp.append(jnp.zeros(len(eq.flatten())), slack) - c = jnp.append(c, ineq) - c = c - bounds + slack - return c - - def wrapped_obj(x): - return fun(recover(x)) - - constr = np.array([wrapped_constraint]) - L = AugLagrangian(wrapped_obj, constr) - gradL = Derivative(L.compute, 0, "rev") + + mu = options.pop("mu", 1) + lmbda = options.pop("lmbda", jnp.zeros(len(c))) + + constr = np.array([constraint]) + L = AugLagrangian(fun, constr) + gradL = Derivative(L.compute, 0, "fwd") hessL = Derivative(L.compute, argnum=0, mode="hess") - gtolk = 1 / (10 * mu) - c = np.linalg.norm(L.compute_constraints(x)) - ctolk = c / (mu ** (0.1)) + gtolk = 1 / (10 * np.linalg.norm(mu)) + ctolk = 1 / (np.linalg.norm(mu) ** (0.1)) xold = x - f = fun(recover(x)) - fold = f - cv = L.compute_constraints(x) - # lmbda = lmbda - mu * cv - print("f is " + str(f)) - print("lmbda term is " + str(np.dot(lmbda, cv))) - print("mu term is " + str(mu / 2 * np.dot(cv, cv))) - print("The constraints are " + str(c)) while iteration < maxiter: - # xk = fmintr( - # L.compute, - # x, - # grad=gradL, - # hess=hessL, - # args=( - # lmbda, - # mu, - # ), - # gtol=gtolk, - # maxiter=20, - # verbose=2, - # ) - xk = minimize( + xk = fmintr( L.compute, x, - args=(lmbda, mu), - method="trust-exact", - jac=gradL, + grad=gradL, hess=hessL, - options={"maxiter": 20}, + args=( + lmbda, + mu, + ), + gtol=gtolk, + maxiter=20, + verbose=2, ) + # xk = minimize( + # L.compute, + # x, + # args=(lmbda, mu), + # method="trust-exact", + # jac=gradL, + # hess=hessL, + # options={"maxiter": 20}, + # verbose=2 + # ) x = xk["x"] - f = fun(recover(x)) - print("f is " + str(f)) + f = fun(x) cv = L.compute_constraints(x) - print("The constraints are " + str(np.linalg.norm(cv))) - print("lmbda term is " + str(np.dot(lmbda, cv))) - print("mu term is " + str(mu / 2 * np.dot(cv, cv))) - print("\n") c = np.linalg.norm(cv) if np.linalg.norm(xold - x) < xtol: @@ -158,7 +113,7 @@ def wrapped_obj(x): fold = f g = gradL(x, lmbda, mu) - f = fun(recover(x)) + f = fun(x) success = True message = "successful" result = OptimizeResult( @@ -173,5 +128,5 @@ def wrapped_obj(x): nit=iteration, message=message, ) - result["allx"] = [recover(x)] + result["allx"] = [x] return result diff --git a/desc/optimize/aug_lagrangian_ls_stel.py b/desc/optimize/aug_lagrangian_ls_stel.py index 2182a4a7c4..0140720f85 100644 --- a/desc/optimize/aug_lagrangian_ls_stel.py +++ b/desc/optimize/aug_lagrangian_ls_stel.py @@ -40,7 +40,6 @@ def fmin_lag_ls_stel( nhev = 0 iteration = 0 - N = x0.size x = x0.copy() f = fun(x, *args) c = constraint.fun(x) @@ -109,7 +108,6 @@ def fmin_lag_ls_stel( fold = f g = gradL(x, lmbda, mu) - # f = fun(recover(x)) f = fun(x) success = True message = "successful" diff --git a/desc/optimize/optimizer.py b/desc/optimize/optimizer.py index fb02de7028..01fcc94ea1 100644 --- a/desc/optimize/optimizer.py +++ b/desc/optimize/optimizer.py @@ -358,33 +358,6 @@ def _maybe_wrap_nonlinear_constraints(objective, nonlinear_constraint, method, o return objective, nonlinear_constraint -def _wrap_constraints( - method, nonlinear_constraints, equality_constraints, inequality_constraints -): - for constraint in nonlinear_constraints: - if not isinstance( - constraint, - ( - ForceBalance, - RadialForceBalance, - HelicalForceBalance, - CurrentDensity, - MagneticWell, - AspectRatio, - ), - ): - raise ValueError( - "optimizer method {} ".format(method) - + "cannot handle general nonlinear constraint {}.".format(constraint) - ) - - constraint_objectives = 
-    equality_objectives = ObjectiveFunction(equality_constraints)
-    inequality_objectives = ObjectiveFunction(inequality_constraints)
-
-    return constraint_objectives, equality_objectives, inequality_objectives
-
-
 def _get_default_tols(
     method,
     ftol=None,

From 821965fef36cd3a7fec636546de9a4d6a4ae9f1e Mon Sep 17 00:00:00 2001
From: Rory Conlin
Date: Wed, 24 May 2023 17:51:59 -0400
Subject: [PATCH 16/41] Remove iota_utils

---
 desc/objectives/__init__.py    |   1 -
 desc/objectives/_iota_utils.py | 275 ---------------------------------
 2 files changed, 276 deletions(-)
 delete mode 100644 desc/objectives/_iota_utils.py

diff --git a/desc/objectives/__init__.py b/desc/objectives/__init__.py
index 144f36af76..b1269383b8 100644
--- a/desc/objectives/__init__.py
+++ b/desc/objectives/__init__.py
@@ -18,7 +18,6 @@
     PrincipalCurvature,
     Volume,
 )
-from ._iota_utils import IotaAt, MeanIota
 from ._qs import (
     Isodynamicity,
     QuasisymmetryBoozer,
diff --git a/desc/objectives/_iota_utils.py b/desc/objectives/_iota_utils.py
deleted file mode 100644
index e051e5cc08..0000000000
--- a/desc/objectives/_iota_utils.py
+++ /dev/null
@@ -1,275 +0,0 @@
-"""Objectives related to rotational transform"""
-
-from desc.backend import jnp
-from desc.compute import compute as compute_fun
-from desc.compute import get_params, get_profiles, get_transforms
-from desc.compute.utils import compress
-from desc.grid import LinearGrid, QuadratureGrid
-from desc.objectives.objective_funs import _Objective
-from desc.utils import Timer
-
-
-class MeanIota(_Objective):
-    r"""Targets a radially averaged rotational transform.
-
-    f = (1/2) [(\int d\rho iota) - target]^2
-
-    Parameters
-    ----------
-    eq : Equilibrium, optional
-        Equilibrium that will be optimized to satisfy the Objective.
-    target : float, ndarray, optional
-        Target value(s) of the objective.
-        len(target) must be equal to Objective.dim_f == grid.num_rho
-    weight : float, ndarray, optional
-        Weighting to apply to the Objective, relative to other Objectives.
-        len(weight) must be equal to Objective.dim_f == grid.num_rho
-    normalize : bool
-        Whether to compute the error in physical units or non-dimensionalize.
-        Note: has no effect for this objective.
-    normalize_target : bool
-        Whether target should be normalized before comparing to computed values.
-        if `normalize` is `True` and the target is in physical units, this should also
-        be set to True.
-        Note: has no effect for this objective.
-    grid : Grid, optional
-        Collocation grid containing the nodes to evaluate at.
-    name : str
-        Name of the objective function.
-    """
-
-    _scalar = True
-    _linear = False
-    _units = "(dimensionless)"
-    _print_value_fmt = "Mean rotational transform: {:10.3e} "
-
-    def __init__(
-        self,
-        eq=None,
-        target=0,
-        weight=1,
-        normalize=False,
-        normalize_target=False,
-        grid=None,
-        name="mean rotational transform",
-    ):
-
-        self.grid = grid
-        super().__init__(
-            eq=eq,
-            target=target,
-            weight=weight,
-            normalize=normalize,
-            normalize_target=normalize_target,
-            name=name,
-        )
-
-    def build(self, eq, use_jit=True, verbose=1):
-        """Build constant arrays.
-
-        Parameters
-        ----------
-        eq : Equilibrium, optional
-            Equilibrium that will be optimized to satisfy the Objective.
-        use_jit : bool, optional
-            Whether to just-in-time compile the objective and derivatives.
-        verbose : int, optional
-            Level of output.
-        """
-        if self.grid is None:
-            self.grid = QuadratureGrid(
-                L=eq.L_grid,
-                M=eq.M_grid,
-                N=eq.N_grid,
-                NFP=eq.NFP,
-                sym=eq.sym,
-            )
-
-        self._dim_f = 1
-        self._data_keys = ["iota"]
-        self._args = get_params(self._data_keys)
-
-        timer = Timer()
-        if verbose > 0:
-            print("Precomputing transforms")
-        timer.start("Precomputing transforms")
-
-        self._profiles = get_profiles(self._data_keys, eq=eq, grid=self.grid)
-        self._transforms = get_transforms(self._data_keys, eq=eq, grid=self.grid)
-
-        timer.stop("Precomputing transforms")
-        if verbose > 1:
-            timer.disp("Precomputing transforms")
-
-        super().build(eq=eq, use_jit=use_jit, verbose=verbose)
-
-    def compute(self, *args, **kwargs):
-        """Compute rotational transform profile errors.
-
-        Parameters
-        ----------
-        R_lmn : ndarray
-            Spectral coefficients of R(rho,theta,zeta) -- flux surface R coordinate (m).
-        Z_lmn : ndarray
-            Spectral coefficients of Z(rho,theta,zeta) -- flux surface Z coordinate (m).
-        L_lmn : ndarray
-            Spectral coefficients of lambda(rho,theta,zeta) -- poloidal stream function.
-        i_l : ndarray
-            Spectral coefficients of iota(rho) -- rotational transform profile.
-        c_l : ndarray
-            Spectral coefficients of I(rho) -- toroidal current profile.
-        Psi : float
-            Total toroidal magnetic flux within the last closed flux surface (Wb).
-
-        Returns
-        -------
-        iota : ndarray
-            rotational transform on specified flux surfaces.
-        """
-        params = self._parse_args(*args, **kwargs)
-        data = compute_fun(
-            self._data_keys,
-            params=params,
-            transforms=self._transforms,
-            profiles=self._profiles,
-        )
-        return jnp.sum(
-            compress(
-                self.grid, data["iota"] * self.grid.spacing[:, 0], surface_label="rho"
-            )
-        )
-
-
-class IotaAt(_Objective):
-    r"""Targets the rotational transform on one surface.
-
-    f = (1/2) [iota - target]^2
-
-    Use this objective with a LinearGrid containing only a single rho value.
-
-    Parameters
-    ----------
-    eq : Equilibrium, optional
-        Equilibrium that will be optimized to satisfy the Objective.
-    target : float, ndarray, optional
-        Target value(s) of the objective.
-        len(target) must be equal to Objective.dim_f == grid.num_rho
-    weight : float, ndarray, optional
-        Weighting to apply to the Objective, relative to other Objectives.
-        len(weight) must be equal to Objective.dim_f == grid.num_rho
-    normalize : bool
-        Whether to compute the error in physical units or non-dimensionalize.
-        Note: has no effect for this objective.
-    normalize_target : bool
-        Whether target should be normalized before comparing to computed values.
-        if `normalize` is `True` and the target is in physical units, this should also
-        be set to True.
-        Note: has no effect for this objective.
-    grid : Grid, optional
-        Collocation grid containing the nodes to evaluate at.
-    name : str
-        Name of the objective function.
-    """
-
-    _scalar = True
-    _linear = False
-    _units = "(dimensionless)"
-    _print_value_fmt = "Rotational transform: {:10.3e} "
-
-    def __init__(
-        self,
-        eq=None,
-        target=0,
-        weight=1,
-        normalize=False,
-        normalize_target=False,
-        grid=None,
-        name="rotational transform",
-    ):
-
-        self.grid = grid
-        super().__init__(
-            eq=eq,
-            target=target,
-            weight=weight,
-            normalize=normalize,
-            normalize_target=normalize_target,
-            name=name,
-        )
-
-    def build(self, eq, use_jit=True, verbose=1):
-        """Build constant arrays.
-
-        Parameters
-        ----------
-        eq : Equilibrium, optional
-            Equilibrium that will be optimized to satisfy the Objective.
-        use_jit : bool, optional
-            Whether to just-in-time compile the objective and derivatives.
-        verbose : int, optional
-            Level of output.
-        """
-        if self.grid is None:
-            self.grid = LinearGrid(
-                rho=0.5,
-                M=eq.M_grid,
-                N=eq.N_grid,
-                NFP=eq.NFP,
-                sym=eq.sym,
-            )
-
-        if self.grid.num_rho > 1:
-            raise ValueError("For IotaAt, grid should have only a single rho value.")
-
-        if not isinstance(self.grid, LinearGrid):
-            raise ValueError("For IotaAt, grid should be a LinearGrid.")
-
-        self._dim_f = 1
-        self._data_keys = ["iota"]
-        self._args = get_params(self._data_keys)
-
-        timer = Timer()
-        if verbose > 0:
-            print("Precomputing transforms")
-        timer.start("Precomputing transforms")
-
-        self._profiles = get_profiles(self._data_keys, eq=eq, grid=self.grid)
-        self._transforms = get_transforms(self._data_keys, eq=eq, grid=self.grid)
-
-        timer.stop("Precomputing transforms")
-        if verbose > 1:
-            timer.disp("Precomputing transforms")
-
-        super().build(eq=eq, use_jit=use_jit, verbose=verbose)
-
-    def compute(self, *args, **kwargs):
-        """Compute rotational transform profile errors.
-
-        Parameters
-        ----------
-        R_lmn : ndarray
-            Spectral coefficients of R(rho,theta,zeta) -- flux surface R coordinate (m).
-        Z_lmn : ndarray
-            Spectral coefficients of Z(rho,theta,zeta) -- flux surface Z coordinate (m).
-        L_lmn : ndarray
-            Spectral coefficients of lambda(rho,theta,zeta) -- poloidal stream function.
-        i_l : ndarray
-            Spectral coefficients of iota(rho) -- rotational transform profile.
-        c_l : ndarray
-            Spectral coefficients of I(rho) -- toroidal current profile.
-        Psi : float
-            Total toroidal magnetic flux within the last closed flux surface (Wb).
-
-        Returns
-        -------
-        iota : ndarray
-            rotational transform on specified flux surfaces.
-        """
-        params = self._parse_args(*args, **kwargs)
-        data = compute_fun(
-            self._data_keys,
-            params=params,
-            transforms=self._transforms,
-            profiles=self._profiles,
-        )
-        return data["iota"][0]

From ee7a22bc1acc2f19c47619ad9efed188adbf7283 Mon Sep 17 00:00:00 2001
From: Rory Conlin
Date: Wed, 24 May 2023 17:41:37 -0400
Subject: [PATCH 17/41] Remove old code

---
 desc/equilibrium/equilibrium.py   | 10 ++++------
 desc/objectives/_equilibrium.py   |  3 +--
 desc/objectives/_geometry.py      |  2 --
 desc/objectives/_stability.py     |  3 +--
 desc/objectives/objective_funs.py | 18 ------------------
 desc/optimize/optimizer.py        |  3 ++-
 6 files changed, 8 insertions(+), 31 deletions(-)

diff --git a/desc/equilibrium/equilibrium.py b/desc/equilibrium/equilibrium.py
index 01709ab3e2..1bce661425 100644
--- a/desc/equilibrium/equilibrium.py
+++ b/desc/equilibrium/equilibrium.py
@@ -627,9 +627,8 @@ def optimize(
         if verbose > 0:
             print("Start of solver")
             objective.print_value(objective.x(eq))
-            # for con in constraints:
-            #     print("con is " + str(con))
-            #     con.print_value(*con.xs(eq))
+            for con in constraints:
+                con.print_value(*con.xs(eq))
         for key, value in result["history"].items():
             # don't set nonexistent profile (values are empty ndarrays)
             if value[-1].size:
@@ -637,9 +636,8 @@ def optimize(
         if verbose > 0:
             print("End of solver")
             objective.print_value(objective.x(eq))
-            # for con in constraints:
-            #     print("con is " + str(con))
-            #     con.print_value(*con.xs(eq))
+            for con in constraints:
+                con.print_value(*con.xs(eq))
 
         eq.solved = result["success"]
         return eq, result
diff --git a/desc/objectives/_equilibrium.py b/desc/objectives/_equilibrium.py
index 3015ccde11..76b9489646 100644
--- a/desc/objectives/_equilibrium.py
+++ b/desc/objectives/_equilibrium.py
@@ -75,12 +75,11 @@ def __init__(
         normalize_target=True,
         grid=None,
         name="force",
-        equality=True,
     ):
         if target is None and bounds is None:
             target = 0
         self._grid = grid
-        self.equality = equality
+
         super().__init__(
             eq=eq,
             target=target,
diff --git a/desc/objectives/_geometry.py b/desc/objectives/_geometry.py
index c013942b1b..e6a2b3c6d4 100644
--- a/desc/objectives/_geometry.py
+++ b/desc/objectives/_geometry.py
@@ -60,12 +60,10 @@ def __init__(
         normalize_target=True,
         grid=None,
         name="aspect ratio",
-        equality=True,
     ):
         if target is None and bounds is None:
             target = 2
         self._grid = grid
-        self.equality = equality
 
         super().__init__(
             eq=eq,
diff --git a/desc/objectives/_stability.py b/desc/objectives/_stability.py
index b5428fda65..7cc30f6f28 100644
--- a/desc/objectives/_stability.py
+++ b/desc/objectives/_stability.py
@@ -257,12 +257,11 @@ def __init__(
         normalize_target=True,
         grid=None,
         name="Magnetic Well",
-        equality=True,
     ):
         if target is None and bounds is None:
             bounds = (0, np.inf)
         self._grid = grid
-        self.equality = equality
+
         super().__init__(
             eq=eq,
             target=target,
diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index 6c450f2210..b42123a66c 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -56,24 +56,6 @@ def __init__(
         if eq is not None:
             self.build(eq, use_jit=self._use_jit, verbose=verbose)
 
-    def combine_args(self, objectives):
-        self._args = np.concatenate(
-            (self.args, [obj.args for obj in objectives.objectives][0])
-        )
-        self._args = [arg for arg in arg_order if arg in self._args]
-
-        self._dimensions = self.objectives[0].dimensions
-        self._dim_x = 0
-        self._x_idx = {}
-        for arg in self.args:
-            self.x_idx[arg] = np.arange(self._dim_x, self._dim_x + self.dimensions[arg])
-            self._dim_x += self.dimensions[arg]
-
-        objectives._args = self._args
-        objectives._dimensions = self._dimensions
-        objectives._dim_x = self._dim_x
-        objectives._x_idx = self._x_idx
-
     def set_args(self, *args):
         """Set which arguments the objective should expect.
 
diff --git a/desc/optimize/optimizer.py b/desc/optimize/optimizer.py
index 01fcc94ea1..8b48ed301c 100644
--- a/desc/optimize/optimizer.py
+++ b/desc/optimize/optimizer.py
@@ -226,8 +226,9 @@ def optimize(  # noqa: C901 - FIXME: simplify this
 
         if verbose > 0:
             print("Starting optimization")
+            print("Using method: " + str(self.method))
+
         timer.start("Solution time")
-        print("method is " + str(self.method))
 
         result = optimizers[method]["fun"](
             objective,

From d08993cf04be789874f4f61790d846ef97c14cb7 Mon Sep 17 00:00:00 2001
From: Rory Conlin
Date: Wed, 24 May 2023 22:06:56 -0400
Subject: [PATCH 18/41] Allow inequality_to_bounds to work for constrained
 least squares too

---
 desc/optimize/utils.py | 46 +++++++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 18 deletions(-)

diff --git a/desc/optimize/utils.py b/desc/optimize/utils.py
index c1ae59e4c5..1ec2e8519b 100644
--- a/desc/optimize/utils.py
+++ b/desc/optimize/utils.py
@@ -5,6 +5,7 @@
 import numpy as np
 
 from desc.backend import cond, jit, jnp, put
+from desc.utils import Index
 
 
 def inequality_to_bounds(x0, fun, grad, hess, constraint, bounds):
@@ -44,12 +45,12 @@ def inequality_to_bounds(x0, fun, grad, hess, constraint, bounds):
         and slack variables x and s
 
     """
-
-    c0 = constraint.compute_scaled(x0)
+    c0 = constraint.fun(x0)
     ncon = c0.size
     bounds = tuple(jnp.broadcast_to(bi, x0.shape) for bi in bounds)
-    con_bounds = constraint._objective.bounds_scaled
-    lbs, ubs = con_bounds[0], con_bounds[1]
+    cbounds = (constraint.lb, constraint.ub)
+    cbounds = tuple(jnp.broadcast_to(bi, c0.shape) for bi in cbounds)
+    lbs, ubs = cbounds
     lbx, ubx = bounds
 
     ineq_mask = lbs != ubs
@@ -69,35 +70,44 @@ def inequality_to_bounds(x0, fun, grad, hess, constraint, bounds):
     def z2xs(z):
         return z[: len(z) - nslack], z[len(z) - nslack :]
 
-    def fun_wrapped(z):
+    def fun_wrapped(z, *args):
         x, s = z2xs(z)
-        return fun(x)
+        return fun(x, *args)
 
+    if hess is None:
+        # assume grad is really jac of least squares
+        def grad_wrapped(z, *args):
+            x, s = z2xs(z)
+            g = grad(x, *args)
+            return jnp.hstack([g, jnp.zeros((g.shape[0], nslack))])
+
+    else:
+
+        def grad_wrapped(z, *args):
+            x, s = z2xs(z)
+            g = grad(x, *args)
+            return jnp.concatenate([g, jnp.zeros(nslack)])
 
-    def grad_wrapped(z):
-        x, s = z2xs(z)
-        g = grad(x)
-        return jnp.concatenate([g, jnp.zeros(nslack)])
 
     if callable(hess):
 
-        def hess_wrapped(z):
+        def hess_wrapped(z, *args):
             x, s = z2xs(z)
-            H = hess(x)
+            H = hess(x, *args)
             return jnp.pad(H, (0, nslack))
 
     else:  # using BFGS
         hess_wrapped = hess
 
-    def confun_wrapped(z):
+    def confun_wrapped(z, *args):
         x, s = z2xs(z)
-        c = constraint.fun(x)
+        c = constraint.fun(x, *args)
         sbig = jnp.zeros(ncon)
         sbig = put(sbig, ineq_mask, s)
         return c - sbig - target
 
-    def conjac_wrapped(z):
+    def conjac_wrapped(z, *args):
         x, s = z2xs(z)
-        J = constraint.jac(x)
+        J = constraint.jac(x, *args)
         I = jnp.eye(nslack)
         Js = jnp.zeros((ncon, nslack))
         Js = put(Js, Index[ineq_mask, :], -I)
@@ -105,9 +115,9 @@ def conjac_wrapped(z):
 
     if callable(constraint.hess):
 
-        def conhess_wrapped(z, y):
+        def conhess_wrapped(z, y, *args):
             x, s = z2xs(z)
-            H = constraint.hess(x, y)
+            H = constraint.hess(x, y, *args)
             return jnp.pad(H, (0, nslack))
 
     else:  # using BFGS

From c183b9ba17a0bc6219d003695a3c3086c9a8917d Mon Sep 17 00:00:00 2001
From: Rory Conlin
Date: Wed, 24 May 2023 22:07:24 -0400
Subject: [PATCH 19/41] Refactor augmented lagrangian optimizers and add basic
 tests
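
The refactor gives both solvers a scipy-flavored interface (fun, x0, derivatives,
bounds, constraint), falling back to plain fmintr/lsqtr when no constraint is given
and lifting inequalities through inequality_to_bounds otherwise. A sketch of the
intended call pattern on a toy problem (hypothetical usage under these assumptions,
not one of the added tests):

    import numpy as np
    from scipy.optimize import NonlinearConstraint
    from desc.optimize import fmin_lag_stel

    # minimize f(x) = sum(x**2) subject to x[0] + x[1] == 1
    fun = lambda x, *args: np.sum(x**2)
    grad = lambda x, *args: 2 * x
    hess = lambda x, *args: 2 * np.eye(x.size)
    con = NonlinearConstraint(
        lambda x: np.array([x[0] + x[1]]),
        1.0,
        1.0,
        jac=lambda x: np.array([[1.0, 1.0]]),
    )

    res = fmin_lag_stel(fun, np.array([2.0, -1.0]), grad, hess=hess, constraint=con)
    print(res["x"])  # ideally close to [0.5, 0.5]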
+++++++++++++++------- tests/test_optimizer.py | 104 +++++++- 3 files changed, 522 insertions(+), 169 deletions(-) diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py index 304dc0708a..53cea62b55 100644 --- a/desc/optimize/aug_lagrangian.py +++ b/desc/optimize/aug_lagrangian.py @@ -1,28 +1,20 @@ -#!/usr/bin/env python3 -""" -Created on Wed Jul 13 10:39:48 2022 - -@author: pk123 -""" +"""Augmented Langrangian for scalar valued objectives.""" import numpy as np -from scipy.optimize import OptimizeResult from desc.backend import jnp -from desc.derivatives import Derivative -from desc.objectives._auglagrangian import AugLagrangian from desc.optimize.fmin_scalar import fmintr - -def conv_test(x, L, gL): - return np.linalg.norm(jnp.dot(gL.T, L)) +from .utils import check_termination, inequality_to_bounds def fmin_lag_stel( fun, - constraint, x0, - bounds, + grad, + hess="bfgs", + bounds=(-jnp.inf, jnp.inf), + constraint=None, args=(), x_scale=1, ftol=1e-6, @@ -31,102 +23,242 @@ def fmin_lag_stel( ctol=1e-6, verbose=1, maxiter=None, - tr_method="svd", - callback=None, options={}, ): + """Minimize a function with constraints using an augmented Langrangian method. + + Parameters + ---------- + fun : callable + objective to be minimized. Should have a signature like fun(x,*args)-> float + x0 : array-like + initial guess + grad : callable + function to compute gradient, df/dx. Should take the same arguments as fun + hess : callable or ``'bfgs'``, optional: + function to compute Hessian matrix of fun, or ``'bfgs'`` in which case the BFGS + method will be used to approximate the Hessian. + bounds : tuple of array-like + Lower and upper bounds on independent variables. Defaults to no bounds. + Each array must match the size of x0 or be a scalar, in the latter case a + bound will be the same for all variables. Use np.inf with an appropriate sign + to disable bounds on all or some variables. + constraint : scipy.optimize.NonlinearConstraint + constraint to be satisfied + args : tuple + additional arguments passed to fun, grad, and hess + x_scale : array_like or ``'hess'``, optional + Characteristic scale of each variable. Setting ``x_scale`` is equivalent + to reformulating the problem in scaled variables ``xs = x / x_scale``. + An alternative view is that the size of a trust region along jth + dimension is proportional to ``x_scale[j]``. Improved convergence may + be achieved by setting ``x_scale`` such that a step of a given size + along any of the scaled variables has a similar effect on the cost + function. If set to ``'hess'``, the scale is iteratively updated using the + inverse norms of the columns of the Hessian matrix. + ftol : float or None, optional + Tolerance for termination by the change of the cost function. Default + is 1e-8. The optimization process is stopped when ``dF < ftol * F``, + and there was an adequate agreement between a local quadratic model and + the true model in the last step. If None, the termination by this + condition is disabled. + xtol : float or None, optional + Tolerance for termination by the change of the independent variables. + Default is 1e-8. Optimization is stopped when + ``norm(dx) < xtol * (xtol + norm(x))``. If None, the termination by + this condition is disabled. + gtol : float or None, optional + Absolute tolerance for termination by the norm of the gradient. Default is 1e-8. + Optimizer teriminates when ``norm(g) < gtol``, where + If None, the termination by this condition is disabled. 
+ verbose : {0, 1, 2}, optional + * 0 (default) : work silently. + * 1 : display a termination report. + * 2 : display progress during iterations + maxiter : int, optional + maximum number of iterations. Defaults to size(x)*100 + options : dict, optional + dictionary of optional keyword arguments to override default solver settings. + See the code for more details. + + Returns + ------- + res : OptimizeResult + The optimization result represented as a ``OptimizeResult`` object. + Important attributes are: ``x`` the solution array, ``success`` a + Boolean flag indicating if the optimizer exited successfully. + + """ + if constraint is None: + # just do regular unconstrained stuff + return fmintr( + fun, + x0, + grad, + hess, + bounds, + args, + x_scale=x_scale, + ftol=ftol, + xtol=xtol, + gtol=gtol, + verbose=verbose, + maxiter=maxiter, + options=options, + ) + else: + ( + z0, + fun_wrapped, + grad_wrapped, + hess_wrapped, + constraint_wrapped, + zbounds, + z2xs, + ) = inequality_to_bounds( + x0, + fun, + grad, + hess, + constraint, + bounds, + ) + + def lagfun(z, lmbda, mu, *args): + c = constraint_wrapped.fun(z, *args) + return fun_wrapped(z, *args) - jnp.dot(lmbda, c) + mu / 2 * jnp.dot(c, c) + + def laggrad(x, lmbda, mu, *args): + c = constraint_wrapped.fun(x, *args) + J = constraint_wrapped.jac(x, *args) + return grad_wrapped(x, *args) - jnp.dot(lmbda, J) + mu * jnp.dot(c, J) + + if callable(hess_wrapped): + if callable(constraint_wrapped.hess): + + def laghess(x, lmbda, mu, *args): + c = constraint_wrapped.fun(x, *args) + Hf = hess_wrapped(x, *args) + Jc = constraint_wrapped.jac(x, *args) + Hc1 = constraint_wrapped.hess(x, lmbda) + Hc2 = constraint_wrapped.hess(x, c) + # ignoring higher order derivatives of constraints for now + return Hf - Hc1 + mu * (Hc2 + jnp.dot(Jc.T, Jc)) + + else: + + def laghess(x, lmbda, mu, *args): + H = hess_wrapped(x, *args) + J = constraint_wrapped.jac(x, *args) + # ignoring higher order derivatives of constraints for now + return H + mu * jnp.dot(J.T, J) + + else: + laghess = "bfgs" + nfev = 0 ngev = 0 nhev = 0 iteration = 0 - x = x0.copy() - f = fun(x, *args) - c = constraint.fun(x) + z = z0.copy() + f = fun_wrapped(z, *args) + c = constraint_wrapped.fun(z) nfev += 1 - mu = options.pop("mu", 1) - lmbda = options.pop("lmbda", jnp.zeros(len(c))) - - constr = np.array([constraint]) - L = AugLagrangian(fun, constr) - gradL = Derivative(L.compute, 0, "fwd") - hessL = Derivative(L.compute, argnum=0, mode="hess") + if maxiter is None: + maxiter = z.size + mu = options.pop("initial_penalty_parameter", 10) + lmbda = options.pop("initial_multipliers", jnp.zeros(len(c))) + maxiter_inner = options.pop("maxiter_inner", 20) + max_nfev = options.pop("max_nfev", 5 * maxiter * maxiter_inner + 1) + max_ngev = options.pop("max_ngev", maxiter * maxiter_inner + 1) + max_nhev = options.pop("max_nhev", maxiter * maxiter_inner + 1) gtolk = 1 / (10 * np.linalg.norm(mu)) ctolk = 1 / (np.linalg.norm(mu) ** (0.1)) - xold = x + zold = z + fold = f + allx = [] while iteration < maxiter: - xk = fmintr( - L.compute, - x, - grad=gradL, - hess=hessL, - args=( - lmbda, - mu, - ), + result = fmintr( + lagfun, + z, + grad=laggrad, + hess=laghess, + bounds=zbounds, + args=(lmbda, mu) + args, + x_scale=x_scale, + ftol=ftol, + xtol=xtol, gtol=gtolk, - maxiter=20, - verbose=2, + maxiter=maxiter_inner, + verbose=verbose - 1, ) - # xk = minimize( - # L.compute, - # x, - # args=(lmbda, mu), - # method="trust-exact", - # jac=gradL, - # hess=hessL, - # options={"maxiter": 20}, - # verbose=2 - # ) - 
x = xk["x"] - f = fun(x) - cv = L.compute_constraints(x) - c = np.linalg.norm(cv) - - if np.linalg.norm(xold - x) < xtol: - print("xtol satisfied\n") - break + allx += result["allx"] + nfev += result["nfev"] + ngev += result["ngev"] + nhev += result["nhev"] + z = result["x"] + f = fun_wrapped(z, *args) + c = constraint_wrapped.fun(z, *args) + nfev += 1 + constr_violation = np.linalg.norm(c, ord=np.inf) + step_norm = np.linalg.norm(zold - z) + z_norm = np.linalg.norm(z) + g_norm = result["optimality"] + actual_reduction = fold - f - # if (np.linalg.norm(f) - np.linalg.norm(fold)) / np.linalg.norm(fold) > 0.1: - # mu = mu / 2 - # print("Decreasing mu. mu is now " + str(np.mean(mu))) + if verbose: + print(f"Constraint violation: {constr_violation:.4e}") - if c < ctolk: - if c < ctol and gradL(x, lmbda, mu) < gtol: - break + success, message = check_termination( + actual_reduction, + f, + step_norm, + z_norm, + g_norm, + 1, + ftol, + xtol, + gtol, + iteration, + maxiter, + nfev, + max_nfev, + ngev, + max_ngev, + nhev, + max_nhev, + constr_violation=constr_violation, + ctol=ctol, + ) + if success is not None: + break - else: - print("Updating lambda") - lmbda = lmbda - mu * cv - ctolk = ctolk / (mu ** (0.9)) - gtolk = gtolk / (mu) + if constr_violation < ctolk: + if verbose > 1: + print("Updating multipliers") + lmbda = lmbda - mu * c + ctolk = ctolk / (mu ** (0.9)) + gtolk = gtolk / (mu) else: + if verbose > 1: + print("Updating penalty parameter") mu = 2 * mu - ctolk = c / (mu ** (0.1)) + ctolk = constr_violation / (mu ** (0.1)) gtolk = gtolk / mu iteration = iteration + 1 - xold = x + zold = z fold = f - g = gradL(x, lmbda, mu) - f = fun(x) - success = True - message = "successful" - result = OptimizeResult( - x=x, - success=success, - fun=f, - grad=g, - optimality=jnp.linalg.norm(g), - nfev=nfev, - ngev=ngev, - nhev=nhev, - nit=iteration, - message=message, - ) - result["allx"] = [x] + result["message"] = message + result["success"] = success + result["x"] = z2xs(result["x"])[0] + result["fun"] = f + result["y"] = lmbda + result["allx"] = [z2xs(x)[0] for x in allx] + result["nit"] = iteration return result diff --git a/desc/optimize/aug_lagrangian_ls_stel.py b/desc/optimize/aug_lagrangian_ls_stel.py index 0140720f85..6221d3ff0a 100644 --- a/desc/optimize/aug_lagrangian_ls_stel.py +++ b/desc/optimize/aug_lagrangian_ls_stel.py @@ -1,28 +1,19 @@ -#!/usr/bin/env python3 -""" -Created on Wed Jul 13 10:39:48 2022 - -@author: pk123 -""" +"""Augmented Langrangian for vector valued objectives.""" import numpy as np -from scipy.optimize import OptimizeResult from desc.backend import jnp -from desc.derivatives import Derivative -from desc.objectives._auglagrangian import AugLagrangianLS from desc.optimize.least_squares import lsqtr - -def conv_test(x, L, gL): - return np.linalg.norm(jnp.dot(gL.T, L)) +from .utils import check_termination, inequality_to_bounds def fmin_lag_ls_stel( fun, - constraint, x0, - bounds, + jac, + bounds=(-jnp.inf, jnp.inf), + constraint=None, args=(), x_scale=1, ftol=1e-6, @@ -31,97 +22,225 @@ def fmin_lag_ls_stel( ctol=1e-6, verbose=1, maxiter=None, - tr_method="svd", - callback=None, options={}, ): + """Minimize a function with constraints using an augmented Langrangian method. + + The objective function is assumed to be vector valued, and is minimized in the least + squares sense. + + Parameters + ---------- + fun : callable + objective to be minimized. 
Should have a signature like fun(x, *args) -> 1d array
+    x0 : array-like
+        initial guess
+    jac : callable
+        function to compute Jacobian matrix of fun
+    bounds : tuple of array-like
+        Lower and upper bounds on independent variables. Defaults to no bounds.
+        Each array must match the size of x0 or be a scalar, in the latter case a
+        bound will be the same for all variables. Use np.inf with an appropriate sign
+        to disable bounds on all or some variables.
+    constraint : scipy.optimize.NonlinearConstraint
+        constraint to be satisfied
+    args : tuple
+        additional arguments passed to fun and jac
+    x_scale : array_like or ``'jac'``, optional
+        Characteristic scale of each variable. Setting ``x_scale`` is equivalent
+        to reformulating the problem in scaled variables ``xs = x / x_scale``.
+        An alternative view is that the size of a trust region along jth
+        dimension is proportional to ``x_scale[j]``. Improved convergence may
+        be achieved by setting ``x_scale`` such that a step of a given size
+        along any of the scaled variables has a similar effect on the cost
+        function. If set to ``'jac'``, the scale is iteratively updated using the
+        inverse norms of the columns of the Jacobian matrix.
+    ftol : float or None, optional
+        Tolerance for termination by the change of the cost function. Default
+        is 1e-6. The optimization process is stopped when ``dF < ftol * F``,
+        and there was an adequate agreement between a local quadratic model and
+        the true model in the last step. If None, the termination by this
+        condition is disabled.
+    xtol : float or None, optional
+        Tolerance for termination by the change of the independent variables.
+        Default is 1e-6. Optimization is stopped when
+        ``norm(dx) < xtol * (xtol + norm(x))``. If None, the termination by
+        this condition is disabled.
+    gtol : float or None, optional
+        Absolute tolerance for termination by the norm of the gradient.
+        Default is 1e-6. The optimizer terminates when ``norm(g) < gtol``.
+        If None, the termination by this condition is disabled.
+    ctol : float, optional
+        Tolerance for termination by the infinity norm of the constraint
+        violation. Default is 1e-6. Used together with the other tolerances
+        when checking termination of the outer loop.
+    verbose : {0, 1, 2}, optional
+        * 0 (default) : work silently.
+        * 1 : display a termination report.
+        * 2 : display progress during iterations.
+    maxiter : int, optional
+        maximum number of iterations. Defaults to size(x)*100
+    options : dict, optional
+        dictionary of optional keyword arguments to override default solver settings.
+        See the code for more details.
+
+    Returns
+    -------
+    res : OptimizeResult
+        The optimization result represented as a ``OptimizeResult`` object.
+        Important attributes are: ``x`` the solution array, ``success`` a
+        Boolean flag indicating if the optimizer exited successfully.
+ + """ + if constraint is None: + # just do regular unconstrained stuff + return lsqtr( + fun, + x0, + jac, + bounds, + args, + x_scale=x_scale, + ftol=ftol, + xtol=xtol, + gtol=gtol, + verbose=verbose, + maxiter=maxiter, + options=options, + ) + else: + ( + z0, + fun_wrapped, + jac_wrapped, + _, + constraint_wrapped, + zbounds, + z2xs, + ) = inequality_to_bounds( + x0, + fun, + jac, + None, + constraint, + bounds, + ) + + def lagfun(z, lmbda, mu, *args): + f = fun_wrapped(z, *args) + c = constraint_wrapped.fun(z, *args) + c = 1 / (np.sqrt(2 * mu)) * (-lmbda + mu * c) + return jnp.concatenate((f, c)) + + def lagjac(z, lmbda, mu, *args): + Jf = jac_wrapped(z, *args) + Jc = constraint_wrapped.jac(z, *args) + Jc = 1 / (np.sqrt(2 * mu)) * (mu * Jc) + return jnp.vstack((Jf, Jc)) + nfev = 0 - ngev = 0 - nhev = 0 + njev = 0 iteration = 0 - x = x0.copy() - f = fun(x, *args) - c = constraint.fun(x) + z = z0.copy() + f = fun_wrapped(z, *args) + cost = 1 / 2 * jnp.dot(f, f) + c = constraint_wrapped.fun(z) nfev += 1 - mu = options.pop("mu", 10 * jnp.ones(len(c))) - lmbda = options.pop("lmbda", 0.01 * jnp.ones(len(c))) - - constr = np.array([constraint]) - L = AugLagrangianLS(fun, constr) - gradL = Derivative(L.compute, 0, "fwd") + if maxiter is None: + maxiter = z.size + mu = options.pop("initial_penalty_parameter", 10) + lmbda = options.pop("initial_multipliers", 0.01 * jnp.ones(len(c))) + maxiter_inner = options.pop("maxiter_inner", 20) + max_nfev = options.pop("max_nfev", 5 * maxiter * maxiter_inner + 1) + max_njev = options.pop("max_njev", maxiter * maxiter_inner + 1) gtolk = 1 / (10 * np.linalg.norm(mu)) ctolk = 1 / (np.linalg.norm(mu) ** (0.1)) - xold = x - fold = f + zold = z + cost_old = cost + allx = [] while iteration < maxiter: - xk = lsqtr( - L.compute, - x, - gradL, + result = lsqtr( + lagfun, + z, + lagjac, + bounds=zbounds, args=( lmbda, mu, - ), - bounds=bounds, + ) + + args, + x_scale=x_scale, + ftol=ftol, + xtol=xtol, gtol=gtolk, - maxiter=10, - verbose=2, + maxiter=maxiter_inner, + verbose=verbose, ) - - x = xk["x"] - f = fun(x) - cv = L.compute_constraints(x) - c = np.max(cv) - - if np.linalg.norm(xold - x) < xtol: - print("xtol satisfied\n") + # update outer counters + allx += result["allx"] + nfev += result["nfev"] + njev += result["njev"] + z = result["x"] + f = fun_wrapped(z, *args) + cost = 1 / 2 * jnp.dot(f, f) + c = constraint_wrapped.fun(z, *args) + nfev += 1 + constr_violation = np.linalg.norm(c, ord=np.inf) + step_norm = np.linalg.norm(zold - z) + z_norm = np.linalg.norm(z) + g_norm = result["optimality"] + actual_reduction = cost_old - cost + + if verbose: + print(f"Constraint violation: {constr_violation:.4e}") + + # check if we can stop the outer loop + success, message = check_termination( + actual_reduction, + cost, + step_norm, + z_norm, + g_norm, + 1, + ftol, + xtol, + gtol, + iteration, + maxiter, + nfev, + max_nfev, + njev, + max_njev, + 0, + np.inf, + constr_violation=constr_violation, + ctol=ctol, + ) + if success is not None: break - if (np.linalg.norm(f) - np.linalg.norm(fold)) / np.linalg.norm(fold) > 0.1: - mu = mu / 2 - print("Decreasing mu. 
mu is now " + str(np.mean(mu))) - - elif c < ctolk: - if ( - c < ctol - and conv_test(x, L.compute(x, lmbda, mu), gradL(x, lmbda, mu)) < gtol - ): - break - - else: - print("Updating lambda") - # lmbda = lmbda - mu * cv - lmbda = lmbda - mu * cv - ctolk = ctolk / (np.max(mu) ** (0.9)) - gtolk = gtolk / (np.max(mu)) + # otherwise update lagrangian stuff and continue + if constr_violation < ctolk: + if verbose > 1: + print("Updating multipliers") + lmbda = lmbda - mu * c + ctolk = ctolk / (np.max(mu) ** (0.9)) + gtolk = gtolk / (np.max(mu)) else: + if verbose > 1: + print("Updating penalty parameter") mu = 5.0 * mu ctolk = ctolk / (np.max(mu) ** (0.1)) gtolk = gtolk / np.max(mu) iteration = iteration + 1 - xold = x - fold = f - - g = gradL(x, lmbda, mu) - f = fun(x) - success = True - message = "successful" - result = OptimizeResult( - x=x, - success=success, - fun=f, - grad=g, - optimality=jnp.linalg.norm(g), - nfev=nfev, - ngev=ngev, - nhev=nhev, - nit=iteration, - message=message, - ) - # result["allx"] = [recover(x)] + zold = z + cost_old = cost + + # we can use the last output result object, but update where needed + result["message"] = message + result["success"] = success + result["x"] = z2xs(result["x"])[0] + result["allx"] = [z2xs(x)[0] for x in allx] + result["nit"] = iteration return result diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index 722e961c34..c25d33e0c5 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -3,7 +3,14 @@ import numpy as np import pytest from numpy.random import default_rng -from scipy.optimize import BFGS, least_squares, rosen, rosen_der +from scipy.optimize import ( + BFGS, + NonlinearConstraint, + least_squares, + minimize, + rosen, + rosen_der, +) import desc.examples from desc.backend import jit, jnp @@ -29,6 +36,8 @@ LinearConstraintProjection, Optimizer, ProximalProjection, + fmin_lag_ls_stel, + fmin_lag_stel, fmintr, lsqtr, optimizers, @@ -725,3 +734,96 @@ def hess(x): np.testing.assert_allclose(out1["x"], out3["x"], rtol=1e-06, atol=1e-06) np.testing.assert_allclose(out2["x"], out3["x"], rtol=1e-06, atol=1e-06) + + +@pytest.mark.unit +def test_auglag(): + """Test that our augmented lagrangian works as well as scipy for convex problems.""" + rng = default_rng(12) + + n = 15 # number of variables + m = 10 # number of constraints + p = 7 # number of primals + Qs = [] + gs = [] + for i in range(m + p): + A = 0.5 - rng.random((n, n)) + Qs.append(A.T @ A) + gs.append(0.5 - rng.random(n)) + + @jit + def vecfun(x): + y0 = x @ Qs[0] + gs[0] + y1 = x @ Qs[1] + gs[1] + y = jnp.concatenate([y0, y1**2]) + return y + + @jit + def fun(x): + y = vecfun(x) + return 1 / 2 * jnp.dot(y, y) + + grad = jit(Derivative(fun, mode="grad")) + hess = jit(Derivative(fun, mode="hess")) + jac = jit(Derivative(vecfun, mode="fwd")) + + @jit + def con(x): + cs = [] + for i in range(m): + cs.append(x @ Qs[p + i] @ x + gs[p + i] @ x) + return jnp.array(cs) + + conjac = jit(Derivative(con, mode="fwd")) + conhess = jit(Derivative(lambda x, v: v @ con(x), mode="hess")) + + constraint = NonlinearConstraint(con, -np.inf, 0, conjac, conhess) + x0 = rng.random(n) + + out1 = fmin_lag_stel( + fun, + x0, + grad, + hess=hess, + bounds=(-jnp.inf, jnp.inf), + constraint=constraint, + args=(), + x_scale=1, + ftol=0, + xtol=1e-6, + gtol=1e-6, + ctol=1e-6, + verbose=3, + maxiter=None, + options={}, + ) + + out2 = fmin_lag_ls_stel( + vecfun, + x0, + jac, + bounds=(-jnp.inf, jnp.inf), + constraint=constraint, + args=(), + x_scale=1, + ftol=0, + xtol=1e-6, + 
gtol=1e-6, + ctol=1e-6, + verbose=3, + maxiter=None, + options={}, + ) + + out3 = minimize( + fun, + x0, + jac=grad, + hess=hess, + constraints=constraint, + method="trust-constr", + options={"verbose": 3, "maxiter": 1000}, + ) + + np.testing.assert_allclose(out1["x"], out3["x"], rtol=1e-4, atol=1e-4) + np.testing.assert_allclose(out2["x"], out3["x"], rtol=1e-4, atol=1e-4) From 85836b7af408122261b6aef137d8c2194ccba568 Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Wed, 24 May 2023 22:09:56 -0400 Subject: [PATCH 20/41] Remove unused augmented lagrangian objectives --- desc/objectives/__init__.py | 1 - desc/objectives/_auglagrangian.py | 82 ------------------------------- 2 files changed, 83 deletions(-) delete mode 100644 desc/objectives/_auglagrangian.py diff --git a/desc/objectives/__init__.py b/desc/objectives/__init__.py index b1269383b8..9bb9cffa81 100644 --- a/desc/objectives/__init__.py +++ b/desc/objectives/__init__.py @@ -1,6 +1,5 @@ """Classes defining objectives for equilibrium and optimization.""" -from ._auglagrangian import AugLagrangian, AugLagrangianLS from ._bootstrap import BootstrapRedlConsistency from ._equilibrium import ( CurrentDensity, diff --git a/desc/objectives/_auglagrangian.py b/desc/objectives/_auglagrangian.py deleted file mode 100644 index 4b14c44efc..0000000000 --- a/desc/objectives/_auglagrangian.py +++ /dev/null @@ -1,82 +0,0 @@ -import numpy as np - -from desc.backend import jnp - -from .objective_funs import ObjectiveFunction - - -class AugLagrangianLS(ObjectiveFunction): - """Augmented Lagrangian function for least-squares optimization - - Parameters - ---------- - func : objective function - constr : constraint functions - """ - - def __init__(self, func, constr): - self.func = func - self.constr = constr - - def scalar(self): - return False - - def name(self): - return "least squares augmented lagrangian" - - def derivatives(self): - return - - def compute(self, x, lmbda, mu): - L = self.func(x) - c = self.compute_constraints(x) - # c = -lmbda * c + mu / 2 * c**2 - c = 1 / (np.sqrt(2 * mu)) * (-lmbda + mu * c) - L = jnp.concatenate((L, c), axis=None) - return L - - def compute_scalar(self, x, lmbda, mu): - return np.linalg.norm(self.compute(x, lmbda, mu)) - - def callback(self, x, lmbda, mu): - L = self.compute(x, lmbda, mu) - print("The Least Squares Lagrangian is " + str(L)) - - def compute_constraints(self, x): - c = jnp.array([]) - for i in range(len(self.constr)): - c = jnp.concatenate((c, self.constr[i].fun(x)), axis=None) - return c - - -class AugLagrangian(ObjectiveFunction): - def __init__(self, func, constr): - self.func = func - self.constr = constr - - def scalar(self): - return True - - def name(self): - return "augmented lagrangian" - - def derivatives(self): - return - - def compute(self, x, lmbda, mu): - L = self.func(x) - c = self.compute_constraints(x) - return L - jnp.dot(lmbda, c) + mu / 2 * jnp.dot(c, c) - - def compute_scalar(self, x, lmbda, mu): - return self.compute(x, lmbda, mu) - - def callback(self, x, lmbda, mu): - L = self.compute(x, lmbda, mu) - print("The Lagrangian is " + str(L)) - - def compute_constraints(self, x): - c = jnp.array([]) - for i in range(len(self.constr)): - c = jnp.concatenate((c, self.constr[i].fun(x)), axis=None) - return c From aaf5e9194ea3624b910c31e025e33ddcbe922c87 Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Wed, 24 May 2023 22:50:40 -0400 Subject: [PATCH 21/41] Clean up printing from augmented lagrangians --- desc/optimize/aug_lagrangian.py | 83 +++++++++++++++++++----- 
desc/optimize/aug_lagrangian_ls_stel.py | 86 ++++++++++++++++++++----- 2 files changed, 137 insertions(+), 32 deletions(-) diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py index 53cea62b55..f9e3205761 100644 --- a/desc/optimize/aug_lagrangian.py +++ b/desc/optimize/aug_lagrangian.py @@ -1,14 +1,21 @@ """Augmented Langrangian for scalar valued objectives.""" import numpy as np +from scipy.optimize import OptimizeResult from desc.backend import jnp from desc.optimize.fmin_scalar import fmintr -from .utils import check_termination, inequality_to_bounds +from .bound_utils import find_active_constraints +from .utils import ( + check_termination, + inequality_to_bounds, + print_header_nonlinear, + print_iteration_nonlinear, +) -def fmin_lag_stel( +def fmin_lag_stel( # noqa: C901 - FIXME: simplify this fun, x0, grad, @@ -163,8 +170,10 @@ def laghess(x, lmbda, mu, *args): z = z0.copy() f = fun_wrapped(z, *args) + g = grad_wrapped(z, *args) c = constraint_wrapped.fun(z) nfev += 1 + ngev += 1 if maxiter is None: maxiter = z.size @@ -181,6 +190,19 @@ def laghess(x, lmbda, mu, *args): fold = f allx = [] + success = None + message = None + step_norm = jnp.inf + actual_reduction = jnp.inf + g_norm = np.linalg.norm(g, ord=np.inf) + constr_violation = np.linalg.norm(c, ord=np.inf) + + if verbose > 1: + print_header_nonlinear(constrained=True) + print_iteration_nonlinear( + iteration, nfev, f, actual_reduction, step_norm, g_norm, constr_violation + ) + while iteration < maxiter: result = fmintr( lagfun, @@ -194,7 +216,7 @@ def laghess(x, lmbda, mu, *args): xtol=xtol, gtol=gtolk, maxiter=maxiter_inner, - verbose=verbose - 1, + verbose=0, ) allx += result["allx"] nfev += result["nfev"] @@ -209,9 +231,18 @@ def laghess(x, lmbda, mu, *args): z_norm = np.linalg.norm(z) g_norm = result["optimality"] actual_reduction = fold - f + iteration = iteration + 1 - if verbose: - print(f"Constraint violation: {constr_violation:.4e}") + if verbose > 1: + print_iteration_nonlinear( + iteration, + nfev, + f, + actual_reduction, + step_norm, + g_norm, + constr_violation, + ) success, message = check_termination( actual_reduction, @@ -238,27 +269,47 @@ def laghess(x, lmbda, mu, *args): break if constr_violation < ctolk: - if verbose > 1: - print("Updating multipliers") lmbda = lmbda - mu * c ctolk = ctolk / (mu ** (0.9)) gtolk = gtolk / (mu) else: - if verbose > 1: - print("Updating penalty parameter") mu = 2 * mu ctolk = constr_violation / (mu ** (0.1)) gtolk = gtolk / mu - iteration = iteration + 1 zold = z fold = f - result["message"] = message - result["success"] = success - result["x"] = z2xs(result["x"])[0] - result["fun"] = f - result["y"] = lmbda + x, s = z2xs(z) + active_mask = find_active_constraints(z, zbounds[0], zbounds[1], rtol=xtol) + result = OptimizeResult( + x=x, + y=lmbda, + success=success, + fun=f, + grad=g, + optimality=g_norm, + nfev=nfev, + ngev=ngev, + nhev=nhev, + nit=iteration, + message=message, + active_mask=active_mask, + constr_violation=constr_violation, + ) + if verbose > 0: + if result["success"]: + print(result["message"]) + else: + print("Warning: " + result["message"]) + print(f""" Current function value: {result["fun"]:.3e}""") + print(f""" Constraint violation: {result['constr_violation']:.3e}""") + print(f""" Total delta_x: {jnp.linalg.norm(x0 - result["x"]):.3e}""") + print(f""" Iterations: {result["nit"]:d}""") + print(f""" Function evaluations: {result["nfev"]:d}""") + print(f""" Gradient evaluations: {result["ngev"]:d}""") + print(f""" Hessian 
evaluations: {result["nhev"]:d}""") + result["allx"] = [z2xs(x)[0] for x in allx] - result["nit"] = iteration + return result diff --git a/desc/optimize/aug_lagrangian_ls_stel.py b/desc/optimize/aug_lagrangian_ls_stel.py index 6221d3ff0a..7805f7513c 100644 --- a/desc/optimize/aug_lagrangian_ls_stel.py +++ b/desc/optimize/aug_lagrangian_ls_stel.py @@ -1,14 +1,21 @@ """Augmented Langrangian for vector valued objectives.""" import numpy as np +from scipy.optimize import OptimizeResult from desc.backend import jnp from desc.optimize.least_squares import lsqtr -from .utils import check_termination, inequality_to_bounds +from .bound_utils import find_active_constraints +from .utils import ( + check_termination, + inequality_to_bounds, + print_header_nonlinear, + print_iteration_nonlinear, +) -def fmin_lag_ls_stel( +def fmin_lag_ls_stel( # noqa: C901 - FIXME: simplify this fun, x0, jac, @@ -140,9 +147,12 @@ def lagjac(z, lmbda, mu, *args): z = z0.copy() f = fun_wrapped(z, *args) + J = jac_wrapped(z, *args) cost = 1 / 2 * jnp.dot(f, f) + g = jnp.dot(f, J) c = constraint_wrapped.fun(z) nfev += 1 + njev += 1 if maxiter is None: maxiter = z.size @@ -158,6 +168,19 @@ def lagjac(z, lmbda, mu, *args): cost_old = cost allx = [] + success = None + message = None + step_norm = jnp.inf + actual_reduction = jnp.inf + g_norm = np.linalg.norm(g, ord=np.inf) + constr_violation = np.linalg.norm(c, ord=np.inf) + + if verbose > 1: + print_header_nonlinear(constrained=True) + print_iteration_nonlinear( + iteration, nfev, cost, actual_reduction, step_norm, g_norm, constr_violation + ) + while iteration < maxiter: result = lsqtr( lagfun, @@ -174,7 +197,7 @@ def lagjac(z, lmbda, mu, *args): xtol=xtol, gtol=gtolk, maxiter=maxiter_inner, - verbose=verbose, + verbose=0, ) # update outer counters allx += result["allx"] @@ -183,16 +206,26 @@ def lagjac(z, lmbda, mu, *args): z = result["x"] f = fun_wrapped(z, *args) cost = 1 / 2 * jnp.dot(f, f) - c = constraint_wrapped.fun(z, *args) + c = constraint_wrapped.fun(z) nfev += 1 + njev += 1 constr_violation = np.linalg.norm(c, ord=np.inf) step_norm = np.linalg.norm(zold - z) z_norm = np.linalg.norm(z) g_norm = result["optimality"] actual_reduction = cost_old - cost + iteration = iteration + 1 - if verbose: - print(f"Constraint violation: {constr_violation:.4e}") + if verbose > 1: + print_iteration_nonlinear( + iteration, + nfev, + cost, + actual_reduction, + step_norm, + g_norm, + constr_violation, + ) # check if we can stop the outer loop success, message = check_termination( @@ -221,26 +254,47 @@ def lagjac(z, lmbda, mu, *args): # otherwise update lagrangian stuff and continue if constr_violation < ctolk: - if verbose > 1: - print("Updating multipliers") lmbda = lmbda - mu * c ctolk = ctolk / (np.max(mu) ** (0.9)) gtolk = gtolk / (np.max(mu)) else: - if verbose > 1: - print("Updating penalty parameter") mu = 5.0 * mu ctolk = ctolk / (np.max(mu) ** (0.1)) gtolk = gtolk / np.max(mu) - iteration = iteration + 1 zold = z cost_old = cost - # we can use the last output result object, but update where needed - result["message"] = message - result["success"] = success - result["x"] = z2xs(result["x"])[0] + x, s = z2xs(z) + active_mask = find_active_constraints(z, zbounds[0], zbounds[1], rtol=xtol) + result = OptimizeResult( + x=x, + y=lmbda, + success=success, + cost=cost, + fun=f, + grad=g, + jac=J, + optimality=g_norm, + nfev=nfev, + njev=njev, + nit=iteration, + message=message, + active_mask=active_mask, + constr_violation=constr_violation, + ) + if verbose > 0: + if 
result["success"]: + print(result["message"]) + else: + print("Warning: " + result["message"]) + print(f""" Current function value: {result["cost"]:.3e}""") + print(f""" Constraint violation: {result['constr_violation']:.3e}""") + print(f""" Total delta_x: {jnp.linalg.norm(x0 - result["x"]):.3e}""") + print(f""" Iterations: {result["nit"]:d}""") + print(f""" Function evaluations: {result["nfev"]:d}""") + print(f""" Jacobian evaluations: {result["njev"]:d}""") + result["allx"] = [z2xs(x)[0] for x in allx] - result["nit"] = iteration + return result From e42221429fe692de9df699eb63285980bc29ec7c Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Fri, 26 May 2023 14:42:49 -0400 Subject: [PATCH 22/41] More minor cleanup --- desc/objectives/_equilibrium.py | 1 - desc/objectives/_geometry.py | 1 - desc/objectives/_stability.py | 1 - desc/objectives/objective_funs.py | 1 + desc/optimize/_constraint_wrappers.py | 1 - desc/optimize/optimizer.py | 2 -- 6 files changed, 1 insertion(+), 6 deletions(-) diff --git a/desc/objectives/_equilibrium.py b/desc/objectives/_equilibrium.py index 76b9489646..35bdd02ecb 100644 --- a/desc/objectives/_equilibrium.py +++ b/desc/objectives/_equilibrium.py @@ -79,7 +79,6 @@ def __init__( if target is None and bounds is None: target = 0 self._grid = grid - super().__init__( eq=eq, target=target, diff --git a/desc/objectives/_geometry.py b/desc/objectives/_geometry.py index e6a2b3c6d4..6cd32a52e9 100644 --- a/desc/objectives/_geometry.py +++ b/desc/objectives/_geometry.py @@ -64,7 +64,6 @@ def __init__( if target is None and bounds is None: target = 2 self._grid = grid - super().__init__( eq=eq, target=target, diff --git a/desc/objectives/_stability.py b/desc/objectives/_stability.py index 7cc30f6f28..8c3f798b8f 100644 --- a/desc/objectives/_stability.py +++ b/desc/objectives/_stability.py @@ -261,7 +261,6 @@ def __init__( if target is None and bounds is None: bounds = (0, np.inf) self._grid = grid - super().__init__( eq=eq, target=target, diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py index b42123a66c..2c6a34699c 100644 --- a/desc/objectives/objective_funs.py +++ b/desc/objectives/objective_funs.py @@ -695,6 +695,7 @@ def _set_derivatives(self): "grad": {}, "hess": {}, } + for arg in arg_order: if arg in self.args: # derivative wrt arg self._derivatives["jac_unscaled"][arg] = Derivative( diff --git a/desc/optimize/_constraint_wrappers.py b/desc/optimize/_constraint_wrappers.py index 48d50bf2a2..6fb2e17a96 100644 --- a/desc/optimize/_constraint_wrappers.py +++ b/desc/optimize/_constraint_wrappers.py @@ -83,7 +83,6 @@ def build(self, eq, use_jit=None, verbose=1): """ timer = Timer() timer.start("Linear constraint projection build") - self._eq = eq # we don't always build here because in ~all cases the user doesn't interact # with this directly, so if the user wants to manually rebuild they should diff --git a/desc/optimize/optimizer.py b/desc/optimize/optimizer.py index 8b48ed301c..9fd05eb60a 100644 --- a/desc/optimize/optimizer.py +++ b/desc/optimize/optimizer.py @@ -331,8 +331,6 @@ def _maybe_wrap_nonlinear_constraints(objective, nonlinear_constraint, method, o f"No nonlinear constraints detected, ignoring wrapper method {wrapper}" ) return objective, nonlinear_constraint - if wrapper is None and optimizers[method]["equality_constraints"]: - return objective, nonlinear_constraint if wrapper is None and not optimizers[method]["equality_constraints"]: warnings.warn( FutureWarning( From 0ea34950b6368cec688101ffced7799f4b9a874a Mon Sep 17 
00:00:00 2001 From: Rory Conlin Date: Fri, 26 May 2023 14:59:49 -0400 Subject: [PATCH 23/41] Fix wrappers for augmented lagrangians --- desc/optimize/_desc_wrappers.py | 120 ++++++++++++++------------------ 1 file changed, 52 insertions(+), 68 deletions(-) diff --git a/desc/optimize/_desc_wrappers.py b/desc/optimize/_desc_wrappers.py index ae07c364a4..cd661d8d72 100644 --- a/desc/optimize/_desc_wrappers.py +++ b/desc/optimize/_desc_wrappers.py @@ -1,4 +1,4 @@ -import numpy as np +from scipy.optimize import NonlinearConstraint from desc.backend import jnp @@ -8,24 +8,27 @@ from .least_squares import lsqtr from .optimizer import register_optimizer from .stochastic import sgd -from .utils import inequality_to_bounds @register_optimizer( - name="auglag", - description="Augmented Lagrangian approach to constrained optimization", - scalar=False, + name=["auglag", "auglag-bfgs"], + description=[ + "Augmented Lagrangian method with trust region subproblem." + "Augmented Lagrangian method with trust region subproblem." + + "Uses BFGS to approximate hessian", + ], + scalar=True, equality_constraints=True, inequality_constraints=True, stochastic=False, - hessian=False, + hessian=[True, False], + GPU=True, ) def _optimize_desc_aug_lagrangian( objective, constraint, x0, method, x_scale, verbose, stoptol, options=None ): """Wrapper for desc.optimize.fmin_lag_ls_stel. - Parameters ---------- objective : ObjectiveFunction @@ -66,50 +69,42 @@ def _optimize_desc_aug_lagrangian( """ options = {} if options is None else options - options.setdefault("maxiter", np.iinfo(np.int32).max) - options.setdefault("disp", False) - x_scale = 1 if x_scale == "auto" else x_scale - if isinstance(x_scale, str): - raise ValueError(f"Method {method} does not support x_scale type {x_scale}") - bounds = (-jnp.inf, jnp.inf) + hess = objective.hess if "bfgs" not in method else "bfgs" + if not isinstance(x_scale, str) and jnp.allclose(x_scale, 1): + options.setdefault("initial_trust_ratio", 1e-3) + options.setdefault("max_trust_radius", 1.0) + options["max_nfev"] = stoptol["max_nfev"] + options["max_ngev"] = stoptol["max_ngev"] + options["max_nhev"] = stoptol["max_nhev"] + if constraint is not None: - ( - z0, - fun_wrapped, - grad_wrapped, - hess_wrapped, - constraint_wrapped, - zbounds, - z2xs, - ) = inequality_to_bounds( - x0, - objective.compute_scalar, - objective.grad, - objective.hess, - constraint, - bounds, + lb, ub = constraint.bounds_scaled + constraint_wrapped = NonlinearConstraint( + constraint.compute_scaled, + lb, + ub, + constraint.jac_scaled, ) else: constraint_wrapped = None result = fmin_lag_stel( - fun_wrapped, - constraint_wrapped, - x0=z0, - bounds=zbounds, + objective.compute_scalar, + x0=x0, + grad=objective.grad, + hess=hess, + bounds=(-jnp.inf, jnp.inf), + constraint=constraint_wrapped, args=(), x_scale=x_scale, ftol=stoptol["ftol"], xtol=stoptol["xtol"], gtol=stoptol["gtol"], - maxiter=stoptol["maxiter"], + ctol=stoptol["ctol"], verbose=verbose, - callback=None, + maxiter=stoptol["maxiter"], options=options, ) - x, s = z2xs(result["x"]) - result["x"] = x - result["allx"] = [x] return result @@ -122,13 +117,13 @@ def _optimize_desc_aug_lagrangian( inequality_constraints=True, stochastic=False, hessian=False, + GPU=True, ) def _optimize_desc_aug_lagrangian_least_squares( objective, constraint, x0, method, x_scale, verbose, stoptol, options=None ): """Wrapper for desc.optimize.fmin_lag_ls_stel. 
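# ---- editor's note (illustrative, not part of the patch) ------------------
# After this registration the augmented Lagrangian solvers are reachable by
# name through the generic interface, e.g. (a sketch; `obj` and `cons` stand
# for an ObjectiveFunction and constraints built as in the tests later in
# this series):
#
#     from desc.optimize import Optimizer
#     opt = Optimizer("auglag")         # trust region with exact Hessian
#     # opt = Optimizer("auglag-bfgs")  # same method, BFGS Hessian approximation
#     eq.optimize(objective=obj, constraints=cons, optimizer=opt, verbose=3)
#
# The stoptol entries (ftol, xtol, gtol, ctol, maxiter, max_nfev, ...) are
# forwarded by this wrapper into the solver's arguments and options below.
# ----------------------------------------------------------------------------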
- Parameters ---------- objective : ObjectiveFunction @@ -169,50 +164,39 @@ def _optimize_desc_aug_lagrangian_least_squares( """ options = {} if options is None else options - options.setdefault("maxiter", np.iinfo(np.int32).max) - options.setdefault("disp", False) - x_scale = 1 if x_scale == "auto" else x_scale - if isinstance(x_scale, str): - raise ValueError(f"Method {method} does not support x_scale type {x_scale}") - bounds = (-jnp.inf, jnp.inf) + if not isinstance(x_scale, str) and jnp.allclose(x_scale, 1): + options.setdefault("initial_trust_radius", 1e-3) + options.setdefault("max_trust_radius", 1.0) + options["max_nfev"] = stoptol["max_nfev"] + options["max_njev"] = stoptol["max_njev"] + if constraint is not None: - ( - z0, - fun_wrapped, - grad_wrapped, - hess_wrapped, - constraint_wrapped, - zbounds, - z2xs, - ) = inequality_to_bounds( - x0, - objective.compute_scaled_error, - objective.grad, - objective.hess, - constraint, - bounds, + lb, ub = constraint.bounds_scaled + constraint_wrapped = NonlinearConstraint( + constraint.compute_scaled, + lb, + ub, + constraint.jac_scaled, ) else: constraint_wrapped = None result = fmin_lag_ls_stel( - fun_wrapped, - constraint_wrapped, - x0=z0, - bounds=zbounds, + objective.compute_scaled_error, + x0=x0, + jac=objective.jac_scaled, + bounds=(-jnp.inf, jnp.inf), + constraint=constraint_wrapped, args=(), x_scale=x_scale, ftol=stoptol["ftol"], xtol=stoptol["xtol"], gtol=stoptol["gtol"], - maxiter=stoptol["maxiter"], + ctol=stoptol["ctol"], verbose=verbose, - callback=None, + maxiter=stoptol["maxiter"], options=options, ) - x, s = z2xs(result["x"]) - result["x"] = x - result["allx"] = [x] return result From 10d4429244c4e9bf2808e43ba52978c1693f55d5 Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Fri, 26 May 2023 15:43:51 -0400 Subject: [PATCH 24/41] Use dummy constraint for unconstrained auglag --- desc/optimize/aug_lagrangian.py | 56 ++++++++++--------------- desc/optimize/aug_lagrangian_ls_stel.py | 55 ++++++++++-------------- 2 files changed, 46 insertions(+), 65 deletions(-) diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py index f9e3205761..ece7b8358a 100644 --- a/desc/optimize/aug_lagrangian.py +++ b/desc/optimize/aug_lagrangian.py @@ -1,7 +1,7 @@ """Augmented Langrangian for scalar valued objectives.""" import numpy as np -from scipy.optimize import OptimizeResult +from scipy.optimize import NonlinearConstraint, OptimizeResult from desc.backend import jnp from desc.optimize.fmin_scalar import fmintr @@ -97,39 +97,29 @@ def fmin_lag_stel( # noqa: C901 - FIXME: simplify this """ if constraint is None: - # just do regular unconstrained stuff - return fmintr( - fun, - x0, - grad, - hess, - bounds, - args, - x_scale=x_scale, - ftol=ftol, - xtol=xtol, - gtol=gtol, - verbose=verbose, - maxiter=maxiter, - options=options, - ) - else: - ( - z0, - fun_wrapped, - grad_wrapped, - hess_wrapped, - constraint_wrapped, - zbounds, - z2xs, - ) = inequality_to_bounds( - x0, - fun, - grad, - hess, - constraint, - bounds, + # create a dummy constraint + constraint = NonlinearConstraint( + fun=lambda x, *args: jnp.array([0.0]), + lb=0.0, + ub=0.0, + jac=lambda x, *args: jnp.zeros((1, x.size)), ) + ( + z0, + fun_wrapped, + grad_wrapped, + hess_wrapped, + constraint_wrapped, + zbounds, + z2xs, + ) = inequality_to_bounds( + x0, + fun, + grad, + hess, + constraint, + bounds, + ) def lagfun(z, lmbda, mu, *args): c = constraint_wrapped.fun(z, *args) diff --git a/desc/optimize/aug_lagrangian_ls_stel.py 
b/desc/optimize/aug_lagrangian_ls_stel.py index 7805f7513c..70b4674e24 100644 --- a/desc/optimize/aug_lagrangian_ls_stel.py +++ b/desc/optimize/aug_lagrangian_ls_stel.py @@ -1,7 +1,7 @@ """Augmented Langrangian for vector valued objectives.""" import numpy as np -from scipy.optimize import OptimizeResult +from scipy.optimize import NonlinearConstraint, OptimizeResult from desc.backend import jnp from desc.optimize.least_squares import lsqtr @@ -96,38 +96,29 @@ def fmin_lag_ls_stel( # noqa: C901 - FIXME: simplify this """ if constraint is None: - # just do regular unconstrained stuff - return lsqtr( - fun, - x0, - jac, - bounds, - args, - x_scale=x_scale, - ftol=ftol, - xtol=xtol, - gtol=gtol, - verbose=verbose, - maxiter=maxiter, - options=options, - ) - else: - ( - z0, - fun_wrapped, - jac_wrapped, - _, - constraint_wrapped, - zbounds, - z2xs, - ) = inequality_to_bounds( - x0, - fun, - jac, - None, - constraint, - bounds, + # create a dummy constraint + constraint = NonlinearConstraint( + fun=lambda x, *args: jnp.array([0.0]), + lb=0.0, + ub=0.0, + jac=lambda x, *args: jnp.zeros((1, x.size)), ) + ( + z0, + fun_wrapped, + jac_wrapped, + _, + constraint_wrapped, + zbounds, + z2xs, + ) = inequality_to_bounds( + x0, + fun, + jac, + None, + constraint, + bounds, + ) def lagfun(z, lmbda, mu, *args): f = fun_wrapped(z, *args) From a97138737a3f0c5081cc854f9d36fd82006788ce Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Sat, 27 May 2023 21:47:04 -0400 Subject: [PATCH 25/41] Add CL scaling vector to optimizer outputs --- desc/optimize/fmin_scalar.py | 1 + desc/optimize/least_squares.py | 1 + 2 files changed, 2 insertions(+) diff --git a/desc/optimize/fmin_scalar.py b/desc/optimize/fmin_scalar.py index a43f5ee338..8d226f4f81 100644 --- a/desc/optimize/fmin_scalar.py +++ b/desc/optimize/fmin_scalar.py @@ -405,6 +405,7 @@ def fmintr( # noqa: C901 - FIXME: simplify this success=success, fun=f, grad=g, + v=v, hess=H, optimality=g_norm, nfev=nfev, diff --git a/desc/optimize/least_squares.py b/desc/optimize/least_squares.py index 2b9b6754d6..f89caf1dfb 100644 --- a/desc/optimize/least_squares.py +++ b/desc/optimize/least_squares.py @@ -389,6 +389,7 @@ def lsqtr( # noqa: C901 - FIXME: simplify this cost=cost, fun=f, grad=g, + v=v, jac=J, optimality=g_norm, nfev=nfev, From bf11de29758c125640ef1f23af1206853af69d8f Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Sat, 27 May 2023 21:51:30 -0400 Subject: [PATCH 26/41] Add more detailed output to console --- desc/optimize/aug_lagrangian.py | 14 +++++++++++-- desc/optimize/aug_lagrangian_ls_stel.py | 14 +++++++++++-- desc/optimize/utils.py | 27 +++++++++++++++++-------- 3 files changed, 43 insertions(+), 12 deletions(-) diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py index ece7b8358a..3ff3d2eb7d 100644 --- a/desc/optimize/aug_lagrangian.py +++ b/desc/optimize/aug_lagrangian.py @@ -188,9 +188,17 @@ def laghess(x, lmbda, mu, *args): constr_violation = np.linalg.norm(c, ord=np.inf) if verbose > 1: - print_header_nonlinear(constrained=True) + print_header_nonlinear(True, "Penalty param", "max(|mltplr|)") print_iteration_nonlinear( - iteration, nfev, f, actual_reduction, step_norm, g_norm, constr_violation + iteration, + nfev, + f, + actual_reduction, + step_norm, + g_norm, + constr_violation, + mu, + jnp.max(jnp.abs(lmbda)), ) while iteration < maxiter: @@ -232,6 +240,8 @@ def laghess(x, lmbda, mu, *args): step_norm, g_norm, constr_violation, + mu, + jnp.max(jnp.abs(lmbda)), ) success, message = check_termination( diff --git 
a/desc/optimize/aug_lagrangian_ls_stel.py b/desc/optimize/aug_lagrangian_ls_stel.py index 70b4674e24..5042440c72 100644 --- a/desc/optimize/aug_lagrangian_ls_stel.py +++ b/desc/optimize/aug_lagrangian_ls_stel.py @@ -167,9 +167,17 @@ def lagjac(z, lmbda, mu, *args): constr_violation = np.linalg.norm(c, ord=np.inf) if verbose > 1: - print_header_nonlinear(constrained=True) + print_header_nonlinear(True, "Penalty param", "max(|mltplr|)") print_iteration_nonlinear( - iteration, nfev, cost, actual_reduction, step_norm, g_norm, constr_violation + iteration, + nfev, + cost, + actual_reduction, + step_norm, + g_norm, + constr_violation, + mu, + jnp.max(jnp.abs(lmbda)), ) while iteration < maxiter: @@ -216,6 +224,8 @@ def lagjac(z, lmbda, mu, *args): step_norm, g_norm, constr_violation, + mu, + jnp.max(jnp.abs(lmbda)), ) # check if we can stop the outer loop diff --git a/desc/optimize/utils.py b/desc/optimize/utils.py index 1ec2e8519b..c48cd3ef89 100644 --- a/desc/optimize/utils.py +++ b/desc/optimize/utils.py @@ -316,7 +316,7 @@ def evaluate_quadratic_form_jac(J, g, s, diag=None): return 0.5 * q + l -def print_header_nonlinear(constrained=False): +def print_header_nonlinear(constrained=False, *args): """Print a pretty header.""" s = "{:^15}{:^15}{:^15}{:^15}{:^15}{:^15}".format( "Iteration", @@ -327,12 +327,21 @@ def print_header_nonlinear(constrained=False): "Optimality", ) if constrained: - s += "{:^24}".format("Constraint violation") + s += "{:^15}".format("Constr viol.") + for arg in args: + s += "{:^15}".format(arg) print(s) def print_iteration_nonlinear( - iteration, nfev, cost, cost_reduction, step_norm, optimality, constr_violation=None + iteration, + nfev, + cost, + cost_reduction, + step_norm, + optimality, + constr_violation=None, + *args, ): """Print a line of optimizer output.""" if iteration is None or abs(iteration) == np.inf: @@ -348,27 +357,29 @@ def print_iteration_nonlinear( if cost is None or abs(cost) == np.inf: cost = " " * 15 else: - cost = "{:^15.4e}".format(cost) + cost = "{: ^15.3e}".format(cost) if cost_reduction is None or abs(cost_reduction) == np.inf: cost_reduction = " " * 15 else: - cost_reduction = "{:^15.2e}".format(cost_reduction) + cost_reduction = "{: ^15.3e}".format(cost_reduction) if step_norm is None or abs(step_norm) == np.inf: step_norm = " " * 15 else: - step_norm = "{:^15.2e}".format(step_norm) + step_norm = "{:^15.3e}".format(step_norm) if optimality is None or abs(optimality) == np.inf: optimality = " " * 15 else: - optimality = "{:^15.2e}".format(optimality) + optimality = "{:^15.3e}".format(optimality) s = "{}{}{}{}{}{}".format( iteration, nfev, cost, cost_reduction, step_norm, optimality ) if constr_violation is not None: - s += "{:^24.2e}".format(constr_violation) + s += "{:^15.3e}".format(constr_violation) + for arg in args: + s += "{:^15.3e}".format(arg) print(s) From 5854276abd292c145e0f986f5ab40ec0534c5aae Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Sat, 27 May 2023 21:52:41 -0400 Subject: [PATCH 27/41] Use least squares multipliers as initial estimate --- desc/optimize/aug_lagrangian.py | 6 +++++- desc/optimize/aug_lagrangian_ls_stel.py | 7 ++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py index 3ff3d2eb7d..85fcda264e 100644 --- a/desc/optimize/aug_lagrangian.py +++ b/desc/optimize/aug_lagrangian.py @@ -168,7 +168,11 @@ def laghess(x, lmbda, mu, *args): if maxiter is None: maxiter = z.size mu = options.pop("initial_penalty_parameter", 10) - lmbda = 
options.pop("initial_multipliers", jnp.zeros(len(c))) + lmbda = options.pop("initial_multipliers", None) + if lmbda is None: # use least squares multiplier estimates + _J = constraint_wrapped.jac(z, *args) + _g = grad_wrapped(z, *args) + lmbda = jnp.linalg.lstsq(_J.T, _g)[0] maxiter_inner = options.pop("maxiter_inner", 20) max_nfev = options.pop("max_nfev", 5 * maxiter * maxiter_inner + 1) max_ngev = options.pop("max_ngev", maxiter * maxiter_inner + 1) diff --git a/desc/optimize/aug_lagrangian_ls_stel.py b/desc/optimize/aug_lagrangian_ls_stel.py index 5042440c72..7aaad818b1 100644 --- a/desc/optimize/aug_lagrangian_ls_stel.py +++ b/desc/optimize/aug_lagrangian_ls_stel.py @@ -148,7 +148,12 @@ def lagjac(z, lmbda, mu, *args): if maxiter is None: maxiter = z.size mu = options.pop("initial_penalty_parameter", 10) - lmbda = options.pop("initial_multipliers", 0.01 * jnp.ones(len(c))) + lmbda = options.pop("initial_multipliers", None) + if lmbda is None: # use least squares multiplier estimates + _J = constraint_wrapped.jac(z, *args) + _g = f @ jac_wrapped(z, *args) + lmbda = jnp.linalg.lstsq(_J.T, _g)[0] + maxiter_inner = options.pop("maxiter_inner", 20) max_nfev = options.pop("max_nfev", 5 * maxiter * maxiter_inner + 1) max_njev = options.pop("max_njev", maxiter * maxiter_inner + 1) From 00078ea537f27f94efbe8b2589f45c7b0bb24bba Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Sat, 27 May 2023 21:55:10 -0400 Subject: [PATCH 28/41] Allow AL parameters to be user specified --- desc/optimize/aug_lagrangian.py | 30 ++++++++++++++++--------- desc/optimize/aug_lagrangian_ls_stel.py | 28 +++++++++++++++-------- 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py index 85fcda264e..b8cb0a4a24 100644 --- a/desc/optimize/aug_lagrangian.py +++ b/desc/optimize/aug_lagrangian.py @@ -178,8 +178,17 @@ def laghess(x, lmbda, mu, *args): max_ngev = options.pop("max_ngev", maxiter * maxiter_inner + 1) max_nhev = options.pop("max_nhev", maxiter * maxiter_inner + 1) - gtolk = 1 / (10 * np.linalg.norm(mu)) - ctolk = 1 / (np.linalg.norm(mu) ** (0.1)) + # notation following Conn & Gould, algorithm 14.4.2, but with our mu = their mu^-1 + omega = options.pop("omega", 1.0) + eta = options.pop("eta", 1.0) + alpha_omega = options.pop("alpha_omega", 1.0) + beta_omega = options.pop("beta_omega", 1.0) + alpha_eta = options.pop("alpha_eta", 0.1) + beta_eta = options.pop("beta_eta", 0.9) + tau = options.pop("tau", 10) + + gtolk = omega / mu**alpha_omega + ctolk = eta / mu**alpha_eta zold = z fold = f allx = [] @@ -272,17 +281,16 @@ def laghess(x, lmbda, mu, *args): if success is not None: break - if constr_violation < ctolk: + if not result["success"]: # did the subproblem actually finish, or maxiter? 
+ continue + elif constr_violation < ctolk: lmbda = lmbda - mu * c - ctolk = ctolk / (mu ** (0.9)) - gtolk = gtolk / (mu) + ctolk = ctolk / (mu**beta_eta) + gtolk = gtolk / (mu**beta_omega) else: - mu = 2 * mu - ctolk = constr_violation / (mu ** (0.1)) - gtolk = gtolk / mu - - zold = z - fold = f + mu = tau * mu + ctolk = eta / (mu**alpha_eta) + gtolk = omega / (mu**alpha_omega) x, s = z2xs(z) active_mask = find_active_constraints(z, zbounds[0], zbounds[1], rtol=xtol) diff --git a/desc/optimize/aug_lagrangian_ls_stel.py b/desc/optimize/aug_lagrangian_ls_stel.py index 7aaad818b1..928ea2e9c5 100644 --- a/desc/optimize/aug_lagrangian_ls_stel.py +++ b/desc/optimize/aug_lagrangian_ls_stel.py @@ -158,8 +158,17 @@ def lagjac(z, lmbda, mu, *args): max_nfev = options.pop("max_nfev", 5 * maxiter * maxiter_inner + 1) max_njev = options.pop("max_njev", maxiter * maxiter_inner + 1) - gtolk = 1 / (10 * np.linalg.norm(mu)) - ctolk = 1 / (np.linalg.norm(mu) ** (0.1)) + # notation following Conn & Gould, algorithm 14.4.2, but with our mu = their mu^-1 + omega = options.pop("omega", 1.0) + eta = options.pop("eta", 1.0) + alpha_omega = options.pop("alpha_omega", 1.0) + beta_omega = options.pop("beta_omega", 1.0) + alpha_eta = options.pop("alpha_eta", 0.1) + beta_eta = options.pop("beta_eta", 0.9) + tau = options.pop("tau", 10) + + gtolk = omega / mu**alpha_omega + ctolk = eta / mu**alpha_eta zold = z cost_old = cost allx = [] @@ -258,15 +267,16 @@ def lagjac(z, lmbda, mu, *args): if success is not None: break - # otherwise update lagrangian stuff and continue - if constr_violation < ctolk: + if not result["success"]: # did the subproblem actually finish, or maxiter? + continue + elif constr_violation < ctolk: lmbda = lmbda - mu * c - ctolk = ctolk / (np.max(mu) ** (0.9)) - gtolk = gtolk / (np.max(mu)) + ctolk = ctolk / (mu**beta_eta) + gtolk = gtolk / (mu**beta_omega) else: - mu = 5.0 * mu - ctolk = ctolk / (np.max(mu) ** (0.1)) - gtolk = gtolk / np.max(mu) + mu = tau * mu + ctolk = eta / (mu**alpha_eta) + gtolk = omega / (mu**alpha_omega) zold = z cost_old = cost From 4b368a1743c0fcd5ed6c6cc6f06803d56ac2abee Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Sat, 27 May 2023 22:04:57 -0400 Subject: [PATCH 29/41] Reuse old trust region, change stoptol for subproblem --- desc/optimize/aug_lagrangian.py | 16 +++++++++++++--- desc/optimize/aug_lagrangian_ls_stel.py | 21 ++++++++++++--------- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py index b8cb0a4a24..849e1bb9e8 100644 --- a/desc/optimize/aug_lagrangian.py +++ b/desc/optimize/aug_lagrangian.py @@ -200,6 +200,9 @@ def laghess(x, lmbda, mu, *args): g_norm = np.linalg.norm(g, ord=np.inf) constr_violation = np.linalg.norm(c, ord=np.inf) + options.setdefault("initial_trust_radius", "scipy") + options["return_tr"] = True + if verbose > 1: print_header_nonlinear(True, "Penalty param", "max(|mltplr|)") print_iteration_nonlinear( @@ -223,16 +226,19 @@ def laghess(x, lmbda, mu, *args): bounds=zbounds, args=(lmbda, mu) + args, x_scale=x_scale, - ftol=ftol, - xtol=xtol, + ftol=0, + xtol=0, gtol=gtolk, maxiter=maxiter_inner, verbose=0, + options=options.copy(), ) allx += result["allx"] nfev += result["nfev"] ngev += result["ngev"] nhev += result["nhev"] + zold = z + fold = f z = result["x"] f = fun_wrapped(z, *args) c = constraint_wrapped.fun(z, *args) @@ -242,6 +248,10 @@ def laghess(x, lmbda, mu, *args): z_norm = np.linalg.norm(z) g_norm = result["optimality"] actual_reduction = fold - f 
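# ---- editor's note (illustrative sketch, not part of the patch) ----------
# The omega/eta parameters introduced in PATCH 28 implement the tolerance
# schedule of Conn & Gould's algorithm 14.4.2 (with mu here = 1/mu there):
# when the subproblem ends sufficiently feasible, the multipliers are updated
# and both inner tolerances tighten; otherwise the penalty grows and the
# tolerances reset. A standalone numeric sketch of one outer step:
mu, tau = 10.0, 10.0
omega, eta = 1.0, 1.0
alpha_omega, beta_omega = 1.0, 1.0
alpha_eta, beta_eta = 0.1, 0.9
gtolk = omega / mu**alpha_omega      # inner gradient tolerance, here 0.1
ctolk = eta / mu**alpha_eta          # feasibility tolerance, here ~0.79
constr_violation = 0.5               # pretend this was measured above
if constr_violation < ctolk:         # feasible enough: tighten tolerances
    ctolk = ctolk / mu**beta_eta
    gtolk = gtolk / mu**beta_omega
else:                                # not feasible enough: raise penalty
    mu = tau * mu
    ctolk = eta / mu**alpha_eta
    gtolk = omega / mu**alpha_omega
print(mu, gtolk, ctolk)
# ---------------------------------------------------------------------------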
+ # don't stop if we increased cost + reduction_ratio = jnp.sign(actual_reduction) + # reuse the previous trust radius on the next pass + options["initial_trust_radius"] = float(result["alltr"][-1]) iteration = iteration + 1 if verbose > 1: @@ -263,7 +273,7 @@ def laghess(x, lmbda, mu, *args): step_norm, z_norm, g_norm, - 1, + reduction_ratio, ftol, xtol, gtol, diff --git a/desc/optimize/aug_lagrangian_ls_stel.py b/desc/optimize/aug_lagrangian_ls_stel.py index 928ea2e9c5..b7cb5aa3c2 100644 --- a/desc/optimize/aug_lagrangian_ls_stel.py +++ b/desc/optimize/aug_lagrangian_ls_stel.py @@ -180,6 +180,9 @@ def lagjac(z, lmbda, mu, *args): g_norm = np.linalg.norm(g, ord=np.inf) constr_violation = np.linalg.norm(c, ord=np.inf) + options.setdefault("initial_trust_radius", "scipy") + options["return_tr"] = True + if verbose > 1: print_header_nonlinear(True, "Penalty param", "max(|mltplr|)") print_iteration_nonlinear( @@ -200,17 +203,14 @@ def lagjac(z, lmbda, mu, *args): z, lagjac, bounds=zbounds, - args=( - lmbda, - mu, - ) - + args, + args=(lmbda, mu) + args, x_scale=x_scale, - ftol=ftol, - xtol=xtol, + ftol=0, + xtol=0, gtol=gtolk, maxiter=maxiter_inner, verbose=0, + options=options.copy(), ) # update outer counters allx += result["allx"] @@ -221,12 +221,15 @@ def lagjac(z, lmbda, mu, *args): cost = 1 / 2 * jnp.dot(f, f) c = constraint_wrapped.fun(z) nfev += 1 - njev += 1 constr_violation = np.linalg.norm(c, ord=np.inf) step_norm = np.linalg.norm(zold - z) z_norm = np.linalg.norm(z) g_norm = result["optimality"] actual_reduction = cost_old - cost + # don't stop if we increased cost + reduction_ratio = jnp.sign(actual_reduction) + # reuse the previous trust radius on the next pass + options["initial_trust_radius"] = float(result["alltr"][-1]) iteration = iteration + 1 if verbose > 1: @@ -249,7 +252,7 @@ def lagjac(z, lmbda, mu, *args): step_norm, z_norm, g_norm, - 1, + reduction_ratio, ftol, xtol, gtol, From 10707d4ec59dac10ec91765a41a8909e4a4c27ae Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Sat, 27 May 2023 22:06:11 -0400 Subject: [PATCH 30/41] Cleanup unneeded code --- desc/optimize/aug_lagrangian.py | 8 +++----- desc/optimize/aug_lagrangian_ls_stel.py | 16 ++++++++++------ 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py index 849e1bb9e8..4b49ade459 100644 --- a/desc/optimize/aug_lagrangian.py +++ b/desc/optimize/aug_lagrangian.py @@ -139,7 +139,6 @@ def laghess(x, lmbda, mu, *args): Jc = constraint_wrapped.jac(x, *args) Hc1 = constraint_wrapped.hess(x, lmbda) Hc2 = constraint_wrapped.hess(x, c) - # ignoring higher order derivatives of constraints for now return Hf - Hc1 + mu * (Hc2 + jnp.dot(Jc.T, Jc)) else: @@ -160,10 +159,8 @@ def laghess(x, lmbda, mu, *args): z = z0.copy() f = fun_wrapped(z, *args) - g = grad_wrapped(z, *args) c = constraint_wrapped.fun(z) nfev += 1 - ngev += 1 if maxiter is None: maxiter = z.size @@ -197,7 +194,7 @@ def laghess(x, lmbda, mu, *args): message = None step_norm = jnp.inf actual_reduction = jnp.inf - g_norm = np.linalg.norm(g, ord=np.inf) + g_norm = np.linalg.norm(laggrad(z, lmbda, mu, *args), ord=np.inf) constr_violation = np.linalg.norm(c, ord=np.inf) options.setdefault("initial_trust_radius", "scipy") @@ -309,7 +306,8 @@ def laghess(x, lmbda, mu, *args): y=lmbda, success=success, fun=f, - grad=g, + grad=result["grad"], + v=result["v"], optimality=g_norm, nfev=nfev, ngev=ngev, diff --git a/desc/optimize/aug_lagrangian_ls_stel.py b/desc/optimize/aug_lagrangian_ls_stel.py index 
b7cb5aa3c2..46c94d05bf 100644 --- a/desc/optimize/aug_lagrangian_ls_stel.py +++ b/desc/optimize/aug_lagrangian_ls_stel.py @@ -132,18 +132,20 @@ def lagjac(z, lmbda, mu, *args): Jc = 1 / (np.sqrt(2 * mu)) * (mu * Jc) return jnp.vstack((Jf, Jc)) + def laggrad(z, lmbda, mu, *args): + f = lagfun(z, lmbda, mu, *args) + J = lagjac(z, lmbda, mu, *args) + return f @ J + nfev = 0 njev = 0 iteration = 0 z = z0.copy() f = fun_wrapped(z, *args) - J = jac_wrapped(z, *args) cost = 1 / 2 * jnp.dot(f, f) - g = jnp.dot(f, J) c = constraint_wrapped.fun(z) nfev += 1 - njev += 1 if maxiter is None: maxiter = z.size @@ -177,7 +179,8 @@ def lagjac(z, lmbda, mu, *args): message = None step_norm = jnp.inf actual_reduction = jnp.inf - g_norm = np.linalg.norm(g, ord=np.inf) + + g_norm = np.linalg.norm(laggrad(z, lmbda, mu, *args), ord=np.inf) constr_violation = np.linalg.norm(c, ord=np.inf) options.setdefault("initial_trust_radius", "scipy") @@ -292,8 +295,9 @@ def lagjac(z, lmbda, mu, *args): success=success, cost=cost, fun=f, - grad=g, - jac=J, + grad=result["grad"], + v=result["v"], + jac=result["jac"], optimality=g_norm, nfev=nfev, njev=njev, From fc44f15ec89c024b794b7641291133f4916ffff0 Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Sat, 27 May 2023 22:06:30 -0400 Subject: [PATCH 31/41] Fix definition of least squares augmented lagrangian --- desc/optimize/aug_lagrangian_ls_stel.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/desc/optimize/aug_lagrangian_ls_stel.py b/desc/optimize/aug_lagrangian_ls_stel.py index 46c94d05bf..d51756b496 100644 --- a/desc/optimize/aug_lagrangian_ls_stel.py +++ b/desc/optimize/aug_lagrangian_ls_stel.py @@ -120,16 +120,19 @@ def fmin_lag_ls_stel( # noqa: C901 - FIXME: simplify this bounds, ) + # L(x,y,mu) = 1/2 f(x)^2 - y*c(x) + mu/2 c(x)^2 + y^2/(2*mu) + # = 1/2 f(x)^2 + 1/2 [-y/sqrt(mu) + sqrt(mu) c(x)]^2 + def lagfun(z, lmbda, mu, *args): f = fun_wrapped(z, *args) c = constraint_wrapped.fun(z, *args) - c = 1 / (np.sqrt(2 * mu)) * (-lmbda + mu * c) + c = -lmbda / jnp.sqrt(mu) + jnp.sqrt(mu) * c return jnp.concatenate((f, c)) def lagjac(z, lmbda, mu, *args): Jf = jac_wrapped(z, *args) Jc = constraint_wrapped.jac(z, *args) - Jc = 1 / (np.sqrt(2 * mu)) * (mu * Jc) + Jc = jnp.sqrt(mu) * Jc return jnp.vstack((Jf, Jc)) def laggrad(z, lmbda, mu, *args): From 6bcdbfc45117744997bca774b3a68de4292b1679 Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Sat, 27 May 2023 22:06:48 -0400 Subject: [PATCH 32/41] Add tests for augmented langrangian stuff --- tests/test_optimizer.py | 97 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 94 insertions(+), 3 deletions(-) diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index c25d33e0c5..69c5bb8c6d 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -27,6 +27,8 @@ FixPressure, FixPsi, ForceBalance, + GenericObjective, + MagneticWell, MeanCurvature, ObjectiveFunction, Volume, @@ -788,7 +790,7 @@ def con(x): bounds=(-jnp.inf, jnp.inf), constraint=constraint, args=(), - x_scale=1, + x_scale="auto", ftol=0, xtol=1e-6, gtol=1e-6, @@ -797,7 +799,7 @@ def con(x): maxiter=None, options={}, ) - + print(out1["active_mask"]) out2 = fmin_lag_ls_stel( vecfun, x0, @@ -805,7 +807,7 @@ def con(x): bounds=(-jnp.inf, jnp.inf), constraint=constraint, args=(), - x_scale=1, + x_scale="auto", ftol=0, xtol=1e-6, gtol=1e-6, @@ -827,3 +829,92 @@ def con(x): np.testing.assert_allclose(out1["x"], out3["x"], rtol=1e-4, atol=1e-4) np.testing.assert_allclose(out2["x"], out3["x"], rtol=1e-4, atol=1e-4) + + 
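# ---- editor's note (illustrative sketch, not part of the patch) ----------
# Numeric check of the identity cited in PATCH 31 above: up to the constant
# |y|^2/(2 mu), the augmented Lagrangian of a least-squares objective is
# again a sum of squares, which is what lets lsqtr solve the subproblem:
#   1/2 |f|^2 - y.c + mu/2 |c|^2 + |y|^2/(2 mu)
#     = 1/2 |concat(f, -y/sqrt(mu) + sqrt(mu)*c)|^2
import numpy as np

rng = np.random.default_rng(0)
f, c, y = rng.normal(size=5), rng.normal(size=3), rng.normal(size=3)
mu = 7.0
lhs = 0.5 * f @ f - y @ c + 0.5 * mu * (c @ c) + (y @ y) / (2 * mu)
r = np.concatenate([f, -y / np.sqrt(mu) + np.sqrt(mu) * c])
assert np.isclose(lhs, 0.5 * r @ r)
# ---------------------------------------------------------------------------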
+@pytest.mark.slow +@pytest.mark.regression +def test_constrained_AL_lsq(): + """Tests that the least squares augmented Lagrangian optimizer does something.""" + eq = desc.examples.get("SOLOVEV") + + constraints = ( + FixBoundaryR(modes=[0, 0, 0]), # fix specified major axis position + FixBoundaryZ(), # fix Z shape but not R + FixPressure(), # fix pressure profile + FixIota(), # fix rotational transform profile + FixPsi(), # fix total toroidal magnetic flux + ) + # some random constraints to keep the shape from getting wacky + V = eq.compute("V")["V"] + Vbounds = (0.95 * V, 1.05 * V) + AR = eq.compute("R0/a")["R0/a"] + ARbounds = (0.95 * AR, 1.05 * AR) + constraints += ( + Volume(bounds=Vbounds), + AspectRatio(bounds=ARbounds), + MagneticWell(bounds=(0, jnp.inf)), + ForceBalance(bounds=(-1e-3, 1e-3), normalize_target=False), + ) + H = eq.compute( + "curvature_H", grid=LinearGrid(M=eq.M_grid, N=eq.N_grid, NFP=eq.NFP) + )["curvature_H"] + obj = ObjectiveFunction(MeanCurvature(target=H)) + eq2, result = eq.optimize( + objective=obj, + constraints=constraints, + optimizer="auglag", + maxiter=5000, + verbose=3, + x_scale="auto", + copy=True, + options={}, + ) + V2 = eq2.compute("V")["V"] + AR2 = eq2.compute("R0/a")["R0/a"] + Dwell = constraints[-2].compute(*constraints[-2].xs(eq2)) + assert ARbounds[0] < AR2 < ARbounds[1] + assert Vbounds[0] < V2 < Vbounds[1] + assert eq2.is_nested() + np.testing.assert_array_less(-Dwell, 0) + + +@pytest.mark.slow +@pytest.mark.regression +def test_constrained_AL_scalar(): + """Tests that the augmented Lagrangian constrained optimizer does something.""" + eq = desc.examples.get("SOLOVEV") + + constraints = ( + FixBoundaryR(modes=[0, 0, 0]), # fix specified major axis position + FixBoundaryZ(), # fix Z shape but not R + FixPressure(), # fix pressure profile + FixIota(), # fix rotational transform profile + FixPsi(), # fix total toroidal magnetic flux + ) + V = eq.compute("V")["V"] + AR = eq.compute("R0/a")["R0/a"] + constraints += ( + Volume(target=V), + AspectRatio(target=AR), + MagneticWell(bounds=(0, jnp.inf)), + ForceBalance(bounds=(-1e-3, 1e-3), normalize_target=False), + ) + # Dummy objective to return 0, we just want a feasible solution. 
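# ---- editor's note (not part of the patch) --------------------------------
# GenericObjective("0") builds an objective that evaluates the compute
# quantity named "0", i.e. it always returns zero, so the objective
# contributes nothing to the cost and the optimizer only has to find a point
# satisfying the Volume, AspectRatio, MagneticWell, and ForceBalance
# constraints assembled above.
# ----------------------------------------------------------------------------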
+    obj = ObjectiveFunction(GenericObjective("0"))
+    eq2, result = eq.optimize(
+        objective=obj,
+        constraints=constraints,
+        optimizer="auglag",
+        maxiter=5000,
+        verbose=3,
+        x_scale="auto",
+        copy=True,
+        options={},
+    )
+    V2 = eq2.compute("V")["V"]
+    AR2 = eq2.compute("R0/a")["R0/a"]
+    Dwell = constraints[-2].compute(*constraints[-2].xs(eq2))
+    np.testing.assert_allclose(AR, AR2)
+    np.testing.assert_allclose(V, V2)
+    assert eq2.is_nested()
+    np.testing.assert_array_less(-Dwell, 1e-4)

From 257b4d8926d05c8381f0341e3d05d02335842329 Mon Sep 17 00:00:00 2001
From: Rory Conlin
Date: Sat, 27 May 2023 22:12:00 -0400
Subject: [PATCH 33/41] Restructure how iterations in AL are counted

---
 desc/optimize/aug_lagrangian.py         | 13 +++++++------
 desc/optimize/aug_lagrangian_ls_stel.py | 10 +++++-----
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py
index 4b49ade459..22a5cc552c 100644
--- a/desc/optimize/aug_lagrangian.py
+++ b/desc/optimize/aug_lagrangian.py
@@ -162,18 +162,19 @@ def laghess(x, lmbda, mu, *args):
     c = constraint_wrapped.fun(z)
     nfev += 1

-    if maxiter is None:
-        maxiter = z.size
     mu = options.pop("initial_penalty_parameter", 10)
     lmbda = options.pop("initial_multipliers", None)
     if lmbda is None:  # use least squares multiplier estimates
         _J = constraint_wrapped.jac(z, *args)
         _g = grad_wrapped(z, *args)
         lmbda = jnp.linalg.lstsq(_J.T, _g)[0]
+
+    if maxiter is None:
+        maxiter = z.size * 100
     maxiter_inner = options.pop("maxiter_inner", 20)
-    max_nfev = options.pop("max_nfev", 5 * maxiter * maxiter_inner + 1)
-    max_ngev = options.pop("max_ngev", maxiter * maxiter_inner + 1)
-    max_nhev = options.pop("max_nhev", maxiter * maxiter_inner + 1)
+    max_nfev = options.pop("max_nfev", 5 * maxiter + 1)
+    max_ngev = options.pop("max_ngev", maxiter + 1)
+    max_nhev = options.pop("max_nhev", maxiter + 1)

     # notation following Conn & Gould, algorithm 14.4.2, but with our mu = their mu^-1
     omega =
options.pop("omega", 1.0) @@ -222,6 +222,7 @@ def laggrad(z, lmbda, mu, *args): allx += result["allx"] nfev += result["nfev"] njev += result["njev"] + iteration += result["nit"] z = result["x"] f = fun_wrapped(z, *args) cost = 1 / 2 * jnp.dot(f, f) @@ -236,7 +237,6 @@ def laggrad(z, lmbda, mu, *args): reduction_ratio = jnp.sign(actual_reduction) # reuse the previous trust radius on the next pass options["initial_trust_radius"] = float(result["alltr"][-1]) - iteration = iteration + 1 if verbose > 1: print_iteration_nonlinear( From 0dffaa997cc5e69552ab6a48a02d0597c87e17f5 Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Sat, 27 May 2023 22:19:38 -0400 Subject: [PATCH 34/41] Cleanup and rename --- desc/optimize/__init__.py | 4 +- desc/optimize/_desc_wrappers.py | 8 +-- desc/optimize/aug_lagrangian.py | 51 ++++++++++--------- ...angian_ls_stel.py => aug_lagrangian_ls.py} | 33 ++++++------ 4 files changed, 49 insertions(+), 47 deletions(-) rename desc/optimize/{aug_lagrangian_ls_stel.py => aug_lagrangian_ls.py} (93%) diff --git a/desc/optimize/__init__.py b/desc/optimize/__init__.py index 1e4eed0b40..7093f03c4a 100644 --- a/desc/optimize/__init__.py +++ b/desc/optimize/__init__.py @@ -2,8 +2,8 @@ from . import _desc_wrappers, _scipy_wrappers from ._constraint_wrappers import LinearConstraintProjection, ProximalProjection -from .aug_lagrangian import fmin_lag_stel -from .aug_lagrangian_ls_stel import fmin_lag_ls_stel +from .aug_lagrangian import fmin_auglag +from .aug_lagrangian_ls import lsq_auglag from .fmin_scalar import fmintr from .least_squares import lsqtr from .optimizer import Optimizer, optimizers, register_optimizer diff --git a/desc/optimize/_desc_wrappers.py b/desc/optimize/_desc_wrappers.py index cd661d8d72..ead1a502f5 100644 --- a/desc/optimize/_desc_wrappers.py +++ b/desc/optimize/_desc_wrappers.py @@ -2,8 +2,8 @@ from desc.backend import jnp -from .aug_lagrangian import fmin_lag_stel -from .aug_lagrangian_ls_stel import fmin_lag_ls_stel +from .aug_lagrangian import fmin_auglag +from .aug_lagrangian_ls import lsq_auglag from .fmin_scalar import fmintr from .least_squares import lsqtr from .optimizer import register_optimizer @@ -88,7 +88,7 @@ def _optimize_desc_aug_lagrangian( else: constraint_wrapped = None - result = fmin_lag_stel( + result = fmin_auglag( objective.compute_scalar, x0=x0, grad=objective.grad, @@ -181,7 +181,7 @@ def _optimize_desc_aug_lagrangian_least_squares( else: constraint_wrapped = None - result = fmin_lag_ls_stel( + result = lsq_auglag( objective.compute_scaled_error, x0=x0, jac=objective.jac_scaled, diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py index 22a5cc552c..57995c2662 100644 --- a/desc/optimize/aug_lagrangian.py +++ b/desc/optimize/aug_lagrangian.py @@ -15,7 +15,7 @@ ) -def fmin_lag_stel( # noqa: C901 - FIXME: simplify this +def fmin_auglag( # noqa: C901 - FIXME: simplify this fun, x0, grad, @@ -121,31 +121,31 @@ def fmin_lag_stel( # noqa: C901 - FIXME: simplify this bounds, ) - def lagfun(z, lmbda, mu, *args): + def lagfun(z, y, mu, *args): c = constraint_wrapped.fun(z, *args) - return fun_wrapped(z, *args) - jnp.dot(lmbda, c) + mu / 2 * jnp.dot(c, c) + return fun_wrapped(z, *args) - jnp.dot(y, c) + mu / 2 * jnp.dot(c, c) - def laggrad(x, lmbda, mu, *args): - c = constraint_wrapped.fun(x, *args) - J = constraint_wrapped.jac(x, *args) - return grad_wrapped(x, *args) - jnp.dot(lmbda, J) + mu * jnp.dot(c, J) + def laggrad(z, y, mu, *args): + c = constraint_wrapped.fun(z, *args) + J = constraint_wrapped.jac(z, *args) + 
return grad_wrapped(z, *args) - jnp.dot(y, J) + mu * jnp.dot(c, J) if callable(hess_wrapped): if callable(constraint_wrapped.hess): - def laghess(x, lmbda, mu, *args): - c = constraint_wrapped.fun(x, *args) - Hf = hess_wrapped(x, *args) - Jc = constraint_wrapped.jac(x, *args) - Hc1 = constraint_wrapped.hess(x, lmbda) - Hc2 = constraint_wrapped.hess(x, c) + def laghess(z, y, mu, *args): + c = constraint_wrapped.fun(z, *args) + Hf = hess_wrapped(z, *args) + Jc = constraint_wrapped.jac(z, *args) + Hc1 = constraint_wrapped.hess(z, y) + Hc2 = constraint_wrapped.hess(z, c) return Hf - Hc1 + mu * (Hc2 + jnp.dot(Jc.T, Jc)) else: - def laghess(x, lmbda, mu, *args): - H = hess_wrapped(x, *args) - J = constraint_wrapped.jac(x, *args) + def laghess(z, y, mu, *args): + H = hess_wrapped(z, *args) + J = constraint_wrapped.jac(z, *args) # ignoring higher order derivatives of constraints for now return H + mu * jnp.dot(J.T, J) @@ -163,11 +163,11 @@ def laghess(x, lmbda, mu, *args): nfev += 1 mu = options.pop("initial_penalty_parameter", 10) - lmbda = options.pop("initial_multipliers", None) - if lmbda is None: # use least squares multiplier estimates + y = options.pop("initial_multipliers", None) + if y is None: # use least squares multiplier estimates _J = constraint_wrapped.jac(z, *args) _g = grad_wrapped(z, *args) - lmbda = jnp.linalg.lstsq(_J.T, _g)[0] + y = jnp.linalg.lstsq(_J.T, _g)[0] if maxiter is None: maxiter = z.size * 100 @@ -195,7 +195,7 @@ def laghess(x, lmbda, mu, *args): message = None step_norm = jnp.inf actual_reduction = jnp.inf - g_norm = np.linalg.norm(laggrad(z, lmbda, mu, *args), ord=np.inf) + g_norm = np.linalg.norm(laggrad(z, y, mu, *args), ord=np.inf) constr_violation = np.linalg.norm(c, ord=np.inf) options.setdefault("initial_trust_radius", "scipy") @@ -212,7 +212,7 @@ def laghess(x, lmbda, mu, *args): g_norm, constr_violation, mu, - jnp.max(jnp.abs(lmbda)), + jnp.max(jnp.abs(y)), ) while iteration < maxiter: @@ -222,7 +222,7 @@ def laghess(x, lmbda, mu, *args): grad=laggrad, hess=laghess, bounds=zbounds, - args=(lmbda, mu) + args, + args=(y, mu) + args, x_scale=x_scale, ftol=0, xtol=0, @@ -262,7 +262,7 @@ def laghess(x, lmbda, mu, *args): g_norm, constr_violation, mu, - jnp.max(jnp.abs(lmbda)), + jnp.max(jnp.abs(y)), ) success, message = check_termination( @@ -292,7 +292,7 @@ def laghess(x, lmbda, mu, *args): if not result["success"]: # did the subproblem actually finish, or maxiter? 
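             # the subproblem hit its inner iteration limit without converging,
             # so don't update the multipliers from an unconverged iterate;
             # just run another cycle with the same y and mu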
continue elif constr_violation < ctolk: - lmbda = lmbda - mu * c + y = y - mu * c ctolk = ctolk / (mu**beta_eta) gtolk = gtolk / (mu**beta_omega) else: @@ -304,7 +304,8 @@ def laghess(x, lmbda, mu, *args): active_mask = find_active_constraints(z, zbounds[0], zbounds[1], rtol=xtol) result = OptimizeResult( x=x, - y=lmbda, + s=s, + y=y, success=success, fun=f, grad=result["grad"], diff --git a/desc/optimize/aug_lagrangian_ls_stel.py b/desc/optimize/aug_lagrangian_ls.py similarity index 93% rename from desc/optimize/aug_lagrangian_ls_stel.py rename to desc/optimize/aug_lagrangian_ls.py index 5e6cdcd521..2bb16ea14f 100644 --- a/desc/optimize/aug_lagrangian_ls_stel.py +++ b/desc/optimize/aug_lagrangian_ls.py @@ -15,7 +15,7 @@ ) -def fmin_lag_ls_stel( # noqa: C901 - FIXME: simplify this +def lsq_auglag( # noqa: C901 - FIXME: simplify this fun, x0, jac, @@ -123,21 +123,21 @@ def fmin_lag_ls_stel( # noqa: C901 - FIXME: simplify this # L(x,y,mu) = 1/2 f(x)^2 - y*c(x) + mu/2 c(x)^2 + y^2/(2*mu) # = 1/2 f(x)^2 + 1/2 [-y/sqrt(mu) + sqrt(mu) c(x)]^2 - def lagfun(z, lmbda, mu, *args): + def lagfun(z, y, mu, *args): f = fun_wrapped(z, *args) c = constraint_wrapped.fun(z, *args) - c = -lmbda / jnp.sqrt(mu) + jnp.sqrt(mu) * c + c = -y / jnp.sqrt(mu) + jnp.sqrt(mu) * c return jnp.concatenate((f, c)) - def lagjac(z, lmbda, mu, *args): + def lagjac(z, y, mu, *args): Jf = jac_wrapped(z, *args) Jc = constraint_wrapped.jac(z, *args) Jc = jnp.sqrt(mu) * Jc return jnp.vstack((Jf, Jc)) - def laggrad(z, lmbda, mu, *args): - f = lagfun(z, lmbda, mu, *args) - J = lagjac(z, lmbda, mu, *args) + def laggrad(z, y, mu, *args): + f = lagfun(z, y, mu, *args) + J = lagjac(z, y, mu, *args) return f @ J nfev = 0 @@ -151,11 +151,11 @@ def laggrad(z, lmbda, mu, *args): nfev += 1 mu = options.pop("initial_penalty_parameter", 10) - lmbda = options.pop("initial_multipliers", None) - if lmbda is None: # use least squares multiplier estimates + y = options.pop("initial_multipliers", None) + if y is None: # use least squares multiplier estimates _J = constraint_wrapped.jac(z, *args) _g = f @ jac_wrapped(z, *args) - lmbda = jnp.linalg.lstsq(_J.T, _g)[0] + y = jnp.linalg.lstsq(_J.T, _g)[0] if maxiter is None: maxiter = z.size * 100 @@ -183,7 +183,7 @@ def laggrad(z, lmbda, mu, *args): step_norm = jnp.inf actual_reduction = jnp.inf - g_norm = np.linalg.norm(laggrad(z, lmbda, mu, *args), ord=np.inf) + g_norm = np.linalg.norm(laggrad(z, y, mu, *args), ord=np.inf) constr_violation = np.linalg.norm(c, ord=np.inf) options.setdefault("initial_trust_radius", "scipy") @@ -200,7 +200,7 @@ def laggrad(z, lmbda, mu, *args): g_norm, constr_violation, mu, - jnp.max(jnp.abs(lmbda)), + jnp.max(jnp.abs(y)), ) while iteration < maxiter: @@ -209,7 +209,7 @@ def laggrad(z, lmbda, mu, *args): z, lagjac, bounds=zbounds, - args=(lmbda, mu) + args, + args=(y, mu) + args, x_scale=x_scale, ftol=0, xtol=0, @@ -248,7 +248,7 @@ def laggrad(z, lmbda, mu, *args): g_norm, constr_violation, mu, - jnp.max(jnp.abs(lmbda)), + jnp.max(jnp.abs(y)), ) # check if we can stop the outer loop @@ -279,7 +279,7 @@ def laggrad(z, lmbda, mu, *args): if not result["success"]: # did the subproblem actually finish, or maxiter? 
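             # as in the scalar version, skip the multiplier and penalty update
             # when the inner least squares solve stopped at its iteration limit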
continue elif constr_violation < ctolk: - lmbda = lmbda - mu * c + y = y - mu * c ctolk = ctolk / (mu**beta_eta) gtolk = gtolk / (mu**beta_omega) else: @@ -294,7 +294,8 @@ def laggrad(z, lmbda, mu, *args): active_mask = find_active_constraints(z, zbounds[0], zbounds[1], rtol=xtol) result = OptimizeResult( x=x, - y=lmbda, + s=s, + y=y, success=success, cost=cost, fun=f, From c3e8bb0b824c6712246ef5729b90c9412a9e193d Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Sat, 27 May 2023 22:23:16 -0400 Subject: [PATCH 35/41] Remove BFGS-auglag option until we can get it working --- desc/optimize/_desc_wrappers.py | 13 ++++------- desc/optimize/aug_lagrangian.py | 41 ++++++++++++++++----------------- 2 files changed, 24 insertions(+), 30 deletions(-) diff --git a/desc/optimize/_desc_wrappers.py b/desc/optimize/_desc_wrappers.py index ead1a502f5..c70c348dc1 100644 --- a/desc/optimize/_desc_wrappers.py +++ b/desc/optimize/_desc_wrappers.py @@ -11,17 +11,13 @@ @register_optimizer( - name=["auglag", "auglag-bfgs"], - description=[ - "Augmented Lagrangian method with trust region subproblem." - "Augmented Lagrangian method with trust region subproblem." - + "Uses BFGS to approximate hessian", - ], + name="auglag", + description="Augmented Lagrangian method with trust region subproblem.", scalar=True, equality_constraints=True, inequality_constraints=True, stochastic=False, - hessian=[True, False], + hessian=True, GPU=True, ) def _optimize_desc_aug_lagrangian( @@ -69,7 +65,6 @@ def _optimize_desc_aug_lagrangian( """ options = {} if options is None else options - hess = objective.hess if "bfgs" not in method else "bfgs" if not isinstance(x_scale, str) and jnp.allclose(x_scale, 1): options.setdefault("initial_trust_ratio", 1e-3) options.setdefault("max_trust_radius", 1.0) @@ -92,7 +87,7 @@ def _optimize_desc_aug_lagrangian( objective.compute_scalar, x0=x0, grad=objective.grad, - hess=hess, + hess=objective.hess, bounds=(-jnp.inf, jnp.inf), constraint=constraint_wrapped, args=(), diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py index 57995c2662..808562bc81 100644 --- a/desc/optimize/aug_lagrangian.py +++ b/desc/optimize/aug_lagrangian.py @@ -19,7 +19,7 @@ def fmin_auglag( # noqa: C901 - FIXME: simplify this fun, x0, grad, - hess="bfgs", + hess, bounds=(-jnp.inf, jnp.inf), constraint=None, args=(), @@ -42,9 +42,8 @@ def fmin_auglag( # noqa: C901 - FIXME: simplify this initial guess grad : callable function to compute gradient, df/dx. Should take the same arguments as fun - hess : callable or ``'bfgs'``, optional: - function to compute Hessian matrix of fun, or ``'bfgs'`` in which case the BFGS - method will be used to approximate the Hessian. + hess : callable + function to compute Hessian matrix of fun bounds : tuple of array-like Lower and upper bounds on independent variables. Defaults to no bounds. 
Each array must match the size of x0 or be a scalar, in the latter case a @@ -130,27 +129,27 @@ def laggrad(z, y, mu, *args): J = constraint_wrapped.jac(z, *args) return grad_wrapped(z, *args) - jnp.dot(y, J) + mu * jnp.dot(c, J) - if callable(hess_wrapped): - if callable(constraint_wrapped.hess): + assert callable(hess_wrapped) + if callable(constraint_wrapped.hess): - def laghess(z, y, mu, *args): - c = constraint_wrapped.fun(z, *args) - Hf = hess_wrapped(z, *args) - Jc = constraint_wrapped.jac(z, *args) - Hc1 = constraint_wrapped.hess(z, y) - Hc2 = constraint_wrapped.hess(z, c) - return Hf - Hc1 + mu * (Hc2 + jnp.dot(Jc.T, Jc)) + def laghess(z, y, mu, *args): + c = constraint_wrapped.fun(z, *args) + Hf = hess_wrapped(z, *args) + Jc = constraint_wrapped.jac(z, *args) + Hc1 = constraint_wrapped.hess(z, y) + Hc2 = constraint_wrapped.hess(z, c) + return Hf - Hc1 + mu * (Hc2 + jnp.dot(Jc.T, Jc)) - else: + else: - def laghess(z, y, mu, *args): - H = hess_wrapped(z, *args) - J = constraint_wrapped.jac(z, *args) - # ignoring higher order derivatives of constraints for now - return H + mu * jnp.dot(J.T, J) + def laghess(z, y, mu, *args): + H = hess_wrapped(z, *args) + J = constraint_wrapped.jac(z, *args) + # ignoring higher order derivatives of constraints for now + return H + mu * jnp.dot(J.T, J) - else: - laghess = "bfgs" + # TODO: figure out how to let BFGS maintain state between subproblems, otherwise + # it doesn't really converge nfev = 0 ngev = 0 From b09514afe6365a9f3cfd2799634ad110f4e0d9fe Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Sat, 27 May 2023 22:34:59 -0400 Subject: [PATCH 36/41] Fix tests --- tests/test_optimizer.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index 69c5bb8c6d..7f1e8e773b 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -38,9 +38,9 @@ LinearConstraintProjection, Optimizer, ProximalProjection, - fmin_lag_ls_stel, - fmin_lag_stel, + fmin_auglag, fmintr, + lsq_auglag, lsqtr, optimizers, sgd, @@ -782,7 +782,7 @@ def con(x): constraint = NonlinearConstraint(con, -np.inf, 0, conjac, conhess) x0 = rng.random(n) - out1 = fmin_lag_stel( + out1 = fmin_auglag( fun, x0, grad, @@ -800,7 +800,7 @@ def con(x): options={}, ) print(out1["active_mask"]) - out2 = fmin_lag_ls_stel( + out2 = lsq_auglag( vecfun, x0, jac, @@ -862,8 +862,8 @@ def test_constrained_AL_lsq(): eq2, result = eq.optimize( objective=obj, constraints=constraints, - optimizer="auglag", - maxiter=5000, + optimizer="auglag-lsq", + maxiter=500, verbose=3, x_scale="auto", copy=True, @@ -905,7 +905,7 @@ def test_constrained_AL_scalar(): objective=obj, constraints=constraints, optimizer="auglag", - maxiter=5000, + maxiter=500, verbose=3, x_scale="auto", copy=True, @@ -917,4 +917,4 @@ def test_constrained_AL_scalar(): np.testing.assert_allclose(AR, AR2) np.testing.assert_allclose(V, V2) assert eq2.is_nested() - np.testing.assert_array_less(-Dwell, 1e-4) + np.testing.assert_array_less(-Dwell, 0) From a64ec3b8a67642ae570ab72e79416aa5bce84ff6 Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Sat, 27 May 2023 23:45:15 -0400 Subject: [PATCH 37/41] Fix nan multipliers with no constraints --- desc/optimize/aug_lagrangian.py | 1 + desc/optimize/aug_lagrangian_ls.py | 1 + 2 files changed, 2 insertions(+) diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py index 808562bc81..c6d4fd0e5b 100644 --- a/desc/optimize/aug_lagrangian.py +++ b/desc/optimize/aug_lagrangian.py @@ -167,6 +167,7 @@ def 
laghess(z, y, mu, *args): _J = constraint_wrapped.jac(z, *args) _g = grad_wrapped(z, *args) y = jnp.linalg.lstsq(_J.T, _g)[0] + y = jnp.nan_to_num(y, nan=0.0, posinf=0.0, neginf=0.0) if maxiter is None: maxiter = z.size * 100 diff --git a/desc/optimize/aug_lagrangian_ls.py b/desc/optimize/aug_lagrangian_ls.py index 2bb16ea14f..6faf304730 100644 --- a/desc/optimize/aug_lagrangian_ls.py +++ b/desc/optimize/aug_lagrangian_ls.py @@ -156,6 +156,7 @@ def laggrad(z, y, mu, *args): _J = constraint_wrapped.jac(z, *args) _g = f @ jac_wrapped(z, *args) y = jnp.linalg.lstsq(_J.T, _g)[0] + y = jnp.nan_to_num(y, nan=0.0, posinf=0.0, neginf=0.0) if maxiter is None: maxiter = z.size * 100 From e5152f697d8fd16e3b64f6783640a6b1b1d504e3 Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Sat, 27 May 2023 23:52:59 -0400 Subject: [PATCH 38/41] Replace np with jnp in augmented lagrangian --- desc/optimize/aug_lagrangian.py | 11 +++++------ desc/optimize/aug_lagrangian_ls.py | 13 ++++++------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py index c6d4fd0e5b..b0024c3208 100644 --- a/desc/optimize/aug_lagrangian.py +++ b/desc/optimize/aug_lagrangian.py @@ -1,6 +1,5 @@ """Augmented Langrangian for scalar valued objectives.""" -import numpy as np from scipy.optimize import NonlinearConstraint, OptimizeResult from desc.backend import jnp @@ -195,8 +194,8 @@ def laghess(z, y, mu, *args): message = None step_norm = jnp.inf actual_reduction = jnp.inf - g_norm = np.linalg.norm(laggrad(z, y, mu, *args), ord=np.inf) - constr_violation = np.linalg.norm(c, ord=np.inf) + g_norm = jnp.linalg.norm(laggrad(z, y, mu, *args), ord=jnp.inf) + constr_violation = jnp.linalg.norm(c, ord=jnp.inf) options.setdefault("initial_trust_radius", "scipy") options["return_tr"] = True @@ -242,9 +241,9 @@ def laghess(z, y, mu, *args): f = fun_wrapped(z, *args) c = constraint_wrapped.fun(z, *args) nfev += 1 - constr_violation = np.linalg.norm(c, ord=np.inf) - step_norm = np.linalg.norm(zold - z) - z_norm = np.linalg.norm(z) + constr_violation = jnp.linalg.norm(c, ord=jnp.inf) + step_norm = jnp.linalg.norm(zold - z) + z_norm = jnp.linalg.norm(z) g_norm = result["optimality"] actual_reduction = fold - f # don't stop if we increased cost diff --git a/desc/optimize/aug_lagrangian_ls.py b/desc/optimize/aug_lagrangian_ls.py index 6faf304730..33f07e702c 100644 --- a/desc/optimize/aug_lagrangian_ls.py +++ b/desc/optimize/aug_lagrangian_ls.py @@ -1,6 +1,5 @@ """Augmented Langrangian for vector valued objectives.""" -import numpy as np from scipy.optimize import NonlinearConstraint, OptimizeResult from desc.backend import jnp @@ -184,8 +183,8 @@ def laggrad(z, y, mu, *args): step_norm = jnp.inf actual_reduction = jnp.inf - g_norm = np.linalg.norm(laggrad(z, y, mu, *args), ord=np.inf) - constr_violation = np.linalg.norm(c, ord=np.inf) + g_norm = jnp.linalg.norm(laggrad(z, y, mu, *args), ord=jnp.inf) + constr_violation = jnp.linalg.norm(c, ord=jnp.inf) options.setdefault("initial_trust_radius", "scipy") options["return_tr"] = True @@ -229,9 +228,9 @@ def laggrad(z, y, mu, *args): cost = 1 / 2 * jnp.dot(f, f) c = constraint_wrapped.fun(z) nfev += 1 - constr_violation = np.linalg.norm(c, ord=np.inf) - step_norm = np.linalg.norm(zold - z) - z_norm = np.linalg.norm(z) + constr_violation = jnp.linalg.norm(c, ord=jnp.inf) + step_norm = jnp.linalg.norm(zold - z) + z_norm = jnp.linalg.norm(z) g_norm = result["optimality"] actual_reduction = cost_old - cost # don't stop if we increased cost 
@@ -270,7 +269,7 @@ def laggrad(z, y, mu, *args): njev, max_njev, 0, - np.inf, + jnp.inf, constr_violation=constr_violation, ctol=ctol, ) From f995b7123da106a95788925534fe9767d4697e1d Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Sun, 28 May 2023 15:17:59 -0400 Subject: [PATCH 39/41] Make optimizer names more consistent --- desc/optimize/_desc_wrappers.py | 21 ++++++++++++++++----- desc/optimize/fmin_scalar.py | 2 +- docs/optimizers.csv | 12 ++++++++---- tests/test_optimizer.py | 8 ++++---- 4 files changed, 29 insertions(+), 14 deletions(-) diff --git a/desc/optimize/_desc_wrappers.py b/desc/optimize/_desc_wrappers.py index c70c348dc1..06eb71b9be 100644 --- a/desc/optimize/_desc_wrappers.py +++ b/desc/optimize/_desc_wrappers.py @@ -11,7 +11,7 @@ @register_optimizer( - name="auglag", + name="fmin-auglag", description="Augmented Lagrangian method with trust region subproblem.", scalar=True, equality_constraints=True, @@ -104,7 +104,7 @@ def _optimize_desc_aug_lagrangian( @register_optimizer( - name="auglag-lsq", + name="lsq-auglag", description="Least Squares Augmented Lagrangian approach " + "to constrained optimization", scalar=False, @@ -276,12 +276,23 @@ def _optimize_desc_least_squares( @register_optimizer( - name=["dogleg", "subspace", "dogleg-bfgs", "subspace-bfgs"], + name=[ + "fmin-exact", + "fmin-dogleg", + "fmin-subspace", + "fmin-exact-bfgs", + "fmin-dogleg-bfgs", + "fmin-subspace-bfgs", + ], description=[ + "Trust region method using iterative cholesky method to exactly solve the " + + "trust region subproblem.", "Trust region method using Powell's dogleg method to approximately solve the " + "trust region subproblem.", "Trust region method solving the subproblem over the 2d subspace spanned by " + "the gradient and newton direction.", + "Trust region method using iterative cholesky method to exactly solve the " + + "trust region subproblem. Uses BFGS to approximate hessian", "Trust region method using Powell's dogleg method to approximately solve the " + "trust region subproblem. Uses BFGS to approximate hessian", "Trust region method solving the subproblem over the 2d subspace spanned by " @@ -291,7 +302,7 @@ def _optimize_desc_least_squares( equality_constraints=False, inequality_constraints=False, stochastic=False, - hessian=[True, True, False, False], + hessian=[True, True, True, False, False, False], GPU=True, ) def _optimize_desc_fmin_scalar( @@ -354,7 +365,7 @@ def _optimize_desc_fmin_scalar( grad=objective.grad, hess=hess, args=(), - method=method.replace("-bfgs", ""), + method=method.replace("-bfgs", "").replace("fmin-", ""), x_scale=x_scale, ftol=stoptol["ftol"], xtol=stoptol["xtol"], diff --git a/desc/optimize/fmin_scalar.py b/desc/optimize/fmin_scalar.py index 8d226f4f81..1418e28d31 100644 --- a/desc/optimize/fmin_scalar.py +++ b/desc/optimize/fmin_scalar.py @@ -168,7 +168,7 @@ def fmintr( # noqa: C901 - FIXME: simplify this subproblem = trust_region_step_exact_cho else: raise ValueError( - colored("method should be one of 'dogleg' or 'subspace'", "red") + colored("method should be one of 'exact', 'dogleg' or 'subspace'", "red") ) if maxiter is None: diff --git a/docs/optimizers.csv b/docs/optimizers.csv index baac84c289..d730294658 100644 --- a/docs/optimizers.csv +++ b/docs/optimizers.csv @@ -1,9 +1,13 @@ Name,Scalar,Equality Constraints,Inequality Constraints,Stochastic,Hessian,GPU,Description +``fmin-auglag``,True,True,True,False,True,True,Augmented Lagrangian method with trust region subproblem. 
+``lsq-auglag``,False,True,True,False,False,True,Least Squares Augmented Lagrangian approach to constrained optimization ``lsq-exact``,False,False,False,False,False,True,"Trust region least squares method, similar to the `trf` method in scipy" -``dogleg``,True,False,False,False,True,True,Trust region method using Powell's dogleg method to approximately solve the trust region subproblem. -``subspace``,True,False,False,False,True,True,Trust region method solving the subproblem over the 2d subspace spanned by the gradient and newton direction. -``dogleg-bfgs``,True,False,False,False,False,True,Trust region method using Powell's dogleg method to approximately solve the trust region subproblem. Uses BFGS to approximate hessian -``subspace-bfgs``,True,False,False,False,False,True,Trust region method solving the subproblem over the 2d subspace spanned by the gradient and newton direction. Uses BFGS to approximate hessian +``fmin-exact``,True,False,False,False,True,True,Trust region method using iterative cholesky method to exactly solve the trust region subproblem. +``fmin-dogleg``,True,False,False,False,True,True,Trust region method using Powell's dogleg method to approximately solve the trust region subproblem. +``fmin-subspace``,True,False,False,False,True,True,Trust region method solving the subproblem over the 2d subspace spanned by the gradient and newton direction. +``fmin-exact-bfgs``,True,False,False,False,False,True,Trust region method using iterative cholesky method to exactly solve the trust region subproblem. Uses BFGS to approximate hessian +``fmin-dogleg-bfgs``,True,False,False,False,False,True,Trust region method using Powell's dogleg method to approximately solve the trust region subproblem. Uses BFGS to approximate hessian +``fmin-subspace-bfgs``,True,False,False,False,False,True,Trust region method solving the subproblem over the 2d subspace spanned by the gradient and newton direction. Uses BFGS to approximate hessian ``sgd``,True,False,False,True,False,True,Stochastic gradient descent with Nesterov momentum ``scipy-bfgs``,True,False,False,False,False,False,BFGS quasi-newton method with line search. See https://docs.scipy.org/doc/scipy/reference/optimize.minimize-bfgs.html ``scipy-CG``,True,False,False,False,False,False,Nonlinear conjugate gradient method. 
See https://docs.scipy.org/doc/scipy/reference/optimize.minimize-cg.html
diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py
index 7f1e8e773b..5050090abe 100644
--- a/tests/test_optimizer.py
+++ b/tests/test_optimizer.py
@@ -423,12 +423,12 @@ def test_maxiter_1_and_0_solve():
     objectives = ForceBalance()
     obj = ObjectiveFunction(objectives)
     eq = desc.examples.get("SOLOVEV")
-    for opt in ["lsq-exact", "dogleg-bfgs"]:
+    for opt in ["lsq-exact", "fmin-dogleg-bfgs"]:
         eq, result = eq.solve(
             maxiter=1, constraints=constraints, objective=obj, optimizer=opt, verbose=3
         )
         assert result["nit"] == 1
-    for opt in ["lsq-exact", "dogleg-bfgs"]:
+    for opt in ["lsq-exact", "fmin-dogleg-bfgs"]:
         eq, result = eq.solve(
             maxiter=0, constraints=constraints, objective=obj, optimizer=opt, verbose=3
         )
@@ -862,7 +862,7 @@ def test_constrained_AL_lsq():
     eq2, result = eq.optimize(
         objective=obj,
         constraints=constraints,
-        optimizer="auglag-lsq",
+        optimizer="lsq-auglag",
         maxiter=500,
         verbose=3,
         x_scale="auto",
@@ -904,7 +904,7 @@ def test_constrained_AL_scalar():
     eq2, result = eq.optimize(
         objective=obj,
         constraints=constraints,
-        optimizer="auglag",
+        optimizer="fmin-auglag",
         maxiter=500,
         verbose=3,
         x_scale="auto",

From 63066075fca28ce0501cf126e534cbb3e251d8e6 Mon Sep 17 00:00:00 2001
From: Rory Conlin
Date: Sun, 28 May 2023 15:31:12 -0400
Subject: [PATCH 40/41] Expose ctol option to the user

---
 desc/equilibrium/equilibrium.py | 4 +++-
 desc/optimize/optimizer.py      | 7 ++++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/desc/equilibrium/equilibrium.py b/desc/equilibrium/equilibrium.py
index 1bce661425..d448b061b4 100644
--- a/desc/equilibrium/equilibrium.py
+++ b/desc/equilibrium/equilibrium.py
@@ -548,6 +548,7 @@ def optimize(
         ftol=None,
         xtol=None,
         gtol=None,
+        ctol=None,
         maxiter=None,
         x_scale="auto",
         options=None,
@@ -564,7 +565,7 @@ def optimize(
             Objective function to satisfy. Default = fixed-boundary force balance.
         optimizer : str or Optimizer (optional)
             optimizer to use
-        ftol, xtol, gtol : float
+        ftol, xtol, gtol, ctol : float
             stopping tolerances. `None` will use defaults for given optimizer.
         maxiter : int
             Maximum number of solver steps.
@@ -618,6 +619,7 @@ def optimize(
             ftol=ftol,
             xtol=xtol,
             gtol=gtol,
+            ctol=ctol,
            x_scale=x_scale,
             verbose=verbose,
             maxiter=maxiter,
diff --git a/desc/optimize/optimizer.py b/desc/optimize/optimizer.py
index 9fd05eb60a..cdfdf681f8 100644
--- a/desc/optimize/optimizer.py
+++ b/desc/optimize/optimizer.py
@@ -73,6 +73,7 @@ def optimize(  # noqa: C901 - FIXME: simplify this
         ftol=None,
         xtol=None,
         gtol=None,
+        ctol=None,
         x_scale="auto",
         verbose=1,
         maxiter=None,
@@ -102,6 +103,10 @@ def optimize(  # noqa: C901 - FIXME: simplify this
             Absolute tolerance for termination by the norm of the gradient.
             Optimizer terminates when ``norm(g) < gtol``, where
             If None, defaults to 1e-8.
+        ctol : float or None, optional
+            Stopping tolerance on infinity norm of the constraint violation.
+            Optimization stops when the constraint violation is below ``ctol``
+            and at least one other tolerance is satisfied. If None, defaults to 1e-4.
         x_scale : array_like or ``'auto'``, optional
             Characteristic scale of each variable. Setting ``x_scale`` is equivalent
             to reformulating the problem in scaled variables ``xs = x / x_scale``.
@@ -204,7 +209,7 @@ def optimize( # noqa: C901 - FIXME: simplify this ftol, xtol, gtol, - None, + ctol, maxiter, options, ) From cd9a32944fb0a4578ce742613c628c17fc2d0150 Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Wed, 7 Jun 2023 12:33:01 -0400 Subject: [PATCH 41/41] Update docstrings --- desc/optimize/_desc_wrappers.py | 10 +++++----- desc/optimize/_scipy_wrappers.py | 6 +++--- desc/optimize/aug_lagrangian.py | 17 ++++++++++------- desc/optimize/aug_lagrangian_ls.py | 15 +++++++++------ desc/optimize/fmin_scalar.py | 13 ++++++------- desc/optimize/least_squares.py | 13 ++++++------- desc/optimize/stochastic.py | 16 ++++++---------- 7 files changed, 45 insertions(+), 45 deletions(-) diff --git a/desc/optimize/_desc_wrappers.py b/desc/optimize/_desc_wrappers.py index 06eb71b9be..c49f1e771c 100644 --- a/desc/optimize/_desc_wrappers.py +++ b/desc/optimize/_desc_wrappers.py @@ -48,7 +48,7 @@ def _optimize_desc_aug_lagrangian( * 1 : display a termination report. * 2 : display progress during iterations stoptol : dict - Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", + Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol", "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"} options : dict, optional Dictionary of optional keyword arguments to override default solver @@ -142,7 +142,7 @@ def _optimize_desc_aug_lagrangian_least_squares( * 1 : display a termination report. * 2 : display progress during iterations stoptol : dict - Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", + Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol", "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"} options : dict, optional Dictionary of optional keyword arguments to override default solver @@ -234,7 +234,7 @@ def _optimize_desc_least_squares( * 1 : display a termination report. * 2 : display progress during iterations stoptol : dict - Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", + Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol", "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"} options : dict, optional Dictionary of optional keyword arguments to override default solver @@ -333,7 +333,7 @@ def _optimize_desc_fmin_scalar( * 1 : display a termination report. * 2 : display progress during iterations stoptol : dict - Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", + Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol", "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"} options : dict, optional Dictionary of optional keyword arguments to override default solver @@ -416,7 +416,7 @@ def _optimize_desc_stochastic( * 1 : display a termination report. * 2 : display progress during iterations stoptol : dict - Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", + Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol", "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"} options : dict, optional Dictionary of optional keyword arguments to override default solver diff --git a/desc/optimize/_scipy_wrappers.py b/desc/optimize/_scipy_wrappers.py index 7c6e303d5a..31f3cf66fe 100644 --- a/desc/optimize/_scipy_wrappers.py +++ b/desc/optimize/_scipy_wrappers.py @@ -82,7 +82,7 @@ def _optimize_scipy_minimize( # noqa: C901 - FIXME: simplify this * 1 : display a termination report. 
         * 2 : display progress during iterations
     stoptol : dict
-        Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol",
+        Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol",
         "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"}
     options : dict, optional
         Dictionary of optional keyword arguments to override default solver
@@ -341,7 +341,7 @@ def _optimize_scipy_least_squares(  # noqa: C901 - FIXME: simplify this
         * 1 : display a termination report.
         * 2 : display progress during iterations
     stoptol : dict
-        Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol",
+        Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol",
         "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"}
     options : dict, optional
         Dictionary of optional keyword arguments to override default solver
@@ -544,7 +544,7 @@ def _optimize_scipy_constrained(  # noqa: C901 - FIXME: simplify this
         * 1 : display a termination report.
         * 2 : display progress during iterations
     stoptol : dict
-        Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol",
+        Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol",
         "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"}
     options : dict, optional
         Dictionary of optional keyword arguments to override default solver
diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py
index b0024c3208..b203b4224e 100644
--- a/desc/optimize/aug_lagrangian.py
+++ b/desc/optimize/aug_lagrangian.py
@@ -62,20 +62,23 @@ def fmin_auglag(  # noqa: C901 - FIXME: simplify this
         function. If set to ``'hess'``, the scale is iteratively updated using the
         inverse norms of the columns of the Hessian matrix.
     ftol : float or None, optional
-        Tolerance for termination by the change of the cost function. Default
-        is 1e-8. The optimization process is stopped when ``dF < ftol * F``,
+        Tolerance for termination by the change of the cost function.
+        The optimization process is stopped when ``dF < ftol * F``,
         and there was an adequate agreement between a local quadratic model and
         the true model in the last step. If None, the termination by this
         condition is disabled.
     xtol : float or None, optional
         Tolerance for termination by the change of the independent variables.
-        Default is 1e-8. Optimization is stopped when
-        ``norm(dx) < xtol * (xtol + norm(x))``. If None, the termination by
-        this condition is disabled.
+        Optimization is stopped when ``norm(dx) < xtol * (xtol + norm(x))``.
+        If None, the termination by this condition is disabled.
     gtol : float or None, optional
-        Absolute tolerance for termination by the norm of the gradient. Default is 1e-8.
-        Optimizer teriminates when ``norm(g) < gtol``, where
+        Absolute tolerance for termination by the norm of the gradient.
+        Optimizer terminates when ``max(abs(g)) < gtol``.
         If None, the termination by this condition is disabled.
+    ctol : float, optional
+        Tolerance for stopping based on infinity norm of the constraint violation.
+        Optimizer terminates when ``max(abs(constr_violation)) < ctol`` AND one or more
+        of the other tolerances are met (``ftol``, ``xtol``, ``gtol``).
     verbose : {0, 1, 2}, optional
         * 0 (default) : work silently.
         * 1 : display a termination report.
         * 2 : display progress during iterations
diff --git a/desc/optimize/aug_lagrangian_ls.py b/desc/optimize/aug_lagrangian_ls.py
index 2bb16ea14f..20473be20c 100644
--- a/desc/optimize/aug_lagrangian_ls.py
+++ b/desc/optimize/aug_lagrangian_ls.py
@@ -62,20 +62,23 @@ def lsq_auglag(  # noqa: C901 - FIXME: simplify this
         function. If set to ``'hess'``, the scale is iteratively updated using the
         inverse norms of the columns of the Hessian matrix.
     ftol : float or None, optional
-        Tolerance for termination by the change of the cost function. Default
-        is 1e-8. The optimization process is stopped when ``dF < ftol * F``,
+        Tolerance for termination by the change of the cost function.
+        The optimization process is stopped when ``dF < ftol * F``,
         and there was an adequate agreement between a local quadratic model and
         the true model in the last step. If None, the termination by this
         condition is disabled.
     xtol : float or None, optional
         Tolerance for termination by the change of the independent variables.
-        Default is 1e-8. Optimization is stopped when
-        ``norm(dx) < xtol * (xtol + norm(x))``. If None, the termination by
-        this condition is disabled.
+        Optimization is stopped when ``norm(dx) < xtol * (xtol + norm(x))``.
+        If None, the termination by this condition is disabled.
     gtol : float or None, optional
-        Absolute tolerance for termination by the norm of the gradient. Default is 1e-8.
+        Absolute tolerance for termination by the norm of the gradient.
         Optimizer teriminates when ``norm(g) < gtol``, where
         If None, the termination by this condition is disabled.
+    ctol : float, optional
+        Tolerance for stopping based on infinity norm of the constraint violation.
+        Optimizer terminates when ``max(abs(constr_violation)) < ctol`` AND one or more
+        of the other tolerances are met (``ftol``, ``xtol``, ``gtol``).
     verbose : {0, 1, 2}, optional
         * 0 (default) : work silently.
         * 1 : display a termination report.
diff --git a/desc/optimize/fmin_scalar.py b/desc/optimize/fmin_scalar.py
index 1418e28d31..150b95fc1e 100644
--- a/desc/optimize/fmin_scalar.py
+++ b/desc/optimize/fmin_scalar.py
@@ -79,19 +79,18 @@ def fmintr(  # noqa: C901 - FIXME: simplify this
         function. If set to ``'hess'``, the scale is iteratively updated using the
         inverse norms of the columns of the Hessian matrix.
     ftol : float or None, optional
-        Tolerance for termination by the change of the cost function. Default
-        is 1e-8. The optimization process is stopped when ``dF < ftol * F``,
+        Tolerance for termination by the change of the cost function.
+        The optimization process is stopped when ``dF < ftol * F``,
         and there was an adequate agreement between a local quadratic model and
         the true model in the last step. If None, the termination by this
         condition is disabled.
     xtol : float or None, optional
         Tolerance for termination by the change of the independent variables.
-        Default is 1e-8. Optimization is stopped when
-        ``norm(dx) < xtol * (xtol + norm(x))``. If None, the termination by
-        this condition is disabled.
+        Optimization is stopped when ``norm(dx) < xtol * (xtol + norm(x))``.
+        If None, the termination by this condition is disabled.
     gtol : float or None, optional
-        Absolute tolerance for termination by the norm of the gradient. Default is 1e-8.
-        Optimizer teriminates when ``norm(g) < gtol``, where
+        Absolute tolerance for termination by the norm of the gradient.
+        Optimizer terminates when ``max(abs(g)) < gtol``.
         If None, the termination by this condition is disabled.
     verbose : {0, 1, 2}, optional
         * 0 (default) : work silently.
diff --git a/desc/optimize/least_squares.py b/desc/optimize/least_squares.py
index f89caf1dfb..2ac5b15856 100644
--- a/desc/optimize/least_squares.py
+++ b/desc/optimize/least_squares.py
@@ -69,19 +69,18 @@ def lsqtr(  # noqa: C901 - FIXME: simplify this
         function. If set to ``'jac'``, the scale is iteratively updated using the
         inverse norms of the columns of the Jacobian matrix.
     ftol : float or None, optional
-        Tolerance for termination by the change of the cost function. Default
-        is 1e-8. The optimization process is stopped when ``dF < ftol * F``,
+        Tolerance for termination by the change of the cost function.
+        The optimization process is stopped when ``dF < ftol * F``,
         and there was an adequate agreement between a local quadratic model and
         the true model in the last step. If None, the termination by this
         condition is disabled.
     xtol : float or None, optional
         Tolerance for termination by the change of the independent variables.
-        Default is 1e-8. Optimization is stopped when
-        ``norm(dx) < xtol * (xtol + norm(x))``. If None, the termination by
-        this condition is disabled.
+        Optimization is stopped when ``norm(dx) < xtol * (xtol + norm(x))``.
+        If None, the termination by this condition is disabled.
     gtol : float or None, optional
-        Absolute tolerance for termination by the norm of the gradient. Default is 1e-8.
-        Optimizer teriminates when ``norm(g) < gtol``, where
+        Absolute tolerance for termination by the norm of the gradient.
+        Optimizer terminates when ``max(abs(g)) < gtol``.
         If None, the termination by this condition is disabled.
     verbose : {0, 1, 2}, optional
         * 0 (default) : work silently.
diff --git a/desc/optimize/stochastic.py b/desc/optimize/stochastic.py
index bd1a5c5e9d..ac918920f0 100644
--- a/desc/optimize/stochastic.py
+++ b/desc/optimize/stochastic.py
@@ -47,19 +47,15 @@ def sgd(
         Step size update rule. Currently only the default "sgd" is available.
         Future updates may include RMSProp, Adam, etc.
     ftol : float or None, optional
-        Tolerance for termination by the change of the cost function. Default
-        is 1e-8. The optimization process is stopped when ``dF < ftol * F``,
-        and there was an adequate agreement between a local quadratic model and
-        the true model in the last step. If None, the termination by this
-        condition is disabled.
+        Tolerance for termination by the change of the cost function.
+        The optimization process is stopped when ``dF < ftol * F``.
     xtol : float or None, optional
         Tolerance for termination by the change of the independent variables.
-        Default is 1e-8. Optimization is stopped when
-        ``norm(dx) < xtol * (xtol + norm(x))``. If None, the termination by
-        this condition is disabled.
+        Optimization is stopped when ``norm(dx) < xtol * (xtol + norm(x))``.
+        If None, the termination by this condition is disabled.
     gtol : float or None, optional
-        Absolute tolerance for termination by the norm of the gradient. Default is 1e-6.
-        Optimizer teriminates when ``norm(g) < gtol``, where
+        Absolute tolerance for termination by the norm of the gradient.
+        Optimizer terminates when ``max(abs(g)) < gtol``.
         If None, the termination by this condition is disabled.
     verbose : {0, 1, 2}, optional
         * 0 (default) : work silently.