diff --git a/desc/equilibrium/equilibrium.py b/desc/equilibrium/equilibrium.py index ae2314aed8..4413cb6d00 100644 --- a/desc/equilibrium/equilibrium.py +++ b/desc/equilibrium/equilibrium.py @@ -548,6 +548,7 @@ def optimize( ftol=None, xtol=None, gtol=None, + ctol=None, maxiter=None, x_scale="auto", options=None, @@ -564,7 +565,7 @@ def optimize( Objective function to satisfy. Default = fixed-boundary force balance. optimizer : str or Optimizer (optional) optimizer to use - ftol, xtol, gtol : float + ftol, xtol, gtol, ctol : float stopping tolerances. `None` will use defaults for given optimizer. maxiter : int Maximum number of solver steps. @@ -618,6 +619,7 @@ def optimize( ftol=ftol, xtol=xtol, gtol=gtol, + ctol=ctol, x_scale=x_scale, verbose=verbose, maxiter=maxiter, diff --git a/desc/optimize/__init__.py b/desc/optimize/__init__.py index 7a5531662f..7093f03c4a 100644 --- a/desc/optimize/__init__.py +++ b/desc/optimize/__init__.py @@ -2,6 +2,8 @@ from . import _desc_wrappers, _scipy_wrappers from ._constraint_wrappers import LinearConstraintProjection, ProximalProjection +from .aug_lagrangian import fmin_auglag +from .aug_lagrangian_ls import lsq_auglag from .fmin_scalar import fmintr from .least_squares import lsqtr from .optimizer import Optimizer, optimizers, register_optimizer diff --git a/desc/optimize/_desc_wrappers.py b/desc/optimize/_desc_wrappers.py index 292f5399c3..35e7d63b1e 100644 --- a/desc/optimize/_desc_wrappers.py +++ b/desc/optimize/_desc_wrappers.py @@ -1,11 +1,200 @@ +from scipy.optimize import NonlinearConstraint + from desc.backend import jnp +from .aug_lagrangian import fmin_auglag +from .aug_lagrangian_ls import lsq_auglag from .fmin_scalar import fmintr from .least_squares import lsqtr from .optimizer import register_optimizer from .stochastic import sgd +@register_optimizer( + name="fmin-auglag", + description="Augmented Lagrangian method with trust region subproblem.", + scalar=True, + equality_constraints=True, + inequality_constraints=True, + stochastic=False, + hessian=True, + GPU=True, +) +def _optimize_desc_aug_lagrangian( + objective, constraint, x0, method, x_scale, verbose, stoptol, options=None +): + """Wrapper for desc.optimize.fmin_lag_ls_stel. + + Parameters + ---------- + objective : ObjectiveFunction + Function to minimize. + constraint : ObjectiveFunction + Constraint to satisfy + x0 : ndarray + Starting point. + method : str + Name of the method to use. Any of the options in scipy.optimize.least_squares. + x_scale : array_like or ‘jac’, optional + Characteristic scale of each variable. Setting x_scale is equivalent to + reformulating the problem in scaled variables xs = x / x_scale. An alternative + view is that the size of a trust region along jth dimension is proportional to + x_scale[j]. Improved convergence may be achieved by setting x_scale such that + a step of a given size along any of the scaled variables has a similar effect + on the cost function. If set to ‘jac’, the scale is iteratively updated using + the inverse norms of the columns of the Jacobian matrix. + verbose : int + * 0 : work silently. + * 1 : display a termination report. + * 2 : display progress during iterations + stoptol : dict + Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol", + "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"} + options : dict, optional + Dictionary of optional keyword arguments to override default solver + settings. See the code for more details. 
+ + Returns + ------- + res : OptimizeResult + The optimization result represented as a ``OptimizeResult`` object. + Important attributes are: ``x`` the solution array, ``success`` a + Boolean flag indicating if the optimizer exited successfully and + ``message`` which describes the cause of the termination. See + `OptimizeResult` for a description of other attributes. + + """ + options = {} if options is None else options + if not isinstance(x_scale, str) and jnp.allclose(x_scale, 1): + options.setdefault("initial_trust_ratio", 1e-3) + options.setdefault("max_trust_radius", 1.0) + options["max_nfev"] = stoptol["max_nfev"] + options["max_ngev"] = stoptol["max_ngev"] + options["max_nhev"] = stoptol["max_nhev"] + + if constraint is not None: + lb, ub = constraint.bounds_scaled + constraint_wrapped = NonlinearConstraint( + constraint.compute_scaled, + lb, + ub, + constraint.jac_scaled, + ) + else: + constraint_wrapped = None + + result = fmin_auglag( + objective.compute_scalar, + x0=x0, + grad=objective.grad, + hess=objective.hess, + bounds=(-jnp.inf, jnp.inf), + constraint=constraint_wrapped, + args=(), + x_scale=x_scale, + ftol=stoptol["ftol"], + xtol=stoptol["xtol"], + gtol=stoptol["gtol"], + ctol=stoptol["ctol"], + verbose=verbose, + maxiter=stoptol["maxiter"], + options=options, + ) + return result + + +@register_optimizer( + name="lsq-auglag", + description="Least Squares Augmented Lagrangian approach " + + "to constrained optimization", + scalar=False, + equality_constraints=True, + inequality_constraints=True, + stochastic=False, + hessian=False, + GPU=True, +) +def _optimize_desc_aug_lagrangian_least_squares( + objective, constraint, x0, method, x_scale, verbose, stoptol, options=None +): + """Wrapper for desc.optimize.fmin_lag_ls_stel. + + Parameters + ---------- + objective : ObjectiveFunction + Function to minimize. + constraint : ObjectiveFunction + Constraint to satisfy + x0 : ndarray + Starting point. + method : str + Name of the method to use. Any of the options in scipy.optimize.least_squares. + x_scale : array_like or ‘jac’, optional + Characteristic scale of each variable. Setting x_scale is equivalent to + reformulating the problem in scaled variables xs = x / x_scale. An alternative + view is that the size of a trust region along jth dimension is proportional to + x_scale[j]. Improved convergence may be achieved by setting x_scale such that + a step of a given size along any of the scaled variables has a similar effect + on the cost function. If set to ‘jac’, the scale is iteratively updated using + the inverse norms of the columns of the Jacobian matrix. + verbose : int + * 0 : work silently. + * 1 : display a termination report. + * 2 : display progress during iterations + stoptol : dict + Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol", + "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"} + options : dict, optional + Dictionary of optional keyword arguments to override default solver + settings. See the code for more details. + + Returns + ------- + res : OptimizeResult + The optimization result represented as a ``OptimizeResult`` object. + Important attributes are: ``x`` the solution array, ``success`` a + Boolean flag indicating if the optimizer exited successfully and + ``message`` which describes the cause of the termination. See + `OptimizeResult` for a description of other attributes. 
+ + """ + options = {} if options is None else options + if not isinstance(x_scale, str) and jnp.allclose(x_scale, 1): + options.setdefault("initial_trust_radius", 1e-3) + options.setdefault("max_trust_radius", 1.0) + options["max_nfev"] = stoptol["max_nfev"] + options["max_njev"] = stoptol["max_njev"] + + if constraint is not None: + lb, ub = constraint.bounds_scaled + constraint_wrapped = NonlinearConstraint( + constraint.compute_scaled, + lb, + ub, + constraint.jac_scaled, + ) + else: + constraint_wrapped = None + + result = lsq_auglag( + objective.compute_scaled_error, + x0=x0, + jac=objective.jac_scaled, + bounds=(-jnp.inf, jnp.inf), + constraint=constraint_wrapped, + args=(), + x_scale=x_scale, + ftol=stoptol["ftol"], + xtol=stoptol["xtol"], + gtol=stoptol["gtol"], + ctol=stoptol["ctol"], + verbose=verbose, + maxiter=stoptol["maxiter"], + options=options, + ) + return result + + @register_optimizer( name="lsq-exact", description="Trust region least squares method, " @@ -45,7 +234,7 @@ def _optimize_desc_least_squares( * 1 : display a termination report. * 2 : display progress during iterations stoptol : dict - Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", + Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol", "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"} options : dict, optional Dictionary of optional keyword arguments to override default solver @@ -89,12 +278,23 @@ def _optimize_desc_least_squares( @register_optimizer( - name=["dogleg", "subspace", "dogleg-bfgs", "subspace-bfgs"], + name=[ + "fmin-exact", + "fmin-dogleg", + "fmin-subspace", + "fmin-exact-bfgs", + "fmin-dogleg-bfgs", + "fmin-subspace-bfgs", + ], description=[ + "Trust region method using iterative cholesky method to exactly solve the " + + "trust region subproblem.", "Trust region method using Powell's dogleg method to approximately solve the " + "trust region subproblem.", "Trust region method solving the subproblem over the 2d subspace spanned by " + "the gradient and newton direction.", + "Trust region method using iterative cholesky method to exactly solve the " + + "trust region subproblem. Uses BFGS to approximate hessian", "Trust region method using Powell's dogleg method to approximately solve the " + "trust region subproblem. Uses BFGS to approximate hessian", "Trust region method solving the subproblem over the 2d subspace spanned by " @@ -104,7 +304,7 @@ def _optimize_desc_least_squares( equality_constraints=False, inequality_constraints=False, stochastic=False, - hessian=[True, True, False, False], + hessian=[True, True, True, False, False, False], GPU=True, ) def _optimize_desc_fmin_scalar( @@ -135,7 +335,7 @@ def _optimize_desc_fmin_scalar( * 1 : display a termination report. * 2 : display progress during iterations stoptol : dict - Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", + Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol", "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"} options : dict, optional Dictionary of optional keyword arguments to override default solver @@ -169,7 +369,7 @@ def _optimize_desc_fmin_scalar( grad=objective.grad, hess=hess, args=(), - method=method.replace("-bfgs", ""), + method=method.replace("-bfgs", "").replace("fmin-", ""), x_scale=x_scale, ftol=stoptol["ftol"], xtol=stoptol["xtol"], @@ -220,7 +420,7 @@ def _optimize_desc_stochastic( * 1 : display a termination report. 
* 2 : display progress during iterations stoptol : dict - Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", + Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol", "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"} options : dict, optional Dictionary of optional keyword arguments to override default solver diff --git a/desc/optimize/_scipy_wrappers.py b/desc/optimize/_scipy_wrappers.py index f2a7271f0c..dda278b908 100644 --- a/desc/optimize/_scipy_wrappers.py +++ b/desc/optimize/_scipy_wrappers.py @@ -82,7 +82,7 @@ def _optimize_scipy_minimize( # noqa: C901 - FIXME: simplify this * 1 : display a termination report. * 2 : display progress during iterations stoptol : dict - Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", + Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol", "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"} options : dict, optional Dictionary of optional keyword arguments to override default solver @@ -341,7 +341,7 @@ def _optimize_scipy_least_squares( # noqa: C901 - FIXME: simplify this * 1 : display a termination report. * 2 : display progress during iterations stoptol : dict - Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", + Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol", "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"} options : dict, optional Dictionary of optional keyword arguments to override default solver @@ -544,7 +544,7 @@ def _optimize_scipy_constrained( # noqa: C901 - FIXME: simplify this * 1 : display a termination report. * 2 : display progress during iterations stoptol : dict - Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", + Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol", "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"} options : dict, optional Dictionary of optional keyword arguments to override default solver diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py new file mode 100644 index 0000000000..b203b4224e --- /dev/null +++ b/desc/optimize/aug_lagrangian.py @@ -0,0 +1,339 @@ +"""Augmented Langrangian for scalar valued objectives.""" + +from scipy.optimize import NonlinearConstraint, OptimizeResult + +from desc.backend import jnp +from desc.optimize.fmin_scalar import fmintr + +from .bound_utils import find_active_constraints +from .utils import ( + check_termination, + inequality_to_bounds, + print_header_nonlinear, + print_iteration_nonlinear, +) + + +def fmin_auglag( # noqa: C901 - FIXME: simplify this + fun, + x0, + grad, + hess, + bounds=(-jnp.inf, jnp.inf), + constraint=None, + args=(), + x_scale=1, + ftol=1e-6, + xtol=1e-6, + gtol=1e-6, + ctol=1e-6, + verbose=1, + maxiter=None, + options={}, +): + """Minimize a function with constraints using an augmented Langrangian method. + + Parameters + ---------- + fun : callable + objective to be minimized. Should have a signature like fun(x,*args)-> float + x0 : array-like + initial guess + grad : callable + function to compute gradient, df/dx. Should take the same arguments as fun + hess : callable + function to compute Hessian matrix of fun + bounds : tuple of array-like + Lower and upper bounds on independent variables. Defaults to no bounds. + Each array must match the size of x0 or be a scalar, in the latter case a + bound will be the same for all variables. Use np.inf with an appropriate sign + to disable bounds on all or some variables. 
+ constraint : scipy.optimize.NonlinearConstraint + constraint to be satisfied + args : tuple + additional arguments passed to fun, grad, and hess + x_scale : array_like or ``'hess'``, optional + Characteristic scale of each variable. Setting ``x_scale`` is equivalent + to reformulating the problem in scaled variables ``xs = x / x_scale``. + An alternative view is that the size of a trust region along jth + dimension is proportional to ``x_scale[j]``. Improved convergence may + be achieved by setting ``x_scale`` such that a step of a given size + along any of the scaled variables has a similar effect on the cost + function. If set to ``'hess'``, the scale is iteratively updated using the + inverse norms of the columns of the Hessian matrix. + ftol : float or None, optional + Tolerance for termination by the change of the cost function. + The optimization process is stopped when ``dF < ftol * F``, + and there was an adequate agreement between a local quadratic model and + the true model in the last step. If None, the termination by this + condition is disabled. + xtol : float or None, optional + Tolerance for termination by the change of the independent variables. + Optimization is stopped when ``norm(dx) < xtol * (xtol + norm(x))``. + If None, the termination by this condition is disabled. + gtol : float or None, optional + Absolute tolerance for termination by the norm of the gradient. + Optimizer teriminates when ``max(abs(g)) < gtol``. + If None, the termination by this condition is disabled. + ctol : float, optional + Tolerance for stopping based on infinity norm of the constraint violation. + Optimizer terminates when ``max(abs(constr_violation)) < ctol`` AND one or more + of the other tolerances are met (``ftol``, ``xtol``, ``gtol``) + verbose : {0, 1, 2}, optional + * 0 (default) : work silently. + * 1 : display a termination report. + * 2 : display progress during iterations + maxiter : int, optional + maximum number of iterations. Defaults to size(x)*100 + options : dict, optional + dictionary of optional keyword arguments to override default solver settings. + See the code for more details. + + Returns + ------- + res : OptimizeResult + The optimization result represented as a ``OptimizeResult`` object. + Important attributes are: ``x`` the solution array, ``success`` a + Boolean flag indicating if the optimizer exited successfully. 
+ + """ + if constraint is None: + # create a dummy constraint + constraint = NonlinearConstraint( + fun=lambda x, *args: jnp.array([0.0]), + lb=0.0, + ub=0.0, + jac=lambda x, *args: jnp.zeros((1, x.size)), + ) + ( + z0, + fun_wrapped, + grad_wrapped, + hess_wrapped, + constraint_wrapped, + zbounds, + z2xs, + ) = inequality_to_bounds( + x0, + fun, + grad, + hess, + constraint, + bounds, + ) + + def lagfun(z, y, mu, *args): + c = constraint_wrapped.fun(z, *args) + return fun_wrapped(z, *args) - jnp.dot(y, c) + mu / 2 * jnp.dot(c, c) + + def laggrad(z, y, mu, *args): + c = constraint_wrapped.fun(z, *args) + J = constraint_wrapped.jac(z, *args) + return grad_wrapped(z, *args) - jnp.dot(y, J) + mu * jnp.dot(c, J) + + assert callable(hess_wrapped) + if callable(constraint_wrapped.hess): + + def laghess(z, y, mu, *args): + c = constraint_wrapped.fun(z, *args) + Hf = hess_wrapped(z, *args) + Jc = constraint_wrapped.jac(z, *args) + Hc1 = constraint_wrapped.hess(z, y) + Hc2 = constraint_wrapped.hess(z, c) + return Hf - Hc1 + mu * (Hc2 + jnp.dot(Jc.T, Jc)) + + else: + + def laghess(z, y, mu, *args): + H = hess_wrapped(z, *args) + J = constraint_wrapped.jac(z, *args) + # ignoring higher order derivatives of constraints for now + return H + mu * jnp.dot(J.T, J) + + # TODO: figure out how to let BFGS maintain state between subproblems, otherwise + # it doesn't really converge + + nfev = 0 + ngev = 0 + nhev = 0 + iteration = 0 + + z = z0.copy() + f = fun_wrapped(z, *args) + c = constraint_wrapped.fun(z) + nfev += 1 + + mu = options.pop("initial_penalty_parameter", 10) + y = options.pop("initial_multipliers", None) + if y is None: # use least squares multiplier estimates + _J = constraint_wrapped.jac(z, *args) + _g = grad_wrapped(z, *args) + y = jnp.linalg.lstsq(_J.T, _g)[0] + y = jnp.nan_to_num(y, nan=0.0, posinf=0.0, neginf=0.0) + + if maxiter is None: + maxiter = z.size * 100 + maxiter_inner = options.pop("maxiter_inner", 20) + max_nfev = options.pop("max_nfev", 5 * maxiter + 1) + max_ngev = options.pop("max_ngev", maxiter + 1) + max_nhev = options.pop("max_nhev", maxiter + 1) + + # notation following Conn & Gould, algorithm 14.4.2, but with our mu = their mu^-1 + omega = options.pop("omega", 1.0) + eta = options.pop("eta", 1.0) + alpha_omega = options.pop("alpha_omega", 1.0) + beta_omega = options.pop("beta_omega", 1.0) + alpha_eta = options.pop("alpha_eta", 0.1) + beta_eta = options.pop("beta_eta", 0.9) + tau = options.pop("tau", 10) + + gtolk = omega / mu**alpha_omega + ctolk = eta / mu**alpha_eta + zold = z + fold = f + allx = [] + + success = None + message = None + step_norm = jnp.inf + actual_reduction = jnp.inf + g_norm = jnp.linalg.norm(laggrad(z, y, mu, *args), ord=jnp.inf) + constr_violation = jnp.linalg.norm(c, ord=jnp.inf) + + options.setdefault("initial_trust_radius", "scipy") + options["return_tr"] = True + + if verbose > 1: + print_header_nonlinear(True, "Penalty param", "max(|mltplr|)") + print_iteration_nonlinear( + iteration, + nfev, + f, + actual_reduction, + step_norm, + g_norm, + constr_violation, + mu, + jnp.max(jnp.abs(y)), + ) + + while iteration < maxiter: + result = fmintr( + lagfun, + z, + grad=laggrad, + hess=laghess, + bounds=zbounds, + args=(y, mu) + args, + x_scale=x_scale, + ftol=0, + xtol=0, + gtol=gtolk, + maxiter=maxiter_inner, + verbose=0, + options=options.copy(), + ) + allx += result["allx"] + nfev += result["nfev"] + ngev += result["ngev"] + nhev += result["nhev"] + iteration += result["nit"] + zold = z + fold = f + z = result["x"] + f = fun_wrapped(z, *args) + 
c = constraint_wrapped.fun(z, *args) + nfev += 1 + constr_violation = jnp.linalg.norm(c, ord=jnp.inf) + step_norm = jnp.linalg.norm(zold - z) + z_norm = jnp.linalg.norm(z) + g_norm = result["optimality"] + actual_reduction = fold - f + # don't stop if we increased cost + reduction_ratio = jnp.sign(actual_reduction) + # reuse the previous trust radius on the next pass + options["initial_trust_radius"] = float(result["alltr"][-1]) + + if verbose > 1: + print_iteration_nonlinear( + iteration, + nfev, + f, + actual_reduction, + step_norm, + g_norm, + constr_violation, + mu, + jnp.max(jnp.abs(y)), + ) + + success, message = check_termination( + actual_reduction, + f, + step_norm, + z_norm, + g_norm, + reduction_ratio, + ftol, + xtol, + gtol, + iteration, + maxiter, + nfev, + max_nfev, + ngev, + max_ngev, + nhev, + max_nhev, + constr_violation=constr_violation, + ctol=ctol, + ) + if success is not None: + break + + if not result["success"]: # did the subproblem actually finish, or maxiter? + continue + elif constr_violation < ctolk: + y = y - mu * c + ctolk = ctolk / (mu**beta_eta) + gtolk = gtolk / (mu**beta_omega) + else: + mu = tau * mu + ctolk = eta / (mu**alpha_eta) + gtolk = omega / (mu**alpha_omega) + + x, s = z2xs(z) + active_mask = find_active_constraints(z, zbounds[0], zbounds[1], rtol=xtol) + result = OptimizeResult( + x=x, + s=s, + y=y, + success=success, + fun=f, + grad=result["grad"], + v=result["v"], + optimality=g_norm, + nfev=nfev, + ngev=ngev, + nhev=nhev, + nit=iteration, + message=message, + active_mask=active_mask, + constr_violation=constr_violation, + ) + if verbose > 0: + if result["success"]: + print(result["message"]) + else: + print("Warning: " + result["message"]) + print(f""" Current function value: {result["fun"]:.3e}""") + print(f""" Constraint violation: {result['constr_violation']:.3e}""") + print(f""" Total delta_x: {jnp.linalg.norm(x0 - result["x"]):.3e}""") + print(f""" Iterations: {result["nit"]:d}""") + print(f""" Function evaluations: {result["nfev"]:d}""") + print(f""" Gradient evaluations: {result["ngev"]:d}""") + print(f""" Hessian evaluations: {result["nhev"]:d}""") + + result["allx"] = [z2xs(x)[0] for x in allx] + + return result diff --git a/desc/optimize/aug_lagrangian_ls.py b/desc/optimize/aug_lagrangian_ls.py new file mode 100644 index 0000000000..20473be20c --- /dev/null +++ b/desc/optimize/aug_lagrangian_ls.py @@ -0,0 +1,330 @@ +"""Augmented Langrangian for vector valued objectives.""" + +from scipy.optimize import NonlinearConstraint, OptimizeResult + +from desc.backend import jnp +from desc.optimize.least_squares import lsqtr + +from .bound_utils import find_active_constraints +from .utils import ( + check_termination, + inequality_to_bounds, + print_header_nonlinear, + print_iteration_nonlinear, +) + + +def lsq_auglag( # noqa: C901 - FIXME: simplify this + fun, + x0, + jac, + bounds=(-jnp.inf, jnp.inf), + constraint=None, + args=(), + x_scale=1, + ftol=1e-6, + xtol=1e-6, + gtol=1e-6, + ctol=1e-6, + verbose=1, + maxiter=None, + options={}, +): + """Minimize a function with constraints using an augmented Langrangian method. + + The objective function is assumed to be vector valued, and is minimized in the least + squares sense. + + Parameters + ---------- + fun : callable + objective to be minimized. Should have a signature like fun(x,*args)-> 1d array + x0 : array-like + initial guess + jac : callable: + function to compute Jacobian matrix of fun + bounds : tuple of array-like + Lower and upper bounds on independent variables. 
Defaults to no bounds. + Each array must match the size of x0 or be a scalar, in the latter case a + bound will be the same for all variables. Use np.inf with an appropriate sign + to disable bounds on all or some variables. + constraint : scipy.optimize.NonlinearConstraint + constraint to be satisfied + args : tuple + additional arguments passed to fun, grad, and hess + x_scale : array_like or ``'hess'``, optional + Characteristic scale of each variable. Setting ``x_scale`` is equivalent + to reformulating the problem in scaled variables ``xs = x / x_scale``. + An alternative view is that the size of a trust region along jth + dimension is proportional to ``x_scale[j]``. Improved convergence may + be achieved by setting ``x_scale`` such that a step of a given size + along any of the scaled variables has a similar effect on the cost + function. If set to ``'hess'``, the scale is iteratively updated using the + inverse norms of the columns of the Hessian matrix. + ftol : float or None, optional + Tolerance for termination by the change of the cost function. + The optimization process is stopped when ``dF < ftol * F``, + and there was an adequate agreement between a local quadratic model and + the true model in the last step. If None, the termination by this + condition is disabled. + xtol : float or None, optional + Tolerance for termination by the change of the independent variables. + Optimization is stopped when ``norm(dx) < xtol * (xtol + norm(x))``. + If None, the termination by this condition is disabled. + gtol : float or None, optional + Absolute tolerance for termination by the norm of the gradient. + Optimizer teriminates when ``norm(g) < gtol``, where + If None, the termination by this condition is disabled. + ctol : float, optional + Tolerance for stopping based on infinity norm of the constraint violation. + Optimizer terminates when ``max(abs(constr_violation)) < ctol`` AND one or more + of the other tolerances are met (``ftol``, ``xtol``, ``gtol``) + verbose : {0, 1, 2}, optional + * 0 (default) : work silently. + * 1 : display a termination report. + * 2 : display progress during iterations + maxiter : int, optional + maximum number of iterations. Defaults to size(x)*100 + options : dict, optional + dictionary of optional keyword arguments to override default solver settings. + See the code for more details. + + Returns + ------- + res : OptimizeResult + The optimization result represented as a ``OptimizeResult`` object. + Important attributes are: ``x`` the solution array, ``success`` a + Boolean flag indicating if the optimizer exited successfully. 
+ + """ + if constraint is None: + # create a dummy constraint + constraint = NonlinearConstraint( + fun=lambda x, *args: jnp.array([0.0]), + lb=0.0, + ub=0.0, + jac=lambda x, *args: jnp.zeros((1, x.size)), + ) + ( + z0, + fun_wrapped, + jac_wrapped, + _, + constraint_wrapped, + zbounds, + z2xs, + ) = inequality_to_bounds( + x0, + fun, + jac, + None, + constraint, + bounds, + ) + + # L(x,y,mu) = 1/2 f(x)^2 - y*c(x) + mu/2 c(x)^2 + y^2/(2*mu) + # = 1/2 f(x)^2 + 1/2 [-y/sqrt(mu) + sqrt(mu) c(x)]^2 + + def lagfun(z, y, mu, *args): + f = fun_wrapped(z, *args) + c = constraint_wrapped.fun(z, *args) + c = -y / jnp.sqrt(mu) + jnp.sqrt(mu) * c + return jnp.concatenate((f, c)) + + def lagjac(z, y, mu, *args): + Jf = jac_wrapped(z, *args) + Jc = constraint_wrapped.jac(z, *args) + Jc = jnp.sqrt(mu) * Jc + return jnp.vstack((Jf, Jc)) + + def laggrad(z, y, mu, *args): + f = lagfun(z, y, mu, *args) + J = lagjac(z, y, mu, *args) + return f @ J + + nfev = 0 + njev = 0 + iteration = 0 + + z = z0.copy() + f = fun_wrapped(z, *args) + cost = 1 / 2 * jnp.dot(f, f) + c = constraint_wrapped.fun(z) + nfev += 1 + + mu = options.pop("initial_penalty_parameter", 10) + y = options.pop("initial_multipliers", None) + if y is None: # use least squares multiplier estimates + _J = constraint_wrapped.jac(z, *args) + _g = f @ jac_wrapped(z, *args) + y = jnp.linalg.lstsq(_J.T, _g)[0] + y = jnp.nan_to_num(y, nan=0.0, posinf=0.0, neginf=0.0) + + if maxiter is None: + maxiter = z.size * 100 + maxiter_inner = options.pop("maxiter_inner", 20) + max_nfev = options.pop("max_nfev", 5 * maxiter + 1) + max_njev = options.pop("max_njev", maxiter + 1) + + # notation following Conn & Gould, algorithm 14.4.2, but with our mu = their mu^-1 + omega = options.pop("omega", 1.0) + eta = options.pop("eta", 1.0) + alpha_omega = options.pop("alpha_omega", 1.0) + beta_omega = options.pop("beta_omega", 1.0) + alpha_eta = options.pop("alpha_eta", 0.1) + beta_eta = options.pop("beta_eta", 0.9) + tau = options.pop("tau", 10) + + gtolk = omega / mu**alpha_omega + ctolk = eta / mu**alpha_eta + zold = z + cost_old = cost + allx = [] + + success = None + message = None + step_norm = jnp.inf + actual_reduction = jnp.inf + + g_norm = jnp.linalg.norm(laggrad(z, y, mu, *args), ord=jnp.inf) + constr_violation = jnp.linalg.norm(c, ord=jnp.inf) + + options.setdefault("initial_trust_radius", "scipy") + options["return_tr"] = True + + if verbose > 1: + print_header_nonlinear(True, "Penalty param", "max(|mltplr|)") + print_iteration_nonlinear( + iteration, + nfev, + cost, + actual_reduction, + step_norm, + g_norm, + constr_violation, + mu, + jnp.max(jnp.abs(y)), + ) + + while iteration < maxiter: + result = lsqtr( + lagfun, + z, + lagjac, + bounds=zbounds, + args=(y, mu) + args, + x_scale=x_scale, + ftol=0, + xtol=0, + gtol=gtolk, + maxiter=maxiter_inner, + verbose=0, + options=options.copy(), + ) + # update outer counters + allx += result["allx"] + nfev += result["nfev"] + njev += result["njev"] + iteration += result["nit"] + z = result["x"] + f = fun_wrapped(z, *args) + cost = 1 / 2 * jnp.dot(f, f) + c = constraint_wrapped.fun(z) + nfev += 1 + constr_violation = jnp.linalg.norm(c, ord=jnp.inf) + step_norm = jnp.linalg.norm(zold - z) + z_norm = jnp.linalg.norm(z) + g_norm = result["optimality"] + actual_reduction = cost_old - cost + # don't stop if we increased cost + reduction_ratio = jnp.sign(actual_reduction) + # reuse the previous trust radius on the next pass + options["initial_trust_radius"] = float(result["alltr"][-1]) + + if verbose > 1: + 
print_iteration_nonlinear( + iteration, + nfev, + cost, + actual_reduction, + step_norm, + g_norm, + constr_violation, + mu, + jnp.max(jnp.abs(y)), + ) + + # check if we can stop the outer loop + success, message = check_termination( + actual_reduction, + cost, + step_norm, + z_norm, + g_norm, + reduction_ratio, + ftol, + xtol, + gtol, + iteration, + maxiter, + nfev, + max_nfev, + njev, + max_njev, + 0, + jnp.inf, + constr_violation=constr_violation, + ctol=ctol, + ) + if success is not None: + break + + if not result["success"]: # did the subproblem actually finish, or maxiter? + continue + elif constr_violation < ctolk: + y = y - mu * c + ctolk = ctolk / (mu**beta_eta) + gtolk = gtolk / (mu**beta_omega) + else: + mu = tau * mu + ctolk = eta / (mu**alpha_eta) + gtolk = omega / (mu**alpha_omega) + + zold = z + cost_old = cost + + x, s = z2xs(z) + active_mask = find_active_constraints(z, zbounds[0], zbounds[1], rtol=xtol) + result = OptimizeResult( + x=x, + s=s, + y=y, + success=success, + cost=cost, + fun=f, + grad=result["grad"], + v=result["v"], + jac=result["jac"], + optimality=g_norm, + nfev=nfev, + njev=njev, + nit=iteration, + message=message, + active_mask=active_mask, + constr_violation=constr_violation, + ) + if verbose > 0: + if result["success"]: + print(result["message"]) + else: + print("Warning: " + result["message"]) + print(f""" Current function value: {result["cost"]:.3e}""") + print(f""" Constraint violation: {result['constr_violation']:.3e}""") + print(f""" Total delta_x: {jnp.linalg.norm(x0 - result["x"]):.3e}""") + print(f""" Iterations: {result["nit"]:d}""") + print(f""" Function evaluations: {result["nfev"]:d}""") + print(f""" Jacobian evaluations: {result["njev"]:d}""") + + result["allx"] = [z2xs(x)[0] for x in allx] + + return result diff --git a/desc/optimize/fmin_scalar.py b/desc/optimize/fmin_scalar.py index a43f5ee338..150b95fc1e 100644 --- a/desc/optimize/fmin_scalar.py +++ b/desc/optimize/fmin_scalar.py @@ -79,19 +79,18 @@ def fmintr( # noqa: C901 - FIXME: simplify this function. If set to ``'hess'``, the scale is iteratively updated using the inverse norms of the columns of the Hessian matrix. ftol : float or None, optional - Tolerance for termination by the change of the cost function. Default - is 1e-8. The optimization process is stopped when ``dF < ftol * F``, + Tolerance for termination by the change of the cost function. + The optimization process is stopped when ``dF < ftol * F``, and there was an adequate agreement between a local quadratic model and the true model in the last step. If None, the termination by this condition is disabled. xtol : float or None, optional Tolerance for termination by the change of the independent variables. - Default is 1e-8. Optimization is stopped when - ``norm(dx) < xtol * (xtol + norm(x))``. If None, the termination by - this condition is disabled. + Optimization is stopped when ``norm(dx) < xtol * (xtol + norm(x))``. + If None, the termination by this condition is disabled. gtol : float or None, optional - Absolute tolerance for termination by the norm of the gradient. Default is 1e-8. - Optimizer teriminates when ``norm(g) < gtol``, where + Absolute tolerance for termination by the norm of the gradient. + Optimizer teriminates when ``max(abs(g)) < gtol``. If None, the termination by this condition is disabled. verbose : {0, 1, 2}, optional * 0 (default) : work silently. 
@@ -168,7 +167,7 @@ def fmintr( # noqa: C901 - FIXME: simplify this subproblem = trust_region_step_exact_cho else: raise ValueError( - colored("method should be one of 'dogleg' or 'subspace'", "red") + colored("method should be one of 'exact', 'dogleg' or 'subspace'", "red") ) if maxiter is None: @@ -405,6 +404,7 @@ def fmintr( # noqa: C901 - FIXME: simplify this success=success, fun=f, grad=g, + v=v, hess=H, optimality=g_norm, nfev=nfev, diff --git a/desc/optimize/least_squares.py b/desc/optimize/least_squares.py index 2b9b6754d6..2ac5b15856 100644 --- a/desc/optimize/least_squares.py +++ b/desc/optimize/least_squares.py @@ -69,19 +69,18 @@ def lsqtr( # noqa: C901 - FIXME: simplify this function. If set to ``'jac'``, the scale is iteratively updated using the inverse norms of the columns of the Jacobian matrix. ftol : float or None, optional - Tolerance for termination by the change of the cost function. Default - is 1e-8. The optimization process is stopped when ``dF < ftol * F``, + Tolerance for termination by the change of the cost function. + The optimization process is stopped when ``dF < ftol * F``, and there was an adequate agreement between a local quadratic model and the true model in the last step. If None, the termination by this condition is disabled. xtol : float or None, optional Tolerance for termination by the change of the independent variables. - Default is 1e-8. Optimization is stopped when - ``norm(dx) < xtol * (xtol + norm(x))``. If None, the termination by - this condition is disabled. + Optimization is stopped when ``norm(dx) < xtol * (xtol + norm(x))``. + If None, the termination by this condition is disabled. gtol : float or None, optional - Absolute tolerance for termination by the norm of the gradient. Default is 1e-8. - Optimizer teriminates when ``norm(g) < gtol``, where + Absolute tolerance for termination by the norm of the gradient. + Optimizer teriminates when ``max(abs(g)) < gtol``. If None, the termination by this condition is disabled. verbose : {0, 1, 2}, optional * 0 (default) : work silently. @@ -389,6 +388,7 @@ def lsqtr( # noqa: C901 - FIXME: simplify this cost=cost, fun=f, grad=g, + v=v, jac=J, optimality=g_norm, nfev=nfev, diff --git a/desc/optimize/optimizer.py b/desc/optimize/optimizer.py index 328ed9db68..cdfdf681f8 100644 --- a/desc/optimize/optimizer.py +++ b/desc/optimize/optimizer.py @@ -73,6 +73,7 @@ def optimize( # noqa: C901 - FIXME: simplify this ftol=None, xtol=None, gtol=None, + ctol=None, x_scale="auto", verbose=1, maxiter=None, @@ -102,6 +103,10 @@ def optimize( # noqa: C901 - FIXME: simplify this Absolute tolerance for termination by the norm of the gradient. Optimizer terminates when ``norm(g) < gtol``, where If None, defaults to 1e-8. + ctol : float or None, optional + Stopping tolerance on infinity norm of the constraint violation. + Optimization will stop when ctol and one of the other tolerances + are satisfied. If None, defaults to 1e-4. x_scale : array_like or ``'auto'``, optional Characteristic scale of each variable. Setting ``x_scale`` is equivalent to reformulating the problem in scaled variables ``xs = x / x_scale``. 
@@ -204,7 +209,7 @@ def optimize( # noqa: C901 - FIXME: simplify this ftol, xtol, gtol, - None, + ctol, maxiter, options, ) @@ -226,6 +231,8 @@ def optimize( # noqa: C901 - FIXME: simplify this if verbose > 0: print("Starting optimization") + print("Using method: " + str(self.method)) + timer.start("Solution time") result = optimizers[method]["fun"]( diff --git a/desc/optimize/stochastic.py b/desc/optimize/stochastic.py index bd1a5c5e9d..ac918920f0 100644 --- a/desc/optimize/stochastic.py +++ b/desc/optimize/stochastic.py @@ -47,19 +47,15 @@ def sgd( Step size update rule. Currently only the default "sgd" is available. Future updates may include RMSProp, Adam, etc. ftol : float or None, optional - Tolerance for termination by the change of the cost function. Default - is 1e-8. The optimization process is stopped when ``dF < ftol * F``, - and there was an adequate agreement between a local quadratic model and - the true model in the last step. If None, the termination by this - condition is disabled. + Tolerance for termination by the change of the cost function. + The optimization process is stopped when ``dF < ftol * F``. xtol : float or None, optional Tolerance for termination by the change of the independent variables. - Default is 1e-8. Optimization is stopped when - ``norm(dx) < xtol * (xtol + norm(x))``. If None, the termination by - this condition is disabled. + Optimization is stopped when ``norm(dx) < xtol * (xtol + norm(x))``. + If None, the termination by this condition is disabled. gtol : float or None, optional - Absolute tolerance for termination by the norm of the gradient. Default is 1e-6. - Optimizer teriminates when ``norm(g) < gtol``, where + Absolute tolerance for termination by the norm of the gradient. + Optimizer teriminates when ``max(abs(g)) < gtol``. If None, the termination by this condition is disabled. verbose : {0, 1, 2}, optional * 0 (default) : work silently. diff --git a/desc/optimize/utils.py b/desc/optimize/utils.py index af7c249431..c48cd3ef89 100644 --- a/desc/optimize/utils.py +++ b/desc/optimize/utils.py @@ -1,8 +1,136 @@ """Utility functions used in optimization problems.""" +import copy + import numpy as np -from desc.backend import cond, jit, jnp +from desc.backend import cond, jit, jnp, put +from desc.utils import Index + + +def inequality_to_bounds(x0, fun, grad, hess, constraint, bounds): + """Convert inequality constraints to bounds using slack variables. 
+ + We do this by introducing slack variables s + + ie, lb < con(x) < ub --> con(x) - s == 0, lb < s < ub + + A new state vector z is defined as [x, s] and the problem + is transformed into one that has only equality constraints + and simple bounds on the variables z + + Parameters + ---------- + x0 : ndarray + Starting point for primal variables + fun, grad, hess : callable + Functions for computing the objective and derivatives + constraint : scipy.optimize.NonlinearConstraint + constraint object of both equality and inequality constraints + bounds : tuple + lower and upper bounds for primal variables x + + Returns + ------- + z0 : ndarray + Starting point for primal + slack variables + fun, grad, hess : callable + functions for computing objective and derivatives wrt z + constraint : scipy.optimize.NonlinearConstraint + constraint containing just equality constraints + bounds : tuple + lower and upper bounds on combined variable z + z2xs : callable + function for splitting combined variable z into primal + and slack variables x and s + + """ + c0 = constraint.fun(x0) + ncon = c0.size + bounds = tuple(jnp.broadcast_to(bi, x0.shape) for bi in bounds) + cbounds = (constraint.lb, constraint.ub) + cbounds = tuple(jnp.broadcast_to(bi, c0.shape) for bi in cbounds) + lbs, ubs = cbounds + lbx, ubx = bounds + + ineq_mask = lbs != ubs + eq_mask = lbs == ubs + eq_target = lbs[~ineq_mask] + nslack = jnp.sum(ineq_mask) + zbounds = ( + jnp.concatenate([lbx, lbs[ineq_mask]]), + jnp.concatenate([ubx, ubs[ineq_mask]]), + ) + s0 = c0[ineq_mask] + s0 = jnp.clip(s0, lbs[ineq_mask], ubs[ineq_mask]) + z0 = jnp.concatenate([x0, s0]) + target = jnp.zeros(c0.size) + target = put(target, eq_mask, eq_target) + + def z2xs(z): + return z[: len(z) - nslack], z[len(z) - nslack :] + + def fun_wrapped(z, *args): + x, s = z2xs(z) + return fun(x, *args) + + if hess is None: + # assume grad is really jac of least squares + def grad_wrapped(z, *args): + x, s = z2xs(z) + g = grad(x, *args) + return jnp.hstack([g, jnp.zeros((g.shape[0], nslack))]) + + else: + + def grad_wrapped(z, *args): + x, s = z2xs(z) + g = grad(x, *args) + return jnp.concatenate([g, jnp.zeros(nslack)]) + + if callable(hess): + + def hess_wrapped(z, *args): + x, s = z2xs(z) + H = hess(x, *args) + return jnp.pad(H, (0, nslack)) + + else: # using BFGS + hess_wrapped = hess + + def confun_wrapped(z, *args): + x, s = z2xs(z) + c = constraint.fun(x, *args) + sbig = jnp.zeros(ncon) + sbig = put(sbig, ineq_mask, s) + return c - sbig - target + + def conjac_wrapped(z, *args): + x, s = z2xs(z) + J = constraint.jac(x, *args) + I = jnp.eye(nslack) + Js = jnp.zeros((ncon, nslack)) + Js = put(Js, Index[ineq_mask, :], -I) + return jnp.hstack([J, Js]) + + if callable(constraint.hess): + + def conhess_wrapped(z, y, *args): + x, s = z2xs(z) + H = constraint.hess(x, y, *args) + return jnp.pad(H, (0, nslack)) + + else: # using BFGS + conhess_wrapped = constraint.hess + + newcon = copy.copy(constraint) + newcon.fun = confun_wrapped + newcon.jac = conjac_wrapped + newcon.hess = conhess_wrapped + newcon.lb = target + newcon.ub = target + + return z0, fun_wrapped, grad_wrapped, hess_wrapped, newcon, zbounds, z2xs @jit @@ -188,7 +316,7 @@ def evaluate_quadratic_form_jac(J, g, s, diag=None): return 0.5 * q + l -def print_header_nonlinear(constrained=False): +def print_header_nonlinear(constrained=False, *args): """Print a pretty header.""" s = "{:^15}{:^15}{:^15}{:^15}{:^15}{:^15}".format( "Iteration", @@ -199,12 +327,21 @@ def print_header_nonlinear(constrained=False): 
"Optimality", ) if constrained: - s += "{:^24}".format("Constraint violation") + s += "{:^15}".format("Constr viol.") + for arg in args: + s += "{:^15}".format(arg) print(s) def print_iteration_nonlinear( - iteration, nfev, cost, cost_reduction, step_norm, optimality, constr_violation=None + iteration, + nfev, + cost, + cost_reduction, + step_norm, + optimality, + constr_violation=None, + *args, ): """Print a line of optimizer output.""" if iteration is None or abs(iteration) == np.inf: @@ -220,27 +357,29 @@ def print_iteration_nonlinear( if cost is None or abs(cost) == np.inf: cost = " " * 15 else: - cost = "{:^15.4e}".format(cost) + cost = "{: ^15.3e}".format(cost) if cost_reduction is None or abs(cost_reduction) == np.inf: cost_reduction = " " * 15 else: - cost_reduction = "{:^15.2e}".format(cost_reduction) + cost_reduction = "{: ^15.3e}".format(cost_reduction) if step_norm is None or abs(step_norm) == np.inf: step_norm = " " * 15 else: - step_norm = "{:^15.2e}".format(step_norm) + step_norm = "{:^15.3e}".format(step_norm) if optimality is None or abs(optimality) == np.inf: optimality = " " * 15 else: - optimality = "{:^15.2e}".format(optimality) + optimality = "{:^15.3e}".format(optimality) s = "{}{}{}{}{}{}".format( iteration, nfev, cost, cost_reduction, step_norm, optimality ) if constr_violation is not None: - s += "{:^24.2e}".format(constr_violation) + s += "{:^15.3e}".format(constr_violation) + for arg in args: + s += "{:^15.3e}".format(arg) print(s) diff --git a/docs/optimizers.csv b/docs/optimizers.csv index baac84c289..d730294658 100644 --- a/docs/optimizers.csv +++ b/docs/optimizers.csv @@ -1,9 +1,13 @@ Name,Scalar,Equality Constraints,Inequality Constraints,Stochastic,Hessian,GPU,Description +``fmin-auglag``,True,True,True,False,True,True,Augmented Lagrangian method with trust region subproblem. +``lsq-auglag``,False,True,True,False,False,True,Least Squares Augmented Lagrangian approach to constrained optimization ``lsq-exact``,False,False,False,False,False,True,"Trust region least squares method, similar to the `trf` method in scipy" -``dogleg``,True,False,False,False,True,True,Trust region method using Powell's dogleg method to approximately solve the trust region subproblem. -``subspace``,True,False,False,False,True,True,Trust region method solving the subproblem over the 2d subspace spanned by the gradient and newton direction. -``dogleg-bfgs``,True,False,False,False,False,True,Trust region method using Powell's dogleg method to approximately solve the trust region subproblem. Uses BFGS to approximate hessian -``subspace-bfgs``,True,False,False,False,False,True,Trust region method solving the subproblem over the 2d subspace spanned by the gradient and newton direction. Uses BFGS to approximate hessian +``fmin-exact``,True,False,False,False,True,True,Trust region method using iterative cholesky method to exactly solve the trust region subproblem. +``fmin-dogleg``,True,False,False,False,True,True,Trust region method using Powell's dogleg method to approximately solve the trust region subproblem. +``fmin-subspace``,True,False,False,False,True,True,Trust region method solving the subproblem over the 2d subspace spanned by the gradient and newton direction. +``fmin-exact-bfgs``,True,False,False,False,False,True,Trust region method using iterative cholesky method to exactly solve the trust region subproblem. 
Uses BFGS to approximate hessian +``fmin-dogleg-bfgs``,True,False,False,False,False,True,Trust region method using Powell's dogleg method to approximately solve the trust region subproblem. Uses BFGS to approximate hessian +``fmin-subspace-bfgs``,True,False,False,False,False,True,Trust region method solving the subproblem over the 2d subspace spanned by the gradient and newton direction. Uses BFGS to approximate hessian ``sgd``,True,False,False,True,False,True,Stochastic gradient descent with Nesterov momentum ``scipy-bfgs``,True,False,False,False,False,False,BFGS quasi-newton method with line search. See https://docs.scipy.org/doc/scipy/reference/optimize.minimize-bfgs.html ``scipy-CG``,True,False,False,False,False,False,Nonlinear conjugate gradient method. See https://docs.scipy.org/doc/scipy/reference/optimize.minimize-cg.html diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index 722e961c34..5050090abe 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -3,7 +3,14 @@ import numpy as np import pytest from numpy.random import default_rng -from scipy.optimize import BFGS, least_squares, rosen, rosen_der +from scipy.optimize import ( + BFGS, + NonlinearConstraint, + least_squares, + minimize, + rosen, + rosen_der, +) import desc.examples from desc.backend import jit, jnp @@ -20,6 +27,8 @@ FixPressure, FixPsi, ForceBalance, + GenericObjective, + MagneticWell, MeanCurvature, ObjectiveFunction, Volume, @@ -29,7 +38,9 @@ LinearConstraintProjection, Optimizer, ProximalProjection, + fmin_auglag, fmintr, + lsq_auglag, lsqtr, optimizers, sgd, @@ -412,12 +423,12 @@ def test_maxiter_1_and_0_solve(): objectives = ForceBalance() obj = ObjectiveFunction(objectives) eq = desc.examples.get("SOLOVEV") - for opt in ["lsq-exact", "dogleg-bfgs"]: + for opt in ["lsq-exact", "fmin-dogleg-bfgs"]: eq, result = eq.solve( maxiter=1, constraints=constraints, objective=obj, optimizer=opt, verbose=3 ) assert result["nit"] == 1 - for opt in ["lsq-exact", "dogleg-bfgs"]: + for opt in ["lsq-exact", "fmin-dogleg-bfgs"]: eq, result = eq.solve( maxiter=0, constraints=constraints, objective=obj, optimizer=opt, verbose=3 ) @@ -725,3 +736,185 @@ def hess(x): np.testing.assert_allclose(out1["x"], out3["x"], rtol=1e-06, atol=1e-06) np.testing.assert_allclose(out2["x"], out3["x"], rtol=1e-06, atol=1e-06) + + +@pytest.mark.unit +def test_auglag(): + """Test that our augmented lagrangian works as well as scipy for convex problems.""" + rng = default_rng(12) + + n = 15 # number of variables + m = 10 # number of constraints + p = 7 # number of primals + Qs = [] + gs = [] + for i in range(m + p): + A = 0.5 - rng.random((n, n)) + Qs.append(A.T @ A) + gs.append(0.5 - rng.random(n)) + + @jit + def vecfun(x): + y0 = x @ Qs[0] + gs[0] + y1 = x @ Qs[1] + gs[1] + y = jnp.concatenate([y0, y1**2]) + return y + + @jit + def fun(x): + y = vecfun(x) + return 1 / 2 * jnp.dot(y, y) + + grad = jit(Derivative(fun, mode="grad")) + hess = jit(Derivative(fun, mode="hess")) + jac = jit(Derivative(vecfun, mode="fwd")) + + @jit + def con(x): + cs = [] + for i in range(m): + cs.append(x @ Qs[p + i] @ x + gs[p + i] @ x) + return jnp.array(cs) + + conjac = jit(Derivative(con, mode="fwd")) + conhess = jit(Derivative(lambda x, v: v @ con(x), mode="hess")) + + constraint = NonlinearConstraint(con, -np.inf, 0, conjac, conhess) + x0 = rng.random(n) + + out1 = fmin_auglag( + fun, + x0, + grad, + hess=hess, + bounds=(-jnp.inf, jnp.inf), + constraint=constraint, + args=(), + x_scale="auto", + ftol=0, + xtol=1e-6, + gtol=1e-6, + 
ctol=1e-6, + verbose=3, + maxiter=None, + options={}, + ) + print(out1["active_mask"]) + out2 = lsq_auglag( + vecfun, + x0, + jac, + bounds=(-jnp.inf, jnp.inf), + constraint=constraint, + args=(), + x_scale="auto", + ftol=0, + xtol=1e-6, + gtol=1e-6, + ctol=1e-6, + verbose=3, + maxiter=None, + options={}, + ) + + out3 = minimize( + fun, + x0, + jac=grad, + hess=hess, + constraints=constraint, + method="trust-constr", + options={"verbose": 3, "maxiter": 1000}, + ) + + np.testing.assert_allclose(out1["x"], out3["x"], rtol=1e-4, atol=1e-4) + np.testing.assert_allclose(out2["x"], out3["x"], rtol=1e-4, atol=1e-4) + + +@pytest.mark.slow +@pytest.mark.regression +def test_constrained_AL_lsq(): + """Tests that the least squares augmented Lagrangian optimizer does something.""" + eq = desc.examples.get("SOLOVEV") + + constraints = ( + FixBoundaryR(modes=[0, 0, 0]), # fix specified major axis position + FixBoundaryZ(), # fix Z shape but not R + FixPressure(), # fix pressure profile + FixIota(), # fix rotational transform profile + FixPsi(), # fix total toroidal magnetic flux + ) + # some random constraints to keep the shape from getting wacky + V = eq.compute("V")["V"] + Vbounds = (0.95 * V, 1.05 * V) + AR = eq.compute("R0/a")["R0/a"] + ARbounds = (0.95 * AR, 1.05 * AR) + constraints += ( + Volume(bounds=Vbounds), + AspectRatio(bounds=ARbounds), + MagneticWell(bounds=(0, jnp.inf)), + ForceBalance(bounds=(-1e-3, 1e-3), normalize_target=False), + ) + H = eq.compute( + "curvature_H", grid=LinearGrid(M=eq.M_grid, N=eq.N_grid, NFP=eq.NFP) + )["curvature_H"] + obj = ObjectiveFunction(MeanCurvature(target=H)) + eq2, result = eq.optimize( + objective=obj, + constraints=constraints, + optimizer="lsq-auglag", + maxiter=500, + verbose=3, + x_scale="auto", + copy=True, + options={}, + ) + V2 = eq2.compute("V")["V"] + AR2 = eq2.compute("R0/a")["R0/a"] + Dwell = constraints[-2].compute(*constraints[-2].xs(eq2)) + assert ARbounds[0] < AR2 < ARbounds[1] + assert Vbounds[0] < V2 < Vbounds[1] + assert eq2.is_nested() + np.testing.assert_array_less(-Dwell, 0) + + +@pytest.mark.slow +@pytest.mark.regression +def test_constrained_AL_scalar(): + """Tests that the augmented Lagrangian constrained optimizer does something.""" + eq = desc.examples.get("SOLOVEV") + + constraints = ( + FixBoundaryR(modes=[0, 0, 0]), # fix specified major axis position + FixBoundaryZ(), # fix Z shape but not R + FixPressure(), # fix pressure profile + FixIota(), # fix rotational transform profile + FixPsi(), # fix total toroidal magnetic flux + ) + V = eq.compute("V")["V"] + AR = eq.compute("R0/a")["R0/a"] + constraints += ( + Volume(target=V), + AspectRatio(target=AR), + MagneticWell(bounds=(0, jnp.inf)), + ForceBalance(bounds=(-1e-3, 1e-3), normalize_target=False), + ) + # Dummy objective to return 0, we just want a feasible solution. + obj = ObjectiveFunction(GenericObjective("0")) + eq2, result = eq.optimize( + objective=obj, + constraints=constraints, + optimizer="fmin-auglag", + maxiter=500, + verbose=3, + x_scale="auto", + copy=True, + options={}, + ) + V2 = eq2.compute("V")["V"] + AR2 = eq2.compute("R0/a")["R0/a"] + Dwell = constraints[-2].compute(*constraints[-2].xs(eq2)) + np.testing.assert_allclose(AR, AR2) + np.testing.assert_allclose(V, V2) + assert eq2.is_nested() + np.testing.assert_array_less(-Dwell, 0)
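
A minimal usage sketch of the new augmented Lagrangian interface, assuming only the signatures added above (``fmin_auglag`` exported from ``desc.optimize``, taking a scalar objective with explicit ``grad``/``hess`` callables and a ``scipy.optimize.NonlinearConstraint``). The toy problem, the names ``fun``, ``grad``, ``hess``, ``con``, ``conjac``, and the tolerance values are illustrative only, not taken from the DESC codebase.

    from scipy.optimize import NonlinearConstraint

    from desc.backend import jnp
    from desc.optimize import fmin_auglag

    # minimize ||x - a||^2 subject to sum(x) <= 1
    a = jnp.array([1.0, 2.0, 3.0])

    def fun(x):
        """Scalar objective."""
        return jnp.sum((x - a) ** 2)

    def grad(x):
        """Analytic gradient of fun."""
        return 2 * (x - a)

    def hess(x):
        """Analytic Hessian of fun."""
        return 2 * jnp.eye(x.size)

    def con(x):
        """Constraint function; returns a 1d array."""
        return jnp.atleast_1d(jnp.sum(x))

    def conjac(x):
        """Jacobian of con, shape (1, n)."""
        return jnp.ones((1, x.size))

    constraint = NonlinearConstraint(con, -jnp.inf, 1.0, conjac)

    result = fmin_auglag(
        fun,
        x0=jnp.zeros(3),
        grad=grad,
        hess=hess,
        constraint=constraint,
        ftol=0,
        xtol=1e-8,
        gtol=1e-8,
        ctol=1e-8,
        verbose=1,
        maxiter=1000,
    )
    # result["x"] should be close to a - 5/3, the projection of a
    # onto the half-space sum(x) <= 1

At the Equilibrium level, the same path is exercised by passing one of the new optimizer names together with the new ``ctol`` keyword, e.g. ``eq.optimize(objective=obj, constraints=constraints, optimizer="lsq-auglag", ctol=1e-4)``, as in the regression tests above.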