diff --git a/desc/equilibrium/equilibrium.py b/desc/equilibrium/equilibrium.py index ae2314aed8..4413cb6d00 100644 --- a/desc/equilibrium/equilibrium.py +++ b/desc/equilibrium/equilibrium.py @@ -548,6 +548,7 @@ def optimize( ftol=None, xtol=None, gtol=None, + ctol=None, maxiter=None, x_scale="auto", options=None, @@ -564,7 +565,7 @@ def optimize( Objective function to satisfy. Default = fixed-boundary force balance. optimizer : str or Optimizer (optional) optimizer to use - ftol, xtol, gtol : float + ftol, xtol, gtol, ctol : float stopping tolerances. `None` will use defaults for given optimizer. maxiter : int Maximum number of solver steps. @@ -618,6 +619,7 @@ def optimize( ftol=ftol, xtol=xtol, gtol=gtol, + ctol=ctol, x_scale=x_scale, verbose=verbose, maxiter=maxiter, diff --git a/desc/optimize/__init__.py b/desc/optimize/__init__.py index 7a5531662f..7093f03c4a 100644 --- a/desc/optimize/__init__.py +++ b/desc/optimize/__init__.py @@ -2,6 +2,8 @@ from . import _desc_wrappers, _scipy_wrappers from ._constraint_wrappers import LinearConstraintProjection, ProximalProjection +from .aug_lagrangian import fmin_auglag +from .aug_lagrangian_ls import lsq_auglag from .fmin_scalar import fmintr from .least_squares import lsqtr from .optimizer import Optimizer, optimizers, register_optimizer diff --git a/desc/optimize/_desc_wrappers.py b/desc/optimize/_desc_wrappers.py index 292f5399c3..35e7d63b1e 100644 --- a/desc/optimize/_desc_wrappers.py +++ b/desc/optimize/_desc_wrappers.py @@ -1,11 +1,200 @@ +from scipy.optimize import NonlinearConstraint + from desc.backend import jnp +from .aug_lagrangian import fmin_auglag +from .aug_lagrangian_ls import lsq_auglag from .fmin_scalar import fmintr from .least_squares import lsqtr from .optimizer import register_optimizer from .stochastic import sgd +@register_optimizer( + name="fmin-auglag", + description="Augmented Lagrangian method with trust region subproblem.", + scalar=True, + equality_constraints=True, + inequality_constraints=True, + stochastic=False, + hessian=True, + GPU=True, +) +def _optimize_desc_aug_lagrangian( + objective, constraint, x0, method, x_scale, verbose, stoptol, options=None +): + """Wrapper for desc.optimize.fmin_lag_ls_stel. + + Parameters + ---------- + objective : ObjectiveFunction + Function to minimize. + constraint : ObjectiveFunction + Constraint to satisfy + x0 : ndarray + Starting point. + method : str + Name of the method to use. Any of the options in scipy.optimize.least_squares. + x_scale : array_like or ‘jac’, optional + Characteristic scale of each variable. Setting x_scale is equivalent to + reformulating the problem in scaled variables xs = x / x_scale. An alternative + view is that the size of a trust region along jth dimension is proportional to + x_scale[j]. Improved convergence may be achieved by setting x_scale such that + a step of a given size along any of the scaled variables has a similar effect + on the cost function. If set to ‘jac’, the scale is iteratively updated using + the inverse norms of the columns of the Jacobian matrix. + verbose : int + * 0 : work silently. + * 1 : display a termination report. + * 2 : display progress during iterations + stoptol : dict + Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol", + "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"} + options : dict, optional + Dictionary of optional keyword arguments to override default solver + settings. See the code for more details. 
+ + Returns + ------- + res : OptimizeResult + The optimization result represented as a ``OptimizeResult`` object. + Important attributes are: ``x`` the solution array, ``success`` a + Boolean flag indicating if the optimizer exited successfully and + ``message`` which describes the cause of the termination. See + `OptimizeResult` for a description of other attributes. + + """ + options = {} if options is None else options + if not isinstance(x_scale, str) and jnp.allclose(x_scale, 1): + options.setdefault("initial_trust_ratio", 1e-3) + options.setdefault("max_trust_radius", 1.0) + options["max_nfev"] = stoptol["max_nfev"] + options["max_ngev"] = stoptol["max_ngev"] + options["max_nhev"] = stoptol["max_nhev"] + + if constraint is not None: + lb, ub = constraint.bounds_scaled + constraint_wrapped = NonlinearConstraint( + constraint.compute_scaled, + lb, + ub, + constraint.jac_scaled, + ) + else: + constraint_wrapped = None + + result = fmin_auglag( + objective.compute_scalar, + x0=x0, + grad=objective.grad, + hess=objective.hess, + bounds=(-jnp.inf, jnp.inf), + constraint=constraint_wrapped, + args=(), + x_scale=x_scale, + ftol=stoptol["ftol"], + xtol=stoptol["xtol"], + gtol=stoptol["gtol"], + ctol=stoptol["ctol"], + verbose=verbose, + maxiter=stoptol["maxiter"], + options=options, + ) + return result + + +@register_optimizer( + name="lsq-auglag", + description="Least Squares Augmented Lagrangian approach " + + "to constrained optimization", + scalar=False, + equality_constraints=True, + inequality_constraints=True, + stochastic=False, + hessian=False, + GPU=True, +) +def _optimize_desc_aug_lagrangian_least_squares( + objective, constraint, x0, method, x_scale, verbose, stoptol, options=None +): + """Wrapper for desc.optimize.fmin_lag_ls_stel. + + Parameters + ---------- + objective : ObjectiveFunction + Function to minimize. + constraint : ObjectiveFunction + Constraint to satisfy + x0 : ndarray + Starting point. + method : str + Name of the method to use. Any of the options in scipy.optimize.least_squares. + x_scale : array_like or ‘jac’, optional + Characteristic scale of each variable. Setting x_scale is equivalent to + reformulating the problem in scaled variables xs = x / x_scale. An alternative + view is that the size of a trust region along jth dimension is proportional to + x_scale[j]. Improved convergence may be achieved by setting x_scale such that + a step of a given size along any of the scaled variables has a similar effect + on the cost function. If set to ‘jac’, the scale is iteratively updated using + the inverse norms of the columns of the Jacobian matrix. + verbose : int + * 0 : work silently. + * 1 : display a termination report. + * 2 : display progress during iterations + stoptol : dict + Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol", + "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"} + options : dict, optional + Dictionary of optional keyword arguments to override default solver + settings. See the code for more details. + + Returns + ------- + res : OptimizeResult + The optimization result represented as a ``OptimizeResult`` object. + Important attributes are: ``x`` the solution array, ``success`` a + Boolean flag indicating if the optimizer exited successfully and + ``message`` which describes the cause of the termination. See + `OptimizeResult` for a description of other attributes. 
+ + """ + options = {} if options is None else options + if not isinstance(x_scale, str) and jnp.allclose(x_scale, 1): + options.setdefault("initial_trust_radius", 1e-3) + options.setdefault("max_trust_radius", 1.0) + options["max_nfev"] = stoptol["max_nfev"] + options["max_njev"] = stoptol["max_njev"] + + if constraint is not None: + lb, ub = constraint.bounds_scaled + constraint_wrapped = NonlinearConstraint( + constraint.compute_scaled, + lb, + ub, + constraint.jac_scaled, + ) + else: + constraint_wrapped = None + + result = lsq_auglag( + objective.compute_scaled_error, + x0=x0, + jac=objective.jac_scaled, + bounds=(-jnp.inf, jnp.inf), + constraint=constraint_wrapped, + args=(), + x_scale=x_scale, + ftol=stoptol["ftol"], + xtol=stoptol["xtol"], + gtol=stoptol["gtol"], + ctol=stoptol["ctol"], + verbose=verbose, + maxiter=stoptol["maxiter"], + options=options, + ) + return result + + @register_optimizer( name="lsq-exact", description="Trust region least squares method, " @@ -45,7 +234,7 @@ def _optimize_desc_least_squares( * 1 : display a termination report. * 2 : display progress during iterations stoptol : dict - Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", + Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol", "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"} options : dict, optional Dictionary of optional keyword arguments to override default solver @@ -89,12 +278,23 @@ def _optimize_desc_least_squares( @register_optimizer( - name=["dogleg", "subspace", "dogleg-bfgs", "subspace-bfgs"], + name=[ + "fmin-exact", + "fmin-dogleg", + "fmin-subspace", + "fmin-exact-bfgs", + "fmin-dogleg-bfgs", + "fmin-subspace-bfgs", + ], description=[ + "Trust region method using iterative cholesky method to exactly solve the " + + "trust region subproblem.", "Trust region method using Powell's dogleg method to approximately solve the " + "trust region subproblem.", "Trust region method solving the subproblem over the 2d subspace spanned by " + "the gradient and newton direction.", + "Trust region method using iterative cholesky method to exactly solve the " + + "trust region subproblem. Uses BFGS to approximate hessian", "Trust region method using Powell's dogleg method to approximately solve the " + "trust region subproblem. Uses BFGS to approximate hessian", "Trust region method solving the subproblem over the 2d subspace spanned by " @@ -104,7 +304,7 @@ def _optimize_desc_least_squares( equality_constraints=False, inequality_constraints=False, stochastic=False, - hessian=[True, True, False, False], + hessian=[True, True, True, False, False, False], GPU=True, ) def _optimize_desc_fmin_scalar( @@ -135,7 +335,7 @@ def _optimize_desc_fmin_scalar( * 1 : display a termination report. * 2 : display progress during iterations stoptol : dict - Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", + Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol", "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"} options : dict, optional Dictionary of optional keyword arguments to override default solver @@ -169,7 +369,7 @@ def _optimize_desc_fmin_scalar( grad=objective.grad, hess=hess, args=(), - method=method.replace("-bfgs", ""), + method=method.replace("-bfgs", "").replace("fmin-", ""), x_scale=x_scale, ftol=stoptol["ftol"], xtol=stoptol["xtol"], @@ -220,7 +420,7 @@ def _optimize_desc_stochastic( * 1 : display a termination report. 
* 2 : display progress during iterations stoptol : dict - Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", + Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol", "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"} options : dict, optional Dictionary of optional keyword arguments to override default solver diff --git a/desc/optimize/_scipy_wrappers.py b/desc/optimize/_scipy_wrappers.py index f2a7271f0c..dda278b908 100644 --- a/desc/optimize/_scipy_wrappers.py +++ b/desc/optimize/_scipy_wrappers.py @@ -82,7 +82,7 @@ def _optimize_scipy_minimize( # noqa: C901 - FIXME: simplify this * 1 : display a termination report. * 2 : display progress during iterations stoptol : dict - Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", + Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol", "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"} options : dict, optional Dictionary of optional keyword arguments to override default solver @@ -341,7 +341,7 @@ def _optimize_scipy_least_squares( # noqa: C901 - FIXME: simplify this * 1 : display a termination report. * 2 : display progress during iterations stoptol : dict - Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", + Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol", "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"} options : dict, optional Dictionary of optional keyword arguments to override default solver @@ -544,7 +544,7 @@ def _optimize_scipy_constrained( # noqa: C901 - FIXME: simplify this * 1 : display a termination report. * 2 : display progress during iterations stoptol : dict - Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", + Dictionary of stopping tolerances, with keys {"xtol", "ftol", "gtol", "ctol", "maxiter", "max_nfev", "max_njev", "max_ngev", "max_nhev"} options : dict, optional Dictionary of optional keyword arguments to override default solver diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py new file mode 100644 index 0000000000..b203b4224e --- /dev/null +++ b/desc/optimize/aug_lagrangian.py @@ -0,0 +1,339 @@ +"""Augmented Langrangian for scalar valued objectives.""" + +from scipy.optimize import NonlinearConstraint, OptimizeResult + +from desc.backend import jnp +from desc.optimize.fmin_scalar import fmintr + +from .bound_utils import find_active_constraints +from .utils import ( + check_termination, + inequality_to_bounds, + print_header_nonlinear, + print_iteration_nonlinear, +) + + +def fmin_auglag( # noqa: C901 - FIXME: simplify this + fun, + x0, + grad, + hess, + bounds=(-jnp.inf, jnp.inf), + constraint=None, + args=(), + x_scale=1, + ftol=1e-6, + xtol=1e-6, + gtol=1e-6, + ctol=1e-6, + verbose=1, + maxiter=None, + options={}, +): + """Minimize a function with constraints using an augmented Langrangian method. + + Parameters + ---------- + fun : callable + objective to be minimized. Should have a signature like fun(x,*args)-> float + x0 : array-like + initial guess + grad : callable + function to compute gradient, df/dx. Should take the same arguments as fun + hess : callable + function to compute Hessian matrix of fun + bounds : tuple of array-like + Lower and upper bounds on independent variables. Defaults to no bounds. + Each array must match the size of x0 or be a scalar, in the latter case a + bound will be the same for all variables. Use np.inf with an appropriate sign + to disable bounds on all or some variables. 
+ constraint : scipy.optimize.NonlinearConstraint + constraint to be satisfied + args : tuple + additional arguments passed to fun, grad, and hess + x_scale : array_like or ``'hess'``, optional + Characteristic scale of each variable. Setting ``x_scale`` is equivalent + to reformulating the problem in scaled variables ``xs = x / x_scale``. + An alternative view is that the size of a trust region along jth + dimension is proportional to ``x_scale[j]``. Improved convergence may + be achieved by setting ``x_scale`` such that a step of a given size + along any of the scaled variables has a similar effect on the cost + function. If set to ``'hess'``, the scale is iteratively updated using the + inverse norms of the columns of the Hessian matrix. + ftol : float or None, optional + Tolerance for termination by the change of the cost function. + The optimization process is stopped when ``dF < ftol * F``, + and there was an adequate agreement between a local quadratic model and + the true model in the last step. If None, the termination by this + condition is disabled. + xtol : float or None, optional + Tolerance for termination by the change of the independent variables. + Optimization is stopped when ``norm(dx) < xtol * (xtol + norm(x))``. + If None, the termination by this condition is disabled. + gtol : float or None, optional + Absolute tolerance for termination by the norm of the gradient. + Optimizer teriminates when ``max(abs(g)) < gtol``. + If None, the termination by this condition is disabled. + ctol : float, optional + Tolerance for stopping based on infinity norm of the constraint violation. + Optimizer terminates when ``max(abs(constr_violation)) < ctol`` AND one or more + of the other tolerances are met (``ftol``, ``xtol``, ``gtol``) + verbose : {0, 1, 2}, optional + * 0 (default) : work silently. + * 1 : display a termination report. + * 2 : display progress during iterations + maxiter : int, optional + maximum number of iterations. Defaults to size(x)*100 + options : dict, optional + dictionary of optional keyword arguments to override default solver settings. + See the code for more details. + + Returns + ------- + res : OptimizeResult + The optimization result represented as a ``OptimizeResult`` object. + Important attributes are: ``x`` the solution array, ``success`` a + Boolean flag indicating if the optimizer exited successfully. 
+ + """ + if constraint is None: + # create a dummy constraint + constraint = NonlinearConstraint( + fun=lambda x, *args: jnp.array([0.0]), + lb=0.0, + ub=0.0, + jac=lambda x, *args: jnp.zeros((1, x.size)), + ) + ( + z0, + fun_wrapped, + grad_wrapped, + hess_wrapped, + constraint_wrapped, + zbounds, + z2xs, + ) = inequality_to_bounds( + x0, + fun, + grad, + hess, + constraint, + bounds, + ) + + def lagfun(z, y, mu, *args): + c = constraint_wrapped.fun(z, *args) + return fun_wrapped(z, *args) - jnp.dot(y, c) + mu / 2 * jnp.dot(c, c) + + def laggrad(z, y, mu, *args): + c = constraint_wrapped.fun(z, *args) + J = constraint_wrapped.jac(z, *args) + return grad_wrapped(z, *args) - jnp.dot(y, J) + mu * jnp.dot(c, J) + + assert callable(hess_wrapped) + if callable(constraint_wrapped.hess): + + def laghess(z, y, mu, *args): + c = constraint_wrapped.fun(z, *args) + Hf = hess_wrapped(z, *args) + Jc = constraint_wrapped.jac(z, *args) + Hc1 = constraint_wrapped.hess(z, y) + Hc2 = constraint_wrapped.hess(z, c) + return Hf - Hc1 + mu * (Hc2 + jnp.dot(Jc.T, Jc)) + + else: + + def laghess(z, y, mu, *args): + H = hess_wrapped(z, *args) + J = constraint_wrapped.jac(z, *args) + # ignoring higher order derivatives of constraints for now + return H + mu * jnp.dot(J.T, J) + + # TODO: figure out how to let BFGS maintain state between subproblems, otherwise + # it doesn't really converge + + nfev = 0 + ngev = 0 + nhev = 0 + iteration = 0 + + z = z0.copy() + f = fun_wrapped(z, *args) + c = constraint_wrapped.fun(z) + nfev += 1 + + mu = options.pop("initial_penalty_parameter", 10) + y = options.pop("initial_multipliers", None) + if y is None: # use least squares multiplier estimates + _J = constraint_wrapped.jac(z, *args) + _g = grad_wrapped(z, *args) + y = jnp.linalg.lstsq(_J.T, _g)[0] + y = jnp.nan_to_num(y, nan=0.0, posinf=0.0, neginf=0.0) + + if maxiter is None: + maxiter = z.size * 100 + maxiter_inner = options.pop("maxiter_inner", 20) + max_nfev = options.pop("max_nfev", 5 * maxiter + 1) + max_ngev = options.pop("max_ngev", maxiter + 1) + max_nhev = options.pop("max_nhev", maxiter + 1) + + # notation following Conn & Gould, algorithm 14.4.2, but with our mu = their mu^-1 + omega = options.pop("omega", 1.0) + eta = options.pop("eta", 1.0) + alpha_omega = options.pop("alpha_omega", 1.0) + beta_omega = options.pop("beta_omega", 1.0) + alpha_eta = options.pop("alpha_eta", 0.1) + beta_eta = options.pop("beta_eta", 0.9) + tau = options.pop("tau", 10) + + gtolk = omega / mu**alpha_omega + ctolk = eta / mu**alpha_eta + zold = z + fold = f + allx = [] + + success = None + message = None + step_norm = jnp.inf + actual_reduction = jnp.inf + g_norm = jnp.linalg.norm(laggrad(z, y, mu, *args), ord=jnp.inf) + constr_violation = jnp.linalg.norm(c, ord=jnp.inf) + + options.setdefault("initial_trust_radius", "scipy") + options["return_tr"] = True + + if verbose > 1: + print_header_nonlinear(True, "Penalty param", "max(|mltplr|)") + print_iteration_nonlinear( + iteration, + nfev, + f, + actual_reduction, + step_norm, + g_norm, + constr_violation, + mu, + jnp.max(jnp.abs(y)), + ) + + while iteration < maxiter: + result = fmintr( + lagfun, + z, + grad=laggrad, + hess=laghess, + bounds=zbounds, + args=(y, mu) + args, + x_scale=x_scale, + ftol=0, + xtol=0, + gtol=gtolk, + maxiter=maxiter_inner, + verbose=0, + options=options.copy(), + ) + allx += result["allx"] + nfev += result["nfev"] + ngev += result["ngev"] + nhev += result["nhev"] + iteration += result["nit"] + zold = z + fold = f + z = result["x"] + f = fun_wrapped(z, *args) + 
c = constraint_wrapped.fun(z, *args) + nfev += 1 + constr_violation = jnp.linalg.norm(c, ord=jnp.inf) + step_norm = jnp.linalg.norm(zold - z) + z_norm = jnp.linalg.norm(z) + g_norm = result["optimality"] + actual_reduction = fold - f + # don't stop if we increased cost + reduction_ratio = jnp.sign(actual_reduction) + # reuse the previous trust radius on the next pass + options["initial_trust_radius"] = float(result["alltr"][-1]) + + if verbose > 1: + print_iteration_nonlinear( + iteration, + nfev, + f, + actual_reduction, + step_norm, + g_norm, + constr_violation, + mu, + jnp.max(jnp.abs(y)), + ) + + success, message = check_termination( + actual_reduction, + f, + step_norm, + z_norm, + g_norm, + reduction_ratio, + ftol, + xtol, + gtol, + iteration, + maxiter, + nfev, + max_nfev, + ngev, + max_ngev, + nhev, + max_nhev, + constr_violation=constr_violation, + ctol=ctol, + ) + if success is not None: + break + + if not result["success"]: # did the subproblem actually finish, or maxiter? + continue + elif constr_violation < ctolk: + y = y - mu * c + ctolk = ctolk / (mu**beta_eta) + gtolk = gtolk / (mu**beta_omega) + else: + mu = tau * mu + ctolk = eta / (mu**alpha_eta) + gtolk = omega / (mu**alpha_omega) + + x, s = z2xs(z) + active_mask = find_active_constraints(z, zbounds[0], zbounds[1], rtol=xtol) + result = OptimizeResult( + x=x, + s=s, + y=y, + success=success, + fun=f, + grad=result["grad"], + v=result["v"], + optimality=g_norm, + nfev=nfev, + ngev=ngev, + nhev=nhev, + nit=iteration, + message=message, + active_mask=active_mask, + constr_violation=constr_violation, + ) + if verbose > 0: + if result["success"]: + print(result["message"]) + else: + print("Warning: " + result["message"]) + print(f""" Current function value: {result["fun"]:.3e}""") + print(f""" Constraint violation: {result['constr_violation']:.3e}""") + print(f""" Total delta_x: {jnp.linalg.norm(x0 - result["x"]):.3e}""") + print(f""" Iterations: {result["nit"]:d}""") + print(f""" Function evaluations: {result["nfev"]:d}""") + print(f""" Gradient evaluations: {result["ngev"]:d}""") + print(f""" Hessian evaluations: {result["nhev"]:d}""") + + result["allx"] = [z2xs(x)[0] for x in allx] + + return result diff --git a/desc/optimize/aug_lagrangian_ls.py b/desc/optimize/aug_lagrangian_ls.py new file mode 100644 index 0000000000..20473be20c --- /dev/null +++ b/desc/optimize/aug_lagrangian_ls.py @@ -0,0 +1,330 @@ +"""Augmented Langrangian for vector valued objectives.""" + +from scipy.optimize import NonlinearConstraint, OptimizeResult + +from desc.backend import jnp +from desc.optimize.least_squares import lsqtr + +from .bound_utils import find_active_constraints +from .utils import ( + check_termination, + inequality_to_bounds, + print_header_nonlinear, + print_iteration_nonlinear, +) + + +def lsq_auglag( # noqa: C901 - FIXME: simplify this + fun, + x0, + jac, + bounds=(-jnp.inf, jnp.inf), + constraint=None, + args=(), + x_scale=1, + ftol=1e-6, + xtol=1e-6, + gtol=1e-6, + ctol=1e-6, + verbose=1, + maxiter=None, + options={}, +): + """Minimize a function with constraints using an augmented Langrangian method. + + The objective function is assumed to be vector valued, and is minimized in the least + squares sense. + + Parameters + ---------- + fun : callable + objective to be minimized. Should have a signature like fun(x,*args)-> 1d array + x0 : array-like + initial guess + jac : callable: + function to compute Jacobian matrix of fun + bounds : tuple of array-like + Lower and upper bounds on independent variables. 
Defaults to no bounds. + Each array must match the size of x0 or be a scalar, in the latter case a + bound will be the same for all variables. Use np.inf with an appropriate sign + to disable bounds on all or some variables. + constraint : scipy.optimize.NonlinearConstraint + constraint to be satisfied + args : tuple + additional arguments passed to fun, grad, and hess + x_scale : array_like or ``'hess'``, optional + Characteristic scale of each variable. Setting ``x_scale`` is equivalent + to reformulating the problem in scaled variables ``xs = x / x_scale``. + An alternative view is that the size of a trust region along jth + dimension is proportional to ``x_scale[j]``. Improved convergence may + be achieved by setting ``x_scale`` such that a step of a given size + along any of the scaled variables has a similar effect on the cost + function. If set to ``'hess'``, the scale is iteratively updated using the + inverse norms of the columns of the Hessian matrix. + ftol : float or None, optional + Tolerance for termination by the change of the cost function. + The optimization process is stopped when ``dF < ftol * F``, + and there was an adequate agreement between a local quadratic model and + the true model in the last step. If None, the termination by this + condition is disabled. + xtol : float or None, optional + Tolerance for termination by the change of the independent variables. + Optimization is stopped when ``norm(dx) < xtol * (xtol + norm(x))``. + If None, the termination by this condition is disabled. + gtol : float or None, optional + Absolute tolerance for termination by the norm of the gradient. + Optimizer teriminates when ``norm(g) < gtol``, where + If None, the termination by this condition is disabled. + ctol : float, optional + Tolerance for stopping based on infinity norm of the constraint violation. + Optimizer terminates when ``max(abs(constr_violation)) < ctol`` AND one or more + of the other tolerances are met (``ftol``, ``xtol``, ``gtol``) + verbose : {0, 1, 2}, optional + * 0 (default) : work silently. + * 1 : display a termination report. + * 2 : display progress during iterations + maxiter : int, optional + maximum number of iterations. Defaults to size(x)*100 + options : dict, optional + dictionary of optional keyword arguments to override default solver settings. + See the code for more details. + + Returns + ------- + res : OptimizeResult + The optimization result represented as a ``OptimizeResult`` object. + Important attributes are: ``x`` the solution array, ``success`` a + Boolean flag indicating if the optimizer exited successfully. 
+ + """ + if constraint is None: + # create a dummy constraint + constraint = NonlinearConstraint( + fun=lambda x, *args: jnp.array([0.0]), + lb=0.0, + ub=0.0, + jac=lambda x, *args: jnp.zeros((1, x.size)), + ) + ( + z0, + fun_wrapped, + jac_wrapped, + _, + constraint_wrapped, + zbounds, + z2xs, + ) = inequality_to_bounds( + x0, + fun, + jac, + None, + constraint, + bounds, + ) + + # L(x,y,mu) = 1/2 f(x)^2 - y*c(x) + mu/2 c(x)^2 + y^2/(2*mu) + # = 1/2 f(x)^2 + 1/2 [-y/sqrt(mu) + sqrt(mu) c(x)]^2 + + def lagfun(z, y, mu, *args): + f = fun_wrapped(z, *args) + c = constraint_wrapped.fun(z, *args) + c = -y / jnp.sqrt(mu) + jnp.sqrt(mu) * c + return jnp.concatenate((f, c)) + + def lagjac(z, y, mu, *args): + Jf = jac_wrapped(z, *args) + Jc = constraint_wrapped.jac(z, *args) + Jc = jnp.sqrt(mu) * Jc + return jnp.vstack((Jf, Jc)) + + def laggrad(z, y, mu, *args): + f = lagfun(z, y, mu, *args) + J = lagjac(z, y, mu, *args) + return f @ J + + nfev = 0 + njev = 0 + iteration = 0 + + z = z0.copy() + f = fun_wrapped(z, *args) + cost = 1 / 2 * jnp.dot(f, f) + c = constraint_wrapped.fun(z) + nfev += 1 + + mu = options.pop("initial_penalty_parameter", 10) + y = options.pop("initial_multipliers", None) + if y is None: # use least squares multiplier estimates + _J = constraint_wrapped.jac(z, *args) + _g = f @ jac_wrapped(z, *args) + y = jnp.linalg.lstsq(_J.T, _g)[0] + y = jnp.nan_to_num(y, nan=0.0, posinf=0.0, neginf=0.0) + + if maxiter is None: + maxiter = z.size * 100 + maxiter_inner = options.pop("maxiter_inner", 20) + max_nfev = options.pop("max_nfev", 5 * maxiter + 1) + max_njev = options.pop("max_njev", maxiter + 1) + + # notation following Conn & Gould, algorithm 14.4.2, but with our mu = their mu^-1 + omega = options.pop("omega", 1.0) + eta = options.pop("eta", 1.0) + alpha_omega = options.pop("alpha_omega", 1.0) + beta_omega = options.pop("beta_omega", 1.0) + alpha_eta = options.pop("alpha_eta", 0.1) + beta_eta = options.pop("beta_eta", 0.9) + tau = options.pop("tau", 10) + + gtolk = omega / mu**alpha_omega + ctolk = eta / mu**alpha_eta + zold = z + cost_old = cost + allx = [] + + success = None + message = None + step_norm = jnp.inf + actual_reduction = jnp.inf + + g_norm = jnp.linalg.norm(laggrad(z, y, mu, *args), ord=jnp.inf) + constr_violation = jnp.linalg.norm(c, ord=jnp.inf) + + options.setdefault("initial_trust_radius", "scipy") + options["return_tr"] = True + + if verbose > 1: + print_header_nonlinear(True, "Penalty param", "max(|mltplr|)") + print_iteration_nonlinear( + iteration, + nfev, + cost, + actual_reduction, + step_norm, + g_norm, + constr_violation, + mu, + jnp.max(jnp.abs(y)), + ) + + while iteration < maxiter: + result = lsqtr( + lagfun, + z, + lagjac, + bounds=zbounds, + args=(y, mu) + args, + x_scale=x_scale, + ftol=0, + xtol=0, + gtol=gtolk, + maxiter=maxiter_inner, + verbose=0, + options=options.copy(), + ) + # update outer counters + allx += result["allx"] + nfev += result["nfev"] + njev += result["njev"] + iteration += result["nit"] + z = result["x"] + f = fun_wrapped(z, *args) + cost = 1 / 2 * jnp.dot(f, f) + c = constraint_wrapped.fun(z) + nfev += 1 + constr_violation = jnp.linalg.norm(c, ord=jnp.inf) + step_norm = jnp.linalg.norm(zold - z) + z_norm = jnp.linalg.norm(z) + g_norm = result["optimality"] + actual_reduction = cost_old - cost + # don't stop if we increased cost + reduction_ratio = jnp.sign(actual_reduction) + # reuse the previous trust radius on the next pass + options["initial_trust_radius"] = float(result["alltr"][-1]) + + if verbose > 1: + 
print_iteration_nonlinear( + iteration, + nfev, + cost, + actual_reduction, + step_norm, + g_norm, + constr_violation, + mu, + jnp.max(jnp.abs(y)), + ) + + # check if we can stop the outer loop + success, message = check_termination( + actual_reduction, + cost, + step_norm, + z_norm, + g_norm, + reduction_ratio, + ftol, + xtol, + gtol, + iteration, + maxiter, + nfev, + max_nfev, + njev, + max_njev, + 0, + jnp.inf, + constr_violation=constr_violation, + ctol=ctol, + ) + if success is not None: + break + + if not result["success"]: # did the subproblem actually finish, or maxiter? + continue + elif constr_violation < ctolk: + y = y - mu * c + ctolk = ctolk / (mu**beta_eta) + gtolk = gtolk / (mu**beta_omega) + else: + mu = tau * mu + ctolk = eta / (mu**alpha_eta) + gtolk = omega / (mu**alpha_omega) + + zold = z + cost_old = cost + + x, s = z2xs(z) + active_mask = find_active_constraints(z, zbounds[0], zbounds[1], rtol=xtol) + result = OptimizeResult( + x=x, + s=s, + y=y, + success=success, + cost=cost, + fun=f, + grad=result["grad"], + v=result["v"], + jac=result["jac"], + optimality=g_norm, + nfev=nfev, + njev=njev, + nit=iteration, + message=message, + active_mask=active_mask, + constr_violation=constr_violation, + ) + if verbose > 0: + if result["success"]: + print(result["message"]) + else: + print("Warning: " + result["message"]) + print(f""" Current function value: {result["cost"]:.3e}""") + print(f""" Constraint violation: {result['constr_violation']:.3e}""") + print(f""" Total delta_x: {jnp.linalg.norm(x0 - result["x"]):.3e}""") + print(f""" Iterations: {result["nit"]:d}""") + print(f""" Function evaluations: {result["nfev"]:d}""") + print(f""" Jacobian evaluations: {result["njev"]:d}""") + + result["allx"] = [z2xs(x)[0] for x in allx] + + return result diff --git a/desc/optimize/fmin_scalar.py b/desc/optimize/fmin_scalar.py index a43f5ee338..150b95fc1e 100644 --- a/desc/optimize/fmin_scalar.py +++ b/desc/optimize/fmin_scalar.py @@ -79,19 +79,18 @@ def fmintr( # noqa: C901 - FIXME: simplify this function. If set to ``'hess'``, the scale is iteratively updated using the inverse norms of the columns of the Hessian matrix. ftol : float or None, optional - Tolerance for termination by the change of the cost function. Default - is 1e-8. The optimization process is stopped when ``dF < ftol * F``, + Tolerance for termination by the change of the cost function. + The optimization process is stopped when ``dF < ftol * F``, and there was an adequate agreement between a local quadratic model and the true model in the last step. If None, the termination by this condition is disabled. xtol : float or None, optional Tolerance for termination by the change of the independent variables. - Default is 1e-8. Optimization is stopped when - ``norm(dx) < xtol * (xtol + norm(x))``. If None, the termination by - this condition is disabled. + Optimization is stopped when ``norm(dx) < xtol * (xtol + norm(x))``. + If None, the termination by this condition is disabled. gtol : float or None, optional - Absolute tolerance for termination by the norm of the gradient. Default is 1e-8. - Optimizer teriminates when ``norm(g) < gtol``, where + Absolute tolerance for termination by the norm of the gradient. + Optimizer teriminates when ``max(abs(g)) < gtol``. If None, the termination by this condition is disabled. verbose : {0, 1, 2}, optional * 0 (default) : work silently. 
@@ -168,7 +167,7 @@ def fmintr( # noqa: C901 - FIXME: simplify this subproblem = trust_region_step_exact_cho else: raise ValueError( - colored("method should be one of 'dogleg' or 'subspace'", "red") + colored("method should be one of 'exact', 'dogleg' or 'subspace'", "red") ) if maxiter is None: @@ -405,6 +404,7 @@ def fmintr( # noqa: C901 - FIXME: simplify this success=success, fun=f, grad=g, + v=v, hess=H, optimality=g_norm, nfev=nfev, diff --git a/desc/optimize/least_squares.py b/desc/optimize/least_squares.py index 2b9b6754d6..2ac5b15856 100644 --- a/desc/optimize/least_squares.py +++ b/desc/optimize/least_squares.py @@ -69,19 +69,18 @@ def lsqtr( # noqa: C901 - FIXME: simplify this function. If set to ``'jac'``, the scale is iteratively updated using the inverse norms of the columns of the Jacobian matrix. ftol : float or None, optional - Tolerance for termination by the change of the cost function. Default - is 1e-8. The optimization process is stopped when ``dF < ftol * F``, + Tolerance for termination by the change of the cost function. + The optimization process is stopped when ``dF < ftol * F``, and there was an adequate agreement between a local quadratic model and the true model in the last step. If None, the termination by this condition is disabled. xtol : float or None, optional Tolerance for termination by the change of the independent variables. - Default is 1e-8. Optimization is stopped when - ``norm(dx) < xtol * (xtol + norm(x))``. If None, the termination by - this condition is disabled. + Optimization is stopped when ``norm(dx) < xtol * (xtol + norm(x))``. + If None, the termination by this condition is disabled. gtol : float or None, optional - Absolute tolerance for termination by the norm of the gradient. Default is 1e-8. - Optimizer teriminates when ``norm(g) < gtol``, where + Absolute tolerance for termination by the norm of the gradient. + Optimizer teriminates when ``max(abs(g)) < gtol``. If None, the termination by this condition is disabled. verbose : {0, 1, 2}, optional * 0 (default) : work silently. @@ -389,6 +388,7 @@ def lsqtr( # noqa: C901 - FIXME: simplify this cost=cost, fun=f, grad=g, + v=v, jac=J, optimality=g_norm, nfev=nfev, diff --git a/desc/optimize/optimizer.py b/desc/optimize/optimizer.py index 328ed9db68..cdfdf681f8 100644 --- a/desc/optimize/optimizer.py +++ b/desc/optimize/optimizer.py @@ -73,6 +73,7 @@ def optimize( # noqa: C901 - FIXME: simplify this ftol=None, xtol=None, gtol=None, + ctol=None, x_scale="auto", verbose=1, maxiter=None, @@ -102,6 +103,10 @@ def optimize( # noqa: C901 - FIXME: simplify this Absolute tolerance for termination by the norm of the gradient. Optimizer terminates when ``norm(g) < gtol``, where If None, defaults to 1e-8. + ctol : float or None, optional + Stopping tolerance on infinity norm of the constraint violation. + Optimization will stop when ctol and one of the other tolerances + are satisfied. If None, defaults to 1e-4. x_scale : array_like or ``'auto'``, optional Characteristic scale of each variable. Setting ``x_scale`` is equivalent to reformulating the problem in scaled variables ``xs = x / x_scale``. 
@@ -204,7 +209,7 @@ def optimize( # noqa: C901 - FIXME: simplify this ftol, xtol, gtol, - None, + ctol, maxiter, options, ) @@ -226,6 +231,8 @@ def optimize( # noqa: C901 - FIXME: simplify this if verbose > 0: print("Starting optimization") + print("Using method: " + str(self.method)) + timer.start("Solution time") result = optimizers[method]["fun"]( diff --git a/desc/optimize/stochastic.py b/desc/optimize/stochastic.py index bd1a5c5e9d..ac918920f0 100644 --- a/desc/optimize/stochastic.py +++ b/desc/optimize/stochastic.py @@ -47,19 +47,15 @@ def sgd( Step size update rule. Currently only the default "sgd" is available. Future updates may include RMSProp, Adam, etc. ftol : float or None, optional - Tolerance for termination by the change of the cost function. Default - is 1e-8. The optimization process is stopped when ``dF < ftol * F``, - and there was an adequate agreement between a local quadratic model and - the true model in the last step. If None, the termination by this - condition is disabled. + Tolerance for termination by the change of the cost function. + The optimization process is stopped when ``dF < ftol * F``. xtol : float or None, optional Tolerance for termination by the change of the independent variables. - Default is 1e-8. Optimization is stopped when - ``norm(dx) < xtol * (xtol + norm(x))``. If None, the termination by - this condition is disabled. + Optimization is stopped when ``norm(dx) < xtol * (xtol + norm(x))``. + If None, the termination by this condition is disabled. gtol : float or None, optional - Absolute tolerance for termination by the norm of the gradient. Default is 1e-6. - Optimizer teriminates when ``norm(g) < gtol``, where + Absolute tolerance for termination by the norm of the gradient. + Optimizer teriminates when ``max(abs(g)) < gtol``. If None, the termination by this condition is disabled. verbose : {0, 1, 2}, optional * 0 (default) : work silently. diff --git a/desc/optimize/utils.py b/desc/optimize/utils.py index af7c249431..c48cd3ef89 100644 --- a/desc/optimize/utils.py +++ b/desc/optimize/utils.py @@ -1,8 +1,136 @@ """Utility functions used in optimization problems.""" +import copy + import numpy as np -from desc.backend import cond, jit, jnp +from desc.backend import cond, jit, jnp, put +from desc.utils import Index + + +def inequality_to_bounds(x0, fun, grad, hess, constraint, bounds): + """Convert inequality constraints to bounds using slack variables. 
+ + We do this by introducing slack variables s + + ie, lb < con(x) < ub --> con(x) - s == 0, lb < s < ub + + A new state vector z is defined as [x, s] and the problem + is transformed into one that has only equality constraints + and simple bounds on the variables z + + Parameters + ---------- + x0 : ndarray + Starting point for primal variables + fun, grad, hess : callable + Functions for computing the objective and derivatives + constraint : scipy.optimize.NonlinearConstraint + constraint object of both equality and inequality constraints + bounds : tuple + lower and upper bounds for primal variables x + + Returns + ------- + z0 : ndarray + Starting point for primal + slack variables + fun, grad, hess : callable + functions for computing objective and derivatives wrt z + constraint : scipy.optimize.NonlinearConstraint + constraint containing just equality constraints + bounds : tuple + lower and upper bounds on combined variable z + z2xs : callable + function for splitting combined variable z into primal + and slack variables x and s + + """ + c0 = constraint.fun(x0) + ncon = c0.size + bounds = tuple(jnp.broadcast_to(bi, x0.shape) for bi in bounds) + cbounds = (constraint.lb, constraint.ub) + cbounds = tuple(jnp.broadcast_to(bi, c0.shape) for bi in cbounds) + lbs, ubs = cbounds + lbx, ubx = bounds + + ineq_mask = lbs != ubs + eq_mask = lbs == ubs + eq_target = lbs[~ineq_mask] + nslack = jnp.sum(ineq_mask) + zbounds = ( + jnp.concatenate([lbx, lbs[ineq_mask]]), + jnp.concatenate([ubx, ubs[ineq_mask]]), + ) + s0 = c0[ineq_mask] + s0 = jnp.clip(s0, lbs[ineq_mask], ubs[ineq_mask]) + z0 = jnp.concatenate([x0, s0]) + target = jnp.zeros(c0.size) + target = put(target, eq_mask, eq_target) + + def z2xs(z): + return z[: len(z) - nslack], z[len(z) - nslack :] + + def fun_wrapped(z, *args): + x, s = z2xs(z) + return fun(x, *args) + + if hess is None: + # assume grad is really jac of least squares + def grad_wrapped(z, *args): + x, s = z2xs(z) + g = grad(x, *args) + return jnp.hstack([g, jnp.zeros((g.shape[0], nslack))]) + + else: + + def grad_wrapped(z, *args): + x, s = z2xs(z) + g = grad(x, *args) + return jnp.concatenate([g, jnp.zeros(nslack)]) + + if callable(hess): + + def hess_wrapped(z, *args): + x, s = z2xs(z) + H = hess(x, *args) + return jnp.pad(H, (0, nslack)) + + else: # using BFGS + hess_wrapped = hess + + def confun_wrapped(z, *args): + x, s = z2xs(z) + c = constraint.fun(x, *args) + sbig = jnp.zeros(ncon) + sbig = put(sbig, ineq_mask, s) + return c - sbig - target + + def conjac_wrapped(z, *args): + x, s = z2xs(z) + J = constraint.jac(x, *args) + I = jnp.eye(nslack) + Js = jnp.zeros((ncon, nslack)) + Js = put(Js, Index[ineq_mask, :], -I) + return jnp.hstack([J, Js]) + + if callable(constraint.hess): + + def conhess_wrapped(z, y, *args): + x, s = z2xs(z) + H = constraint.hess(x, y, *args) + return jnp.pad(H, (0, nslack)) + + else: # using BFGS + conhess_wrapped = constraint.hess + + newcon = copy.copy(constraint) + newcon.fun = confun_wrapped + newcon.jac = conjac_wrapped + newcon.hess = conhess_wrapped + newcon.lb = target + newcon.ub = target + + return z0, fun_wrapped, grad_wrapped, hess_wrapped, newcon, zbounds, z2xs @jit @@ -188,7 +316,7 @@ def evaluate_quadratic_form_jac(J, g, s, diag=None): return 0.5 * q + l -def print_header_nonlinear(constrained=False): +def print_header_nonlinear(constrained=False, *args): """Print a pretty header.""" s = "{:^15}{:^15}{:^15}{:^15}{:^15}{:^15}".format( "Iteration", @@ -199,12 +327,21 @@ def print_header_nonlinear(constrained=False): 
"Optimality", ) if constrained: - s += "{:^24}".format("Constraint violation") + s += "{:^15}".format("Constr viol.") + for arg in args: + s += "{:^15}".format(arg) print(s) def print_iteration_nonlinear( - iteration, nfev, cost, cost_reduction, step_norm, optimality, constr_violation=None + iteration, + nfev, + cost, + cost_reduction, + step_norm, + optimality, + constr_violation=None, + *args, ): """Print a line of optimizer output.""" if iteration is None or abs(iteration) == np.inf: @@ -220,27 +357,29 @@ def print_iteration_nonlinear( if cost is None or abs(cost) == np.inf: cost = " " * 15 else: - cost = "{:^15.4e}".format(cost) + cost = "{: ^15.3e}".format(cost) if cost_reduction is None or abs(cost_reduction) == np.inf: cost_reduction = " " * 15 else: - cost_reduction = "{:^15.2e}".format(cost_reduction) + cost_reduction = "{: ^15.3e}".format(cost_reduction) if step_norm is None or abs(step_norm) == np.inf: step_norm = " " * 15 else: - step_norm = "{:^15.2e}".format(step_norm) + step_norm = "{:^15.3e}".format(step_norm) if optimality is None or abs(optimality) == np.inf: optimality = " " * 15 else: - optimality = "{:^15.2e}".format(optimality) + optimality = "{:^15.3e}".format(optimality) s = "{}{}{}{}{}{}".format( iteration, nfev, cost, cost_reduction, step_norm, optimality ) if constr_violation is not None: - s += "{:^24.2e}".format(constr_violation) + s += "{:^15.3e}".format(constr_violation) + for arg in args: + s += "{:^15.3e}".format(arg) print(s) diff --git a/docs/optimizers.csv b/docs/optimizers.csv index baac84c289..d730294658 100644 --- a/docs/optimizers.csv +++ b/docs/optimizers.csv @@ -1,9 +1,13 @@ Name,Scalar,Equality Constraints,Inequality Constraints,Stochastic,Hessian,GPU,Description +``fmin-auglag``,True,True,True,False,True,True,Augmented Lagrangian method with trust region subproblem. +``lsq-auglag``,False,True,True,False,False,True,Least Squares Augmented Lagrangian approach to constrained optimization ``lsq-exact``,False,False,False,False,False,True,"Trust region least squares method, similar to the `trf` method in scipy" -``dogleg``,True,False,False,False,True,True,Trust region method using Powell's dogleg method to approximately solve the trust region subproblem. -``subspace``,True,False,False,False,True,True,Trust region method solving the subproblem over the 2d subspace spanned by the gradient and newton direction. -``dogleg-bfgs``,True,False,False,False,False,True,Trust region method using Powell's dogleg method to approximately solve the trust region subproblem. Uses BFGS to approximate hessian -``subspace-bfgs``,True,False,False,False,False,True,Trust region method solving the subproblem over the 2d subspace spanned by the gradient and newton direction. Uses BFGS to approximate hessian +``fmin-exact``,True,False,False,False,True,True,Trust region method using iterative cholesky method to exactly solve the trust region subproblem. +``fmin-dogleg``,True,False,False,False,True,True,Trust region method using Powell's dogleg method to approximately solve the trust region subproblem. +``fmin-subspace``,True,False,False,False,True,True,Trust region method solving the subproblem over the 2d subspace spanned by the gradient and newton direction. +``fmin-exact-bfgs``,True,False,False,False,False,True,Trust region method using iterative cholesky method to exactly solve the trust region subproblem. 
Uses BFGS to approximate hessian +``fmin-dogleg-bfgs``,True,False,False,False,False,True,Trust region method using Powell's dogleg method to approximately solve the trust region subproblem. Uses BFGS to approximate hessian +``fmin-subspace-bfgs``,True,False,False,False,False,True,Trust region method solving the subproblem over the 2d subspace spanned by the gradient and newton direction. Uses BFGS to approximate hessian ``sgd``,True,False,False,True,False,True,Stochastic gradient descent with Nesterov momentum ``scipy-bfgs``,True,False,False,False,False,False,BFGS quasi-newton method with line search. See https://docs.scipy.org/doc/scipy/reference/optimize.minimize-bfgs.html ``scipy-CG``,True,False,False,False,False,False,Nonlinear conjugate gradient method. See https://docs.scipy.org/doc/scipy/reference/optimize.minimize-cg.html diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index 722e961c34..5050090abe 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -3,7 +3,14 @@ import numpy as np import pytest from numpy.random import default_rng -from scipy.optimize import BFGS, least_squares, rosen, rosen_der +from scipy.optimize import ( + BFGS, + NonlinearConstraint, + least_squares, + minimize, + rosen, + rosen_der, +) import desc.examples from desc.backend import jit, jnp @@ -20,6 +27,8 @@ FixPressure, FixPsi, ForceBalance, + GenericObjective, + MagneticWell, MeanCurvature, ObjectiveFunction, Volume, @@ -29,7 +38,9 @@ LinearConstraintProjection, Optimizer, ProximalProjection, + fmin_auglag, fmintr, + lsq_auglag, lsqtr, optimizers, sgd, @@ -412,12 +423,12 @@ def test_maxiter_1_and_0_solve(): objectives = ForceBalance() obj = ObjectiveFunction(objectives) eq = desc.examples.get("SOLOVEV") - for opt in ["lsq-exact", "dogleg-bfgs"]: + for opt in ["lsq-exact", "fmin-dogleg-bfgs"]: eq, result = eq.solve( maxiter=1, constraints=constraints, objective=obj, optimizer=opt, verbose=3 ) assert result["nit"] == 1 - for opt in ["lsq-exact", "dogleg-bfgs"]: + for opt in ["lsq-exact", "fmin-dogleg-bfgs"]: eq, result = eq.solve( maxiter=0, constraints=constraints, objective=obj, optimizer=opt, verbose=3 ) @@ -725,3 +736,185 @@ def hess(x): np.testing.assert_allclose(out1["x"], out3["x"], rtol=1e-06, atol=1e-06) np.testing.assert_allclose(out2["x"], out3["x"], rtol=1e-06, atol=1e-06) + + +@pytest.mark.unit +def test_auglag(): + """Test that our augmented lagrangian works as well as scipy for convex problems.""" + rng = default_rng(12) + + n = 15 # number of variables + m = 10 # number of constraints + p = 7 # number of primals + Qs = [] + gs = [] + for i in range(m + p): + A = 0.5 - rng.random((n, n)) + Qs.append(A.T @ A) + gs.append(0.5 - rng.random(n)) + + @jit + def vecfun(x): + y0 = x @ Qs[0] + gs[0] + y1 = x @ Qs[1] + gs[1] + y = jnp.concatenate([y0, y1**2]) + return y + + @jit + def fun(x): + y = vecfun(x) + return 1 / 2 * jnp.dot(y, y) + + grad = jit(Derivative(fun, mode="grad")) + hess = jit(Derivative(fun, mode="hess")) + jac = jit(Derivative(vecfun, mode="fwd")) + + @jit + def con(x): + cs = [] + for i in range(m): + cs.append(x @ Qs[p + i] @ x + gs[p + i] @ x) + return jnp.array(cs) + + conjac = jit(Derivative(con, mode="fwd")) + conhess = jit(Derivative(lambda x, v: v @ con(x), mode="hess")) + + constraint = NonlinearConstraint(con, -np.inf, 0, conjac, conhess) + x0 = rng.random(n) + + out1 = fmin_auglag( + fun, + x0, + grad, + hess=hess, + bounds=(-jnp.inf, jnp.inf), + constraint=constraint, + args=(), + x_scale="auto", + ftol=0, + xtol=1e-6, + gtol=1e-6, + 
ctol=1e-6, + verbose=3, + maxiter=None, + options={}, + ) + print(out1["active_mask"]) + out2 = lsq_auglag( + vecfun, + x0, + jac, + bounds=(-jnp.inf, jnp.inf), + constraint=constraint, + args=(), + x_scale="auto", + ftol=0, + xtol=1e-6, + gtol=1e-6, + ctol=1e-6, + verbose=3, + maxiter=None, + options={}, + ) + + out3 = minimize( + fun, + x0, + jac=grad, + hess=hess, + constraints=constraint, + method="trust-constr", + options={"verbose": 3, "maxiter": 1000}, + ) + + np.testing.assert_allclose(out1["x"], out3["x"], rtol=1e-4, atol=1e-4) + np.testing.assert_allclose(out2["x"], out3["x"], rtol=1e-4, atol=1e-4) + + +@pytest.mark.slow +@pytest.mark.regression +def test_constrained_AL_lsq(): + """Tests that the least squares augmented Lagrangian optimizer does something.""" + eq = desc.examples.get("SOLOVEV") + + constraints = ( + FixBoundaryR(modes=[0, 0, 0]), # fix specified major axis position + FixBoundaryZ(), # fix Z shape but not R + FixPressure(), # fix pressure profile + FixIota(), # fix rotational transform profile + FixPsi(), # fix total toroidal magnetic flux + ) + # some random constraints to keep the shape from getting wacky + V = eq.compute("V")["V"] + Vbounds = (0.95 * V, 1.05 * V) + AR = eq.compute("R0/a")["R0/a"] + ARbounds = (0.95 * AR, 1.05 * AR) + constraints += ( + Volume(bounds=Vbounds), + AspectRatio(bounds=ARbounds), + MagneticWell(bounds=(0, jnp.inf)), + ForceBalance(bounds=(-1e-3, 1e-3), normalize_target=False), + ) + H = eq.compute( + "curvature_H", grid=LinearGrid(M=eq.M_grid, N=eq.N_grid, NFP=eq.NFP) + )["curvature_H"] + obj = ObjectiveFunction(MeanCurvature(target=H)) + eq2, result = eq.optimize( + objective=obj, + constraints=constraints, + optimizer="lsq-auglag", + maxiter=500, + verbose=3, + x_scale="auto", + copy=True, + options={}, + ) + V2 = eq2.compute("V")["V"] + AR2 = eq2.compute("R0/a")["R0/a"] + Dwell = constraints[-2].compute(*constraints[-2].xs(eq2)) + assert ARbounds[0] < AR2 < ARbounds[1] + assert Vbounds[0] < V2 < Vbounds[1] + assert eq2.is_nested() + np.testing.assert_array_less(-Dwell, 0) + + +@pytest.mark.slow +@pytest.mark.regression +def test_constrained_AL_scalar(): + """Tests that the augmented Lagrangian constrained optimizer does something.""" + eq = desc.examples.get("SOLOVEV") + + constraints = ( + FixBoundaryR(modes=[0, 0, 0]), # fix specified major axis position + FixBoundaryZ(), # fix Z shape but not R + FixPressure(), # fix pressure profile + FixIota(), # fix rotational transform profile + FixPsi(), # fix total toroidal magnetic flux + ) + V = eq.compute("V")["V"] + AR = eq.compute("R0/a")["R0/a"] + constraints += ( + Volume(target=V), + AspectRatio(target=AR), + MagneticWell(bounds=(0, jnp.inf)), + ForceBalance(bounds=(-1e-3, 1e-3), normalize_target=False), + ) + # Dummy objective to return 0, we just want a feasible solution. + obj = ObjectiveFunction(GenericObjective("0")) + eq2, result = eq.optimize( + objective=obj, + constraints=constraints, + optimizer="fmin-auglag", + maxiter=500, + verbose=3, + x_scale="auto", + copy=True, + options={}, + ) + V2 = eq2.compute("V")["V"] + AR2 = eq2.compute("R0/a")["R0/a"] + Dwell = constraints[-2].compute(*constraints[-2].xs(eq2)) + np.testing.assert_allclose(AR, AR2) + np.testing.assert_allclose(V, V2) + assert eq2.is_nested() + np.testing.assert_array_less(-Dwell, 0)
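
A minimal usage sketch of the new augmented Lagrangian interface, assuming only the signatures added above (``fmin_auglag`` exported from ``desc.optimize``, taking a scalar objective with explicit ``grad``/``hess`` callables and a ``scipy.optimize.NonlinearConstraint``). The toy problem, the names ``fun``, ``grad``, ``hess``, ``con``, ``conjac``, and the tolerance values are illustrative only, not taken from the DESC codebase.

    from scipy.optimize import NonlinearConstraint

    from desc.backend import jnp
    from desc.optimize import fmin_auglag

    # minimize ||x - a||^2 subject to sum(x) <= 1
    a = jnp.array([1.0, 2.0, 3.0])

    def fun(x):
        """Scalar objective."""
        return jnp.sum((x - a) ** 2)

    def grad(x):
        """Analytic gradient of fun."""
        return 2 * (x - a)

    def hess(x):
        """Analytic Hessian of fun."""
        return 2 * jnp.eye(x.size)

    def con(x):
        """Constraint function; returns a 1d array."""
        return jnp.atleast_1d(jnp.sum(x))

    def conjac(x):
        """Jacobian of con, shape (1, n)."""
        return jnp.ones((1, x.size))

    constraint = NonlinearConstraint(con, -jnp.inf, 1.0, conjac)

    result = fmin_auglag(
        fun,
        x0=jnp.zeros(3),
        grad=grad,
        hess=hess,
        constraint=constraint,
        ftol=0,
        xtol=1e-8,
        gtol=1e-8,
        ctol=1e-8,
        verbose=1,
        maxiter=1000,
    )
    # result["x"] should be close to a - 5/3, the projection of a
    # onto the half-space sum(x) <= 1

At the Equilibrium level, the same path is exercised by passing one of the new optimizer names together with the new ``ctol`` keyword, e.g. ``eq.optimize(objective=obj, constraints=constraints, optimizer="lsq-auglag", ctol=1e-4)``, as in the regression tests above.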