diff --git a/desc/objectives/utils.py b/desc/objectives/utils.py index 654bb17783..192942a415 100644 --- a/desc/objectives/utils.py +++ b/desc/objectives/utils.py @@ -9,7 +9,7 @@ from desc.utils import Index, errorif, flatten_list, svd_inv_null, unique_list, warnif -def factorize_linear_constraints(objective, constraint): # noqa: C901 +def factorize_linear_constraints(objective, constraint, x_scale="auto"): # noqa: C901 """Compute and factorize A to get pseudoinverse and nullspace. Given constraints of the form Ax=b, factorize A to find a particular solution xp @@ -22,6 +22,10 @@ def factorize_linear_constraints(objective, constraint): # noqa: C901 Objective function to optimize. constraint : ObjectiveFunction Objective function of linear constraints to enforce. + x_scale : array_like or ``'auto'``, optional + Characteristic scale of each variable. Setting ``x_scale`` is equivalent + to reformulating the problem in scaled variables ``xs = x / x_scale``. + If set to ``'auto'``, the scale is determined from the initial state vector. Returns ------- @@ -33,6 +37,8 @@ def factorize_linear_constraints(objective, constraint): # noqa: C901 Combined RHS vector. Z : ndarray Null space operator for full combined A such that A @ Z == 0. + D : ndarray + Scale of the full state vector x, as set by the parameter ``x_scale``. unfixed_idx : ndarray Indices of x that correspond to non-fixed values. project, recover : function @@ -130,32 +136,53 @@ def factorize_linear_constraints(objective, constraint): # noqa: C901 ) A = A[unfixed_rows][:, unfixed_idx] b = b[unfixed_rows] + unfixed_idx = indices_idx + fixed_idx = np.delete(np.arange(xp.size), unfixed_idx) + + # compute x_scale if not provided + if x_scale == "auto": + x_scale = objective.x(*objective.things) + errorif( + x_scale.shape != xp.shape, + ValueError, + "x_scale must be the same size as the full state vector. " + + f"Got size {x_scale.size} for state vector of size {xp.size}.", + ) + D = np.where(np.abs(x_scale) < 1e2, 1, np.abs(x_scale)) + + # null space & particular solution + A = A * D[None, unfixed_idx] if A.size: - Ainv_full, Z = svd_inv_null(A) + A_inv, Z = svd_inv_null(A) else: - Ainv_full = A.T + A_inv = A.T Z = np.eye(A.shape[1]) - Ainv_full = jnp.asarray(Ainv_full) - Z = jnp.asarray(Z) - b = jnp.asarray(b) - xp = put(xp, unfixed_idx, Ainv_full @ b) + xp = put(xp, unfixed_idx, A_inv @ b) + xp = put(xp, fixed_idx, ((1 / D) * xp)[fixed_idx]) + + # cast to jnp arrays xp = jnp.asarray(xp) + A = jnp.asarray(A) + b = jnp.asarray(b) + Z = jnp.asarray(Z) + D = jnp.asarray(D) @jit - def project(x): + def project(x_full): """Project a full state vector into the reduced optimization vector.""" - x_reduced = Z.T @ ((x - xp)[unfixed_idx]) + x_reduced = Z.T @ ((1 / D) * x_full - xp)[unfixed_idx] return jnp.atleast_1d(jnp.squeeze(x_reduced)) @jit def recover(x_reduced): """Recover the full state vector from the reduced optimization vector.""" dx = put(jnp.zeros(objective.dim_x), unfixed_idx, Z @ x_reduced) - return jnp.atleast_1d(jnp.squeeze(xp + dx)) + x_full = D * (xp + dx) + return jnp.atleast_1d(jnp.squeeze(x_full)) # check that all constraints are actually satisfiable - params = objective.unpack_state(xp, False) + params = objective.unpack_state(D * xp, False) for con in constraint.objectives: xpi = [params[i] for i, t in enumerate(objective.things) if t in con.things] y1 = con.compute_unscaled(*xpi) @@ -198,7 +225,7 @@ def recover(x_reduced): "or be due to floating point error.", ) - return xp, A, b, Z, unfixed_idx, project, recover + return xp, A, b, Z, D, unfixed_idx, project, recover def softmax(arr, alpha): diff --git a/desc/optimize/_constraint_wrappers.py b/desc/optimize/_constraint_wrappers.py index ddeb280e29..57614644a6 100644 --- a/desc/optimize/_constraint_wrappers.py +++ b/desc/optimize/_constraint_wrappers.py @@ -103,6 +103,7 @@ def build(self, use_jit=None, verbose=1): self._A, self._b, self._Z, + self._D, self._unfixed_idx, self._project, self._recover, @@ -113,10 +114,8 @@ def build(self, use_jit=None, verbose=1): self._dim_x = self._objective.dim_x self._dim_x_reduced = self._Z.shape[1] - # equivalent matrix for A[unfixed_idx]@Z == A@unfixed_idx_mat - self._unfixed_idx_mat = ( - jnp.eye(self._objective.dim_x)[:, self._unfixed_idx] @ self._Z - ) + # equivalent matrix for A[unfixed_idx] @ D @ Z == A @ unfixed_idx_mat + self._unfixed_idx_mat = jnp.diag(self._D)[:, self._unfixed_idx] @ self._Z self._built = True timer.stop("Linear constraint projection build") @@ -261,7 +260,7 @@ def grad(self, x_reduced, constants=None): """ x = self.recover(x_reduced) df = self._objective.grad(x, constants) - return df[self._unfixed_idx] @ self._Z + return df[self._unfixed_idx] @ (self._Z * self._D[self._unfixed_idx, None]) def hess(self, x_reduced, constants=None): """Compute Hessian of self.compute_scalar. @@ -281,13 +280,19 @@ def hess(self, x_reduced, constants=None): """ x = self.recover(x_reduced) df = self._objective.hess(x, constants) - return self._Z.T @ df[self._unfixed_idx, :][:, self._unfixed_idx] @ self._Z + return ( + (self._Z.T * (1 / self._D)[None, self._unfixed_idx]) + @ df[self._unfixed_idx, :][:, self._unfixed_idx] + @ (self._Z * self._D[self._unfixed_idx, None]) + ) def _jac(self, x_reduced, constants=None, op="scaled"): x = self.recover(x_reduced) if self._objective._deriv_mode == "blocked": fun = getattr(self._objective, "jac_" + op) - return fun(x, constants)[:, self._unfixed_idx] @ self._Z + return fun(x, constants)[:, self._unfixed_idx] @ ( + self._Z * self._D[self._unfixed_idx, None] + ) v = self._unfixed_idx_mat df = getattr(self._objective, "jvp_" + op)(v.T, x, constants) @@ -401,7 +406,7 @@ def jvp_unscaled(self, v, x_reduced, constants=None): def _vjp(self, v, x_reduced, constants=None, op="vjp_scaled"): x = self.recover(x_reduced) df = getattr(self._objective, op)(v, x, constants) - return df[self._unfixed_idx] @ self._Z + return df[self._unfixed_idx] @ (self._Z * self._D[self._unfixed_idx, None]) def vjp_scaled(self, v, x_reduced, constants=None): """Compute vector-Jacobian product of self.compute_scaled. @@ -544,8 +549,8 @@ def _set_eq_state_vector(self): self._args.remove(arg) linear_constraint = ObjectiveFunction(self._linear_constraints) linear_constraint.build() - _, A, _, self._Z, self._unfixed_idx, _, _ = factorize_linear_constraints( - self._constraint, linear_constraint + _, _, _, self._Z, self._D, self._unfixed_idx, _, _ = ( + factorize_linear_constraints(self._constraint, linear_constraint) ) # dx/dc - goes from the full state to optimization variables for eq @@ -629,14 +634,14 @@ def build(self, use_jit=None, verbose=1): # noqa: C901 ) self._dimx_per_thing = [t.dim_x for t in self.things] - # equivalent matrix for A[unfixed_idx]@Z == A@unfixed_idx_mat + # equivalent matrix for A[unfixed_idx] @ D @ Z == A @ unfixed_idx_mat self._unfixed_idx_mat = jnp.eye(self._objective.dim_x) self._unfixed_idx_mat = jnp.split( self._unfixed_idx_mat, np.cumsum([t.dim_x for t in self.things]), axis=-1 ) - self._unfixed_idx_mat[self._eq_idx] = ( - self._unfixed_idx_mat[self._eq_idx][:, self._unfixed_idx] @ self._Z - ) + self._unfixed_idx_mat[self._eq_idx] = self._unfixed_idx_mat[self._eq_idx][ + :, self._unfixed_idx + ] @ (self._Z * self._D[self._unfixed_idx, None]) self._unfixed_idx_mat = np.concatenate( [np.atleast_2d(foo) for foo in self._unfixed_idx_mat], axis=-1 ) @@ -1029,7 +1034,8 @@ def jvp_unscaled(self, v, x, constants=None): @functools.partial(jit, static_argnames=("self", "op")) def _jvp_f(self, xf, dc, constants, op): Fx = getattr(self._constraint, "jac_" + op)(xf, constants) - Fx_reduced = Fx[:, self._unfixed_idx] @ self._Z + # TODO: replace with self._unfixed_idx_mat? + Fx_reduced = Fx @ jnp.diag(self._D)[:, self._unfixed_idx] @ self._Z Fc = Fx @ (self._dxdc @ dc) Fxh = Fx_reduced cutoff = jnp.finfo(Fxh.dtype).eps * max(Fxh.shape) diff --git a/desc/perturbations.py b/desc/perturbations.py index 18d116e2fd..d578de9732 100644 --- a/desc/perturbations.py +++ b/desc/perturbations.py @@ -202,7 +202,7 @@ def perturb( # noqa: C901 - FIXME: break this up into simpler pieces if verbose > 0: print("Factorizing linear constraints") timer.start("linear constraint factorize") - xp, _, _, Z, unfixed_idx, project, recover = factorize_linear_constraints( + xp, _, _, Z, D, unfixed_idx, project, recover = factorize_linear_constraints( objective, constraint ) timer.stop("linear constraint factorize") @@ -326,7 +326,7 @@ def perturb( # noqa: C901 - FIXME: break this up into simpler pieces print("Computing df") timer.start("df computation") Jx = objective.jac_scaled_error(x) - Jx_reduced = Jx[:, unfixed_idx] @ Z @ scale + Jx_reduced = Jx @ jnp.diag(D)[:, unfixed_idx] @ Z @ scale RHS1 = objective.jvp_scaled(tangents, x) if include_f: f = objective.compute_scaled_error(x) @@ -423,9 +423,7 @@ def perturb( # noqa: C901 - FIXME: break this up into simpler pieces con.update_target(eq_new) constraint = ObjectiveFunction(constraints) constraint.build(verbose=verbose) - xp, _, _, Z, unfixed_idx, project, recover = factorize_linear_constraints( - objective, constraint - ) + _, _, _, _, _, _, _, recover = factorize_linear_constraints(objective, constraint) # update other attributes dx_reduced = dx1_reduced + dx2_reduced + dx3_reduced @@ -582,7 +580,7 @@ def optimal_perturb( # noqa: C901 - FIXME: break this up into simpler pieces constraint = ObjectiveFunction(constraints) constraint.build(verbose=verbose) - _, _, _, Z, unfixed_idx, project, recover = factorize_linear_constraints( + _, _, _, Z, D, unfixed_idx, project, recover = factorize_linear_constraints( objective_f, constraint ) @@ -599,7 +597,7 @@ def optimal_perturb( # noqa: C901 - FIXME: break this up into simpler pieces dx2_reduced = 0 # dx/dx_reduced - dxdx_reduced = jnp.eye(eq.dim_x)[:, unfixed_idx] @ Z + dxdx_reduced = jnp.diag(D)[:, unfixed_idx] @ Z # dx/dc dxdc = [] @@ -647,8 +645,8 @@ def optimal_perturb( # noqa: C901 - FIXME: break this up into simpler pieces timer.disp("dg computation") # projections onto optimization space - Fx_reduced = Fx[:, unfixed_idx] @ Z - Gx_reduced = Gx[:, unfixed_idx] @ Z + Fx_reduced = Fx @ jnp.diag(D)[:, unfixed_idx] @ Z + Gx_reduced = Gx @ jnp.diag(D)[:, unfixed_idx] @ Z Fc = Fx @ dxdc Gc = Gx @ dxdc @@ -787,9 +785,7 @@ def optimal_perturb( # noqa: C901 - FIXME: break this up into simpler pieces con.update_target(eq_new) constraint = ObjectiveFunction(constraints) constraint.build(verbose=verbose) - _, _, _, Z, unfixed_idx, project, recover = factorize_linear_constraints( - objective_f, constraint - ) + _, _, _, _, _, _, _, recover = factorize_linear_constraints(objective_f, constraint) # update other attributes dx_reduced = dx1_reduced + dx2_reduced diff --git a/desc/vmec.py b/desc/vmec.py index 60014a5229..aba8351c2a 100644 --- a/desc/vmec.py +++ b/desc/vmec.py @@ -198,7 +198,7 @@ def load( constraints = maybe_add_self_consistency(eq, constraints) objective = ObjectiveFunction(constraints) objective.build(verbose=0) - _, _, _, _, _, project, recover = factorize_linear_constraints( + _, _, _, _, _, _, project, recover = factorize_linear_constraints( objective, objective ) args = objective.unpack_state(recover(project(objective.x(eq))), False)[0] diff --git a/tests/test_bootstrap.py b/tests/test_bootstrap.py index 34ffe08e22..811480bbaf 100644 --- a/tests/test_bootstrap.py +++ b/tests/test_bootstrap.py @@ -1589,7 +1589,7 @@ def test_bootstrap_optimization_comparison_qa(): objective=objective, constraints=constraints, optimizer="proximal-lsq-exact", - maxiter=4, + maxiter=5, gtol=1e-16, verbose=3, ) @@ -1622,5 +1622,5 @@ def test_bootstrap_optimization_comparison_qa(): grid.compress(data2[""]), grid.compress(data2[" Redl"]), rtol=1.8e-2 ) np.testing.assert_allclose( - grid.compress(data1[""]), grid.compress(data2[""]), rtol=1.8e-2 + grid.compress(data1[""]), grid.compress(data2[""]), rtol=1.9e-2 ) diff --git a/tests/test_linear_objectives.py b/tests/test_linear_objectives.py index 167765cd83..f67f934a55 100644 --- a/tests/test_linear_objectives.py +++ b/tests/test_linear_objectives.py @@ -451,7 +451,7 @@ def test_correct_indexing_passed_modes(): constraint = ObjectiveFunction(constraints, use_jit=False) constraint.build() - xp, A, b, Z, unfixed_idx, project, recover = factorize_linear_constraints( + xp, A, b, Z, D, unfixed_idx, project, recover = factorize_linear_constraints( objective, constraint ) @@ -461,8 +461,8 @@ def test_correct_indexing_passed_modes(): atol = 2e-15 np.testing.assert_allclose(x1, x2, atol=atol) np.testing.assert_allclose(A @ xp[unfixed_idx], b, atol=atol) - np.testing.assert_allclose(A @ x1[unfixed_idx], b, atol=atol) - np.testing.assert_allclose(A @ x2[unfixed_idx], b, atol=atol) + np.testing.assert_allclose(A @ (x1[unfixed_idx] / D[unfixed_idx]), b, atol=atol) + np.testing.assert_allclose(A @ (x2[unfixed_idx] / D[unfixed_idx]), b, atol=atol) np.testing.assert_allclose(A @ Z, 0, atol=atol) @@ -514,7 +514,7 @@ def test_correct_indexing_passed_modes_and_passed_target(): constraint = ObjectiveFunction(constraints, use_jit=False) constraint.build() - xp, A, b, Z, unfixed_idx, project, recover = factorize_linear_constraints( + xp, A, b, Z, D, unfixed_idx, project, recover = factorize_linear_constraints( objective, constraint ) @@ -524,8 +524,8 @@ def test_correct_indexing_passed_modes_and_passed_target(): atol = 2e-15 np.testing.assert_allclose(x1, x2, atol=atol) np.testing.assert_allclose(A @ xp[unfixed_idx], b, atol=atol) - np.testing.assert_allclose(A @ x1[unfixed_idx], b, atol=atol) - np.testing.assert_allclose(A @ x2[unfixed_idx], b, atol=atol) + np.testing.assert_allclose(A @ (x1[unfixed_idx] / D[unfixed_idx]), b, atol=atol) + np.testing.assert_allclose(A @ (x2[unfixed_idx] / D[unfixed_idx]), b, atol=atol) np.testing.assert_allclose(A @ Z, 0, atol=atol) @@ -574,7 +574,7 @@ def test_correct_indexing_passed_modes_axis(): constraint = ObjectiveFunction(constraints, use_jit=False) constraint.build() - xp, A, b, Z, unfixed_idx, project, recover = factorize_linear_constraints( + xp, A, b, Z, D, unfixed_idx, project, recover = factorize_linear_constraints( objective, constraint ) @@ -584,8 +584,8 @@ def test_correct_indexing_passed_modes_axis(): atol = 2e-15 np.testing.assert_allclose(x1, x2, atol=atol) np.testing.assert_allclose(A @ xp[unfixed_idx], b, atol=atol) - np.testing.assert_allclose(A @ x1[unfixed_idx], b, atol=atol) - np.testing.assert_allclose(A @ x2[unfixed_idx], b, atol=atol) + np.testing.assert_allclose(A @ (x1[unfixed_idx] / D[unfixed_idx]), b, atol=atol) + np.testing.assert_allclose(A @ (x2[unfixed_idx] / D[unfixed_idx]), b, atol=atol) np.testing.assert_allclose(A @ Z, 0, atol=atol) @@ -703,7 +703,7 @@ def test_correct_indexing_passed_modes_and_passed_target_axis(): constraint = ObjectiveFunction(constraints, use_jit=False) constraint.build() - xp, A, b, Z, unfixed_idx, project, recover = factorize_linear_constraints( + xp, A, b, Z, D, unfixed_idx, project, recover = factorize_linear_constraints( objective, constraint ) @@ -713,8 +713,8 @@ def test_correct_indexing_passed_modes_and_passed_target_axis(): atol = 2e-15 np.testing.assert_allclose(x1, x2, atol=atol) np.testing.assert_allclose(A @ xp[unfixed_idx], b, atol=atol) - np.testing.assert_allclose(A @ x1[unfixed_idx], b, atol=atol) - np.testing.assert_allclose(A @ x2[unfixed_idx], b, atol=atol) + np.testing.assert_allclose(A @ (x1[unfixed_idx] / D[unfixed_idx]), b, atol=atol) + np.testing.assert_allclose(A @ (x2[unfixed_idx] / D[unfixed_idx]), b, atol=atol) np.testing.assert_allclose(A @ Z, 0, atol=atol) diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index 4514724333..5ad55eae12 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -5,6 +5,7 @@ import numpy as np import pytest from numpy.random import default_rng +from scipy.constants import mu_0 from scipy.optimize import ( BFGS, NonlinearConstraint, @@ -32,6 +33,7 @@ FixParameters, FixPressure, FixPsi, + FixSumCoilCurrent, ForceBalance, GenericObjective, MagneticWell, @@ -1354,3 +1356,30 @@ def test_quad_flux_with_surface_current_field(): (field_modular_opt,), result = opt.optimize( field, objective=obj, constraints=constraints, maxiter=1, copy=True ) + + +@pytest.mark.unit +def test_optimize_coil_currents(DummyCoilSet): + """Tests optimization takes step sizes proportional to variable scales.""" + eq = desc.examples.get("precise_QH") + coils = load(load_from=str(DummyCoilSet["output_path_sym"]), file_format="hdf5") + grid = LinearGrid(rho=1.0, M=eq.M_grid, N=eq.N_grid, NFP=eq.NFP, sym=eq.sym) + current = 2 * np.pi * eq.compute("G", grid=grid)["G"][0] / mu_0 + for coil in coils: + coil.current = current / coils.num_coils + + objective = ObjectiveFunction(QuadraticFlux(eq=eq, field=coils, vacuum=True)) + constraints = FixSumCoilCurrent(coils) + optimizer = Optimizer("lsq-exact") + [coils_opt], _ = optimizer.optimize( + things=coils, + objective=objective, + constraints=constraints, + verbose=2, + copy=True, + ) + # check that optimized coil currents changed by more than 15% from initial values + np.testing.assert_array_less( + np.asarray(coils.current) * 0.15, + np.abs(np.asarray(coils_opt.current) - np.asarray(coils.current)), + )