diff --git a/desc/objectives/utils.py b/desc/objectives/utils.py
index 27a8d30b37..5167573f51 100644
--- a/desc/objectives/utils.py
+++ b/desc/objectives/utils.py
@@ -9,7 +9,7 @@
 from desc.utils import Index, errorif, flatten_list, svd_inv_null, unique_list, warnif
 
 
-def factorize_linear_constraints(objective, constraint):  # noqa: C901
+def factorize_linear_constraints(objective, constraint, x_scale="auto"):  # noqa: C901
     """Compute and factorize A to get pseudoinverse and nullspace.
 
     Given constraints of the form Ax=b, factorize A to find a particular solution xp
@@ -22,6 +22,10 @@ def factorize_linear_constraints(objective, constraint):  # noqa: C901
         Objective function to optimize.
     constraint : ObjectiveFunction
         Objective function of linear constraints to enforce.
+    x_scale : array_like or ``'auto'``, optional
+        Characteristic scale of each variable. Setting ``x_scale`` is equivalent
+        to reformulating the problem in scaled variables ``xs = x / x_scale``.
+        If set to ``'auto'``, the scale is determined from the initial state vector.
 
     Returns
     -------
@@ -33,6 +37,8 @@ def factorize_linear_constraints(objective, constraint):  # noqa: C901
         Combined RHS vector.
     Z : ndarray
         Null space operator for full combined A such that A @ Z == 0.
+    D : ndarray
+        Scale of the full state vector x, as set by the parameter ``x_scale``.
     unfixed_idx : ndarray
         Indices of x that correspond to non-fixed values.
     project, recover : function
@@ -130,32 +136,53 @@ def factorize_linear_constraints(objective, constraint):  # noqa: C901
             )
         A = A[unfixed_rows][:, unfixed_idx]
         b = b[unfixed_rows]
+
     unfixed_idx = indices_idx
+    fixed_idx = np.delete(np.arange(xp.size), unfixed_idx)
+
+    # compute x_scale if not provided
+    if x_scale == "auto":
+        x_scale = objective.x(*objective.things)
+    errorif(
+        x_scale.shape != xp.shape,
+        ValueError,
+        "x_scale must be the same size as the full state vector. "
+        + f"Got size {x_scale.size} for state vector of size {xp.size}.",
+    )
+    D = np.where(np.abs(x_scale) < 1e2, 1, np.abs(x_scale))
+
+    # null space & particular solution
+    A = A * D[None, unfixed_idx]
     if A.size:
-        Ainv_full, Z = svd_inv_null(A)
+        A_inv, Z = svd_inv_null(A)
     else:
-        Ainv_full = A.T
+        A_inv = A.T
         Z = np.eye(A.shape[1])
-    Ainv_full = jnp.asarray(Ainv_full)
-    Z = jnp.asarray(Z)
-    b = jnp.asarray(b)
-    xp = put(xp, unfixed_idx, Ainv_full @ b)
+    xp = put(xp, unfixed_idx, A_inv @ b)
+    xp = put(xp, fixed_idx, ((1 / D) * xp)[fixed_idx])
+
+    # cast to jnp arrays
     xp = jnp.asarray(xp)
+    A = jnp.asarray(A)
+    b = jnp.asarray(b)
+    Z = jnp.asarray(Z)
+    D = jnp.asarray(D)
 
     @jit
-    def project(x):
+    def project(x_full):
         """Project a full state vector into the reduced optimization vector."""
-        x_reduced = Z.T @ ((x - xp)[unfixed_idx])
+        x_reduced = Z.T @ ((1 / D) * x_full - xp)[unfixed_idx]
         return jnp.atleast_1d(jnp.squeeze(x_reduced))
 
     @jit
     def recover(x_reduced):
         """Recover the full state vector from the reduced optimization vector."""
         dx = put(jnp.zeros(objective.dim_x), unfixed_idx, Z @ x_reduced)
-        return jnp.atleast_1d(jnp.squeeze(xp + dx))
+        x_full = D * (xp + dx)
+        return jnp.atleast_1d(jnp.squeeze(x_full))
 
     # check that all constraints are actually satisfiable
-    params = objective.unpack_state(xp, False)
+    params = objective.unpack_state(D * xp, False)
     for con in constraint.objectives:
         xpi = [params[i] for i, t in enumerate(objective.things) if t in con.things]
         y1 = con.compute_unscaled(*xpi)
@@ -197,7 +224,7 @@ def recover(x_reduced):
                 "or be due to floating point error.",
             )
 
-    return xp, A, b, Z, unfixed_idx, project, recover
+    return xp, A, b, Z, D, unfixed_idx, project, recover
 
 
 def softmax(arr, alpha):
diff --git a/desc/optimize/_constraint_wrappers.py b/desc/optimize/_constraint_wrappers.py
index 93bf5385ae..8d4e480161 100644
--- a/desc/optimize/_constraint_wrappers.py
+++ b/desc/optimize/_constraint_wrappers.py
@@ -103,6 +103,7 @@ def build(self, use_jit=None, verbose=1):
             self._A,
             self._b,
             self._Z,
+            self._D,
             self._unfixed_idx,
             self._project,
             self._recover,
@@ -113,10 +114,8 @@ def build(self, use_jit=None, verbose=1):
         self._dim_x = self._objective.dim_x
         self._dim_x_reduced = self._Z.shape[1]
 
-        # equivalent matrix for A[unfixed_idx]@Z == A@unfixed_idx_mat
-        self._unfixed_idx_mat = (
-            jnp.eye(self._objective.dim_x)[:, self._unfixed_idx] @ self._Z
-        )
+        # equivalent matrix for A[unfixed_idx] @ D @ Z == A @ unfixed_idx_mat
+        self._unfixed_idx_mat = jnp.diag(self._D)[:, self._unfixed_idx] @ self._Z
 
         self._built = True
         timer.stop("Linear constraint projection build")
@@ -261,7 +260,7 @@ def grad(self, x_reduced, constants=None):
         """
         x = self.recover(x_reduced)
         df = self._objective.grad(x, constants)
-        return df[self._unfixed_idx] @ self._Z
+        return df[self._unfixed_idx] @ (self._Z * self._D[self._unfixed_idx, None])
 
     def hess(self, x_reduced, constants=None):
         """Compute Hessian of self.compute_scalar.
@@ -281,13 +280,19 @@ def hess(self, x_reduced, constants=None):
         """
         x = self.recover(x_reduced)
         df = self._objective.hess(x, constants)
-        return self._Z.T @ df[self._unfixed_idx, :][:, self._unfixed_idx] @ self._Z
+        return (
+            (self._Z.T * (1 / self._D)[None, self._unfixed_idx])
+            @ df[self._unfixed_idx, :][:, self._unfixed_idx]
+            @ (self._Z * self._D[self._unfixed_idx, None])
+        )
 
     def _jac(self, x_reduced, constants=None, op="scaled"):
         x = self.recover(x_reduced)
         if self._objective._deriv_mode == "blocked":
             fun = getattr(self._objective, "jac_" + op)
-            return fun(x, constants)[:, self._unfixed_idx] @ self._Z
+            return fun(x, constants)[:, self._unfixed_idx] @ (
+                self._Z * self._D[self._unfixed_idx, None]
+            )
 
         v = self._unfixed_idx_mat
         df = getattr(self._objective, "jvp_" + op)(v.T, x, constants)
@@ -401,7 +406,7 @@ def jvp_unscaled(self, v, x_reduced, constants=None):
     def _vjp(self, v, x_reduced, constants=None, op="vjp_scaled"):
         x = self.recover(x_reduced)
         df = getattr(self._objective, op)(v, x, constants)
-        return df[self._unfixed_idx] @ self._Z
+        return df[self._unfixed_idx] @ (self._Z * self._D[self._unfixed_idx, None])
 
     def vjp_scaled(self, v, x_reduced, constants=None):
         """Compute vector-Jacobian product of self.compute_scaled.
@@ -533,8 +538,8 @@ def _set_eq_state_vector(self):
             self._args.remove(arg)
         linear_constraint = ObjectiveFunction(self._linear_constraints)
         linear_constraint.build()
-        _, A, _, self._Z, self._unfixed_idx, _, _ = factorize_linear_constraints(
-            self._constraint, linear_constraint
+        _, _, _, self._Z, self._D, self._unfixed_idx, _, _ = (
+            factorize_linear_constraints(self._constraint, linear_constraint)
         )
 
         # dx/dc - goes from the full state to optimization variables for eq
@@ -618,14 +623,14 @@ def build(self, use_jit=None, verbose=1):  # noqa: C901
         )
         self._dimx_per_thing = [t.dim_x for t in self.things]
 
-        # equivalent matrix for A[unfixed_idx]@Z == A@unfixed_idx_mat
+        # equivalent matrix for A[unfixed_idx] @ D @ Z == A @ unfixed_idx_mat
         self._unfixed_idx_mat = jnp.eye(self._objective.dim_x)
         self._unfixed_idx_mat = jnp.split(
             self._unfixed_idx_mat, np.cumsum([t.dim_x for t in self.things]), axis=-1
         )
-        self._unfixed_idx_mat[self._eq_idx] = (
-            self._unfixed_idx_mat[self._eq_idx][:, self._unfixed_idx] @ self._Z
-        )
+        self._unfixed_idx_mat[self._eq_idx] = self._unfixed_idx_mat[self._eq_idx][
+            :, self._unfixed_idx
+        ] @ (self._Z * self._D[self._unfixed_idx, None])
         self._unfixed_idx_mat = np.concatenate(
             [np.atleast_2d(foo) for foo in self._unfixed_idx_mat], axis=-1
         )
@@ -1018,7 +1023,8 @@ def jvp_unscaled(self, v, x, constants=None):
     @functools.partial(jit, static_argnames=("self", "op"))
     def _jvp_f(self, xf, dc, constants, op):
         Fx = getattr(self._constraint, "jac_" + op)(xf, constants)
-        Fx_reduced = Fx[:, self._unfixed_idx] @ self._Z
+        # TODO: replace with self._unfixed_idx_mat?
+        Fx_reduced = Fx @ jnp.diag(self._D)[:, self._unfixed_idx] @ self._Z
         Fc = Fx @ (self._dxdc @ dc)
         Fxh = Fx_reduced
         cutoff = jnp.finfo(Fxh.dtype).eps * max(Fxh.shape)
diff --git a/desc/perturbations.py b/desc/perturbations.py
index 3fa622b248..03529a1ed1 100644
--- a/desc/perturbations.py
+++ b/desc/perturbations.py
@@ -185,7 +185,7 @@ def perturb(  # noqa: C901 - FIXME: break this up into simpler pieces
     if verbose > 0:
         print("Factorizing linear constraints")
     timer.start("linear constraint factorize")
-    xp, _, _, Z, unfixed_idx, project, recover = factorize_linear_constraints(
+    xp, _, _, Z, D, unfixed_idx, project, recover = factorize_linear_constraints(
         objective, constraint
     )
     timer.stop("linear constraint factorize")
@@ -291,7 +291,7 @@ def perturb(  # noqa: C901 - FIXME: break this up into simpler pieces
             print("Computing df")
         timer.start("df computation")
         Jx = objective.jac_scaled_error(x)
-        Jx_reduced = Jx[:, unfixed_idx] @ Z @ scale
+        Jx_reduced = Jx @ jnp.diag(D)[:, unfixed_idx] @ Z @ scale
         RHS1 = objective.jvp_scaled(tangents, x)
         if include_f:
             f = objective.compute_scaled_error(x)
@@ -388,9 +388,7 @@ def perturb(  # noqa: C901 - FIXME: break this up into simpler pieces
             con.update_target(eq_new)
     constraint = ObjectiveFunction(constraints)
     constraint.build(verbose=verbose)
-    xp, _, _, Z, unfixed_idx, project, recover = factorize_linear_constraints(
-        objective, constraint
-    )
+    _, _, _, _, _, _, _, recover = factorize_linear_constraints(objective, constraint)
 
     # update other attributes
     dx_reduced = dx1_reduced + dx2_reduced + dx3_reduced
@@ -547,7 +545,7 @@ def optimal_perturb(  # noqa: C901 - FIXME: break this up into simpler pieces
     constraint = ObjectiveFunction(constraints)
     constraint.build(verbose=verbose)
 
-    _, _, _, Z, unfixed_idx, project, recover = factorize_linear_constraints(
+    _, _, _, Z, D, unfixed_idx, project, recover = factorize_linear_constraints(
         objective_f, constraint
     )
 
@@ -564,7 +562,7 @@ def optimal_perturb(  # noqa: C901 - FIXME: break this up into simpler pieces
     dx2_reduced = 0
 
     # dx/dx_reduced
-    dxdx_reduced = jnp.eye(eq.dim_x)[:, unfixed_idx] @ Z
+    dxdx_reduced = jnp.diag(D)[:, unfixed_idx] @ Z
 
     # dx/dc
     dxdc = []
@@ -612,8 +610,8 @@ def optimal_perturb(  # noqa: C901 - FIXME: break this up into simpler pieces
             timer.disp("dg computation")
 
         # projections onto optimization space
-        Fx_reduced = Fx[:, unfixed_idx] @ Z
-        Gx_reduced = Gx[:, unfixed_idx] @ Z
+        Fx_reduced = Fx @ jnp.diag(D)[:, unfixed_idx] @ Z
+        Gx_reduced = Gx @ jnp.diag(D)[:, unfixed_idx] @ Z
         Fc = Fx @ dxdc
         Gc = Gx @ dxdc
 
@@ -752,9 +750,7 @@ def optimal_perturb(  # noqa: C901 - FIXME: break this up into simpler pieces
             con.update_target(eq_new)
     constraint = ObjectiveFunction(constraints)
     constraint.build(verbose=verbose)
-    _, _, _, Z, unfixed_idx, project, recover = factorize_linear_constraints(
-        objective_f, constraint
-    )
+    _, _, _, _, _, _, _, recover = factorize_linear_constraints(objective_f, constraint)
 
     # update other attributes
     dx_reduced = dx1_reduced + dx2_reduced
diff --git a/desc/vmec.py b/desc/vmec.py
index 14c4d2346c..a0212dbd31 100644
--- a/desc/vmec.py
+++ b/desc/vmec.py
@@ -192,7 +192,7 @@ def load(
         constraints = maybe_add_self_consistency(eq, constraints)
         objective = ObjectiveFunction(constraints)
         objective.build(verbose=0)
-        _, _, _, _, _, project, recover = factorize_linear_constraints(
+        _, _, _, _, _, _, project, recover = factorize_linear_constraints(
             objective, objective
         )
         args = objective.unpack_state(recover(project(objective.x(eq))), False)[0]
diff --git a/tests/test_bootstrap.py b/tests/test_bootstrap.py
index 34ffe08e22..811480bbaf 100644
--- a/tests/test_bootstrap.py
+++ b/tests/test_bootstrap.py
@@ -1589,7 +1589,7 @@ def test_bootstrap_optimization_comparison_qa():
         objective=objective,
         constraints=constraints,
         optimizer="proximal-lsq-exact",
-        maxiter=4,
+        maxiter=5,
         gtol=1e-16,
         verbose=3,
     )
@@ -1622,5 +1622,5 @@ def test_bootstrap_optimization_comparison_qa():
         grid.compress(data2["<J*B>"]), grid.compress(data2["<J*B> Redl"]), rtol=1.8e-2
     )
     np.testing.assert_allclose(
-        grid.compress(data1["<J*B>"]), grid.compress(data2["<J*B>"]), rtol=1.8e-2
+        grid.compress(data1["<J*B>"]), grid.compress(data2["<J*B>"]), rtol=1.9e-2
     )
diff --git a/tests/test_linear_objectives.py b/tests/test_linear_objectives.py
index 167765cd83..f67f934a55 100644
--- a/tests/test_linear_objectives.py
+++ b/tests/test_linear_objectives.py
@@ -451,7 +451,7 @@ def test_correct_indexing_passed_modes():
     constraint = ObjectiveFunction(constraints, use_jit=False)
     constraint.build()
 
-    xp, A, b, Z, unfixed_idx, project, recover = factorize_linear_constraints(
+    xp, A, b, Z, D, unfixed_idx, project, recover = factorize_linear_constraints(
         objective, constraint
     )
 
@@ -461,8 +461,8 @@ def test_correct_indexing_passed_modes():
     atol = 2e-15
     np.testing.assert_allclose(x1, x2, atol=atol)
     np.testing.assert_allclose(A @ xp[unfixed_idx], b, atol=atol)
-    np.testing.assert_allclose(A @ x1[unfixed_idx], b, atol=atol)
-    np.testing.assert_allclose(A @ x2[unfixed_idx], b, atol=atol)
+    np.testing.assert_allclose(A @ (x1[unfixed_idx] / D[unfixed_idx]), b, atol=atol)
+    np.testing.assert_allclose(A @ (x2[unfixed_idx] / D[unfixed_idx]), b, atol=atol)
     np.testing.assert_allclose(A @ Z, 0, atol=atol)
 
 
@@ -514,7 +514,7 @@ def test_correct_indexing_passed_modes_and_passed_target():
     constraint = ObjectiveFunction(constraints, use_jit=False)
     constraint.build()
 
-    xp, A, b, Z, unfixed_idx, project, recover = factorize_linear_constraints(
+    xp, A, b, Z, D, unfixed_idx, project, recover = factorize_linear_constraints(
         objective, constraint
     )
 
@@ -524,8 +524,8 @@ def test_correct_indexing_passed_modes_and_passed_target():
     atol = 2e-15
     np.testing.assert_allclose(x1, x2, atol=atol)
     np.testing.assert_allclose(A @ xp[unfixed_idx], b, atol=atol)
-    np.testing.assert_allclose(A @ x1[unfixed_idx], b, atol=atol)
-    np.testing.assert_allclose(A @ x2[unfixed_idx], b, atol=atol)
+    np.testing.assert_allclose(A @ (x1[unfixed_idx] / D[unfixed_idx]), b, atol=atol)
+    np.testing.assert_allclose(A @ (x2[unfixed_idx] / D[unfixed_idx]), b, atol=atol)
     np.testing.assert_allclose(A @ Z, 0, atol=atol)
 
 
@@ -574,7 +574,7 @@ def test_correct_indexing_passed_modes_axis():
     constraint = ObjectiveFunction(constraints, use_jit=False)
     constraint.build()
 
-    xp, A, b, Z, unfixed_idx, project, recover = factorize_linear_constraints(
+    xp, A, b, Z, D, unfixed_idx, project, recover = factorize_linear_constraints(
         objective, constraint
     )
 
@@ -584,8 +584,8 @@ def test_correct_indexing_passed_modes_axis():
     atol = 2e-15
     np.testing.assert_allclose(x1, x2, atol=atol)
     np.testing.assert_allclose(A @ xp[unfixed_idx], b, atol=atol)
-    np.testing.assert_allclose(A @ x1[unfixed_idx], b, atol=atol)
-    np.testing.assert_allclose(A @ x2[unfixed_idx], b, atol=atol)
+    np.testing.assert_allclose(A @ (x1[unfixed_idx] / D[unfixed_idx]), b, atol=atol)
+    np.testing.assert_allclose(A @ (x2[unfixed_idx] / D[unfixed_idx]), b, atol=atol)
     np.testing.assert_allclose(A @ Z, 0, atol=atol)
 
 
@@ -703,7 +703,7 @@ def test_correct_indexing_passed_modes_and_passed_target_axis():
     constraint = ObjectiveFunction(constraints, use_jit=False)
     constraint.build()
 
-    xp, A, b, Z, unfixed_idx, project, recover = factorize_linear_constraints(
+    xp, A, b, Z, D, unfixed_idx, project, recover = factorize_linear_constraints(
         objective, constraint
     )
 
@@ -713,8 +713,8 @@ def test_correct_indexing_passed_modes_and_passed_target_axis():
     atol = 2e-15
     np.testing.assert_allclose(x1, x2, atol=atol)
     np.testing.assert_allclose(A @ xp[unfixed_idx], b, atol=atol)
-    np.testing.assert_allclose(A @ x1[unfixed_idx], b, atol=atol)
-    np.testing.assert_allclose(A @ x2[unfixed_idx], b, atol=atol)
+    np.testing.assert_allclose(A @ (x1[unfixed_idx] / D[unfixed_idx]), b, atol=atol)
+    np.testing.assert_allclose(A @ (x2[unfixed_idx] / D[unfixed_idx]), b, atol=atol)
     np.testing.assert_allclose(A @ Z, 0, atol=atol)
 
 
diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py
index 7c5f810d98..71cc515a58 100644
--- a/tests/test_optimizer.py
+++ b/tests/test_optimizer.py
@@ -5,6 +5,7 @@
 import numpy as np
 import pytest
 from numpy.random import default_rng
+from scipy.constants import mu_0
 from scipy.optimize import (
     BFGS,
     NonlinearConstraint,
@@ -32,6 +33,7 @@
     FixParameters,
     FixPressure,
     FixPsi,
+    FixSumCoilCurrent,
     ForceBalance,
     GenericObjective,
     MagneticWell,
@@ -1350,3 +1352,30 @@ def test_quad_flux_with_surface_current_field():
     (field_modular_opt,), result = opt.optimize(
         field, objective=obj, constraints=constraints, maxiter=1, copy=True
     )
+
+
+@pytest.mark.unit
+def test_optimize_coil_currents(DummyCoilSet):
+    """Tests optimization takes step sizes proportional to variable scales."""
+    eq = desc.examples.get("precise_QH")
+    coils = load(load_from=str(DummyCoilSet["output_path_sym"]), file_format="hdf5")
+    grid = LinearGrid(rho=1.0, M=eq.M_grid, N=eq.N_grid, NFP=eq.NFP, sym=eq.sym)
+    current = 2 * np.pi * eq.compute("G", grid=grid)["G"][0] / mu_0
+    for coil in coils:
+        coil.current = current / coils.num_coils
+
+    objective = ObjectiveFunction(QuadraticFlux(eq=eq, field=coils, vacuum=True))
+    constraints = FixSumCoilCurrent(coils)
+    optimizer = Optimizer("lsq-exact")
+    [coils_opt], _ = optimizer.optimize(
+        things=coils,
+        objective=objective,
+        constraints=constraints,
+        verbose=2,
+        copy=True,
+    )
+    # check that optimized coil currents changed by more than 15% from initial values
+    np.testing.assert_array_less(
+        np.asarray(coils.current) * 0.15,
+        np.abs(np.asarray(coils_opt.current) - np.asarray(coils.current)),
+    )