Skip to content

Commit

Permalink
Fix calculation of curve fit weights (#1224)
Browse files Browse the repository at this point in the history
### Summary

This PR updates calculation of weights to compute residual in the curve
fitting.

### Details and comments

When the error bar of data points is significantly small, these data
points become a dominant source of residual to minimize. This means
other data points contribute little to the fit, and causes local overfit
to certain data points. This is fixed by clipping the weights to remove
outlier.
  • Loading branch information
nkanazawa1989 authored Aug 15, 2023
1 parent c66034c commit e4a54fb
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 7 deletions.
22 changes: 16 additions & 6 deletions qiskit_experiments/curve_analysis/curve_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,20 +316,30 @@ def _run_curve_fit(

valid_uncertainty = np.all(np.isfinite(curve_data.y_err))

model_weights = {}
if valid_uncertainty:
for model in models:
sub_yerr = curve_data.get_subset_of(model._name).y_err
if len(sub_yerr) == 0:
continue
nonzero_yerr = np.where(np.isclose(sub_yerr, 0.0), np.finfo(float).eps, sub_yerr)
raw_weights = 1 / nonzero_yerr
# Remove outlier. When all sample values are the same with sample average,
# or sampling error is zero with shot-weighted average,
# some yerr values might be very close to zero, yielding significant weights.
# With such outlier, the fit doesn't sense residual of other data points.
maximum_weight = np.percentile(raw_weights, 90)
model_weights[model._name] = np.clip(raw_weights, 0.0, maximum_weight)

# Objective function for minimize. This computes composite residuals of sub models.
def _objective(_params):
ys = []
for model in models:
sub_data = curve_data.get_subset_of(model._name)
with np.errstate(divide="ignore"):
# Ignore numpy runtime warning.
# Zero y_err point introduces infinite weight,
# but this should be managed by LMFIT.
weights = 1.0 / sub_data.y_err if valid_uncertainty else None
yi = model._residual(
params=_params,
data=sub_data.y,
weights=weights,
weights=model_weights.get(model._name, None),
x=sub_data.x,
)
ys.append(yi)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
fixes:
- |
Fix calculation of weight for curve fitting. Previously the weights of data points to obtain
the residual of fit curve were computed by the inverse of the error bars of y data.
This may yield significant weights on certain data points when their error bar is small or zero,
and this can cause the local overfit to these data points.
To avoid this edge case of small error bars, computed weights are now clipped at 90 percentile.
This update might slightly change the outcome of fit.
43 changes: 43 additions & 0 deletions test/curve_analysis/test_baseclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,49 @@ def test_end_to_end_parallel_analysis(self):
self.assertAlmostEqual(taus[0].value.nominal_value, tau1, delta=0.1)
self.assertAlmostEqual(taus[1].value.nominal_value, tau2, delta=0.1)

def test_end_to_end_zero_yerr(self):
"""Integration test for an edge case of having zero y error.
When the error bar is zero, the fit weights to compute residual tend to become larger.
When the weight is too much significant, the result locally overfits to
certain data points with smaller or zero y error.
"""
analysis = CurveAnalysis(models=[ExpressionModel(expr="amp * x**2", name="test")])
analysis.set_options(
data_processor=DataProcessor(input_key="counts", data_actions=[Probability("1")]),
result_parameters=["amp"],
average_method="sample", # Use sample average to make some yerr = 0
plot=False,
p0={"amp": 0.2},
)

amp = 0.3
x = np.linspace(0, 1, 100)
y = amp * x**2

# Replace small y values with zero.
# Since mock function samples count dictionary from binomial distribution,
# y=0 (or 1) yield always the same count dictionary
# and hence y error becomes zero with sample averaging.
# In this case, amp = 0 may yield the best result.
y[0] = 0
y[1] = 0
y[2] = 0

test_data1 = self.single_sampler(x, y, seed=123)
test_data2 = self.single_sampler(x, y, seed=124)
test_data3 = self.single_sampler(x, y, seed=125)

expdata = ExperimentData(experiment=FakeExperiment())
expdata.add_data(test_data1.data())
expdata.add_data(test_data2.data())
expdata.add_data(test_data3.data())

result = analysis.run(expdata)
self.assertExperimentDone(result)

self.assertAlmostEqual(result.analysis_results("amp").value.nominal_value, amp, delta=0.1)

def test_get_init_params(self):
"""Integration test for getting initial parameter from overview entry."""

Expand Down
2 changes: 1 addition & 1 deletion test/library/calibration/test_ramsey_xy.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def test_end_to_end(self, freq_shift: float):
This test also checks that we can pickup frequency shifts with different signs.
"""
test_tol = 0.01
test_tol = 0.03
abs_tol = max(1e3, abs(freq_shift) * test_tol)

exp_helper = RamseyXYHelper()
Expand Down

0 comments on commit e4a54fb

Please sign in to comment.