From ac5d058569a4824f1199a7aea5c6f93f8ebd97f0 Mon Sep 17 00:00:00 2001
From: Matthew Newville <newville@cars.uchicago.edu>
Date: Fri, 20 Dec 2019 13:07:49 -0600
Subject: [PATCH 1/3] update emcee example to better present results

---
 examples/doc_fitting_emcee.py | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)
diff --git a/examples/doc_fitting_emcee.py b/examples/doc_fitting_emcee.py
index 6ac1a5166..586280453 100644
--- a/examples/doc_fitting_emcee.py
+++ b/examples/doc_fitting_emcee.py
@@ -8,6 +8,7 @@
     HASPYLAB = True
 except ImportError:
     HASPYLAB = False
+HASPYLAB = False
 
 try:
     import corner
@@ -70,16 +71,21 @@ def residual(p):
 print('--------------------------------------------')
 lmfit.report_fit(res.params)
 
+
 # find the maximum likelihood solution
 highest_prob = np.argmax(res.lnprob)
 hp_loc = np.unravel_index(highest_prob, res.lnprob.shape)
 mle_soln = res.chain[hp_loc]
 for i, par in enumerate(p):
     p[par].value = mle_soln[i]
-print("\nMaximum likelihood Estimation")
-print('-----------------------------')
-for _, vals in p.items():
-    print(vals)
+
+print('\nMaximum Likelihood Estimation from emcee       ')
+print('-------------------------------------------------')
+print('Parameter  MLE Value   Median Value   Uncertainty')
+fmt = '  {:5s}  {:11.5f} {:11.5f}   {:11.5f}'.format
+for name, param in p.items():
+    print(fmt(name, param.value, res.params[name].value,
+              res.params[name].stderr))
 
 if HASPYLAB:
     plt.figure()
@@ -89,7 +95,17 @@ def residual(p):
     plt.legend()
     plt.show()
 
-quantiles = np.percentile(res.flatchain['t1'], [2.28, 15.9, 50, 84.2, 97.7])
-print("1 sigma spread", 0.5 * (quantiles[3] - quantiles[1]))
-print("2 sigma spread", 0.5 * (quantiles[4] - quantiles[0]))
-# <end of examples/doc_fitting_emcee.py>
+print('\nError Estimates from emcee    ')
+print('------------------------------------------------------')
+print('Parameter  -2sigma  -1sigma   median  +1sigma  +2sigma ')
+
+for name in p.keys():
+    quantiles = np.percentile(res.flatchain[name],
+                              [2.275, 15.865, 50, 84.135, 97.275])
+    median = quantiles[2]
+    err_m2 = quantiles[0] - median
+    err_m1 = quantiles[1] - median
+    err_p1 = quantiles[3] - median
+    err_p2 = quantiles[4] - median
+    fmt = '  {:5s}   {:8.4f} {:8.4f} {:8.4f} {:8.4f} {:8.4f}'.format
+    print(fmt(name, err_m2, err_m1, median, err_p1, err_p2))

From 16fc7d1a47362d41219807bab317168df9dc9bc4 Mon Sep 17 00:00:00 2001
From: Matthew Newville <newville@cars.uchicago.edu>
Date: Fri, 20 Dec 2019 13:15:48 -0600
Subject: [PATCH 2/3] update doc for emcee example, some comparison with
 confidence_intervals

---
 doc/confidence.rst |  27 +++--
 doc/fitting.rst    | 252 +++++++++++++++++++++++++++++++++++----------
 2 files changed, 215 insertions(+), 64 deletions(-)

diff --git a/doc/confidence.rst b/doc/confidence.rst
index 20d89acf6..07da44c8f 100644
--- a/doc/confidence.rst
+++ b/doc/confidence.rst
@@ -106,15 +106,20 @@ parameters and set it manually:
         result.params[p].stderr = abs(result.params[p].value * 0.1)
 
 
-An advanced example
--------------------
+..  _label-confidence-advanced:
+
+An advanced example for evaluating confidence intervals
+---------------------------------------------------------
 
 Now we look at a problem where calculating the error from approximated
-covariance can lead to misleading result -- two decaying exponentials.  In
-fact such a problem is particularly hard for the Levenberg-Marquardt
-method, so we first estimate the results using the slower but robust
-Nelder-Mead  method, and *then* use Levenberg-Marquardt to estimate the
-uncertainties and correlations.
+covariance can lead to misleading result -- the same double exponential
+problem shown in :ref:`label-emcee`.  In fact such a problem is particularly
+hard for the Levenberg-Marquardt method, so we first estimate the results
+using the slower but robust Nelder-Mead method.  We can then compare the
+uncertainties computed (if the ``numdifftools`` package is installed) with
+those estimated using Levenberg-Marquardt around the previously found
+solution.  We can also compare to the results of using ``emcee``.
+
 
 .. jupyter-execute::
     :hide-code:
@@ -168,7 +173,13 @@ Plots of the confidence region are shown in the figures below for ``a1`` and
     plt.show()
 
 Neither of these plots is very much like an ellipse, which is implicitly
-assumed by the approach using the covariance matrix.
+assumed by the approach using the covariance matrix.  The plots actually
+look quite a bit like those found with MCMC and shown in the "corner plot"
+in :ref:`label-emcee`.  In fact, comparing the confidence interval results
+here with the results for the 1- and 2-:math:`\sigma` error estimated with
+``emcee``, we can see that the agreement is pretty good and that the
+asymmetry in the parameter distributions are reflected well in the
+asymmetry of the uncertainties
 
 The trace returned as the optional second argument from
 :func:`conf_interval` contains a dictionary for each variable parameter.
diff --git a/doc/fitting.rst b/doc/fitting.rst
index f4d1b55f3..1fcec297b 100644
--- a/doc/fitting.rst
+++ b/doc/fitting.rst
@@ -2,7 +2,6 @@
 
 .. module:: lmfit.minimizer
 
-
 =======================================
 Performing Fits and Analyzing Outputs
 =======================================
@@ -340,12 +339,105 @@ information criterion, and/or Bayesian information criterion.  Generally,
 the Bayesian information criterion is considered the most conservative of
 these statistics.
 
-..  _fit-itercb-label:
+
+Uncertainties in Variable Parameters, and their Correlations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+As mentioned above, when a fit is complete the uncertainties for fitted
+Parameters as well as the correlations between pairs of Parameters are
+usually calculated.  This happens automatically either when using the
+default :meth:`leastsq` method, the :meth:`least_squares` method, or for
+most other fitting methods if the highly-recommended ``numdifftools``
+package is available.  The estimated standard error (the :math:`1\sigma`
+uncertainty) for each variable Parameter will be contained in the
+:attr:`stderr`, while the :attr:`correl` attribute for each Parameter will
+contain a dictionary of the correlation with each other variable Parameter.
+
+These estimates of the uncertainties are done by inverting the Hessian
+matrix which represents the second derivative of fit quality for each
+variable parameter. There are situations for which the uncertainties cannot
+be estimated, which generally indicates that this matrix cannot be inverted
+because one of the fit is not actually sensitive to one of the variables.
+This can happen if a Parameter is stuck at an upper or lower bound, if the
+variable is simply not used by the fit, or if the value for the variable is
+such that it has no real influence on the fit.
+
+In principle, the scale of the uncertainties in the Parameters is closely
+tied to the goodness-of-fit statistics chi-square and reduced chi-square
+(``chisqr`` and ``redchi``).  The standard errors or :math:`1 \sigma`
+uncertainties are those that increase chi-square by 1.  Since a "good fit"
+should have ``redchi`` of around 1, this requires that the data
+uncertainties (and to some extent the sampling of the N data points) is
+correct. Unfortunately, it is often not the case that one has high-quality
+estimates of the data uncertainties (getting the data is hard enough!).
+Because of this common situation, the uncertainties reported and held in
+:attr:`stderr` are not those that increase chi-square by 1, but those that
+increase chi-square by reduced chi-square.  This is equivalent to rescaling
+the uncertainty in the data such that reduced chi-square would be 1.  To be
+clear, this rescaling is done by default because if reduced chi-square is
+far from 1, this rescaling often makes the reported uncertainties sensible,
+and if reduced chi-square is near 1 it does little harm.  If you have good
+scaling of the data uncertainty and believe the scale of the residual
+array is correct,  this automatic rescaling can be turned off using
+``scale_covar=False``.
+
+Note that the simple (and fast!) approach to estimating uncertainties and
+correlations by inverting the second derivative matrix assumes that the
+components of the residual array (if, indeed, an array is used) are
+distributed around 0 with a normal (Gaussian distribution), and that a map
+of probability distributions for pairs would be elliptical -- the size of
+the of ellipse gives the uncertainty itself and the eccentricity of the
+ellipse gives the correlation.  This simple approach to assessing
+uncertainties ignores outliers, highly asymmetric uncertainties, or complex
+correlations between Parameters.  In fact, it is not too hard to come up
+with problems where such effects are important.  Our experience is that the
+automated results are usually the right scale and quite reasonable as
+initial estimates, but a more thorough exploration of the Parameter space
+using the tools described in :ref:`label-emcee` and
+:ref:`label-confidence-advanced` can give a more complete understanding of
+the distributions and relations between Parameters.
+
+
+..  _fit-reports-label:
+
+Getting and Printing Fit Reports
+===========================================
+
+.. currentmodule:: lmfit.printfuncs
+
+.. autofunction:: fit_report
+
+An example using this to write out a fit report would be:
+
+.. jupyter-execute:: ../examples/doc_fitting_withreport.py
+    :hide-output:
+
+which would give as output:
+
+.. jupyter-execute::
+    :hide-code:
+
+    print(fit_report(out))
+
+To be clear, you can get at all of these values from the fit result ``out``
+and ``out.params``.  For example, a crude printout of the best fit variables
+and standard errors could be done as
+
+.. jupyter-execute::
+
+    print('-------------------------------')
+    print('Parameter    Value       Stderr')
+    for name, param in out.params.items():
+        print('{:7s} {:11.5f} {:11.5f}'.format(name, param.value, param.stderr))
 
 
+..  _fit-itercb-label:
+
 Using a Iteration Callback Function
 ====================================
 
+.. currentmodule:: lmfit.minimizer
+
 An iteration callback function is a function to be called at each
 iteration, just after the objective function is called.  The iteration
 callback allows user-supplied code to be run at each iteration, and can be
@@ -374,11 +466,14 @@ exception is raised in the iteration callback. When a fit is aborted this
 way, the parameters will have the values from the last iteration.  The fit
 statistics are not likely to be meaningful, and uncertainties will not be computed.
 
+
 ..  _fit-minimizer-label:
 
 Using the :class:`Minimizer` class
 =======================================
 
+.. currentmodule:: lmfit.minimizer
+
 For full control of the fitting process, you will want to create a
 :class:`Minimizer` object.
 
@@ -410,14 +505,31 @@ For more information, check the examples in ``examples/lmfit_brute_example.ipynb
 
 .. automethod:: Minimizer.emcee
 
+
 .. _label-emcee:
 
 :meth:`Minimizer.emcee` - calculating the posterior probability distribution of parameters
 ==============================================================================================
 
-:meth:`Minimizer.emcee` can be used to obtain the posterior probability distribution of
-parameters, given a set of experimental data. An example problem is a double
-exponential decay. A small amount of Gaussian noise is also added in:
+:meth:`Minimizer.emcee` can be used to obtain the posterior probability
+distribution of parameters, given a set of experimental data.  Note that this
+method does *not* actually perform a fit at all.  Instead, it explores
+parameter space to determine the probability distributions for the parameters,
+but without an explicit goal of attempting to refine the solution.  It should
+not be used for fitting, but it is a useful method to to more thoroughly
+explore the parameter space around the solution after a fit has been done and
+thereby get an improved understanding of the probability distribution for the
+parameters.  It may be able to refine your estimate of the most likely values
+for a set of parameters, but it will not iteratively find a good solution to
+the minimization problem.  To use this method effectively, you should first
+use another minimization method and then use this method to explore the
+parameter space around thosee best-fit values.
+
+To illustrate this, we'll use an example problem of fitting data to function
+of a double exponential decay, including a modest amount of Gaussian noise to
+the data.  Note that this example is the same problem used in
+:ref:`label-confidence-advanced` for evaluating confidence intervals in the
+parameters, which is a similar goal to the one here.
 
 .. jupyter-execute::
     :hide-code:
@@ -456,7 +568,10 @@ Create a Parameter set for the initial guesses:
         v = p.valuesdict()
         return v['a1'] * np.exp(-x / v['t1']) + v['a2'] * np.exp(-(x - 0.1) / v['t2']) - y
 
-Solving with :func:`minimize` gives the Maximum Likelihood solution:
+Solving with :func:`minimize` gives the Maximum Likelihood solution.   Note
+that we use the robust Nelder-Mead method here.  The default Levenberg-Marquardt
+method seems to have difficulty with exponential decays, though it can refine
+the solution if starting near the solution:
 
 .. jupyter-execute::
 
@@ -472,13 +587,19 @@ and plotting the fit using the Maximum Likelihood solution gives the graph below
     plt.legend(loc='best')
     plt.show()
 
-However, this doesn't give a probability distribution for the parameters.
+Note that the fit here (for which the ``numdifftools`` package is installed)
+does estimate and report uncertainties in the parameters and correlations for
+the parameters, and reports the correlation of parameters ``a2`` and ``t2`` to
+be very high. As we'll see, these estimates are pretty good, but when faced
+with such high correlation, it can be helpful to get the full probability
+distribution for the parameters.  MCMC methods are very good for this.
+
 Furthermore, we wish to deal with the data uncertainty. This is called
-marginalisation of a nuisance parameter. ``emcee`` requires a function that returns
-the log-posterior probability. The log-posterior probability is a sum of the
-log-prior probability and log-likelihood functions. The log-prior probability is
-assumed to be zero if all the parameters are within their bounds and ``-np.inf``
-if any of the parameters are outside their bounds.
+marginalisation of a nuisance parameter. ``emcee`` requires a function that
+returns the log-posterior probability. The log-posterior probability is a sum
+of the log-prior probability and log-likelihood functions. The log-prior
+probability is assumed to be zero if all the parameters are within their
+bounds and ``-np.inf`` if any of the parameters are outside their bounds.
 
 If the objective function returns an array of unweighted residuals (i.e.,
 ``data-model``) as is the case here, you can use ``is_weighted=False`` as an
@@ -490,7 +611,8 @@ place boundaries on this parameter one can do:
 
     mi.params.add('__lnsigma', value=np.log(0.1), min=np.log(0.001), max=np.log(2))
 
-Now we have to set up the minimizer and do the sampling:
+Now we have to set up the minimizer and do the sampling (again, just to be
+clear, this is *not* doing a fit):
 
 .. jupyter-execute::
     :hide-output:
@@ -498,16 +620,18 @@ Now we have to set up the minimizer and do the sampling:
     res = lmfit.minimize(residual, method='emcee', nan_policy='omit', burn=300, steps=1000, thin=20,
                          params=mi.params, is_weighted=False, progress=False)
 
-Of note, the ``is_weighted`` argument will be ignored if your objective function
-returns a float instead of an array. See the Notes in :meth:`Minimizer.emcee` for
-more information. For the documentation we set ``progress=False``; the default is to show a progress bar,
-provided that the ``tqdm`` package is installed.
+As mentioned in the Notes for :meth:`Minimizer.emcee`, the ``is_weighted``
+argument will be ignored if your objective function returns a float instead of
+an array. For the documentation we set ``progress=False``; the default is to
+print a progress bar to the Terminal if the ``tqdm`` package is installed.
 
-The performance of the method (i.e., whether or not the sampling went well) can be assessed by checking
-the integrated autocorrelation time and/or the acceptance fraction of the walkers. For this specific
-example the autocorrelation time could not be estimated because the "chain is too short". Thus, we plot
-the acceptance fraction per walker and its values (or mean value) suggests that the sampling worked as
-intended (as a rule of thumb this value should be between 0.2 and 0.5).
+The success of the method (i.e., whether or not the sampling went well) can be
+assessed by checking the integrated autocorrelation time and/or the acceptance
+fraction of the walkers. For this specific example the autocorrelation time
+could not be estimated because the "chain is too short".  Instead, we plot the
+acceptance fraction per walker and its mean value suggests that the sampling
+worked as intended (as a rule of thumb the value should be between 0.2 and
+0.5).
 
 .. jupyter-execute::
 
@@ -516,8 +640,8 @@ intended (as a rule of thumb this value should be between 0.2 and 0.5).
     plt.ylabel('acceptance fraction')
     plt.show()
 
-Lets have a look at those posterior distributions for the parameters. This requires
-installation of the ``corner`` package:
+With the results from ``emcee``, we can visualize the posterior distributions
+for the parameters using the ``corner`` package:
 
 .. jupyter-execute::
 
@@ -527,19 +651,30 @@ installation of the ``corner`` package:
                                truths=list(res.params.valuesdict().values()))
 
 The values reported in the :class:`MinimizerResult` are the medians of the
-probability distributions and a 1 :math:`\sigma` quantile, estimated as half the
-difference between the 15.8 and 84.2 percentiles. The median value is not
-necessarily the same as the Maximum Likelihood Estimate. We'll get that as well.
-You can see that we recovered the right uncertainty level on the data:
+probability distributions and a 1 :math:`\sigma` quantile, estimated as half
+the difference between the 15.8 and 84.2 percentiles.  Printing these values:
+
 
 .. jupyter-execute::
 
-    print("median of posterior probability distribution")
+    print('median of posterior probability distribution')
     print('--------------------------------------------')
     lmfit.report_fit(res.params)
 
-
-Find the Maximum Likelihood Estimation (MLE):
+You can see that this recovered the right uncertainty level on the data.  Note
+that these values agree pretty well with the results, uncertainties and
+correlations found by the fit and using ``numdifftools`` to estimate the
+covariance matrix.  That is, even though the parameters ``a2``, ``t1``, and
+``t2`` are all highly correlated and do not display perfectly Gaussian
+probability distributions, the probability distributions found by explicitly
+sampling the parameter space are not so far from elliptical as to make the
+simple (and much faster) estimates from inverting the covariance matrix
+completely invalid.
+
+As mentioned above, the result from ``emcee`` reports the median values, which
+are not necessarily the same as the Maximum Likelihood Estimate.  To obtain
+the values for the Maximum Likelihood Estimation (MLE) we find the location in
+the chain with the highest probability:
 
 .. jupyter-execute::
 
@@ -549,35 +684,40 @@ Find the Maximum Likelihood Estimation (MLE):
     for i, par in enumerate(p):
         p[par].value = mle_soln[i]
 
-    print("Maximum Likelihood Estimation")
-    print('-----------------------------')
-    for _, vals in p.items():
-        print(vals)
-
-Finally, lets work out a 1 and 2-:math:`\sigma` error estimate for 't1':
-
-.. jupyter-execute::
-
-    quantiles = np.percentile(res.flatchain['t1'], [2.28, 15.9, 50, 84.2, 97.7])
-    print("1 sigma spread", 0.5 * (quantiles[3] - quantiles[1]))
-    print("2 sigma spread", 0.5 * (quantiles[4] - quantiles[0]))
 
+    print('\nMaximum Likelihood Estimation from emcee       ')
+    print('-------------------------------------------------')
+    print('Parameter  MLE Value   Median Value   Uncertainty')
+    fmt = '  {:5s}  {:11.5f} {:11.5f}   {:11.5f}'.format
+    for name, param in p.items():
+        print(fmt(name, param.value, res.params[name].value,
+                  res.params[name].stderr))
 
-Getting and Printing Fit Reports
-===========================================
 
-.. currentmodule:: lmfit.printfuncs
+Here the difference between MLE and median value are seen to be below 0.5%,
+and well within the estimated 1-:math:`\sigma` uncertainty.
 
-.. autofunction:: fit_report
-
-An example using this to write out a fit report would be:
-
-.. jupyter-execute:: ../examples/doc_fitting_withreport.py
-    :hide-output:
-
-which would give as output:
+Finally, we can use the samples from ``emcee`` to work out the 1- and
+2-:math:`\sigma` error estimates.
 
 .. jupyter-execute::
-    :hide-code:
 
-    print(fit_report(out))
+    print('\nError Estimates from emcee    ')
+    print('------------------------------------------------------')
+    print('Parameter  -2sigma  -1sigma   median  +1sigma  +2sigma ')
+
+    for name in p.keys():
+        quantiles = np.percentile(res.flatchain[name],
+                                  [2.275, 15.865, 50, 84.135, 97.275])
+        median = quantiles[2]
+        err_m2 = quantiles[0] - median
+        err_m1 = quantiles[1] - median
+        err_p1 = quantiles[3] - median
+        err_p2 = quantiles[4] - median
+        fmt = '  {:5s}   {:8.4f} {:8.4f} {:8.4f} {:8.4f} {:8.4f}'.format
+        print(fmt(name, err_m2, err_m1, median, err_p1, err_p2))
+
+And we see that the initial estimates for the 1-:math:`\sigma` standard error
+using ``numdifftools`` was not too bad.  We'll return to this example
+problem in :ref:`label-confidence-advanced` and use a different method to
+calculate the 1- and 2-:math:`\sigma` error bars.

From 2d6ba409ea4d4ac3c7f12976c261d1a0f5771ead Mon Sep 17 00:00:00 2001
From: Matthew Newville <newville@cars.uchicago.edu>
Date: Fri, 20 Dec 2019 13:17:01 -0600
Subject: [PATCH 3/3] doc update for peak amplitude as 'area under curve', etc

---
 doc/builtin_models.rst | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/doc/builtin_models.rst b/doc/builtin_models.rst
index 52c763d98..a4c7f70b5 100644
--- a/doc/builtin_models.rst
+++ b/doc/builtin_models.rst
@@ -34,13 +34,9 @@ All the models listed below are one-dimensional, with an independent
 variable named ``x``.  Many of these models represent a function with a
 distinct peak, and so share common features.  To maintain uniformity,
 common parameter names are used whenever possible.  Thus, most models have
-a parameter called ``amplitude`` that represents the overall height (or
-area of) a peak or function, a ``center`` parameter that represents a peak
-centroid position, and a ``sigma`` parameter that gives a characteristic
-width.  Many peak shapes also have a parameter ``fwhm`` (constrained by
-``sigma``) giving the full width at half maximum and a parameter ``height``
-(constrained by ``sigma`` and ``amplitude``) to give the maximum peak
-height.
+a parameter called ``amplitude`` that represents the overall intensity (or
+area of) a peak or function and a ``sigma`` parameter that gives a
+characteristic width.
 
 After a list of built-in models, a few examples of their use are given.
 
@@ -48,10 +44,25 @@ Peak-like models
 -------------------
 
 There are many peak-like models available.  These include
-:class:`GaussianModel`, :class:`LorentzianModel`, :class:`VoigtModel` and
-some less commonly used variations.  The :meth:`guess`
-methods for all of these make a fairly crude guess for the value of
-``amplitude``, but also set a lower bound of 0 on the value of ``sigma``.
+:class:`GaussianModel`, :class:`LorentzianModel`, :class:`VoigtModel`,
+:class:`PseudoVoigtModel`, and some less commonly used variations.  Most of
+these models are *unit-normalized* and share the same parameter names so
+that you can easily switch between models and interpret the results.  The
+``amplitude`` parameter is the multiplicative factor for the
+unit-normalized peak lineshape, and so will represent the strength of that
+peak or the area under that curve.  The ``center`` parameter will be the
+centroid ``x`` value.  The ``sigma`` parameter is the characteristic width
+of the peak, with many functions using :math:`(x-\mu)/\sigma` where
+:math:`\mu` is the centroid value.  Most of these peak functions will have
+two additional parameters derived from and constrained by the other
+parameters. The first of these is ``fwhm`` which will hold the estimated
+"Full Width at Half Max" for the peak, which is often easier to compare
+between different models than ``sigma``.  The second of these is ``height``
+which will contain the maximum value of the peak, typically the value at
+:math:`x = \mu`.  Finally, each of these models has a :meth:`guess` method
+that uses data to make a fairly crude but usually sufficient guess for the
+value of ``amplitude``, ``center``, and ``sigma``, and sets a lower bound
+of 0 on the value of ``sigma``.
 
 :class:`GaussianModel`
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~