diff --git a/examples/Sinusoid/calibrate.jl b/examples/Sinusoid/calibrate.jl index 8de90da6..ac610e53 100644 --- a/examples/Sinusoid/calibrate.jl +++ b/examples/Sinusoid/calibrate.jl @@ -115,6 +115,9 @@ p = plot( legend = :bottomright, xlims = (0, 6), ylims = (-6, 10), + guidefontsize = 14, + tickfontsize = 12, + legendfontsize = 12, ) vline!([theta_true[1]], color = :red, style = :dash, label = :false) hline!([theta_true[2]], color = :red, style = :dash, label = :false) diff --git a/examples/Sinusoid/emulate.jl b/examples/Sinusoid/emulate.jl index f13aee37..dcf0027c 100644 --- a/examples/Sinusoid/emulate.jl +++ b/examples/Sinusoid/emulate.jl @@ -14,7 +14,7 @@ # First, we load the packages we need: using LinearAlgebra, Random -using Distributions, Plots +using Distributions, Plots, Plots.PlotMeasures using JLD2 using CalibrateEmulateSample @@ -179,7 +179,7 @@ p2 = contour( ylabel = "Vertical Shift", title = "True Sinusoid Mean", ) -p = plot(p1, p2, size = (600, 300), layout = (1, 2)) +p = plot(p1, p2, size = (600, 300), layout = (1, 2), guidefontsize = 12, tickfontsize = 10, legendfontsize = 10) savefig(p, joinpath(data_save_directory, "sinusoid_groundtruth_contours.png")) # The first panel shows how the range varies with respect to the two parameters in the Gaussian process # emulator. The contours show the range is mostly dependent on the amplitude, with little variation with @@ -212,7 +212,17 @@ p2 = contour( title = "GP Sinusoid Mean", ) plot!(inputs[1, :], inputs[2, :]; seriestype = :scatter, zcolor = outputs[2, :], label = :false) -p = plot(p1, p2, size = (600, 300), layout = (1, 2)) +p = plot( + p1, + p2, + right_margin = 3mm, + bottom_margin = 3mm, + size = (600, 300), + layout = (1, 2), + guidefontsize = 12, + tickfontsize = 10, + legendfontsize = 10, +) savefig(p, joinpath(data_save_directory, "sinusoid_GP_emulator_contours.png")) # Plot RF emulator contours @@ -238,7 +248,17 @@ p2 = contour( title = "RF Sinusoid Mean", ) plot!(inputs[1, :], inputs[2, :]; seriestype = :scatter, zcolor = outputs[2, :], label = :false) -p = plot(p1, p2, size = (600, 300), layout = (1, 2)) +p = plot( + p1, + p2, + right_margin = 3mm, + bottom_margin = 3mm, + size = (600, 300), + layout = (1, 2), + guidefontsize = 12, + tickfontsize = 10, + legendfontsize = 10, +) savefig(p, joinpath(data_save_directory, "sinusoid_RF_emulator_contours.png")) # Both the GP and RF emulator give similar results to the ground truth G(θ), indicating they are correctly @@ -272,7 +292,17 @@ p2 = contour( ylabel = "Vertical Shift", title = "GP 1σ in Sinusoid Mean", ) -p = plot(p1, p2, size = (600, 300), layout = (1, 2)) +p = plot( + p1, + p2, + size = (600, 300), + layout = (1, 2), + right_margin = 3mm, + bottom_margin = 3mm, + guidefontsize = 12, + tickfontsize = 10, + legendfontsize = 10, +) savefig(p, joinpath(data_save_directory, "sinusoid_GP_emulator_std_contours.png")) @@ -299,7 +329,7 @@ p2 = contour( ylabel = "Vertical Shift", title = "RF 1σ in Sinusoid Mean", ) -p = plot(p1, p2, size = (600, 300), layout = (1, 2)) +p = plot(p1, p2, size = (600, 300), layout = (1, 2), guidefontsize = 12, tickfontsize = 10, legendfontsize = 10) savefig(p, joinpath(data_save_directory, "sinusoid_RF_emulator_std_contours.png")) # The GP and RF uncertainty predictions are similar and show lower uncertainties around the region of interest # where we have more training points. 
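Note on the emulate.jl hunks above: they add the same guidefontsize/tickfontsize/legendfontsize (and margin) keyword arguments to each plot call. If that repetition becomes hard to maintain, the keywords could be collected once and splatted into every call. The sketch below is illustrative only and is not part of this diff; the name `common_kwargs` is hypothetical.

```julia
using Plots, Plots.PlotMeasures  # PlotMeasures provides the `mm` length unit

# Shared figure styling, gathered once (hypothetical helper, not the package API)
common_kwargs = (guidefontsize = 12, tickfontsize = 10, legendfontsize = 10, right_margin = 3mm, bottom_margin = 3mm)

# Two placeholder panels, combined as in the example scripts
p1 = plot(sin, 0, 2π, label = "panel 1")
p2 = plot(cos, 0, 2π, label = "panel 2")
p = plot(p1, p2; size = (600, 300), layout = (1, 2), common_kwargs...)
```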
@@ -331,7 +361,7 @@ p2 = contour( ylabel = "Vertical Shift", title = "GP error in Sinusoid Mean", ) -p = plot(p1, p2, size = (600, 300), layout = (1, 2)) +p = plot(p1, p2, size = (600, 300), layout = (1, 2), guidefontsize = 12, tickfontsize = 10, legendfontsize = 10) savefig(p, joinpath(data_save_directory, "sinusoid_GP_errors_contours.png")) rf_diff_grid = abs.(rf_grid - g_true_grid) @@ -357,7 +387,7 @@ p2 = contour( ylabel = "Vertical Shift", title = "RF error in Sinusoid Mean", ) -p = plot(p1, p2, size = (600, 300), layout = (1, 2)) +p = plot(p1, p2, size = (600, 300), layout = (1, 2), guidefontsize = 12, tickfontsize = 10, legendfontsize = 10) savefig(p, joinpath(data_save_directory, "sinusoid_RF_errors_contours.png")) # Here, we want the emulator to show the low errors in the region around the true parameter values near θ = (3, 6), diff --git a/examples/Sinusoid/sample.jl b/examples/Sinusoid/sample.jl index fe0cdeff..e2faaa19 100644 --- a/examples/Sinusoid/sample.jl +++ b/examples/Sinusoid/sample.jl @@ -172,9 +172,9 @@ plot_all = plot( layout = layout, size = (600, 600), legend = :true, - guidefontsize = 8, - tickfontsize = 6, - legendfontsize = 6, + guidefontsize = 14, + tickfontsize = 12, + legendfontsize = 12, ) savefig(plot_all, joinpath(data_save_directory, "sinusoid_MCMC_hist_GP.png")) @@ -293,9 +293,9 @@ plot_all = plot( layout = layout, size = (600, 600), legend = :true, - guidefontsize = 8, - tickfontsize = 6, - legendfontsize = 6, + guidefontsize = 14, + tickfontsize = 12, + legendfontsize = 12, ) savefig(plot_all, joinpath(data_save_directory, "sinusoid_MCMC_hist_RF.png")) diff --git a/examples/Sinusoid/sinusoid_setup.jl b/examples/Sinusoid/sinusoid_setup.jl index c6142d5a..897ae8a3 100644 --- a/examples/Sinusoid/sinusoid_setup.jl +++ b/examples/Sinusoid/sinusoid_setup.jl @@ -74,7 +74,15 @@ function generate_obs(amplitude_true, vert_shift_true, Γ; rng = Random.GLOBAL_R # Generate the "true" signal for these parameters signal_true = model(amplitude_true, vert_shift_true; rng = rng) # Plot - p = plot(signal_true, color = :black, linewidth = 3, label = "True signal") + p = plot( + signal_true, + color = :black, + linewidth = 3, + label = "True signal", + guidefontsize = 14, + tickfontsize = 12, + legendfontsize = 12, + ) # We will observe properties of the signal that inform us about the amplitude and vertical # position. These properties will be the range (the difference between the maximum and the minimum), diff --git a/paper.bib b/paper.bib index ea5bbb22..0deddb59 100644 --- a/paper.bib +++ b/paper.bib @@ -1,6 +1,5 @@ @article{julia, doi = {10.1137/141000671}, - url = {https://doi.org/10.1137%2F141000671}, year = 2017, month = {jan}, publisher = {Society for Industrial {\&} Applied Mathematics ({SIAM})}, @@ -17,7 +16,8 @@ @book{Sisson:2018 isbn = {978-1-351-64346-7}, publisher = {CRC Press}, author = {Sisson, Scott A. and Fan, Yanan and Beaumont, Mark}, -year = {2018} +year = {2018}, +doi = {10.1201/9781315117195} } @incollection{Nott:2018, @@ -29,6 +29,7 @@ @incollection{Nott:2018 pages = {211-241}, year = {2018}, publisher = {CRC Press}, + doi = {10.1201/9781315117195-8} } @article{Cleary:2021, @@ -38,14 +39,12 @@ @article{Cleary:2021 pages = {109716}, year = {2021}, issn = {0021-9991}, -doi = {https://doi.org/10.1016/j.jcp.2020.109716}, -url = {https://www.sciencedirect.com/science/article/pii/S0021999120304903}, +doi = {10.1016/j.jcp.2020.109716}, author = {Emmet Cleary and Alfredo Garbuno-Inigo and Shiwei Lan and Tapio Schneider and Andrew M. 
Stuart}, } @article{Dunbar:2022a, doi = {10.21105/joss.04869}, -url = {https://doi.org/10.21105/joss.04869}, year = {2022}, publisher = {The Open Journal}, volume = {7}, @@ -72,7 +71,7 @@ @mastersthesis{Hillier:2022 school = {Massachusetts Institute of Technology. Department of Electrical Engineering and Computer Science}, author = {Adeline Hillier}, year = {2022}, - url = {https://hdl.handle.net/1721.1/145140} + doi = {1721.1/145140} } @@ -82,7 +81,8 @@ @book{Rasmussen:2006 volume={2}, number={3}, year={2006}, - publisher={MIT press Cambridge, MA} + publisher={MIT press Cambridge, MA}, + doi = {10.1142/S0129065704001899} } @article{Iglesias:2013, @@ -93,7 +93,8 @@ @article{Iglesias:2013 number={4}, pages={045001}, year={2013}, - publisher={IOP Publishing} + publisher={IOP Publishing}, + doi={10.1088/0266-5611/29/4/045001} } @inproceedings{Rahimi:2007, @@ -104,7 +105,7 @@ @inproceedings{Rahimi:2007 number={4}, pages={5}, year={2007}, - organization={Citeseer} + url = {https://proceedings.neurips.cc/paper_files/paper/2007/file/013a006f03dbc5392effeb8f18fda755-Paper.pdf}, } @inproceedings{Rahimi:2008, @@ -113,7 +114,8 @@ @inproceedings{Rahimi:2008 booktitle={2008 46th Annual Allerton Conference on Communication, Control, and Computing}, pages={555--561}, year={2008}, - organization={IEEE} + organization={IEEE}, + doi = {10.1109/allerton.2008.4797607} } @article{Liu:2022, @@ -138,21 +140,19 @@ @article{Cotter:2013 keywords = {algorithms, Bayesian inverse problems, Bayesian nonparametrics, Gaussian random field, MCMC}, year = {2013}, doi = {10.1214/13-STS421}, -URL = {https://doi.org/10.1214/13-STS421} } @article{Sherlock:2010, ISSN = {08834237}, - URL = {http://www.jstor.org/stable/41058939}, author = {Chris Sherlock and Paul Fearnhead and Gareth O. Roberts}, journal = {Statistical Science}, number = {2}, pages = {172--190}, publisher = {Institute of Mathematical Statistics}, title = {The Random Walk Metropolis: Linking Theory and Practice Through a Case Study}, - urldate = {2023-12-06}, volume = {25}, - year = {2010} + year = {2010}, + doi = {10.1214/10-sts327} } @article{Dunbar:2021, @@ -163,7 +163,7 @@ @article{Dunbar:2021 number = {9}, pages = {e2020MS002454}, keywords = {uncertainty quantification, model calibration, machine learning, general circulation model, parametric uncertainty, inverse problem}, -doi = {https://doi.org/10.1029/2020MS002454}, +doi = {10.1029/2020MS002454}, year = {2021} } @@ -177,7 +177,7 @@ @article{Howland:2022 number = {3}, pages = {e2021MS002735}, keywords = {uncertainty quantification, Bayesian learning, GCM, seasonal cycle}, -doi = {https://doi.org/10.1029/2021MS002735}, +doi = {10.1029/2021MS002735}, year = {2022} } @@ -189,7 +189,7 @@ @article{Dunbar:2022b number = {9}, pages = {e2022MS002997}, keywords = {optimal design, model calibration, uncertainty quantification, general circulation model, optimal placement, machine learning}, -doi = {https://doi.org/10.1029/2022MS002997}, +doi = {10.1029/2022MS002997}, year = {2022} } @@ -212,12 +212,12 @@ @article{Fairbrother:2022 journal={Journal of Statistical Software}, volume={102}, pages={1--36}, - year={2022} + year={2022}, + doi={10.18637/jss.v102.i01} } @article{Dixit:2022, doi = {10.21105/joss.04561}, -url = {https://doi.org/10.21105/joss.04561}, year = {2022}, publisher = {The Open Journal}, volume = {7}, @@ -248,14 +248,12 @@ @article{Tankhilevich:2020 month = {02}, issn = {1367-4803}, doi = {10.1093/bioinformatics/btaa078}, - url = {https://doi.org/10.1093/bioinformatics/btaa078}, note = {btaa078}, eprint 
= {https://academic.oup.com/bioinformatics/advance-article-pdf/doi/10.1093/bioinformatics/btaa078/32353462/btaa078.pdf}, } @article{Huang:2022, doi = {10.1088/1361-6420/ac99fa}, -url = {https://dx.doi.org/10.1088/1361-6420/ac99fa}, year = {2022}, month = {oct}, publisher = {IOP Publishing}, @@ -271,7 +269,6 @@ @article{Mansfield:2022 title = {Calibration and Uncertainty Quantification of a Gravity Wave Parameterization: A Case Study of the {Quasi}-{Biennial} {Oscillation} in an Intermediate Complexity Climate Model}, volume = {14}, issn = {1942-2466}, - url = {https://onlinelibrary.wiley.com/doi/abs/10.1029/2022MS003245}, doi = {10.1029/2022MS003245}, language = {en}, number = {11}, @@ -283,8 +280,6 @@ @article{Mansfield:2022 @article{King:2023, type = {preprint}, title = {Bayesian History Matching applied to the calibration of a gravity wave parameterization}, - url = {https://essopenarchive.org/users/709594/articles/693996-bayesian-history-matching-applied-to-the-calibration-of-a-gravity-wave-parameterization?commit=613feab171223df3237efc786346eacd545733f1}, -urldate = {2024-01-05}, institution = {Preprints}, author = {King, Robert C and Mansfield, Laura A and Sheshadri, Aditi}, month = dec, @@ -301,12 +296,12 @@ @article{Metropolis:1953 number={6}, pages={1087--1092}, year={1953}, - publisher={American Institute of Physics} + publisher={American Institute of Physics}, + doi = {10.1063/1.1699114} } @article{Huggins:2023, doi = {10.21105/joss.05428}, -url = {https://doi.org/10.21105/joss.05428}, year = {2023}, publisher = {The Open Journal}, volume = {8}, @@ -317,13 +312,17 @@ @article{Huggins:2023 journal = {Journal of Open Source Software} } -@article{Gammal:2022, - title={Fast and robust Bayesian Inference using Gaussian Processes with GPry}, - author={Jonas El Gammal and Nils Schöneberg and Jesús Torrado and Christian Fidler}, - year={2022}, - eprint={2211.02045}, - archivePrefix={arXiv}, - primaryClass={astro-ph.CO} +@article{Gammal:2023, +doi = {10.1088/1475-7516/2023/10/021}, +year = {2023}, +month = {oct}, +publisher = {IOP Publishing}, +volume = {2023}, +number = {10}, +pages = {021}, +author = {Jonas El Gammal and Nils Schöneberg and Jesús Torrado and Christian Fidler}, +title = {Fast and robust Bayesian inference using Gaussian processes with GPry}, +journal = {Journal of Cosmology and Astroparticle Physics}, } @article{livingstone:2022, @@ -334,7 +333,8 @@ @article{livingstone:2022 number={2}, pages={496--523}, year={2022}, - publisher={Oxford University Press} + publisher={Oxford University Press}, + doi={10.1111/rssb.12482} } @article{hoffman:2014, diff --git a/paper.md b/paper.md index cd28e7bf..ade49eb5 100644 --- a/paper.md +++ b/paper.md @@ -20,7 +20,7 @@ authors: orcid: 0000-0002-2878-3874 affiliation: 4 - name: Andre Nogueira de Souza - orcid: + orcid: 0000-0002-9906-7824 affiliation: 5 - name: Laura Anne Mansfield orcid: 0000-0002-6285-6045 @@ -51,27 +51,33 @@ bibliography: paper.bib # Summary -A Julia language [@julia] package providing practical and modular implementation of ``Calibrate, Emulate, Sample" [@Cleary:2021], hereafter CES, an accelerated workflow for obtaining model parametric uncertainty is presented. This is also known as Bayesian inversion or uncertainty quantification. To apply CES one requires a computer model (written in any programming language) dependent on free parameters, a prior distribution encoding some prior knowledge about the distribution over the free parameters, and some data with which to constrain this prior distribution. 
The pipeline has three stages, most easily explained in reverse: the last stage is to draw samples (Sample) from the Bayesian posterior distribution, i.e. the prior distribution conditioned on the observed data; to accelerate and regularize this process we train statistical emulators to represent the user-provided parameter-to-data map (Emulate); the training points for these emulators are generated by the computer model, and selected adaptively around regions of high posterior mass (Calibrate). We describe CES as an accelerated workflow, as it is often able to use dramatically fewer evaluations of the computer model when compared with applying sampling algorithms, such as Markov chain Monte Carlo (MCMC), directly. +We present a Julia language [@julia] package providing a practical and modular implementation of "Calibrate, Emulate, Sample" [@Cleary:2021], hereafter CES, an accelerated workflow for obtaining model parametric uncertainty. This task is also known as Bayesian inversion or uncertainty quantification. To apply CES one requires a computer model (written in any programming language) dependent on free parameters, a prior distribution encoding some prior knowledge about the distribution over the free parameters, and some data with which to constrain this prior distribution. The pipeline has three stages, most easily explained in reverse: + +1. The goal of the workflow is to draw samples (Sample) from the Bayesian posterior distribution, that is, the prior distribution conditioned on the observed data. +2. To accelerate and regularize sampling, we train statistical emulators to represent the user-provided parameter-to-data map (Emulate). +3. The training points for these emulators are generated by the computer model and selected adaptively around regions of high posterior mass (Calibrate). + +We describe CES as an accelerated workflow, as it is often able to use dramatically fewer evaluations of the computer model when compared with applying sampling algorithms, such as Markov chain Monte Carlo (MCMC), directly. * Calibration tools: We recommend choosing adaptive training points with Ensemble Kalman methods such as EKI [@Iglesias:2013] and its variants [@Huang:2022]; and CES provides explicit utilities from the codebase EnsembleKalmanProcesses.jl [@Dunbar:2022a]. * Emulation tools: CES integrates any statistical emulator, currently implemented are Gaussian Processes (GP) [@Rasmussen:2006], explicitly provided through packages SciKitLearn.jl [@scikit-learn] and GaussianProcesses.jl [@Fairbrother:2022], and Random Features [@Rahimi:2007;@Rahimi:2008;@Liu:2022], explicitly provided through [RandomFeatures.jl](https://doi.org/10.5281/zenodo.7141158) that can provide additional flexibility and scalability, particularly in higher dimensions. -* Sampling tools: The regularized and accelerated sampling problem is solved with MCMC, and CES provides the variants of Random Walk Metropolis [@Metropolis:1953;@Sherlock:2010], and preconditioned Crank-Nicholson [@Cotter:2013], using APIs from [Turing.jl](https://turinglang.org/). Some regular emulator mean functions are differentiable, and including accelerations of derivative-based MCMC into CES (e.g. NUTS [@hoffman:2014], Barker [@livingstone:2022]) is an active direction of work.
+* Sampling tools: The regularized and accelerated sampling problem is solved with MCMC, and CES provides the variants Random Walk Metropolis [@Metropolis:1953;@Sherlock:2010] and preconditioned Crank-Nicolson [@Cotter:2013], using APIs from [Turing.jl](https://turinglang.org/). Some regular emulator mean functions are differentiable, and including accelerations of derivative-based MCMC into CES [e.g., NUTS, @hoffman:2014; Barker, @livingstone:2022] is an active direction of work. To highlight code accessibility, we also provide a suite of detailed scientifically-inspired examples, with documentation that walks users through some use cases. Such use cases not only demonstrate the capability of the CES pipeline, but also teach users about typical interface and workflow experience. # Statement of need -Computationally expensive computer codes for predictive modelling are ubiquitous across science and engineering disciplines. Free parameter values that exist within these modelling frameworks are typically constrained by observations to produce accurate and robust predictions about the system they are approximating numerically. In a Bayesian setting, this is viewed as evolving an initial parameter distribution (based on prior information) with the input of observed data, to a more informative data-consistent distribution (posterior). Unfortunately, this task is intensely computationally expensive, commonly requiring over $10^5$ evaluations of the expensive computer code (e.g. Random Walk Metropolis), with accelerations relying on intrusive model information, such as a derivative of the parameter-to-data map. CES is able to approximate and accelerate this process in a non-intrusive fashion and requiring only on the order of $10^2$ evaluations of the original computer model. This opens the doors for quantifying parametric uncertainty for a class of numerically intensive computer codes that has previously been unavailable. +Computationally expensive computer codes for predictive modelling are ubiquitous across science and engineering disciplines. Free parameter values that exist within these modelling frameworks are typically constrained by observations to produce accurate and robust predictions about the system they are approximating numerically. In a Bayesian setting, this is viewed as evolving an initial parameter distribution (based on prior information) with the input of observed data, to a more informative data-consistent distribution (posterior). Unfortunately, this task is intensely computationally expensive, commonly requiring over $10^5$ evaluations of the expensive computer code (e.g., Random Walk Metropolis), with accelerations relying on intrusive model information, such as a derivative of the parameter-to-data map. CES is able to approximate and accelerate this process in a non-intrusive fashion, requiring only on the order of $10^2$ evaluations of the original computer model. This opens the door to quantifying parametric uncertainty for a class of numerically intensive computer codes for which it was previously out of reach. # State of the field -In Julia there are a few tools for performing non-accelerated uncertainty quantification, from classical sensitivity analysis approaches, e.g., [UncertaintyQuantification.jl](https://zenodo.org/records/10149017), GlobalSensitivity.jl [@Dixit:2022], and MCMC, e.g., [Mamba.jl](https://github.com/brian-j-smith/Mamba.jl) or [Turing.jl](https://turinglang.org/).
For computational efficiency, ensemble methods also provide approximate sampling (e.g., the Ensemble Kalman Sampler [@Garbuno-Inigo:2020b;@Dunbar:2022a]) though these only provide Gaussian approximations of the posterior. +In Julia there are a few tools for performing non-accelerated uncertainty quantification, from classical sensitivity analysis approaches, for example, [UncertaintyQuantification.jl](https://zenodo.org/records/10149017), GlobalSensitivity.jl [@Dixit:2022], and MCMC, for example, [Mamba.jl](https://github.com/brian-j-smith/Mamba.jl) or [Turing.jl](https://turinglang.org/). For computational efficiency, ensemble methods also provide approximate sampling [e.g., the Ensemble Kalman Sampler; @Garbuno-Inigo:2020b; @Dunbar:2022a], though these only provide Gaussian approximations of the posterior. -Accelerated uncertainty quantification tools also exist for the related approach of Approximate Bayesian Computation (ABC), e.g., GpABC [@Tankhilevich:2020] or [ApproxBayes.jl](https://github.com/marcjwilliams1/ApproxBayes.jl?tab=readme-ov-file); these tools both approximately sample from the posterior distribution. In ABC, this approximation comes from bypassing the likelihood that is usually required in sampling methods, such as MCMC. Instead, the goal of ABC is to replace the likelihood with a scalar-valued sampling objective that compares model and data. In CES, the approximation comes from learning the parameter-to-data map, then following this it calculates an explicit likelihood and uses exact sampling via MCMC. Some ABC algorithms also make use of statistical emulators to further accelerate sampling (GpABC). Although flexible, ABC encounters challenges due to the subjectivity of summary statistics and distance metrics, that may lead to approximation errors particularly in high-dimensional settings [@Nott:2018]. CES is more restrictive due to use of an explicit Gaussian likelihood, but also leverages this structure to deal with high dimensional data. +Accelerated uncertainty quantification tools also exist for the related approach of Approximate Bayesian Computation (ABC), for example, GpABC [@Tankhilevich:2020] or [ApproxBayes.jl](https://github.com/marcjwilliams1/ApproxBayes.jl?tab=readme-ov-file); these tools both approximately sample from the posterior distribution. In ABC, this approximation comes from bypassing the likelihood that is usually required in sampling methods, such as MCMC. Instead, the goal of ABC is to replace the likelihood with a scalar-valued sampling objective that compares model and data. In CES, the approximation comes from learning the parameter-to-data map; CES then calculates an explicit likelihood and uses exact sampling via MCMC. Some ABC algorithms also make use of statistical emulators to further accelerate sampling (GpABC). Although flexible, ABC encounters challenges due to the subjectivity of summary statistics and distance metrics, which may lead to approximation errors, particularly in high-dimensional settings [@Nott:2018]. CES is more restrictive due to its use of an explicit Gaussian likelihood, but it also leverages this structure to deal with high-dimensional data. -Several other tools are available in other languages for a purpose of accelerated learning of the posterior distribution or posterior sampling.
Two such examples, written in Python, approximate the log-posterior distribution directly with a Gaussian process: [PyVBMC](https://github.com/acerbilab/pyvbmc) [@Huggins:2023] additionaly uses variational approximations to calculate the normalization constant, and [GPry](https://github.com/jonaselgammal/GPry) [@Gammal:2022], which iteratively trains the GP with an active training point selection algorithm. Such algorithms are distinct from CES, which approximates the parameter-to-data map with the Gaussian process, and advocates ensemble Kalman methods to select training points. +Several other tools are available in other languages for the purpose of accelerated learning of the posterior distribution or posterior sampling. Two such examples, written in Python, approximate the log-posterior distribution directly with a Gaussian process: [PyVBMC](https://github.com/acerbilab/pyvbmc) [@Huggins:2023] additionally uses variational approximations to calculate the normalization constant, while [GPry](https://github.com/jonaselgammal/GPry) [@Gammal:2023] iteratively trains the GP with an active training point selection algorithm. Such algorithms are distinct from CES, which approximates the parameter-to-data map with the Gaussian process, and advocates ensemble Kalman methods to select training points. # A simple example from the code documentation @@ -158,11 +164,16 @@ chain = MC.sample( A histogram of the samples from the CES algorithm is displayed in \autoref{fig:GP_2d_posterior}. We see that the posterior distribution contains the true value $(3.0, 7.0)$ with high probability. - - - # Research projects using the package -Some research projects that use this codebase, or modifications of it, are [@Bieli:2022;@Hillier:2022;@Dunbar:2021;@Howland:2022;@Dunbar:2022b;@Mansfield:2022;@King:2023]. +Some research projects that use this codebase, or modifications of it, are: + +* [@Dunbar:2021] +* [@Bieli:2022] +* [@Hillier:2022] +* [@Howland:2022] +* [@Dunbar:2022b] +* [@Mansfield:2022] +* [@King:2023] # Acknowledgements diff --git a/paper.pdf b/paper.pdf index 3d873040..0779883b 100644 Binary files a/paper.pdf and b/paper.pdf differ diff --git a/sinusoid_GP_emulator_contours.png b/sinusoid_GP_emulator_contours.png index f16d4e08..8eb67bf4 100644 Binary files a/sinusoid_GP_emulator_contours.png and b/sinusoid_GP_emulator_contours.png differ diff --git a/sinusoid_MCMC_hist_GP.png b/sinusoid_MCMC_hist_GP.png index 3f87b482..10e1f7ef 100644 Binary files a/sinusoid_MCMC_hist_GP.png and b/sinusoid_MCMC_hist_GP.png differ diff --git a/sinusoid_eki_pairs.png b/sinusoid_eki_pairs.png index 451d3199..229f75d1 100644 Binary files a/sinusoid_eki_pairs.png and b/sinusoid_eki_pairs.png differ diff --git a/sinusoid_true_vs_observed_signal.png b/sinusoid_true_vs_observed_signal.png index c01062fd..d143ac7c 100644 Binary files a/sinusoid_true_vs_observed_signal.png and b/sinusoid_true_vs_observed_signal.png differ
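For context on the Sinusoid example edited above: its parameter-to-data map G(θ) takes an amplitude and a vertical shift, generates a noisy sinusoid, and returns the observed range and mean, the same two quantities visualized in the emulate.jl contour plots. A minimal sketch of that idea follows; the function name, signal length, and noise level are illustrative assumptions, not the package implementation.

```julia
using Random, Statistics

# Hypothetical sketch of the Sinusoid forward map: θ = (amplitude, vert_shift) ↦ (range, mean)
function sinusoid_forward_map(amplitude, vert_shift; n = 100, rng = Random.GLOBAL_RNG)
    t = range(0, 2π, length = n)
    signal = amplitude .* sin.(t) .+ vert_shift .+ randn(rng, n)  # noisy sinusoid
    return [maximum(signal) - minimum(signal), mean(signal)]      # observables: range and mean
end

sinusoid_forward_map(3.0, 6.0)  # evaluated near the example's true parameters θ ≈ (3, 6)
```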