diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 33ec93d7b..890bf831c 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -1,5 +1,8 @@ +agents: + queue: new-central + modules: climacommon/2024_03_18 + env: - JULIA_VERSION: "1.9.2" OPENBLAS_NUM_THREADS: 1 GKSwstype: nul @@ -9,22 +12,17 @@ steps: command: - "echo $$JULIA_DEPOT_PATH" - "julia --project -e 'using Pkg; Pkg.instantiate(;verbose=true)'" - - "julia --project -e 'using Conda; Conda.add(\"scipy=1.8.1\", channel=\"conda-forge\")'" - - "julia --project -e 'using Conda; Conda.add(\"scikit-learn=1.1.1\")'" - - "julia --project -e 'using Conda; Conda.add(\"matplotlib=3.7.1\")'" + - "julia --project -e 'using Conda; Conda.add(\"scipy=1.14.1\", channel=\"conda-forge\")'" + - "julia --project -e 'using Conda; Conda.add(\"scikit-learn=1.5.1\")'" env: PYTHON: "" artifact_paths: - "*.toml" - agents: - config: cpu - queue: central - slurm_ntasks: 1 - + - wait - - label: "Lorenz" + - label: "CES - Lorenz" key: "lorenz" command: | export PYTHON="$$JULIA_DEPOT_PATH/conda/3/x86_64/bin/python" @@ -47,8 +45,32 @@ steps: artifact_paths: - "examples/Lorenz/output/*.png" - agents: - config: cpu - queue: central - slurm_ntasks: 1 + + - label: "Emulator - regression 2D" + key: "emulator_compare" + command: | + export PYTHON="$$JULIA_DEPOT_PATH/conda/3/x86_64/bin/python" + export PYTHONHOME="$$JULIA_DEPOT_PATH/conda/3/x86_64/bin" + export CONDA_JL_HOME="$$JULIA_DEPOT_PATH/conda/3/x86_64/" + + mkdir examples/Emulator/Regression_2d_2d/depot + export JULIA_DEPOT_PATH="$$(pwd)/examples/Emulator/Regression_2d_2d/depot:$JULIA_DEPOT_PATH" + + julia --color=yes --project=examples/Emulator/Regression_2d_2d -e ' + println("--- Developing Project") + using Pkg; + Pkg.develop(path=".") + Pkg.update() + println("--- Instantiating Project") + Pkg.instantiate() + println("+++ Running compare_regression") + include("examples/Emulator/Regression_2d_2d/compare_regression.jl")' + artifact_paths: + - "examples/Emulator/Regression_2d_2d/output/*.png" + env: + PYTHON: "$$JULIA_DEPOT_PATH/conda/3/bin/python" + PYTHONHOME: "$$JULIA_DEPOT_PATH/conda/3/bin" + CONDA_JL_HOME: "$$JULIA_DEPOT_PATH/conda/3" + + diff --git a/.github/workflows/Docs.yml b/.github/workflows/Docs.yml index 6239a0de8..76d2e62f5 100644 --- a/.github/workflows/Docs.yml +++ b/.github/workflows/Docs.yml @@ -36,8 +36,8 @@ jobs: PYTHON: "" run: | julia --color=yes --project -e 'using Pkg; Pkg.instantiate()' - julia --project -e 'using Conda; Conda.add("scipy=1.8.1")' - julia --color=yes --project -e 'using Conda; Conda.add("scikit-learn=1.1.1")' + julia --project -e 'using Conda; Conda.add("scipy=1.14.1")' + julia --color=yes --project -e 'using Conda; Conda.add("scikit-learn=1.5.1")' julia --color=yes --project=docs/ -e 'using Pkg; Pkg.instantiate()' julia --color=yes --project=docs/ -e 'using Pkg; Pkg.precompile()' - name: Build and deploy diff --git a/.github/workflows/JuliaFormatter.yml b/.github/workflows/JuliaFormatter.yml index 9c4b59c83..301c30052 100644 --- a/.github/workflows/JuliaFormatter.yml +++ b/.github/workflows/JuliaFormatter.yml @@ -31,7 +31,7 @@ jobs: - uses: julia-actions/setup-julia@latest if: steps.filter.outputs.julia_file_change == 'true' with: - version: 1.7 + version: 1 - name: Apply JuliaFormatter if: steps.filter.outputs.julia_file_change == 'true' diff --git a/.github/workflows/Tests.yml b/.github/workflows/Tests.yml index 2a4a39f15..c0805ddeb 100644 --- a/.github/workflows/Tests.yml +++ b/.github/workflows/Tests.yml @@ -25,8 +25,8 @@ jobs: - name: 
Set up Julia uses: julia-actions/setup-julia@v1 with: - version: '1' # Latest 1.x release of julia - + version: 1 + - name: Install Julia Project Packages # we add this ENV varaible to force PyCall to download and use Conda rather than # the system python (default on Linux), see the PyCall documentation @@ -39,8 +39,8 @@ jobs: env: PYTHON: "" run: | - julia --project -e 'using Conda; Conda.add("scipy=1.11.4")' - julia --project -e 'using Conda; Conda.add("scikit-learn=1.3.2")' + julia --project -e 'using Conda; Conda.add("scipy=1.14.1")' + julia --project -e 'using Conda; Conda.add("scikit-learn=1.5.1")' - name: Run Unit Tests env: @@ -80,10 +80,9 @@ jobs: - name: Set up Julia uses: julia-actions/setup-julia@v1 with: - version: '1' # Latest 1.x release of julia - + version: 1 - - name: Install Julia Project Packages + - name: Install Julia Project Packages env: PYTHON: "" run: | @@ -93,8 +92,8 @@ jobs: env: PYTHON: "" run: | - julia --project -e 'using Conda; Conda.add("scipy=1.11.4")' - julia --project -e 'using Conda; Conda.add("scikit-learn=1.3.2")' + julia --project -e 'using Conda; Conda.add("scipy=1.14.1")' + julia --project -e 'using Conda; Conda.add("scikit-learn=1.5.1")' - name: Run Unit Tests env: @@ -121,7 +120,7 @@ jobs: - name: Set up Julia uses: julia-actions/setup-julia@v1 with: - version: '1' # Latest 1.x release of julia + version: 1 - name: Install Julia Project Packages env: @@ -133,8 +132,8 @@ jobs: env: PYTHON: "" run: | - julia --project -e 'using Conda; Conda.add(\"scipy=1.11.4\")' - julia --project -e 'using Conda; Conda.add(\"scikit-learn=1.3.2\")' + julia --project -e 'using Conda; Conda.add("scipy=1.14.1")' + julia --project -e 'using Conda; Conda.add("scikit-learn=1.5.1")' - name: Run Unit Tests env: diff --git a/JOSS/paper.bib b/JOSS/paper.bib new file mode 100644 index 000000000..f52e1a134 --- /dev/null +++ b/JOSS/paper.bib @@ -0,0 +1,347 @@ +@article{julia, + doi = {10.1137/141000671}, + year = 2017, + month = {jan}, + publisher = {Society for Industrial {\&} Applied Mathematics ({SIAM})}, + volume = {59}, + number = {1}, + pages = {65--98}, + author = {Jeff Bezanson and Alan Edelman and Stefan Karpinski and Viral B. Shah}, + title = {Julia: A Fresh Approach to Numerical Computing}, + journal = {{SIAM} Review} +} + +@book{Sisson:2018, +title = {Handbook of {Approximate} {Bayesian} {Computation}}, +isbn = {978-1-351-64346-7}, +publisher = {CRC Press}, +author = {Sisson, Scott A. and Fan, Yanan and Beaumont, Mark}, +year = {2018}, +doi = {10.1201/9781315117195} +} + +@incollection{Nott:2018, + author = {Nott, David J. and Ong, Victor M.-H. and Fan, Y. and Sisson, S. A.}, + title = {High-{Dimensional} {ABC}}, + booktitle = {Handbook of {Approximate} {Bayesian} {Computation}}, + isbn = {978-1-315-11719-5}, + chapter = {8}, + pages = {211-241}, + year = {2018}, + publisher = {CRC Press}, + doi = {10.1201/9781315117195-8} +} + +@article{Cleary:2021, +title = {Calibrate, emulate, sample}, +journal = {Journal of Computational Physics}, +volume = {424}, +pages = {109716}, +year = {2021}, +issn = {0021-9991}, +doi = {10.1016/j.jcp.2020.109716}, +author = {Emmet Cleary and Alfredo Garbuno-Inigo and Shiwei Lan and Tapio Schneider and Andrew M. Stuart}, +} + +@article{Dunbar:2022a, +doi = {10.21105/joss.04869}, +year = {2022}, +publisher = {The Open Journal}, +volume = {7}, +number = {80}, +pages = {4869}, +author = {Dunbar, Oliver R. A. 
and Lopez-Gomez, Ignacio and Garbuno-Iñigo, Alfredo and Huang, Daniel Zhengyu and Bach, Eviatar and Wu, Jin-long},
title = {EnsembleKalmanProcesses.jl: Derivative-free ensemble-based model calibration},
journal = {Journal of Open Source Software},
}

@article{Bieli:2022,
author = {Bieli, Melanie and Dunbar, Oliver R. A. and de Jong, Emily K. and Jaruga, Anna and Schneider, Tapio and Bischoff, Tobias},
title = {An Efficient {Bayesian} Approach to Learning Droplet Collision Kernels: Proof of Concept Using “{Cloudy},” a New n-Moment Bulk Microphysics Scheme},
journal = {Journal of Advances in Modeling Earth Systems},
volume = {14},
number = {8},
pages = {e2022MS002994},
doi = {10.1029/2022MS002994},
year = {2022}
}

@mastersthesis{Hillier:2022,
  title = {Supervised Calibration and Uncertainty Quantification of Subgrid Closure Parameters using Ensemble {Kalman} Inversion},
  school = {Massachusetts Institute of Technology. Department of Electrical Engineering and Computer Science},
  author = {Adeline Hillier},
  year = {2022}
}


@book{Rasmussen:2006,
  title={Gaussian processes for machine learning},
  author={Williams, Christopher KI and Rasmussen, Carl Edward},
  volume={2},
  number={3},
  year={2006},
  publisher={MIT press Cambridge, MA},
  doi = {10.1142/S0129065704001899}
}

@article{Iglesias:2013,
  title={Ensemble Kalman methods for inverse problems},
  author={Iglesias, Marco A and Law, Kody JH and Stuart, Andrew M},
  journal={Inverse Problems},
  volume={29},
  number={4},
  pages={045001},
  year={2013},
  publisher={IOP Publishing},
  doi={10.1088/0266-5611/29/4/045001}
}

@inproceedings{Rahimi:2007,
  title={Random Features for Large-Scale Kernel Machines.},
  author={Rahimi, Ali and Recht, Benjamin and others},
  booktitle={NIPS},
  volume={3},
  number={4},
  pages={5},
  year={2007},
  url = {https://proceedings.neurips.cc/paper_files/paper/2007/file/013a006f03dbc5392effeb8f18fda755-Paper.pdf},
}

@inproceedings{Rahimi:2008,
  title={Uniform approximation of functions with random bases},
  author={Rahimi, Ali and Recht, Benjamin},
  booktitle={2008 46th Annual Allerton Conference on Communication, Control, and Computing},
  pages={555--561},
  year={2008},
  organization={IEEE},
  doi = {10.1109/allerton.2008.4797607}
}

@article{Liu:2022,
  author={Liu, Fanghui and Huang, Xiaolin and Chen, Yudong and Suykens, Johan A. K.},
  journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
  title={Random Features for Kernel Approximation: A Survey on Algorithms, Theory, and Beyond},
  year={2022},
  volume={44},
  number={10},
  pages={7128-7148},
  doi={10.1109/TPAMI.2021.3097011},
}

@article{Cotter:2013,
author = {S. L. Cotter and G. O. Roberts and A. M. Stuart and D. White},
title = {{MCMC Methods for Functions: Modifying Old Algorithms to Make Them Faster}},
volume = {28},
journal = {Statistical Science},
number = {3},
publisher = {Institute of Mathematical Statistics},
pages = {424 -- 446},
keywords = {algorithms, Bayesian inverse problems, Bayesian nonparametrics, Gaussian random field, MCMC},
year = {2013},
doi = {10.1214/13-STS421},
}

@article{Sherlock:2010,
  ISSN = {08834237},
  author = {Chris Sherlock and Paul Fearnhead and Gareth O.
Roberts}, + journal = {Statistical Science}, + number = {2}, + pages = {172--190}, + publisher = {Institute of Mathematical Statistics}, + title = {The Random Walk Metropolis: Linking Theory and Practice Through a Case Study}, + volume = {25}, + year = {2010}, + doi = {10.1214/10-sts327} +} + +@article{Dunbar:2021, +author = {Dunbar, Oliver R. A. and Garbuno-Inigo, Alfredo and Schneider, Tapio and Stuart, Andrew M.}, +title = {Calibration and Uncertainty Quantification of Convective Parameters in an Idealized GCM}, +journal = {Journal of Advances in Modeling Earth Systems}, +volume = {13}, +number = {9}, +pages = {e2020MS002454}, +keywords = {uncertainty quantification, model calibration, machine learning, general circulation model, parametric uncertainty, inverse problem}, +doi = {10.1029/2020MS002454}, +year = {2021} +} + + + +@article{Howland:2022, +author = {Howland, Michael F. and Dunbar, Oliver R. A. and Schneider, Tapio}, +title = {Parameter Uncertainty Quantification in an Idealized GCM With a Seasonal Cycle}, +journal = {Journal of Advances in Modeling Earth Systems}, +volume = {14}, +number = {3}, +pages = {e2021MS002735}, +keywords = {uncertainty quantification, Bayesian learning, GCM, seasonal cycle}, +doi = {10.1029/2021MS002735}, +year = {2022} +} + +@article{Dunbar:2022b, +author = {Dunbar, Oliver R. A. and Howland, Michael F. and Schneider, Tapio and Stuart, Andrew M.}, +title = {Ensemble-Based Experimental Design for Targeting Data Acquisition to Inform Climate Models}, +journal = {Journal of Advances in Modeling Earth Systems}, +volume = {14}, +number = {9}, +pages = {e2022MS002997}, +keywords = {optimal design, model calibration, uncertainty quantification, general circulation model, optimal placement, machine learning}, +doi = {10.1029/2022MS002997}, +year = {2022} +} + + +@article{scikit-learn, + title={Scikit-learn: Machine Learning in {P}ython}, + author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V. + and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P. + and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and + Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.}, + journal={Journal of Machine Learning Research}, + volume={12}, + pages={2825--2830}, + year={2011} +} + +@article{Fairbrother:2022, + title={GaussianProcesses. 
jl: A Nonparametric Bayes Package for the Julia Language}, + author={Fairbrother, Jamie and Nemeth, Christopher and Rischard, Maxime and Brea, Johanni and Pinder, Thomas}, + journal={Journal of Statistical Software}, + volume={102}, + pages={1--36}, + year={2022}, + doi={10.18637/jss.v102.i01} +} + +@article{Dixit:2022, +doi = {10.21105/joss.04561}, +year = {2022}, +publisher = {The Open Journal}, +volume = {7}, +number = {76}, +pages = {4561}, +author = {Vaibhav Kumar Dixit and Christopher Rackauckas}, +title = {GlobalSensitivity.jl: Performant and Parallel Global Sensitivity Analysis with Julia}, +journal = {Journal of Open Source Software} +} + +@article{Garbuno-Inigo:2020b, + title={Affine invariant interacting {Langevin} dynamics for {Bayesian} inference}, + author={Garbuno-Inigo, Alfredo and Nüsken, Nikolas and Reich, Sebastian}, + journal={SIAM Journal on Applied Dynamical Systems}, + volume={19}, + number={3}, + pages={1633--1658}, + year={2020}, + publisher={SIAM}, + doi={10.1137/19M1304891} +} + +@article{Tankhilevich:2020, + author = {Tankhilevich, Evgeny and Ish-Horowicz, Jonathan and Hameed, Tara and Roesch, Elisabeth and Kleijn, Istvan and Stumpf, Michael P H and He, Fei}, + title = "{GpABC: a Julia package for approximate Bayesian computation with Gaussian process emulation}", + journal = {Bioinformatics}, + year = {2020}, + month = {02}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btaa078}, + note = {btaa078}, + eprint = {https://academic.oup.com/bioinformatics/advance-article-pdf/doi/10.1093/bioinformatics/btaa078/32353462/btaa078.pdf}, +} + +@article{Huang:2022, +doi = {10.1088/1361-6420/ac99fa}, +year = {2022}, +month = {oct}, +publisher = {IOP Publishing}, +volume = {38}, +number = {12}, +pages = {125006}, +author = {Daniel Zhengyu Huang and Jiaoyang Huang and Sebastian Reich and Andrew M Stuart}, +title = {Efficient derivative-free Bayesian inference for large-scale inverse problems}, +journal = {Inverse Problems}, +} + +@article{Mansfield:2022, + title = {Calibration and Uncertainty Quantification of a Gravity Wave Parameterization: A Case Study of the {Quasi}-{Biennial} {Oscillation} in an Intermediate Complexity Climate Model}, + volume = {14}, + issn = {1942-2466}, + doi = {10.1029/2022MS003245}, + language = {en}, + number = {11}, + journal = {Journal of Advances in Modeling Earth Systems}, + author = {Mansfield, L. A. and Sheshadri, A.}, + year = {2022}, +} + +@article{King:2023, + type = {preprint}, + title = {Bayesian History Matching applied to the calibration of a gravity wave parameterization}, + institution = {Preprints}, + author = {King, Robert C and Mansfield, Laura A and Sheshadri, Aditi}, + month = dec, + year = {2023}, + doi = {10.22541/essoar.170365299.96491153/v1}, +} + + +@article{Metropolis:1953, + title={Equation of state calculations by fast computing machines}, + author={Metropolis, Nicholas and Rosenbluth, Arianna W and Rosenbluth, Marshall N and Teller, Augusta H and Teller, Edward}, + journal={The journal of chemical physics}, + volume={21}, + number={6}, + pages={1087--1092}, + year={1953}, + publisher={American Institute of Physics}, + doi = {10.1063/1.1699114} +} + +@article{Huggins:2023, +doi = {10.21105/joss.05428}, +year = {2023}, +publisher = {The Open Journal}, +volume = {8}, +number = {86}, +pages = {5428}, +author = {Bobby Huggins and Chengkun Li and Marlon Tobaben and Mikko J. 
Aarnos and Luigi Acerbi},
title = {PyVBMC: Efficient Bayesian inference in Python},
journal = {Journal of Open Source Software}
}

@article{Gammal:2023,
doi = {10.1088/1475-7516/2023/10/021},
year = {2023},
month = {oct},
publisher = {IOP Publishing},
volume = {2023},
number = {10},
pages = {021},
author = {Jonas El Gammal and Nils Schöneberg and Jesús Torrado and Christian Fidler},
title = {Fast and robust Bayesian inference using Gaussian processes with GPry},
journal = {Journal of Cosmology and Astroparticle Physics},
}

@article{livingstone:2022,
  title={The Barker proposal: combining robustness and efficiency in gradient-based MCMC},
  author={Livingstone, Samuel and Zanella, Giacomo},
  journal={Journal of the Royal Statistical Society Series B: Statistical Methodology},
  volume={84},
  number={2},
  pages={496--523},
  year={2022},
  publisher={Oxford University Press},
  doi={10.1111/rssb.12482}
}

@article{hoffman:2014,
  title={The No-U-Turn sampler: adaptively setting path lengths in Hamiltonian Monte Carlo},
  author={Hoffman, Matthew D and Gelman, Andrew and others},
  journal={Journal of Machine Learning Research},
  volume={15},
  number={1},
  pages={1593--1623},
  year={2014}
}
diff --git a/JOSS/paper.md b/JOSS/paper.md
new file mode 100644
index 000000000..ade49eb5e
--- /dev/null
+++ b/JOSS/paper.md
@@ -0,0 +1,183 @@
---
title: 'CalibrateEmulateSample.jl: Accelerated Parametric Uncertainty Quantification'
tags:
  - machine learning
  - optimization
  - bayesian
  - data assimilation
authors:
  - name: Oliver R. A. Dunbar
    corresponding: true
    orcid: 0000-0001-7374-0382
    affiliation: 1
  - name: Melanie Bieli
    orcid:
    affiliation: 2
  - name: Alfredo Garbuno-Iñigo
    orcid: 0000-0003-3279-619X
    affiliation: 3
  - name: Michael Howland
    orcid: 0000-0002-2878-3874
    affiliation: 4
  - name: Andre Nogueira de Souza
    orcid: 0000-0002-9906-7824
    affiliation: 5
  - name: Laura Anne Mansfield
    orcid: 0000-0002-6285-6045
    affiliation: 6
  - name: Gregory L. Wagner
    orcid: 0000-0001-5317-2445
    affiliation: 5
  - name: N. Efrat-Henrici
    affiliation: 1

affiliations:
  - name: Geological and Planetary Sciences, California Institute of Technology
    index: 1
  - name: Swiss Re Ltd.
    index: 2
  - name: Department of Statistics, Mexico Autonomous Institute of Technology
    index: 3
  - name: Civil and Environmental Engineering, Massachusetts Institute of Technology
    index: 4
  - name: Earth, Atmospheric, and Planetary Sciences, Massachusetts Institute of Technology
    index: 5
  - name: Earth System Science, Doerr School of Sustainability, Stanford University
    index: 6

date: 2 January 2024
bibliography: paper.bib
---

# Summary

A Julia language [@julia] package is presented that provides a practical and modular implementation of ``Calibrate, Emulate, Sample'' [@Cleary:2021], hereafter CES, an accelerated workflow for obtaining model parametric uncertainty. This task is also known as Bayesian inversion or uncertainty quantification. To apply CES, one requires a computer model (written in any programming language) that depends on free parameters, a prior distribution encoding prior knowledge about these parameters, and some data with which to constrain this prior distribution. The pipeline has three stages, most easily explained in reverse:

1. The goal of the workflow is to draw samples (Sample) from the Bayesian posterior distribution, that is, the prior distribution conditioned on the observed data,
2. To accelerate and regularize sampling, we train statistical emulators to represent the user-provided parameter-to-data map (Emulate),
3. The training points for these emulators are generated by the computer model and selected adaptively around regions of high posterior mass (Calibrate).

We describe CES as an accelerated workflow, as it is often able to use dramatically fewer evaluations of the computer model when compared with applying sampling algorithms, such as Markov chain Monte Carlo (MCMC), directly.

* Calibration tools: We recommend choosing adaptive training points with Ensemble Kalman methods such as EKI [@Iglesias:2013] and its variants [@Huang:2022]; CES provides explicit utilities from the codebase EnsembleKalmanProcesses.jl [@Dunbar:2022a].
* Emulation tools: CES can integrate any statistical emulator. Currently implemented are Gaussian Processes (GP) [@Rasmussen:2006], explicitly provided through the packages SciKitLearn.jl [@scikit-learn] and GaussianProcesses.jl [@Fairbrother:2022], and Random Features [@Rahimi:2007; @Rahimi:2008; @Liu:2022], explicitly provided through [RandomFeatures.jl](https://doi.org/10.5281/zenodo.7141158), which can provide additional flexibility and scalability, particularly in higher dimensions.
* Sampling tools: The regularized and accelerated sampling problem is solved with MCMC, and CES provides the variants Random Walk Metropolis [@Metropolis:1953; @Sherlock:2010] and preconditioned Crank-Nicolson [@Cotter:2013], using APIs from [Turing.jl](https://turinglang.org/). Some regular emulator mean functions are differentiable, and incorporating derivative-based MCMC accelerations into CES [e.g., NUTS, @hoffman:2014; the Barker proposal, @livingstone:2022] is an active direction of work.

To highlight code accessibility, we also provide a suite of detailed, scientifically-inspired examples, with documentation that walks users through some use cases. Such use cases not only demonstrate the capability of the CES pipeline, but also familiarize users with the typical interface and workflow.


# Statement of need

Computationally expensive computer codes for predictive modelling are ubiquitous across science and engineering disciplines. Free parameter values within these modelling frameworks are typically constrained by observations so as to produce accurate and robust predictions about the system being approximated numerically. In a Bayesian setting, this is viewed as evolving an initial parameter distribution (based on prior information), with the input of observed data, to a more informative, data-consistent distribution (the posterior). Unfortunately, this task is intensely computationally expensive, commonly requiring over $10^5$ evaluations of the expensive computer code (e.g., with Random Walk Metropolis), with accelerations relying on intrusive model information, such as a derivative of the parameter-to-data map. CES is able to approximate and accelerate this process in a non-intrusive fashion, requiring only on the order of $10^2$ evaluations of the original computer model. This opens the door to quantifying parametric uncertainty for a class of numerically intensive computer codes for which such quantification was previously out of reach.

# State of the field

In Julia there are a few tools for performing non-accelerated uncertainty quantification, ranging from classical sensitivity analysis approaches, for example, [UncertaintyQuantification.jl](https://zenodo.org/records/10149017) and GlobalSensitivity.jl [@Dixit:2022], to MCMC, for example, [Mamba.jl](https://github.com/brian-j-smith/Mamba.jl) or [Turing.jl](https://turinglang.org/). For computational efficiency, ensemble methods also provide approximate sampling [e.g., the Ensemble Kalman Sampler, @Garbuno-Inigo:2020b; @Dunbar:2022a], though these only provide Gaussian approximations of the posterior.

Accelerated uncertainty quantification tools also exist for the related approach of Approximate Bayesian Computation (ABC), for example, GpABC [@Tankhilevich:2020] or [ApproxBayes.jl](https://github.com/marcjwilliams1/ApproxBayes.jl?tab=readme-ov-file); both of these tools approximately sample from the posterior distribution. In ABC, this approximation comes from bypassing the likelihood that is usually required in sampling methods such as MCMC; instead, ABC replaces the likelihood with a scalar-valued sampling objective that compares model and data. In CES, the approximation comes from learning the parameter-to-data map; an explicit likelihood is then constructed and sampled from exactly via MCMC. Some ABC algorithms also make use of statistical emulators to further accelerate sampling (GpABC). Although flexible, ABC encounters challenges due to the subjectivity of summary statistics and distance metrics, which may lead to approximation errors, particularly in high-dimensional settings [@Nott:2018]. CES is more restrictive due to its use of an explicit Gaussian likelihood, but it also leverages this structure to deal with high-dimensional data.

Several other tools are available in other languages for the purpose of accelerated learning of the posterior distribution or of posterior sampling. Two such examples, written in Python, approximate the log-posterior distribution directly with a Gaussian process: [PyVBMC](https://github.com/acerbilab/pyvbmc) [@Huggins:2023], which additionally uses variational approximations to calculate the normalization constant, and [GPry](https://github.com/jonaselgammal/GPry) [@Gammal:2023], which iteratively trains the GP with an active training-point selection algorithm. Such algorithms are distinct from CES, which approximates the parameter-to-data map with a Gaussian process and advocates ensemble Kalman methods to select training points.

# A simple example from the code documentation

We sketch an end-to-end example of the pipeline, with a fully-detailed walkthrough given in the online documentation.

We have a model of a sinusoidal signal that is a function of parameters $\theta=(A,v)$, where $A$ is the amplitude of the signal and $v$ is the vertical shift of the signal: $$f(A, v) = A \sin(\phi + t) + v, \quad \forall t \in [0,2\pi].$$ Here, $\phi$ is the random phase of each signal. The goal is to obtain not just point estimates of the parameters $\theta=(A,v)$, but entire probability distributions for them, given some noisy observations. We will use the range and mean of a signal as our observable: $$ G(\theta) = \big[ \text{range}\big(f(\theta)\big), \text{mean}\big(f(\theta)\big) \big]. $$ Then, our noisy observations, $y_{obs}$, can be written as: $$ y_{obs} = G(\theta^\dagger) + \mathcal{N}(0, \Gamma),$$ where $\Gamma$ is the observational covariance matrix. We will assume the noise to be independent for each observable, giving us a diagonal covariance matrix.
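
For concreteness, the forward model and synthetic observations can be written in a few lines of Julia. The sketch below is illustrative rather than taken from the package: the grid resolution and the noise amplitude in $\Gamma$ are assumptions made here, while the true parameters $(3.0, 7.0)$ are those used in this experiment.
```julia
using LinearAlgebra, Random, Statistics, Distributions

# The signal, evaluated on a uniform grid over [0, 2π]; each evaluation
# draws a random phase ϕ, as described in the text.
function f(A, v; n_t = 100)
    ϕ = 2π * rand()
    t = range(0, 2π, length = n_t)
    return A .* sin.(t .+ ϕ) .+ v
end

# The observable map G: the range and the mean of a signal.
function G(θ)
    signal = f(θ[1], θ[2])
    return [maximum(signal) - minimum(signal), mean(signal)]
end

# Noisy observations at the true parameters, with independent
# (diagonal) Gaussian noise; the amplitude 0.1 is an assumed value.
Γ = 0.1 * Matrix(I, 2, 2)
θ_dagger = [3.0, 7.0]
y_obs = G(θ_dagger) + rand(MvNormal(zeros(2), Γ))
```
Only `G`, `y_obs`, and `Γ` are referenced by the code blocks that follow; the documentation defines them with its own (equivalent) model file.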

![The true and observed range and mean. \label{fig:signal}](sinusoid_true_vs_observed_signal.png){width=50%}

For this experiment $\theta^\dagger = (A^\dagger,v^\dagger) = (3.0, 7.0)$, and the noisy observations are displayed in blue in \autoref{fig:signal}.

We define prior distributions on the two parameters. For the amplitude, we define a prior with mean 2 and standard deviation 1. It is additionally constrained to be nonnegative. For the vertical shift we define a prior with mean 0 and standard deviation 5.
```julia
const PD = CalibrateEmulateSample.ParameterDistributions
prior_u1 = PD.constrained_gaussian("amplitude", 2, 1, 0, Inf)
prior_u2 = PD.constrained_gaussian("vert_shift", 0, 5, -Inf, Inf)
prior = PD.combine_distributions([prior_u1, prior_u2])
```

![Marginal distributions of the prior \label{fig:prior}](sinusoid_prior.png){width=70%}

The prior is displayed in \autoref{fig:prior}.

We now adaptively find input-output pairs from our map $G$ in a region of interest using an inversion method (an ensemble Kalman process). This is the Calibrate stage, and it iteratively generates parameter combinations that refine around a region of high posterior mass.
```julia
const EKP = CalibrateEmulateSample.EnsembleKalmanProcesses
N_ensemble = 10
N_iterations = 5
initial_ensemble = EKP.construct_initial_ensemble(prior, N_ensemble)
ensemble_kalman_process = EKP.EnsembleKalmanProcess(
    initial_ensemble, y_obs, Γ, EKP.Inversion();
)
for i in 1:N_iterations
    # evaluate G on each ensemble member, then update the ensemble
    params_i = EKP.get_phi_final(prior, ensemble_kalman_process)
    G_ens = hcat([G(params_i[:, i]) for i in 1:N_ensemble]...)
    EKP.update_ensemble!(ensemble_kalman_process, G_ens)
end
```

![The resulting ensemble from a calibration. \label{fig:eki}](sinusoid_eki_pairs.png){width=60%}

The adaptively refined training points from EKP are displayed in \autoref{fig:eki}. We now build a basic Gaussian process emulator from the GaussianProcesses.jl package to emulate the map $G$ using these points.

```julia
const UT = CalibrateEmulateSample.Utilities
const EM = CalibrateEmulateSample.Emulators

# re-use the input-output pairs generated during calibration as training data
input_output_pairs = UT.get_training_points(
    ensemble_kalman_process, N_iterations,
)
gppackage = EM.GPJL()
gauss_proc = EM.GaussianProcess(gppackage, noise_learn = false)
emulator = EM.Emulator(
    gauss_proc, input_output_pairs, normalize_inputs = true, obs_noise_cov = Γ,
)
EM.optimize_hyperparameters!(emulator) # train the emulator
```

![The Gaussian process emulator of the range and mean maps, trained on the re-used calibration pairs \label{fig:GP_emulator}](sinusoid_GP_emulator_contours.png){width=80%}

We evaluate the mean of this emulator on a grid, and also show the value of the true $G$ at training point locations in \autoref{fig:GP_emulator}.

We can then sample with this emulator using an MCMC scheme. We first choose a good step size (an algorithm parameter) by running some short sampling runs (of length 2,000 steps). Then we run a 100,000-step sampling run to generate samples of the joint posterior distribution.
```julia
const MC = CalibrateEmulateSample.MarkovChainMonteCarlo
mcmc = MC.MCMCWrapper(
    MC.RWMHSampling(), y_obs, prior, emulator,
)
# choose a step size
new_step = MC.optimize_stepsize(
    mcmc; init_stepsize = 0.1, N = 2000,
)
# Now begin the actual MCMC
chain = MC.sample(
    mcmc, 100_000; stepsize = new_step, discard_initial = 2_000,
)
```

![The joint posterior distribution histogram \label{fig:GP_2d_posterior}](sinusoid_MCMC_hist_GP.png){width=60%}

A histogram of the samples from the CES algorithm is displayed in \autoref{fig:GP_2d_posterior}. We see that the posterior distribution contains the true value $(3.0, 7.0)$ with high probability.

# Research projects using the package
Some research projects that use this codebase, or modifications of it, are:

* [@Dunbar:2021]
* [@Bieli:2022]
* [@Hillier:2022]
* [@Howland:2022]
* [@Dunbar:2022b]
* [@Mansfield:2022]
* [@King:2023]

# Acknowledgements

We acknowledge contributions from several others who played a role in the evolution of this package. These include Adeline Hillier, Ignacio Lopez-Gomez, and Thomas Jackson. The development of this package was supported by the generosity of Eric and Wendy Schmidt by recommendation of the Schmidt Futures program, National Science Foundation Grant AGS-1835860, the Defense Advanced Research Projects Agency (Agreement No. HR00112290030), the Heising-Simons Foundation, Audi Environmental Foundation, and the Cisco Foundation.


# References
\ No newline at end of file
diff --git a/JOSS/paper.pdf b/JOSS/paper.pdf
new file mode 100644
index 000000000..0779883b7
Binary files /dev/null and b/JOSS/paper.pdf differ
diff --git a/JOSS/sinusoid_GP_emulator_contours.png b/JOSS/sinusoid_GP_emulator_contours.png
new file mode 100644
index 000000000..8eb67bf43
Binary files /dev/null and b/JOSS/sinusoid_GP_emulator_contours.png differ
diff --git a/JOSS/sinusoid_MCMC_hist_GP.png b/JOSS/sinusoid_MCMC_hist_GP.png
new file mode 100644
index 000000000..10e1f7ef1
Binary files /dev/null and b/JOSS/sinusoid_MCMC_hist_GP.png differ
diff --git a/JOSS/sinusoid_eki_pairs.png b/JOSS/sinusoid_eki_pairs.png
new file mode 100644
index 000000000..229f75d1c
Binary files /dev/null and b/JOSS/sinusoid_eki_pairs.png differ
diff --git a/JOSS/sinusoid_prior.png b/JOSS/sinusoid_prior.png
new file mode 100644
index 000000000..ae7e41d1f
Binary files /dev/null and b/JOSS/sinusoid_prior.png differ
diff --git a/JOSS/sinusoid_true_vs_observed_signal.png b/JOSS/sinusoid_true_vs_observed_signal.png
new file mode 100644
index 000000000..d143ac7c5
Binary files /dev/null and b/JOSS/sinusoid_true_vs_observed_signal.png differ
diff --git a/Project.toml b/Project.toml
index d704eae63..560eb7b5c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
name = "CalibrateEmulateSample"
uuid = "95e48a1f-0bec-4818-9538-3db4340308e3"
authors = ["CLIMA contributors "]
-version = "0.4.0"
+version = "0.6.0"

[deps]
AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001"
@@ -30,19 +30,19 @@ AdvancedMH = "0.6, 0.7, 0.8"
Conda = "1.7"
Distributions = "0.24, 0.25"
DocStringExtensions = "0.8, 0.9"
-EnsembleKalmanProcesses = "1.1"
+EnsembleKalmanProcesses = "2"
GaussianProcesses = "0.12"
MCMCChains = "4.14, 5, 6"
+Printf = "1"
ProgressBars = "1"
PyCall = "1.93"
+Random = "1"
RandomFeatures = "0.3"
ScikitLearn = "0.6, 0.7"
StableRNGs = "1"
Statistics = "1"
StatsBase = "0.33, 0.34"
-Printf = "1"
-Random = "1"
-julia = "1.6, 1.7, 1.8, 1.9"
+julia = "1.6, 1.7, 1.8, 1.9, 1.10, 1.11"

[extras]
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
diff --git a/README.md b/README.md index defd7b10e..af91e9155 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,30 @@ # CalibrateEmulateSample.jl -Implements a derivative-free machine-learning accelerated pipeline for uncertainty quantification. +Implements a derivative-free machine-learning-accelerated pipeline for uncertainty quantification. | **Documentation** | [![dev][docs-dev-img]][docs-dev-url] | |---------------------:|:-------------------------------------------------| +| **JOSS** | [![DOI][joss-img]][joss-url] | | **DOI** | [![DOI][zenodo-img]][zenodo-latest-url] | | **Docs Build** | [![docs build][docs-bld-img]][docs-bld-url] | | **Unit tests** | [![unit tests][unit-tests-img]][unit-tests-url] | | **Code Coverage** | [![codecov][codecov-img]][codecov-url] | +[joss-img]: https://joss.theoj.org/papers/10.21105/joss.06372/status.svg +[joss-url]: https://doi.org/10.21105/joss.06372 + [zenodo-img]: https://zenodo.org/badge/179573047.svg [zenodo-latest-url]: https://zenodo.org/badge/latestdoi/179573047 [docs-dev-img]: https://img.shields.io/badge/docs-dev-blue.svg [docs-dev-url]: https://CliMA.github.io/CalibrateEmulateSample.jl/dev/ -[docs-bld-img]: https://github.com/CliMA/CalibrateEmulateSample.jl/actions/workflows/Docs.yml/badge.svg?branch=master +[docs-bld-img]: https://github.com/CliMA/CalibrateEmulateSample.jl/actions/workflows/Docs.yml/badge.svg?branch=main [docs-bld-url]: https://github.com/CliMA/CalibrateEmulateSample.jl/actions/workflows/Docs.yml -[unit-tests-img]: https://github.com/CliMA/CalibrateEmulateSample.jl/actions/workflows/Tests.yml/badge.svg?branch=master +[unit-tests-img]: https://github.com/CliMA/CalibrateEmulateSample.jl/actions/workflows/Tests.yml/badge.svg?branch=main [unit-tests-url]: https://github.com/CliMA/CalibrateEmulateSample.jl/actions/workflows/Tests.yml - [codecov-img]: https://codecov.io/gh/CliMA/CalibrateEmulateSample.jl/branch/master/graph/badge.svg [codecov-url]: https://codecov.io/gh/CliMA/CalibrateEmulateSample.jl diff --git a/deps/build.jl b/deps/build.jl index 3ede3b39c..662f4f2ad 100644 --- a/deps/build.jl +++ b/deps/build.jl @@ -7,5 +7,5 @@ if lowercase(get(ENV, "CI", "false")) == "true" Pkg.build("PyCall") end -Conda.add("scipy=1.8.1", channel = "conda-forge") -Conda.add("scikit-learn=1.1.1") +Conda.add("scipy=1.14.1", channel = "conda-forge") +Conda.add("scikit-learn=1.5.1") diff --git a/docs/Manifest.toml b/docs/Manifest.toml new file mode 100644 index 000000000..f8c0a0825 --- /dev/null +++ b/docs/Manifest.toml @@ -0,0 +1,1660 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.11.1" +manifest_format = "2.0" +project_hash = "e4d965798d55f5903483b71f545533e62ea32fdf" + +[[deps.AMD]] +deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse_jll"] +git-tree-sha1 = "45a1272e3f809d36431e57ab22703c6896b8908f" +uuid = "14f7f29c-3bd6-536c-9a0b-7339e30b5a3e" +version = "0.5.3" + +[[deps.ANSIColoredPrinters]] +git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c" +uuid = "a4c015fc-c6ff-483c-b24f-f7ea428134e9" +version = "0.0.1" + +[[deps.AbstractFFTs]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "d92ad398961a3ed262d8bf04a1a2b8340f915fef" +uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" +version = "1.5.0" +weakdeps = ["ChainRulesCore", "Test"] + + [deps.AbstractFFTs.extensions] + AbstractFFTsChainRulesCoreExt = "ChainRulesCore" + AbstractFFTsTestExt = "Test" + +[[deps.AbstractMCMC]] +deps = ["BangBang", "ConsoleProgressMonitor", "Distributed", "LogDensityProblems", "Logging", "LoggingExtras", 
"ProgressLogging", "Random", "StatsBase", "TerminalLoggers", "Transducers"] +git-tree-sha1 = "87e63dcb990029346b091b170252f3c416568afc" +uuid = "80f14c24-f653-4e6a-9b94-39d6b0f70001" +version = "4.4.2" + +[[deps.AbstractTrees]] +git-tree-sha1 = "2d9c9a55f9c93e8887ad391fbae72f8ef55e1177" +uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" +version = "0.4.5" + +[[deps.Adapt]] +deps = ["LinearAlgebra", "Requires"] +git-tree-sha1 = "50c3c56a52972d78e8be9fd135bfb91c9574c140" +uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +version = "4.1.1" +weakdeps = ["StaticArrays"] + + [deps.Adapt.extensions] + AdaptStaticArraysExt = "StaticArrays" + +[[deps.AdvancedMH]] +deps = ["AbstractMCMC", "Distributions", "FillArrays", "LinearAlgebra", "LogDensityProblems", "Random", "Requires"] +git-tree-sha1 = "b2a1602952739e589cf5e2daff1274a49f22c9a4" +uuid = "5b7e9947-ddc0-4b3f-9b55-0d8042f74170" +version = "0.7.5" + + [deps.AdvancedMH.extensions] + AdvancedMHForwardDiffExt = ["DiffResults", "ForwardDiff"] + AdvancedMHMCMCChainsExt = "MCMCChains" + AdvancedMHStructArraysExt = "StructArrays" + + [deps.AdvancedMH.weakdeps] + DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" + ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" + MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" + StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" + +[[deps.AliasTables]] +deps = ["PtrArrays", "Random"] +git-tree-sha1 = "9876e1e164b144ca45e9e3198d0b689cadfed9ff" +uuid = "66dad0bd-aa9a-41b7-9441-69ab47430ed8" +version = "1.1.3" + +[[deps.ArgCheck]] +git-tree-sha1 = "a3a402a35a2f7e0b87828ccabbd5ebfbebe356b4" +uuid = "dce04be8-c92d-5529-be00-80e4d2c0e197" +version = "2.3.0" + +[[deps.ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +version = "1.1.2" + +[[deps.Arpack]] +deps = ["Arpack_jll", "Libdl", "LinearAlgebra", "Logging"] +git-tree-sha1 = "9b9b347613394885fd1c8c7729bfc60528faa436" +uuid = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97" +version = "0.5.4" + +[[deps.Arpack_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "OpenBLAS_jll", "Pkg"] +git-tree-sha1 = "5ba6c757e8feccf03a1554dfaf3e26b3cfc7fd5e" +uuid = "68821587-b530-5797-8361-c406ea357684" +version = "3.5.1+1" + +[[deps.ArrayInterface]] +deps = ["Adapt", "LinearAlgebra"] +git-tree-sha1 = "d60a1922358aa203019b7857a2c8c37329b8736c" +uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" +version = "7.17.0" + + [deps.ArrayInterface.extensions] + ArrayInterfaceBandedMatricesExt = "BandedMatrices" + ArrayInterfaceBlockBandedMatricesExt = "BlockBandedMatrices" + ArrayInterfaceCUDAExt = "CUDA" + ArrayInterfaceCUDSSExt = "CUDSS" + ArrayInterfaceChainRulesCoreExt = "ChainRulesCore" + ArrayInterfaceChainRulesExt = "ChainRules" + ArrayInterfaceGPUArraysCoreExt = "GPUArraysCore" + ArrayInterfaceReverseDiffExt = "ReverseDiff" + ArrayInterfaceSparseArraysExt = "SparseArrays" + ArrayInterfaceStaticArraysCoreExt = "StaticArraysCore" + ArrayInterfaceTrackerExt = "Tracker" + + [deps.ArrayInterface.weakdeps] + BandedMatrices = "aae01518-5342-5314-be14-df237901396f" + BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0" + CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e" + ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2" + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" + ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" + SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" + 
Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +version = "1.11.0" + +[[deps.AxisAlgorithms]] +deps = ["LinearAlgebra", "Random", "SparseArrays", "WoodburyMatrices"] +git-tree-sha1 = "01b8ccb13d68535d73d2b0c23e39bd23155fb712" +uuid = "13072b0f-2c55-5437-9ae7-d433b7a33950" +version = "1.1.0" + +[[deps.AxisArrays]] +deps = ["Dates", "IntervalSets", "IterTools", "RangeArrays"] +git-tree-sha1 = "16351be62963a67ac4083f748fdb3cca58bfd52f" +uuid = "39de3d68-74b9-583c-8d2d-e117c070f3a9" +version = "0.4.7" + +[[deps.BangBang]] +deps = ["Compat", "ConstructionBase", "InitialValues", "LinearAlgebra", "Requires", "Setfield", "Tables"] +git-tree-sha1 = "7aa7ad1682f3d5754e3491bb59b8103cae28e3a3" +uuid = "198e06fe-97b7-11e9-32a5-e1d131e6ad66" +version = "0.3.40" + + [deps.BangBang.extensions] + BangBangChainRulesCoreExt = "ChainRulesCore" + BangBangDataFramesExt = "DataFrames" + BangBangStaticArraysExt = "StaticArrays" + BangBangStructArraysExt = "StructArrays" + BangBangTypedTablesExt = "TypedTables" + + [deps.BangBang.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" + StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" + StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" + TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +version = "1.11.0" + +[[deps.Baselet]] +git-tree-sha1 = "aebf55e6d7795e02ca500a689d326ac979aaf89e" +uuid = "9718e550-a3fa-408a-8086-8db961cd8217" +version = "0.1.1" + +[[deps.BenchmarkTools]] +deps = ["JSON", "Logging", "Printf", "Profile", "Statistics", "UUIDs"] +git-tree-sha1 = "f1dff6729bc61f4d49e140da1af55dcd1ac97b2f" +uuid = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +version = "1.5.0" + +[[deps.BitTwiddlingConvenienceFunctions]] +deps = ["Static"] +git-tree-sha1 = "f21cfd4950cb9f0587d5067e69405ad2acd27b87" +uuid = "62783981-4cbd-42fc-bca8-16325de8dc4b" +version = "0.1.6" + +[[deps.Bzip2_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "8873e196c2eb87962a2048b3b8e08946535864a1" +uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0" +version = "1.0.8+2" + +[[deps.CPUSummary]] +deps = ["CpuId", "IfElse", "PrecompileTools", "Static"] +git-tree-sha1 = "5a97e67919535d6841172016c9530fd69494e5ec" +uuid = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9" +version = "0.2.6" + +[[deps.Cairo_jll]] +deps = ["Artifacts", "Bzip2_jll", "CompilerSupportLibraries_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "JLLWrappers", "LZO_jll", "Libdl", "Pixman_jll", "Xorg_libXext_jll", "Xorg_libXrender_jll", "Zlib_jll", "libpng_jll"] +git-tree-sha1 = "009060c9a6168704143100f36ab08f06c2af4642" +uuid = "83423d85-b0ee-5818-9007-b63ccbeb887a" +version = "1.18.2+1" + +[[deps.CalibrateEmulateSample]] +deps = ["AbstractMCMC", "AdvancedMH", "Conda", "Distributions", "DocStringExtensions", "EnsembleKalmanProcesses", "GaussianProcesses", "LinearAlgebra", "MCMCChains", "Pkg", "Printf", "ProgressBars", "PyCall", "Random", "RandomFeatures", "ScikitLearn", "StableRNGs", "Statistics", "StatsBase"] +path = ".." 
+uuid = "95e48a1f-0bec-4818-9538-3db4340308e3" +version = "0.5.3" + +[[deps.ChainRulesCore]] +deps = ["Compat", "LinearAlgebra"] +git-tree-sha1 = "3e4b134270b372f2ed4d4d0e936aabaefc1802bc" +uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +version = "1.25.0" +weakdeps = ["SparseArrays"] + + [deps.ChainRulesCore.extensions] + ChainRulesCoreSparseArraysExt = "SparseArrays" + +[[deps.CloseOpenIntervals]] +deps = ["Static", "StaticArrayInterface"] +git-tree-sha1 = "05ba0d07cd4fd8b7a39541e31a7b0254704ea581" +uuid = "fb6a15b2-703c-40df-9091-08a04967cfa9" +version = "0.1.13" + +[[deps.CodecBzip2]] +deps = ["Bzip2_jll", "TranscodingStreams"] +git-tree-sha1 = "e7c529cc31bb85b97631b922fa2e6baf246f5905" +uuid = "523fee87-0ab8-5b00-afb7-3ecf72e48cfd" +version = "0.8.4" + +[[deps.CodecZlib]] +deps = ["TranscodingStreams", "Zlib_jll"] +git-tree-sha1 = "bce6804e5e6044c6daab27bb533d1295e4a2e759" +uuid = "944b1d66-785c-5afd-91f1-9de20f533193" +version = "0.7.6" + +[[deps.CommonSubexpressions]] +deps = ["MacroTools"] +git-tree-sha1 = "cda2cfaebb4be89c9084adaca7dd7333369715c5" +uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" +version = "0.3.1" + +[[deps.CommonWorldInvalidations]] +git-tree-sha1 = "ae52d1c52048455e85a387fbee9be553ec2b68d0" +uuid = "f70d9fcc-98c5-4d4a-abd7-e4cdeebd8ca8" +version = "1.0.0" + +[[deps.Compat]] +deps = ["TOML", "UUIDs"] +git-tree-sha1 = "8ae8d32e09f0dcf42a36b90d4e17f5dd2e4c4215" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "4.16.0" +weakdeps = ["Dates", "LinearAlgebra"] + + [deps.Compat.extensions] + CompatLinearAlgebraExt = "LinearAlgebra" + +[[deps.CompilerSupportLibraries_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = "1.1.1+0" + +[[deps.CompositionsBase]] +git-tree-sha1 = "802bb88cd69dfd1509f6670416bd4434015693ad" +uuid = "a33af91c-f02d-484b-be07-31d278c5ca2b" +version = "0.1.2" +weakdeps = ["InverseFunctions"] + + [deps.CompositionsBase.extensions] + CompositionsBaseInverseFunctionsExt = "InverseFunctions" + +[[deps.Conda]] +deps = ["Downloads", "JSON", "VersionParsing"] +git-tree-sha1 = "b19db3927f0db4151cb86d073689f2428e524576" +uuid = "8f4d0f93-b110-5947-807f-2305c1781a2d" +version = "1.10.2" + +[[deps.ConsoleProgressMonitor]] +deps = ["Logging", "ProgressMeter"] +git-tree-sha1 = "3ab7b2136722890b9af903859afcf457fa3059e8" +uuid = "88cd18e8-d9cc-4ea6-8889-5259c0d15c8b" +version = "0.1.2" + +[[deps.ConstructionBase]] +git-tree-sha1 = "76219f1ed5771adbb096743bff43fb5fdd4c1157" +uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9" +version = "1.5.8" +weakdeps = ["IntervalSets", "LinearAlgebra", "StaticArrays"] + + [deps.ConstructionBase.extensions] + ConstructionBaseIntervalSetsExt = "IntervalSets" + ConstructionBaseLinearAlgebraExt = "LinearAlgebra" + ConstructionBaseStaticArraysExt = "StaticArrays" + +[[deps.Convex]] +deps = ["AbstractTrees", "BenchmarkTools", "LDLFactorizations", "LinearAlgebra", "MathOptInterface", "OrderedCollections", "SparseArrays", "Test"] +git-tree-sha1 = "dac1878b4996fa56292d2c3bd28f2498b980bb93" +uuid = "f65535da-76fb-5f13-bab9-19810c17039a" +version = "0.16.3" + +[[deps.CpuId]] +deps = ["Markdown"] +git-tree-sha1 = "fcbb72b032692610bfbdb15018ac16a36cf2e406" +uuid = "adafc99b-e345-5852-983c-f28acb93d879" +version = "0.3.1" + +[[deps.Crayons]] +git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15" +uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" +version = "4.1.1" + +[[deps.DataAPI]] +git-tree-sha1 = "abe83f3a2f1b857aac70ef8b269080af17764bbe" +uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" +version = 
"1.16.0" + +[[deps.DataFrames]] +deps = ["Compat", "DataAPI", "DataStructures", "Future", "InlineStrings", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrecompileTools", "PrettyTables", "Printf", "Random", "Reexport", "SentinelArrays", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"] +git-tree-sha1 = "fb61b4812c49343d7ef0b533ba982c46021938a6" +uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +version = "1.7.0" + +[[deps.DataStructures]] +deps = ["Compat", "InteractiveUtils", "OrderedCollections"] +git-tree-sha1 = "1d0a14036acb104d9e89698bd408f63ab58cdc82" +uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +version = "0.18.20" + +[[deps.DataValueInterfaces]] +git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" +uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" +version = "1.0.0" + +[[deps.Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" +version = "1.11.0" + +[[deps.DefineSingletons]] +git-tree-sha1 = "0fba8b706d0178b4dc7fd44a96a92382c9065c2c" +uuid = "244e2a9f-e319-4986-a169-4d1fe445cd52" +version = "0.1.2" + +[[deps.DiffResults]] +deps = ["StaticArraysCore"] +git-tree-sha1 = "782dd5f4561f5d267313f23853baaaa4c52ea621" +uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" +version = "1.1.0" + +[[deps.DiffRules]] +deps = ["IrrationalConstants", "LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"] +git-tree-sha1 = "23163d55f885173722d1e4cf0f6110cdbaf7e272" +uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" +version = "1.15.1" + +[[deps.Distances]] +deps = ["LinearAlgebra", "Statistics", "StatsAPI"] +git-tree-sha1 = "c7e3a542b999843086e2f29dac96a618c105be1d" +uuid = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" +version = "0.10.12" +weakdeps = ["ChainRulesCore", "SparseArrays"] + + [deps.Distances.extensions] + DistancesChainRulesCoreExt = "ChainRulesCore" + DistancesSparseArraysExt = "SparseArrays" + +[[deps.Distributed]] +deps = ["Random", "Serialization", "Sockets"] +uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" +version = "1.11.0" + +[[deps.Distributions]] +deps = ["AliasTables", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsAPI", "StatsBase", "StatsFuns"] +git-tree-sha1 = "3101c32aab536e7a27b1763c0797dba151b899ad" +uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" +version = "0.25.113" + + [deps.Distributions.extensions] + DistributionsChainRulesCoreExt = "ChainRulesCore" + DistributionsDensityInterfaceExt = "DensityInterface" + DistributionsTestExt = "Test" + + [deps.Distributions.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + DensityInterface = "b429d917-457f-4dbc-8f4c-0cc954292b1d" + Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[[deps.DocStringExtensions]] +deps = ["LibGit2"] +git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d" +uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +version = "0.9.3" + +[[deps.Documenter]] +deps = ["ANSIColoredPrinters", "AbstractTrees", "Base64", "CodecZlib", "Dates", "DocStringExtensions", "Downloads", "Git", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "MarkdownAST", "Pkg", "PrecompileTools", "REPL", "RegistryInstances", "SHA", "TOML", "Test", "Unicode"] +git-tree-sha1 = "5a1ee886566f2fa9318df1273d8b778b9d42712d" +uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +version = "1.7.0" + +[[deps.Downloads]] +deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +version = "1.6.0" + 
+[[deps.ElasticArrays]] +deps = ["Adapt"] +git-tree-sha1 = "75e5697f521c9ab89816d3abeea806dfc5afb967" +uuid = "fdbdab4c-e67f-52f5-8c3f-e7b388dad3d4" +version = "1.2.12" + +[[deps.ElasticPDMats]] +deps = ["LinearAlgebra", "MacroTools", "PDMats"] +git-tree-sha1 = "03ec11d0151e8a772b396aecd663e1c76fc8edcf" +uuid = "2904ab23-551e-5aed-883f-487f97af5226" +version = "0.2.3" + +[[deps.EnsembleKalmanProcesses]] +deps = ["Convex", "Distributions", "DocStringExtensions", "GaussianRandomFields", "Interpolations", "LinearAlgebra", "MathOptInterface", "Optim", "QuadGK", "Random", "RecipesBase", "SCS", "SparseArrays", "Statistics", "StatsBase", "TOML"] +git-tree-sha1 = "00bb94ff704d7aeed9c72d4a2a05d6abf6cb7946" +uuid = "aa8a2aa5-91d8-4396-bcef-d4f2ec43552d" +version = "2.0.1" + +[[deps.Expat_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "1c6317308b9dc757616f0b5cb379db10494443a7" +uuid = "2e619515-83b5-522b-bb60-26c02a35a201" +version = "2.6.2+0" + +[[deps.FFMPEG]] +deps = ["FFMPEG_jll"] +git-tree-sha1 = "53ebe7511fa11d33bec688a9178fac4e49eeee00" +uuid = "c87230d0-a227-11e9-1b43-d7ebe4e7570a" +version = "0.4.2" + +[[deps.FFMPEG_jll]] +deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "JLLWrappers", "LAME_jll", "Libdl", "Ogg_jll", "OpenSSL_jll", "Opus_jll", "PCRE2_jll", "Zlib_jll", "libaom_jll", "libass_jll", "libfdk_aac_jll", "libvorbis_jll", "x264_jll", "x265_jll"] +git-tree-sha1 = "466d45dc38e15794ec7d5d63ec03d776a9aff36e" +uuid = "b22a6f82-2f65-5046-a5b2-351ab43fb4e5" +version = "4.4.4+1" + +[[deps.FFTW]] +deps = ["AbstractFFTs", "FFTW_jll", "LinearAlgebra", "MKL_jll", "Preferences", "Reexport"] +git-tree-sha1 = "4820348781ae578893311153d69049a93d05f39d" +uuid = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" +version = "1.8.0" + +[[deps.FFTW_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "4d81ed14783ec49ce9f2e168208a12ce1815aa25" +uuid = "f5851436-0d7a-5f13-b9de-f02708fd171a" +version = "3.3.10+1" + +[[deps.FastGaussQuadrature]] +deps = ["LinearAlgebra", "SpecialFunctions", "StaticArrays"] +git-tree-sha1 = "58d83dd5a78a36205bdfddb82b1bb67682e64487" +uuid = "442a2c76-b920-505d-bb47-c5924d526838" +version = "0.4.9" + +[[deps.FileWatching]] +uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" +version = "1.11.0" + +[[deps.FillArrays]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "6a70198746448456524cb442b8af316927ff3e1a" +uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" +version = "1.13.0" +weakdeps = ["PDMats", "SparseArrays", "Statistics"] + + [deps.FillArrays.extensions] + FillArraysPDMatsExt = "PDMats" + FillArraysSparseArraysExt = "SparseArrays" + FillArraysStatisticsExt = "Statistics" + +[[deps.FiniteDiff]] +deps = ["ArrayInterface", "LinearAlgebra", "Setfield"] +git-tree-sha1 = "b10bdafd1647f57ace3885143936749d61638c3b" +uuid = "6a86dc24-6348-571c-b903-95158fe2bd41" +version = "2.26.0" + + [deps.FiniteDiff.extensions] + FiniteDiffBandedMatricesExt = "BandedMatrices" + FiniteDiffBlockBandedMatricesExt = "BlockBandedMatrices" + FiniteDiffSparseArraysExt = "SparseArrays" + FiniteDiffStaticArraysExt = "StaticArrays" + + [deps.FiniteDiff.weakdeps] + BandedMatrices = "aae01518-5342-5314-be14-df237901396f" + BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0" + SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" + +[[deps.Fontconfig_jll]] +deps = ["Artifacts", "Bzip2_jll", "Expat_jll", "FreeType2_jll", "JLLWrappers", "Libdl", "Libuuid_jll", "Zlib_jll"] +git-tree-sha1 = 
"db16beca600632c95fc8aca29890d83788dd8b23" +uuid = "a3f928ae-7b40-5064-980b-68af3947d34b" +version = "2.13.96+0" + +[[deps.Formatting]] +deps = ["Logging", "Printf"] +git-tree-sha1 = "fb409abab2caf118986fc597ba84b50cbaf00b87" +uuid = "59287772-0a20-5a39-b81b-1366585eb4c0" +version = "0.4.3" + +[[deps.ForwardDiff]] +deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "LogExpFunctions", "NaNMath", "Preferences", "Printf", "Random", "SpecialFunctions"] +git-tree-sha1 = "a2df1b776752e3f344e5116c06d75a10436ab853" +uuid = "f6369f11-7733-5829-9624-2563aa707210" +version = "0.10.38" +weakdeps = ["StaticArrays"] + + [deps.ForwardDiff.extensions] + ForwardDiffStaticArraysExt = "StaticArrays" + +[[deps.FreeType2_jll]] +deps = ["Artifacts", "Bzip2_jll", "JLLWrappers", "Libdl", "Zlib_jll"] +git-tree-sha1 = "5c1d8ae0efc6c2e7b1fc502cbe25def8f661b7bc" +uuid = "d7e528f0-a631-5988-bf34-fe36492bcfd7" +version = "2.13.2+0" + +[[deps.FriBidi_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "1ed150b39aebcc805c26b93a8d0122c940f64ce2" +uuid = "559328eb-81f9-559d-9380-de523a88c83c" +version = "1.0.14+0" + +[[deps.Future]] +deps = ["Random"] +uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" +version = "1.11.0" + +[[deps.GaussianProcesses]] +deps = ["Distances", "Distributions", "ElasticArrays", "ElasticPDMats", "FastGaussQuadrature", "ForwardDiff", "LinearAlgebra", "Optim", "PDMats", "Printf", "ProgressMeter", "Random", "RecipesBase", "ScikitLearnBase", "SpecialFunctions", "StaticArrays", "Statistics", "StatsFuns"] +git-tree-sha1 = "31749ff6868caf6dd50902eec652a724071dbed3" +uuid = "891a1506-143c-57d2-908e-e1f8e92e6de9" +version = "0.12.5" + +[[deps.GaussianRandomFields]] +deps = ["Arpack", "FFTW", "FastGaussQuadrature", "LinearAlgebra", "RecipesBase", "SpecialFunctions", "Statistics"] +git-tree-sha1 = "d9c335f2c06424029b2addf9abf602e0feb2f53e" +uuid = "e4b2fa32-6e09-5554-b718-106ed5adafe9" +version = "2.1.6" + +[[deps.Gettext_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "XML2_jll"] +git-tree-sha1 = "9b02998aba7bf074d14de89f9d37ca24a1a0b046" +uuid = "78b55507-aeef-58d4-861c-77aaff3498b1" +version = "0.21.0+0" + +[[deps.Git]] +deps = ["Git_jll"] +git-tree-sha1 = "04eff47b1354d702c3a85e8ab23d539bb7d5957e" +uuid = "d7ba0133-e1db-5d97-8f8c-041e4b3a1eb2" +version = "1.3.1" + +[[deps.Git_jll]] +deps = ["Artifacts", "Expat_jll", "JLLWrappers", "LibCURL_jll", "Libdl", "Libiconv_jll", "OpenSSL_jll", "PCRE2_jll", "Zlib_jll"] +git-tree-sha1 = "ea372033d09e4552a04fd38361cd019f9003f4f4" +uuid = "f8c6e375-362e-5223-8a59-34ff63f689eb" +version = "2.46.2+0" + +[[deps.Glib_jll]] +deps = ["Artifacts", "Gettext_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Libiconv_jll", "Libmount_jll", "PCRE2_jll", "Zlib_jll"] +git-tree-sha1 = "674ff0db93fffcd11a3573986e550d66cd4fd71f" +uuid = "7746bdde-850d-59dc-9ae8-88ece973131d" +version = "2.80.5+0" + +[[deps.Graphite2_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "344bf40dcab1073aca04aa0df4fb092f920e4011" +uuid = "3b182d85-2403-5c21-9c21-1e1f0cc25472" +version = "1.3.14+0" + +[[deps.HarfBuzz_jll]] +deps = ["Artifacts", "Cairo_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "Graphite2_jll", "JLLWrappers", "Libdl", "Libffi_jll"] +git-tree-sha1 = "401e4f3f30f43af2c8478fc008da50096ea5240f" +uuid = "2e76f6c2-a576-52d4-95c1-20adfe4de566" +version = "8.3.1+0" + +[[deps.HostCPUFeatures]] +deps = ["BitTwiddlingConvenienceFunctions", "IfElse", "Libdl", "Static"] 
+git-tree-sha1 = "8e070b599339d622e9a081d17230d74a5c473293" +uuid = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0" +version = "0.1.17" + +[[deps.IOCapture]] +deps = ["Logging", "Random"] +git-tree-sha1 = "b6d6bfdd7ce25b0f9b2f6b3dd56b2673a66c8770" +uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89" +version = "0.2.5" + +[[deps.IfElse]] +git-tree-sha1 = "debdd00ffef04665ccbb3e150747a77560e8fad1" +uuid = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" +version = "0.1.1" + +[[deps.InitialValues]] +git-tree-sha1 = "4da0f88e9a39111c2fa3add390ab15f3a44f3ca3" +uuid = "22cec73e-a1b8-11e9-2c92-598750a2cf9c" +version = "0.3.1" + +[[deps.InlineStrings]] +git-tree-sha1 = "45521d31238e87ee9f9732561bfee12d4eebd52d" +uuid = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48" +version = "1.4.2" + + [deps.InlineStrings.extensions] + ArrowTypesExt = "ArrowTypes" + ParsersExt = "Parsers" + + [deps.InlineStrings.weakdeps] + ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd" + Parsers = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" + +[[deps.IntelOpenMP_jll]] +deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl"] +git-tree-sha1 = "10bd689145d2c3b2a9844005d01087cc1194e79e" +uuid = "1d5cc7b8-4909-519e-a0f8-d0f5ad9712d0" +version = "2024.2.1+0" + +[[deps.InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +version = "1.11.0" + +[[deps.Interpolations]] +deps = ["Adapt", "AxisAlgorithms", "ChainRulesCore", "LinearAlgebra", "OffsetArrays", "Random", "Ratios", "Requires", "SharedArrays", "SparseArrays", "StaticArrays", "WoodburyMatrices"] +git-tree-sha1 = "88a101217d7cb38a7b481ccd50d21876e1d1b0e0" +uuid = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" +version = "0.15.1" + + [deps.Interpolations.extensions] + InterpolationsUnitfulExt = "Unitful" + + [deps.Interpolations.weakdeps] + Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" + +[[deps.IntervalSets]] +git-tree-sha1 = "dba9ddf07f77f60450fe5d2e2beb9854d9a49bd0" +uuid = "8197267c-284f-5f27-9208-e0e47529a953" +version = "0.7.10" +weakdeps = ["Random", "RecipesBase", "Statistics"] + + [deps.IntervalSets.extensions] + IntervalSetsRandomExt = "Random" + IntervalSetsRecipesBaseExt = "RecipesBase" + IntervalSetsStatisticsExt = "Statistics" + +[[deps.InverseFunctions]] +git-tree-sha1 = "a779299d77cd080bf77b97535acecd73e1c5e5cb" +uuid = "3587e190-3f89-42d0-90ee-14403ec27112" +version = "0.1.17" +weakdeps = ["Dates", "Test"] + + [deps.InverseFunctions.extensions] + InverseFunctionsDatesExt = "Dates" + InverseFunctionsTestExt = "Test" + +[[deps.InvertedIndices]] +git-tree-sha1 = "0dc7b50b8d436461be01300fd8cd45aa0274b038" +uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" +version = "1.3.0" + +[[deps.IrrationalConstants]] +git-tree-sha1 = "7fd44fd4ff43fc60815f8e764c0f352b83c49151" +uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" +version = "0.1.1" + +[[deps.IterTools]] +git-tree-sha1 = "42d5f897009e7ff2cf88db414a389e5ed1bdd023" +uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e" +version = "1.10.0" + +[[deps.IteratorInterfaceExtensions]] +git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" +uuid = "82899510-4779-5014-852e-03e436cf321d" +version = "1.0.0" + +[[deps.JLLWrappers]] +deps = ["Artifacts", "Preferences"] +git-tree-sha1 = "be3dc50a92e5a386872a493a10050136d4703f9b" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.6.1" + +[[deps.JSON]] +deps = ["Dates", "Mmap", "Parsers", "Unicode"] +git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a" +uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +version = "0.21.4" + +[[deps.KernelDensity]] +deps = ["Distributions", 
"DocStringExtensions", "FFTW", "Interpolations", "StatsBase"] +git-tree-sha1 = "7d703202e65efa1369de1279c162b915e245eed1" +uuid = "5ab0869b-81aa-558d-bb23-cbf5423bbe9b" +version = "0.6.9" + +[[deps.LAME_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "170b660facf5df5de098d866564877e119141cbd" +uuid = "c1c5ebd0-6772-5130-a774-d5fcae4a789d" +version = "3.100.2+0" + +[[deps.LDLFactorizations]] +deps = ["AMD", "LinearAlgebra", "SparseArrays", "Test"] +git-tree-sha1 = "70f582b446a1c3ad82cf87e62b878668beef9d13" +uuid = "40e66cde-538c-5869-a4ad-c39174c6795b" +version = "0.10.1" + +[[deps.LLVMOpenMP_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "78211fb6cbc872f77cad3fc0b6cf647d923f4929" +uuid = "1d63c593-3942-5779-bab2-d838dc0a180e" +version = "18.1.7+0" + +[[deps.LZO_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "854a9c268c43b77b0a27f22d7fab8d33cdb3a731" +uuid = "dd4b983a-f0e5-5f8d-a1b7-129d4a5fb1ac" +version = "2.10.2+1" + +[[deps.LaTeXStrings]] +git-tree-sha1 = "dda21b8cbd6a6c40d9d02a73230f9d70fed6918c" +uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" +version = "1.4.0" + +[[deps.LayoutPointers]] +deps = ["ArrayInterface", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static", "StaticArrayInterface"] +git-tree-sha1 = "a9eaadb366f5493a5654e843864c13d8b107548c" +uuid = "10f19ff3-798f-405d-979b-55457f8fc047" +version = "0.1.17" + +[[deps.LazilyInitializedFields]] +git-tree-sha1 = "0f2da712350b020bc3957f269c9caad516383ee0" +uuid = "0e77f7df-68c5-4e49-93ce-4cd80f5598bf" +version = "1.3.0" + +[[deps.LazyArtifacts]] +deps = ["Artifacts", "Pkg"] +uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" +version = "1.11.0" + +[[deps.LeftChildRightSiblingTrees]] +deps = ["AbstractTrees"] +git-tree-sha1 = "fb6803dafae4a5d62ea5cab204b1e657d9737e7f" +uuid = "1d6d02ad-be62-4b6b-8a6d-2f90e265016e" +version = "0.2.0" + +[[deps.LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.6.4" + +[[deps.LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "8.6.0+0" + +[[deps.LibGit2]] +deps = ["Base64", "LibGit2_jll", "NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" +version = "1.11.0" + +[[deps.LibGit2_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"] +uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" +version = "1.7.2+0" + +[[deps.LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.11.0+1" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +version = "1.11.0" + +[[deps.Libffi_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "0b4a5d71f3e5200a7dff793393e09dfc2d874290" +uuid = "e9f186c6-92d2-5b65-8a66-fee21dc1b490" +version = "3.2.2+1" + +[[deps.Libgcrypt_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgpg_error_jll"] +git-tree-sha1 = "8be878062e0ffa2c3f67bb58a595375eda5de80b" +uuid = "d4300ac3-e22c-5743-9152-c294e39db1e4" +version = "1.11.0+0" + +[[deps.Libgpg_error_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "c6ce1e19f3aec9b59186bdf06cdf3c4fc5f5f3e6" +uuid = "7add5ba3-2f88-524e-9cd5-f83b8a55f7b8" +version = "1.50.0+0" + +[[deps.Libiconv_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "61dfdba58e585066d8bce214c5a51eaa0539f269" +uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" +version = 
"1.17.0+1" + +[[deps.Libmount_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "0c4f9c4f1a50d8f35048fa0532dabbadf702f81e" +uuid = "4b2f31a3-9ecc-558c-b454-b3730dcb73e9" +version = "2.40.1+0" + +[[deps.Libuuid_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "5ee6203157c120d79034c748a2acba45b82b8807" +uuid = "38a345b3-de98-5d2b-a5d3-14cd9215e700" +version = "2.40.1+0" + +[[deps.LineSearches]] +deps = ["LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "Printf"] +git-tree-sha1 = "e4c3be53733db1051cc15ecf573b1042b3a712a1" +uuid = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" +version = "7.3.0" + +[[deps.LinearAlgebra]] +deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +version = "1.11.0" + +[[deps.LogDensityProblems]] +deps = ["ArgCheck", "DocStringExtensions", "Random"] +git-tree-sha1 = "4e0128c1590d23a50dcdb106c7e2dbca99df85c0" +uuid = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" +version = "2.1.2" + +[[deps.LogExpFunctions]] +deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"] +git-tree-sha1 = "a2d09619db4e765091ee5c6ffe8872849de0feea" +uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" +version = "0.3.28" + + [deps.LogExpFunctions.extensions] + LogExpFunctionsChainRulesCoreExt = "ChainRulesCore" + LogExpFunctionsChangesOfVariablesExt = "ChangesOfVariables" + LogExpFunctionsInverseFunctionsExt = "InverseFunctions" + + [deps.LogExpFunctions.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + ChangesOfVariables = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" + InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" + +[[deps.Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" +version = "1.11.0" + +[[deps.LoggingExtras]] +deps = ["Dates", "Logging"] +git-tree-sha1 = "f02b56007b064fbfddb4c9cd60161b6dd0f40df3" +uuid = "e6f89c97-d47a-5376-807f-9c37f3926c36" +version = "1.1.0" + +[[deps.LoopVectorization]] +deps = ["ArrayInterface", "CPUSummary", "CloseOpenIntervals", "DocStringExtensions", "HostCPUFeatures", "IfElse", "LayoutPointers", "LinearAlgebra", "OffsetArrays", "PolyesterWeave", "PrecompileTools", "SIMDTypes", "SLEEFPirates", "Static", "StaticArrayInterface", "ThreadingUtilities", "UnPack", "VectorizationBase"] +git-tree-sha1 = "8084c25a250e00ae427a379a5b607e7aed96a2dd" +uuid = "bdcacae8-1622-11e9-2a5c-532679323890" +version = "0.12.171" +weakdeps = ["ChainRulesCore", "ForwardDiff", "SpecialFunctions"] + + [deps.LoopVectorization.extensions] + ForwardDiffExt = ["ChainRulesCore", "ForwardDiff"] + SpecialFunctionsExt = "SpecialFunctions" + +[[deps.MCMCChains]] +deps = ["AbstractMCMC", "AxisArrays", "Dates", "Distributions", "Formatting", "IteratorInterfaceExtensions", "KernelDensity", "LinearAlgebra", "MCMCDiagnosticTools", "MLJModelInterface", "NaturalSort", "OrderedCollections", "PrettyTables", "Random", "RecipesBase", "Serialization", "Statistics", "StatsBase", "StatsFuns", "TableTraits", "Tables"] +git-tree-sha1 = "c659f7508035a7bdd5102aef2de028ab035f289a" +uuid = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" +version = "5.7.1" + +[[deps.MCMCDiagnosticTools]] +deps = ["AbstractFFTs", "DataAPI", "DataStructures", "Distributions", "LinearAlgebra", "MLJModelInterface", "Random", "SpecialFunctions", "Statistics", "StatsBase", "Tables"] +git-tree-sha1 = "d1737c39191aa26f42a64e320de313f1d1fd74b1" +uuid = "be115224-59cd-429b-ad48-344e309966f0" +version = "0.2.1" + +[[deps.MKL_jll]] +deps = ["Artifacts", "IntelOpenMP_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "oneTBB_jll"] 
+git-tree-sha1 = "f046ccd0c6db2832a9f639e2c669c6fe867e5f4f" +uuid = "856f044c-d86e-5d09-b602-aeab76dc8ba7" +version = "2024.2.0+0" + +[[deps.MLJModelInterface]] +deps = ["Random", "ScientificTypesBase", "StatisticalTraits"] +git-tree-sha1 = "ceaff6618408d0e412619321ae43b33b40c1a733" +uuid = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" +version = "1.11.0" + +[[deps.MacroTools]] +deps = ["Markdown", "Random"] +git-tree-sha1 = "2fa9ee3e63fd3a4f7a9a4f4744a52f4856de82df" +uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +version = "0.5.13" + +[[deps.ManualMemory]] +git-tree-sha1 = "bcaef4fc7a0cfe2cba636d84cda54b5e4e4ca3cd" +uuid = "d125e4d3-2237-4719-b19c-fa641b8a4667" +version = "0.1.8" + +[[deps.Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" +version = "1.11.0" + +[[deps.MarkdownAST]] +deps = ["AbstractTrees", "Markdown"] +git-tree-sha1 = "465a70f0fc7d443a00dcdc3267a497397b8a3899" +uuid = "d0879d2d-cac2-40c8-9cee-1863dc0c7391" +version = "0.1.2" + +[[deps.MathOptInterface]] +deps = ["BenchmarkTools", "CodecBzip2", "CodecZlib", "DataStructures", "ForwardDiff", "JSON", "LinearAlgebra", "MutableArithmetics", "NaNMath", "OrderedCollections", "PrecompileTools", "Printf", "SparseArrays", "SpecialFunctions", "Test", "Unicode"] +git-tree-sha1 = "e065ca5234f53fd6f920efaee4940627ad991fb4" +uuid = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" +version = "1.34.0" + +[[deps.MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.28.6+0" + +[[deps.MicroCollections]] +deps = ["BangBang", "InitialValues", "Setfield"] +git-tree-sha1 = "629afd7d10dbc6935ec59b32daeb33bc4460a42e" +uuid = "128add7d-3638-4c79-886c-908ea0c25c34" +version = "0.1.4" + +[[deps.Missings]] +deps = ["DataAPI"] +git-tree-sha1 = "ec4f7fbeab05d7747bdf98eb74d130a2a2ed298d" +uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" +version = "1.2.0" + +[[deps.Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" +version = "1.11.0" + +[[deps.MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = "2023.12.12" + +[[deps.MutableArithmetics]] +deps = ["LinearAlgebra", "SparseArrays", "Test"] +git-tree-sha1 = "90077f1e79de8c9c7c8a90644494411111f4e07b" +uuid = "d8a4904e-b15c-11e9-3269-09a3773c0cb0" +version = "1.5.2" + +[[deps.NLSolversBase]] +deps = ["DiffResults", "Distributed", "FiniteDiff", "ForwardDiff"] +git-tree-sha1 = "a0b464d183da839699f4c79e7606d9d186ec172c" +uuid = "d41bc354-129a-5804-8e4c-c37616107c6c" +version = "7.8.3" + +[[deps.NaNMath]] +deps = ["OpenLibm_jll"] +git-tree-sha1 = "0877504529a3e5c3343c6f8b4c0381e57e4387e4" +uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" +version = "1.0.2" + +[[deps.NaturalSort]] +git-tree-sha1 = "eda490d06b9f7c00752ee81cfa451efe55521e21" +uuid = "c020b1a1-e9b0-503a-9c33-f039bfc54a85" +version = "1.0.0" + +[[deps.NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "1.2.0" + +[[deps.OffsetArrays]] +git-tree-sha1 = "1a27764e945a152f7ca7efa04de513d473e9542e" +uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" +version = "1.14.1" +weakdeps = ["Adapt"] + + [deps.OffsetArrays.extensions] + OffsetArraysAdaptExt = "Adapt" + +[[deps.Ogg_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "887579a3eb005446d514ab7aeac5d1d027658b8f" +uuid = "e7412a2a-1a6e-54c0-be00-318e2571c051" +version = "1.3.5+1" + +[[deps.OpenBLAS32_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl"] +git-tree-sha1 = "dd806c813429ff09878ea3eeb317818f3ca02871" +uuid = 
"656ef2d0-ae68-5445-9ca0-591084a874a2" +version = "0.3.28+3" + +[[deps.OpenBLAS_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] +uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" +version = "0.3.27+1" + +[[deps.OpenLibm_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "05823500-19ac-5b8b-9628-191a04bc5112" +version = "0.8.1+2" + +[[deps.OpenSSL_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "7493f61f55a6cce7325f197443aa80d32554ba10" +uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" +version = "3.0.15+1" + +[[deps.OpenSpecFun_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1" +uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" +version = "0.5.5+0" + +[[deps.Optim]] +deps = ["Compat", "FillArrays", "ForwardDiff", "LineSearches", "LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "PositiveFactorizations", "Printf", "SparseArrays", "StatsBase"] +git-tree-sha1 = "d9b79c4eed437421ac4285148fcadf42e0700e89" +uuid = "429524aa-4258-5aef-a3af-852621145aeb" +version = "1.9.4" +weakdeps = ["MathOptInterface"] + + [deps.Optim.extensions] + OptimMOIExt = "MathOptInterface" + +[[deps.Opus_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "6703a85cb3781bd5909d48730a67205f3f31a575" +uuid = "91d4177d-7536-5919-b921-800302f37372" +version = "1.3.3+0" + +[[deps.OrderedCollections]] +git-tree-sha1 = "dfdf5519f235516220579f949664f1bf44e741c5" +uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.6.3" + +[[deps.PCRE2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15" +version = "10.42.0+1" + +[[deps.PDMats]] +deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] +git-tree-sha1 = "949347156c25054de2db3b166c52ac4728cbad65" +uuid = "90014a1f-27ba-587c-ab20-58faa44d9150" +version = "0.11.31" + +[[deps.Parameters]] +deps = ["OrderedCollections", "UnPack"] +git-tree-sha1 = "34c0e9ad262e5f7fc75b10a9952ca7692cfc5fbe" +uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a" +version = "0.12.3" + +[[deps.Parsers]] +deps = ["Dates", "PrecompileTools", "UUIDs"] +git-tree-sha1 = "8489905bcdbcfac64d1daa51ca07c0d8f0283821" +uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" +version = "2.8.1" + +[[deps.Pixman_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LLVMOpenMP_jll", "Libdl"] +git-tree-sha1 = "35621f10a7531bc8fa58f74610b1bfb70a3cfc6b" +uuid = "30392449-352a-5448-841d-b1acce4e97dc" +version = "0.43.4+0" + +[[deps.Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "Random", "SHA", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +version = "1.11.0" +weakdeps = ["REPL"] + + [deps.Pkg.extensions] + REPLExt = "REPL" + +[[deps.PolyesterWeave]] +deps = ["BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "Static", "ThreadingUtilities"] +git-tree-sha1 = "645bed98cd47f72f67316fd42fc47dee771aefcd" +uuid = "1d0040c9-8b98-4ee7-8388-3f51789ca0ad" +version = "0.2.2" + +[[deps.PooledArrays]] +deps = ["DataAPI", "Future"] +git-tree-sha1 = "36d8b4b899628fb92c2749eb488d884a926614d3" +uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" +version = "1.4.3" + +[[deps.PositiveFactorizations]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "17275485f373e6673f7e7f97051f703ed5b15b20" +uuid = "85a6dd25-e78a-55b7-8502-1745935b8125" +version = "0.2.4" + +[[deps.PrecompileTools]] +deps = ["Preferences"] +git-tree-sha1 = 
"5aa36f7049a63a1528fe8f7c3f2113413ffd4e1f" +uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +version = "1.2.1" + +[[deps.Preferences]] +deps = ["TOML"] +git-tree-sha1 = "9306f6085165d270f7e3db02af26a400d580f5c6" +uuid = "21216c6a-2e73-6563-6e65-726566657250" +version = "1.4.3" + +[[deps.PrettyTables]] +deps = ["Crayons", "LaTeXStrings", "Markdown", "PrecompileTools", "Printf", "Reexport", "StringManipulation", "Tables"] +git-tree-sha1 = "1101cd475833706e4d0e7b122218257178f48f34" +uuid = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" +version = "2.4.0" + +[[deps.Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" +version = "1.11.0" + +[[deps.Profile]] +uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" +version = "1.11.0" + +[[deps.ProgressBars]] +deps = ["Printf"] +git-tree-sha1 = "b437cdb0385ed38312d91d9c00c20f3798b30256" +uuid = "49802e3a-d2f1-5c88-81d8-b72133a6f568" +version = "1.5.1" + +[[deps.ProgressLogging]] +deps = ["Logging", "SHA", "UUIDs"] +git-tree-sha1 = "80d919dee55b9c50e8d9e2da5eeafff3fe58b539" +uuid = "33c8b6b6-d38a-422a-b730-caa89a2f386c" +version = "0.1.4" + +[[deps.ProgressMeter]] +deps = ["Distributed", "Printf"] +git-tree-sha1 = "8f6bc219586aef8baf0ff9a5fe16ee9c70cb65e4" +uuid = "92933f4c-e287-5a05-a399-4b506db050ca" +version = "1.10.2" + +[[deps.PtrArrays]] +git-tree-sha1 = "77a42d78b6a92df47ab37e177b2deac405e1c88f" +uuid = "43287f4e-b6f4-7ad1-bb20-aadabca52c3d" +version = "1.2.1" + +[[deps.PyCall]] +deps = ["Conda", "Dates", "Libdl", "LinearAlgebra", "MacroTools", "Serialization", "VersionParsing"] +git-tree-sha1 = "9816a3826b0ebf49ab4926e2b18842ad8b5c8f04" +uuid = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" +version = "1.96.4" + +[[deps.QuadGK]] +deps = ["DataStructures", "LinearAlgebra"] +git-tree-sha1 = "cda3b045cf9ef07a08ad46731f5a3165e56cf3da" +uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" +version = "2.11.1" + + [deps.QuadGK.extensions] + QuadGKEnzymeExt = "Enzyme" + + [deps.QuadGK.weakdeps] + Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" + +[[deps.REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets", "StyledStrings", "Unicode"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" +version = "1.11.0" + +[[deps.Random]] +deps = ["SHA"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +version = "1.11.0" + +[[deps.RandomFeatures]] +deps = ["Distributions", "DocStringExtensions", "EnsembleKalmanProcesses", "FFMPEG", "LinearAlgebra", "LoopVectorization", "Random", "SpecialFunctions", "Statistics", "StatsBase", "Tullio"] +git-tree-sha1 = "8e63cac591d3ffe80ecfa7ba189951009fb642fc" +uuid = "36c3bae2-c0c3-419d-b3b4-eebadd35c5e5" +version = "0.3.4" + +[[deps.RangeArrays]] +git-tree-sha1 = "b9039e93773ddcfc828f12aadf7115b4b4d225f5" +uuid = "b3c3ace0-ae52-54e7-9d0b-2c1406fd6b9d" +version = "0.3.2" + +[[deps.Ratios]] +deps = ["Requires"] +git-tree-sha1 = "1342a47bf3260ee108163042310d26f2be5ec90b" +uuid = "c84ed2f1-dad5-54f0-aa8e-dbefe2724439" +version = "0.4.5" + + [deps.Ratios.extensions] + RatiosFixedPointNumbersExt = "FixedPointNumbers" + + [deps.Ratios.weakdeps] + FixedPointNumbers = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" + +[[deps.RecipesBase]] +deps = ["PrecompileTools"] +git-tree-sha1 = "5c3d09cc4f31f5fc6af001c250bf1278733100ff" +uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" +version = "1.3.4" + +[[deps.Reexport]] +git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" +uuid = "189a3867-3050-52da-a836-e630ba90ab69" +version = "1.2.2" + +[[deps.RegistryInstances]] +deps = ["LazilyInitializedFields", "Pkg", "TOML", "Tar"] +git-tree-sha1 = 
"ffd19052caf598b8653b99404058fce14828be51" +uuid = "2792f1a3-b283-48e8-9a74-f99dce5104f3" +version = "0.1.0" + +[[deps.Requires]] +deps = ["UUIDs"] +git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" +uuid = "ae029012-a4dd-5104-9daa-d747884805df" +version = "1.3.0" + +[[deps.Rmath]] +deps = ["Random", "Rmath_jll"] +git-tree-sha1 = "f65dcb5fa46aee0cf9ed6274ccbd597adc49aa7b" +uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa" +version = "0.7.1" + +[[deps.Rmath_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "e60724fd3beea548353984dc61c943ecddb0e29a" +uuid = "f50d1b31-88e8-58de-be2c-1cc44531875f" +version = "0.4.3+0" + +[[deps.SCS]] +deps = ["MathOptInterface", "Requires", "SCS_jll", "SparseArrays"] +git-tree-sha1 = "0dfe49eaa058ce905a4199af379b8e411e6126e5" +uuid = "c946c3f1-0d1f-5ce8-9dea-7daa1f7e2d13" +version = "2.0.1" + + [deps.SCS.extensions] + SCSSCS_GPU_jllExt = ["SCS_GPU_jll"] + SCSSCS_MKL_jllExt = ["SCS_MKL_jll"] + + [deps.SCS.weakdeps] + SCS_GPU_jll = "af6e375f-46ec-5fa0-b791-491b0dfa44a4" + SCS_MKL_jll = "3f2553a9-4106-52be-b7dd-865123654657" + +[[deps.SCS_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LLVMOpenMP_jll", "Libdl", "OpenBLAS32_jll"] +git-tree-sha1 = "668bcf4b25cf992564321ccb70b205f9a7487cfa" +uuid = "f4f2fc5b-1d94-523c-97ea-2ab488bedf4b" +version = "3.2.6+0" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" + +[[deps.SIMDTypes]] +git-tree-sha1 = "330289636fb8107c5f32088d2741e9fd7a061a5c" +uuid = "94e857df-77ce-4151-89e5-788b33177be4" +version = "0.1.0" + +[[deps.SLEEFPirates]] +deps = ["IfElse", "Static", "VectorizationBase"] +git-tree-sha1 = "456f610ca2fbd1c14f5fcf31c6bfadc55e7d66e0" +uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa" +version = "0.6.43" + +[[deps.ScientificTypesBase]] +git-tree-sha1 = "a8e18eb383b5ecf1b5e6fc237eb39255044fd92b" +uuid = "30f210dd-8aff-4c5f-94ba-8e64358c1161" +version = "3.0.0" + +[[deps.ScikitLearn]] +deps = ["Compat", "Conda", "DataFrames", "Distributed", "IterTools", "LinearAlgebra", "MacroTools", "Parameters", "Printf", "PyCall", "Random", "ScikitLearnBase", "SparseArrays", "StatsBase", "VersionParsing"] +git-tree-sha1 = "3df098033358431591827bb86cada0bed744105a" +uuid = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +version = "0.7.0" + +[[deps.ScikitLearnBase]] +deps = ["LinearAlgebra", "Random", "Statistics"] +git-tree-sha1 = "7877e55c1523a4b336b433da39c8e8c08d2f221f" +uuid = "6e75b9c4-186b-50bd-896f-2d2496a4843e" +version = "0.5.0" + +[[deps.SentinelArrays]] +deps = ["Dates", "Random"] +git-tree-sha1 = "d0553ce4031a081cc42387a9b9c8441b7d99f32d" +uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c" +version = "1.4.7" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +version = "1.11.0" + +[[deps.Setfield]] +deps = ["ConstructionBase", "Future", "MacroTools", "StaticArraysCore"] +git-tree-sha1 = "e2cc6d8c88613c05e1defb55170bf5ff211fbeac" +uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46" +version = "1.1.1" + +[[deps.SharedArrays]] +deps = ["Distributed", "Mmap", "Random", "Serialization"] +uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" +version = "1.11.0" + +[[deps.Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" +version = "1.11.0" + +[[deps.SortingAlgorithms]] +deps = ["DataStructures"] +git-tree-sha1 = "66e0a8e672a0bdfca2c3f5937efb8538b9ddc085" +uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" +version = "1.2.1" + +[[deps.SparseArrays]] +deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] +uuid = 
"2f01184e-e22b-5df5-ae63-d93ebab69eaf" +version = "1.11.0" + +[[deps.SpecialFunctions]] +deps = ["IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"] +git-tree-sha1 = "2f5d4697f21388cbe1ff299430dd169ef97d7e14" +uuid = "276daf66-3868-5448-9aa4-cd146d93841b" +version = "2.4.0" +weakdeps = ["ChainRulesCore"] + + [deps.SpecialFunctions.extensions] + SpecialFunctionsChainRulesCoreExt = "ChainRulesCore" + +[[deps.SplittablesBase]] +deps = ["Setfield", "Test"] +git-tree-sha1 = "e08a62abc517eb79667d0a29dc08a3b589516bb5" +uuid = "171d559e-b47b-412a-8079-5efa626c420e" +version = "0.1.15" + +[[deps.StableRNGs]] +deps = ["Random"] +git-tree-sha1 = "83e6cce8324d49dfaf9ef059227f91ed4441a8e5" +uuid = "860ef19b-820b-49d6-a774-d7a799459cd3" +version = "1.0.2" + +[[deps.Static]] +deps = ["CommonWorldInvalidations", "IfElse", "PrecompileTools"] +git-tree-sha1 = "87d51a3ee9a4b0d2fe054bdd3fc2436258db2603" +uuid = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" +version = "1.1.1" + +[[deps.StaticArrayInterface]] +deps = ["ArrayInterface", "Compat", "IfElse", "LinearAlgebra", "PrecompileTools", "Static"] +git-tree-sha1 = "96381d50f1ce85f2663584c8e886a6ca97e60554" +uuid = "0d7ed370-da01-4f52-bd93-41d350b8b718" +version = "1.8.0" +weakdeps = ["OffsetArrays", "StaticArrays"] + + [deps.StaticArrayInterface.extensions] + StaticArrayInterfaceOffsetArraysExt = "OffsetArrays" + StaticArrayInterfaceStaticArraysExt = "StaticArrays" + +[[deps.StaticArrays]] +deps = ["LinearAlgebra", "PrecompileTools", "Random", "StaticArraysCore"] +git-tree-sha1 = "777657803913ffc7e8cc20f0fd04b634f871af8f" +uuid = "90137ffa-7385-5640-81b9-e52037218182" +version = "1.9.8" +weakdeps = ["ChainRulesCore", "Statistics"] + + [deps.StaticArrays.extensions] + StaticArraysChainRulesCoreExt = "ChainRulesCore" + StaticArraysStatisticsExt = "Statistics" + +[[deps.StaticArraysCore]] +git-tree-sha1 = "192954ef1208c7019899fbf8049e717f92959682" +uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" +version = "1.4.3" + +[[deps.StatisticalTraits]] +deps = ["ScientificTypesBase"] +git-tree-sha1 = "542d979f6e756f13f862aa00b224f04f9e445f11" +uuid = "64bff920-2084-43da-a3e6-9bb72801c0c9" +version = "3.4.0" + +[[deps.Statistics]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "ae3bb1eb3bba077cd276bc5cfc337cc65c3075c0" +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +version = "1.11.1" +weakdeps = ["SparseArrays"] + + [deps.Statistics.extensions] + SparseArraysExt = ["SparseArrays"] + +[[deps.StatsAPI]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "1ff449ad350c9c4cbc756624d6f8a8c3ef56d3ed" +uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0" +version = "1.7.0" + +[[deps.StatsBase]] +deps = ["DataAPI", "DataStructures", "LinearAlgebra", "LogExpFunctions", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"] +git-tree-sha1 = "d1bf48bfcc554a3761a133fe3a9bb01488e06916" +uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +version = "0.33.21" + +[[deps.StatsFuns]] +deps = ["ChainRulesCore", "InverseFunctions", "IrrationalConstants", "LogExpFunctions", "Reexport", "Rmath", "SpecialFunctions"] +git-tree-sha1 = "5950925ff997ed6fb3e985dcce8eb1ba42a0bbe7" +uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c" +version = "0.9.18" + +[[deps.StringManipulation]] +deps = ["PrecompileTools"] +git-tree-sha1 = "a6b1675a536c5ad1a60e5a5153e1fee12eb146e3" +uuid = "892a3eda-7b42-436c-8928-eab12a02cf0e" +version = "0.4.0" + +[[deps.StyledStrings]] +uuid = "f489334b-da3d-4c2e-b8f0-e476e12c162b" +version = "1.11.0" + +[[deps.SuiteSparse]] +deps = ["Libdl", 
"LinearAlgebra", "Serialization", "SparseArrays"] +uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" + +[[deps.SuiteSparse_jll]] +deps = ["Artifacts", "Libdl", "libblastrampoline_jll"] +uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" +version = "7.7.0+0" + +[[deps.TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.3" + +[[deps.TableTraits]] +deps = ["IteratorInterfaceExtensions"] +git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39" +uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" +version = "1.0.1" + +[[deps.Tables]] +deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "OrderedCollections", "TableTraits"] +git-tree-sha1 = "598cd7c1f68d1e205689b1c2fe65a9f85846f297" +uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" +version = "1.12.0" + +[[deps.Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +version = "1.10.0" + +[[deps.TerminalLoggers]] +deps = ["LeftChildRightSiblingTrees", "Logging", "Markdown", "Printf", "ProgressLogging", "UUIDs"] +git-tree-sha1 = "f133fab380933d042f6796eda4e130272ba520ca" +uuid = "5d786b92-1e48-4d6f-9151-6b4477ca9bed" +version = "0.1.7" + +[[deps.Test]] +deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +version = "1.11.0" + +[[deps.ThreadingUtilities]] +deps = ["ManualMemory"] +git-tree-sha1 = "eda08f7e9818eb53661b3deb74e3159460dfbc27" +uuid = "8290d209-cae3-49c0-8002-c8c24d57dab5" +version = "0.5.2" + +[[deps.TranscodingStreams]] +git-tree-sha1 = "0c45878dcfdcfa8480052b6ab162cdd138781742" +uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" +version = "0.11.3" + +[[deps.Transducers]] +deps = ["Adapt", "ArgCheck", "BangBang", "Baselet", "CompositionsBase", "ConstructionBase", "DefineSingletons", "Distributed", "InitialValues", "Logging", "Markdown", "MicroCollections", "Requires", "Setfield", "SplittablesBase", "Tables"] +git-tree-sha1 = "3064e780dbb8a9296ebb3af8f440f787bb5332af" +uuid = "28d57a85-8fef-5791-bfe6-a80928e7c999" +version = "0.4.80" + + [deps.Transducers.extensions] + TransducersBlockArraysExt = "BlockArrays" + TransducersDataFramesExt = "DataFrames" + TransducersLazyArraysExt = "LazyArrays" + TransducersOnlineStatsBaseExt = "OnlineStatsBase" + TransducersReferenceablesExt = "Referenceables" + + [deps.Transducers.weakdeps] + BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e" + DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" + LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02" + OnlineStatsBase = "925886fa-5bf2-5e8e-b522-a9147a512338" + Referenceables = "42d2dcc6-99eb-4e98-b66c-637b7d73030e" + +[[deps.Tullio]] +deps = ["DiffRules", "LinearAlgebra", "Requires"] +git-tree-sha1 = "6d476962ba4e435d7f4101a403b1d3d72afe72f3" +uuid = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc" +version = "0.3.7" + + [deps.Tullio.extensions] + TullioCUDAExt = "CUDA" + TullioChainRulesCoreExt = "ChainRulesCore" + TullioFillArraysExt = "FillArrays" + TullioTrackerExt = "Tracker" + + [deps.Tullio.weakdeps] + CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" + +[[deps.UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" +version = "1.11.0" + +[[deps.UnPack]] +git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b" +uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" +version = "1.0.2" + +[[deps.Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" +version 
= "1.11.0" + +[[deps.VectorizationBase]] +deps = ["ArrayInterface", "CPUSummary", "HostCPUFeatures", "IfElse", "LayoutPointers", "Libdl", "LinearAlgebra", "SIMDTypes", "Static", "StaticArrayInterface"] +git-tree-sha1 = "4ab62a49f1d8d9548a1c8d1a75e5f55cf196f64e" +uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" +version = "0.21.71" + +[[deps.VersionParsing]] +git-tree-sha1 = "58d6e80b4ee071f5efd07fda82cb9fbe17200868" +uuid = "81def892-9a0e-5fdd-b105-ffc91e053289" +version = "1.3.0" + +[[deps.WoodburyMatrices]] +deps = ["LinearAlgebra", "SparseArrays"] +git-tree-sha1 = "c1a7aa6219628fcd757dede0ca95e245c5cd9511" +uuid = "efce3f68-66dc-5838-9240-27a6d6f5f9b6" +version = "1.0.0" + +[[deps.XML2_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Zlib_jll"] +git-tree-sha1 = "6a451c6f33a176150f315726eba8b92fbfdb9ae7" +uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a" +version = "2.13.4+0" + +[[deps.XSLT_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgcrypt_jll", "Libgpg_error_jll", "Libiconv_jll", "XML2_jll", "Zlib_jll"] +git-tree-sha1 = "a54ee957f4c86b526460a720dbc882fa5edcbefc" +uuid = "aed1982a-8fda-507f-9586-7b0439959a61" +version = "1.1.41+0" + +[[deps.Xorg_libX11_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_libxcb_jll", "Xorg_xtrans_jll"] +git-tree-sha1 = "afead5aba5aa507ad5a3bf01f58f82c8d1403495" +uuid = "4f6342f7-b3d2-589e-9d20-edeb45f2b2bc" +version = "1.8.6+0" + +[[deps.Xorg_libXau_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "6035850dcc70518ca32f012e46015b9beeda49d8" +uuid = "0c0b7dd1-d40b-584c-a123-a41640f87eec" +version = "1.0.11+0" + +[[deps.Xorg_libXdmcp_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "34d526d318358a859d7de23da945578e8e8727b7" +uuid = "a3789734-cfe1-5b06-b2d0-1dd0d9d62d05" +version = "1.1.4+0" + +[[deps.Xorg_libXext_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_libX11_jll"] +git-tree-sha1 = "d2d1a5c49fae4ba39983f63de6afcbea47194e85" +uuid = "1082639a-0dae-5f34-9b06-72781eeb8cb3" +version = "1.3.6+0" + +[[deps.Xorg_libXrender_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_libX11_jll"] +git-tree-sha1 = "47e45cd78224c53109495b3e324df0c37bb61fbe" +uuid = "ea2f1a96-1ddc-540d-b46f-429655e07cfa" +version = "0.9.11+0" + +[[deps.Xorg_libpthread_stubs_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "8fdda4c692503d44d04a0603d9ac0982054635f9" +uuid = "14d82f49-176c-5ed1-bb49-ad3f5cbd8c74" +version = "0.1.1+0" + +[[deps.Xorg_libxcb_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "XSLT_jll", "Xorg_libXau_jll", "Xorg_libXdmcp_jll", "Xorg_libpthread_stubs_jll"] +git-tree-sha1 = "bcd466676fef0878338c61e655629fa7bbc69d8e" +uuid = "c7cfdc94-dc32-55de-ac96-5a1b8d977c5b" +version = "1.17.0+0" + +[[deps.Xorg_xtrans_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "e92a1a012a10506618f10b7047e478403a046c77" +uuid = "c5fb5394-a638-5e4d-96e5-b29de1b5cf10" +version = "1.5.0+0" + +[[deps.Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.13+1" + +[[deps.libaom_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "1827acba325fdcdf1d2647fc8d5301dd9ba43a9d" +uuid = "a4ae2306-e953-59d6-aa16-d00cac43593b" +version = "3.9.0+0" + +[[deps.libass_jll]] +deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "HarfBuzz_jll", "JLLWrappers", "Libdl", "Zlib_jll"] +git-tree-sha1 = "e17c115d55c5fbb7e52ebedb427a0dca79d4484e" +uuid = "0ac62f75-1d6f-5e53-bd7c-93b484bb37c0" +version = "0.15.2+0" + 
+[[deps.libblastrampoline_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
+version = "5.11.0+0"
+
+[[deps.libfdk_aac_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl"]
+git-tree-sha1 = "8a22cf860a7d27e4f3498a0fe0811a7957badb38"
+uuid = "f638f0a6-7fb0-5443-88ba-1cc74229b280"
+version = "2.0.3+0"
+
+[[deps.libpng_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Zlib_jll"]
+git-tree-sha1 = "b70c870239dc3d7bc094eb2d6be9b73d27bef280"
+uuid = "b53b4c65-9356-5827-b1ea-8c7a1a84506f"
+version = "1.6.44+0"
+
+[[deps.libvorbis_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Ogg_jll", "Pkg"]
+git-tree-sha1 = "490376214c4721cdaca654041f635213c6165cb3"
+uuid = "f27f6e37-5d2b-51aa-960f-b287f2bc3b7a"
+version = "1.3.7+2"
+
+[[deps.nghttp2_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
+version = "1.59.0+0"
+
+[[deps.oneTBB_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl"]
+git-tree-sha1 = "7d0ea0f4895ef2f5cb83645fa689e52cb55cf493"
+uuid = "1317d2d5-d96f-522e-a858-c73665f53c3e"
+version = "2021.12.0+0"
+
+[[deps.p7zip_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
+version = "17.4.0+2"
+
+[[deps.x264_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "4fea590b89e6ec504593146bf8b988b2c00922b2"
+uuid = "1270edf5-f2f9-52d2-97e9-ab00b5d0237a"
+version = "2021.5.5+0"
+
+[[deps.x265_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "ee567a171cce03570d77ad3a43e90218e38937a9"
+uuid = "dfaa095f-4041-5dcd-9319-2fabd8486b76"
+version = "3.5.0+0"
diff --git a/docs/make.jl b/docs/make.jl
index 4b3260e0c..fef114079 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -6,17 +6,17 @@ using Documenter

 #----------
 examples = [
+    "Simple example walkthrough" => "examples/sinusoid_example.md",
+    "Lorenz example" => "examples/lorenz_example.md",
+    "Turbulence example" => "examples/edmf_example.md",
+    "Cloudy example" => "examples/Cloudy_example.md",
     "Emulator testing" => [
         "examples/emulators/regression_2d_2d.md",
         "examples/emulators/lorenz_integrator_3d_3d.md",
-        "examples/emulators/ishigami_3d_1d.md",
+        "examples/emulators/global_sens_analysis.md",
     ],
-    "Lorenz example" => "examples/lorenz_example.md",
-    "Turbulence example" => "examples/edmf_example.md",
 ]

-design = ["AbstractMCMC sampling API" => "API/AbstractMCMC.md"]
-
 api = [
     "CalibrateEmulateSample" => [
         "Emulators" => [
@@ -29,18 +29,21 @@ api = [
     ],
 ]

+emulate = [
+    "Emulator" => "emulate.md",
+    "Gaussian Process" => "GaussianProcessEmulator.md",
+    "Random Features" => "random_feature_emulator.md",
+]
 pages = [
     "Home" => "index.md",
     "Installation instructions" => "installation_instructions.md",
     "Contributing" => "contributing.md",
-    "Calibrate" => "calibrate.md",
-    "Emulate" => "emulate.md",
     "Examples" => examples,
-    "Gaussian Process" => "GaussianProcessEmulator.md",
-    "Random Features" => "random_feature_emulator.md",
-    "Package Design" => design,
-    "API" => api,
+    "Calibrate" => "calibrate.md",
+    "Emulate" => emulate,
+    "Sample" => "sample.md",
     "Glossary" => "glossary.md",
+    "API" => api,
 ]

 #----------
diff --git a/docs/src/API/Emulators.md b/docs/src/API/Emulators.md
index 12fcf7b25..6ea3d19c1 100644
--- a/docs/src/API/Emulators.md
+++ b/docs/src/API/Emulators.md
@@ -7,6 +7,7 @@ CurrentModule = CalibrateEmulateSample.Emulators
 ```@docs
 Emulator
 optimize_hyperparameters!(::Emulator)
+Emulator(::MachineLearningTool, ::PairedDataContainer{FT}) where {FT <: AbstractFloat}
 predict
 normalize
 standardize
diff --git a/docs/src/API/GaussianProcess.md b/docs/src/API/GaussianProcess.md
index 3b1058352..8778c8353 100644
--- a/docs/src/API/GaussianProcess.md
+++ b/docs/src/API/GaussianProcess.md
@@ -8,6 +8,13 @@ CurrentModule = CalibrateEmulateSample.Emulators
 GaussianProcessesPackage
 PredictionType
 GaussianProcess
+GaussianProcess(
+    ::GPPkg;
+    ::Union{K, KPy, Nothing},
+    ::Any,
+    ::FT,
+    ::PredictionType,
+) where {GPPkg <: GaussianProcessesPackage, K <: Kernel, KPy <: PyObject, FT <: AbstractFloat}
 build_models!(::GaussianProcess{GPJL}, ::PairedDataContainer{FT}) where {FT <: AbstractFloat}
 optimize_hyperparameters!(::GaussianProcess{GPJL})
 predict(::GaussianProcess{GPJL}, ::AbstractMatrix{FT}) where {FT <: AbstractFloat}
diff --git a/docs/src/API/RandomFeatures.md b/docs/src/API/RandomFeatures.md
index d4bda4c95..7e112f479 100644
--- a/docs/src/API/RandomFeatures.md
+++ b/docs/src/API/RandomFeatures.md
@@ -43,7 +43,7 @@ get_batch_sizes
 get_n_features
 get_input_dim
 get_output_dim
-get_rng
+EKP.get_rng
 get_kernel_structure
 get_feature_decomposition
 get_optimizer_options
diff --git a/docs/src/GaussianProcessEmulator.md b/docs/src/GaussianProcessEmulator.md
index 72022024c..cbb5e8b22 100644
--- a/docs/src/GaussianProcessEmulator.md
+++ b/docs/src/GaussianProcessEmulator.md
@@ -16,7 +16,8 @@ A useful resource to learn about Gaussian processes is [Rasmussen and Williams (
 `CalibrateEmulateSample.jl` allows the Gaussian process emulator to be built using
 either [`GaussianProcesses.jl`](https://stor-i.github.io/GaussianProcesses.jl/latest/)
-or [`ScikitLearn.jl`](https://scikitlearnjl.readthedocs.io/en/latest/models/#scikitlearn-models).
+or [`ScikitLearn.jl`](https://scikitlearnjl.readthedocs.io/en/latest/models/#scikitlearn-models). Different packages may be optimized for different settings; we recommend users give both a try, and check out the individual package documentation to make a choice for their problem setting.
+
 To use `GaussianProcesses.jl`, define the package type as
 ```julia
 gppackage = Emulators.GPJL()
 ```
@@ -27,7 +28,6 @@ To use `ScikitLearn.jl`, define the package type as
 gppackage = Emulators.SKLJL()
 ```
-
 Initialize a basic Gaussian Process with
 ```julia
 gauss_proc = GaussianProcess(gppackage)
 ```
@@ -103,10 +103,9 @@ gauss_proc = GaussianProcess(
 ```
 You can also combine multiple ScikitLearn kernels via linear operations in the same way as above.

-# Learning the noise
+# Learning additional white noise

-Often it is useful to learn the noise of the data by adding a white noise kernel. This is added with the
-Boolean keyword `noise_learn` when initializing the Gaussian process. The default is true.
+Often it is useful to learn the discrepancy between the Gaussian process prediction and the data by learning additional white noise. Although one often knows, and provides, the discrepancy between the true model and the data as an observational noise covariance, the additional white-noise kernel can help account for approximation error between the selected Gaussian process kernel and the true model. This is added with the Boolean keyword `noise_learn` when initializing the Gaussian process. The default is true.

 ```julia
 gauss_proc = GaussianProcess(
@@ -118,8 +117,8 @@ When `noise_learn` is true, an additional white noise kernel is added to the ker
 across all parameter values, including the training data. The scale parameters of the white noise
 kernel are learned in `optimize_hyperparameters!(emulator)`.
-You may not need to learn the noise if you already have a good estimate of the noise from your training data.
-When `noise_learn` is false, additional regularization is added for stability.
+You may not need to learn the noise if you already have a good estimate of the noise from your training data, and if the Gaussian process kernel is well specified.
+When `noise_learn` is false, a small additional regularization is added for stability. The default value is `1e-3`, but this can be chosen through the optional argument `alg_reg_noise`:

 ```julia
diff --git a/docs/src/assets/GFunction_sens_RF-scalar_10.png b/docs/src/assets/GFunction_sens_RF-scalar_10.png
new file mode 100644
index 000000000..2a7adb40f
Binary files /dev/null and b/docs/src/assets/GFunction_sens_RF-scalar_10.png differ
diff --git a/docs/src/assets/GFunction_sens_RF-scalar_3.png b/docs/src/assets/GFunction_sens_RF-scalar_3.png
new file mode 100644
index 000000000..754280d1c
Binary files /dev/null and b/docs/src/assets/GFunction_sens_RF-scalar_3.png differ
diff --git a/docs/src/assets/GFunction_sens_RF-scalar_6.png b/docs/src/assets/GFunction_sens_RF-scalar_6.png
new file mode 100644
index 000000000..db6ed57fe
Binary files /dev/null and b/docs/src/assets/GFunction_sens_RF-scalar_6.png differ
diff --git a/docs/src/assets/GFunction_slices_RF-scalar_10.png b/docs/src/assets/GFunction_slices_RF-scalar_10.png
new file mode 100644
index 000000000..08d0bd81e
Binary files /dev/null and b/docs/src/assets/GFunction_slices_RF-scalar_10.png differ
diff --git a/docs/src/assets/GFunction_slices_RF-scalar_3.png b/docs/src/assets/GFunction_slices_RF-scalar_3.png
new file mode 100644
index 000000000..727f2a594
Binary files /dev/null and b/docs/src/assets/GFunction_slices_RF-scalar_3.png differ
diff --git a/docs/src/assets/GFunction_slices_RF-scalar_6.png b/docs/src/assets/GFunction_slices_RF-scalar_6.png
new file mode 100644
index 000000000..25ff30941
Binary files /dev/null and b/docs/src/assets/GFunction_slices_RF-scalar_6.png differ
diff --git a/docs/src/assets/Lorenz-posterior-RF.png b/docs/src/assets/Lorenz-posterior-RF.png
new file mode 100644
index 000000000..6f098e4a8
Binary files /dev/null and b/docs/src/assets/Lorenz-posterior-RF.png differ
diff --git a/docs/src/assets/Lorenz-posterior.png b/docs/src/assets/Lorenz-posterior.png
new file mode 100644
index 000000000..8c1bc5f1a
Binary files /dev/null and b/docs/src/assets/Lorenz-posterior.png differ
diff --git a/docs/src/assets/Lorenz-prior.png b/docs/src/assets/Lorenz-prior.png
new file mode 100644
index 000000000..433546954
Binary files /dev/null and b/docs/src/assets/Lorenz-prior.png differ
diff --git a/docs/src/assets/Lorenz-training-points.png b/docs/src/assets/Lorenz-training-points.png
new file mode 100644
index 000000000..b6fafa3fd
Binary files /dev/null and b/docs/src/assets/Lorenz-training-points.png differ
diff --git a/docs/src/assets/cloudy_ces_schematic.png b/docs/src/assets/cloudy_ces_schematic.png
new file mode 100644
index 000000000..f06fcfb6b
Binary files /dev/null and b/docs/src/assets/cloudy_ces_schematic.png differ
diff --git a/docs/src/assets/cloudy_eki.gif b/docs/src/assets/cloudy_eki.gif
new file mode 100644
index 000000000..72230b8ae
Binary files /dev/null and b/docs/src/assets/cloudy_eki.gif differ
diff --git a/docs/src/assets/cloudy_marginal_posterior_constr_gp-gpjl_N0.png b/docs/src/assets/cloudy_marginal_posterior_constr_gp-gpjl_N0.png
new file mode 100644
index 000000000..4e96acdd3
Binary files /dev/null and b/docs/src/assets/cloudy_marginal_posterior_constr_gp-gpjl_N0.png differ
diff --git a/docs/src/assets/cloudy_marginal_posterior_constr_gp-gpjl_k.png b/docs/src/assets/cloudy_marginal_posterior_constr_gp-gpjl_k.png
new file mode 100644
index 000000000..5e0777a9f
Binary files /dev/null and b/docs/src/assets/cloudy_marginal_posterior_constr_gp-gpjl_k.png differ
diff --git a/docs/src/assets/cloudy_marginal_posterior_constr_gp-gpjl_theta.png b/docs/src/assets/cloudy_marginal_posterior_constr_gp-gpjl_theta.png
new file mode 100644
index 000000000..8caf094c9
Binary files /dev/null and b/docs/src/assets/cloudy_marginal_posterior_constr_gp-gpjl_theta.png differ
diff --git a/docs/src/assets/cloudy_pairplot_posterior_constr_gp-gpjl.png b/docs/src/assets/cloudy_pairplot_posterior_constr_gp-gpjl.png
new file mode 100644
index 000000000..5da7eb28e
Binary files /dev/null and b/docs/src/assets/cloudy_pairplot_posterior_constr_gp-gpjl.png differ
diff --git a/docs/src/assets/cloudy_pairplot_posterior_constr_rf-nosvd-nonsep.png b/docs/src/assets/cloudy_pairplot_posterior_constr_rf-nosvd-nonsep.png
new file mode 100644
index 000000000..38fefefd6
Binary files /dev/null and b/docs/src/assets/cloudy_pairplot_posterior_constr_rf-nosvd-nonsep.png differ
diff --git a/docs/src/assets/cloudy_pairplot_posterior_constr_rf-scalar.png b/docs/src/assets/cloudy_pairplot_posterior_constr_rf-scalar.png
new file mode 100644
index 000000000..e2e4bf43a
Binary files /dev/null and b/docs/src/assets/cloudy_pairplot_posterior_constr_rf-scalar.png differ
diff --git a/docs/src/assets/cloudy_priors.png b/docs/src/assets/cloudy_priors.png
new file mode 100644
index 000000000..dd28b0d82
Binary files /dev/null and b/docs/src/assets/cloudy_priors.png differ
diff --git a/docs/src/assets/edmf_nonsep_posterior_2d.png b/docs/src/assets/edmf_nonsep_posterior_2d.png
new file mode 100644
index 000000000..22fb82ba6
Binary files /dev/null and b/docs/src/assets/edmf_nonsep_posterior_2d.png differ
diff --git a/docs/src/assets/edmf_nonsep_posterior_5d.png b/docs/src/assets/edmf_nonsep_posterior_5d.png
new file mode 100644
index 000000000..6cc1d509a
Binary files /dev/null and b/docs/src/assets/edmf_nonsep_posterior_5d.png differ
diff --git a/docs/src/assets/sinusoid_GP_emulator_contours.png b/docs/src/assets/sinusoid_GP_emulator_contours.png
new file mode 100644
index 000000000..f16d4e083
Binary files /dev/null and b/docs/src/assets/sinusoid_GP_emulator_contours.png differ
diff --git a/docs/src/assets/sinusoid_GP_errors_contours.png b/docs/src/assets/sinusoid_GP_errors_contours.png
new file mode 100644
index 000000000..8cf5a8211
Binary files /dev/null and b/docs/src/assets/sinusoid_GP_errors_contours.png differ
diff --git a/docs/src/assets/sinusoid_MCMC_hist_GP.png b/docs/src/assets/sinusoid_MCMC_hist_GP.png
new file mode 100644
index 000000000..3f87b4827
Binary files /dev/null and b/docs/src/assets/sinusoid_MCMC_hist_GP.png differ
diff --git a/docs/src/assets/sinusoid_MCMC_hist_RF.png b/docs/src/assets/sinusoid_MCMC_hist_RF.png
new file mode 100644
index 000000000..0aa9db9fd
Binary files /dev/null and b/docs/src/assets/sinusoid_MCMC_hist_RF.png differ
diff --git a/docs/src/assets/sinusoid_RF_emulator_contours.png b/docs/src/assets/sinusoid_RF_emulator_contours.png
new file mode 100644
index 000000000..04b484021
Binary files /dev/null and b/docs/src/assets/sinusoid_RF_emulator_contours.png differ
diff --git a/docs/src/assets/sinusoid_RF_errors_contours.png b/docs/src/assets/sinusoid_RF_errors_contours.png
new file mode 100644
index 000000000..b864e1d78
Binary files /dev/null and b/docs/src/assets/sinusoid_RF_errors_contours.png differ
diff --git a/docs/src/assets/sinusoid_eki_pairs.png b/docs/src/assets/sinusoid_eki_pairs.png
new file mode 100644
index 000000000..451d31996
Binary files /dev/null and b/docs/src/assets/sinusoid_eki_pairs.png differ
diff --git a/docs/src/assets/sinusoid_groundtruth_contours.png b/docs/src/assets/sinusoid_groundtruth_contours.png
new file mode 100644
index 000000000..289044581
Binary files /dev/null and b/docs/src/assets/sinusoid_groundtruth_contours.png differ
diff --git a/docs/src/assets/sinusoid_posterior_GP.png b/docs/src/assets/sinusoid_posterior_GP.png
new file mode 100644
index 000000000..046765e89
Binary files /dev/null and b/docs/src/assets/sinusoid_posterior_GP.png differ
diff --git a/docs/src/assets/sinusoid_posterior_RF.png b/docs/src/assets/sinusoid_posterior_RF.png
new file mode 100644
index 000000000..2f022118e
Binary files /dev/null and b/docs/src/assets/sinusoid_posterior_RF.png differ
diff --git a/docs/src/assets/sinusoid_prior.png b/docs/src/assets/sinusoid_prior.png
new file mode 100644
index 000000000..ae7e41d1f
Binary files /dev/null and b/docs/src/assets/sinusoid_prior.png differ
diff --git a/docs/src/assets/sinusoid_true_vs_observed_signal.png b/docs/src/assets/sinusoid_true_vs_observed_signal.png
new file mode 100644
index 000000000..c01062fda
Binary files /dev/null and b/docs/src/assets/sinusoid_true_vs_observed_signal.png differ
diff --git a/docs/src/calibrate.md b/docs/src/calibrate.md
index f11d52585..7874430cc 100644
--- a/docs/src/calibrate.md
+++ b/docs/src/calibrate.md
@@ -15,3 +15,7 @@ Calibration can be performed using different ensemble Kalman processes: ensemble
 Documentation on how to construct an EnsembleKalmanProcess from the computer model and the data can be found in the EnsembleKalmanProcesses [docs](https://clima.github.io/EnsembleKalmanProcesses.jl/dev/ensemble_kalman_inversion/).

+## Julia-free forward model
+
+One advantage of our approach is that it does not require the forward map to be written in Julia. To aid construction of such a workflow, EnsembleKalmanProcesses.jl provides a documented example of a BASH workflow for the [sinusoid problem](https://clima.github.io/EnsembleKalmanProcesses.jl/dev/examples/sinusoid_example_toml/), with source code [here](https://github.com/CliMA/EnsembleKalmanProcesses.jl/tree/main/examples/SinusoidInterface). The forward map interacts with the calibration tools (EKP) only through TOML file reading and writing, and thus can be written in any language; it can, for example, be driven by [slurm HPC scripts](https://clima.github.io/EnsembleKalmanProcesses.jl/dev/examples/ClimateMachine_example/), with source code [here](https://github.com/CliMA/EnsembleKalmanProcesses.jl/tree/main/examples/ClimateMachine).
+
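+As a rough sketch, such a forward-model script could look like the following. It is written here in Julia purely for illustration; the file names, TOML layout, and parameter names below are hypothetical and not part of the EKP interface:
+
+```julia
+using TOML # Julia's standard-library TOML reader/writer
+
+# Read the parameters that the calibration tool wrote for this ensemble member
+params = TOML.parsefile("parameters.toml")
+amplitude = params["amplitude"]["value"]
+vert_shift = params["vert_shift"]["value"]
+
+# Run the (toy) forward model
+output = [amplitude * sin(t) + vert_shift for t in 0:0.1:(2 * pi)]
+
+# Write the model output to a TOML file for the calibration tool to read back
+open("model_output.toml", "w") do io
+    TOML.print(io, Dict("output" => output))
+end
+```
+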
diff --git a/docs/src/contributing.md b/docs/src/contributing.md
index 25591b3c2..44941f27a 100644
--- a/docs/src/contributing.md
+++ b/docs/src/contributing.md
@@ -13,7 +13,7 @@ Thank you for considering contributing to `CalibrateEmulateSample`! We encourage

 - Improve documentation or comments if you found something hard to use;

-- Implement a new feature if you need it. We strongly encourage opening an issue to make sure the administrators are on board before opening a PR with an unsolicited feature addition.
+- Implement a new feature if you need it. We strongly encourage opening an issue to make sure the administrators are on board before opening a PR with an unsolicited feature addition. Examples could include implementing new [statistical emulators](@ref modular-interface), or implementing new data compression tools (beyond normalization, standardization, and truncated SVD).

 ## Using `git`
diff --git a/docs/src/emulate.md b/docs/src/emulate.md
index 52a9db7a5..a36705369 100644
--- a/docs/src/emulate.md
+++ b/docs/src/emulate.md
@@ -29,11 +29,11 @@ The optional arguments above relate to the data processing.

 ### Emulator Training

 The emulator is trained when we combine the machine learning tool and the data into the `Emulator` above.
-For any machine learning tool, we must also optimize the hyperparameters:
+For any machine learning tool, hyperparameters are optimized.
 ```julia
 optimize_hyperparameters!(emulator)
 ```
-In the Lorenz example, this line learns the hyperparameters of the Gaussian process, which depend on the choice of [kernel](https://clima.github.io/CalibrateEmulateSample.jl/dev/GaussianProcessEmulator/#kernels).
+For some machine learning packages, however, this optimization may happen automatically during construction, while for others it will not. If the optimization has already taken place, the `optimize_hyperparameters!` line performs no new task, so it may safely be called either way. In the Lorenz example, this line learns the hyperparameters of the Gaussian process, which depend on the choice of [kernel](https://clima.github.io/CalibrateEmulateSample.jl/dev/GaussianProcessEmulator/#kernels), and the choice of GP package.
 Predictions at new inputs can then be made using
 ```julia
 y, cov = Emulator.predict(emulator, new_inputs)
 ```
@@ -66,23 +66,23 @@ This arises from the optional arguments
 We normalize the input data in a standard way by centering, and scaling with the empirical covariance
 - `standardize_outputs = true` (default: `false`)
 - `standardize_outputs_factors = factor_vector` (default: `nothing`)
-To help with poor conditioning of the covariance matrix, users can also standardize each output dimension with by a multiplicative factor given by the elements of `factor_vector`
+To help with poor conditioning of the covariance matrix, users can also standardize each output dimension by a multiplicative factor given by the elements of `factor_vector`.

-## Modular interface
+## [Modular interface](@id modular-interface)

-Each statistical emulator has the following supertype and methods:
-
-```julia
-abstract type MachineLearningTool end
-function build_models!(mlt, iopairs)
-function optimize_hyperparameters!(mlt)
-function predict(mlt, new_inputs)
-```
-Add a new tool as follows:
+Developers may contribute new tools by performing the following steps; a minimal sketch of a new tool is given below.
 1. Create `MyMLToolName.jl`, and include "MyMLToolName.jl" in `Emulators.jl`
-2. Create a struct `MyMLTool <: MachineLearningTool`
-3. Create these three methods to build, train, and predict with your tool (use `GaussianProcess.jl` as a guide)
+2. Create a struct `MyMLTool <: MachineLearningTool`, containing any arguments or optimizer options
+3. Create the following three methods to build, train, and predict with your tool (use `GaussianProcess.jl` as a guide)
+```julia
+build_models!(mlt::MyMLTool, iopairs::PairedDataContainer) -> Nothing
+optimize_hyperparameters!(mlt::MyMLTool, args...; kwargs...) -> Nothing
+predict(mlt::MyMLTool, new_inputs::Matrix; kwargs...) -> (Matrix, Union{Matrix, Array{<:Real, 3}})
+```
+!!! note "on dimensions of the predict inputs and outputs"
+    The `predict` method takes as input an `input_dim`-by-`N_new` matrix. It returns both a predicted mean and a predicted (co)variance at the new inputs.
+    (i) For scalar-output methods relying on diagonalization, return `output_dim`-by-`N_new` matrices for the mean and variance;
+    (ii) for vector-output methods, return `output_dim`-by-`N_new` for the mean and `output_dim`-by-`output_dim`-by-`N_new` for the covariances.
-!!! note
-    The `predict` method currently needs to return both a predicted mean and a predicted (co)variance at new inputs, which are used in the *Sample* stage.
+Please get in touch with our development team when contributing new statistical emulators, to help us ensure the smoothest interface with any new tools.
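+
+A rough skeleton of a hypothetical tool `MyMLTool` is sketched below. The struct fields and the trivial mean/variance predictor (which ignores its inputs) are purely illustrative, not a prescribed interface:
+
+```julia
+using Statistics # mean, var
+using EnsembleKalmanProcesses.DataContainers # PairedDataContainer, get_outputs
+import CalibrateEmulateSample.Emulators:
+    MachineLearningTool, build_models!, optimize_hyperparameters!, predict
+
+# A toy emulator that predicts the training-output mean and variance everywhere
+mutable struct MyMLTool <: MachineLearningTool
+    mean_out::Vector{Float64}
+    var_out::Vector{Float64}
+end
+MyMLTool() = MyMLTool(Float64[], Float64[])
+
+function build_models!(mlt::MyMLTool, iopairs::PairedDataContainer)
+    outputs = get_outputs(iopairs) # output_dim-by-N_train matrix
+    mlt.mean_out = vec(mean(outputs, dims = 2))
+    mlt.var_out = vec(var(outputs, dims = 2))
+    return nothing
+end
+
+# nothing to train in this toy example, so optimization is a no-op
+optimize_hyperparameters!(mlt::MyMLTool) = nothing
+
+function predict(mlt::MyMLTool, new_inputs::AbstractMatrix)
+    n_new = size(new_inputs, 2)
+    # case (i): output_dim-by-N_new matrices of means and variances
+    return repeat(mlt.mean_out, 1, n_new), repeat(mlt.var_out, 1, n_new)
+end
+```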
+
+[This paper](https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2022MS002994) describes Cloudy in much more detail and shows results of experiments using
+CES to learn model parameters.
+
+
+### Running the example
+
+`Cloudy_calibrate.jl` performs the calibration using ensemble Kalman
+inversion; `Cloudy_emulate_sample.jl` fits an emulator and uses it to sample the
+posterior distributions of the parameters.
+Once Cloudy is installed, the example can be run from the Julia REPL:
+```julia
+include("Cloudy_calibrate.jl")
+include("Cloudy_emulate_sample.jl")
+```
+
+### Walkthrough of the code: `Cloudy_calibrate.jl`
+
+This file performs the calibration stage of CES.
+
+#### Import packages and modules
+
+First we load standard packages,
+
+```julia
+using Distributions
+using StatsBase
+using LinearAlgebra
+using StatsPlots
+using Plots
+using Plots.PlotMeasures
+using Random
+using JLD2
+```
+
+the Cloudy modules,
+
+```julia
+using Cloudy
+using Cloudy.ParticleDistributions
+using Cloudy.KernelTensors
+
+# Import the module that runs Cloudy
+include(joinpath(@__DIR__, "DynamicalModel.jl"))
+```
+
+and finally the EKP packages.
+
+```julia
+using EnsembleKalmanProcesses
+using EnsembleKalmanProcesses.ParameterDistributions
+using EnsembleKalmanProcesses.DataContainers
+using EnsembleKalmanProcesses.PlotRecipes
+```
+
+The module `DynamicalModel.jl`, included above, is the forward solver; it provides a function
+that runs Cloudy with a given instance of the parameter vector we want to learn.
+
+#### Define the true parameters
+
+We define the true parameters---they are known here because this is a known
+truth example. Knowing the true parameters will allow us to assess how well
+Calibrate-Emulate-Sample has managed to solve the inverse problem.
+
+We will assume that the true particle mass distribution is a Gamma distribution,
+which at time ``t = 0`` has parameters ``\phi_0 = [N_{0, 0}, \theta_0, k_0]``. We will then try to learn these parameters from observations ``y = [M_0(t_{end}), M_1(t_{end}), M_2(t_{end})]`` of the zeroth-, first-, and second-order moments
+of the distribution at time ``t_{end} > 0`` (where `t_end = 1.0` in this
+example). The true parameters ``\phi_{0, \text{true}}`` are defined as follows:
+
+```julia
+param_names = ["N0", "θ", "k"]
+n_params = length(param_names)
+N0_true = 300.0  # number of particles (scaling factor for Gamma distribution)
+θ_true = 1.5597  # scale parameter of Gamma distribution
+k_true = 0.0817  # shape parameter of Gamma distribution
+ϕ_true = [N0_true, θ_true, k_true]  # true parameters in constrained space
+dist_true = ParticleDistributions.GammaPrimitiveParticleDistribution(ϕ_true...)
+```
+
+#### Define priors for the parameters
+
+As we are working with Bayesian methods, we treat the parameters we want to
+learn as random variables whose prior distributions we specify here. The prior
+distributions will behave like an "initial guess" for the likely region of
+parameter space where we expect the solution to be located.
+We use `constrained_gaussian` to add the desired scale and bounds to the prior
+distribution; in particular, we place lower bounds to preserve positivity (and
+numerical stability).
+
+```julia
+# We choose to use normal distributions to represent the prior distributions of
+# the parameters in the transformed (unconstrained) space.
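+# constrained_gaussian(name, mean, std, lower, upper) builds a prior whose
+# mean and standard deviation in the constrained (physical) space are
+# approximately the given values, subject to the (lower, upper) bounds: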
+prior_N0 = constrained_gaussian(param_names[1], 400, 300, 0.4 * N0_true, Inf)
+prior_θ = constrained_gaussian(param_names[2], 1.0, 5.0, 1e-1, Inf)
+prior_k = constrained_gaussian(param_names[3], 0.2, 1.0, 1e-4, Inf)
+priors = combine_distributions([prior_N0, prior_θ, prior_k])
+```
+
+The plot recipe for `ParameterDistribution` types allows for quick visualization of the priors:
+
+```julia
+# Plot the priors
+p = plot(priors, constrained=false)
+```
+
+![priors](../assets/cloudy_priors.png)
+
+#### Generate (synthetic) observations
+We generate synthetic observations by running Cloudy with the true parameters (i.e., with the true initial Gamma distribution of droplet masses) and then adding noise to the output 100 times to simulate measurement error.
+
+```julia
+dyn_model_settings_true = DynamicalModel.ModelSettings(
+    kernel, dist_true, moments, tspan)
+
+G_t = DynamicalModel.run_dyn_model(ϕ_true, dyn_model_settings_true)
+n_samples = 100
+y_t = zeros(length(G_t), n_samples)
+# In a perfect model setting, the "observational noise" represents the
+# internal model variability. Since Cloudy is a purely deterministic model,
+# there is no straightforward way of coming up with a covariance structure
+# for this internal model variability. We decide to use a diagonal
+# covariance, with entries (variances) largely proportional to their
+# corresponding data values, G_t
+Γy = convert(Array, Diagonal([100.0, 5.0, 30.0]))
+μ = zeros(length(G_t))
+
+# Add noise
+for i in 1:n_samples
+    y_t[:, i] = G_t .+ rand(MvNormal(μ, Γy))
+end
+
+truth = Observation(
+    Dict(
+        "samples" => vec(mean(y_t, dims = 2)),
+        "covariances" => Γy,
+        "names" => data_names,
+    )
+)
+```
+
+#### Perform ensemble Kalman inversion
+
+We sample the initial ensemble from the prior and create the
+`EnsembleKalmanProcess` object as an ensemble Kalman inversion (EKI) algorithm
+using the `Inversion()` keyword. We also use the `DataMisfitController()`
+learning rate scheduler. The number of ensemble members must be larger than the dimension of the parameter space to ensure a full rank ensemble covariance.
+
+```julia
+N_ens = 50 # number of ensemble members
+N_iter = 8 # number of EKI iterations
+# initial parameters: n_params x N_ens
+initial_params = construct_initial_ensemble(rng, priors, N_ens)
+ekiobj = EnsembleKalmanProcess(
+    initial_params,
+    truth,
+    Inversion(),
+    scheduler=DataMisfitController()
+)
+```
+
+We perform the inversion loop. Remember that the EKP transformations are applied within calls to `get_ϕ_final`; the returned ensemble therefore consists of the gamma distribution parameters that can be used directly to run the forward model, rather than their corresponding values in the unconstrained space where the EKI takes place. Each ensemble member is stored as a column, so for uses such as plotting one needs to reshape to the desired dimension.
+
+```julia
+# Initialize a ParticleDistribution with dummy parameters. The parameters
+# will then be set within `run_dyn_model`
+dummy = ones(n_params)
+dist_type = ParticleDistributions.GammaPrimitiveParticleDistribution(dummy...)
+model_settings = DynamicalModel.ModelSettings(kernel, dist_type, moments, tspan)
+# EKI iterations
+for n in 1:N_iter
+    # Return transformed parameters in physical/constrained space
+    ϕ_n = get_ϕ_final(priors, ekiobj)
+    # Evaluate forward map
+    G_n = [DynamicalModel.run_dyn_model(ϕ_n[:, i], model_settings) for i in 1:N_ens]
+    G_ens = hcat(G_n...)
# reformat G_n into an output_dim-by-N_ens matrix
+    EnsembleKalmanProcesses.update_ensemble!(ekiobj, G_ens)
+end
+```
+
+#### Visualize and store the results of the calibration
+
+The optimal parameter vector determined by the ensemble Kalman inversion is the
+ensemble mean of the particles after the last iteration, which is printed to
+standard output. An output directory is created, where a file
+`cloudy_calibrate_results.jld2` is stored, which contains all parameters and
+model output from the ensemble Kalman iterations (both as
+`DataContainers.DataContainer` objects), the mean and one sample of the
+synthetic observations, as well as the true parameters and their priors. In
+addition, an animation is produced that shows the evolution of the ensemble of
+particles over subsequent iterations of the optimization, both in the
+computational (unconstrained) and physical (constrained) spaces.
+
+
+![eki_iterations_animation](../assets/cloudy_eki.gif)
+
+
+### Walkthrough of the code: `Cloudy_emulate_sample.jl`
+
+This file performs the emulation and sampling stages of the CES algorithm.
+
+
+#### Import packages and modules
+
+First, we import some standard packages:
+```julia
+using Distributions
+using StatsBase
+using GaussianProcesses
+using LinearAlgebra
+using Random
+using JLD2
+ENV["GKSwstype"] = "100"
+using CairoMakie, PairPlots
+```
+
+as well as the relevant CES packages needed to construct the emulators and
+perform the Markov chain Monte Carlo (MCMC) sampling. We also need some functionality of `EnsembleKalmanProcesses.jl`.
+
+```julia
+using CalibrateEmulateSample.Emulators
+using CalibrateEmulateSample.MarkovChainMonteCarlo
+using CalibrateEmulateSample.Utilities
+using EnsembleKalmanProcesses
+using EnsembleKalmanProcesses.ParameterDistributions
+using EnsembleKalmanProcesses.DataContainers
+```
+
+#### Load the calibration results
+
+We will train the emulator on the input-output pairs we obtained during the
+calibration. They are stored within the `EnsembleKalmanProcess` object
+(`ekiobj`), which is loaded here together with the other information that was
+saved in the calibration step.
+
+```julia
+ekiobj = load(data_save_file)["eki"]
+priors = load(data_save_file)["priors"]
+truth_sample_mean = load(data_save_file)["truth_sample_mean"]
+truth_sample = load(data_save_file)["truth_sample"]
+# True parameters:
+# - ϕ: in constrained space
+# - θ: in unconstrained space
+ϕ_true = load(data_save_file)["truth_input_constrained"]
+θ_true = transform_constrained_to_unconstrained(priors, ϕ_true)
+```
+
+The user can choose one or more of the following three emulators: a Gaussian
+Process (GP) emulator with `GaussianProcesses.jl` interface (`gp-gpjl`), a
+scalar Random Feature (RF) interface (`rf-scalar`), and vector RF with a
+nonseparable, nondiagonal kernel structure in the output space
+(`rf-nosvd-nonsep`). See
+[here](https://clima.github.io/CalibrateEmulateSample.jl/dev/examples/emulators/regression_2d_2d) for a complete overview of the available emulators.
+
+```julia
+cases = [
+    "rf-scalar",
+    "gp-gpjl", # Veeeery slow predictions
+    "rf-nosvd-nonsep"
+]
+```
+
+We first define some settings for the emulators, e.g., the prediction type
+for the GP emulator, or the number of features and hyperparameter optimizer
+options for the RF emulator.
The docs for
+[GPs](https://clima.github.io/CalibrateEmulateSample.jl/dev/GaussianProcessEmulator/) and [RFs](https://clima.github.io/CalibrateEmulateSample.jl/dev/random_feature_emulator/) explain the different options in more detail and provide some useful heuristics for how to customize the settings depending on the problem at hand.
+
+```julia
+# These settings are the same for all Gaussian Process cases
+pred_type = YType() # we want to predict data
+
+# These settings are the same for all Random Feature cases
+n_features = 400
+nugget = 1e-8
+optimizer_options = Dict(
+    "verbose" => true,
+    "scheduler" => DataMisfitController(terminate_at = 100.0),
+    "cov_sample_multiplier" => 1.0,
+    "n_iteration" => 20,
+)
+```
+
+Emulation is performed through the construction of an `Emulator` object from the
+following components:
+* a wrapper for the machine learning tool (`mlt`) to be used as emulator
+* the input-output pairs on which the emulator will be trained
+* optional arguments specifying data processing and dimensionality reduction
+  functionality
+
+For `gp-gpjl`, this looks as follows:
+```julia
+gppackage = GPJL()
+# Kernel is the sum of a squared exponential (SE), Matérn 5/2, and white noise
+gp_kernel = SE(1.0, 1.0) + Mat52Ard(zeros(3), 0.0) + Noise(log(2.0))
+
+# Wrapper for GP
+mlt = GaussianProcess(
+    gppackage;
+    kernel = gp_kernel,
+    prediction_type = pred_type,
+    noise_learn = false,
+)
+
+decorrelate = true
+standardize_outputs = true
+```
+
+And similarly for `rf-scalar`:
+
+```julia
+kernel_rank = 3
+kernel_structure = SeparableKernel(
+    LowRankFactor(kernel_rank, nugget),
+    OneDimFactor()
+)
+
+mlt = ScalarRandomFeatureInterface(
+    n_features,
+    n_params,
+    kernel_structure = kernel_structure,
+    optimizer_options = optimizer_options,
+)
+
+decorrelate = true
+standardize_outputs = true
+```
+and for `rf-nosvd-nonsep`:
+
+```julia
+kernel_rank = 4
+mlt = VectorRandomFeatureInterface(
+    n_features,
+    n_params,
+    n_outputs,
+    kernel_structure = NonseparableKernel(LowRankFactor(kernel_rank, nugget)),
+    optimizer_options = optimizer_options
+)
+
+# Vector RF does not require decorrelation of outputs
+decorrelate = false
+standardize_outputs = false
+```
+We construct the emulator using the input-output pairs obtained in the
+calibration stage (note that we're not using all available input-output
+pairs---using all of them may not give the best results, especially if the EKI
+parameter converges rapidly and then "stays in the same place" during the remaining iterations). For the `gp-gpjl` and `rf-scalar` cases, we want the output
+data to be decorrelated with information from Γy, but for the vector RF case
+decorrelation is not required.
+
+```julia
+input_output_pairs = get_training_points(ekiobj,
+                                         length(get_u(ekiobj))-2)
+
+# Use the medians of the outputs as standardizing factors
+norm_factors = get_standardizing_factors(
+    get_outputs(input_output_pairs)
+)
+
+# The data processing normalizes input data, and decorrelates
+# output data with information from Γy, if required
+# Note: The `standardize_outputs_factors` are only used under the
+# condition that `standardize_outputs` is true.
+emulator = Emulator(
+    mlt,
+    input_output_pairs,
+    decorrelate = decorrelate,
+    obs_noise_cov = Γy,
+    standardize_outputs = standardize_outputs,
+    standardize_outputs_factors = vcat(norm_factors...),
+)
+```
+
+#### Train the emulator
+
+The emulator is trained when we combine the machine learning tool and the data
+into the `Emulator` above.
We must also optimize the hyperparameters:
+
+```julia
+optimize_hyperparameters!(emulator)
+```
+
+To test the predictive skill of the emulator, we can, for example, compare its prediction
+on the true parameters to the true data. (One could also hold out a subset of
+the input-output pairs from training and evaluate the emulator's predictions
+on them.)
+
+```julia
+# Check how well the emulator predicts on the true parameters
+y_mean, y_var = Emulators.predict(
+    emulator,
+    reshape(θ_true, :, 1);
+    transform_to_real = true
+)
+
+println("Emulator ($(case)) prediction on true parameters: ")
+println(vec(y_mean))
+println("true data: ")
+println(truth_sample) # what was used as truth
+```
+
+The emulator predicts both a mean value and a covariance.
+
+### Sample the posterior distributions of the parameters
+
+The last step is to plug the emulator into an MCMC algorithm, which is then used to produce samples from the posterior distribution of the parameters. Essentially, the emulator acts as a stand-in for the original forward model (which in most cases of interest is computationally expensive to run) during the MCMC sampling process.
+
+We use the mean across all parameter ensembles from the EKI as the initial
+parameters for the MCMC. Before running the actual MCMC chain, we determine a good step size by running chains of length `N = 2000`:
+
+```julia
+# initial values
+u0 = vec(mean(get_inputs(input_output_pairs), dims = 2))
+println("initial parameters: ", u0)
+
+# First let's run a short chain to determine a good step size
+yt_sample = truth_sample
+mcmc = MCMCWrapper(
+    RWMHSampling(),
+    truth_sample,
+    priors,
+    emulator;
+    init_params = u0
+)
+
+new_step = optimize_stepsize(
+    mcmc;
+    init_stepsize = 0.1,
+    N = 2000,
+    discard_initial = 0
+)
+```
+
+We choose a sample size of 100,000 for the actual MCMC, discarding the first
+1,000 samples as burn-in:
+
+```julia
+# Now begin the actual MCMC
+println("Begin MCMC - with step size ", new_step)
+chain = MarkovChainMonteCarlo.sample(
+    mcmc,
+    100_000;
+    stepsize = new_step,
+    discard_initial = 1_000
+)
+```
+
+After running the MCMC, we can extract the posterior samples as follows:
+
+```julia
+posterior = MarkovChainMonteCarlo.get_posterior(mcmc, chain)
+```
+
+The samples of the posterior distributions represent the ultimate output of the
+CES process. By constructing histograms of these samples and comparing them with
+the known true parameter values, we can evaluate the results' accuracy. Ideally,
+the peak of the posterior distribution should be located near the true values,
+indicating a high-quality estimation. Additionally, visualizing the prior
+distributions alongside the posteriors shows the distributional change effected by the Bayesian learning process.
+
+### Results
+
+We first produce pair plots (also known as corner plots or scatter plot matrices) to visualize the posterior parameter distributions as a grid of histograms. Recall that the task was to solve the inverse problem of finding the parameters ``N_{0, 0}``, ``k_0``, and ``\theta_0``, which define a gamma distribution of droplet masses in Cloudy at time ``t = 0``.
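+
+The plotting snippet below assumes matrices `posterior_samples_unconstr` and `posterior_samples_constr` holding the samples in the two spaces. A minimal sketch of how they might be obtained from `posterior` (the exact extraction calls are assumptions based on the `ParameterDistributions` API):
+
+```julia
+# Stack the posterior samples into an n_params-by-n_samples matrix
+# (unconstrained space), then transform to the constrained space
+posterior_samples_unconstr =
+    vcat([get_distribution(posterior)[name] for name in get_name(posterior)]...)
+posterior_samples_constr =
+    transform_unconstrained_to_constrained(priors, posterior_samples_unconstr)
+```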
+
+```julia
+# Make pair plots of the posterior distributions in the unconstrained
+# and in the constrained space (this uses `PairPlots.jl`)
+figpath_unconstr = joinpath(output_directory,
+                            "joint_posterior_unconstr.png")
+figpath_constr = joinpath(output_directory,
+                          "joint_posterior_constr.png")
+labels = get_name(posterior)
+
+data_unconstr = (; [(Symbol(labels[i]),
+                     posterior_samples_unconstr[i, :]) for i in 1:length(labels)]...)
+data_constr = (; [(Symbol(labels[i]),
+                   posterior_samples_constr[i, :]) for i in 1:length(labels)]...)
+
+p_unconstr = pairplot(data_unconstr => (PairPlots.Scatter(),))
+p_constr = pairplot(data_constr => (PairPlots.Scatter(),))
+```
+
+For the GP emulator, the results (shown in the constrained/physical space) look as follows:
+
+![pairplot_posterior_gpjl](../assets/cloudy_pairplot_posterior_constr_gp-gpjl.png)
+
+And we can plot the same for the scalar RF emulator...
+
+![pairplot_posterior_rf-scalar](../assets/cloudy_pairplot_posterior_constr_rf-scalar.png)
+
+...and for the vector RF emulator:
+
+![pairplot_posterior_rf-vec](../assets/cloudy_pairplot_posterior_constr_rf-nosvd-nonsep.png)
+
+In addition, we plot the marginals of the posterior distributions---we are
+showing them here for the GP emulator case:
+```julia
+for idx in 1:n_params
+
+    # Find the range of the posterior samples
+    xmin = minimum(posterior_samples_constr[idx, :])
+    xmax = maximum(posterior_samples_constr[idx, :])
+
+    # Create a figure and axis for plotting
+    fig = Figure(; size = (800, 600))
+    ax = Axis(fig[1, 1])
+
+    # Histogram for posterior samples
+    hist!(ax, posterior_samples_constr[idx, :], bins = 100,
+          color = :darkorange, label = "posterior")
+
+    # Plotting the prior distribution
+    hist!(ax, prior_samples_constr[idx, :], bins = 10000,
+          color = :slategray)
+
+    # Adding a vertical line for the true value
+    vlines!(ax, [ϕ_true[idx]], color = :indigo, linewidth = 2.6,
+            label = "true " * param_names[idx])
+
+    xlims!(ax, xmin, xmax)
+    ylims!(ax, 0, Inf)
+
+    # Setting title and labels
+    ax.title = param_names[idx]
+    ax.xlabel = "Value"
+    ax.ylabel = "Density"
+end
+```
+
+This is what the marginal distributions of the three parameters look like, for
+the case of the GP emulator, and in the constrained/physical space:
+
+![posterior_N0_gpjl](../assets/cloudy_marginal_posterior_constr_gp-gpjl_N0.png)
+
+![posterior_theta_gpjl](../assets/cloudy_marginal_posterior_constr_gp-gpjl_theta.png)
+
+![posterior_k_gpjl](../assets/cloudy_marginal_posterior_constr_gp-gpjl_k.png)
+
+Here, the posterior distributions are shown as orange histograms, the prior distributions are shown as grey histograms (though with the exception of the parameter `k`, they are barely visible), and the true parameter values are marked as vertical purple lines.
+
+
+### Appendix: What Does Cloudy Do?
+
+For the purpose of Bayesian parameter learning, the forward model can be treated
+as a black box that processes input parameters to yield specific outputs.
+However, for those who wish to learn more about the inner workings of Cloudy, we refer to [this paper](https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2022MS002994) and offer a brief outline below:
+
+The mathematical starting point of [Cloudy](https://github.com/CliMA/Cloudy.jl.git) is the stochastic collection equation (SCE; sometimes also called [Smoluchowski equation](https://en.wikipedia.org/wiki/Smoluchowski_coagulation_equation)
after Marian Smoluchowski), which describes the time rate of change of ``f = f(m, t)``, the mass distribution function of liquid water droplets, due to the process of collision and coalescence. The distribution function ``f`` depends on droplet mass ``m`` and time ``t`` and is defined such that ``f(m) \text{ d}m`` denotes the number of droplets with masses in the interval ``[m, m + \text{d}m]`` per unit volume.
+
+The stochastic collection equation is an integro-differential equation that can be written as
+
+```math
+    \frac{\partial f(m, t)}{\partial t} = \frac{1}{2} \int_{m'=0}^{\infty} f(m', t) f(m-m', t) \mathcal{C}(m', m-m')\text{d}m' - f(m, t) \int_{m'=0}^\infty f(m', t)\mathcal{C}(m, m') \text{d}m',
+```
+
+where ``\mathcal{C}(m', m'')`` is the collision-coalescence kernel, which encapsulates the physics of droplet-droplet interactions -- it describes the rate at which two drops of masses ``m'`` and ``m''`` come into contact and coalesce into a drop of mass ``m' + m''``. The first term on the right-hand side of the SCE describes the rate of increase of the number of drops having a mass ``m`` due to collision and coalescence of drops of masses ``m'`` and ``m-m'`` (where the factor ``\frac{1}{2}`` avoids double counting), while the second term describes the rate of reduction of drops of mass ``m`` due to collision and coalescence of drops having a mass ``m`` with other drops.
+
+We can rewrite the SCE in terms of the moments ``M_k`` of ``f``, which are the prognostic variables in Cloudy. They are defined by
+```math
+    M_k = \int_0^\infty m^k f(m, t) \text{d}m.
+```
+
+The time rate of change of the ``k``-th moment of ``f`` is obtained by multiplying the SCE by ``m^k`` and integrating over the entire range of droplet masses (from ``m=0`` to ``\infty``), which yields
+```math
+    \frac{\partial M_k(t)}{\partial t} = \frac{1}{2}\int_0^\infty \int_0^\infty \left((m+m')^k - m^k - {m'}^k\right) \mathcal{C}(m, m')f(m, t)f(m', t) \, \text{d}m\, \text{d}m' ~~~~~~~~ (1)
+```
+
+In this example, the kernel is set to be constant -- ``\mathcal{C}(m', m'') = B = \text{const}`` -- and the cloud droplet mass distribution is assumed to be a ``\text{Gamma}(k_t, \theta_t)`` distribution, scaled by a factor ``N_{0,t}`` which denotes the droplet number concentration:
+```math
+f(m, t) = \frac{N_{0,t}}{\Gamma(k_t)\theta_t^{k_t}} m^{k_t-1} \exp{(-m/\theta_t)}
+```
+The parameter vector ``\phi_t = [N_{0,t}, k_t, \theta_t]`` changes over time (as indicated by the subscript ``t``), as the shape of the distribution evolves. In fact, there is a priori no reason to assume that the distribution would retain its Gamma shape over time, but this is a common assumption that is made in order to solve the closure problem (without this assumption, one would have to keep track of infinitely many moments of the mass distribution in order to uniquely identify the distribution ``f`` at each time step, which is obviously not practicable).
+
+For Gamma mass distribution functions, specifying the first three moments (``M_0``, ``M_1``, and ``M_2``) is sufficient to uniquely determine the parameter vector ``\phi_t``, hence Cloudy solves equation (1) for ``k = 0, 1, 2``. This mapping of the parameters of the initial cloud droplet mass distribution to the (zeroth-, first-, and second-order) moments of the distribution at a specified end time is done by `DynamicalModel.jl`.
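+
+For the constant kernel used here, substituting ``k = 0, 1, 2`` into equation (1) gives closed-form moment tendencies. A minimal sketch of the resulting system (for illustration only; this is not the Cloudy implementation):
+
+```julia
+# Moment tendencies for a constant collision kernel C(m, m') = B:
+#   dM0/dt = -B/2 * M0^2   (droplet number decreases through coalescence)
+#   dM1/dt = 0             (total liquid water mass is conserved)
+#   dM2/dt = B * M1^2      (the distribution broadens over time)
+function constant_kernel_moment_rates(M, B)
+    M0, M1, M2 = M
+    return [-0.5 * B * M0^2, 0.0, B * M1^2]
+end
+```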
+
+
+
+
diff --git a/docs/src/examples/edmf_example.md b/docs/src/examples/edmf_example.md
index e29cbdc68..328a2aa2a 100644
--- a/docs/src/examples/edmf_example.md
+++ b/docs/src/examples/edmf_example.md
@@ -1,5 +1,8 @@
# Extended Eddy-Diffusivity Mass-Flux (EDMF) Scheme

+!!! info "How do I run this code?"
+    The full code is found in the [`examples/`](https://github.com/CliMA/CalibrateEmulateSample.jl/tree/main/examples) directory of the github repository
+
## Background

The extended EDMF scheme is a unified model of turbulence and convection. More information about the model can be found [here](https://clima.github.io/TurbulenceConvection.jl/stable/). This example builds an emulator of the extended EDMF scheme from input-output pairs obtained during a calibration process, and runs emulator-based MCMC to obtain an estimate of the joint parameter distribution.
@@ -24,8 +27,9 @@ and call,
```
> julia --project uq_for_EDMF.jl
```
+
!!! info
-    These runs take currently take ~1 hour to complete
+    These runs currently take ~1-3 hours to complete with the Gaussian process emulator. Random feature training currently requires significant multithreading for performance and takes a similar amount of time.

## Solution and output

@@ -35,6 +39,15 @@ The posterior is visualized by using `plot_posterior.jl`, which produces corner-
```
julia --project plot_posterior.jl
```
+For example, using random features for the case `exp_name = "ent-det-calibration"` one obtains
+```@raw html

+```
+and for `exp_name = "ent-det-tked-tkee-stab-calibration"` one obtains
+```@raw html

+```
+
The posterior samples can also be investigated directly. They are stored as a `ParameterDistribution`-type `Samples` object. One can load this and extract an array of parameters with:
```julia
# input:
@@ -53,3 +66,5 @@ mapslices(x -> transform_unconstrained_to_constrained(posterior, x), posterior_s

The computational ``\theta``-space comprises the parameters on which the algorithms act. Statistics (e.g. mean/covariance) are most meaningful when taken in this space. The physical ``\phi``-space is a (nonlinear) transformation of the computational space to apply parameter constraints. To pass parameter values back into the forward model, one must transform them. Full details and examples can be found [here](https://clima.github.io/EnsembleKalmanProcesses.jl/stable/parameter_distributions/)
+
+
diff --git a/docs/src/examples/emulators/ishigami_3d_1d.md b/docs/src/examples/emulators/global_sens_analysis.md
similarity index 70%
rename from docs/src/examples/emulators/ishigami_3d_1d.md
rename to docs/src/examples/emulators/global_sens_analysis.md
index ea604eea1..79f22a77e 100644
--- a/docs/src/examples/emulators/ishigami_3d_1d.md
+++ b/docs/src/examples/emulators/global_sens_analysis.md
@@ -1,10 +1,25 @@
-# Global Sensitiviy Analysis for an emulated Ishigami function
+# Global Sensitivity Analysis (GSA) test functions

-In this example, we assess directly the performance of our machine learning emulators. The task is to learn a function for use in a [global sensitivity analysis](https://en.wikipedia.org/wiki/Variance-based_sensitivity_analysis). In particular, we learn the Ishigami function
+!!! info "How do I run this code?"
+    The full code is found in the [`examples/Emulator/`](https://github.com/CliMA/CalibrateEmulateSample.jl/tree/main/examples/Emulator) directory of the github repository
+
+In this example, we assess directly the performance of our machine learning emulators.
The task is to learn a function for use in a [global sensitivity analysis](https://en.wikipedia.org/wiki/Variance-based_sensitivity_analysis). In particular, we have two cases:
+
+- The Ishigami function
```math
f(x; a, b) = (1 + bx_3^4)\sin(x_1) + a \sin^2(x_2), \forall x\in [-\pi,\pi]^3
```
-with ``a=7, b=0.1``. In this example, global sensitivity analysis refers to calculation of two Sobol indices. The first index collects proportions ``V_i`` (a.k.a `firstorder`) of the variance of ``f`` attributable to the input ``x_i``, and the second index collects proportions ``TV_i`` (a.k.a `totalorder`) of the residual variance having removed contributions attributable to inputs ``x_j`` ``\forall j\neq i``. The Ishigami function has an analytic formula for these Sobol indices, it is also known that one can obtain numerical approximation through quasi-Monto-Carlo methods by evaluating the Ishigami function on a special quasi-random Sobol sequence.
+with ``a=7, b=0.1``.
+- The Sobol G-function
+```math
+f(x; a) = \prod_{i=1}^d \frac{|4x_i - 2|+a_i}{1+a_i},
+```
+with the user-chosen input dimension ``d``, and coefficients ``a_i = \frac{i-1}{2} \geq 0`` for ``i=1,\dots,d``, where small ``i`` (and thus small ``a_i``) imply larger first-order effects, and interactions are primarily present between these variables.
+
+
+In this example, global sensitivity analysis refers to calculation of two Sobol indices. The first index collects proportions ``V_i`` (a.k.a `firstorder`) of the variance of ``f`` attributable to the input ``x_i``, and the second index collects proportions ``TV_i`` (a.k.a `totalorder`) of the residual variance having removed contributions attributable to inputs ``x_j`` ``\forall j\neq i``. The Ishigami function has an analytic formula for these Sobol indices; it is also known that one can obtain a numerical approximation through quasi-Monte-Carlo methods by evaluating the Ishigami function on a special quasi-random Sobol sequence.
+
+## Ishigami

To emulate the Ishigami function, the data consists of 300 pairs ``\{x,f(x)+\eta\}`` where ``\eta \sim N(0,\Sigma)`` is additive noise, and the x are sampled from the Sobol sequence. The emulators are validated by evaluating the posterior mean function on the full 16000 points of the Sobol sequence and the Sobol indices are estimated. We rerun the experiment for many repeats of the random feature hyperparameter optimization and present the statistics of these indices, as well as plotting a realization of the emulator.

@@ -120,7 +135,7 @@ y_pred, y_var = predict(emulator, samples', transform_to_real = true)

result_pred = analyze(data, y_pred')
```

-## Gaussian Process Emulator (sci-kit learn `GP`)
+### Gaussian Process Emulator (sci-kit learn `GP`)
Here is the plot for one emulation
```@raw html

```
@@ -144,7 +159,7 @@ Sampled Emulated Sobol Indices (# obs 300, noise var 0.01)
totalorder: [0.5502469909342245, 0.4587734278791574, 0.23542404141319245]
```

-## Random feature emulator (Separable Low-rank kernel `RF-scalar`)
+### Random feature emulator (Separable Low-rank kernel `RF-scalar`)
Here is the plot for one emulation
```@raw html

```
@@ -172,3 +187,34 @@ Sampled Emulated Sobol Indices (# obs 300, noise var 0.01)
```
+
+## Sobol G-function results
+
+To emulate the Sobol G-function, a script similar to the Ishigami set-up is used. The primary change is that the input dimension is now a user parameter `n_dimension` that can be adjusted, and some reasonable defaults are set within the script.
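+
+For reference, the G-function defined above has a direct one-line implementation; a sketch with the stated default coefficients (the example script's version may differ in naming):
+
+```julia
+# Sobol G-function with coefficients a_i = (i - 1) / 2, i = 1, ..., d
+sobol_g(x; a = [(i - 1) / 2 for i in 1:length(x)]) =
+    prod((abs.(4 .* x .- 2) .+ a) ./ (1 .+ a))
+```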
As output, plots are produced of the Sobol function values, and of slices through the function.
+
+For example, we repeat the scalar random feature emulation task 30 times over different training realizations.
+
+For three input dimensions, one obtains the following plot of the analytic indices (`X-true`), the qMC approximation using the true function (`X-approx`), and the 95% confidence interval of the qMC approximation using the emulator (`X-emulate`).
+
+```@raw html

+```
+
+One also obtains slices through the emulated G-function, with red being the training points and blue being the prediction.
+
+```@raw html

+```
+
+For ten input dimensions, one obtains similar plots.
+
+```@raw html

+```
+
+Here, we plot only slices through the three most sensitive dimensions.
+
+```@raw html

+```
+
+
diff --git a/docs/src/examples/emulators/lorenz_integrator_3d_3d.md b/docs/src/examples/emulators/lorenz_integrator_3d_3d.md
index 735c363e0..87c40d14f 100644
--- a/docs/src/examples/emulators/lorenz_integrator_3d_3d.md
+++ b/docs/src/examples/emulators/lorenz_integrator_3d_3d.md
@@ -1,5 +1,8 @@
# Integrating Lorenz 63 with an emulated integrator

+!!! info "How do I run this code?"
+    The full code is found in the [`examples/Emulator/`](https://github.com/CliMA/CalibrateEmulateSample.jl/tree/main/examples/Emulator) directory of the github repository
+
In this example, we assess directly the performance of our machine learning emulators. The task is to learn the forward Euler integrator of a [Lorenz 63 system](https://en.wikipedia.org/wiki/Lorenz_system). The model parameters are set to their classical settings ``(\sigma, \rho, \beta) = (10,28,\frac{8}{3})`` to exhibit chaotic behavior. The discrete system is given as:
```math
diff --git a/docs/src/examples/emulators/regression_2d_2d.md b/docs/src/examples/emulators/regression_2d_2d.md
index 52a98d366..1dff8ed6c 100644
--- a/docs/src/examples/emulators/regression_2d_2d.md
+++ b/docs/src/examples/emulators/regression_2d_2d.md
@@ -1,5 +1,8 @@
# Regression of ``\mathbb{R}^2 \to \mathbb{R}^2`` smooth function

+!!! info "How do I run this code?"
+    The full code is found in the [`examples/Emulator/`](https://github.com/CliMA/CalibrateEmulateSample.jl/tree/main/examples/Emulator) directory of the github repository
+
In this example, we assess directly the performance of our machine learning emulators. The task is to learn the function:
```math
diff --git a/docs/src/examples/lorenz_example.md b/docs/src/examples/lorenz_example.md
index 4558736e7..cdcd4a4f5 100644
--- a/docs/src/examples/lorenz_example.md
+++ b/docs/src/examples/lorenz_example.md
@@ -1,26 +1,324 @@
# Lorenz 96 example

-We provide the following template for how the tools may be applied.
+!!! info "How do I run this code?"
+    The full code is found in the [`examples/`](https://github.com/CliMA/CalibrateEmulateSample.jl/tree/main/examples) directory of the github repository

-For small examples typically have 2 files.
+The Lorenz 96 (hereafter L96) example is a toy-problem for the application of the `CalibrateEmulateSample.jl` optimization and approximate uncertainty quantification methodologies.
+Here, L96 is augmented with periodic-in-time forcing, and we try to determine parameters (the sinusoidal amplitude and the stationary component of the forcing) from some output statistics.
+The standard L96 equations are implemented with an additional forcing term with time dependence.
+The output statistics which are used for learning are the finite time-averaged variances.

-- `GModel.jl` Contains the forward map.
The inputs should be the so-called free parameters we are interested in learning, and the output should be the measured data
-- The example script which contains the inverse problem setup and solve
+The standard single-scale L96 equations are implemented.
+The Lorenz 96 system ([Lorenz, 1996](http://www.raidl.cz/file/18/lorenz-1996-_predictability_partly_solved.pdf)) is given by
+```math
+\frac{d x_i}{d t} = (x_{i+1} - x_{i-2}) x_{i-1} - x_i + F,
+```
+with ``i`` indicating the index of the given longitude. The number of longitudes is given by ``N``.
+The boundary conditions are given by
+```math
+x_{-1} = x_{N-1}, \ x_0 = x_N, \ x_{N+1} = x_1.
+```
+The time scaling is such that the characteristic time is 5 days ([Lorenz, 1996](http://www.raidl.cz/file/18/lorenz-1996-_predictability_partly_solved.pdf)).
+For very small values of ``F``, the solutions ``x_i`` decay to ``F`` after an initial transient.
+For moderate values of ``F``, the solutions are periodic, and for larger values of ``F``, the system is chaotic.
+The solution variance is a function of the forcing magnitude.
+Variations in the base state as a function of time can be imposed through a time-dependent forcing term ``F(t)``.

-## The structure of the example script
-First we create the data and the setting for the model
-1. Set up the forward model.
-2. Construct/load the truth data. Store this data conveniently in the `Observations.Observation` object
+A temporal forcing term is defined
+```math
+F = F_s + A \sin(\omega t),
+```
+with steady-state forcing ``F_s``, transient forcing amplitude ``A``, and transient forcing frequency ``\omega``.
+The total forcing ``F`` must be within the chaotic regime of L96 for all time given the prescribed ``N``.

-Then we set up the inverse problem
-3. Define the prior distributions. Use the `ParameterDistribution` object
-4. Decide on which `process` tool you would like to use (we recommend you begin with `Invesion()`). Then initialize this with the relevant constructor
-5. initialize the `EnsembleKalmanProcess` object
+The L96 dynamics are solved with RK4 integration.

-Then we solve the inverse problem, in a loop perform the following for as many iterations as required:
-7. Obtain the current parameter ensemble
-8. Transform them from the unbounded computational space to the physical space
-9. call the forward map on the ensemble of parameters, producing an ensemble of measured data
-10. call the `update_ensemble!` function to generate a new parameter ensemble based on the new data
+# Structure
+
+The example is structured with two distinct components: 1) the L96 dynamical system solver; 2) the calibrate-emulate-sample code. Each of these is described below.
+
+The forward mapping from input parameters to output statistics of the L96 system is solved using the `GModel.jl` code, which runs the L96 model across different input parameters ``\theta``. The source code for the L96 system solution is within the `GModel_common.jl` code.
+
+The Calibrate code is located in `calibrate.jl` which provides the functionality to run the L96 dynamical system (within the `GModel.jl` code), extract time-averaged statistics from the L96 states, and use the time-average statistics for calibration. While this example description is self-contained, there is an additional description of the use of `EnsembleKalmanProcesses.jl` for the L96 example that is accessible [here](https://clima.github.io/EnsembleKalmanProcesses.jl/dev/examples/lorenz_example/).
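+
+For concreteness, here is a minimal sketch of the forced L96 tendency described above (for illustration only; the example's actual solver lives in `GModel_common.jl`):
+
+```julia
+# Right-hand side of the forced L96 system for state vector x and forcing F
+function l96_tendency(x::Vector{Float64}, F::Float64)
+    N = length(x)
+    dx = similar(x)
+    for i in 1:N
+        # periodic boundary conditions via mod1
+        dx[i] = (x[mod1(i + 1, N)] - x[mod1(i - 2, N)]) * x[mod1(i - 1, N)] - x[i] + F
+    end
+    return dx
+end
+
+# Time-dependent forcing used in the transient case (dynamics = 2)
+forcing(t, F_s, A, ω) = F_s + A * sin(ω * t)
+```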
+
+The Emulate-Sample code is located in `emulate_sample.jl` which provides the functionality to use the input-output pairs from the Calibrate stage for emulation and sampling (uncertainty quantification). The `emulate_sample.jl` code relies on outputs from the `calibrate.jl` code.
+
+# Walkthrough of the code
+
+This walkthrough covers calibrate-emulate-sample for the L96 problem defined above. The goal is to learn the parameters ``F_s`` and ``A`` based on the time-averaged statistics in a perfect model setting. This document focuses on the emulate-sample (`emulate_sample.jl`) stages, but discussion of the calibration stage (`calibrate.jl`) is included where necessary. This code relies on data generated by first running `calibrate.jl`. A detailed walkthrough of the calibration stage of CES for the L96 example is available [here](https://clima.github.io/EnsembleKalmanProcesses.jl/dev/examples/lorenz_example/).
+
+## Inputs
+
+First, we load standard packages
+```julia
+# Import modules
+using Distributions  # probability distributions and associated functions
+using LinearAlgebra
+using StatsPlots
+using Plots
+using Random
+using JLD2
+```
+
+Then, we load `CalibrateEmulateSample.jl` packages
+```julia
+# CES
+using CalibrateEmulateSample.Emulators
+using CalibrateEmulateSample.MarkovChainMonteCarlo
+using CalibrateEmulateSample.Utilities
+using CalibrateEmulateSample.EnsembleKalmanProcesses
+using CalibrateEmulateSample.ParameterDistributions
+using CalibrateEmulateSample.DataContainers
+```
+
+The first input settings define which input-output pairs to use for training the emulator. The Calibrate stage (run using `calibrate.jl`) generates parameter-to-data pairs by running the L96 system using an iterative optimization approach (`EnsembleKalmanProcesses.jl`). So we first define which iterations we would like to use data from for our emulator training:
+```julia
+min_iter = 1
+max_iter = 5 # number of EKP iterations to use data from is at most this
+```
+
+The second input settings define the Lorenz dynamics. The `emulate_sample.jl` code does not actually run the L96 system; it only uses L96 system runs from the `calibrate.jl` stage to train an emulator and to perform sampling. Therefore, the settings governing the L96 dynamics are fully defined in `calibrate.jl` and can be modified as necessary. The rest of the input settings in this section are defined in `calibrate.jl`.
+```julia
+F_true = 8.0 # Mean F
+A_true = 2.5 # Transient F amplitude
+ω_true = 2.0 * π / (360.0 / τc) # Frequency of the transient F (non-dim)
+params_true = [F_true, A_true]
+param_names = ["F", "A"]
+```
+The transient forcing term is enabled with the flag `dynamics`: stationary forcing is `dynamics = 1` (``A=0``) and transient forcing is used with `dynamics = 2` (``A\neq0``).
+The system with ``N`` longitudes is solved over the time horizon `t_start` to `Tfit` at fixed time step `dt`.
+```julia
+N = 36
+dt = 1/64
+t_start = 100
+# Characteristic time scale
+τc = 5.0 # days, prescribed by the L96 problem
+# This has to be less than 360 and 360 must be divisible by Ts_days
+Ts_days = 30.0 # Integration length in days (will be made non-dimensional later)
+# Integration length
+Tfit = Ts_days / τc
+```
+The states are integrated over time `Ts_days` to construct the time-averaged statistics for use by the ensemble Kalman process calibration. The specification of the statistics to be gathered from the states is provided by `stats_type`.
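+
+As a rough illustration of the kind of time-averaged statistics involved (the exact reductions depend on `stats_type` in `calibrate.jl`, so the names here are assumptions):
+
+```julia
+using Statistics
+
+# X is an N-longitudes-by-n_timesteps matrix of model states over the fit window
+time_mean(X) = vec(mean(X, dims = 2)) # time-averaged state at each longitude
+time_var(X) = vec(var(X, dims = 2))   # time-averaged variance at each longitude
+```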
+
+We implement (biased) priors as follows
+```julia
+prior_means = [F_true + 1.0, A_true + 0.5]
+prior_stds = [2.0, 0.5 * A_true]
+# constrained_gaussian("name", desired_mean, desired_std, lower_bd, upper_bd)
+prior_F = constrained_gaussian(param_names[1], prior_means[1], prior_stds[1], 0, Inf)
+prior_A = constrained_gaussian(param_names[2], prior_means[2], prior_stds[2], 0, Inf)
+priors = combine_distributions([prior_F, prior_A])
+```
+We use the recommended `constrained_gaussian` to add the desired scale and bounds to the prior distribution; in particular, we place lower bounds to preserve positivity.
+
+The priors can be plotted directly using `plot(priors)`, as seen below in the example code from `calibrate.jl`
+```julia
+# Plot the prior distribution
+p = plot(priors, title = "prior")
+plot!(p.subplots[1], [F_true], seriestype = "vline", w = 1.5, c = :steelblue, ls = :dash, xlabel = "F") # vline on top histogram
+plot!(p.subplots[2], [A_true], seriestype = "vline", w = 1.5, c = :steelblue, ls = :dash, xlabel = "A") # vline on top histogram
+```
+
+```@raw html

+```
+
+The observational noise can be generated using the L96 system or prescribed, as specified by `var_prescribe`:
+
+- `var_prescribe == false`: The observational noise is constructed by generating independent instantiations of the L96 statistics of interest at the true parameters for different initial conditions. The empirical covariance matrix is then constructed.
+- `var_prescribe == true`: The observational noise is prescribed as a Gaussian distribution with prescribed mean and variance.
+
+## Calibrate
+The calibration stage must be run before the emulate-sample stages. The calibration stage is run using `calibrate.jl`. This code will generate parameter-data pairs that will be used to train the emulator. The parameter-data pairs are visualized below
+
+```@raw html

+```
+
+## Emulate
+
+Having run the `calibrate.jl` code to generate input-output pairs from parameters to data using `EnsembleKalmanProcesses.jl`, we will now run the Emulate and Sample stages (`emulate_sample.jl`). First, we need to define which machine learning model we will use for the emulation. We have 8 cases that the user can toggle or customize
+```julia
+cases = [
+    "GP", # diagonalize, train scalar GP, assume diag inputs
+    "RF-scalar-diagin", # diagonalize, train scalar RF, assume diag inputs (most comparable to GP)
+    "RF-scalar", # diagonalize, train scalar RF, do not assume diag inputs
+    "RF-vector-svd-diag",
+    "RF-vector-svd-nondiag",
+    "RF-vector-nosvd-diag",
+    "RF-vector-nosvd-nondiag",
+    "RF-vector-svd-nonsep",
+]
+```
+The first is for GP with the `GaussianProcesses.jl` interface. The next two are for the scalar RF interface, which most closely follows exactly replacing a GP. The rest are examples of vector RF with different types of data processing (svd = same processing as scalar RF, nosvd = unprocessed) and different RF kernel structures in the output space of increasing complexity/flexibility (diag = separable diagonal, nondiag = separable nondiagonal, nonsep = nonseparable nondiagonal).
+
+The example then loads the relevant training data that was constructed in the `calibrate.jl` call.
+```julia
+# loading relevant data
+homedir = pwd()
+println(homedir)
+figure_save_directory = joinpath(homedir, "output/")
+data_save_directory = joinpath(homedir, "output/")
+data_save_file = joinpath(data_save_directory, "calibrate_results.jld2")
+ekiobj = load(data_save_file)["eki"]
+priors = load(data_save_file)["priors"]
+truth_sample_mean = load(data_save_file)["truth_sample_mean"]
+truth_sample = load(data_save_file)["truth_sample"]
+truth_params_constrained = load(data_save_file)["truth_input_constrained"] # true parameters in constrained space
+truth_params = transform_constrained_to_unconstrained(priors, truth_params_constrained)
+Γy = ekiobj.obs_noise_cov
+```
+
+We then set up the structure of the emulator. An example for GP (`GP`)
+```julia
+gppackage = Emulators.GPJL()
+pred_type = Emulators.YType()
+mlt = GaussianProcess(
+    gppackage;
+    kernel = nothing, # use default squared exponential kernel
+    prediction_type = pred_type,
+    noise_learn = false,
+)
+```
+which calls `GaussianProcess.jl`. In this L96 example, since we focus on learning ``F_s`` and ``A``, we do not need to explicitly learn the noise, so `noise_learn = false`.
+
+An example for scalar RF (`RF-scalar`)
+```julia
+n_features = 100
+kernel_structure = SeparableKernel(LowRankFactor(2, nugget), OneDimFactor())
+mlt = ScalarRandomFeatureInterface(
+    n_features,
+    n_params,
+    rng = rng,
+    kernel_structure = kernel_structure,
+    optimizer_options = overrides,
+)
+```
+Optimizer options for `ScalarRandomFeature.jl` are provided through `overrides`
+```julia
+overrides = Dict(
+    "verbose" => true,
+    "scheduler" => DataMisfitController(terminate_at = 100.0),
+    "cov_sample_multiplier" => 1.0,
+    "n_iteration" => 20,
+)
+# we do not want termination, as our priors have relatively little interpretation
+```
+
+We then build the emulator with the parameters as defined above
+```julia
+emulator = Emulator(
+    mlt,
+    input_output_pairs;
+    obs_noise_cov = Γy,
+    normalize_inputs = normalized,
+    standardize_outputs = standardize,
+    standardize_outputs_factors = norm_factor,
+    retained_svd_frac = retained_svd_frac,
+    decorrelate = decorrelate,
+)
+```
+
+For RF and some GP packages, the training occurs during construction of the `Emulator`; however, sometimes one must call an optimization step afterwards
+```julia
+optimize_hyperparameters!(emulator)
+```
+
+The emulator is checked for accuracy by evaluating its predictions on the true parameters
+```julia
+# Check how well the Gaussian Process regression predicts on the
+# true parameters
+y_mean, y_var = Emulators.predict(emulator, reshape(truth_params, :, 1), transform_to_real = true)
+y_mean_test, y_var_test = Emulators.predict(emulator, get_inputs(input_output_pairs_test), transform_to_real = true)
+
+println("ML prediction on true parameters: ")
+println(vec(y_mean))
+println("true data: ")
+println(truth_sample) # what was used as truth
+println(" ML predicted standard deviation")
+println(sqrt.(diag(y_var[1], 0)))
+println("ML MSE (truth): ")
+println(mean((truth_sample - vec(y_mean)) .^ 2))
+println("ML MSE (next ensemble): ")
+println(mean((get_outputs(input_output_pairs_test) - y_mean_test) .^ 2))
+```
+
+## Sample
+
+Now the emulator is constructed and validated, so we next focus on the MCMC sampling.
First, we run a short chain (2,000 steps) to determine the step size
+```julia
+# First lets run a short chain to determine a good step size
+mcmc = MCMCWrapper(RWMHSampling(), truth_sample, priors, emulator; init_params = u0)
+new_step = optimize_stepsize(mcmc; init_stepsize = 0.1, N = 2000, discard_initial = 0)
+```
+
+The step size has been determined, so now we run the full MCMC (100,000 steps, where the first 2,000 are discarded)
+```julia
+# Now begin the actual MCMC
+println("Begin MCMC - with step size ", new_step)
+chain = MarkovChainMonteCarlo.sample(mcmc, 100_000; stepsize = new_step, discard_initial = 2_000)
+```
+
+And we finish by extracting the posterior samples
+```julia
+posterior = MarkovChainMonteCarlo.get_posterior(mcmc, chain)
+```
+
+And evaluate the results with these printed statements
+```julia
+post_mean = mean(posterior)
+post_cov = cov(posterior)
+println("post_mean")
+println(post_mean)
+println("post_cov")
+println(post_cov)
+println("D util")
+println(det(inv(post_cov)))
+println(" ")
+```
+
+# Running the Example and Postprocessing
+First, the calibrate code must be executed, which performs the calibration step, generating input-output pairs of the parameter-to-data mapping. Then, the emulate-sample code is run, which loads the input-output pairs that were generated in the calibration step.
+
+## Calibrate
+The L96 parameter calibration can be run using `julia --project calibrate.jl`
+
+The output will provide the estimated parameters in the constrained `ϕ`-space. The `priors` are required in the get-method to apply these constraints.
+
+Printed output:
+```julia
+# EKI results: Has the ensemble collapsed toward the truth?
+println("True parameters: ")
+println(params_true)
+println("\nEKI results:")
+println(get_ϕ_mean_final(priors, ekiobj))
+```
+
+The parameters and forward model outputs will be saved in `parameter_storage.jld2` and `data_storage.jld2`, respectively.
+The data will be saved in the directory `output`. A scatter plot animation of the ensemble convergence to the true parameters is saved in the directory `output`. These points represent the training points that are used for the emulator.
+
+```@raw html

+```
+
+## Emulate-sample
+The L96 parameter estimation can be run using `julia --project emulate_sample.jl`
+
+The output will provide the estimated posterior distribution over the parameters. The emulate-sample code will run for several choices in the machine learning model that is used for the emulation stage, including Gaussian Process regression and RF, and using singular value data decorrelation or not.
+
+The sampling results from two emulators are shown below. We can see that the posterior is relatively insensitive to the choice of the machine learning emulation tool in this L96 example.
+
+# L96 CES example case: GP regression emulator (case="GP")
+
+```@raw html

+```
+
+# L96 CES example case: RF scalar emulator (case="RF-scalar")
+
+```@raw html

+```
-
One can then obtain the solution, dependent on the `process` type.
diff --git a/docs/src/examples/sinusoid_example.md b/docs/src/examples/sinusoid_example.md
new file mode 100644
index 000000000..bfac67164
--- /dev/null
+++ b/docs/src/examples/sinusoid_example.md
@@ -0,0 +1,449 @@
+# Sinusoid Example
+
+!!! info "How do I run this code?"
+    The full code is found in the [`examples/`](https://github.com/CliMA/CalibrateEmulateSample.jl/tree/main/examples) directory of the github repository
+
+## Background
+This example demonstrates how to use `CalibrateEmulateSample.jl` for a simple model that generates noisy
+observables of a signal. The sinusoid signal is defined by two parameters: its shift along the vertical axis
+and its amplitude. We make noisy observations of the signal and we can calculate the mean of the signal,
+which is informative about its shift along the axis, and the range of the signal, which is informative
+about the amplitude. Although our sinusoid function is simple and quick to evaluate, we shall pretend it is non-differentiable and expensive to evaluate, as a case study for carrying out uncertainty quantification on
+more complex systems. Additionally, we will work in a "perfect model" setting for this example, meaning we will
+generate pseudo-observations for our model and pretend that these are noisy observations of our system.
+
+
+### Model
+We have a model of a sinusoidal signal that is a function of parameters $\theta=(A,v)$, where $A$ is the amplitude of the signal and $v$ is the vertical shift of the signal:
+
+```math
+f(A, v) = A \sin(\phi + t) + v, \forall t \in [0,2\pi]
+```
+Here, $\phi$ is the random phase of each signal.
+The goal is to estimate not just the point estimates of the parameters $\theta=(A,v)$, but entire probability distributions of them, given some noisy observations. We will use the range and mean of a signal as our observable:
+```math
+G(\theta) = \big[ \text{range}\big(f(\theta)\big), \text{mean}\big(f(\theta)\big) \big]
+```
+This highlights the role of choosing a good observable; in particular, our choice of $G$ is independent of the random phase shift $\phi$ and is in fact deterministic. This allows us to write out an expression for the noisy observation, $y_{obs}$:
+```math
+y_{obs} = G(\theta) + \gamma, \qquad \gamma \sim \mathcal{N}(0, \Gamma)
+```
+where $\Gamma$ is the observational covariance matrix. We will assume the noise to be independent for each observable, giving us a diagonal covariance matrix.
+
+
+
+# Walkthrough of code
+
+You can find the full scripts to reproduce this tutorial in `examples/Sinusoid/`. The code is split into four sections:
+
+1. Model set up in `sinusoid_setup.jl`
+2. Calibrate in `calibrate.jl`
+3. Emulate in `emulate.jl`
+4. Sample in `sample.jl`
+
+You do not need to explicitly run `sinusoid_setup.jl` as it is called from `calibrate.jl`. However, this file contains the functions for the model and for generating pseudo-observations.
+You will need to run steps 2-4 in order, as each one relies on output saved from the previous steps.
+
+
+## Set up
+
+First, we load the packages we need for setting up the model:
+```julia
+using LinearAlgebra, Random
+using Plots
+using JLD2
+using Statistics, Distributions
+```
+
+We define a model that generates a sinusoid given parameters $\theta=(A,v)$
+(amplitude and vertical shift). We will estimate these parameters from data.
+The model adds a random phase shift upon evaluation.
+
+```julia
+# Define x-axis
+dt = 0.01
+trange = 0:dt:(2 * pi + dt)
+
+function model(amplitude, vert_shift)
+    # Set phi, the random phase
+    phi = 2 * pi * rand()
+    return amplitude * sin.(trange .+ phi) .+ vert_shift
+end
+```
+
+We will define a "true" amplitude and vertical shift to generate some pseudo-observations.
+Let $\theta=(3.0, 7.0)$.
+```julia
+amplitude_true = 3.0
+vert_shift_true = 7.0
+# Our input parameters are 2d and our outputs are 2d
+theta_true = [amplitude_true, vert_shift_true]
+dim_params = 2
+# Generate the "true" signal for these parameters
+signal_true = model(amplitude_true, vert_shift_true)
+```
+We will observe properties of the signal that inform us about the amplitude and vertical
+position. These properties will be the range (the difference between the maximum and the minimum),
+which is informative about the amplitude of the sinusoid, and the mean, which is informative
+about the vertical shift.
+```julia
+y1_true = maximum(signal_true) - minimum(signal_true)
+y2_true = mean(signal_true)
+```
+However, our observations are typically not noise-free, so we add some white noise to our
+observables. We call this $y_{obs}$. The user can choose the observational covariance matrix, $\Gamma$. We will assume the noise is independent (a diagonal covariance matrix $\Gamma=0.2 * I$).
+```julia
+dim_output = 2
+Γ = 0.2 * I
+white_noise = MvNormal(zeros(dim_output), Γ)
+y_obs = [y1_true, y2_true] .+ rand(white_noise)
+println("Observations:", y_obs)
+```
+This gives $y_{obs}=(6.15, 6.42)$.
+We can plot the true signal in black, the true observables in red and the noisy observables in blue.
+![signal](../assets/sinusoid_true_vs_observed_signal.png)
+
+
+It will be helpful for us to define a function $G(\theta)$, which returns these observables
+(the range and the mean) of the sinusoid given a parameter vector.
+
+```julia
+function G(theta)
+    amplitude, vert_shift = theta
+    sincurve = model(amplitude, vert_shift)
+    return [maximum(sincurve) - minimum(sincurve), mean(sincurve)]
+end
+```
+
+## Calibrate
+
+We are interested in learning the posterior distribution of $\theta$ for the inverse problem
+$y_{obs}=G(\theta)+\mathcal{N}(0,\Gamma)$. We first carry out calibration, which aims to solve the inverse problem
+for point estimates of the optimal values for $\theta$. Specifically, we use an ensemble based calibration method,
+such as Ensemble Kalman Inversion, because it provides us with ensembles of $G(\theta)$ evaluations that
+are focused near the optimal values for $\theta$. These ensembles provide us with a suitable dataset
+for training an emulator to be used in sampling the posterior distribution.
+
+We are using the [EnsembleKalmanProcesses.jl](https://clima.github.io/EnsembleKalmanProcesses.jl/dev/)
+package for Ensemble Kalman Inversion (EKI). We start with user-defined prior distributions and sample
+an ensemble of parameters $\theta$, which we use to evaluate $G(\theta)$. Then, we iteratively update the
+ensemble until our parameters $\theta$ are near the optimum.
+
+First, we will load the packages we need from CES:
+
+```julia
+# CES
+using CalibrateEmulateSample
+const EKP = CalibrateEmulateSample.EnsembleKalmanProcesses
+const PD = EKP.ParameterDistributions
+```
+
+We define prior distributions on the two parameters. For the amplitude,
+we define a prior with mean 2 and standard deviation 1. It is
+additionally constrained to be nonnegative. For the vertical shift we define
+a Gaussian prior with mean 0 and standard deviation 5.
+```julia
+prior_u1 = PD.constrained_gaussian("amplitude", 2, 1, 0, Inf)
+prior_u2 = PD.constrained_gaussian("vert_shift", 0, 5, -Inf, Inf)
+prior = PD.combine_distributions([prior_u1, prior_u2])
+# Plot priors
+p = plot(prior, fill = :lightgray)
+```
+![prior](../assets/sinusoid_prior.png)
+
+We now generate the initial ensemble and set up the EKI.
+```julia
+N_ensemble = 10
+N_iterations = 5
+
+initial_ensemble = EKP.construct_initial_ensemble(prior, N_ensemble)
+
+ensemble_kalman_process = EKP.EnsembleKalmanProcess(initial_ensemble, y_obs, Γ, EKP.Inversion())
+```
+
+We are now ready to carry out the inversion. At each iteration, we get the
+ensemble from the last iteration, apply $G(\theta)$ to each ensemble member,
+and apply the Kalman update to the ensemble.
+```julia
+for i in 1:N_iterations
+    params_i = EKP.get_ϕ_final(prior, ensemble_kalman_process)
+
+    # Evaluate G on each ensemble member (using j to avoid shadowing the loop index)
+    G_ens = hcat([G(params_i[:, j]) for j in 1:N_ensemble]...)
+
+    EKP.update_ensemble!(ensemble_kalman_process, G_ens)
+end
+```
+
+Finally, we get the ensemble after the last iteration. This provides our estimate of the parameters.
+```julia
+final_ensemble = EKP.get_ϕ_final(prior, ensemble_kalman_process)
+
+# Check that the ensemble mean is close to theta_true
+println("Ensemble mean: ", mean(final_ensemble, dims=2)) # [3.05, 6.37]
+println("True parameters: ", theta_true) # [3.0, 7.0]
+```
+
+| Parameter         | Truth    | EKI mean |
+| :---------------- | :------: | :------: |
+| Amplitude         | 3.0      | 3.05     |
+| Vertical shift    | 7.0      | 6.37     |
+
+The EKI ensemble mean at the final iteration is close to the true parameters, which is good.
+We can also see how the ensembles evolve at each iteration in the plot below.
+
+![eki](../assets/sinusoid_eki_pairs.png)
+
+The ensembles are initially spread out but move closer to the true parameter values
+with each iteration, indicating the EKI algorithm is converging towards the minimum.
+Taking the mean of the ensemble gives a point estimate of the optimal parameters.
+However, EKI does not give us an estimate of the uncertainty, as the ensemble collapses.
+To carry out uncertainty quantification, we can sample from the posterior distribution,
+which requires a "cheap" method to evaluate our model, i.e., an emulator.
+In the next step of CES, we will build an emulator using the dataset generated in EKI.
+
+## Emulate
+
+In the previous calibrate step, we learned point estimates for the optimal parameters $\theta$, but
+for uncertainty quantification, we want to learn posterior distributions on our parameters.
+We can sample from posterior distributions with Markov chain Monte Carlo (MCMC) methods, but these
+typically require many model evaluations. In many scientific problems, model evaluations are highly
+costly, making this infeasible. To get around this, we build an emulator of our model,
+which allows us to approximate the expensive model almost instantaneously. An emulator can also be
+helpful for noisy problems, as it provides a smoother approximation, leading to better MCMC
+convergence properties.
+In this section, we show how the codebase can be used to build emulators of our sinusoid model.
+
+We ran Ensemble Kalman Inversion with an ensemble size of 10 for 5
+iterations. This generated a total of 50 input-output pairs from our model.
+We will use these samples to train an emulator. The EKI samples make a suitable
+dataset for training an emulator because in the first iteration, the ensemble parameters
+are spread out according to the prior, meaning they cover the full support of the
+parameter space. This is important for building an emulator that can be evaluated anywhere
+in this space. In later iterations, the ensemble parameters are focused around the truth.
+This means the emulator will be more accurate around this region.
+ +First, we load additional packages we need for this section: +```julia +using CalibrateEmulateSample.Emulators +const CES = CalibrateEmulateSample +``` + +We will build two types of emulator here for comparison: Gaussian processes and Random +Features. First, set up the data in the correct format. CalibrateEmulateSample.jl uses +a paired data container that matches the inputs (in the unconstrained space) to the outputs: + +```julia +input_output_pairs = CES.Utilities.get_training_points(ensemble_kalman_process, N_iterations) +unconstrained_inputs = CES.Utilities.get_inputs(input_output_pairs) +inputs = Emulators.transform_unconstrained_to_constrained(prior, unconstrained_inputs) +outputs = CES.Utilities.get_outputs(input_output_pairs) +``` + +### Gaussian process +We will set up a basic Gaussian process (GP) emulator using the [`ScikitLearn.jl`](https://scikitlearnjl.readthedocs.io/en/latest/models/#scikitlearn-models) package or [`GaussianProcesses.jl`](https://stor-i.github.io/GaussianProcesses.jl/latest/). +See the [Gaussian process page](https://clima.github.io/CalibrateEmulateSample.jl/dev/GaussianProcessEmulator/) for more information and options, including choice of package and kernels. +```julia +gppackage = Emulators.GPJL() +gauss_proc = Emulators.GaussianProcess(gppackage, noise_learn = false) + +# Build emulator with data +emulator_gp = Emulator(gauss_proc, input_output_pairs, normalize_inputs = true, obs_noise_cov = Γ) +optimize_hyperparameters!(emulator_gp) +``` +For this simple example, we already know the observational noise `Γ=0.2*I`, so we set `noise_learn = false`. +However, for more complicated problems we may want to learn the noise as an additional hyperparameter. + +We will check performance of the GP by testing on unseen data in a moment, but first, we will build a random features emulator for comparison. + +### Random Features +An alternative emulator can be created with random features (RF). Random features can approximate a Gaussian process +with improved scaling properties, making them more suitable for higher dimensional problems. We use a Vector Random +Features emulator here, chosen because we find it is a reasonable approximation to the Gaussian process emulator above. +For new problems, you may need to play around with these parameter choices. +More information can be found [here](https://clima.github.io/CalibrateEmulateSample.jl/dev/random_feature_emulator/). + +```julia +# We have two input dimensions and two output dimensions. +input_dim = 2 +output_dim = 2 +# Select number of features +n_features = 60 +nugget = 1e-9 +kernel_structure = NonseparableKernel(LowRankFactor(2, nugget)) +optimizer_options = Dict( + "n_ensemble" => 50, + "cov_sample_multiplier" => 10, + "scheduler" => EKP.DataMisfitController(on_terminate = "continue"), + "n_iteration" => 50, + "verbose" => true, +) +random_features = VectorRandomFeatureInterface( + n_features, + input_dim, + output_dim, + kernel_structure = kernel_structure, + optimizer_options = optimizer_options, +) +emulator_random_features = + Emulator(random_features, input_output_pairs, normalize_inputs = true, obs_noise_cov = Γ, decorrelate = false) +optimize_hyperparameters!(emulator_random_features) +``` +### Emulator Validation + +Now we will validate both GP and RF emulators and compare them against the ground truth, $G(\theta)$. +Note this is only possible in our example because our true model, $G(\theta)$, is cheap to evaluate. 
In more complex systems, we would have only limited data with which to validate emulator performance.
+
+Here, we will compare emulator performance across a wide range of parameters, but we will pay close attention to
+performance near the final ensemble mean $\theta=(3, 6)$. This is because we need high accuracy in this region
+in the next step, when we sample the posterior distribution.
+
+First, we check the ground truth model $G(\theta)$ over our parameter space. We can plot how the two outputs (range and mean) vary with the two input parameters (amplitude and vertical shift).
+
+![groundtruth](../assets/sinusoid_groundtruth_contours.png)
+
+The first panel shows how the range varies with respect to the two parameters in the true forward map. The contours show the range is mostly dependent on the amplitude, with little variation with
+respect to the vertical shift. The second panel shows how the mean varies with respect to the two
+parameters and is mostly dependent on the vertical shift. This result makes sense for our model setup.
+
+Below, we recreate the same contour plot with the emulators. We will also overlay the training data points
+from the EKI, where the colors show the output from $G(\theta)$ evaluated at the training points.
+The emulator contours should agree with the training data.
+
+First, for the Gaussian process emulator:
+
+![GP_emulator](../assets/sinusoid_GP_emulator_contours.png)
+
+This looks similar to the output from $G(\theta)$. Next, let's check the random features emulator:
+
+![RF_emulator](../assets/sinusoid_RF_emulator_contours.png)
+
+Both the GP and RF emulators give similar results to the ground truth $G(\theta)$, indicating they are correctly
+learning the relationships between the parameters and the outputs. We also see the contours agree with the
+colors of the training data points.
+
+We should also validate how accurate the emulators are by looking at the absolute difference between emulator
+predictions and the ground truth.
+
+The Gaussian process absolute errors are plotted here:
+
+![GP_errors](../assets/sinusoid_GP_errors_contours.png)
+
+and the random features absolute errors are here:
+
+![RF_errors](../assets/sinusoid_RF_errors_contours.png)
+
+Both of these error maps look similar. Importantly, we want the emulator to show low errors in the region around the true parameter values near $\theta=(3, 6)$ (i.e., near where the EKI points converge, shown by the scatter points in
+the previous plot). This is the region that we will be sampling in the next step.
+We see low errors near here for both outputs and for both emulators. Now that we have validated these emulators,
+we will proceed to the last step of CES: sampling the posterior distribution.
+
+## Sample
+
+Now that we have a cheap emulator for our model, we can carry out uncertainty quantification
+to learn the posterior distribution of the parameters, $\theta$. We use Markov chain Monte Carlo
+(MCMC) to sample the posterior distribution. In MCMC, we start from a sample drawn from the prior distribution
+and propose a new sample from a proposal distribution; the proposal is accepted with a probability that depends
+on the ratio of the posterior density at the proposed and current samples. If accepted, the proposed sample is
+added to the chain; otherwise, the current sample is added to the chain again. This is repeated over many
+iterations and eventually creates a sequence of samples from the posterior distribution.
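+
+To make the accept/reject logic concrete, below is a minimal, self-contained sketch of a single random walk
+Metropolis-Hastings step (illustrative only; the step size `δ` and log-density `logp` are placeholders, and the
+CES samplers described next handle all of this internally):
+
+```julia
+# One random walk Metropolis-Hastings step for a target log-density `logp`.
+function mh_step(θ, logp, δ)
+    θ_prop = θ .+ δ .* randn(length(θ))      # random walk proposal
+    log_α = logp(θ_prop) - logp(θ)           # log acceptance ratio (symmetric proposal)
+    return log(rand()) < log_α ? θ_prop : θ  # accept, else repeat the current sample
+end
+```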
+The CES code uses [AbstractMCMC.jl](https://turing.ml/dev/docs/for-developers/interface); full details can be found [here](https://clima.github.io/CalibrateEmulateSample.jl/dev/API/AbstractMCMC/).
+For this example, we will use a random walk Metropolis-Hastings sampler (`RWMHSampling`), which generates
+proposals via a random walk with step size $\delta$. Usually, we have little knowledge of
+what this step size should be, but we can optimize it as shown below.
+
+First, we will load the additional packages we need:
+```julia
+using CalibrateEmulateSample.MarkovChainMonteCarlo
+```
+
+We will provide the API with the observations, priors, and our cheap emulator from the previous section. In this
+example we use the GP emulator. First, we need to find a suitable starting point, ideally one that is near the posterior distribution. We will use the final ensemble mean from EKI, as this will increase the chance of acceptance near the start of the chain and reduce burn-in time.
+```julia
+init_sample = EKP.get_u_mean_final(ensemble_kalman_process)
+println("initial parameters: ", init_sample) # (1.11, 6.37)
+```
+
+Now, we can set up and carry out the MCMC starting from this point.
+```julia
+mcmc = MCMCWrapper(RWMHSampling(), y_obs, prior, emulator_gp; init_params = init_sample)
+# First let's run a short chain to determine a good step size
+new_step = optimize_stepsize(mcmc; init_stepsize = 0.1, N = 2000, discard_initial = 0)
+
+# Now begin the actual MCMC
+println("Begin MCMC - with step size ", new_step)
+chain = MarkovChainMonteCarlo.sample(mcmc, 100_000; stepsize = new_step, discard_initial = 2_000)
+
+# We can print summary statistics of the MCMC chain
+display(chain)
+```
+
+| parameters | mean     | std    |
+| :--------- | :------: | :----: |
+| amplitude  | 1.1068   | 0.0943 |
+| vert_shift | 6.3897   | 0.4601 |
+
+Note that these values are provided in the unconstrained space. The vertical shift
+seems reasonable, but the amplitude is not. This is because the amplitude is constrained to be
+positive, but the MCMC is run in the unconstrained space. We can transform back to the
+constrained space and recalculate these values.
+
+```julia
+# Extract posterior samples
+posterior = MarkovChainMonteCarlo.get_posterior(mcmc, chain)
+# Back to constrained coordinates
+constrained_posterior = Emulators.transform_unconstrained_to_constrained(
+    prior, MarkovChainMonteCarlo.get_distribution(posterior)
+)
+println("Amplitude mean: ", mean(constrained_posterior["amplitude"]), ", std: ", std(constrained_posterior["amplitude"]))
+println("Vertical shift mean: ", mean(constrained_posterior["vert_shift"]), ", std: ", std(constrained_posterior["vert_shift"]))
+```
+This gives:
+
+| parameters | mean     | std    |
+| :--------- | :------: | :----: |
+| amplitude  | 3.0382   | 0.2880 |
+| vert_shift | 6.3774   | 0.4586 |
+
+This is in agreement with the true $\theta=(3.0, 7.0)$ and with the observational covariance matrix we provided, $\Gamma=0.2 * I$ (i.e., a standard deviation of approximately $0.45$). `CalibrateEmulateSample.jl` has built-in plotting
+recipes to help us visualize the prior and posterior distributions. Note that these are the
+marginal distributions.
+
+```julia
+# We can quickly plot priors and posterior using built-in capabilities
+p = plot(prior, fill = :lightgray)
+plot!(posterior, fill = :darkblue, alpha = 0.5)
+```
+![GP_posterior](../assets/sinusoid_posterior_GP.png)
+
+
+The MCMC has learned the posterior distribution, which is much narrower than the prior.
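+
+Since the posterior is a joint distribution over both parameters, it can also be inspected directly from the
+samples. As a quick check (a sketch only, assuming `Plots` is loaded and `constrained_posterior` stores an array
+of samples per parameter name, as above), one can bin the constrained samples in 2D:
+
+```julia
+# Quick look at the joint posterior samples in the constrained space
+histogram2d(
+    vec(constrained_posterior["amplitude"]),
+    vec(constrained_posterior["vert_shift"]),
+    nbins = 50,
+    xlabel = "amplitude",
+    ylabel = "vert_shift",
+)
+```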
+In multidimensional problems, marginal distribution plots do not show how parameters co-vary. We plot a
+2D histogram of $\theta_1$ vs. $\theta_2$ below, with the marginal distributions on each axis.
+
+![GP_2d_posterior](../assets/sinusoid_MCMC_hist_GP.png)
+
+
+### Sample with Random Features
+
+We can repeat the sampling method using the random features emulator instead of the Gaussian
+process, and we find similar results:
+
+| parameters | mean     | std    |
+| :--------- | :------: | :----: |
+| amplitude  | 3.3210   | 0.7216 |
+| vert_shift | 6.3986   | 0.5098 |
+
+![RF_2d_posterior](../assets/sinusoid_MCMC_hist_RF.png)
+
+It is reassuring to see that our uncertainty quantification methods are robust to the different emulator
+choices here. This is because our particular GP and RF emulators showed similar accuracy during validation.
+However, this result is highly sensitive to the choices of GP kernel and RF kernel structure. If you find very
+different posterior distributions for different emulators, it is likely that the kernel choices need to be refined.
+The kernel choices must be flexible enough to accurately capture the relationships between the inputs and outputs.
+We recommend trying a variety of different emulator configurations and carefully considering emulator validation
+on samples that the emulator has not been trained on.
diff --git a/docs/src/index.md b/docs/src/index.md
index d67418f39..28dc10da7 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -19,31 +19,53 @@ y = \mathcal{G}(\theta) + \eta,
 ```
 where the noise ``\eta`` is drawn from a $d$-dimensional Gaussian with distribution ``\mathcal{N}(0, \Gamma_y)``.
+
 ### The inverse problem
 
 Given an observation ``y``, the computer model ``\mathcal{G}``, the observational noise ``\Gamma_y``, and some broad prior information on ``\theta``, we return the joint, data-informed distribution for "``\theta`` given ``y``".
 
 As the name suggests, `CalibrateEmulateSample.jl` breaks this problem into a sequence of three steps: calibration, emulation, and sampling. A comprehensive treatment of the calibrate-emulate-sample approach to Bayesian inverse problems can be found in [Cleary et al. (2020)](https://arxiv.org/pdf/2001.03689.pdf).
 
-### The three steps of the algorithm:
+### The three steps of the algorithm: see our walkthrough of the [Sinusoid Example](@ref)
+
+**Learn the vertical shift and amplitude of the signal given the noisy observation**
+```@raw html
+
+```
 
-The **calibrate** step of the algorithm consists of an application of [Ensemble Kalman Processes](https://github.com/CliMA/EnsembleKalmanProcesses.jl), which generates input-output pairs ``\{\theta, \mathcal{G}(\theta)\}`` in high density around an optimal parameter ``\theta^*``. This ``\theta^*`` will be near a mode of the posterior distribution (Note: This the only time we interface with the forward model ``\mathcal{G}``).
+The **calibrate** step of the algorithm consists of an application of [Ensemble Kalman Processes](https://github.com/CliMA/EnsembleKalmanProcesses.jl), which generates input-output pairs ``\{\theta, \mathcal{G}(\theta)\}`` in high density around an optimal parameter ``\theta^*``. Here, ``\theta`` are amplitude and vertical shift pairs, and ``\mathcal{G}(\theta)`` are the resulting signal mean and range. This ``\theta^*`` will be near a mode of the posterior distribution (Note: This is the only time we interface with the forward model ``\mathcal{G}``).
+
+**calibrate with EKP to generate data pairs...**
+```@raw html
+
+```
 
 The **emulate** step takes these pairs ``\{\theta, \mathcal{G}(\theta)\}`` and trains a statistical surrogate model (e.g., a Gaussian process), emulating the forward map ``\mathcal{G}``.
 
+**emulate the map statistically from EKP pairs...**
+```@raw html
+
+```
 
 The **sample** step uses this surrogate in place of ``\mathcal{G}`` in a sampling method (Markov chain Monte Carlo) to sample the posterior distribution of ``\theta``.
 
+**sample the emulated map with MCMC...**
+```@raw html
+
+```
+
+## Code Components
+
 `CalibrateEmulateSample.jl` contains the following modules:
 
-Module | Purpose
------------------------------|--------------------------------------------------------
-CalibrateEmulateSample.jl | Pulls in the [Ensemble Kalman Processes](https://github.com/CliMA/EnsembleKalmanProcesses.jl) package
-Emulator.jl | Emulate: Modular template for emulators
-GaussianProcess.jl | - A Gaussian process emulator
-MarkovChainMonteCarlo.jl | Sample: Modular template for MCMC
-Utilities.jl | Helper functions
+Module                                 | Purpose
+---------------------------------------|--------------------------------------------------------
+CalibrateEmulateSample.jl              | A wrapper for the pipeline
+Emulator.jl                            | Modular template for the emulators
+GaussianProcess.jl                     | A Gaussian process emulator
+Scalar/VectorRandomFeatureInterface.jl | A Scalar/Vector-output Random Feature emulator
+MarkovChainMonteCarlo.jl               | Modular template for Markov Chain Monte Carlo samplers
+Utilities.jl                           | Helper functions
 
-**The best way to get started is to have a look at the examples!**
 
 ## Authors
diff --git a/docs/src/installation_instructions.md b/docs/src/installation_instructions.md
index 74f851df8..992d13635 100644
--- a/docs/src/installation_instructions.md
+++ b/docs/src/installation_instructions.md
@@ -3,42 +3,60 @@
 ### Installing CalibrateEmulateSample.jl
 
 Currently CalibrateEmulateSample (CES) depends on some external python dependencies
-including `scikit-learn` wrapped by ScikitLearn.jl, which requires a couple extra
-installation steps:
-
-First clone the project into a new local repository
+!!! info "Latest python package versions!"
+    We have verified that the following configurations work:
+    For Python `3.11 - 3.12`: `scipy` = `1.14.1`, `scikit-learn` = `1.5.1`.
+    For Python `3.8 - 3.11`: `scipy` = `1.8.1`, `scikit-learn` = `1.1.1`.
+    Please create an issue if you have had success with more up-to-date versions, and we can update this page!
+
+If you have the dependencies installed already, then the code can be used by simply entering
 ```
-> git clone git@github.com:Clima/CalibrateEmulateSample.jl
-> cd CalibrateEmulateSample.jl
+julia --project
+> ]
+> add CalibrateEmulateSample
 ```
-Install and build the project dependencies. Given that CES depends on python packages
-it is easiest to set the project to use its own
-[Conda](https://docs.conda.io/en/latest/miniconda.html) environment variable
+One may instead clone the project into a new local repository (using the SSH or HTTPS link from GitHub) to easily access the CES codebase (e.g., to run our example suite).
+
+If you do not have the dependencies installed, we have found it is easiest to install them via Julia's `Conda.jl`:
+```
+julia --project
+> ]
+> add Conda
+> add CalibrateEmulateSample
+```
+Then install the dependencies by having the project use its own [Conda](https://docs.conda.io/en/latest/miniconda.html) environment (set by exporting the environment variable `PYTHON=""`).
```
> PYTHON="" julia --project -e 'using Pkg; Pkg.instantiate()'
```
-
-The `scikit-learn` package (along with `scipy`) then has to be installed if using a Julia project-specific Conda environment:
+This call should build Conda and PyCall. The `scikit-learn` package (along with `scipy`) then has to be installed if using a Julia project-specific Conda environment:
```
-> PYTHON="" julia --project -e 'using Conda; Conda.add("scipy=1.8.1", channel="conda-forge")'
-> PYTHON="" julia --project -e 'using Conda; Conda.add("scikit-learn=1.1.1")'
+> PYTHON="" julia --project -e 'using Conda; Conda.add("scipy=1.14.1", channel="conda-forge")'
+> PYTHON="" julia --project -e 'using Conda; Conda.add("scikit-learn=1.5.1")'
```
+!!! info "PyCall can't find the packages!?"
+    Sometimes `Conda.jl` installs the Python packages into the Julia-managed Conda environment, but PyCall points to a different Python path. This throws an error like:
+    ```
+    ERROR: InitError: PyError (PyImport_ImportModule
+
+    The Python package sklearn.gaussian_process.kernels could not be imported by pyimport.
+    ```
+    In this case, simply call `julia --project` followed by
+    ```julia
+    julia> ENV["PYTHON"] = ""
+    julia> using Pkg
+    julia> Pkg.build("PyCall")
+    julia> exit()
+    ```
+    to unify the paths.
 
 See the [PyCall.jl documentation](https://github.com/JuliaPy/PyCall.jl#specifying-the-python-version)
-for more information about how to configure the local Julia / Conda / Python environment. Typically it will require building in the
-REPL via
-```julia
-> julia --project
-julia> using Pkg
-julia> Pkg.build("PyCall")
-```
+for more information about how to configure the local Julia / Conda / Python environment.
 
 To test that the package is working:
@@ -63,4 +81,4 @@ julia --project=docs/ -e 'using Pkg; Pkg.instantiate()'
 julia --project=docs/ docs/make.jl
 ```
 
-The locally rendered HTML documentation can be viewed at `docs/build/index.html`.
+The locally rendered HTML documentation can be viewed at `docs/build/index.html`. Some figures may only be viewable in the online documentation due to the fancy-url package.
diff --git a/docs/src/random_feature_emulator.md b/docs/src/random_feature_emulator.md
index 89cdf7bed..d478739a7 100644
--- a/docs/src/random_feature_emulator.md
+++ b/docs/src/random_feature_emulator.md
@@ -13,6 +13,40 @@ The `VectorRandomFeatureInterface`, when applied to multidimensional problems, d
 
 Building a random feature interface is similar to building a Gaussian process: one defines a kernel to encode similarities between outputs ``(y_i,y_j)`` based on inputs ``(x_i,x_j)``. Additionally, one must specify the number of random feature samples to be taken to build the emulator.
 
+# Recommended configuration
+
+Below is a recommended configuration that is flexible yet requires learning relatively few parameters. Users can increase `r` to trade additional flexibility against having more kernel hyperparameters to learn.
+
+```julia
+using CalibrateEmulateSample.Emulators
+# given input_dim, output_dim, and a PairedDataContainer
+
+# define the number of features for prediction
+n_features = 400
+
+# define the kernel
+nugget = 1e8 * eps() # small nugget term
+r = 1 # start with the smallest rank
+lr_perturbation = LowRankFactor(r, nugget)
+nonsep_lrp_kernel = NonseparableKernel(lr_perturbation)
+
+# configure the optimizer
+optimizer_options = Dict(
+    "verbose" => true, # print diagnostics for the optimizer
+    "n_features_opt" => 100, # use fewer features during hyperparameter optimization/kernel learning
+    "cov_sample_multiplier" => 1.0, # reduce/increase the number of samples in the initial covariance estimation stage
+)
+
+machine_learning_tool = VectorRandomFeatureInterface(
+    n_features,
+    input_dim,
+    output_dim,
+    kernel_structure = nonsep_lrp_kernel,
+    optimizer_options = optimizer_options
+)
+```
+Users can change the kernel complexity with `r`, the number of features for prediction with `n_features`, and the number of features used during optimization with `n_features_opt`.
+
 # User Interface
 
 `CalibrateEmulateSample.jl` allows the random feature emulator to be built using the external package [`RandomFeatures.jl`](https://github.com/CliMA/RandomFeatures.jl). In the notation of this package's documentation, our interface allows for families of `RandomFourierFeature` objects to be constructed with different Gaussian distributions of the "`xi`", a.k.a. weight distribution, and with a learnable "`sigma`", a.k.a. scaling parameter.
@@ -37,12 +71,12 @@ To adjust the expressivity of the random feature family one can define the keywo
 
 We have two types,
 ```julia
-separable_kernel = Separable(input_cov_structure, output_cov_structure)
-nonseparable_kernel = Nonseparable(cov_structure)
+separable_kernel = SeparableKernel(input_cov_structure, output_cov_structure)
+nonseparable_kernel = NonseparableKernel(cov_structure)
 ```
 where `cov_structure` specifies the structure that the user imposes on the covariance. The basic covariance structures are given by
 ```julia
-1d_cov_structure = OneDimFactor() # the problem dimension is 1
+oned_cov_structure = OneDimFactor() # the problem dimension is 1
 diagonal_structure = DiagonalFactor() # impose diagonal structure (e.g. ARD kernel)
 cholesky_structure = CholeskyFactor() # general positive definite matrix
 lr_perturbation = LowRankFactor(r) # assume structure is a rank-r perturbation from identity
@@ -75,7 +109,7 @@ Dict(
 "train_fraction" => tf,
 )
 ```
-- Decreasing `csm` (default `10.0`) towards `0.0` directly reduces the number of samples to estimate a covariance matrix in the optimizer, by using a shrinkage estimator - the more shrinkage the more approximation (suggestion, keep shrinkage amount below `0.2`).
+- Decreasing `csm` (default `10.0`) towards `0.0` directly reduces the number of samples used to estimate a covariance matrix in the optimizer, by using a shrinkage estimator to improve matrix conditioning. Guide: more samples implies less shrinkage for good conditioning and less approximation error. The amount of shrinkage is returned to the user as a value between 0 (no shrinkage) and 1 (shrink to a diagonal matrix); it is suggested that users choose `csm` to keep the shrinkage amount below `0.2`.
 - Increasing `tf` towards `1` changes the train-validate split, reducing samples but increasing cost-per-sample and reducing the available validation data (default `0.8`, suggested range `(0.5,0.95)`).
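+
+As a concrete illustration (the values below are starting points for experimentation, not package defaults), a cheaper but more approximate optimization could combine a smaller `csm` with a smaller train fraction:
+
+```julia
+optimizer_options = Dict(
+    "cov_sample_multiplier" => 2.0, # fewer samples for covariance estimation (more shrinkage, more approximation)
+    "train_fraction" => 0.7,        # below the 0.8 default: more, cheaper samples and more validation data
+)
+```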
If optimizer convergence stagnates or is too slow, or if it terminates before producing good results, try:
@@ -88,9 +122,9 @@ Dict(
 )
 ```
 We suggest looking at the [`EnsembleKalmanProcesses`](https://github.com/CliMA/EnsembleKalmanProcesses.jl) documentation for more details; but to summarize
-- Reducing optimizer samples `n_e` and iterations `n_i` reduces computation time.
-- If `n_e` becomes less than the number of hyperparameters, the updates will fail and a localizer must be specified in `loc`.
-- If the algorithm terminates at `T=1` and resulting emulators looks unacceptable one can change or add arguments in `sch` e.g. `DataMisfitController("on_terminate"=continue)`
+- Reducing optimizer samples `n_e` and iterations `n_i` reduces computation time, but may limit convergence progress; see [here](https://clima.github.io/EnsembleKalmanProcesses.jl/dev/ensemble_kalman_inversion/#Updating-the-Ensemble).
+- If `n_e` becomes less than the number of hyperparameters, the updates may fail and a localizer must be specified in `loc`; see [here](https://clima.github.io/EnsembleKalmanProcesses.jl/dev/localization/).
+- If the algorithm terminates at `T=1` and the resulting emulators look unacceptable, one can change or add arguments in `sch`, e.g. `DataMisfitController(on_terminate = "continue")`; see [here](https://clima.github.io/EnsembleKalmanProcesses.jl/dev/learning_rate_scheduler/).
 
 !!! note
     Widely robust defaults here are a work in progress
@@ -151,8 +185,31 @@ build_default_prior(input_dim, output_dim, vector_default_kernel)
 ### Vector, nonseparable: ``\mathbb{R}^{25} \to \mathbb{R}^{50}``
 
 The following represents the most general kernel case.
-!!! note "Use low-rank/diagonls representations where possible"
-    The following is far too general, leading to large numbers of hyperparameters
+!!! note
+    Use low-rank/diagonal representations where possible to control the number of hyperparameters.
+
+```julia
+using CalibrateEmulateSample.Emulators
+input_dim = 25
+output_dim = 50
+eps = 1e-8
+rank = 5
+# build a rank-5 nonseparable vector kernel
+vector_lowrank_kernel = NonseparableKernel(LowRankFactor(rank, eps))
+
+calculate_n_hyperparameters(input_dim, output_dim, vector_lowrank_kernel)
+# answer = 6256; 6255 for the joint input-output space, and 1 scaling
+
+build_default_prior(input_dim, output_dim, vector_lowrank_kernel)
+# builds a 2-entry distribution
+# 5-dim positive distribution 'full_lowrank_diagonal'
+# 6250-dim unbounded distribution 'full_lowrank_U'
+# 1-dim positive distribution `sigma`
+```
+
+!!! warning
+    Naive representations lead to very large numbers of hyperparameters.
+
 ```julia
 using CalibrateEmulateSample.Emulators
 input_dim = 25
@@ -164,7 +221,7 @@ vector_general_kernel = NonseparableKernel(CholeskyFactor(eps))
 
 calculate_n_hyperparameters(input_dim, output_dim, vector_general_kernel)
 # answer = 781876; 781875 for the joint input-output space, and 1 scaling
 
-build_default_prior(input_dim, output_dim, vector_default_kernel)
+build_default_prior(input_dim, output_dim, vector_general_kernel)
 # builds a 2-entry distribution
 # 781875-dim unbounded distribution 'full_cholesky'
 # 1-dim positive distribution `sigma`
diff --git a/docs/src/API/AbstractMCMC.md b/docs/src/sample.md
similarity index 57%
rename from docs/src/API/AbstractMCMC.md
rename to docs/src/sample.md
index 125ad1b2f..77ff14c36 100644
--- a/docs/src/API/AbstractMCMC.md
+++ b/docs/src/sample.md
@@ -1,14 +1,67 @@
-# AbstractMCMC sampling API
+# The Sample stage
 
 ```@meta
 CurrentModule = CalibrateEmulateSample.MarkovChainMonteCarlo
 ```
 
-The "sample" part of CES refers to exact sampling from the emulated posterior via [Markov chain Monte
-Carlo](https://en.wikipedia.org/wiki/Markov_chain_Monte_Carlo) (MCMC). Within this paradigm, we want to provide the
-flexibility to use multiple sampling algorithms; the approach we take is to use the general-purpose
-[AbstractMCMC.jl](https://turing.ml/dev/docs/for-developers/interface) API, provided by the
-[Turing.jl](https://turing.ml/dev/) probabilistic programming framework.
+The "sample" part of CES refers to exact sampling from the emulated posterior; in our current framework, this is achieved with a [Markov chain Monte
+Carlo](https://en.wikipedia.org/wiki/Markov_chain_Monte_Carlo) (MCMC) algorithm. Within this paradigm, we want to provide the flexibility to use multiple sampling algorithms; the approach we take is to use the general-purpose [AbstractMCMC.jl](https://turing.ml/dev/docs/for-developers/interface) API, provided by the [Turing.jl](https://turing.ml/dev/) probabilistic programming framework.
+
+
+## User interface
+
+We briefly outline an instance of how one sets up and uses MCMC within the CES package. The user first loads the MCMC module and provides one of the protocols (i.e., how one wishes to generate sampling proposals):
+
+```julia
+using CalibrateEmulateSample.MarkovChainMonteCarlo
+protocol = RWMHSampling() # Random-Walk algorithm
+# protocol = pCNMHSampling() # preconditioned Crank-Nicolson algorithm
+```
+Then one builds the MCMC by providing the standard Bayesian ingredients (prior and data) from the calibrate stage, alongside the trained statistical emulator from the emulate stage:
+```julia
+mcmc = MCMCWrapper(
+    protocol,
+    truth_sample,
+    prior,
+    emulator;
+    init_params=mean_u_final,
+    burnin=10_000,
+)
+```
+The keyword argument `init_params` gives the starting point of the chain (often taken to be the ensemble mean of the final iteration of the calibrate stage), and `burnin` gives the number of initial steps to be discarded when drawing statistics from the sampling method.
+
+For good efficiency, one often needs to run MCMC with a problem-dependent step size. We provide a simple utility to help choose this. Here, the optimizer runs short chains (of length `N`) and adjusts the step size until the MCMC acceptance rate falls within an acceptable range, returning this step size.
+```julia
+new_step = optimize_stepsize(
+    mcmc;
+    init_stepsize = 1,
+    N = 2000,
+)
+```
+To generate ``10^5`` samples with a given step size (and optional random number generator `rng`), one calls
+```julia
+chain = sample(rng, mcmc, 100_000; stepsize = new_step)
+display(chain) # gives diagnostics
+```
+The return value is stored in an `MCMCChains.Chains` object. To convert this back into a `ParameterDistribution` type (which contains, e.g., the transformation maps), one can call
+```julia
+posterior = get_posterior(mcmc, chain)
+constrained_posterior = transform_unconstrained_to_constrained(prior, get_distribution(posterior))
+```
+
+One can quickly plot the marginals of the prior and posterior distribution with
+```julia
+using Plots
+plot(prior)
+plot!(posterior)
+```
+or extract statistics of the (unconstrained) distribution with
+```julia
+mean_posterior = mean(posterior)
+cov_posterior = cov(posterior)
+```
+
+# [Further details on the implementation](@id AbstractMCMC sampling API)
 
 This page provides a summary of AbstractMCMC which augments the existing documentation
 (\[[1](https://turing.ml/dev/docs/for-developers/interface)\],
@@ -30,23 +83,21 @@ remember where methods are defined! Below we describe the relevant parts of
 - Further extended for the needs of CES in [Markov chain Monte Carlo](https://en.wikipedia.org/wiki/Markov_chain_Monte_Carlo).
 
-## Classes and methods
-
 ### Sampler
 
 A Sampler is AbstractMCMC's term for an implementation of an MCMC sampling algorithm, along with all its configuration
-parameters. All samplers must inherit from `AbstractMCMC.AbstractSampler`.
+parameters. All samplers are a subtype of AbstractMCMC's `AbstractSampler`.
 
 Currently CES only implements the Metropolis-Hastings (MH) algorithm. Because it's so straightforward, much of
 AbstractMCMC isn't needed. We implement two variants of MH with two different Samplers: `RWMetropolisHastings` and
-`pCNMetropolisHastings`, both of which inherit from the `AdvancedMH.MHSampler` base class. The public constructor for
+`pCNMetropolisHastings`, both of which are subtypes of `AdvancedMH.MHSampler`. The constructor for
 both Samplers is [`MetropolisHastingsSampler`](@ref); the different Samplers are specified by passing a
 [`MCMCProtocol`](@ref) object to this constructor.
 
-The MH Sampler classes have only one field, `proposal`, which is the distribution used to generate new MH proposals via
-stochastic offsets to the current parameter values. This is done by
+The `MHSampler` has only one field, `proposal`, the distribution used to generate new MH proposals via
+additive stochastic perturbations to the current parameter values. This is done by
 [AdvancedMH.propose()](https://github.com/TuringLang/AdvancedMH.jl/blob/master/src/proposal.jl), which gets called for
-each MCMC `step()` (below). The difference between our two Samplers is in how this proposal is generated:
+each MCMC `step()`. The difference between Samplers comes from how the proposal is generated:
 
 - [`RWMHSampling`](@ref) does vanilla random-walk proposal generation with a constant, user-specified step size (this
   differs from the AdvancedMH implementation, which doesn't provide for a step size.)
@@ -57,7 +108,7 @@ each MCMC `step()` (below). The difference between our two Samplers is in how th
   (2008)](https://www.worldscientific.com/doi/abs/10.1142/S0219493708002378) and [Cotter et. al.
   (2013)](https://projecteuclid.org/journals/statistical-science/volume-28/issue-3/MCMC-Methods-for-Functions--Modifying-Old-Algorithms-to-Make/10.1214/13-STS421.full).
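+
+Schematically (this is an illustration only, not the package's internal code), for a Gaussian distribution ``\mathcal{N}(0, C)`` and step size ``\delta``, the two proposal rules differ as follows:
+
+```julia
+using Distributions
+
+# Illustrative proposal rules; in both cases ξ ~ N(0, C).
+rw_propose(θ, δ, C) = θ .+ δ .* rand(MvNormal(zeros(length(θ)), C))
+# pCN shrinks the current state towards 0 so that N(0, C) is invariant
+# under the proposal, keeping acceptance rates robust in high dimension.
+pcn_propose(θ, δ, C) = sqrt(1 - δ^2) .* θ .+ δ .* rand(MvNormal(zeros(length(θ)), C))
+```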
-This is the only difference: generated proposals are then either accepted or rejected according to the same MH criterion
+Generated proposals are then either accepted or rejected according to the same MH criterion
 (in `step()`, below.)
 
 ### Models
@@ -69,7 +120,7 @@ likelihood (see [Emulators](@ref)) together with the prior. This is constructed
 ### Sampling with the MCMC Wrapper object
 
 At a [high level](https://turing.ml/dev/docs/using-turing/guide), a Sampler and Model are all that's needed to do MCMC
-sampling. This is done by the [`sample`](https://github.com/TuringLang/AbstractMCMC.jl/blob/master/src/sample.jl) method
+sampling. This is done by the [`sample`](https://turinglang.org/AbstractMCMC.jl/dev/api/#Sampling-a-single-chain) method
 provided by AbstractMCMC (extending the method from StatsBase).
 
 To be more user-friendly, in CES we wrap the Sampler, Model and other necessary configuration into a
@@ -79,8 +130,18 @@ with methods to use this object (that simply unpack its fields and call the appr
 
 ### Chain
 
-The [MCMCChain](https://beta.turing.ml/MCMCChains.jl/dev/) class is used to store the results of the MCMC sampling; the
-package provides simple diagnostics for visualization and diagnosing chain convergence.
+The [MCMCChains](https://beta.turing.ml/MCMCChains.jl/dev/) package provides the `Chains` container to store the results of the MCMC sampling; the package provides methods for quick diagnostics and plotting utilities for `Chains` objects. For example,
+
+```julia
+using MCMCChains
+using StatsPlots
+
+# ... from our MCMC example above ...
+# chain = sample(rng, mcmc, 100_000; stepsize = new_step)
+
+display(chain) # diagnostics
+plot(chain) # plots samples over iteration and PDFs for each parameter
+```
 
 ### Internals: Transitions
diff --git a/examples/Cloudy/Cloudy_calibrate.jl b/examples/Cloudy/Cloudy_calibrate.jl
new file mode 100644
index 000000000..037de2d65
--- /dev/null
+++ b/examples/Cloudy/Cloudy_calibrate.jl
@@ -0,0 +1,317 @@
+# Reference the in-tree version of CalibrateEmulateSample on Julia's load path
+include(joinpath(@__DIR__, "../", "ci", "linkfig.jl"))
+
+@info "This experiment is very sensitive to the Cloudy version. It is known to work with Cloudy commit: b4fa7e3"
+
+# Import modules
+using Distributions
+using StatsBase
+using LinearAlgebra
+using StatsPlots
+using Plots
+using Plots.PlotMeasures
+using Random
+using JLD2
+
+# This example requires Cloudy to be installed.
+using Cloudy
+using Cloudy.ParticleDistributions
+using Cloudy.KernelTensors
+# Import the module that runs Cloudy
+include(joinpath(@__DIR__, "DynamicalModel.jl"))
+
+# Import Ensemble Kalman Processes modules via CES
+using CalibrateEmulateSample
+using CalibrateEmulateSample.EnsembleKalmanProcesses
+using CalibrateEmulateSample.EnsembleKalmanProcesses.ParameterDistributions
+using CalibrateEmulateSample.EnsembleKalmanProcesses.DataContainers
+using CalibrateEmulateSample.EnsembleKalmanProcesses.PlotRecipes
+
+
+################################################################################
+#                                                                              #
+#                  Cloudy Calibrate-Emulate-Sample Example                     #
+#                                                                              #
+#                                                                              #
+#     This example uses Cloudy, a microphysics model that simulates the       #
+#     collision and coalescence of cloud droplets into bigger drops, to       #
+#     demonstrate how the full Calibrate-Emulate-Sample pipeline can be       #
+#     used for Bayesian learning and uncertainty quantification of            #
+#     parameters, given some observations.                                    #
+#                                                                              #
+#     Specifically, this example shows how to learn parameters of the         #
+#     initial cloud droplet mass distribution, given observations of some     #
+#     moments of that mass distribution at a later time, after some of the    #
+#     droplets have collided and become bigger drops.                         #
+#                                                                              #
+#     In this example, Cloudy is used in a "perfect model" (aka "known        #
+#     truth") setting, which means that the "observations" are generated by   #
+#     Cloudy itself, by running it with the true parameter values. In more    #
+#     realistic applications, the observations will come from some external   #
+#     measurement system.                                                     #
+#                                                                              #
+#     The purpose is to show how to do parameter learning using               #
+#     Calibrate-Emulate-Sample in a simple (and highly artificial) setting.   #
+#                                                                              #
+#     For more information on Cloudy, see                                     #
+#     https://github.com/CliMA/Cloudy.jl.git                                  #
+#                                                                              #
+################################################################################
+
+rng_seed = 41
+Random.seed!(rng_seed)
+rng = Random.seed!(Random.GLOBAL_RNG, rng_seed)
+
+homedir = pwd()
+output_directory = homedir * "/output/"
+if !isdir(output_directory)
+    mkdir(output_directory)
+end
+
+###
+###  Define the (true) parameters and their priors
+###
+
+# Define the parameters that we want to learn
+# We assume that the true particle mass distribution is a Gamma
+# distribution with parameters N0_true, θ_true, k_true
+param_names = ["N0", "θ", "k"]
+n_params = length(param_names)
+N0_true = 300.0 # number of particles (scaling factor for Gamma distribution)
+θ_true = 1.5597 # scale parameter of Gamma distribution
+k_true = 0.0817 # shape parameter of Gamma distribution
+ϕ_true = [N0_true, θ_true, k_true] # true parameters in constrained space
+dist_true = ParticleDistributions.GammaPrimitiveParticleDistribution(ϕ_true...)
+
+
+###
+###  Define priors for the parameters we want to learn
+###
+
+# We choose to use normal distributions to represent the prior distributions of
+# the parameters in the transformed (unconstrained) space.
+prior_N0 = constrained_gaussian(param_names[1], 400, 300, 0.4 * N0_true, Inf)
+prior_θ = constrained_gaussian(param_names[2], 1.0, 5.0, 1e-1, Inf)
+prior_k = constrained_gaussian(param_names[3], 0.2, 1.0, 1e-4, Inf)
+priors = combine_distributions([prior_N0, prior_θ, prior_k])
+# Plot the priors
+p = plot(priors, constrained = false)
+savefig(p, output_directory * "cloudy_priors.png")
+
+###
+###  Define the data from which we want to learn the parameters
+###
+
+data_names = ["M0_M1_M2"]
+moments = [0.0, 1.0, 2.0]
+n_moments = length(moments)
+
+
+###
+###  Model settings
+###
+
+# Collision-coalescence kernel to be used in Cloudy
+tspan = (0.0, 1.0)
+coalescence_coeff = 1 / 3.14 / 4 / 100
+kernel_func = x -> coalescence_coeff
+kernel = Cloudy.KernelTensors.CoalescenceTensor(kernel_func, 0, 300.0)
+
+
+###
+###  Generate (artificial) truth samples
+###
+
+dyn_model_settings_true = DynamicalModel.ModelSettings(kernel, dist_true, moments, tspan)
+
+G_t = DynamicalModel.run_dyn_model(ϕ_true, dyn_model_settings_true)
+n_samples = 100
+y_t = zeros(length(G_t), n_samples)
+# In a perfect model setting, the "observational noise" represents the
+# internal model variability. Since Cloudy is a purely deterministic model,
+# there is no straightforward way of coming up with a covariance structure
+# for this internal model variability. 
We decide to use a diagonal +# covariance, with entries (variances) largely proportional to their +# corresponding data values, G_t +Γy = convert(Array, Diagonal([100.0, 5.0, 30.0])) +μ = zeros(length(G_t)) + +# Add noise +for i in 1:n_samples + y_t[:, i] = G_t .+ rand(MvNormal(μ, Γy)) +end + +truth = Observation(Dict("samples" => vec(mean(y_t, dims = 2)), "covariances" => Γy, "names" => data_names)) + + +### +### Calibrate: Ensemble Kalman Inversion +### + +N_ens = 50 # number of ensemble members +N_iter = 15 # number of EKI iterations +# initial parameters: n_params x N_ens +initial_params = construct_initial_ensemble(rng, priors, N_ens) +ekiobj = EnsembleKalmanProcess(initial_params, truth, Inversion(), scheduler = DataMisfitController()) + +# Initialize a ParticleDistribution with dummy parameters. The parameters +# will then be set within `run_dyn_model` +dummy = ones(n_params) +dist_type = ParticleDistributions.GammaPrimitiveParticleDistribution(dummy...) +model_settings = DynamicalModel.ModelSettings(kernel, dist_type, moments, tspan) +# EKI iterations +for n in 1:N_iter + # Return transformed parameters in physical/constrained space + ϕ_n = get_ϕ_final(priors, ekiobj) + # Evaluate forward map + G_n = [DynamicalModel.run_dyn_model(ϕ_n[:, i], model_settings) for i in 1:N_ens] + G_ens = hcat(G_n...) # reformat + EnsembleKalmanProcesses.update_ensemble!(ekiobj, G_ens) +end + + +# EKI results: Has the ensemble collapsed toward the truth? +θ_true = transform_constrained_to_unconstrained(priors, ϕ_true) +println("True parameters (unconstrained): ") +println(θ_true) + +println("\nEKI results:") +println(get_u_mean_final(ekiobj)) + +u_stored = get_u(ekiobj, return_array = false) +g_stored = get_g(ekiobj, return_array = false) +save( + joinpath(output_directory, "cloudy_calibrate_results.jld2"), + "inputs", + u_stored, + "outputs", + g_stored, + "priors", + priors, + "eki", + ekiobj, + "truth_sample", + get_obs(truth), + "truth_sample_mean", + vec(mean(y_t, dims = 2)), + "truth_input_constrained", + ϕ_true, +) + +# Plots in the unconstrained space +gr(size = (1200, 400)) + +u_init = get_u_prior(ekiobj) +anim_eki_unconst_cloudy = @animate for i in 1:(N_iter - 1) + u_i = get_u(ekiobj, i) + + p1 = plot(u_i[1, :], u_i[2, :], seriestype = :scatter, xlims = extrema(u_init[1, :]), ylims = extrema(u_init[2, :])) + plot!( + p1, + [θ_true[1]], + xaxis = "u1", + yaxis = "u2", + seriestype = "vline", + linestyle = :dash, + linecolor = :red, + label = false, + margin = 5mm, + title = "EKI iteration = " * string(i), + ) + plot!(p1, [θ_true[2]], seriestype = "hline", linestyle = :dash, linecolor = :red, label = "optimum") + + p2 = plot(u_i[2, :], u_i[3, :], seriestype = :scatter, xlims = extrema(u_init[2, :]), ylims = extrema(u_init[3, :])) + plot!( + p2, + [θ_true[2]], + xaxis = "u2", + yaxis = "u3", + seriestype = "vline", + linestyle = :dash, + linecolor = :red, + label = false, + margin = 5mm, + title = "EKI iteration = " * string(i), + ) + + plot!(p2, [θ_true[3]], seriestype = "hline", linestyle = :dash, linecolor = :red, label = "optimum") + + p3 = plot(u_i[3, :], u_i[1, :], seriestype = :scatter, xlims = extrema(u_init[3, :]), ylims = extrema(u_init[1, :])) + plot!( + p3, + [θ_true[3]], + xaxis = "u3", + yaxis = "u1", + seriestype = "vline", + linestyle = :dash, + linecolor = :red, + label = false, + margin = 5mm, + title = "EKI iteration = " * string(i), + ) + + plot!(p3, [θ_true[1]], seriestype = "hline", linestyle = :dash, linecolor = :red, label = "optimum") + + p = plot(p1, p2, p3, layout = (1, 
3))
+end
+
+gif(anim_eki_unconst_cloudy, joinpath(output_directory, "cloudy_eki_unconstr.gif"), fps = 1) # hide
+
+# Plots in the constrained space
+ϕ_init = transform_unconstrained_to_constrained(priors, u_init)
+anim_eki_cloudy = @animate for i in 1:(N_iter - 1)
+    ϕ_i = get_ϕ(priors, ekiobj, i)
+
+    p1 = plot(ϕ_i[1, :], ϕ_i[2, :], seriestype = :scatter, xlims = extrema(ϕ_init[1, :]), ylims = extrema(ϕ_init[2, :]))
+    plot!(
+        p1,
+        [ϕ_true[1]],
+        xaxis = "ϕ1",
+        yaxis = "ϕ2",
+        seriestype = "vline",
+        linestyle = :dash,
+        linecolor = :red,
+        margin = 5mm,
+        label = false,
+        title = "EKI iteration = " * string(i),
+    )
+
+    plot!(p1, [ϕ_true[2]], seriestype = "hline", linestyle = :dash, linecolor = :red, label = "optimum")
+
+    p2 = plot(ϕ_i[2, :], ϕ_i[3, :], seriestype = :scatter, xlims = extrema(ϕ_init[2, :]), ylims = extrema(ϕ_init[3, :]))
+
+    plot!(
+        p2,
+        [ϕ_true[2]],
+        xaxis = "ϕ2",
+        yaxis = "ϕ3",
+        seriestype = "vline",
+        linestyle = :dash,
+        linecolor = :red,
+        margin = 5mm,
+        label = false,
+        title = "EKI iteration = " * string(i),
+    )
+
+    plot!(p2, [ϕ_true[3]], seriestype = "hline", linestyle = :dash, linecolor = :red, label = "optimum")
+
+    p3 = plot(ϕ_i[3, :], ϕ_i[1, :], seriestype = :scatter, xlims = extrema(ϕ_init[3, :]), ylims = extrema(ϕ_init[1, :]))
+    plot!(
+        p3,
+        [ϕ_true[3]],
+        xaxis = "ϕ3",
+        yaxis = "ϕ1",
+        seriestype = "vline",
+        linestyle = :dash,
+        linecolor = :red,
+        margin = 5mm,
+        label = false,
+        title = "EKI iteration = " * string(i),
+    )
+    plot!(p3, [ϕ_true[1]], seriestype = "hline", linestyle = :dash, linecolor = :red, label = "optimum")
+
+    p = plot(p1, p2, p3, layout = (1, 3))
+
+end
+
+gif(anim_eki_cloudy, joinpath(output_directory, "cloudy_eki_constr.gif"), fps = 1) # hide
diff --git a/examples/Cloudy/Cloudy_emulate_sample.jl b/examples/Cloudy/Cloudy_emulate_sample.jl
new file mode 100644
index 000000000..0f3deac48
--- /dev/null
+++ b/examples/Cloudy/Cloudy_emulate_sample.jl
@@ -0,0 +1,303 @@
+# Reference the in-tree version of CalibrateEmulateSample on Julia's load path
+include(joinpath(@__DIR__, "../", "ci", "linkfig.jl"))
+
+# Import modules
+using Distributions
+using StatsBase
+using GaussianProcesses
+using LinearAlgebra
+using Random
+using JLD2
+ENV["GKSwstype"] = "100"
+using CairoMakie, PairPlots
+
+
+# Import Calibrate-Emulate-Sample modules
+using CalibrateEmulateSample.Emulators
+using CalibrateEmulateSample.MarkovChainMonteCarlo
+using CalibrateEmulateSample.Utilities
+using EnsembleKalmanProcesses
+using EnsembleKalmanProcesses.ParameterDistributions
+using EnsembleKalmanProcesses.DataContainers
+
+function get_standardizing_factors(data::Array{FT, 2}) where {FT}
+    # Input: data size: N_data x N_ensembles
+    # Ensemble median of the data
+    norm_factor = median(data, dims = 2) # N_data x 1 array
+    return norm_factor
+end
+
+################################################################################
+#                                                                              #
+#                  Cloudy Calibrate-Emulate-Sample Example                     #
+#                                                                              #
+#                                                                              #
+#     This example uses Cloudy, a microphysics model that simulates the       #
+#     collision and coalescence of cloud droplets into bigger drops, to       #
+#     demonstrate how the full Calibrate-Emulate-Sample pipeline can be       #
+#     used for Bayesian learning and uncertainty quantification of            #
+#     parameters, given some observations.                                    #
+#                                                                              #
+#     Specifically, this example shows how to learn parameters of the         #
+#     initial cloud droplet mass distribution, given observations of some     #
+#     moments of that mass distribution at a later time, after some of the    #
+#     droplets have collided and become bigger drops.                         #
+#                                                                              #
+#     In this example, Cloudy is used in a "perfect model" (aka "known        #
+#     truth") setting, which means that the "observations" are generated by   #
+#     Cloudy itself, by running it with the true parameter values. In more    #
+#     realistic applications, the observations will come from some external   #
+#     measurement system.                                                     #
+#                                                                              #
+#     The purpose is to show how to do parameter learning using               #
+#     Calibrate-Emulate-Sample in a simple (and highly artificial) setting.   #
+#                                                                              #
+#     For more information on Cloudy, see                                     #
+#     https://github.com/CliMA/Cloudy.jl.git                                  #
+#                                                                              #
+################################################################################
+
+
+function main()
+
+    rng_seed = 41
+    Random.seed!(rng_seed)
+    rng = Random.seed!(Random.GLOBAL_RNG, rng_seed)
+
+    output_directory = joinpath(@__DIR__, "output")
+    if !isdir(output_directory)
+        mkdir(output_directory)
+    end
+
+    # The calibration results must be produced by running Cloudy_calibrate.jl
+    # before running Cloudy_emulate_sample.jl
+    data_save_file = joinpath(output_directory, "cloudy_calibrate_results.jld2")
+
+    # Check if the file exists before loading
+    if isfile(data_save_file)
+
+        ekiobj = load(data_save_file)["eki"]
+        priors = load(data_save_file)["priors"]
+        truth_sample_mean = load(data_save_file)["truth_sample_mean"]
+        truth_sample = load(data_save_file)["truth_sample"]
+        # True parameters:
+        # - ϕ: in constrained space
+        # - θ: in unconstrained space
+        ϕ_true = load(data_save_file)["truth_input_constrained"]
+        θ_true = transform_constrained_to_unconstrained(priors, ϕ_true)
+
+    else
+        error("File not found: $data_save_file. 
Please run 'Cloudy_calibrate.jl' first.") + + end + + param_names = get_name(priors) + n_params = length(ϕ_true) # input dimension + n_outputs = length(truth_sample) # output dimension + + Γy = get_obs_noise_cov(ekiobj) + + cases = [ + "rf-scalar", + "gp-gpjl", # Veeeery slow predictions + "rf-nosvd-nonsep", + ] + + # Specify cases to run (e.g., case_mask = [2] only runs the second case) + case_mask = [1, 2, 3] + + # These settings are the same for all Gaussian Process cases + pred_type = YType() # we want to predict data + + # These settings are the same for all Random Feature cases + n_features = 400 + nugget = 1e-8 + optimizer_options = Dict( + "verbose" => true, + "scheduler" => DataMisfitController(terminate_at = 100.0), + "cov_sample_multiplier" => 1.0, + "n_iteration" => 20, + ) + + # We use the same input-output-pairs and normalization factors for + # Gaussian Process and Random Feature cases + input_output_pairs = get_training_points(ekiobj, length(get_u(ekiobj)) - 2) + norm_factors = get_standardizing_factors(get_outputs(input_output_pairs)) + for case in cases[case_mask] + + println(" ") + println("*********************************\n") + @info "running case $case" + + if case == "gp-gpjl" + + @warn "gp-gpjl case is very slow at prediction" + gppackage = GPJL() + # Kernel is the sum of a squared exponential (SE), Matérn 5/2, and + # white noise + gp_kernel = SE(1.0, 1.0) + Mat52Ard(zeros(3), 0.0) + Noise(log(2.0)) + + # Define machine learning tool + mlt = GaussianProcess(gppackage; kernel = gp_kernel, prediction_type = pred_type, noise_learn = false) + + decorrelate = true + standardize_outputs = true + + elseif case == "rf-scalar" + + kernel_rank = 3 + kernel_structure = SeparableKernel(LowRankFactor(kernel_rank, nugget), OneDimFactor()) + + # Define machine learning tool + mlt = ScalarRandomFeatureInterface( + n_features, + n_params, + kernel_structure = kernel_structure, + optimizer_options = optimizer_options, + ) + + decorrelate = true + standardize_outputs = true + + elseif case == "rf-nosvd-nonsep" + + # Define machine learning tool + kernel_rank = 4 + mlt = VectorRandomFeatureInterface( + n_features, + n_params, + n_outputs, + kernel_structure = NonseparableKernel(LowRankFactor(kernel_rank, nugget)), + optimizer_options = optimizer_options, + ) + + # Vector RF does not require decorrelation of outputs + decorrelate = false + standardize_outputs = false + + + else + error("Case $case is not implemented yet.") + + end + + # The data processing normalizes input data, and decorrelates + # output data with information from Γy, if required + # Note: The `standardize_outputs_factors` are only used under the + # condition that `standardize_outputs` is true. 
+ emulator = Emulator( + mlt, + input_output_pairs, + obs_noise_cov = Γy, + decorrelate = decorrelate, + standardize_outputs = standardize_outputs, + standardize_outputs_factors = vcat(norm_factors...), + ) + + optimize_hyperparameters!(emulator) + + # Check how well the emulator predicts on the true parameters + y_mean, y_var = Emulators.predict(emulator, reshape(θ_true, :, 1); transform_to_real = true) + + println("Emulator ($(case)) prediction on true parameters: ") + println(vec(y_mean)) + println("true data: ") + println(truth_sample) # what was used as truth + println("Emulator ($(case)) predicted standard deviation: ") + println(sqrt.(diag(y_var[1], 0))) + println("Emulator ($(case)) MSE (truth): ") + println(mean((truth_sample - vec(y_mean)) .^ 2)) + + + ### + ### Sample: Markov Chain Monte Carlo + ### + + # initial values + u0 = vec(mean(get_inputs(input_output_pairs), dims = 2)) + println("initial parameters: ", u0) + + # First let's run a short chain to determine a good step size + yt_sample = truth_sample + mcmc = MCMCWrapper(RWMHSampling(), yt_sample, priors, emulator; init_params = u0) + + new_step = optimize_stepsize(mcmc; init_stepsize = 0.1, N = 2000, discard_initial = 0) + + # Now begin the actual MCMC + println("Begin MCMC - with step size ", new_step) + chain = MarkovChainMonteCarlo.sample(mcmc, 100_000; stepsize = new_step, discard_initial = 1_000) + + posterior = MarkovChainMonteCarlo.get_posterior(mcmc, chain) + + post_mean = mean(posterior) + post_cov = cov(posterior) + println("posterior mean") + println(post_mean) + println("posterior covariance") + println(post_cov) + + # Prior samples + prior_samples_unconstr = sample(rng, priors, Int(1e4)) + prior_samples_constr = transform_unconstrained_to_constrained(priors, prior_samples_unconstr) + + # Posterior samples + posterior_samples_unconstr = vcat([get_distribution(posterior)[name] for name in get_name(posterior)]...) # samples are columns + posterior_samples_constr = + mapslices(x -> transform_unconstrained_to_constrained(posterior, x), posterior_samples_unconstr, dims = 1) + + # Make pair plots of the posterior distributions in the unconstrained + # and in the constrained space (this uses `PairPlots.jl`) + figpath_unconstr = joinpath(output_directory, "pairplot_posterior_unconstr_" * case * ".png") + figpath_constr = joinpath(output_directory, "pairplot_posterior_constr_" * case * ".png") + labels = get_name(posterior) + + data_unconstr = (; [(Symbol(labels[i]), posterior_samples_unconstr[i, :]) for i in 1:length(labels)]...) + data_constr = (; [(Symbol(labels[i]), posterior_samples_constr[i, :]) for i in 1:length(labels)]...) 
+ + p_unconstr = pairplot(data_unconstr => (PairPlots.Scatter(),)) + p_constr = pairplot(data_constr => (PairPlots.Scatter(),)) + save(figpath_unconstr, p_unconstr) + save(figpath_constr, p_constr) + + # Plot the marginal posterior distributions together with the priors + # and the true parameter values (we'll do that in the constrained space) + + for idx in 1:n_params + + # Find the range of the posterior samples + xmin = minimum(posterior_samples_constr[idx, :]) + xmax = maximum(posterior_samples_constr[idx, :]) + + # Create a figure and axis for plotting + fig = Figure(; size = (800, 600)) + ax = Axis(fig[1, 1]) + + # Histogram for posterior samples + hist!(ax, posterior_samples_constr[idx, :], bins = 100, color = :darkorange, label = "posterior") + + # Plotting the prior distribution + hist!(ax, prior_samples_constr[idx, :], bins = 10000, color = :slategray) + + # Adding a vertical line for the true value + vlines!(ax, [ϕ_true[idx]], color = :indigo, linewidth = 2.6, label = "true " * param_names[idx]) + + + + # Setting title and labels + ax.xlabel = "Value" + ax.ylabel = "Density" + ax.title = param_names[idx] + ax.titlesize = 20 + + # Save the figure (marginal posterior distribution in constrained + # space) + figname = "marginal_posterior_constr_" * case * "_" * param_names[idx] * ".png" + figpath_marg_constr = joinpath(output_directory, figname) + save(figpath_marg_constr, fig) + + end + end +end + + +main() diff --git a/examples/Cloudy/DynamicalModel.jl b/examples/Cloudy/DynamicalModel.jl new file mode 100644 index 000000000..f8b4d807e --- /dev/null +++ b/examples/Cloudy/DynamicalModel.jl @@ -0,0 +1,72 @@ +module DynamicalModel + +using DocStringExtensions + +using Cloudy +using Cloudy.ParticleDistributions +using Cloudy.KernelTensors +using Cloudy.EquationTypes +using Cloudy.Coalescence + +using DifferentialEquations + +export ModelSettings +export run_dyn_model + + +""" + ModelSettings{FT<:AbstractFloat, KT, D} + +Structure to hold all information to run the dynamical model + +# Fields +$(DocStringExtensions.FIELDS) +""" +struct ModelSettings{FT <: AbstractFloat, KT, D <: PrimitiveParticleDistribution{FT}} + "a kernel tensor specifying the physics of collision-coalescence" + kernel::KT + "a cloud droplet mass distribution function" + dist::D + "the moments of `dist` that the model should return" + moments::Array{FT, 1} + "time period over which to run the model, e.g., `(0, 1)`" + tspan::Tuple{FT, FT} +end + + +""" + run_dyn_model(ϕ::Array{FT, 1}, settings::ModelSettings{FT}) where {FT<:AbstractFloat} + +Run the dynamical model (Cloudy) for the given parameter vector ϕ=[N0,θ,k], which defines the initial distribution of droplet masses. This distribution +is assumed to be a (scaled) gamma distribution with scaling factor N0 (defining the number of particles), scale parameter θ, and shape parameter k. +Return the model output, a vector of moments of the particle mass +distribution at the end time of the simulation. + + - `ϕ` - parameter vector + - `settings` - a ModelSettings struct + +""" +function run_dyn_model(ϕ::Array{FT, 1}, settings::ModelSettings{FT}) where {FT <: AbstractFloat} + + # Generate the initial distribution + dist_init = GammaPrimitiveParticleDistribution(ϕ...) 
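+    # For reference: the (scaled) gamma mass distribution constructed here is
+    #   n(m) = N0 * m^(k - 1) * exp(-m / θ) / (θ^k * Γ(k)),
+    # with p-th moment M_p = N0 * θ^p * Γ(k + p) / Γ(k), so M_0 = N0 and M_1 = N0 * k * θ.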
+    moments_init = get_moments(dist_init)
+
+    # Set up ODE problem: dM/dt = f(M, ϕ, t)
+    # (the collision kernel and the integration time span are taken from `settings`)
+    ODE_parameters = Dict(:dist => [dist_init], :kernel => settings.kernel, :dt => FT(1))
+
+    rhs(m, par, t) = get_int_coalescence(OneModeCoalStyle(), m, par, par[:kernel])
+    prob = ODEProblem(rhs, moments_init, settings.tspan, ODE_parameters)
+    sol = solve(prob, SSPRK33(), dt = ODE_parameters[:dt])
+
+    # Return moments at last time step
+    moments_final = vcat(sol.u'...)[end, :]
+
+    return moments_final
+end
+
+end
diff --git a/examples/deprecated/Cloudy/Project.toml b/examples/Cloudy/Project.toml
similarity index 72%
rename from examples/deprecated/Cloudy/Project.toml
rename to examples/Cloudy/Project.toml
index 7dc639a81..9b6148406 100644
--- a/examples/deprecated/Cloudy/Project.toml
+++ b/examples/Cloudy/Project.toml
@@ -1,19 +1,20 @@
 [deps]
+CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
+CalibrateEmulateSample = "95e48a1f-0bec-4818-9538-3db4340308e3"
 Cloudy = "9e3b23bb-e7cc-4b94-886c-65de2234ba87"
-Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d"
 DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
-FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
 GaussianProcesses = "891a1506-143c-57d2-908e-e1f8e92e6de9"
+JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+PairPlots = "43a3c2be-4208-490b-832a-a21dcd55d7da"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
-Sundials = "c3572dad-4567-51f8-b174-8c6c989267f4"

 [compat]
+Cloudy = "0.2"
 julia = "~1.6"
-FFTW = "= 1.3.0"
diff --git a/examples/Darcy/GModel.jl b/examples/Darcy/GModel.jl
new file mode 100644
index 000000000..e18ecccf6
--- /dev/null
+++ b/examples/Darcy/GModel.jl
@@ -0,0 +1,219 @@
+##################
+# Copied on 3/16/23 and modified from
+# https://github.com/Zhengyu-Huang/InverseProblems.jl/blob/master/Fluid/Darcy-2D.jl
+##################
+
+using JLD2
+using Statistics
+using LinearAlgebra
+using Distributions
+using Random
+using SparseArrays
+
+
+mutable struct Setup_Param{FT <: AbstractFloat, IT <: Int}
+    # physics
+    N::IT # number of grid points for both x and y directions (including both ends)
+    Δx::FT
+    xx::AbstractVector{FT} # uniform grid [a, a+Δx, a+2Δx ... b] (in each dimension)
+
+    # for the source term
+    f_2d::AbstractMatrix{FT}
+
+    κ::AbstractMatrix{FT}
+
+    # observation locations are the tensor product x_locs × y_locs
+    x_locs::AbstractVector{IT}
+    y_locs::AbstractVector{IT}
+
+    N_y::IT
+end
+
+
+function Setup_Param(
+    xx::AbstractVector{FT},
+    obs_ΔN::IT,
+    κ::AbstractMatrix;
+    seed::IT = 123,
+) where {FT <: AbstractFloat, IT <: Int}
+
+    N = length(xx)
+    Δx = xx[2] - xx[1]
+
+    # logκ_2d, φ, λ, θ_ref = generate_θ_KL(xx, N_KL, d, τ, seed=seed)
+    f_2d = compute_f_2d(xx)
+
+    x_locs = Array(obs_ΔN:obs_ΔN:(N - obs_ΔN))
+    y_locs = Array(obs_ΔN:obs_ΔN:(N - obs_ΔN))
+    N_y = length(x_locs) * length(y_locs)
+
+    Setup_Param(N, Δx, xx, f_2d, κ, x_locs, y_locs, N_y)
+end
+
+
+#=
+A hardcoded source function,
+which assumes the computational domain is
+[0 1]×[0 1]:
+f(x,y) = f(y),
+i.e., it depends only on y
+=#
+function compute_f_2d(yy::AbstractVector{FT}) where {FT <: AbstractFloat}
+    N = length(yy)
+    f_2d = zeros(FT, N, N)
+    for i in 1:N
+        if (yy[i] <= 4 / 6)
+            f_2d[:, i] .= 1000.0
+        elseif (yy[i] >= 4 / 6 && yy[i] <= 5 / 6)
+            f_2d[:, i] .= 2000.0
+        elseif (yy[i] >= 5 / 6)
+            f_2d[:, i] .= 3000.0
+        end
+    end
+    return f_2d
+end
+
+"""
+    run_G_ensemble(darcy, κs::AbstractMatrix)
+
+Computes the forward map `G` (`solve_Darcy_2D` followed by `compute_obs`) over an ensemble of `κ`'s, stored flattened as columns of `κs`
+"""
+function run_G_ensemble(darcy, κs::AbstractMatrix)
+    N_ens = size(κs, 2) # ensemble size
+    nd = darcy.N_y # number of observations
+    g_ens = zeros(nd, N_ens)
+    for i in 1:N_ens
+        # run the model with the current parameters, i.e., map θ to G(θ)
+        κ_i = reshape(κs[:, i], darcy.N, darcy.N) # unflatten
+        h_i = solve_Darcy_2D(darcy, κ_i) # run model
+        g_ens[:, i] = compute_obs(darcy, h_i) # observe solution
+    end
+    return g_ens
+end
+
+
+#=
+    Return the unknown index of a grid point.
+    Since zero-Dirichlet boundary conditions are imposed on
+    all four edges, the degrees of freedom are only on interior points
+=#
+function ind(darcy::Setup_Param{FT, IT}, ix::IT, iy::IT) where {FT <: AbstractFloat, IT <: Int}
+    return (ix - 1) + (iy - 2) * (darcy.N - 2)
+end
+
+function ind_all(darcy::Setup_Param{FT, IT}, ix::IT, iy::IT) where {FT <: AbstractFloat, IT <: Int}
+    return ix + (iy - 1) * darcy.N
+end
+
+#=
+    solve Darcy equation with finite difference method:
+    -∇(κ∇h) = f
+    with Dirichlet boundary condition, h=0 on ∂Ω
+=#
+function solve_Darcy_2D(darcy::Setup_Param{FT, IT}, κ_2d::AbstractMatrix{FT}) where {FT <: AbstractFloat, IT <: Int}
+    Δx, N = darcy.Δx, darcy.N
+
+    indx = IT[]
+    indy = IT[]
+    vals = FT[]
+
+    f_2d = darcy.f_2d
+
+    𝓒 = Δx^2
+    for iy in 2:(N - 1)
+        for ix in 2:(N - 1)
+
+            ixy = ind(darcy, ix, iy)
+
+            #top
+            if iy == N - 1
+                #ft = -(κ_2d[ix, iy] + κ_2d[ix, iy+1])/2.0 * (0 - h_2d[ix,iy])
+                push!(indx, ixy)
+                push!(indy, ixy)
+                push!(vals, (κ_2d[ix, iy] + κ_2d[ix, iy + 1]) / 2.0 / 𝓒)
+            else
+                #ft = -(κ_2d[ix, iy] + κ_2d[ix, iy+1])/2.0 * (h_2d[ix,iy+1] - h_2d[ix,iy])
+                append!(indx, [ixy, ixy])
+                append!(indy, [ixy, ind(darcy, ix, iy + 1)])
+                append!(
+                    vals,
+                    [(κ_2d[ix, iy] + κ_2d[ix, iy + 1]) / 2.0 / 𝓒, -(κ_2d[ix, iy] + κ_2d[ix, iy + 1]) / 2.0 / 𝓒],
+                )
+            end
+
+            #bottom
+            if iy == 2
+                #fb = (κ_2d[ix, iy] + κ_2d[ix, iy-1])/2.0 * (h_2d[ix,iy] - 0)
+                push!(indx, ixy)
+                push!(indy, ixy)
+                push!(vals, (κ_2d[ix, iy] + κ_2d[ix, iy - 1]) / 2.0 / 𝓒)
+            else
+                #fb = (κ_2d[ix, iy] + κ_2d[ix, iy-1])/2.0 * (h_2d[ix,iy] - h_2d[ix,iy-1])
+                append!(indx, [ixy, ixy])
+                append!(indy, [ixy, ind(darcy, ix, iy - 1)])
+                append!(
+                    vals,
+                    [(κ_2d[ix, iy] + κ_2d[ix, iy - 1]) / 2.0 / 𝓒, -(κ_2d[ix, iy] + κ_2d[ix, iy - 1]) / 2.0 / 𝓒],
+                )
+            end
+
+            #right
+            if ix == N - 1
+                #fr = -(κ_2d[ix, iy] + κ_2d[ix+1, iy])/2.0 * (0 - h_2d[ix,iy])
+                push!(indx, ixy)
+                push!(indy, ixy)
+                push!(vals, (κ_2d[ix, iy] + κ_2d[ix + 1, iy]) / 2.0 / 𝓒)
+            else
+                #fr = -(κ_2d[ix, iy] + κ_2d[ix+1, iy])/2.0 * (h_2d[ix+1,iy] - h_2d[ix,iy])
+                append!(indx, [ixy, ixy])
+                append!(indy, [ixy, ind(darcy, ix + 1, iy)])
+                append!(
+                    vals,
+                    [(κ_2d[ix, iy] + κ_2d[ix + 1, iy]) / 2.0 / 𝓒, -(κ_2d[ix, iy] + κ_2d[ix + 1, iy]) / 2.0 / 𝓒],
+                )
+            end
+
+            #left
+            if ix == 2
+                #fl = (κ_2d[ix, iy] + κ_2d[ix-1, iy])/2.0 * (h_2d[ix,iy] - 0)
+                push!(indx, ixy)
+                push!(indy, ixy)
+                push!(vals, (κ_2d[ix, iy] + κ_2d[ix - 1, iy]) / 2.0 / 𝓒)
+            else
+                #fl = (κ_2d[ix, iy] + κ_2d[ix-1, iy])/2.0 * (h_2d[ix,iy] - h_2d[ix-1,iy])
+                append!(indx, [ixy, ixy])
+                append!(indy, [ixy, ind(darcy, ix - 1, iy)])
+                append!(
+                    vals,
+                    [(κ_2d[ix, iy] + κ_2d[ix - 1, iy]) / 2.0 / 𝓒, -(κ_2d[ix, iy] + κ_2d[ix - 1, iy]) / 2.0 / 𝓒],
+                )
+            end
+
+        end
+    end
+
+    df = sparse(indx, indy, vals, (N - 2)^2, (N - 2)^2)
+    # Multithreading is not supported by the sparse matrix solver
+    h = df \ (f_2d[2:(N - 1), 2:(N - 1)])[:]
+
+    h_2d = zeros(FT, N, N)
+    h_2d[2:(N - 1), 2:(N - 1)] .= reshape(h, N - 2, N - 2)
+
+    return h_2d
+end
+
+#=
+Compute observation values
+=#
+function compute_obs(darcy::Setup_Param{FT, IT}, h_2d::AbstractMatrix{FT}) where {FT <: AbstractFloat, IT <: Int}
+    # X---X(1)---X(2) ... X(obs_N)---X
+    obs_2d = h_2d[darcy.x_locs, darcy.y_locs]
+
+    return obs_2d[:]
+end
diff --git a/examples/Darcy/Project.toml b/examples/Darcy/Project.toml
new file mode 100644
index 000000000..ae3882ed0
--- /dev/null
+++ b/examples/Darcy/Project.toml
@@ -0,0 +1,10 @@
+[deps]
+CalibrateEmulateSample = "95e48a1f-0bec-4818-9538-3db4340308e3"
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+GaussianRandomFields = "e4b2fa32-6e09-5554-b718-106ed5adafe9"
+JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
diff --git a/examples/Darcy/calibrate.jl b/examples/Darcy/calibrate.jl
new file mode 100644
index 000000000..f79d5dda7
--- /dev/null
+++ b/examples/Darcy/calibrate.jl
@@ -0,0 +1,226 @@
+# # [Learning the permeability field in a Darcy flow from noisy sparse observations]
+
+# In this example we hope to illustrate function learning. One may wish to use function learning in cases where the underlying parameter of interest is actually a finite-dimensional approximation (e.g. spatial discretization) of some "true" function. Treating such an object directly will lead to increasingly high-dimensional learning problems as the spatial resolution is increased, resulting in poor computational scaling and increasingly ill-posed inverse problems. Treating the object instead as a discretized function from a function space, one can learn coefficients in a basis of this function space rather than in the standard basis. It is commonly the case that such functions have relatively low effective dimension, and depend on the spatial discretization only through the discretization error, which should vanish as the resolution is increased.
+
+# We will solve for an unknown permeability field ``\kappa`` governing the pressure field of a Darcy flow on a square 2D domain. To learn about the permeability we shall take a few pointwise measurements of the solved pressure field within the domain. The forward solver is a simple finite difference scheme taken and modified from code [here](https://github.com/Zhengyu-Huang/InverseProblems.jl/blob/master/Fluid/Darcy-2D.jl).
+
+# First we load standard packages
+using LinearAlgebra
+using Distributions
+using Random
+using JLD2
+
+# the package to define the function distributions
+import GaussianRandomFields # we wrap this package, so we do not load it with `using`
+const GRF = GaussianRandomFields
+
+# and finally the EKP packages
+using CalibrateEmulateSample
+using CalibrateEmulateSample.EnsembleKalmanProcesses
+using CalibrateEmulateSample.EnsembleKalmanProcesses.ParameterDistributions
+const EKP = CalibrateEmulateSample.EnsembleKalmanProcesses
+
+# We include the forward solver here
+include("GModel.jl")
+
+# Then we set up output directories for figures and data
+fig_save_directory = joinpath(@__DIR__, "output")
+data_save_directory = joinpath(@__DIR__, "output")
+if !isdir(fig_save_directory)
+    mkdir(fig_save_directory)
+end
+if !isdir(data_save_directory)
+    mkdir(data_save_directory)
+end
+
+PLOT_FLAG = true
+if PLOT_FLAG
+    using Plots
+    @info "Plotting enabled, this will reduce code performance. Figures stored in $fig_save_directory"
+end
+
+# Set a random seed.
+seed = 100234
+rng = Random.MersenneTwister(seed)
+
+
+function main()
+    # Define the spatial domain and discretization
+    dim = 2
+    N, L = 80, 1.0
+    pts_per_dim = LinRange(0, L, N)
+    obs_ΔN = 10
+
+    # To provide a simple test case, we assume that the true function parameter is a particular sample from the function space we set up to define our prior. More precisely, we choose a value of the truth that doesn't have a vanishingly small probability under the prior defined by a probability distribution over functions; here taken as a family of Gaussian Random Fields (GRF). The function distribution is characterized by a covariance function - here a Matérn kernel, which assumes a level of smoothness over the samples from the distribution. We define an appropriate expansion of this distribution, here based on the Karhunen-Loeve expansion (similar to an eigenvalue-eigenfunction expansion) that is truncated to a finite number of terms, known as the degrees of freedom (`dofs`). The `dofs` define the effective dimension of the learning problem, decoupled from the spatial discretization. Explicitly, larger `dofs` may be required to represent multiscale functions, but come at an increased dimension of the parameter space and therefore a typical increase in cost and difficulty of the learning problem.
+
+    smoothness = 0.1
+    corr_length = 1.0
+    dofs = 5
+
+    grf = GRF.GaussianRandomField(
+        GRF.CovarianceFunction(dim, GRF.Matern(smoothness, corr_length)),
+        GRF.KarhunenLoeve(dofs),
+        pts_per_dim,
+        pts_per_dim,
+    )
+
+    # We define a wrapper around the GRF, and as the permeability field must be positive we introduce a domain constraint into the function distribution. Henceforth, the GRF is interfaced in the same manner as any other parameter distribution.
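+    # As a quick illustration (a sketch for exposition only, assuming GaussianRandomFields.jl's
+    # `sample` accepts the KL coefficients through its `xi` keyword), one unconstrained field
+    # realization can be drawn from explicit coefficients via:
+    #
+    #   ξ_demo = randn(rng, dofs)                  # one draw of the `dofs` KL coefficients
+    #   field_demo = GRF.sample(grf, xi = ξ_demo)  # the corresponding N × N field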
+    pkg = GRFJL()
+    distribution = GaussianRandomFieldInterface(grf, pkg) # our wrapper from EKP
+    domain_constraint = bounded_below(0) # make κ positive
+    pd = ParameterDistribution(
+        Dict("distribution" => distribution, "name" => "kappa", "constraint" => domain_constraint),
+    ) # the fully constrained parameter distribution
+
+    # Now that we have a function distribution, we sample a reasonably high-probability value from it as the true value (here each degree of freedom is set to ±1 by taking random signs). We use the EKP transform function to build the corresponding instance of ``\kappa_{\mathrm{true}}``.
+    u_true = sign.(randn(dofs, 1)) # the truth parameter
+    println("True coefficients: ")
+    println(u_true)
+    κ_true = transform_unconstrained_to_constrained(pd, u_true) # builds and constrains the function.
+    κ_true = reshape(κ_true, N, N)
+
+    # Now we generate the data sample for the truth in a perfect model setting by evaluating the model here, observing it by subsampling in each dimension every `obs_ΔN` points, and adding some observational noise
+    darcy = Setup_Param(pts_per_dim, obs_ΔN, κ_true)
+    println(" Number of observation points: $(darcy.N_y)")
+    h_2d_true = solve_Darcy_2D(darcy, κ_true)
+    y_noiseless = compute_obs(darcy, h_2d_true)
+    obs_noise_cov = 0.25^2 * I(length(y_noiseless)) * (maximum(y_noiseless) - minimum(y_noiseless))
+    truth_sample = vec(y_noiseless + rand(rng, MvNormal(zeros(length(y_noiseless)), obs_noise_cov)))
+
+
+    # Now we set up the Bayesian inversion algorithm. The prior is the function distribution we already defined and used to construct our truth
+    prior = pd
+
+
+    # We define some algorithm parameters; here we take a number of ensemble members larger than the dimension of the parameter space
+    N_ens = 30 # number of ensemble members
+    N_iter = 5 # number of EKI iterations
+
+    # We sample the initial ensemble from the prior, and create the EKP object as an EKI algorithm using the `Inversion()` argument
+    initial_params = construct_initial_ensemble(rng, prior, N_ens)
+    ekiobj = EKP.EnsembleKalmanProcess(initial_params, truth_sample, obs_noise_cov, Inversion())
+
+    # We perform the inversion loop. Remember that within calls to `get_ϕ_final` the EKP transformations are applied, thus the ensemble that is returned will be the positively-bounded permeability field evaluated at all the discretization points.
+    println("Begin inversion")
+    err = []
+    final_it = [N_iter]
+    for i in 1:N_iter
+        params_i = get_ϕ_final(prior, ekiobj)
+        g_ens = run_G_ensemble(darcy, params_i)
+        terminate = EKP.update_ensemble!(ekiobj, g_ens)
+        push!(err, get_error(ekiobj)[end]) #mean((params_true - mean(params_i,dims=2)).^2)
+        println("Iteration: " * string(i) * ", Error: " * string(err[i]))
+        if !isnothing(terminate)
+            final_it[1] = i - 1
+            break
+        end
+    end
+    n_iter = final_it[1]
+    # We first plot the prior ensemble mean and pointwise variance of the permeability field, and also the pressure field solved with the ensemble mean. Each ensemble member is stored as a column, so for uses such as plotting one needs to reshape to the desired dimensions.
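+    # For example, with the N × N field flattened into the N^2 × N_ens ensemble array,
+    # member `j` is recovered as a 2D field via:
+    #
+    #   ϕ_ens = get_ϕ_final(prior, ekiobj)   # N^2 × N_ens
+    #   field_j = reshape(ϕ_ens[:, j], N, N)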
+ if PLOT_FLAG + gr(size = (1500, 400), legend = false) + prior_κ_ens = get_ϕ(prior, ekiobj, 1) + κ_ens_mean = reshape(mean(prior_κ_ens, dims = 2), N, N) + p1 = contour( + pts_per_dim, + pts_per_dim, + κ_ens_mean', + fill = true, + levels = 15, + title = "kappa mean", + colorbar = true, + ) + κ_ens_ptw_var = reshape(var(prior_κ_ens, dims = 2), N, N) + p2 = contour( + pts_per_dim, + pts_per_dim, + κ_ens_ptw_var', + fill = true, + levels = 15, + title = "kappa var", + colorbar = true, + ) + h_2d = solve_Darcy_2D(darcy, κ_ens_mean) + p3 = contour(pts_per_dim, pts_per_dim, h_2d', fill = true, levels = 15, title = "pressure", colorbar = true) + l = @layout [a b c] + plt = plot(p1, p2, p3, layout = l) + savefig(plt, joinpath(fig_save_directory, "output_prior.png")) # pre update + + end + + # Now we plot the final ensemble mean and pointwise variance of the permeability field, and also the pressure field solved with the ensemble mean. + if PLOT_FLAG + gr(size = (1500, 400), legend = false) + final_κ_ens = get_ϕ_final(prior, ekiobj) # the `ϕ` indicates that the `params_i` are in the constrained space + κ_ens_mean = reshape(mean(final_κ_ens, dims = 2), N, N) + p1 = contour( + pts_per_dim, + pts_per_dim, + κ_ens_mean', + fill = true, + levels = 15, + title = "kappa mean", + colorbar = true, + ) + κ_ens_ptw_var = reshape(var(final_κ_ens, dims = 2), N, N) + p2 = contour( + pts_per_dim, + pts_per_dim, + κ_ens_ptw_var', + fill = true, + levels = 15, + title = "kappa var", + colorbar = true, + ) + h_2d = solve_Darcy_2D(darcy, κ_ens_mean) + p3 = contour(pts_per_dim, pts_per_dim, h_2d', fill = true, levels = 15, title = "pressure", colorbar = true) + l = @layout [a b c] + plt = plot(p1, p2, p3; layout = l) + savefig(plt, joinpath(fig_save_directory, "output_it_" * string(n_iter) * ".png")) # pre update + + end + println("Final coefficients (ensemble mean):") + println(get_u_mean_final(ekiobj)) + + # We can compare this with the true permeability and pressure field: + if PLOT_FLAG + gr(size = (1000, 400), legend = false) + p1 = contour(pts_per_dim, pts_per_dim, κ_true', fill = true, levels = 15, title = "kappa true", colorbar = true) + p2 = contour( + pts_per_dim, + pts_per_dim, + h_2d_true', + fill = true, + levels = 15, + title = "pressure true", + colorbar = true, + ) + l = @layout [a b] + plt = plot(p1, p2, layout = l) + savefig(plt, joinpath(fig_save_directory, "output_true.png")) + end + + # Finally the data is saved + u_stored = get_u(ekiobj, return_array = false) + g_stored = get_g(ekiobj, return_array = false) + + save( + joinpath(data_save_directory, "calibrate_results.jld2"), + "inputs", + u_stored, + "outputs", + g_stored, + "prior", + prior, + "eki", + ekiobj, + "truth_sample", + truth_sample, #data + "truth_input_constrained", # the discrete true parameter field + κ_true, + "truth_input_unconstrained", # the discrete true KL coefficients + u_true, + ) +end + +main() diff --git a/examples/Darcy/emulate_sample.jl b/examples/Darcy/emulate_sample.jl new file mode 100644 index 000000000..5ba96b8fe --- /dev/null +++ b/examples/Darcy/emulate_sample.jl @@ -0,0 +1,206 @@ +# Import modules +include(joinpath(@__DIR__, "..", "ci", "linkfig.jl")) + +# Import modules +using Distributions # probability distributions and associated functions +using LinearAlgebra +ENV["GKSwstype"] = "100" +using Plots +using Random +using JLD2 + +# CES +using CalibrateEmulateSample.Emulators +using CalibrateEmulateSample.MarkovChainMonteCarlo +using CalibrateEmulateSample.Utilities +using 
CalibrateEmulateSample.EnsembleKalmanProcesses
+using CalibrateEmulateSample.EnsembleKalmanProcesses.Localizers
+using CalibrateEmulateSample.ParameterDistributions
+using CalibrateEmulateSample.DataContainers
+
+include("GModel.jl")
+
+function main()
+
+    cases = [
+        "GP", # diagonalize, train scalar GP, assume diag inputs
+    ]
+
+    #### CHOOSE YOUR CASE:
+    mask = [1] # indices into `cases` above, e.g., [1]
+    for case in cases[mask]
+
+
+        println("case: ", case)
+        min_iter = 1
+        max_iter = 5 # use training data from at most this many EKP iterations
+
+        exp_name = "darcy"
+        rng_seed = 940284
+        rng = Random.MersenneTwister(rng_seed)
+
+        # loading relevant data
+        homedir = pwd()
+        println(homedir)
+        figure_save_directory = joinpath(homedir, "output/")
+        data_save_directory = joinpath(homedir, "output/")
+        data_save_file = joinpath(data_save_directory, "calibrate_results.jld2")
+
+        if !isfile(data_save_file)
+            throw(
+                ErrorException(
+                    "data file $data_save_file not found. \n First run: \n > julia --project calibrate.jl \n to generate and store results in $data_save_file",
+                ),
+            )
+        end
+
+        ekiobj = load(data_save_file)["eki"]
+        prior = load(data_save_file)["prior"]
+        truth_sample = load(data_save_file)["truth_sample"]
+        truth_params_constrained = load(data_save_file)["truth_input_constrained"] # true parameters in constrained space
+        truth_params = load(data_save_file)["truth_input_unconstrained"] # true parameters in unconstrained space
+        Γy = get_obs_noise_cov(ekiobj)
+        # should ideally be loaded from calibrate (matching values in main)
+        N, L = 80, 1.0
+        pts_per_dim = LinRange(0, L, N)
+        obs_ΔN = 10
+        darcy = Setup_Param(pts_per_dim, obs_ΔN, truth_params_constrained)
+
+
+        n_params = length(truth_params) # "input dim"
+        output_dim = size(Γy, 1)
+        ###
+        ### Emulate: Gaussian Process Regression
+        ###
+
+        # Emulate-sample settings
+        # choice of machine-learning tool in the emulation stage
+        if case == "GP"
+            # gppackage = Emulators.SKLJL()
+            gppackage = Emulators.GPJL()
+            mlt = GaussianProcess(gppackage; noise_learn = false)
+        end
+
+
+        # Get training points from the EKP iterations given in the second argument
+        N_iter = min(max_iter, length(get_u(ekiobj)) - 1) # number of paired iterations taken from EKP
+        min_iter = min(max_iter, max(1, min_iter))
+        input_output_pairs = Utilities.get_training_points(ekiobj, min_iter:(N_iter - 1))
+        input_output_pairs_test = Utilities.get_training_points(ekiobj, N_iter:(length(get_u(ekiobj)) - 1)) # "next" iterations
+        # Save data
+        @save joinpath(data_save_directory, "input_output_pairs.jld2") input_output_pairs
+
+        retained_svd_frac = 1.0
+
+        normalized = true
+        # do we want to use SVD to decorrelate outputs
+        decorrelate = true
+
+        emulator = Emulator(
+            mlt,
+            input_output_pairs;
+            obs_noise_cov = Γy,
+            normalize_inputs = normalized,
+            retained_svd_frac = retained_svd_frac,
+            decorrelate = decorrelate,
+        )
+        optimize_hyperparameters!(emulator, kernbounds = [fill(-1e2, n_params + 1), fill(1e2, n_params + 1)])
+
+        # Check how well the Gaussian Process regression predicts on the
+        # true parameters
+        #if retained_svd_frac==1.0
+        y_mean, y_var = Emulators.predict(emulator, reshape(truth_params, :, 1), transform_to_real = true)
+        y_mean_test, y_var_test =
+            Emulators.predict(emulator, get_inputs(input_output_pairs_test), transform_to_real = true)
+
+        println("ML prediction on true parameters: ")
+        println(vec(y_mean))
+        println("true data: ")
+        println(truth_sample) # what was used as truth
+        println("ML predicted standard deviation: ")
+        println(sqrt.(diag(y_var[1], 0)))
+        println("ML 
MSE (truth): ") + println(mean((truth_sample - vec(y_mean)) .^ 2)) + println("ML MSE (next ensemble): ") + println(mean((get_outputs(input_output_pairs_test) - y_mean_test) .^ 2)) + + #end + ### + ### Sample: Markov Chain Monte Carlo + ### + # initial values + u0 = vec(mean(get_inputs(input_output_pairs), dims = 2)) + println("initial parameters: ", u0) + + # First let's run a short chain to determine a good step size + mcmc = MCMCWrapper(RWMHSampling(), truth_sample, prior, emulator; init_params = u0) + # mcmc = MCMCWrapper(pCNMHSampling(), truth_sample, prior, emulator; init_params = u0) + new_step = optimize_stepsize(mcmc; init_stepsize = 0.1, N = 2000, discard_initial = 0) + + # Now begin the actual MCMC + println("Begin MCMC - with step size ", new_step) + chain = MarkovChainMonteCarlo.sample(mcmc, 100_000; stepsize = new_step, discard_initial = 2_000) + + posterior = MarkovChainMonteCarlo.get_posterior(mcmc, chain) + + post_mean = mean(posterior) + post_cov = cov(posterior) + println("post_mean") + println(post_mean) + println("post_cov") + println(post_cov) + println("D util") + println(det(inv(post_cov))) + println(" ") + + param_names = get_name(posterior) + + posterior_samples = reduce(vcat, [get_distribution(posterior)[name] for name in get_name(posterior)]) #samples are columns of this matrix + n_post = size(posterior_samples, 2) + plot_sample_id = (n_post - 1000):n_post + constrained_posterior_samples = + transform_unconstrained_to_constrained(prior, posterior_samples[:, plot_sample_id]) + + gr(size = (1500, 400), legend = false) + κ_ens_mean = reshape(mean(constrained_posterior_samples, dims = 2), N, N) + p1 = contour( + pts_per_dim, + pts_per_dim, + κ_ens_mean', + fill = true, + levels = 15, + title = "kappa mean", + colorbar = true, + ) + κ_ens_ptw_var = reshape(var(constrained_posterior_samples, dims = 2), N, N) + p2 = contour( + pts_per_dim, + pts_per_dim, + κ_ens_ptw_var', + fill = true, + levels = 15, + title = "kappa var", + colorbar = true, + ) + + h_2d = solve_Darcy_2D(darcy, κ_ens_mean) + p3 = contour(pts_per_dim, pts_per_dim, h_2d', fill = true, levels = 15, title = "pressure", colorbar = true) + + l = @layout [a b c] + plt = plot(p1, p2, p3, layout = l) + savefig(plt, joinpath(data_save_directory, "$(case)_posterior_pointwise_uq.png")) + + # Save data + save( + joinpath(data_save_directory, "posterior.jld2"), + "posterior", + posterior, + "input_output_pairs", + input_output_pairs, + "truth_params", + truth_params, + ) + end +end + +main() diff --git a/examples/EDMF_data/Project.toml b/examples/EDMF_data/Project.toml index 2936bdfbe..2a61531f1 100644 --- a/examples/EDMF_data/Project.toml +++ b/examples/EDMF_data/Project.toml @@ -6,7 +6,8 @@ JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab" PairPlots = "43a3c2be-4208-490b-832a-a21dcd55d7da" -Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" -StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" + +[compat] +PairPlots = "=2.8.0" \ No newline at end of file diff --git a/examples/EDMF_data/README.md b/examples/EDMF_data/README.md index d9ee92f33..bee6f7088 100644 --- a/examples/EDMF_data/README.md +++ b/examples/EDMF_data/README.md @@ -3,6 +3,7 @@ To run this example, you must first instantiate the dependencies in the `Project Next, you must extract the `.zip` file and ensure that `exp_name` matches the directory name of the 
unzipped data directory. Then run the main file `uq_for_edmf.jl`.

-Approx time for completion:
+Approx time for completion (with GP case):
 - exp_name = ent-det-calibration: 82 mins
 - ent-det-tked-tkee-stab-calibration: 155 mins
+
diff --git a/examples/EDMF_data/emulator-rank-test.jl b/examples/EDMF_data/emulator-rank-test.jl
new file mode 100644
index 000000000..f26a07ef8
--- /dev/null
+++ b/examples/EDMF_data/emulator-rank-test.jl
@@ -0,0 +1,315 @@
+#include(joinpath(@__DIR__, "..", "ci", "linkfig.jl"))
+
+# Import modules
+using Distributions # probability distributions and associated functions
+using LinearAlgebra
+using Random
+using JLD2
+using NCDatasets
+using StatsBase
+using Dates
+
+# CES
+using CalibrateEmulateSample.Emulators
+using CalibrateEmulateSample.ParameterDistributions
+using CalibrateEmulateSample.DataContainers
+using CalibrateEmulateSample.EnsembleKalmanProcesses
+using CalibrateEmulateSample.EnsembleKalmanProcesses.Localizers
+using CalibrateEmulateSample.Utilities
+
+rng_seed = 42424242
+Random.seed!(rng_seed)
+
+
+function main()
+
+    # 2-parameter calibration exp
+    #exp_name = "ent-det-calibration"
+
+    # 5-parameter calibration exp
+    exp_name = "ent-det-tked-tkee-stab-calibration"
+
+    cases = [
+        "GP", # diagonalize, train scalar GP, assume diag inputs
+        "RF-prior",
+        "RF-vector-svd-nonsep",
+        "RF-vector-svd-sep", # a bad kernel, for comparison
+        "RF-vector-nosvd-nonsep",
+    ]
+    rank_cases = [
+        [0], # not used
+        [10], # some rank > 0
+        1:10,
+        1:5, # input rank always 5, output rank 1:5
+        1:10,
+    ]
+
+    case_id = 2
+    case = cases[case_id]
+    rank_test = rank_cases[case_id]
+    n_repeats = 1
+    n_iteration = 10
+
+
+
+    # Output figure save directory
+    figure_save_directory = joinpath(@__DIR__, "output", exp_name, string(Dates.today()))
+    data_save_directory = joinpath(@__DIR__, "output", exp_name, string(Dates.today()))
+    if !isdir(figure_save_directory)
+        mkpath(figure_save_directory)
+    end
+    if !isdir(data_save_directory)
+        mkpath(data_save_directory)
+    end
+    println("experiment running in: ", @__DIR__)
+    println("data saved in: ", data_save_directory)
+    println("figures saved in: ", figure_save_directory)
+
+    #############
+    # Pipeline: #
+    #############
+
+    # [1. ] Obtain scenario information (here all loaded from file)
+    #  [a.] Obtain truth sample(s),
+    #  [b.] Obtain observation and model-error noise
+    # [2. ] Obtain calibration information (here using saved input-output data, from EKI experiments)
+    #  [a.] Obtain calibration priors
+    #  [b.] Obtain calibration data
+    # [3. ] Build Emulator from calibration data
+    # [4. ] Run Emulator-based MCMC to obtain joint parameter distribution
+
+    ###################################################
+    # [1. & 2. ] Obtain scenario and calibration data #
+    ###################################################
+
+    exp_dir = joinpath(@__DIR__, exp_name)
+    if !isdir(exp_dir)
+        error(
+            "experiment data directory \"" *
+            exp_dir *
+            "/\" not found. Please unzip the compressed file \"" *
+            exp_name *
+            ".zip\" and retry.",
+        )
+    end
+
+    data_filepath = joinpath(exp_dir, "Diagnostics.nc")
+    if !isfile(data_filepath)
+        error("experiment data file \"Diagnostics.nc\" not found in directory \"" * exp_dir * "/\"")
+    else
+        @info "loading data from NC dataset"
+        data_set = NCDataset(data_filepath)
+        y_truth = Array(data_set.group["reference"]["y_full"]) #ndata
+        truth_cov = Array(data_set.group["reference"]["Gamma_full"]) #ndata x ndata
+        # Option (i) get data from NCDataset else get from jld2 files.
+        output_mat = Array(data_set.group["particle_diags"]["g_full"]) #nens x ndata x nit
+        input_mat = Array(data_set.group["particle_diags"]["u"]) #nens x nparam x nit
+        input_constrained_mat = Array(data_set.group["particle_diags"]["phi"]) #nens x nparam x nit
+
+        # to be consistent, we wish to go from nens x nparam x nit arrays to nparam x (nit x nens)
+        inputs =
+            reshape(permutedims(input_mat, (2, 3, 1)), (size(input_mat, 2), size(input_mat, 3) * size(input_mat, 1)))
+        inputs_constrained = reshape(
+            permutedims(input_constrained_mat, (2, 3, 1)),
+            (size(input_constrained_mat, 2), size(input_constrained_mat, 3) * size(input_constrained_mat, 1)),
+        )
+
+        # to be consistent, we wish to go from nens x ndata x nit arrays to ndata x (nit x nens)
+        outputs = reshape(
+            permutedims(output_mat, (2, 3, 1)),
+            (size(output_mat, 2), size(output_mat, 3) * size(output_mat, 1)),
+        )
+
+        # EDMF data often contains rows/columns of NC fill values - these must be removed
+        println("preprocessing data...")
+        isnan_inf_or_filled(x) = isnan(x) || isinf(x) || x ≈ NCDatasets.fillvalue(typeof(x))
+        good_values = .!isnan_inf_or_filled.(outputs)
+        good_particles = collect(1:size(outputs, 2))[all(good_values, dims = 1)[:]] # get good columns
+
+        inputs = inputs[:, good_particles]
+        inputs_constrained = inputs_constrained[:, good_particles]
+        outputs = outputs[:, good_particles]
+
+        good_datadim = collect(1:size(outputs, 1))[all(.!isnan_inf_or_filled.(outputs), dims = 2)[:]] # get good rows
+        outputs = outputs[good_datadim, :]
+        y_truth = y_truth[good_datadim]
+        truth_cov = truth_cov[good_datadim, good_datadim]
+
+        # split out training and test sets:
+        n_test = Int(floor(0.2 * length(good_particles)))
+        n_train = Int(ceil(0.8 * length(good_particles)))
+
+        rng_seed = 14225
+        rng = Random.MersenneTwister(rng_seed)
+
+        # random train set:
+        # train_idx = shuffle(rng, collect(1:n_test+n_train))[1:n_train]
+        # or: train on the earlier iterations, test on the final iteration
+        train_idx = collect(1:(n_test + n_train))[1:n_train]
+        test_idx = setdiff(collect(1:(n_test + n_train)), train_idx)
+
+        train_inputs = inputs[:, train_idx]
+        train_outputs = outputs[:, train_idx]
+
+        test_inputs = inputs[:, test_idx]
+        test_outputs = outputs[:, test_idx]
+
+        train_pairs = DataContainers.PairedDataContainer(train_inputs, train_outputs)
+    end
+
+    @info "Completed data loading stage"
+    println(" ")
+    ##############################################
+    # [3. 
] Build Emulator from calibration data # + ############################################## + + opt_diagnostics = zeros(length(rank_test), n_repeats, n_iteration) + train_err = zeros(length(rank_test), n_repeats) + test_err = zeros(length(rank_test), n_repeats) + ttt = zeros(length(rank_test), n_repeats) + + @info "Begin Emulation stage" + # Create GP object + train_frac = 0.9 + n_cross_val_sets = 2 + max_feature_size = size(get_outputs(train_pairs), 2) * size(get_outputs(train_pairs), 1) * (1 - train_frac) + for (rank_id, rank_val) in enumerate(rank_test) #test over different ranks for svd-nonsep + rng_seed = 99330 + rng = Random.MersenneTwister(rng_seed) + for rep_idx in 1:n_repeats + @info "Test rank: $(rank_val) from $(collect(rank_test))" + @info "Repeat: $(rep_idx)" + + overrides = Dict( + "verbose" => true, + "train_fraction" => train_frac, + "scheduler" => DataMisfitController(terminate_at = 1000), + "cov_sample_multiplier" => 0.2, + "n_iteration" => n_iteration, + "n_features_opt" => Int(floor((max_feature_size / 5))),# here: /5 with rank <= 3 works + "localization" => SEC(0.05), + "n_ensemble" => 400, + "n_cross_val_sets" => n_cross_val_sets, + ) + if case == "RF-prior" + overrides = Dict("verbose" => true, "cov_sample_multiplier" => 0.01, "n_iteration" => 0) + end + nugget = 1e-6 # 1e-6 + input_dim = size(get_inputs(train_pairs), 1) + output_dim = size(get_outputs(train_pairs), 1) + decorrelate = true + + if case == "GP" + rank_val = 0 + gppackage = Emulators.SKLJL() + pred_type = Emulators.YType() + mlt = GaussianProcess( + gppackage; + kernel = nothing, # use default squared exponential kernel + prediction_type = pred_type, + noise_learn = false, + ) + elseif case ∈ ["RF-vector-svd-sep"] + kernel_structure = SeparableKernel(LowRankFactor(5, nugget), LowRankFactor(rank_val, nugget)) + n_features = 500 + + mlt = VectorRandomFeatureInterface( + n_features, + input_dim, + output_dim, + rng = rng, + kernel_structure = kernel_structure, + optimizer_options = overrides, + ) + + elseif case ∈ ["RF-vector-svd-nonsep", "RF-prior"] + kernel_structure = NonseparableKernel(LowRankFactor(rank_val, nugget)) + n_features = 500 + + mlt = VectorRandomFeatureInterface( + n_features, + input_dim, + output_dim, + rng = rng, + kernel_structure = kernel_structure, + optimizer_options = overrides, + ) + elseif case ∈ ["RF-vector-nosvd-nonsep"] + kernel_structure = NonseparableKernel(LowRankFactor(rank_val, nugget)) + n_features = 500 + + mlt = VectorRandomFeatureInterface( + n_features, + input_dim, + output_dim, + rng = rng, + kernel_structure = kernel_structure, + optimizer_options = overrides, + ) + decorrelate = false + end + + # Fit an emulator to the data + normalized = true + + ttt[rank_id, rep_idx] = @elapsed begin + emulator = Emulator( + mlt, + train_pairs; + obs_noise_cov = truth_cov, + normalize_inputs = normalized, + decorrelate = decorrelate, + ) + + # Optimize the GP hyperparameters for better fit + optimize_hyperparameters!(emulator) + end + + # error of emulator on the training points: (no denoised training data) + train_err_tmp = [0.0] + for i in 1:size(train_inputs, 2) + train_mean, _ = Emulators.predict(emulator, train_inputs[:, i:i], transform_to_real = true) # 3x1 + train_err_tmp[1] += norm(train_mean - train_outputs[:, i]) + end + train_err[rank_id, rep_idx] = 1 / size(train_inputs, 2) * train_err_tmp[1] + + # error of emulator on test points: + test_err_tmp = [0.0] + for i in 1:size(test_inputs, 2) + test_mean, _ = Emulators.predict(emulator, test_inputs[:, i:i], 
transform_to_real = true) # 3x1
+                test_err_tmp[1] += norm(test_mean - test_outputs[:, i])
+            end
+            test_err[rank_id, rep_idx] = 1 / size(test_inputs, 2) * test_err_tmp[1]
+
+            @info "train error ($(size(train_inputs,2)) pts): $(train_err[rank_id,rep_idx])"
+            @info "test error ($(size(test_inputs, 2)) pts): $(test_err[rank_id,rep_idx])"
+            if !(case ∈ ["GP", "RF-prior"])
+                opt_diagnostics[rank_id, rep_idx, :] = get_optimizer(mlt)[1] # length-1 vec of vec -> vec
+                eki_conv_filepath = joinpath(data_save_directory, "$(case)_$(rank_val)_eki-conv.jld2")
+                save(eki_conv_filepath, "opt_diagnostics", opt_diagnostics)
+            end
+
+            # emulator_filepath = joinpath(data_save_directory, "$(case)_$(rank_val)_emulator.jld2")
+            # save(emulator_filepath, "emulator", emulator)
+
+            JLD2.save(
+                joinpath(data_save_directory, case * "_edmf_rank_test_results.jld2"),
+                "rank_test",
+                collect(rank_test),
+                "timings",
+                ttt,
+                "train_err",
+                train_err,
+                "test_err",
+                test_err,
+            )
+        end
+    end
+
+    @info "Finished Emulation stage"
+end
+
+main()
diff --git a/examples/EDMF_data/plot_posterior.jl b/examples/EDMF_data/plot_posterior.jl
index 9ba36b414..0133c00c1 100644
--- a/examples/EDMF_data/plot_posterior.jl
+++ b/examples/EDMF_data/plot_posterior.jl
@@ -1,7 +1,4 @@
 # Import modules
-ENV["GKSwstype"] = "100"
-using Plots
-
 using CairoMakie, PairPlots
 using JLD2
 using Dates
@@ -9,44 +6,178 @@ using Dates
 # CES
 using CalibrateEmulateSample.ParameterDistributions
+function main()
+    #####
+    # Creates 3 plots: one for a specific case, one with 2 cases, and one with all cases (the final case being the prior).
+
+    # date = Date(year,month,day)
+
+    # 2-parameter calibration exp
+    #exp_name = "ent-det-calibration"
+    #date_of_run = Date(2023, 10, 5)
+
+    # 5-parameter calibration exp
+    exp_name = "ent-det-tked-tkee-stab-calibration"
+    date_of_run = Date(2024, 11, 03)
+    @info "plotting results found in $(date_of_run)"
+    # Output figure read/write directory
+    figure_save_directory = joinpath(@__DIR__, "output", exp_name, string(date_of_run))
+    data_save_directory = joinpath(@__DIR__, "output", exp_name, string(date_of_run))
+
+    # case:
+    cases = [
+        "GP", # diagonalize, train scalar GP, assume diag inputs
+        "RF-prior",
+        "RF-vector-svd-nonsep",
+        "RF-vector-svd-sep",
+        "RF-vector-nosvd-nonsep",
+    ]
+    case_rf = cases[3]
+    kernel_rank = 3
+    prior_kernel_rank = 3
+    # load
+    posterior_filepath = joinpath(data_save_directory, "$(case_rf)_$(kernel_rank)_posterior.jld2")
+    if !isfile(posterior_filepath)
+        throw(ArgumentError(posterior_filepath * " not found. Please check experiment name and date"))
+    else
+        @info "Loading posterior distribution from: " * posterior_filepath
+        posterior = load(posterior_filepath)["posterior"]
+    end
+    # get samples explicitly (may be easier to work with)
+    posterior_samples = vcat([get_distribution(posterior)[name] for name in get_name(posterior)]...) # samples are columns
+    transformed_posterior_samples =
+        mapslices(x -> transform_unconstrained_to_constrained(posterior, x), posterior_samples, dims = 1)
+
+    # histograms
+    nparam_plots = sum(get_dimensions(posterior)) - 1
+    density_filepath = joinpath(figure_save_directory, "$(case_rf)_$(kernel_rank)_posterior_dist_comp.png")
+    transformed_density_filepath = joinpath(figure_save_directory, "$(case_rf)_$(kernel_rank)_posterior_dist_phys.png")
+    labels = get_name(posterior)
+
+    burnin = 50_000
+
+    data_rf = (; [(Symbol(labels[i]), posterior_samples[i, burnin:end]) for i in 1:length(labels)]...)
+ transformed_data_rf = + (; [(Symbol(labels[i]), transformed_posterior_samples[i, burnin:end]) for i in 1:length(labels)]...) + + + + # p = pairplot(data_rf => (PairPlots.Scatter(),)) + + p = pairplot(data_rf => (PairPlots.Contourf(sigmas = 1:1:3),)) + trans_p = pairplot(transformed_data_rf => (PairPlots.Contourf(sigmas = 1:1:3),)) -# date = Date(year,month,day) + save(density_filepath, p) + save(transformed_density_filepath, trans_p) + @info "saved RF contour plot" -# 2-parameter calibration exp -exp_name = "ent-det-calibration" -date_of_run = Date(2023, 10, 5) + # + # + # -# 5-parameter calibration exp -#exp_name = "ent-det-tked-tkee-stab-calibration" -#date_of_run = Date(2023,10,4) + case_gp = cases[1] + # load + posterior_filepath = joinpath(data_save_directory, "$(case_gp)_0_posterior.jld2") + if !isfile(posterior_filepath) + throw(ArgumentError(posterior_filepath * " not found. Please check experiment name and date")) + else + @info "Loading posterior distribution from: " * posterior_filepath + posterior = load(posterior_filepath)["posterior"] + end + # get samples explicitly (may be easier to work with) + posterior_samples = vcat([get_distribution(posterior)[name] for name in get_name(posterior)]...) #samples are columns + transformed_posterior_samples = + mapslices(x -> transform_unconstrained_to_constrained(posterior, x), posterior_samples, dims = 1) -# Output figure read/write directory -figure_save_directory = joinpath(@__DIR__, "output", exp_name, string(date_of_run)) -data_save_directory = joinpath(@__DIR__, "output", exp_name, string(date_of_run)) + # histograms + nparam_plots = sum(get_dimensions(posterior)) - 1 + density_filepath = joinpath(figure_save_directory, "$(case_rf)_$(case_gp)_$(kernel_rank)_posterior_dist_comp.png") + transformed_density_filepath = + joinpath(figure_save_directory, "$(case_rf)_$(case_gp)_$(kernel_rank)_posterior_dist_phys.png") + labels = get_name(posterior) + data_gp = (; [(Symbol(labels[i]), posterior_samples[i, burnin:end]) for i in 1:length(labels)]...) + transformed_data_gp = + (; [(Symbol(labels[i]), transformed_posterior_samples[i, burnin:end]) for i in 1:length(labels)]...) + # + # + # + gp_smoothing = 1 # >1 = smoothing KDE in plotting -# load -posterior_filepath = joinpath(data_save_directory, "posterior.jld2") -if !isfile(posterior_filepath) - throw(ArgumentError(posterior_filepath * " not found. Please check experiment name and date")) -else - println("Loading posterior distribution from: " * posterior_filepath) - posterior = load(posterior_filepath)["posterior"] + @warn "Contourf in PairPlots.jl for multiple histograms works in v2.8.0. Check versions if not seen correctly." 
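+    # (Each `data => (viz, ...)` pair passed to `pairplot` below adds one overlaid series,
+    # so the RF and GP posteriors can be compared panel-by-panel in a single figure.)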
+ p = pairplot( + data_rf => (PairPlots.Contourf(sigmas = 1:1:3),), + data_gp => (PairPlots.Contourf(sigmas = 1:1:3, bandwidth = gp_smoothing),), + ) + + + trans_p = pairplot( + transformed_data_rf => (PairPlots.Contourf(sigmas = 1:1:3),), + transformed_data_gp => (PairPlots.Contourf(sigmas = 1:1:3, bandwidth = gp_smoothing),), + ) + #= + trans_p = pairplot( + Series((transformed_data_rf, color=Makie.wong_colors(0.5)[1]) => (PairPlots.Contourf(sigmas = 1:1:3)),), + Series((transformed_data_gp, color=Makie.wong_colors(0.5)[2]) => (PairPlots.Contourf(sigmas = 1:1:3)),), + ) + =# + save(density_filepath, p) + save(transformed_density_filepath, trans_p) + + @info "saved RF/GP contour plot" + + + # Finally include the prior too + case_prior = cases[2] + # load + posterior_filepath = joinpath(data_save_directory, "$(case_prior)_$(prior_kernel_rank)_posterior.jld2") + if !isfile(posterior_filepath) + throw(ArgumentError(posterior_filepath * " not found. Please check experiment name and date")) + else + @info "Loading posterior distribution from: " * posterior_filepath + posterior = load(posterior_filepath)["posterior"] + end + # get samples explicitly (may be easier to work with) + posterior_samples = vcat([get_distribution(posterior)[name] for name in get_name(posterior)]...) #samples are columns + transformed_posterior_samples = + mapslices(x -> transform_unconstrained_to_constrained(posterior, x), posterior_samples, dims = 1) + + # histograms + nparam_plots = sum(get_dimensions(posterior)) - 1 + density_filepath = joinpath(figure_save_directory, "all_$(kernel_rank)_posterior_dist_comp.png") + transformed_density_filepath = joinpath(figure_save_directory, "all_$(kernel_rank)_posterior_dist_phys.png") + labels = get_name(posterior) + + prior_burnin = 240000 + + data_prior = (; [(Symbol(labels[i]), posterior_samples[i, prior_burnin:end]) for i in 1:length(labels)]...) + transformed_data_prior = + (; [(Symbol(labels[i]), transformed_posterior_samples[i, prior_burnin:end]) for i in 1:length(labels)]...) + # + # + # + + p = pairplot( + data_rf => (PairPlots.Contourf(sigmas = 1:1:3),), + data_gp => (PairPlots.Contourf(sigmas = 1:1:3, bandwidth = gp_smoothing),), + data_prior => (PairPlots.Scatter(),), + ) + trans_p = pairplot( + transformed_data_rf => (PairPlots.Contourf(sigmas = 1:1:3),), + transformed_data_gp => (PairPlots.Contourf(sigmas = 1:1:3, bandwidth = gp_smoothing),), + transformed_data_prior => (PairPlots.Scatter(),), + ) + + save(density_filepath, p) + save(transformed_density_filepath, trans_p) + + density_filepath = joinpath(figure_save_directory, "all_$(kernel_rank)_posterior_dist_comp.pdf") + transformed_density_filepath = joinpath(figure_save_directory, "all_$(kernel_rank)_posterior_dist_phys.pdf") + save(density_filepath, p) + save(transformed_density_filepath, trans_p) + + @info "saved RF/Prior/GP contour plot" end -# get samples explicitly (may be easier to work with) -posterior_samples = vcat([get_distribution(posterior)[name] for name in get_name(posterior)]...) #samples are columns -transformed_posterior_samples = - mapslices(x -> transform_unconstrained_to_constrained(posterior, x), posterior_samples, dims = 1) - -# histograms -nparam_plots = sum(get_dimensions(posterior)) - 1 -density_filepath = joinpath(figure_save_directory, "posterior_dist_comp.png") -transformed_density_filepath = joinpath(figure_save_directory, "posterior_dist_phys.png") -labels = get_name(posterior) - -data = (; [(Symbol(labels[i]), posterior_samples[i, :]) for i in 1:length(labels)]...) 
-transformed_data = (; [(Symbol(labels[i]), transformed_posterior_samples[i, :]) for i in 1:length(labels)]...) - -p = pairplot(data => (PairPlots.Scatter(),)) -trans_p = pairplot(transformed_data => (PairPlots.Scatter(),)) -save(density_filepath, p) -save(transformed_density_filepath, trans_p) + +main() diff --git a/examples/EDMF_data/uq_for_edmf.jl b/examples/EDMF_data/uq_for_edmf.jl index 7c5a1ac0d..5deb78114 100644 --- a/examples/EDMF_data/uq_for_edmf.jl +++ b/examples/EDMF_data/uq_for_edmf.jl @@ -1,11 +1,12 @@ -#include(joinpath(@__DIR__, "..", "ci", "linkfig.jl")) +#includef(joinpath(@__DIR__, "..", "ci", "linkfig.jl")) PLOT_FLAG = false # Import modules using Distributions # probability distributions and associated functions using LinearAlgebra -ENV["GKSwstype"] = "100" -using Plots +#ENV["GKSwstype"] = "100" +# using Plots +using CairoMakie using Random using JLD2 using NCDatasets @@ -28,11 +29,19 @@ Random.seed!(rng_seed) function main() # 2-parameter calibration exp - exp_name = "ent-det-calibration" + #exp_name = "ent-det-calibration" # 5-parameter calibration exp - #exp_name = "ent-det-tked-tkee-stab-calibration" + exp_name = "ent-det-tked-tkee-stab-calibration" + cases = [ + "GP", # diagonalize, train scalar GP, assume diag inputs + "RF-prior", + "RF-vector-svd-nonsep", + "RF-vector-svd-sep", + "RF-vector-nosvd-nonsep", + ] + case = cases[3] # Output figure save directory figure_save_directory = joinpath(@__DIR__, "output", exp_name, string(Dates.today())) @@ -74,16 +83,19 @@ function main() ".zip\" and retry.", ) end + data_filepath = joinpath(exp_dir, "Diagnostics.nc") if !isfile(data_filepath) LoadError("experiment data file \"Diagnostics.nc\" not found in directory \"" * exp_dir * "/\"") else - y_truth = Array(NCDataset(data_filepath).group["reference"]["y_full"]) #ndata - truth_cov = Array(NCDataset(data_filepath).group["reference"]["Gamma_full"]) #ndata x ndata + @info "loading data from NC dataset" + data_set = NCDataset(data_filepath) + y_truth = Array(data_set.group["reference"]["y_full"]) #ndata + truth_cov = Array(data_set.group["reference"]["Gamma_full"]) #ndata x ndata # Option (i) get data from NCDataset else get from jld2 files. 
- output_mat = Array(NCDataset(data_filepath).group["particle_diags"]["g_full"]) #nens x ndata x nit - input_mat = Array(NCDataset(data_filepath).group["particle_diags"]["u"]) #nens x nparam x nit - input_constrained_mat = Array(NCDataset(data_filepath).group["particle_diags"]["phi"]) #nens x nparam x nit + output_mat = Array(data_set.group["particle_diags"]["g_full"]) #nens x ndata x nit + input_mat = Array(data_set.group["particle_diags"]["u"]) #nens x nparam x nit + input_constrained_mat = Array(data_set.group["particle_diags"]["phi"]) #nens x nparam x nit # to be consistent, we wish to go from nens x nparam x nit arrays to nparam x (nit x nens) inputs = @@ -114,20 +126,11 @@ function main() y_truth = y_truth[good_datadim] truth_cov = truth_cov[good_datadim, good_datadim] - # quick plots of data - if PLOT_FLAG - println("plotting ensembles...") - for plot_i in 1:size(outputs, 1) - p = scatter(inputs_constrained[1, :], inputs_constrained[2, :], zcolor = outputs[plot_i, :]) - savefig(p, joinpath(figure_save_directory, "output_" * string(plot_i) * ".png")) - end - println("finished plotting ensembles.") - end input_output_pairs = DataContainers.PairedDataContainer(inputs, outputs) - end # load and create prior distributions + # code deprecated due to JLD2 #= prior_filepath = joinpath(exp_dir, "prior.jld2") if !isfile(prior_filepath) @@ -171,7 +174,6 @@ function main() std = prior_config["unconstrained_σ"] constraints = prior_config["constraints"] - prior = combine_distributions([ ParameterDistribution( Dict("name" => name, "distribution" => Parameterized(Normal(mean, std)), "constraint" => constraints[name]), @@ -189,39 +191,42 @@ function main() # end # input_output_pairs = Utilities.get_training_points(ekpobj, max_ekp_it) - println("Completed calibration loading stage") + @info "Completed calibration loading stage" println(" ") ############################################## # [3. 
] Build Emulator from calibration data # ############################################## - println("Begin Emulation stage") + @info "Begin Emulation stage" # Create GP object - - cases = [ - "GP", # diagonalize, train scalar GP, assume diag inputs - "RF-scalar", # diagonalize, train scalar RF, don't asume diag inputs - "RF-vector-svd-diag", - "RF-vector-svd-nondiag", - "RF-vector-svd-nonsep", - ] - case = cases[5] - + train_frac = 0.9 + kernel_rank = 3 # svd-sep should be kr - 5 (as input rank is set to 5) + n_cross_val_sets = 2 + @info "Kernel rank: $(kernel_rank)" + max_feature_size = + size(get_outputs(input_output_pairs), 2) * size(get_outputs(input_output_pairs), 1) * (1 - train_frac) overrides = Dict( "verbose" => true, - "train_fraction" => 0.95, - "scheduler" => DataMisfitController(terminate_at = 100), - "cov_sample_multiplier" => 0.5, - "n_iteration" => 5, - # "n_ensemble" => 20, - # "localization" => SEC(0.1), # localization / sample error correction for small ensembles + "train_fraction" => train_frac, + "scheduler" => DataMisfitController(terminate_at = 1000), + "cov_sample_multiplier" => 0.2, + "n_iteration" => 15, + "n_features_opt" => Int(floor((max_feature_size / 5))),# here: /5 with rank <= 3 works + "localization" => SEC(0.05), + "n_ensemble" => 400, + "n_cross_val_sets" => n_cross_val_sets, ) - nugget = 0.01 + if case == "RF-prior" + overrides = Dict("verbose" => true, "cov_sample_multiplier" => 0.01, "n_iteration" => 0) + end + nugget = 1e-6 rng_seed = 99330 rng = Random.MersenneTwister(rng_seed) input_dim = size(get_inputs(input_output_pairs), 1) output_dim = size(get_outputs(input_output_pairs), 1) + decorrelate = true + opt_diagnostics = [] if case == "GP" - + kernel_rank = 0 gppackage = Emulators.SKLJL() pred_type = Emulators.YType() mlt = GaussianProcess( @@ -230,21 +235,21 @@ function main() prediction_type = pred_type, noise_learn = false, ) - elseif case ∈ ["RF-scalar"] - n_features = 100 - kernel_structure = SeparableKernel(CholeskyFactor(nugget), OneDimFactor()) - mlt = ScalarRandomFeatureInterface( + elseif case ∈ ["RF-vector-svd-sep"] + kernel_structure = SeparableKernel(LowRankFactor(5, nugget), LowRankFactor(kernel_rank, nugget)) + n_features = 500 + + mlt = VectorRandomFeatureInterface( n_features, input_dim, + output_dim, rng = rng, kernel_structure = kernel_structure, optimizer_options = overrides, ) - elseif case ∈ ["RF-vector-svd-diag", "RF-vector-svd-nondiag"] - # do we want to assume that the outputs are decorrelated in the machine-learning problem? - kernel_structure = - case ∈ ["RF-vector-svd-diag"] ? 
SeparableKernel(LowRankFactor(1, nugget), DiagonalFactor(nugget)) : - SeparableKernel(LowRankFactor(2, nugget), LowRankFactor(2, nugget)) + + elseif case ∈ ["RF-vector-svd-nonsep", "RF-prior"] + kernel_structure = NonseparableKernel(LowRankFactor(kernel_rank, nugget)) n_features = 500 mlt = VectorRandomFeatureInterface( @@ -255,8 +260,8 @@ function main() kernel_structure = kernel_structure, optimizer_options = overrides, ) - elseif case ∈ ["RF-vector-svd-nonsep"] - kernel_structure = NonseparableKernel(LowRankFactor(3, nugget)) + elseif case ∈ ["RF-vector-nosvd-nonsep"] + kernel_structure = NonseparableKernel(LowRankFactor(kernel_rank, nugget)) n_features = 500 mlt = VectorRandomFeatureInterface( @@ -267,20 +272,61 @@ function main() kernel_structure = kernel_structure, optimizer_options = overrides, ) + decorrelate = false end # Fit an emulator to the data normalized = true - emulator = Emulator(mlt, input_output_pairs; obs_noise_cov = truth_cov, normalize_inputs = normalized) + emulator = Emulator( + mlt, + input_output_pairs; + obs_noise_cov = truth_cov, + normalize_inputs = normalized, + decorrelate = decorrelate, + ) # Optimize the GP hyperparameters for better fit optimize_hyperparameters!(emulator) + if case ∈ ["RF-vector-nosvd-nonsep", "RF-vector-svd-nonsep"] + push!(opt_diagnostics, get_optimizer(mlt)[1]) #length-1 vec of vec -> vec + end + + # plot eki convergence plot + #= + if length(opt_diagnostics) > 0 + err_cols = reduce(hcat, opt_diagnostics) #error for each repeat as columns? - emulator_filepath = joinpath(data_save_directory, "emulator.jld2") + #save data + error_filepath = joinpath(data_save_directory, "eki_conv_error.jld2") + save(error_filepath, "error", err_cols) + + # print all repeats + f5 = Figure(resolution = (1.618 * 300, 300), markersize = 4) + ax_conv = Axis(f5[1, 1], xlabel = "Iteration", ylabel = "max-normalized error") + if n_repeats == 1 + lines!(ax_conv, collect(1:size(err_cols, 1))[:], err_cols[:], solid_color = :blue) # If just one repeat + else + for idx in 1:size(err_cols, 1) + err_normalized = (err_cols' ./ err_cols[1, :])' # divide each series by the max, so all errors start at 1 + series!(ax_conv, err_normalized', solid_color = :blue) + end + end + save(joinpath(figure_save_directory, "eki-conv_$(case).png"), f5, px_per_unit = 3) + save(joinpath(figure_save_directory, "eki-conv_$(case).pdf"), f5, px_per_unit = 3) + + end + =# + + emulator_filepath = joinpath(data_save_directory, "$(case)_$(kernel_rank)_emulator.jld2") save(emulator_filepath, "emulator", emulator) - println("Finished Emulation stage") + if length(opt_diagnostics) > 0 + eki_conv_filepath = joinpath(data_save_directory, "$(case)_$(kernel_rank)_eki-conv.jld2") + save(eki_conv_filepath, "opt_diagnostics", opt_diagnostics) + end + + @info "Finished Emulation stage" println(" ") ######################################################################## # [4. 
] Run Emulator-based MCMC to obtain joint parameter distribution # @@ -293,17 +339,17 @@ function main() # determine a good step size yt_sample = y_truth mcmc = MCMCWrapper(RWMHSampling(), yt_sample, prior, emulator; init_params = u0) - new_step = optimize_stepsize(mcmc; init_stepsize = 0.1, N = 2000, discard_initial = 0) + new_step = optimize_stepsize(mcmc; init_stepsize = 0.1, N = 5000, discard_initial = 0) # Now begin the actual MCMC println("Begin MCMC - with step size ", new_step) - chain = MarkovChainMonteCarlo.sample(mcmc, 100_000; stepsize = new_step, discard_initial = 2_000) + chain = MarkovChainMonteCarlo.sample(mcmc, 300_000; stepsize = new_step, discard_initial = 2_000) posterior = MarkovChainMonteCarlo.get_posterior(mcmc, chain) - mcmc_filepath = joinpath(data_save_directory, "mcmc_and_chain.jld2") + mcmc_filepath = joinpath(data_save_directory, "$(case)_$(kernel_rank)_mcmc_and_chain.jld2") save(mcmc_filepath, "mcmc", mcmc, "chain", chain) - posterior_filepath = joinpath(data_save_directory, "posterior.jld2") + posterior_filepath = joinpath(data_save_directory, "$(case)_$(kernel_rank)_posterior.jld2") save(posterior_filepath, "posterior", posterior) println("Finished Sampling stage") diff --git a/examples/Emulator/G-function/Project.toml b/examples/Emulator/G-function/Project.toml new file mode 100644 index 000000000..7d7a6fa85 --- /dev/null +++ b/examples/Emulator/G-function/Project.toml @@ -0,0 +1,13 @@ +[deps] +CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" +CalibrateEmulateSample = "95e48a1f-0bec-4818-9538-3db4340308e3" +ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4" +DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +EnsembleKalmanProcesses = "aa8a2aa5-91d8-4396-bcef-d4f2ec43552d" +GlobalSensitivityAnalysis = "1b10255b-6da3-57ce-9089-d24e8517b87e" +JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +RandomFeatures = "36c3bae2-c0c3-419d-b3b4-eebadd35c5e5" +StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" diff --git a/examples/Emulator/G-function/emulate-test-n-features.jl b/examples/Emulator/G-function/emulate-test-n-features.jl new file mode 100644 index 000000000..042694fbe --- /dev/null +++ b/examples/Emulator/G-function/emulate-test-n-features.jl @@ -0,0 +1,354 @@ + +using GlobalSensitivityAnalysis +const GSA = GlobalSensitivityAnalysis +using Distributions +using DataStructures +using Random +using LinearAlgebra +import StatsBase: percentile +using JLD2 + +using CalibrateEmulateSample.EnsembleKalmanProcesses +using CalibrateEmulateSample.Emulators +using CalibrateEmulateSample.DataContainers +using CalibrateEmulateSample.EnsembleKalmanProcesses.Localizers + +using CairoMakie, ColorSchemes #for plots +seed = 2589436 + +output_directory = joinpath(@__DIR__, "output") +if !isdir(output_directory) + mkdir(output_directory) +end + + +inner_func(x::AV, a::AV) where {AV <: AbstractVector} = prod((abs.(4 * x .- 2) + a) ./ (1 .+ a)) + +"G-Function taken from https://www.sfu.ca/~ssurjano/gfunc.html" +function GFunction(x::AM, a::AV) where {AM <: AbstractMatrix, AV <: AbstractVector} + @assert size(x, 1) == length(a) + return mapslices(y -> inner_func(y, a), x; dims = 1) # applies the map to columns +end + +function GFunction(x::AM) where {AM <: AbstractMatrix} + a = [(i - 1.0) / 2.0 for i in 1:size(x, 1)] + return GFunction(x, a) +end + +function main() + + rng = MersenneTwister(seed) + + n_repeats = 5 #
repeat the experiment with the same data. + n_dimensions = 6 + # To create the sampling + n_data_gen = 800 + + data = + SobolData(params = OrderedDict([Pair(Symbol("x", i), Uniform(0, 1)) for i in 1:n_dimensions]), N = n_data_gen) + + # To perform global analysis, + # one must generate samples using Sobol sequence (i.e. creates more than N points) + samples = GSA.sample(data) + n_data = size(samples, 1) # [n_samples x n_dim] + println("number of sobol points: ", n_data) + # run model (example) + y = GFunction(samples')' # G is applied to columns + # perform Sobol Analysis + result = analyze(data, y) + + # plot the first 3 dimensions + plot_dim = n_dimensions >= 3 ? 3 : n_dimensions + f1 = Figure(resolution = (1.618 * plot_dim * 300, 300), markersize = 4) + for i in 1:plot_dim + ax = Axis(f1[1, i], xlabel = "x" * string(i), ylabel = "f") + scatter!(ax, samples[:, i], y[:], color = :orange) + end + + CairoMakie.save(joinpath(output_directory, "GFunction_slices_truth_$(n_dimensions).png"), f1, px_per_unit = 3) + CairoMakie.save(joinpath(output_directory, "GFunction_slices_truth_$(n_dimensions).pdf"), f1, px_per_unit = 3) + + n_train_pts = n_dimensions * 250 + ind_total = shuffle!(rng, Vector(1:n_data)) + ind = ind_total[1:n_train_pts] + # now subsample the samples data + n_tp = length(ind) + input = zeros(n_dimensions, n_tp) + output = zeros(1, n_tp) + Γ = 1e-3 + noise = rand(rng, Normal(0, Γ), n_tp) + for i in 1:n_tp + input[:, i] = samples[ind[i], :] + output[i] = y[ind[i]] + noise[i] + end + iopairs = PairedDataContainer(input, output) + + # analytic sobol indices taken from + # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8989694/pdf/main.pdf + a = [(i - 1.0) / 2.0 for i in 1:n_dimensions] # a_i < a_j => a_i more sensitive + prod_tmp = prod(1 .+ 1 ./ (3 .* (1 .+ a) .^ 2)) - 1 + V = [(1 / (3 * (1 + ai)^2)) / prod_tmp for ai in a] + prod_tmp2 = [prod(1 .+ 1 ./ (3 .* (1 .+ a[1:end .!== j]) .^ 2)) for j in 1:n_dimensions] + TV = [(1 / (3 * (1 + ai)^2)) * prod_tmp2[i] / prod_tmp for (i, ai) in enumerate(a)] + + + + cases = ["Prior", "GP", "RF-scalar"] + case = cases[2] + decorrelate = true + nugget = Float64(1e-12) + + n_features_vec = [25, 50, 100, 200, 400, 800] # ideally fewer than the number of data points, as output dim = 1 + ttt = zeros(length(n_features_vec), n_repeats) + train_err = zeros(length(n_features_vec), n_repeats) + test_err = zeros(length(n_features_vec), n_repeats) + n_cross_val_sets = 2 + + + for (f_idx, n_features_opt) in enumerate(n_features_vec) + y_preds = [] + result_preds = [] + + overrides = Dict( + #"verbose" => true, + "scheduler" => DataMisfitController(terminate_at = 1e3), + "n_features_opt" => n_features_opt, + "n_iteration" => 20, + "cov_sample_multiplier" => 1.0, + # "localization" => SEC(0.1), # doesn't help much here + # "accelerator" => NesterovAccelerator(), + "n_ensemble" => 100, + "n_cross_val_sets" => n_cross_val_sets, + ) + if case == "Prior" + # don't do anything + overrides["n_iteration"] = 0 + overrides["cov_sample_multiplier"] = 0.1 + end + + for rep_idx in 1:n_repeats + @info "Testing #features = $(n_features_opt) \n repeat $(rep_idx) of $(n_repeats)" + # Build ML tools + if case == "GP" + gppackage = Emulators.SKLJL() + pred_type = Emulators.YType() + mlt = GaussianProcess(gppackage; prediction_type = pred_type, noise_learn = false) + n_cross_val_sets = 0 + elseif case ∈ ["RF-scalar", "Prior"] + rank = n_dimensions #<= 10 ? n_dimensions : 10 + kernel_structure = SeparableKernel(LowRankFactor(rank, nugget), OneDimFactor()) + n_features = n_dimensions <= 10 ?
n_dimensions * 100 : 1000 + if (n_features / n_train_pts > 0.9) && (n_features / n_train_pts < 1.1) + @warn "The number of features is close to the number of training points; poor performance is expected, so change one or the other" + end + mlt = ScalarRandomFeatureInterface( + n_features, + n_dimensions, + rng = rng, + kernel_structure = kernel_structure, + optimizer_options = deepcopy(overrides), + ) + end + + # Emulate + ttt[f_idx, rep_idx] = @elapsed begin + emulator = Emulator(mlt, iopairs; obs_noise_cov = Γ * I, decorrelate = decorrelate) + optimize_hyperparameters!(emulator) + end + + # errors: + # training error + y_pred, y_var = predict(emulator, get_inputs(iopairs), transform_to_real = true) + train_err[f_idx, rep_idx] = sqrt(sum((y_pred - get_outputs(iopairs)) .^ 2)) / n_train_pts + + # predict on all test points with emulator (example) + y_pred, y_var = predict(emulator, samples', transform_to_real = true) #predict on all points + ind_test = ind_total[(n_train_pts + 1):end] + test_err[f_idx, rep_idx] = sqrt(sum((y_pred[ind_test] - y[ind_test]) .^ 2)) / length(ind_test) + + JLD2.save( + joinpath( + output_directory, + "diff_n_features_GFunction_$(case)_$(n_dimensions)_ntest-$(Int(n_train_pts/5))_cv-$(n_cross_val_sets).jld2", + ), + "n_features_vec", + n_features_vec, + "timings", + ttt, + "train_err", + train_err, + "test_err", + test_err, + ) #save every iteration for safety + + + # obtain emulated Sobol indices + result_pred = analyze(data, y_pred') + println("First order: ", result_pred[:firstorder]) + println("Total order: ", result_pred[:totalorder]) + + push!(y_preds, y_pred) + push!(result_preds, result_pred) + GC.gc() #collect garbage + + # Plotting: + #= if rep_idx == 1 + f3, ax3, plt3 = scatter( + 1:n_dimensions, + result_preds[1][:firstorder]; + color = :red, + markersize = 8, + marker = :cross, + label = "V-emulate", + title = "input dimension: $(n_dimensions)", + ) + scatter!(ax3, result[:firstorder], color = :red, markersize = 8, label = "V-approx") + scatter!(ax3, V, color = :red, markersize = 12, marker = :xcross, label = "V-true") + scatter!( + ax3, + 1:n_dimensions, + result_preds[1][:totalorder]; + color = :blue, + label = "TV-emulate", + markersize = 8, + marker = :cross, + ) + scatter!(ax3, result[:totalorder], color = :blue, markersize = 8, label = "TV-approx") + scatter!(ax3, TV, color = :blue, markersize = 12, marker = :xcross, label = "TV-true") + axislegend(ax3) + + CairoMakie.save( + joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).png"), + f3, + px_per_unit = 3, + ) + CairoMakie.save( + joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).pdf"), + f3, + px_per_unit = 3, + ) + else + # get percentiles: + fo_mat = zeros(n_dimensions, rep_idx) + to_mat = zeros(n_dimensions, rep_idx) + + for (idx, rp) in enumerate(result_preds) + fo_mat[:, idx] = rp[:firstorder] + to_mat[:, idx] = rp[:totalorder] + end + + firstorder_med = percentile.(eachrow(fo_mat), 50) + firstorder_low = percentile.(eachrow(fo_mat), 5) + firstorder_up = percentile.(eachrow(fo_mat), 95) + + totalorder_med = percentile.(eachrow(to_mat), 50) + totalorder_low = percentile.(eachrow(to_mat), 5) + totalorder_up = percentile.(eachrow(to_mat), 95) + + println("(50%) firstorder: ", firstorder_med) + println("(5%) firstorder: ", firstorder_low) + println("(95%) firstorder: ", firstorder_up) + + println("(50%) totalorder: ", totalorder_med) + println("(5%) totalorder: ", totalorder_low) + println("(95%) totalorder: ", totalorder_up) + # + f3, ax3, plt3 = errorbars(
1:n_dimensions, + firstorder_med, + firstorder_med - firstorder_low, + firstorder_up - firstorder_med; + whiskerwidth = 10, + color = :red, + label = "V-emulate", + title = "input dimension: $(n_dimensions)", + ) + scatter!(ax3, result[:firstorder], color = :red, markersize = 8, label = "V-approx") + scatter!(ax3, V, color = :red, markersize = 12, marker = :xcross, label = "V-true") + errorbars!( + ax3, + 1:n_dimensions, + totalorder_med, + totalorder_med - totalorder_low, + totalorder_up - totalorder_med; + whiskerwidth = 10, + color = :blue, + label = "TV-emulate", + ) + scatter!(ax3, result[:totalorder], color = :blue, markersize = 8, label = "TV-approx") + scatter!(ax3, TV, color = :blue, markersize = 12, marker = :xcross, label = "TV-true") + axislegend(ax3) + + CairoMakie.save( + joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).png"), + f3, + px_per_unit = 3, + ) + CairoMakie.save( + joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).pdf"), + f3, + px_per_unit = 3, + ) + + end + # plots - first 3 dimensions + if rep_idx == 1 + f2 = Figure(resolution = (1.618 * plot_dim * 300, 300), markersize = 4) + for i in 1:plot_dim + ax2 = Axis(f2[1, i], xlabel = "x" * string(i), ylabel = "f") + scatter!(ax2, samples[:, i], y_preds[1][:], color = :blue) + scatter!(ax2, samples[ind, i], y[ind] + noise, color = :red, markersize = 8) + end + CairoMakie.save( + joinpath(output_directory, "GFunction_slices_$(case)_$(n_dimensions).png"), + f2, + px_per_unit = 3, + ) + CairoMakie.save( + joinpath(output_directory, "GFunction_slices_$(case)_$(n_dimensions).pdf"), + f2, + px_per_unit = 3, + ) + end + + end + =# + end + end + + println(" ") + println("True Sobol Indices") + println("******************") + println(" firstorder: ", V) + println(" totalorder: ", TV) + println(" ") + println("Sampled truth Sobol Indices (# points $n_data)") + println("***************************") + println(" firstorder: ", result[:firstorder]) + println(" totalorder: ", result[:totalorder]) + println(" ") + + println("Sampled Emulated Sobol Indices (# obs $n_train_pts, noise var $Γ)") + println("***************************************************************") + + + jldsave( + joinpath(output_directory, "Gfunction_$(case)_$(n_dimensions).jld2"); + sobol_pts = samples, + train_idx = ind, + analytic_V = V, + analytic_TV = TV, + estimated_sobol = result, + mlt_sobol = result_preds, + mlt_pred_y = y_preds, + true_y = y, + noise_y = Γ, + observed_y = output, + ) + + + return y_preds, result_preds +end + + +main() diff --git a/examples/Emulator/G-function/emulate.jl b/examples/Emulator/G-function/emulate.jl new file mode 100644 index 000000000..23007f5d6 --- /dev/null +++ b/examples/Emulator/G-function/emulate.jl @@ -0,0 +1,355 @@ + +using GlobalSensitivityAnalysis +const GSA = GlobalSensitivityAnalysis +using Distributions +using DataStructures +using Random +using LinearAlgebra +import StatsBase: percentile +using JLD2 + +using CalibrateEmulateSample.EnsembleKalmanProcesses +using CalibrateEmulateSample.Emulators +using CalibrateEmulateSample.DataContainers +using CalibrateEmulateSample.EnsembleKalmanProcesses.Localizers + +using CairoMakie, ColorSchemes #for plots +seed = 2589436 + +output_directory = joinpath(@__DIR__, "output") +if !isdir(output_directory) + mkdir(output_directory) +end + + +inner_func(x::AV, a::AV) where {AV <: AbstractVector} = prod((abs.(4 * x .- 2) + a) ./ (1 .+ a)) + +"G-Function taken from https://www.sfu.ca/~ssurjano/gfunc.html" +function GFunction(x::AM, a::AV) where 
{AM <: AbstractMatrix, AV <: AbstractVector} + @assert size(x, 1) == length(a) + return mapslices(y -> inner_func(y, a), x; dims = 1) # applies the map to columns +end + +function GFunction(x::AM) where {AM <: AbstractMatrix} + a = [(i - 1.0) / 2.0 for i in 1:size(x, 1)] + return GFunction(x, a) +end + +function main() + + rng = MersenneTwister(seed) + + n_repeats = 20 # repeat the experiment with the same data. + n_dimensions = 20 + # To create the sampling + n_data_gen = 800 + + data = + SobolData(params = OrderedDict([Pair(Symbol("x", i), Uniform(0, 1)) for i in 1:n_dimensions]), N = n_data_gen) + + # To perform global analysis, + # one must generate samples using Sobol sequence (i.e. creates more than N points) + samples = GSA.sample(data) + n_data = size(samples, 1) # [n_samples x n_dim] + println("number of sobol points: ", n_data) + # run model (example) + y = GFunction(samples')' # G is applied to columns + # perform Sobol Analysis + result = analyze(data, y) + + # plot the first 3 dimensions + plot_dim = n_dimensions >= 3 ? 3 : n_dimensions + f1 = Figure(resolution = (1.618 * plot_dim * 300, 300), markersize = 4) + for i in 1:plot_dim + ax = Axis(f1[1, i], xlabel = "x" * string(i), ylabel = "f") + scatter!(ax, samples[:, i], y[:], color = :orange) + end + + CairoMakie.save(joinpath(output_directory, "GFunction_slices_truth_$(n_dimensions).png"), f1, px_per_unit = 3) + CairoMakie.save(joinpath(output_directory, "GFunction_slices_truth_$(n_dimensions).pdf"), f1, px_per_unit = 3) + + n_train_pts = n_dimensions * 250 + ind = shuffle!(rng, Vector(1:n_data))[1:n_train_pts] + # now subsample the samples data + n_tp = length(ind) + input = zeros(n_dimensions, n_tp) + output = zeros(1, n_tp) + Γ = 1e-3 + noise = rand(rng, Normal(0, Γ), n_tp) + for i in 1:n_tp + input[:, i] = samples[ind[i], :] + output[i] = y[ind[i]] + noise[i] + end + iopairs = PairedDataContainer(input, output) + + # analytic sobol indices taken from + # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8989694/pdf/main.pdf + a = [(i - 1.0) / 2.0 for i in 1:n_dimensions] # a_i < a_j => a_i more sensitive + prod_tmp = prod(1 .+ 1 ./ (3 .* (1 .+ a) .^ 2)) - 1 + V = [(1 / (3 * (1 + ai)^2)) / prod_tmp for ai in a] + prod_tmp2 = [prod(1 .+ 1 ./ (3 .* (1 .+ a[1:end .!== j]) .^ 2)) for j in 1:n_dimensions] + TV = [(1 / (3 * (1 + ai)^2)) * prod_tmp2[i] / prod_tmp for (i, ai) in enumerate(a)] + + + + cases = ["Prior", "GP", "RF-scalar"] + case = cases[3] + decorrelate = true + nugget = Float64(1e-12) + + overrides = Dict( + "verbose" => true, + "scheduler" => DataMisfitController(terminate_at = 1e2), + "n_features_opt" => 150, + "n_iteration" => 10, + "cov_sample_multiplier" => 3.0, + #"localization" => SEC(0.1), # doesn't help much here + #"accelerator" => NesterovAccelerator(), + "n_ensemble" => 100, #40*n_dimensions, + "n_cross_val_sets" => 4, + ) + if case == "Prior" + # don't do anything + overrides["n_iteration"] = 0 + overrides["cov_sample_multiplier"] = 0.1 + end + + y_preds = [] + result_preds = [] + opt_diagnostics = [] + times = zeros(n_repeats) + for rep_idx in 1:n_repeats + @info "Repeat: $(rep_idx)" + # Build ML tools + if case == "GP" + gppackage = Emulators.SKLJL() + pred_type = Emulators.YType() + mlt = GaussianProcess(gppackage; prediction_type = pred_type, noise_learn = false) + + elseif case ∈ ["RF-scalar", "Prior"] + rank = n_dimensions #<= 10 ? n_dimensions : 10 + kernel_structure = SeparableKernel(LowRankFactor(rank, nugget), OneDimFactor()) + n_features = n_dimensions <= 10 ?
n_dimensions * 100 : 1000 + if (n_features / n_train_pts > 0.9) && (n_features / n_train_pts < 1.1) + @warn "The number of features is close to the number of training points; poor performance is expected, so change one or the other" + end + mlt = ScalarRandomFeatureInterface( + n_features, + n_dimensions, + rng = rng, + kernel_structure = kernel_structure, + optimizer_options = deepcopy(overrides), + ) + end + + # Emulate + times[rep_idx] = @elapsed begin + emulator = Emulator(mlt, iopairs; obs_noise_cov = Γ * I, decorrelate = decorrelate) + optimize_hyperparameters!(emulator) + end + + if case == "RF-scalar" + diag_tmp = reduce(hcat, get_optimizer(mlt)) # (n_iteration, dim_output=1) convergence for each scalar mode as cols + push!(opt_diagnostics, diag_tmp) + end + + @info "statistics of training time for case $(case): \n mean(s): $(mean(times[1:rep_idx])) \n var(s) : $(var(times[1:rep_idx]))" + # predict on all Sobol points with emulator (example) + y_pred, y_var = predict(emulator, samples', transform_to_real = true) + + # obtain emulated Sobol indices + result_pred = analyze(data, y_pred') + println("First order: ", result_pred[:firstorder]) + println("Total order: ", result_pred[:totalorder]) + + push!(y_preds, y_pred) + push!(result_preds, result_pred) + GC.gc() #collect garbage + + # Plotting: + fontsize = 24 + if rep_idx == 1 + f3 = Figure(markersize = 8, fontsize = fontsize) + ax3 = Axis(f3[1, 1]) + scatter!( + ax3, + 1:n_dimensions, + result_preds[1][:firstorder]; + color = :red, + marker = :cross, + label = "V-emulate", + ) + scatter!(ax3, result[:firstorder], color = :red, markersize = 8, label = "V-approx") + scatter!(ax3, V, color = :red, markersize = 12, marker = :xcross, label = "V-true") + scatter!( + ax3, + 1:n_dimensions, + result_preds[1][:totalorder]; + color = :blue, + label = "TV-emulate", + markersize = 8, + marker = :cross, + ) + scatter!(ax3, result[:totalorder], color = :blue, markersize = 8, label = "TV-approx") + scatter!(ax3, TV, color = :blue, markersize = 12, marker = :xcross, label = "TV-true") + axislegend(ax3) + + CairoMakie.save( + joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).png"), + f3, + px_per_unit = 3, + ) + CairoMakie.save( + joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).pdf"), + f3, + px_per_unit = 3, + ) + else + # get percentiles: + fo_mat = zeros(n_dimensions, rep_idx) + to_mat = zeros(n_dimensions, rep_idx) + + for (idx, rp) in enumerate(result_preds) + fo_mat[:, idx] = rp[:firstorder] + to_mat[:, idx] = rp[:totalorder] + end + + firstorder_med = percentile.(eachrow(fo_mat), 50) + firstorder_low = percentile.(eachrow(fo_mat), 5) + firstorder_up = percentile.(eachrow(fo_mat), 95) + + totalorder_med = percentile.(eachrow(to_mat), 50) + totalorder_low = percentile.(eachrow(to_mat), 5) + totalorder_up = percentile.(eachrow(to_mat), 95) + + println("(50%) firstorder: ", firstorder_med) + println("(5%) firstorder: ", firstorder_low) + println("(95%) firstorder: ", firstorder_up) + + println("(50%) totalorder: ", totalorder_med) + println("(5%) totalorder: ", totalorder_low) + println("(95%) totalorder: ", totalorder_up) + # + f3 = Figure(markersize = 8, fontsize = fontsize) + ax3 = Axis(f3[1, 1]) + errorbars!( + ax3, + 1:n_dimensions, + firstorder_med, + firstorder_med - firstorder_low, + firstorder_up - firstorder_med; + whiskerwidth = 10, + color = :red, + label = "V-emulate", + ) + scatter!(ax3, result[:firstorder], color = :red, markersize = 8, label = "V-approx") + scatter!(ax3, V, color = :red,
markersize = 12, marker = :xcross, label = "V-true") + errorbars!( + ax3, + 1:n_dimensions, + totalorder_med, + totalorder_med - totalorder_low, + totalorder_up - totalorder_med; + whiskerwidth = 10, + color = :blue, + label = "TV-emulate", + ) + scatter!(ax3, result[:totalorder], color = :blue, markersize = 8, label = "TV-approx") + scatter!(ax3, TV, color = :blue, markersize = 12, marker = :xcross, label = "TV-true") + axislegend(ax3) + + CairoMakie.save( + joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).png"), + f3, + px_per_unit = 3, + ) + CairoMakie.save( + joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).pdf"), + f3, + px_per_unit = 3, + ) + + end + # plots - first 3 dimensions + if rep_idx == 1 + f2 = Figure(resolution = (1.618 * plot_dim * 300, 300), markersize = 4) + for i in 1:plot_dim + ax2 = Axis(f2[1, i], xlabel = "x" * string(i), ylabel = "f") + scatter!(ax2, samples[:, i], y_preds[1][:], color = :blue) + scatter!(ax2, samples[ind, i], y[ind] + noise, color = :red, markersize = 8) + end + CairoMakie.save( + joinpath(output_directory, "GFunction_slices_$(case)_$(n_dimensions).png"), + f2, + px_per_unit = 3, + ) + CairoMakie.save( + joinpath(output_directory, "GFunction_slices_$(case)_$(n_dimensions).pdf"), + f2, + px_per_unit = 3, + ) + end + end + + if length(opt_diagnostics) > 0 + err_cols = reduce(hcat, opt_diagnostics) # one column of EKI errors per repeat + + #save + error_filepath = joinpath(output_directory, "eki_conv_error.jld2") + save(error_filepath, "error", err_cols) + + # print all repeats + f3 = Figure(resolution = (1.618 * 300, 300), markersize = 4) + ax_conv = Axis(f3[1, 1], xlabel = "Iteration", ylabel = "max-normalized error") + + if n_repeats == 1 + lines!(ax_conv, collect(1:size(err_cols, 1))[:], err_cols[:], color = :blue) # If just one repeat + else + err_normalized = (err_cols' ./ err_cols[1, :])' # divide each series by its first entry, so all errors start at 1 + series!(ax_conv, err_normalized', solid_color = :blue) + end + + save(joinpath(output_directory, "GFunction_eki-conv_$(case)_$(n_dimensions).png"), f3, px_per_unit = 3) + save(joinpath(output_directory, "GFunction_eki-conv_$(case)_$(n_dimensions).pdf"), f3, px_per_unit = 3) + + end + + + println(" ") + println("True Sobol Indices") + println("******************") + println(" firstorder: ", V) + println(" totalorder: ", TV) + println(" ") + println("Sampled truth Sobol Indices (# points $n_data)") + println("***************************") + println(" firstorder: ", result[:firstorder]) + println(" totalorder: ", result[:totalorder]) + println(" ") + + println("Sampled Emulated Sobol Indices (# obs $n_train_pts, noise var $Γ)") + println("***************************************************************") + + + jldsave( + joinpath(output_directory, "GFunction_$(case)_$(n_dimensions).jld2"); + sobol_pts = samples, + train_idx = ind, + analytic_V = V, + analytic_TV = TV, + estimated_sobol = result, + mlt_sobol = result_preds, + mlt_pred_y = y_preds, + true_y = y, + noise_y = Γ, + observed_y = output, + ) + + + return y_preds, result_preds +end + + +main() diff --git a/examples/Emulator/G-function/plot_result.jl b/examples/Emulator/G-function/plot_result.jl new file mode 100644 index 000000000..03455b87a --- /dev/null +++ b/examples/Emulator/G-function/plot_result.jl @@ -0,0 +1,154 @@ +using CairoMakie, ColorSchemes #for plots +using JLD2 # for load +import StatsBase: percentile + + + +function main() + + output_directory = "output" + cases = ["Prior", "GP", "RF-scalar"] + case = cases[3] +
n_dimensions = 3 + filename = joinpath(output_directory, "Gfunction_$(case)_$(n_dimensions).jld2") + legend = true + + ( + sobol_pts, + train_idx, + mlt_pred_y, + mlt_sobol, + analytic_V, + analytic_TV, + true_y, + noise_y, + observed_y, + estimated_sobol, + ) = load( + filename, + "sobol_pts", + "train_idx", + "mlt_pred_y", + "mlt_sobol", + "analytic_V", + "analytic_TV", + "true_y", + "noise_y", + "observed_y", + "estimated_sobol", + ) + + n_repeats = length(mlt_sobol) + fontsize = 24 + if n_repeats == 1 + f3 = Figure(markersize = 8, fontsize = fontsize) + ax3 = Axis(f3[1, 1], xticks = 1:2:n_dimensions, ylabel = "Sobol Index", xlabel = "i") + scatter!( + ax3, + 1:n_dimensions, + mlt_sobol[1][:firstorder]; + color = :red, + markersize = 8, + marker = :cross, + label = "V-emulate", + ) + scatter!(ax3, estimated_sobol[:firstorder], color = :red, markersize = 8, label = "V-approx") + scatter!(ax3, analytic_V, color = :red, markersize = 12, marker = :xcross, label = "V-true") + scatter!( + ax3, + 1:n_dimensions, + mlt_sobol[1][:totalorder]; + color = :blue, + label = "TV-emulate", + markersize = 8, + marker = :cross, + ) + scatter!(ax3, estimated_sobol[:totalorder], color = :blue, markersize = 8, label = "TV-approx") + scatter!(ax3, analytic_TV, color = :blue, markersize = 12, marker = :xcross, label = "TV-true") + if legend + axislegend(ax3) + end + png_out = joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).png") + pdf_out = joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).pdf") + CairoMakie.save(png_out, f3, px_per_unit = 3) + CairoMakie.save(pdf_out, f3, px_per_unit = 3) + @info "Plotted sensitivities, case dim = $n_dimensions, \n storing plots in: \n $png_out \n $pdf_out" + + else + # get percentiles: + fo_mat = zeros(n_dimensions, n_repeats) + to_mat = zeros(n_dimensions, n_repeats) + + for (idx, rp) in enumerate(mlt_sobol) + fo_mat[:, idx] = rp[:firstorder] + to_mat[:, idx] = rp[:totalorder] + end + + firstorder_med = percentile.(eachrow(fo_mat), 50) + firstorder_low = percentile.(eachrow(fo_mat), 5) + firstorder_up = percentile.(eachrow(fo_mat), 95) + + totalorder_med = percentile.(eachrow(to_mat), 50) + totalorder_low = percentile.(eachrow(to_mat), 5) + totalorder_up = percentile.(eachrow(to_mat), 95) + + println("(50%) firstorder: ", firstorder_med) + println("(5%) firstorder: ", firstorder_low) + println("(95%) firstorder: ", firstorder_up) + + println("(50%) totalorder: ", totalorder_med) + println("(5%) totalorder: ", totalorder_low) + println("(95%) totalorder: ", totalorder_up) + # + f3 = Figure(markersize = 8, fontsize = fontsize) + ax3 = Axis(f3[1, 1], xticks = 1:2:n_dimensions, ylabel = "Sobol Index", xlabel = "i") + errorbars!( + ax3, + 1:n_dimensions, + firstorder_med, + firstorder_med - firstorder_low, + firstorder_up - firstorder_med; + whiskerwidth = 10, + color = :red, + label = "V-emulate", + ) + scatter!(ax3, estimated_sobol[:firstorder], color = :red, markersize = 8, label = "V-approx") + scatter!(ax3, analytic_V, color = :red, markersize = 12, marker = :xcross, label = "V-true") + errorbars!( + ax3, + 1:n_dimensions, + totalorder_med, + totalorder_med - totalorder_low, + totalorder_up - totalorder_med; + whiskerwidth = 10, + color = :blue, + label = "TV-emulate", + ) + scatter!(ax3, estimated_sobol[:totalorder], color = :blue, markersize = 8, label = "TV-approx") + scatter!(ax3, analytic_TV, color = :blue, markersize = 12, marker = :xcross, label = "TV-true") + if legend + axislegend(ax3) + end + png_out = 
joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).png") + pdf_out = joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).pdf") + CairoMakie.save(png_out, f3, px_per_unit = 3) + CairoMakie.save(pdf_out, f3, px_per_unit = 3) + @info "Plotted sensitivities, case dim = $n_dimensions, \n storing plots in: \n $png_out \n $pdf_out" + + end + # plots - first 3 dimensions + plot_dim = n_dimensions >= 3 ? 3 : n_dimensions + f2 = Figure(resolution = (1.618 * plot_dim * 300, 300), markersize = 4, fontsize = fontsize) + for i in 1:plot_dim + ax2 = Axis(f2[1, i], xlabel = "x" * string(i), ylabel = "f") + scatter!(ax2, sobol_pts[:, i], mlt_pred_y[1][:], color = :blue) + scatter!(ax2, sobol_pts[train_idx, i], observed_y[:], color = :red, markersize = 8) + end + png_out = joinpath(output_directory, "GFunction_slices_$(case)_$(n_dimensions).png") + pdf_out = joinpath(output_directory, "GFunction_slices_$(case)_$(n_dimensions).pdf") + CairoMakie.save(png_out, f2, px_per_unit = 3) + CairoMakie.save(pdf_out, f2, px_per_unit = 3) + @info "Plotted slices, case dim = $n_dimensions, \n storing plots in: \n $png_out \n $pdf_out" + +end + + +main() diff --git a/examples/Emulator/Ishigami/Project.toml b/examples/Emulator/Ishigami/Project.toml index 930071c5b..4c618efd4 100644 --- a/examples/Emulator/Ishigami/Project.toml +++ b/examples/Emulator/Ishigami/Project.toml @@ -5,5 +5,6 @@ ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" GlobalSensitivityAnalysis = "1b10255b-6da3-57ce-9089-d24e8517b87e" +JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" diff --git a/examples/Emulator/Ishigami/emulate.jl b/examples/Emulator/Ishigami/emulate.jl index ae367270c..45e04acd1 100644 --- a/examples/Emulator/Ishigami/emulate.jl +++ b/examples/Emulator/Ishigami/emulate.jl @@ -1,4 +1,5 @@ + using GlobalSensitivityAnalysis const GSA = GlobalSensitivityAnalysis using Distributions @@ -9,6 +10,9 @@ using LinearAlgebra using CalibrateEmulateSample.EnsembleKalmanProcesses using CalibrateEmulateSample.Emulators using CalibrateEmulateSample.DataContainers +using CalibrateEmulateSample.EnsembleKalmanProcesses.Localizers + +using JLD2 using CairoMakie, ColorSchemes #for plots seed = 2589456 @@ -32,7 +36,7 @@ function main() rng = MersenneTwister(seed) - n_repeats = 20 # repeat exp with same data. + n_repeats = 30 # repeat exp with same data. 
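+ # NB: the training data are fixed across repeats; for the RF cases only the random feature draws and the EKI-based hyperparameter optimization change between repeats, so the spread across repeats measures emulator variability rather than data variability.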
# To create the sampling n_data_gen = 2000 @@ -51,7 +55,7 @@ function main() # perform Sobol Analysis result = analyze(data, y) - f1 = Figure(resolution = (1.618 * 900, 300), markersize = 4) + f1 = Figure(resolution = (1.618 * 900, 300), markersize = 4, fontsize = 28) axx = Axis(f1[1, 1], xlabel = "x1", ylabel = "f") axy = Axis(f1[1, 2], xlabel = "x2", ylabel = "f") axz = Axis(f1[1, 3], xlabel = "x3", ylabel = "f") @@ -81,9 +85,13 @@ function main() case = cases[3] decorrelate = true nugget = Float64(1e-12) - - overrides = - Dict("verbose" => true, "scheduler" => DataMisfitController(terminate_at = 1e4), "n_features_opt" => 200) + overrides = Dict( + "scheduler" => DataMisfitController(terminate_at = 1e4), + "n_features_opt" => 150, + "n_ensemble" => 30, + "n_iteration" => 20, + "accelerator" => NesterovAccelerator(), + ) if case == "Prior" # don't do anything overrides["n_iteration"] = 0 @@ -92,7 +100,7 @@ function main() y_preds = [] result_preds = [] - + opt_diagnostics = [] for rep_idx in 1:n_repeats # Build ML tools @@ -118,6 +126,11 @@ function main() emulator = Emulator(mlt, iopairs; obs_noise_cov = Γ * I, decorrelate = decorrelate) optimize_hyperparameters!(emulator) + # get EKP errors - just stored in "optimizer" box for now + if case == "RF-scalar" + diag_tmp = reduce(hcat, get_optimizer(mlt)) # (n_iteration, dim_output=1) convergence for each scalar mode as cols + push!(opt_diagnostics, diag_tmp) + end # predict on all Sobol points with emulator (example) y_pred, y_var = predict(emulator, samples', transform_to_real = true) @@ -126,8 +139,10 @@ function main() push!(y_preds, y_pred) push!(result_preds, result_pred) + jldsave(joinpath(output_directory, "emulator_repeat_$(rep_idx)_$(case).jld2"); emulator) end + # analytic sobol indices a = 7 b = 0.1 @@ -139,6 +154,18 @@ function main() VT2 = a^2 / 8 VT3 = 8 * b^2 * π^8 / 225 + jldsave( + joinpath(output_directory, "results_$case.jld2"); + sobol_pts = samples, + train_idx = ind, + mlt_pred_y = y_preds, + mlt_sobol = result_preds, + analytic_sobol = [V, V1, V2, V3, VT1, VT2, VT3], + true_y = y, + noise_sample = noise, + noise_cov = Γ, + estimated_sobol = result, + ) println(" ") println("True Sobol Indices") @@ -171,7 +198,7 @@ function main() # plots - f2 = Figure(resolution = (1.618 * 900, 300), markersize = 4) + f2 = Figure(resolution = (1.618 * 900, 300), markersize = 4, fontsize = 28) axx_em = Axis(f2[1, 1], xlabel = "x1", ylabel = "f") axy_em = Axis(f2[1, 2], xlabel = "x2", ylabel = "f") axz_em = Axis(f2[1, 3], xlabel = "x3", ylabel = "f") @@ -186,6 +213,30 @@ function main() save(joinpath(output_directory, "ishigami_slices_$(case).pdf"), f2, px_per_unit = 3) + if length(opt_diagnostics) > 0 + err_cols = reduce(hcat, opt_diagnostics) #error for each repeat as columns? 
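+ # NB: each entry of opt_diagnostics is an (n_iteration x 1) column of EKI errors, so err_cols is (n_iteration x n_repeats); e.g. err_cols[:, 3] is the error trajectory of the third repeat.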
+ + #save + error_filepath = joinpath(output_directory, "eki_conv_error.jld2") + save(error_filepath, "error", err_cols) + + # print all repeats + f3 = Figure(resolution = (1.618 * 300, 300), markersize = 4) + ax_conv = Axis(f3[1, 1], xlabel = "Iteration", ylabel = "max-normalized error") + + if n_repeats == 1 + lines!(ax_conv, collect(1:size(err_cols, 1))[:], err_cols[:], color = :blue) # If just one repeat + else + err_normalized = (err_cols' ./ err_cols[1, :])' # divide each series by its first entry, so all errors start at 1 + series!(ax_conv, err_normalized', solid_color = :blue) + end + + save(joinpath(output_directory, "ishigami_eki-conv_$(case).png"), f3, px_per_unit = 3) + save(joinpath(output_directory, "ishigami_eki-conv_$(case).pdf"), f3, px_per_unit = 3) + + end end diff --git a/examples/Emulator/Ishigami/plot_result.jl b/examples/Emulator/Ishigami/plot_result.jl new file mode 100644 index 000000000..18345977e --- /dev/null +++ b/examples/Emulator/Ishigami/plot_result.jl @@ -0,0 +1,99 @@ +using CairoMakie, ColorSchemes #for plots +using JLD2 +import Distributions: mean, std # (re-exported from Statistics; Distributions is already a dep of this example) + + +function main() + + + + + output_directory = "output" + cases = ["Prior", "GP", "RF-scalar"] + case = cases[2] + filename = joinpath(output_directory, "results_$case.jld2") + + (sobol_pts, train_idx, mlt_pred_y, mlt_sobol, analytic_sobol, true_y, noise_sample, noise_cov, estimated_sobol) = + load( + filename, + "sobol_pts", + "train_idx", + "mlt_pred_y", + "mlt_sobol", + "analytic_sobol", + "true_y", + "noise_sample", + "noise_cov", + "estimated_sobol", + ) + n_data = size(sobol_pts, 1) + n_train_pts = length(train_idx) + n_repeats = length(mlt_sobol) + (V, V1, V2, V3, VT1, VT2, VT3) = analytic_sobol + + + fontsize = 28 + f1 = Figure(resolution = (1.618 * 900, 300), markersize = 4, fontsize = fontsize) + axx = Axis(f1[1, 1], xlabel = "x1", ylabel = "f") + axy = Axis(f1[1, 2], xlabel = "x2", ylabel = "f") + axz = Axis(f1[1, 3], xlabel = "x3", ylabel = "f") + + scatter!(axx, sobol_pts[:, 1], true_y[:], color = :orange) + scatter!(axy, sobol_pts[:, 2], true_y[:], color = :orange) + scatter!(axz, sobol_pts[:, 3], true_y[:], color = :orange) + + save(joinpath(output_directory, "ishigami_slices_truth.png"), f1, px_per_unit = 3) + save(joinpath(output_directory, "ishigami_slices_truth.pdf"), f1, px_per_unit = 3) + + # display some info + println(" ") + println("True Sobol Indices") + println("******************") + println(" firstorder: ", [V1 / V, V2 / V, V3 / V]) + println(" totalorder: ", [VT1 / V, VT2 / V, VT3 / V]) + println(" ") + println("Sampled truth Sobol Indices (# points $n_data)") + println("***************************") + println(" firstorder: ", estimated_sobol[:firstorder]) + println(" totalorder: ", estimated_sobol[:totalorder]) + println(" ") + + println("Sampled Emulated Sobol Indices (# obs $n_train_pts, noise var $noise_cov)") + println("***************************************************************") + if n_repeats == 1 + println(" firstorder: ", mlt_sobol[1][:firstorder]) + println(" totalorder: ", mlt_sobol[1][:totalorder]) + else + firstorder_mean = mean([ms[:firstorder] for ms in mlt_sobol]) + firstorder_std = std([ms[:firstorder] for ms in mlt_sobol]) + totalorder_mean = mean([ms[:totalorder] for ms in mlt_sobol]) + totalorder_std = std([ms[:totalorder] for ms in mlt_sobol]) + + println("(mean) firstorder: ", firstorder_mean) + println("(std) firstorder: ", firstorder_std) + println("(mean) totalorder: ", totalorder_mean) + println("(std) totalorder: ", totalorder_std) + end +
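+ # (optional) percentile summaries across repeats (for n_repeats > 1), mirroring G-function/plot_result.jl; a minimal sketch, assuming `import StatsBase: percentile` is added (StatsBase is not currently a dep of this example's Project.toml): + # fo_mat = reduce(hcat, [ms[:firstorder] for ms in mlt_sobol]) # 3 x n_repeats + # println("(50%) firstorder: ", percentile.(eachrow(fo_mat), 50)) + # println("(5%) firstorder: ", percentile.(eachrow(fo_mat), 5)) + # println("(95%) firstorder: ", percentile.(eachrow(fo_mat), 95))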
+ # plots + + f2 = Figure(resolution = (1.618 * 900, 300), markersize = 4, fontsize = fontsize) + axx_em = Axis(f2[1, 1], xlabel = "x1", ylabel = "f") + axy_em = Axis(f2[1, 2], xlabel = "x2", ylabel = "f") + axz_em = Axis(f2[1, 3], xlabel = "x3", ylabel = "f") + scatter!(axx_em, sobol_pts[:, 1], mlt_pred_y[1][:], color = :blue) + scatter!(axy_em, sobol_pts[:, 2], mlt_pred_y[1][:], color = :blue) + scatter!(axz_em, sobol_pts[:, 3], mlt_pred_y[1][:], color = :blue) + scatter!(axx_em, sobol_pts[train_idx, 1], true_y[train_idx] + noise_sample, color = :red, markersize = 8) + scatter!(axy_em, sobol_pts[train_idx, 2], true_y[train_idx] + noise_sample, color = :red, markersize = 8) + scatter!(axz_em, sobol_pts[train_idx, 3], true_y[train_idx] + noise_sample, color = :red, markersize = 8) + + save(joinpath(output_directory, "ishigami_slices_$(case).png"), f2, px_per_unit = 3) + save(joinpath(output_directory, "ishigami_slices_$(case).pdf"), f2, px_per_unit = 3) + + + + +end + + +main() diff --git a/examples/Emulator/L63/Project.toml b/examples/Emulator/L63/Project.toml index e2c9baa08..f50da38dd 100644 --- a/examples/Emulator/L63/Project.toml +++ b/examples/Emulator/L63/Project.toml @@ -3,7 +3,9 @@ CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" CalibrateEmulateSample = "95e48a1f-0bec-4818-9538-3db4340308e3" ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +EnsembleKalmanProcesses = "aa8a2aa5-91d8-4396-bcef-d4f2ec43552d" JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" diff --git a/examples/Emulator/L63/emulate.jl b/examples/Emulator/L63/emulate.jl index d3af3eb77..64ee4b6c3 100644 --- a/examples/Emulator/L63/emulate.jl +++ b/examples/Emulator/L63/emulate.jl @@ -8,6 +8,7 @@ using JLD2 using CalibrateEmulateSample.Emulators using CalibrateEmulateSample.EnsembleKalmanProcesses using CalibrateEmulateSample.DataContainers +using EnsembleKalmanProcesses.Localizers function lorenz(du, u, p, t) du[1] = 10.0 * (u[2] - u[1]) @@ -23,11 +24,14 @@ function main() end # rng - rng = MersenneTwister(1232434) + rng = MersenneTwister(1232435) n_repeats = 20 # repeat exp with same data. println("run experiment $n_repeats times") + #for later plots + fontsize = 20 + wideticks = WilkinsonTicks(3, k_min = 3, k_max = 4) # prefer few ticks # Run L63 from 0 -> tmax @@ -70,7 +74,7 @@ function main() # Create training pairs (with noise) from subsampling [burnin,tmax] tburn = 1 # NB works better with no spin-up! 
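+ # with dt = 0.01, tburn = 1 discards the first Int(floor(1 / 0.01)) = 100 steps before the training pairs are subsampled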
burnin = Int(floor(tburn / dt)) - n_train_pts = 600 + n_train_pts = 500 sample_rand = true if sample_rand ind = Int.(shuffle!(rng, Vector(burnin:(tmax / dt - 1)))[1:n_train_pts]) @@ -80,7 +84,9 @@ function main() n_tp = length(ind) input = zeros(3, n_tp) output = zeros(3, n_tp) - Γy = 1e-4 * I(3) + noise_var = 1e-4 + Γy = noise_var * I(3) + @info "with noise size: $(noise_var)" noise = rand(rng, MvNormal(zeros(3), Γy), n_tp) for i in 1:n_tp input[:, i] = sol.u[ind[i]] @@ -90,29 +96,60 @@ function main() # Emulate - cases = ["GP", "RF-scalar", "RF-scalar-diagin", "RF-svd-nonsep", "RF-nosvd-nonsep", "RF-nosvd-sep"] - - case = cases[1] - - nugget = Float64(1e-12) + cases = [ + "GP", + "RF-prior", + "RF-scalar", + "RF-scalar-diagin", + "RF-svd-nonsep", + "RF-nosvd-nonsep", + "RF-nosvd-sep", + "RF-svd-sep", + ] + + case = cases[7] + + nugget = Float64(1e-8) u_test = [] u_hist = [] train_err = [] + opt_diagnostics = [] for rep_idx in 1:n_repeats - + @info "Repeat: $(rep_idx)" rf_optimizer_overrides = Dict( "scheduler" => DataMisfitController(terminate_at = 1e4), - "cov_sample_multiplier" => 0.5, - "n_features_opt" => 400, - "n_iteration" => 30, - "accelerator" => ConstantStepNesterovAccelerator(), + "cov_sample_multiplier" => 1.0, #5.0, + "n_features_opt" => 150, + "n_iteration" => 10, + #"accelerator" => DefaultAccelerator(), + #"localization" => EnsembleKalmanProcesses.Localizers.SECNice(0.01,1.0), # localization / s + "n_ensemble" => 200, + "verbose" => true, + "n_cross_val_sets" => 2, ) + # Build ML tools if case == "GP" gppackage = Emulators.GPJL() pred_type = Emulators.YType() mlt = GaussianProcess(gppackage; prediction_type = pred_type, noise_learn = false) + elseif case == "RF-prior" + #No optimization + rf_optimizer_overrides["n_iteration"] = 0 + rf_optimizer_overrides["cov_sample_multiplier"] = 0.1 + # put in whatever you want to reflect + kernel_structure = SeparableKernel(LowRankFactor(3, nugget), LowRankFactor(3, nugget)) + n_features = 500 + mlt = VectorRandomFeatureInterface( + n_features, + 3, + 3, + rng = rng, + kernel_structure = kernel_structure, + optimizer_options = rf_optimizer_overrides, + ) + elseif case ∈ ["RF-scalar", "RF-scalar-diagin"] n_features = 10 * Int(floor(sqrt(3 * n_tp))) kernel_structure = @@ -126,7 +163,7 @@ function main() optimizer_options = rf_optimizer_overrides, ) elseif case ∈ ["RF-svd-nonsep"] - kernel_structure = NonseparableKernel(LowRankFactor(6, nugget)) + kernel_structure = NonseparableKernel(LowRankFactor(4, nugget)) n_features = 500 mlt = VectorRandomFeatureInterface( @@ -148,8 +185,8 @@ function main() kernel_structure = kernel_structure, optimizer_options = rf_optimizer_overrides, ) - elseif case ∈ ["RF-nosvd-sep"] - kernel_structure = SeparableKernel(LowRankFactor(3, nugget), LowRankFactor(3, nugget)) + elseif case ∈ ["RF-nosvd-sep", "RF-svd-sep"] + kernel_structure = SeparableKernel(LowRankFactor(3, nugget), LowRankFactor(1, nugget)) n_features = 500 mlt = VectorRandomFeatureInterface( n_features, @@ -161,6 +198,19 @@ function main() ) end + #save config for RF + if !(case == "GP") && (rep_idx == 1) + JLD2.save( + joinpath(output_directory, case * "_l63_config.jld2"), + "rf_optimizer_overrides", + rf_optimizer_overrides, + "n_features", + n_features, + "kernel_structure", + kernel_structure, + ) + end + # Emulate if case ∈ ["RF-nosvd-nonsep", "RF-nosvd-sep"] decorrelate = false @@ -170,6 +220,11 @@ function main() emulator = Emulator(mlt, iopairs; obs_noise_cov = Γy, decorrelate = decorrelate) optimize_hyperparameters!(emulator) + # 
diagnostics + if case ∈ ["RF-svd-nonsep", "RF-nosvd-nonsep", "RF-svd-sep"] + push!(opt_diagnostics, get_optimizer(mlt)[1]) #length-1 vec of vec -> vec + end + # Predict with emulator u_test_tmp = zeros(3, length(xspan_test)) @@ -201,11 +256,12 @@ function main() # plots for the first repeat if rep_idx == 1 + # plotting trace - f = Figure(resolution = (900, 450)) - axx = Axis(f[1, 1], xlabel = "time", ylabel = "x") - axy = Axis(f[2, 1], xlabel = "time", ylabel = "y") - axz = Axis(f[3, 1], xlabel = "time", ylabel = "z") + f = Figure(size = (900, 450), fontsize = fontsize) + axx = Axis(f[1, 1], ylabel = "x", yticks = wideticks) + axy = Axis(f[2, 1], ylabel = "y", yticks = wideticks) + axz = Axis(f[3, 1], xlabel = "time", ylabel = "z", yticks = [10, 30, 50]) xx = 0:dt:tmax_test lines!(axx, xx, u_test_tmp[1, :], color = :blue) @@ -222,7 +278,7 @@ function main() save(joinpath(output_directory, case * "_l63_test.pdf"), f, pt_per_unit = 3) # plot attractor - f3 = Figure() + f3 = Figure(fontsize = fontsize) lines(f3[1, 1], u_test_tmp[1, :], u_test_tmp[3, :], color = :blue) lines(f3[2, 1], solplot[1, :], solplot[3, :], color = :orange) @@ -231,14 +287,18 @@ function main() save(joinpath(output_directory, case * "_l63_attr.pdf"), f3, pt_per_unit = 3) # plotting histograms - f2 = Figure() - hist(f2[1, 1], u_hist_tmp[1, :], bins = 50, normalization = :pdf, color = (:blue, 0.5)) - hist(f2[1, 2], u_hist_tmp[2, :], bins = 50, normalization = :pdf, color = (:blue, 0.5)) - hist(f2[1, 3], u_hist_tmp[3, :], bins = 50, normalization = :pdf, color = (:blue, 0.5)) + f2 = Figure(fontsize = 1.25 * fontsize) + axx = Axis(f2[1, 1], xlabel = "x", ylabel = "pdf", xticks = wideticks, yticklabelsvisible = false) + axy = Axis(f2[1, 2], xlabel = "y", xticks = wideticks, yticklabelsvisible = false) + axz = Axis(f2[1, 3], xlabel = "z", xticks = [10, 30, 50], yticklabelsvisible = false) - hist!(f2[1, 1], solhist[1, :], bins = 50, normalization = :pdf, color = (:orange, 0.5)) - hist!(f2[1, 2], solhist[2, :], bins = 50, normalization = :pdf, color = (:orange, 0.5)) - hist!(f2[1, 3], solhist[3, :], bins = 50, normalization = :pdf, color = (:orange, 0.5)) + hist!(axx, u_hist_tmp[1, :], bins = 50, normalization = :pdf, color = (:blue, 0.5)) + hist!(axy, u_hist_tmp[2, :], bins = 50, normalization = :pdf, color = (:blue, 0.5)) + hist!(axz, u_hist_tmp[3, :], bins = 50, normalization = :pdf, color = (:blue, 0.5)) + + hist!(axx, solhist[1, :], bins = 50, normalization = :pdf, color = (:orange, 0.5)) + hist!(axy, solhist[2, :], bins = 50, normalization = :pdf, color = (:orange, 0.5)) + hist!(axz, solhist[3, :], bins = 50, normalization = :pdf, color = (:orange, 0.5)) # save save(joinpath(output_directory, case * "_l63_pdf.png"), f2, px_per_unit = 3) @@ -252,6 +312,30 @@ function main() JLD2.save(joinpath(output_directory, case * "_l63_histdata.jld2"), "solhist", solhist, "uhist", u_hist) JLD2.save(joinpath(output_directory, case * "_l63_testdata.jld2"), "solplot", solplot, "uplot", u_test) + # plot eki convergence plot + if length(opt_diagnostics) > 0 + err_cols = reduce(hcat, opt_diagnostics) #error for each repeat as columns? 
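+ # NB: in the plot below each column of err_cols is divided by its first entry, so every repeat starts at error 1 and the log10 axis shows the relative decay of the EKI misfit per iteration.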
+ + #save + error_filepath = joinpath(output_directory, "eki_conv_error.jld2") + save(error_filepath, "error", err_cols) + + # print all repeats + f5 = Figure(resolution = (1.618 * 300, 300), markersize = 4) + ax_conv = Axis(f5[1, 1], xlabel = "Iteration", ylabel = "max-normalized error", yscale = log10) + if n_repeats == 1 + lines!(ax_conv, collect(1:size(err_cols, 1))[:], err_cols[:], color = :blue) # If just one repeat + else + err_normalized = (err_cols' ./ err_cols[1, :])' # divide each series by its first entry, so all errors start at 1 + series!(ax_conv, err_normalized', solid_color = :blue) + end + save(joinpath(output_directory, "l63_eki-conv_$(case).png"), f5, px_per_unit = 3) + save(joinpath(output_directory, "l63_eki-conv_$(case).pdf"), f5, px_per_unit = 3) + + end + # compare marginal histograms to truth - rough measure of fit sol_cdf = sort(solhist, dims = 2) @@ -261,10 +345,10 @@ function main() push!(u_cdf, u_cdf_tmp) end - f4 = Figure(resolution = (900, Int(floor(900 / 1.618)))) - axx = Axis(f4[1, 1], xlabel = "", ylabel = "x") - axy = Axis(f4[1, 2], xlabel = "", ylabel = "y") - axz = Axis(f4[1, 3], xlabel = "", ylabel = "z") + f4 = Figure(size = (900, Int(floor(900 / 1.618))), fontsize = 1.5 * fontsize) + axx = Axis(f4[1, 1], xlabel = "x", ylabel = "cdf", xticks = wideticks) + axy = Axis(f4[1, 2], xlabel = "y", xticks = wideticks, yticklabelsvisible = false) + axz = Axis(f4[1, 3], xlabel = "z", xticks = [10, 30, 50], yticklabelsvisible = false) unif_samples = (1:size(sol_cdf, 2)) / size(sol_cdf, 2) @@ -278,8 +362,6 @@ function main() lines!(axy, sol_cdf[2, :], unif_samples, color = (:orange, 1.0), linewidth = 4) lines!(axz, sol_cdf[3, :], unif_samples, color = (:orange, 1.0), linewidth = 4) - - # save save(joinpath(output_directory, case * "_l63_cdfs.png"), f4, px_per_unit = 3) save(joinpath(output_directory, case * "_l63_cdfs.pdf"), f4, pt_per_unit = 3) diff --git a/examples/Emulator/L63/emulate_diff-rank-test.jl b/examples/Emulator/L63/emulate_diff-rank-test.jl new file mode 100644 index 000000000..222147bf2 --- /dev/null +++ b/examples/Emulator/L63/emulate_diff-rank-test.jl @@ -0,0 +1,288 @@ +using OrdinaryDiffEq +using Random, Distributions, LinearAlgebra +ENV["GKSwstype"] = "100" +using CairoMakie, ColorSchemes #for plots +using JLD2 + +# CES +using CalibrateEmulateSample.Emulators +using CalibrateEmulateSample.EnsembleKalmanProcesses +using CalibrateEmulateSample.DataContainers +using EnsembleKalmanProcesses.Localizers + +function lorenz(du, u, p, t) + du[1] = 10.0 * (u[2] - u[1]) + du[2] = u[1] * (28.0 - u[3]) - u[2] + du[3] = u[1] * u[2] - (8 / 3) * u[3] +end + +function main() + + output_directory = joinpath(@__DIR__, "output") + if !isdir(output_directory) + mkdir(output_directory) + end + + # rng + rng = MersenneTwister(1232435) + + n_repeats = 1 # 5 # repeat the experiment with the same data.
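+ # NB: for the separable-kernel cases, each rank_val in rank_test (defined below) encodes an (input, output) rank pair, decoded in the kernel branch as rank_out = Int(ceil(rank_val / 3)) (1,1,1,2,2,2,3,3,3) and rank_in = rank_val - 3 * (rank_out - 1) (1,2,3,1,2,3,1,2,3).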
+ println("run experiment $n_repeats times") + rank_test = 1:9 # must be 1:k for now + n_iteration = 20 + + #for later plots + fontsize = 20 + wideticks = WilkinsonTicks(3, k_min = 3, k_max = 4) # prefer few ticks + + + # Run L63 from 0 -> tmax + u0 = [1.0; 0.0; 0.0] + tmax = 20 + dt = 0.01 + tspan = (0.0, tmax) + prob = ODEProblem(lorenz, u0, tspan) + sol = solve(prob, Euler(), dt = dt) + + # Run L63 from end for test trajectory data + tmax_test = 100 + tspan_test = (0.0, tmax_test) + u0_test = sol.u[end] + prob_test = ODEProblem(lorenz, u0_test, tspan_test) + sol_test = solve(prob_test, Euler(), dt = dt) + + # Run L63 from end for histogram matching data + tmax_hist = 1 + tspan_hist = (0.0, tmax_hist) + u0_hist = sol_test.u[end] + prob_hist = ODEProblem(lorenz, u0_hist, tspan_hist) + sol_hist = solve(prob_hist, Euler(), dt = dt) + + + # test data for plotting + xspan_test = 0.0:dt:tmax_test + solplot = zeros(3, length(xspan_test)) + for i in 1:length(xspan_test) + solplot[:, i] = sol_test.u[i] #noiseless + end + + # hist data for plotting + xspan_hist = 0.0:dt:tmax_hist + solhist = zeros(3, length(xspan_hist)) + for i in 1:length(xspan_hist) + solhist[:, i] = sol_hist.u[i] #noiseless + end + + # Create training pairs (with noise) from subsampling [burnin,tmax] + tburn = 1 # NB works better with no spin-up! + burnin = Int(floor(tburn / dt)) + n_train_pts = 500 + sample_rand = true + if sample_rand + ind = Int.(shuffle!(rng, Vector(burnin:(tmax / dt - 1)))[1:n_train_pts]) + else + ind = burnin:(n_train_pts + burnin) + end + n_tp = length(ind) + input = zeros(3, n_tp) + output = zeros(3, n_tp) + noise_var = 1e-4 + Γy = noise_var * I(3) + @info "with noise size: $(noise_var)" + noise = rand(rng, MvNormal(zeros(3), Γy), n_tp) + for i in 1:n_tp + input[:, i] = sol.u[ind[i]] + output[:, i] = sol.u[ind[i] + 1] + noise[:, i] + end + iopairs = PairedDataContainer(input, output) + + + # Emulate + cases = [ + "GP", + "RF-prior", + "RF-scalar", + "RF-scalar-diagin", + "RF-svd-nonsep", + "RF-nosvd-nonsep", + "RF-nosvd-sep", + "RF-svd-sep", + ] + + case = cases[1] #5 + + nugget = Float64(1e-8) + u_test = [] + u_hist = [] + opt_diagnostics = zeros(length(rank_test), n_repeats, n_iteration) + train_err = zeros(length(rank_test), n_repeats) + test_err = zeros(length(rank_test), n_repeats) + + ttt = zeros(length(rank_test), n_repeats) + for (rank_id, rank_val) in enumerate(rank_test) #test over different ranks for svd-nonsep + @info "Test rank: $(rank_val)" + for rep_idx in 1:n_repeats + @info "Repeat: $(rep_idx)" + rf_optimizer_overrides = Dict( + "scheduler" => DataMisfitController(terminate_at = 1e4), + "cov_sample_multiplier" => 1.0, #5.0, + "n_features_opt" => 150, + "n_iteration" => n_iteration, + "accelerator" => DefaultAccelerator(), + #"localization" => EnsembleKalmanProcesses.Localizers.SECNice(0.01,1.0), # localization / s + "n_ensemble" => 200, + "verbose" => true, + "n_cross_val_sets" => 2, + ) + + + # Build ML tools + if case == "GP" + gppackage = Emulators.GPJL() + pred_type = Emulators.YType() + mlt = GaussianProcess(gppackage; prediction_type = pred_type, noise_learn = false) + elseif case ∈ ["RF-svd-nonsep"] + kernel_structure = NonseparableKernel(LowRankFactor(rank_val, nugget)) + n_features = 500 + + mlt = VectorRandomFeatureInterface( + n_features, + 3, + 3, + rng = rng, + kernel_structure = kernel_structure, + optimizer_options = rf_optimizer_overrides, + ) + elseif case ∈ ["RF-svd-sep", "RF-nosvd-sep"] + rank_out = Int(ceil(rank_val / 3)) # 1 1 1 2 2 2 + rank_in = rank_val - 3 * 
(rank_out - 1) # 1 2 3 1 2 3 + @info "Test rank in: $(rank_in) out: $(rank_out)" + kernel_structure = SeparableKernel(LowRankFactor(rank_in, nugget), LowRankFactor(rank_out, nugget)) + n_features = 500 + mlt = VectorRandomFeatureInterface( + n_features, + 3, + 3, + rng = rng, + kernel_structure = kernel_structure, + optimizer_options = rf_optimizer_overrides, + ) + + end + + #save config for RF + if !(case == "GP") && (rep_idx == 1) + JLD2.save( + joinpath(output_directory, case * "_l63_config-diff-rank-test.jld2"), + "rf_optimizer_overrides", + rf_optimizer_overrides, + "n_features", + n_features, + "kernel_structure", + kernel_structure, + ) + end + + # Emulate + if case ∈ ["RF-nosvd-nonsep", "RF-nosvd-sep"] + decorrelate = false + else + decorrelate = true + end + ttt[rank_id, rep_idx] = @elapsed begin + emulator = Emulator(mlt, iopairs; obs_noise_cov = Γy, decorrelate = decorrelate) + optimize_hyperparameters!(emulator) + end + + # diagnostics + if case != "GP" + opt_diagnostics[rank_id, rep_idx, :] = get_optimizer(mlt)[1] #length-1 vec of vec -> vec + end + + # Predict with emulator + u_test_tmp = zeros(3, length(xspan_test)) + u_test_tmp[:, 1] = sol_test.u[1] + + # predict sequentially i -> i+1 + for i in 1:(length(xspan_test) - 1) + rf_mean, _ = predict(emulator, u_test_tmp[:, i:i], transform_to_real = true) # 3x1 matrix + u_test_tmp[:, i + 1] = rf_mean + end + + # training error i -> o + train_err_tmp = [0.0] + for i in 1:size(input, 2) + train_mean, _ = predict(emulator, input[:, i:i], transform_to_real = true) # 3x1 + train_err_tmp[1] += norm(train_mean - output[:, i]) + end + train_err[rank_id, rep_idx] = 1 / size(input, 2) * train_err_tmp[1] + + # test error i -> o + test_err_tmp = [0.0] + for i in 1:(length(xspan_test) - 1) + test_mean, _ = predict(emulator, reshape(sol_test.u[i], :, 1), transform_to_real = true) # 3x1 matrix + test_err_tmp[1] += norm(test_mean[:] - sol_test.u[i + 1]) + end + test_err[rank_id, rep_idx] = 1 / (length(xspan_test) - 1) * test_err_tmp[1] + println("normalized L^2 error on training data:", 1 / size(input, 2) * train_err_tmp[1]) + println("normalized L^2 error on test data:", 1 / (length(xspan_test) - 1) * test_err_tmp[1]) + + u_hist_tmp = zeros(3, length(xspan_hist)) + u_hist_tmp[:, 1] = sol_hist.u[1] # start at end of previous sim + + for i in 1:(length(xspan_hist) - 1) + rf_mean, _ = predict(emulator, u_hist_tmp[:, i:i], transform_to_real = true) # 3x1 matrix + u_hist_tmp[:, i + 1] = rf_mean + end + + push!(u_test, u_test_tmp) + push!(u_hist, u_hist_tmp) + + JLD2.save( + joinpath(output_directory, case * "_l63_rank_test_results.jld2"), + "rank_test", + collect(rank_test), + "timings", + ttt, + "train_err", + train_err, + "test_err", + test_err, + ) + + end + + end + + # save data + JLD2.save(joinpath(output_directory, case * "_l63_trainerr.jld2"), "train_err", train_err) + JLD2.save(joinpath(output_directory, case * "_l63_histdata.jld2"), "solhist", solhist, "uhist", u_hist) + JLD2.save(joinpath(output_directory, case * "_l63_testdata.jld2"), "solplot", solplot, "uplot", u_test) + + # plot eki convergence plot + if length(opt_diagnostics) > 0 + + err_cols = mean(opt_diagnostics, dims = 2)[:, 1, :]' # average error over repeats for each rank as columns + err_normalized = (err_cols' ./ err_cols[1, :])' # divide each series by the max, so all errors start at 1 + plot_x = collect(1:size(err_cols, 1)) + #save + error_filepath = joinpath(output_directory, "eki_diff-rank-test_conv_error.jld2") + save(error_filepath, "error", err_cols, 
"trajectory_error", train_err) + + f5 = Figure(resolution = (1.618 * 300, 300), markersize = 4) + ax_conv = Axis(f5[1, 1], xlabel = "Iteration", ylabel = "max-normalized error", yscale = log10) + for (rank_id, rank_val) in enumerate(rank_test) + lines!(ax_conv, plot_x, err_normalized[:, rank_id], label = "rank $(rank_val)") + end + axislegend(ax_conv) + save(joinpath(output_directory, "l63_diff-rank-test_eki-conv_$(case).png"), f5, px_per_unit = 3) + save(joinpath(output_directory, "l63_diff-rank-test_eki-conv_$(case).pdf"), f5, px_per_unit = 3) + + end + + + + +end + +main() diff --git a/examples/Emulator/L63/plot_results-diff-rank-test.jl b/examples/Emulator/L63/plot_results-diff-rank-test.jl new file mode 100644 index 000000000..dfe7d0f6d --- /dev/null +++ b/examples/Emulator/L63/plot_results-diff-rank-test.jl @@ -0,0 +1,107 @@ +using Random, Distributions, LinearAlgebra, LaTeXStrings +ENV["GKSwstype"] = "100" +using CairoMakie, ColorSchemes #for plots +using JLD2 + +function main() + # filepaths + output_directory = joinpath(@__DIR__, "output") + # case = "RF-svd-nonsep" + case = "RF-svd-sep" + rank_test = 1:9 # must be 1:k for now + @info "plotting case $case for rank test $(rank_test)" + + #for later plots + fontsize = 18 + wideticks = WilkinsonTicks(3, k_min = 3, k_max = 4) # prefer few ticks + + error_filepath = joinpath(output_directory, "eki_diff-rank-test_conv_error.jld2") + error_data = JLD2.load(error_filepath) + err_cols = error_data["error"] # exps are columns + + rank_test_filepath = joinpath(output_directory, case * "_l63_rank_test_results.jld2") + rank_test_data = JLD2.load(rank_test_filepath) + train_err = rank_test_data["train_err"] + test_err = rank_test_data["test_err"] + + # Plot convergences + err_normalized = (err_cols' ./ err_cols[1, :])' # divide each series by the max, so all errors start at 1 + n_iterations = size(err_cols, 1) + plot_x = collect(1:n_iterations) + + f5 = Figure(resolution = (1.618 * 300, 300), markersize = 4, fontsize = fontsize) + ax_conv = Axis( + f5[1, 1], + xlabel = "Iteration", + ylabel = "max-normalized error", + yscale = log10, + xticks = 2:2:n_iterations, + yticks = ([10^-4, 10^-2, 10^0, 10^2], [L"10^{-4}", L"10^{-2}", L"10^0", L"10^2"]), + ) + plot_to = 10 + for (rank_id, rank_val) in enumerate(rank_test) + lines!( + ax_conv, + plot_x[1:plot_to], + err_normalized[1:plot_to, rank_id], + label = "rank $(rank_val)", + color = (:blue, 0.5 * rank_val / 10), + ) + end + + #axislegend(ax_conv) + save(joinpath(output_directory, "l63_diff-rank-test_eki-conv_$(case).png"), f5, px_per_unit = 3) + save(joinpath(output_directory, "l63_diff-rank-test_eki-conv_$(case).pdf"), f5, px_per_unit = 3) + + # if including GP - either load or just put in average here: + gp_train_test_err = [0.0154, 0.00292] + + f6 = Figure(resolution = (1.618 * 300, 300), markersize = 4, fontsize = fontsize) + if case == "RF-svd-nonsep" + ax_error = Axis( + f6[1, 1], + xlabel = "rank", + xticks = collect(rank_test), + ylabel = "L²-error in trajectory", + yscale = log10, + ) + mean_train_err = mean(train_err[1:length(rank_test), :], dims = 2) + mean_test_err = mean(test_err[1:length(rank_test), :], dims = 2) + lines!(ax_error, rank_test, mean_train_err[:], label = "RF-train", color = :blue, linestyle = :dash) + lines!(ax_error, rank_test, mean_test_err[:], label = "RF-test", color = :blue) + hlines!(ax_error, [gp_train_test_err[1]], label = "GP-train", color = :orange, linestyle = :dash) + hlines!(ax_error, [gp_train_test_err[2]], label = "GP-test", color = :orange, linestyle = 
:solid) + + axislegend(ax_error) + elseif case == "RF-svd-sep" + rank_labels = [] + for rank_val in collect(rank_test) + rank_out = Int(ceil(rank_val / 3)) # 1 1 1 2 2 2 + rank_in = rank_val - 3 * (rank_out - 1) # 1 2 3 1 2 3 + push!(rank_labels, (rank_in, rank_out)) + end + idx = [1, 4, 7, 2, 5, 8, 3, 6, 9] + + ax_error = Axis( + f6[1, 1], + xlabel = "rank (in,out)", + xticks = (rank_test, ["$(rank_labels[i])" for i in idx]), + ylabel = "L²-error in trajectory", + yscale = log10, + ) + mean_train_err = mean(train_err[idx, :], dims = 2) + mean_test_err = mean(test_err[idx, :], dims = 2) + lines!(ax_error, rank_test, mean_train_err[:], label = "RF-train", color = :blue, linestyle = :dash) + lines!(ax_error, rank_test, mean_test_err[:], label = "RF-test", color = :blue) + hlines!(ax_error, [gp_train_test_err[1]], label = "GP-train", color = :orange, linestyle = :dash) + hlines!(ax_error, [gp_train_test_err[2]], label = "GP-test", color = :orange, linestyle = :solid) + axislegend(ax_error) + else + throw(ArgumentError("case $(case) not found, check for typos")) + end + + save(joinpath(output_directory, "l63_diff-rank-test_train_err_$(case).png"), f6, px_per_unit = 3) + save(joinpath(output_directory, "l63_diff-rank-test_train_err_$(case).pdf"), f6, px_per_unit = 3) +end + +main() diff --git a/examples/Emulator/L63/plot_results.jl b/examples/Emulator/L63/plot_results.jl new file mode 100644 index 000000000..923eccf6a --- /dev/null +++ b/examples/Emulator/L63/plot_results.jl @@ -0,0 +1,124 @@ +using Random, Distributions, LinearAlgebra +ENV["GKSwstype"] = "100" +using CairoMakie, ColorSchemes #for plots +using JLD2 + +function main() + # filepaths + output_directory = joinpath(@__DIR__, "output") + cases = ["GP", "RF-prior", "RF-scalar", "RF-scalar-diagin", "RF-svd-nonsep", "RF-nosvd-nonsep", "RF-nosvd-sep"] #for paper, 1 2 & 5. 
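+    # (Illustrative aside: to regenerate figures for several cases one could loop,
+    # e.g. `for case in cases[[1, 2, 5]] ... end`; here a single case is selected.)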
+ case = cases[1] + @info "plotting case $case" + #for later plots + fontsize = 20 + wideticks = WilkinsonTicks(3, k_min = 3, k_max = 4) # prefer few ticks + + # Load from saved files + #= + config_file = JLD2.load(joinpath(output_directory, case * "_l63_config.jld2")) + rf_optimizer_overrides = config_file["rf_optimizer_overrides"] + n_features = config_file["n_features"] + kernel_structure = config_file["kernel_structure"] + =# + + histogram_data = JLD2.load(joinpath(output_directory, case * "_l63_histdata.jld2")) + solhist = histogram_data["solhist"] + uhist = histogram_data["uhist"] + trajectory_data = JLD2.load(joinpath(output_directory, case * "_l63_testdata.jld2")) + solplot = trajectory_data["solplot"] + uplot = trajectory_data["uplot"] + + # plotting trajectories for just first repeat + uplot_tmp = uplot[1] + uhist_tmp = uhist[1] + + # copied from emulate.jl + dt = 0.01 + tmax_test = 20 #100 + xx = 0.0:dt:tmax_test + + f = Figure(size = (900, 450), fontsize = fontsize) + axx = Axis(f[1, 1], ylabel = "x", yticks = wideticks) + axy = Axis(f[2, 1], ylabel = "y", yticks = wideticks) + axz = Axis(f[3, 1], xlabel = "time", ylabel = "z", yticks = [10, 30, 50]) + + tt = 1:length(xx) + lines!(axx, xx, uplot_tmp[1, tt], color = :blue) + lines!(axy, xx, uplot_tmp[2, tt], color = :blue) + lines!(axz, xx, uplot_tmp[3, tt], color = :blue) + + lines!(axx, xx, solplot[1, tt], color = :orange) + lines!(axy, xx, solplot[2, tt], color = :orange) + lines!(axz, xx, solplot[3, tt], color = :orange) + + current_figure() + # save + save(joinpath(output_directory, case * "_l63_test.png"), f, px_per_unit = 3) + save(joinpath(output_directory, case * "_l63_test.pdf"), f, pt_per_unit = 3) + @info "plotted trajectory in \n $(case)_l63_test.png, and $(case)_l63_test.pdf" + + # plot attractor + f3 = Figure(fontsize = fontsize) + lines(f3[1, 1], uplot_tmp[1, :], uplot_tmp[3, :], color = :blue) + lines(f3[2, 1], solplot[1, :], solplot[3, :], color = :orange) + + # save + save(joinpath(output_directory, case * "_l63_attr.png"), f3, px_per_unit = 3) + save(joinpath(output_directory, case * "_l63_attr.pdf"), f3, pt_per_unit = 3) + @info "plotted attractor in \n $(case)_l63_attr.png, and $(case)_l63_attr.pdf" + + # plotting histograms + f2 = Figure(fontsize = 1.25 * fontsize) + axx = Axis(f2[1, 1], xlabel = "x", ylabel = "pdf", xticks = wideticks, yticklabelsvisible = false) + axy = Axis(f2[1, 2], xlabel = "y", xticks = wideticks, yticklabelsvisible = false) + axz = Axis(f2[1, 3], xlabel = "z", xticks = [10, 30, 50], yticklabelsvisible = false) + + hist!(axx, uhist_tmp[1, :], bins = 50, normalization = :pdf, color = (:blue, 0.5)) + hist!(axy, uhist_tmp[2, :], bins = 50, normalization = :pdf, color = (:blue, 0.5)) + hist!(axz, uhist_tmp[3, :], bins = 50, normalization = :pdf, color = (:blue, 0.5)) + + hist!(axx, solhist[1, :], bins = 50, normalization = :pdf, color = (:orange, 0.5)) + hist!(axy, solhist[2, :], bins = 50, normalization = :pdf, color = (:orange, 0.5)) + hist!(axz, solhist[3, :], bins = 50, normalization = :pdf, color = (:orange, 0.5)) + + # save + save(joinpath(output_directory, case * "_l63_pdf.png"), f2, px_per_unit = 3) + save(joinpath(output_directory, case * "_l63_pdf.pdf"), f2, pt_per_unit = 3) + @info "plotted histogram in \n $(case)_l63_pdf.png, and $(case)_l63_pdf.pdf" + + # compare marginal histograms to truth - rough measure of fit + solcdf = sort(solhist, dims = 2) + + if length(uhist) > 1 + ucdf = [] + for u in uhist + ucdf_tmp = sort(u, dims = 2) + push!(ucdf, ucdf_tmp) + end + + f4 = 
Figure(size = (900, Int(floor(900 / 1.618))), fontsize = 1.5 * fontsize) + axx = Axis(f4[1, 1], xlabel = "x", ylabel = "cdf", xticks = wideticks) + axy = Axis(f4[1, 2], xlabel = "y", xticks = wideticks, yticklabelsvisible = false) + axz = Axis(f4[1, 3], xlabel = "z", xticks = [10, 30, 50], yticklabelsvisible = false) + + + unif_samples = (1:size(solcdf, 2)) / size(solcdf, 2) + + for u in ucdf + lines!(axx, u[1, :], unif_samples, color = (:blue, 0.2), linewidth = 4) + lines!(axy, u[2, :], unif_samples, color = (:blue, 0.2), linewidth = 4) + lines!(axz, u[3, :], unif_samples, color = (:blue, 0.2), linewidth = 4) + end + + lines!(axx, solcdf[1, :], unif_samples, color = (:orange, 1.0), linewidth = 4) + lines!(axy, solcdf[2, :], unif_samples, color = (:orange, 1.0), linewidth = 4) + lines!(axz, solcdf[3, :], unif_samples, color = (:orange, 1.0), linewidth = 4) + + # save + save(joinpath(output_directory, case * "_l63_cdfs.png"), f4, px_per_unit = 3) + save(joinpath(output_directory, case * "_l63_cdfs.pdf"), f4, pt_per_unit = 3) + @info "plotted cdfs for all samples in \n $(case)_l63_cdfs.png, and $(case)_l63_cdfs.pdf" + end +end + +main() diff --git a/examples/Emulator/Regression_2d_2d/compare_regresssion.jl b/examples/Emulator/Regression_2d_2d/compare_regression.jl similarity index 96% rename from examples/Emulator/Regression_2d_2d/compare_regresssion.jl rename to examples/Emulator/Regression_2d_2d/compare_regression.jl index be75e2955..df9282d32 100644 --- a/examples/Emulator/Regression_2d_2d/compare_regresssion.jl +++ b/examples/Emulator/Regression_2d_2d/compare_regression.jl @@ -52,20 +52,22 @@ function main() case_mask = [1, 3:length(cases)...] # (KEEP set to 1:length(cases) when pushing for buildkite) #problem - n = 150 # number of training points + n = 200 # number of training points p = 2 # input dim d = 2 # output dim X = 2.0 * π * rand(p, n) # G(x1, x2) - g1x = sin.(X[1, :]) .+ cos.(X[2, :]) - g2x = sin.(X[1, :]) .- cos.(X[2, :]) + g1(x) = sin.(x[1, :]) .+ 2 * cos.(2 * x[2, :]) + g2(x) = 3 * sin.(3 * x[1, :]) .- 4 * cos.(4 * x[2, :]) + g1x = g1(X) + g2x = g2(X) gx = zeros(2, n) gx[1, :] = g1x gx[2, :] = g2x # Add noise η μ = zeros(d) - Σ = 0.1 * [[0.8, 0.1] [0.1, 0.5]] # d x d + Σ = 0.05 * [[0.8, 0.1] [0.1, 0.5]] # d x d noise_samples = rand(MvNormal(μ, Σ), n) # y = G(x) + η Y = gx .+ noise_samples @@ -144,8 +146,9 @@ function main() pred_type = YType() # common random feature setup - n_features = 150 - optimizer_options = Dict("n_iteration" => 10, "scheduler" => DataMisfitController(on_terminate = "continue")) + n_features = 300 + optimizer_options = + Dict("n_iteration" => 20, "n_features_opt" => 100, "n_ensemble" => 80, "cov_sample_multiplier" => 1.0) nugget = 1e-12 @@ -163,7 +166,7 @@ function main() srfi = ScalarRandomFeatureInterface( n_features, p, - kernel_structure = SeparableKernel(LowRankFactor(2, 1e-8), OneDimFactor()), + kernel_structure = SeparableKernel(LowRankFactor(2, nugget), OneDimFactor()), optimizer_options = optimizer_options, ) emulator = Emulator(srfi, iopairs, obs_noise_cov = Σ, normalize_inputs = true) @@ -284,7 +287,7 @@ function main() end # Plot the true components of G(x1, x2) - g1_true = sin.(inputs[1, :]) .+ cos.(inputs[2, :]) + g1_true = g1(inputs) g1_true_grid = reshape(g1_true, n_pts, n_pts) if plot_flag p7 = plot( @@ -302,7 +305,7 @@ function main() savefig(joinpath(output_directory, case * "_true_g1.png")) end - g2_true = sin.(inputs[1, :]) .- cos.(inputs[2, :]) + g2_true = g2(inputs) g2_true_grid = reshape(g2_true, n_pts, n_pts) if plot_flag p8 = 
plot( diff --git a/examples/Lorenz/Project.toml b/examples/Lorenz/Project.toml index 8e6fb13e2..7bb353483 100644 --- a/examples/Lorenz/Project.toml +++ b/examples/Lorenz/Project.toml @@ -16,5 +16,5 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" [compat] -FFTW = "= 1.3.0" +FFTW = "1.3" julia = "~1.6" diff --git a/examples/Lorenz/calibrate.jl b/examples/Lorenz/calibrate.jl index 17e1b1242..c546d0491 100644 --- a/examples/Lorenz/calibrate.jl +++ b/examples/Lorenz/calibrate.jl @@ -105,11 +105,21 @@ function main() priors = PD.constrained_gaussian("F", F_true, 1.0, 0, Inf) end + # Plot the prior distribution + p = plot(priors, title = "prior") + plot!(p.subplots[1], [F_true], seriestype = "vline", w = 1.5, c = :steelblue, ls = :dash, xlabel = "F") # vline on top histogram + plot!(p.subplots[2], [A_true], seriestype = "vline", w = 1.5, c = :steelblue, ls = :dash, xlabel = "A") # vline on top histogram + figpath = joinpath(figure_save_directory, "prior" * ".pdf") + savefig(figpath) + figpath = joinpath(figure_save_directory, "prior" * ".png") + savefig(figpath) + + ### ### Define the data from which we want to learn the parameters ### - data_names = ["y0", "y1"] + data_names = ["y0_y1"] ### @@ -244,8 +254,7 @@ function main() # Construct observation object - truth = Observations.Observation(yt, Γy, data_names) - truth_sample = yt[:, end] + truth = EKP.Observation(Dict("samples" => vec(mean(yt, dims = 2)), "covariances" => Γy, "names" => data_names)) ### ### Calibrate: Ensemble Kalman Inversion ### @@ -261,8 +270,7 @@ function main() ekiobj = EKP.EnsembleKalmanProcess( initial_params, - truth_sample, - truth.obs_noise_cov, + truth, EKP.Inversion(), scheduler = EKP.DataMisfitController(), verbose = true, @@ -306,9 +314,9 @@ function main() "eki", ekiobj, "truth_sample", - truth_sample, + EKP.get_obs(truth), "truth_sample_mean", - truth.mean, + vec(mean(yt, dims = 2)), "truth_input_constrained", params_true, #constrained here, as these are in a physically constrained space (unlike the u inputs), ) diff --git a/examples/Lorenz/emulate_sample.jl b/examples/Lorenz/emulate_sample.jl index a42cd5c5a..c9fb04feb 100644 --- a/examples/Lorenz/emulate_sample.jl +++ b/examples/Lorenz/emulate_sample.jl @@ -17,7 +17,6 @@ using CalibrateEmulateSample.Utilities using CalibrateEmulateSample.EnsembleKalmanProcesses using CalibrateEmulateSample.ParameterDistributions using CalibrateEmulateSample.DataContainers -using CalibrateEmulateSample.Observations function get_standardizing_factors(data::Array{FT, 2}) where {FT} # Input: data size: N_data x N_ensembles @@ -48,7 +47,7 @@ function main() ] #### CHOOSE YOUR CASE: - mask = 2:7 # e.g. 1:8 or [7] + mask = 1:8 # 1:8 # e.g. 
1:8 or [7] for (case) in cases[mask] @@ -95,7 +94,7 @@ function main() truth_sample = load(data_save_file)["truth_sample"] truth_params_constrained = load(data_save_file)["truth_input_constrained"] #true parameters in constrained space truth_params = transform_constrained_to_unconstrained(priors, truth_params_constrained) - Γy = ekiobj.obs_noise_cov + Γy = get_obs_noise_cov(ekiobj) n_params = length(truth_params) # "input dim" @@ -105,7 +104,7 @@ function main() ### # Emulate-sample settings - # choice of machine-learning tool + # choice of machine-learning tool in the emulation stage nugget = 0.001 if case == "GP" gppackage = Emulators.GPJL() diff --git a/examples/Sinusoid/Project.toml b/examples/Sinusoid/Project.toml new file mode 100644 index 000000000..cde045f95 --- /dev/null +++ b/examples/Sinusoid/Project.toml @@ -0,0 +1,6 @@ +[deps] +CalibrateEmulateSample = "95e48a1f-0bec-4818-9538-3db4340308e3" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +EnsembleKalmanProcesses = "aa8a2aa5-91d8-4396-bcef-d4f2ec43552d" +JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" diff --git a/examples/Sinusoid/calibrate.jl b/examples/Sinusoid/calibrate.jl new file mode 100644 index 000000000..ac610e533 --- /dev/null +++ b/examples/Sinusoid/calibrate.jl @@ -0,0 +1,171 @@ +# # [Calibrate](@id sinusoid-example) + +################################################################################################ +################################################################################################ +#### Sinusoid example #### +#### This is a simple example of how to use the CalibrateEmulateSample.jl package. #### +#### We use a simple model for a sinusoid with 2 input parameters and 2 observables. #### +#### We follow the steps (1) Calibrate, using Ensemble Kalman Inversion, (2) Emulate, using #### +#### both Gaussian process and random features and (3) Sample, using Markov chain Monte #### +#### Carlo. Each of these steps are run in separate scripts. For this example to work, we #### +#### should run the scripts in the following order: #### +#### (1) calibrate.jl #### +#### (2) emulate.jl #### +#### (3) sample.jl #### +#### #### +#### #### +#### Sinusoid Model #### +#### This example closely follows the sinusoidal model in EnsembleKalmanProcess.jl #### +#### https://clima.github.io/EnsembleKalmanProcesses.jl/stable/literated/sinusoid_example/ #### +#### In this example we have a model that produces a sinusoid #### +#### ``f(A, v) = A \sin(\phi + t) + v, \forall t \in [0,2\pi]``, with a random #### +#### phase ``\phi``. Given an initial guess of the parameters as #### +#### ``A^* \sim \mathcal{N}(2,1)`` and ``v^* \sim \mathcal{N}(0,25)``, our goal is #### +#### to estimate the parameters from a noisy observation of the maximum, minimum, #### +#### and mean of the true model output. 
####
+####                                                                                        ####
+####                                                                                        ####
+################################################################################################
+################################################################################################
+
+
+# First, we load the packages we need:
+using LinearAlgebra, Random
+
+using Distributions, Plots
+using JLD2
+
+# CES
+using CalibrateEmulateSample
+
+const EKP = CalibrateEmulateSample.EnsembleKalmanProcesses
+const PD = EKP.ParameterDistributions
+
+
+## Setting up the model and data for our inverse problem
+include("sinusoid_setup.jl") # This file defines the model G(θ)
+
+rng_seed = 41
+rng = Random.MersenneTwister(rng_seed)
+
+# Generate observations (see generate_obs in sinusoid_setup.jl for more details)
+amplitude_true = 3.0
+vert_shift_true = 7.0
+theta_true = [amplitude_true, vert_shift_true]
+Γ = 0.2 * I # Observational covariance matrix (assumed diagonal)
+y_obs = generate_obs(amplitude_true, vert_shift_true, Γ; rng = rng)
+
+# Directories
+# Save the output for the emulate, sample steps.
+# Output figure save directory
+example_directory = @__DIR__
+data_save_directory = joinpath(example_directory, "output")
+
+
+## Solving the inverse problem
+
+# We now define prior distributions on the two parameters. For the amplitude,
+# we define a prior with mean 2 and standard deviation 1. It is
+# additionally constrained to be nonnegative. For the vertical shift we define
+# a Gaussian prior with mean 0 and standard deviation 5.
+prior_u1 = PD.constrained_gaussian("amplitude", 2, 1, 0, Inf)
+prior_u2 = PD.constrained_gaussian("vert_shift", 0, 5, -Inf, Inf)
+prior = PD.combine_distributions([prior_u1, prior_u2])
+# Plot priors
+p = plot(prior, fill = :lightgray, rng = rng)
+savefig(p, joinpath(data_save_directory, "sinusoid_prior.png"))
+
+# We now generate the initial ensemble and set up the ensemble Kalman inversion.
+N_ensemble = 10
+N_iterations = 5
+
+initial_ensemble = EKP.construct_initial_ensemble(rng, prior, N_ensemble)
+
+ensemble_kalman_process = EKP.EnsembleKalmanProcess(initial_ensemble, y_obs, Γ, EKP.Inversion(); rng = rng)
+
+
+# We are now ready to carry out the inversion. At each iteration, we get the
+# ensemble from the last iteration, apply ``G(\theta)`` to each ensemble member,
+# and apply the Kalman update to the ensemble.
+for i in 1:N_iterations
+    params_i = EKP.get_ϕ_final(prior, ensemble_kalman_process)
+
+    G_ens = hcat([G(params_i[:, i]; rng = rng) for i in 1:N_ensemble]...)
+
+    EKP.update_ensemble!(ensemble_kalman_process, G_ens)
+end
+
+
+# Finally, we get the ensemble after the last iteration. This provides our estimate of the parameters.
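+# (For intuition: `get_ϕ_final` returns the ensemble in the constrained, physical
+# space. Assuming the transforms used elsewhere in this example, it is equivalent to
+# `EKP.transform_unconstrained_to_constrained(prior, EKP.get_u_final(ensemble_kalman_process))`.)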
+final_ensemble = EKP.get_ϕ_final(prior, ensemble_kalman_process) + +# Check that the ensemble mean is close to the theta_true +println("Ensemble mean: ", mean(final_ensemble, dims = 2)) # [3.08, 6.37] +println("True parameters: ", theta_true) #[3.0, 7.0] + +# Plot true model parameters and all ensemble members at each iteration +p = plot( + [theta_true[1]], + [theta_true[2]], + c = :red, + seriestype = :scatter, + marker = :star, + markersize = :10, + label = "Truth", + legend = :bottomright, + xlims = (0, 6), + ylims = (-6, 10), + guidefontsize = 14, + tickfontsize = 12, + legendfontsize = 12, +) +vline!([theta_true[1]], color = :red, style = :dash, label = :false) +hline!([theta_true[2]], color = :red, style = :dash, label = :false) + + +iteration_colormap = colormap("Blues", N_iterations) +for j in 1:N_iterations + ensemble_j = EKP.get_ϕ(prior, ensemble_kalman_process, j) + plot!( + ensemble_j[1, :], + ensemble_j[2, :], + c = iteration_colormap[j], + seriestype = :scatter, + label = "Ensemble $(j)", + alpha = 0.8, + ) +end + +xlabel!("Amplitude") +ylabel!("Vertical shift") +savefig(p, joinpath(data_save_directory, "sinusoid_eki_pairs.png")) + +# The ensembles are initially spread out but move closer to the true parameter values +# with each iteration, indicating the EKI algorithm is converging towards the minimum. +# In the next step of CES, we will build an emulator using this dataset. + + +# Save the data +ϕ_stored = EKP.get_ϕ(prior, ensemble_kalman_process) +u_stored = EKP.get_u(ensemble_kalman_process, return_array = false) +g_stored = EKP.get_g(ensemble_kalman_process, return_array = false) + +save( + joinpath(data_save_directory, "calibrate_results.jld2"), + "inputs", + u_stored, + "outputs", + g_stored, + "constrained_inputs", + ϕ_stored, + "prior", + prior, + "eki", + ensemble_kalman_process, + "N_ensemble", + N_ensemble, + "N_iterations", + N_iterations, + "rng", + rng, +) diff --git a/examples/Sinusoid/emulate.jl b/examples/Sinusoid/emulate.jl new file mode 100644 index 000000000..dcf0027c0 --- /dev/null +++ b/examples/Sinusoid/emulate.jl @@ -0,0 +1,412 @@ +# # [Emulate](@id sinusoid-example) + +# In this example we have a model that produces a sinusoid +# ``f(A, v) = A \sin(\phi + t) + v, \forall t \in [0,2\pi]``, with a random +# phase ``\phi``. We want to quantify uncertainties on parameters ``A`` and ``v``, +# given noisy observations of the model output. +# Previously, in the calibration step, we started with an initial guess of these +# parameters and used Ensemble Kalman Inversion to iteratively update our estimates. +# Quantifying uncertainties around these estimates requires many model evaluations +# and can quickly become expensive. +# On this page, we will see how we can build an emulator to help us with +# this task. + +# First, we load the packages we need: +using LinearAlgebra, Random + +using Distributions, Plots, Plots.PlotMeasures +using JLD2 + +using CalibrateEmulateSample +using CalibrateEmulateSample.Emulators +using CalibrateEmulateSample.EnsembleKalmanProcesses + +const CES = CalibrateEmulateSample +const EKP = CalibrateEmulateSample.EnsembleKalmanProcesses + +include("sinusoid_setup.jl") # This file defines the model G(θ) + + +# Load the ensembles generated in the calibration step. 
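+# A small guard (illustrative sketch; file names taken from the loads below): fail
+# early with a clear message if the calibration step has not been run yet.
+for f in ("observations.jld2", "calibrate_results.jld2")
+    isfile(joinpath(@__DIR__, "output", f)) || error("run calibrate.jl before emulate.jl (missing $f)")
+end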
+example_directory = @__DIR__
+data_save_directory = joinpath(example_directory, "output")
+# Get observations, true parameters and observation noise
+obs_file = joinpath(data_save_directory, "observations.jld2")
+y_obs = load(obs_file)["y_obs"]
+theta_true = load(obs_file)["theta_true"]
+Γ = load(obs_file)["Γ"]
+# Get EKI and prior
+calibrate_file = joinpath(data_save_directory, "calibrate_results.jld2")
+ensemble_kalman_process = load(calibrate_file)["eki"]
+prior = load(calibrate_file)["prior"]
+N_ensemble = load(calibrate_file)["N_ensemble"]
+N_iterations = load(calibrate_file)["N_iterations"]
+# Get random number generator to start where we left off
+rng = load(calibrate_file)["rng"]
+
+
+# We ran Ensemble Kalman Inversion with an ensemble size of 10 for 5
+# iterations. This generated a total of 50 input-output pairs from our model.
+# We will use these samples to train an emulator. The EKI samples make a suitable
+# dataset for training an emulator because in the first iteration, the ensemble parameters
+# are spread out according to the prior, meaning they cover the full support of the
+# parameter space. This is important for building an emulator that can be evaluated anywhere
+# in this space. In later iterations, the ensemble parameters are focused around the truth.
+# This means the emulator will be more accurate around this region.
+
+
+## Build Emulators
+# We will build two types of emulator here for comparison: Gaussian processes and Random
+# Features. First, set up the data in the correct format. CalibrateEmulateSample.jl uses
+# a paired data container that matches the inputs (in the unconstrained space) to the outputs:
+input_output_pairs = CES.Utilities.get_training_points(ensemble_kalman_process, N_iterations)
+unconstrained_inputs = CES.Utilities.get_inputs(input_output_pairs)
+inputs = Emulators.transform_unconstrained_to_constrained(prior, unconstrained_inputs)
+outputs = CES.Utilities.get_outputs(input_output_pairs)
+
+# Gaussian process
+# We will set up a basic Gaussian process emulator using either ScikitLearn.jl or GaussianProcesses.jl.
+# See the Gaussian process page for more information and details about kernel choices.
+gppackage = Emulators.GPJL()
+gauss_proc = Emulators.GaussianProcess(gppackage, noise_learn = false)
+
+# Build emulator with data
+emulator_gp = Emulator(gauss_proc, input_output_pairs, normalize_inputs = true, obs_noise_cov = Γ)
+optimize_hyperparameters!(emulator_gp)
+
+# We have built the Gaussian process emulator and we can now use it for prediction. We will validate the emulator
+# performance soon, but first, build a random features emulator for comparison.
+
+## Random Features
+
+# An alternative to the Gaussian process emulator.
+# We have two input dimensions and two output dimensions.
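+# (Quick consistency check, for illustration: the dimensions below can also be read
+# off the training data built above rather than hard-coded.)
+@assert size(unconstrained_inputs, 1) == 2 && size(outputs, 1) == 2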
+input_dim = 2 +output_dim = 2 + +# Select number of features +n_features = 60 +nugget = 1e-9 + +# Create random features +# Here we use a vector random features set up because we find it performs similarly well to +# the GP emulator, but there are many options that could be tested +kernel_structure = NonseparableKernel(LowRankFactor(2, nugget)) +optimizer_options = Dict( + "n_ensemble" => 50, + "cov_sample_multiplier" => 10, + "scheduler" => EKP.DataMisfitController(on_terminate = "continue"), + "n_iteration" => 50, + "rng" => rng, + "verbose" => true, +) +random_features = VectorRandomFeatureInterface( + n_features, + input_dim, + output_dim, + rng = rng, + kernel_structure = kernel_structure, + optimizer_options = optimizer_options, +) +emulator_random_features = + Emulator(random_features, input_output_pairs, normalize_inputs = true, obs_noise_cov = Γ, decorrelate = false) +optimize_hyperparameters!(emulator_random_features) + + +## Emulator Validation +# Now we will validate both GP and RF emulators and compare them against the ground truth, G(θ). +# Note this is only possible in our example because our true model, G(θ), is cheap to evaluate. +# In more complex systems, we would have limited data to validate emulator performance with. +# Set up mesh to cover parameter space +N_grid = 50 +amp_range = range(0.6, 4, length = N_grid) +vshift_range = range(-6, 10, length = N_grid) + +function meshgrid(vx::AbstractVector{T}, vy::AbstractVector{T}) where {T} + m, n = length(vy), length(vx) + gx = reshape(repeat(vx, inner = m, outer = 1), m, n) + gy = reshape(repeat(vy, inner = 1, outer = n), m, n) + + return gx, gy +end + +X1, X2 = meshgrid(amp_range, vshift_range) +# Input for predict has to be of size input_dim x N_samples +input_grid = permutedims(hcat(X1[:], X2[:]), (2, 1)) +# First, predict with true model +g_true = hcat([G(input_grid[:, i]; rng = rng) for i in 1:size(input_grid)[2]]...) +g_true_grid = reshape(g_true, (output_dim, N_grid, N_grid)) + +# Next, predict with emulators. We first need to transform to the unconstrained space. +input_grid_unconstrained = Emulators.transform_constrained_to_unconstrained(prior, input_grid) +gp_mean, gp_cov = Emulators.predict(emulator_gp, input_grid_unconstrained, transform_to_real = true) +rf_mean, rf_cov = Emulators.predict(emulator_random_features, input_grid_unconstrained, transform_to_real = true) + +# Reshape into (output_dim x 50 x 50) grid +output_dim = 2 +gp_grid = reshape(gp_mean, (output_dim, N_grid, N_grid)) +rf_grid = reshape(rf_mean, (output_dim, N_grid, N_grid)) + +# Convert cov matrix into std and reshape +gp_std = [sqrt.(diag(gp_cov[j])) for j in 1:length(gp_cov)] +gp_std_grid = reshape(permutedims(reduce(vcat, [x' for x in gp_std]), (2, 1)), (output_dim, N_grid, N_grid)) +rf_std = [sqrt.(diag(rf_cov[j])) for j in 1:length(rf_cov)] +rf_std_grid = reshape(permutedims(reduce(vcat, [x' for x in rf_std]), (2, 1)), (output_dim, N_grid, N_grid)) + +## Plot +# First, we will plot the ground truth. We have 2 parameters and 2 outputs, so we will create a contour plot +# for each output to show how they vary against the two inputs. 
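+# Fixed color limits keep the truth, GP and RF panels directly comparable
+# (the values below are presumably chosen to span the model output).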
+range_clims = (0, 8)
+mean_clims = (-6, 10)
+
+p1 = contour(
+    amp_range,
+    vshift_range,
+    g_true_grid[1, :, :];
+    fill = true,
+    clims = range_clims,
+    xlabel = "Amplitude",
+    ylabel = "Vertical Shift",
+    title = "True Sinusoid Range",
+)
+p2 = contour(
+    amp_range,
+    vshift_range,
+    g_true_grid[2, :, :];
+    fill = true,
+    clims = mean_clims,
+    xlabel = "Amplitude",
+    ylabel = "Vertical Shift",
+    title = "True Sinusoid Mean",
+)
+p = plot(p1, p2, size = (600, 300), layout = (1, 2), guidefontsize = 12, tickfontsize = 10, legendfontsize = 10)
+savefig(p, joinpath(data_save_directory, "sinusoid_groundtruth_contours.png"))
+# The first panel shows how the range of the true model varies with respect to the two
+# parameters. The contours show the range is mostly dependent on the amplitude, with little variation with
+# respect to the vertical shift. The second panel shows how the mean varies with respect to the two
+# parameters and is mostly dependent on the vertical shift. This result makes sense for our model setup.
+# Next, we recreate the same plot with the emulators. Ideally, the results should look similar.
+
+# Plot GP emulator contours
+p1 = contour(
+    amp_range,
+    vshift_range,
+    gp_grid[1, :, :];
+    fill = true,
+    clims = range_clims,
+    xlabel = "Amplitude",
+    ylabel = "Vertical Shift",
+    title = "GP Sinusoid Range",
+)
+# We will also overlay the training data points from the EKI. The colors of the points should agree with
+# the contours.
+plot!(inputs[1, :], inputs[2, :]; seriestype = :scatter, zcolor = outputs[1, :], label = :false)
+p2 = contour(
+    amp_range,
+    vshift_range,
+    gp_grid[2, :, :];
+    fill = true,
+    clims = mean_clims,
+    xlabel = "Amplitude",
+    ylabel = "Vertical Shift",
+    title = "GP Sinusoid Mean",
+)
+plot!(inputs[1, :], inputs[2, :]; seriestype = :scatter, zcolor = outputs[2, :], label = :false)
+p = plot(
+    p1,
+    p2,
+    right_margin = 3mm,
+    bottom_margin = 3mm,
+    size = (600, 300),
+    layout = (1, 2),
+    guidefontsize = 12,
+    tickfontsize = 10,
+    legendfontsize = 10,
+)
+savefig(p, joinpath(data_save_directory, "sinusoid_GP_emulator_contours.png"))
+
+# Plot RF emulator contours
+p1 = contour(
+    amp_range,
+    vshift_range,
+    rf_grid[1, :, :];
+    fill = true,
+    clims = range_clims,
+    xlabel = "Amplitude",
+    ylabel = "Vertical Shift",
+    title = "RF Sinusoid Range",
+)
+plot!(inputs[1, :], inputs[2, :]; seriestype = :scatter, zcolor = outputs[1, :], label = :false)
+p2 = contour(
+    amp_range,
+    vshift_range,
+    rf_grid[2, :, :];
+    fill = true,
+    clims = mean_clims,
+    xlabel = "Amplitude",
+    ylabel = "Vertical Shift",
+    title = "RF Sinusoid Mean",
+)
+plot!(inputs[1, :], inputs[2, :]; seriestype = :scatter, zcolor = outputs[2, :], label = :false)
+p = plot(
+    p1,
+    p2,
+    right_margin = 3mm,
+    bottom_margin = 3mm,
+    size = (600, 300),
+    layout = (1, 2),
+    guidefontsize = 12,
+    tickfontsize = 10,
+    legendfontsize = 10,
+)
+savefig(p, joinpath(data_save_directory, "sinusoid_RF_emulator_contours.png"))
+
+# Both the GP and RF emulators give similar results to the ground truth G(θ), indicating they are correctly
+# learning the relationships between the parameters and the outputs. We also see the contours agree with the
+# colors of the training data points.
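+# A quick quantitative companion to the visual check above (illustrative): the
+# worst-case deviation of each emulator mean from the true model over the grid.
+println("max |GP - truth|: ", maximum(abs.(gp_grid - g_true_grid)))
+println("max |RF - truth|: ", maximum(abs.(rf_grid - g_true_grid)))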
+
+# Next, we plot uncertainty estimates from the GP and RF emulators.
+# Plot GP std estimates
+range_std_clims = (0, 2)
+mean_std_clims = (0, 1)
+
+p1 = contour(
+    amp_range,
+    vshift_range,
+    gp_std_grid[1, :, :];
+    c = :cividis,
+    fill = true,
+    clims = range_std_clims,
+    xlabel = "Amplitude",
+    ylabel = "Vertical Shift",
+    title = "GP 1σ in Sinusoid Range",
+)
+p2 = contour(
+    amp_range,
+    vshift_range,
+    gp_std_grid[2, :, :];
+    c = :cividis,
+    fill = true,
+    clims = mean_std_clims,
+    xlabel = "Amplitude",
+    ylabel = "Vertical Shift",
+    title = "GP 1σ in Sinusoid Mean",
+)
+p = plot(
+    p1,
+    p2,
+    size = (600, 300),
+    layout = (1, 2),
+    right_margin = 3mm,
+    bottom_margin = 3mm,
+    guidefontsize = 12,
+    tickfontsize = 10,
+    legendfontsize = 10,
+)
+savefig(p, joinpath(data_save_directory, "sinusoid_GP_emulator_std_contours.png"))
+
+
+# Plot RF std estimates
+p1 = contour(
+    amp_range,
+    vshift_range,
+    rf_std_grid[1, :, :];
+    c = :cividis,
+    fill = true,
+    clims = range_std_clims,
+    xlabel = "Amplitude",
+    ylabel = "Vertical Shift",
+    title = "RF 1σ in Sinusoid Range",
+)
+p2 = contour(
+    amp_range,
+    vshift_range,
+    rf_std_grid[2, :, :];
+    c = :cividis,
+    fill = true,
+    clims = mean_std_clims,
+    xlabel = "Amplitude",
+    ylabel = "Vertical Shift",
+    title = "RF 1σ in Sinusoid Mean",
+)
+p = plot(p1, p2, size = (600, 300), layout = (1, 2), guidefontsize = 12, tickfontsize = 10, legendfontsize = 10)
+savefig(p, joinpath(data_save_directory, "sinusoid_RF_emulator_std_contours.png"))
+# The GP and RF uncertainty predictions are similar and show lower uncertainties around the region of interest
+# where we have more training points.
+
+# Finally, we should validate how accurate the emulators are by looking at the absolute difference between emulator
+# predictions and the ground truth.
+gp_diff_grid = abs.(gp_grid - g_true_grid)
+range_diff_clims = (0, 1)
+mean_diff_clims = (0, 1)
+p1 = contour(
+    amp_range,
+    vshift_range,
+    gp_diff_grid[1, :, :];
+    c = :cividis,
+    fill = true,
+    clims = range_diff_clims,
+    xlabel = "Amplitude",
+    ylabel = "Vertical Shift",
+    title = "GP error in Sinusoid Range",
+)
+p2 = contour(
+    amp_range,
+    vshift_range,
+    gp_diff_grid[2, :, :];
+    c = :cividis,
+    fill = true,
+    clims = mean_diff_clims,
+    xlabel = "Amplitude",
+    ylabel = "Vertical Shift",
+    title = "GP error in Sinusoid Mean",
+)
+p = plot(p1, p2, size = (600, 300), layout = (1, 2), guidefontsize = 12, tickfontsize = 10, legendfontsize = 10)
+savefig(p, joinpath(data_save_directory, "sinusoid_GP_errors_contours.png"))
+
+rf_diff_grid = abs.(rf_grid - g_true_grid)
+p1 = contour(
+    amp_range,
+    vshift_range,
+    rf_diff_grid[1, :, :];
+    c = :cividis,
+    fill = true,
+    clims = range_diff_clims,
+    xlabel = "Amplitude",
+    ylabel = "Vertical Shift",
+    title = "RF error in Sinusoid Range",
+)
+p2 = contour(
+    amp_range,
+    vshift_range,
+    rf_diff_grid[2, :, :];
+    c = :cividis,
+    fill = true,
+    clims = mean_diff_clims,
+    xlabel = "Amplitude",
+    ylabel = "Vertical Shift",
+    title = "RF error in Sinusoid Mean",
+)
+p = plot(p1, p2, size = (600, 300), layout = (1, 2), guidefontsize = 12, tickfontsize = 10, legendfontsize = 10)
+savefig(p, joinpath(data_save_directory, "sinusoid_RF_errors_contours.png"))
+
+# Here, we want the emulator to show low errors in the region around the true parameter values near θ = (3, 7),
+# as this is the region that we will be sampling in the next step. This appears to be the case for both
+# outputs and both emulators.
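+# Mean absolute error per output dimension (row 1: range, row 2: mean), as a scalar
+# summary of the error maps above: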
+
+
+println(mean(gp_diff_grid, dims = (2, 3)))
+println(mean(rf_diff_grid, dims = (2, 3)))
+
+
+save(
+    joinpath(data_save_directory, "emulators.jld2"),
+    "emulator_gp",
+    emulator_gp,
+    "emulator_random_features",
+    emulator_random_features,
+    "prior",
+    prior,
+    "rng",
+    rng,
+)
diff --git a/examples/Sinusoid/sample.jl b/examples/Sinusoid/sample.jl
new file mode 100644
index 000000000..e2faaa197
--- /dev/null
+++ b/examples/Sinusoid/sample.jl
@@ -0,0 +1,304 @@
+# # [Sample](@id sinusoid-example)
+
+# In this example we have a model that produces a sinusoid
+# ``f(A, v) = A \sin(\phi + t) + v, \forall t \in [0,2\pi]``, with a random
+# phase ``\phi``. We want to quantify uncertainties on parameters ``A`` and ``v``,
+# given noisy observations of the model output.
+# Previously, in the emulate step, we built an emulator to allow us to make quick and
+# approximate model evaluations. This will be used in our Markov chain Monte Carlo
+# to sample the posterior distribution.
+
+# First, we load the packages we need:
+using LinearAlgebra, Random
+
+using Distributions, Plots
+using JLD2
+
+using CalibrateEmulateSample
+using CalibrateEmulateSample.Emulators
+using CalibrateEmulateSample.MarkovChainMonteCarlo
+
+const CES = CalibrateEmulateSample
+const EKP = CalibrateEmulateSample.EnsembleKalmanProcesses
+
+# Next, we need to load the emulator we built in the previous step (emulate.jl must be run before this script).
+# We will start with the Gaussian process emulator.
+example_directory = @__DIR__
+data_save_directory = joinpath(example_directory, "output")
+# Get observations, true parameters and observation noise
+obs_file = joinpath(data_save_directory, "observations.jld2")
+y_obs = load(obs_file)["y_obs"]
+theta_true = load(obs_file)["theta_true"]
+Γ = load(obs_file)["Γ"]
+# Get GP emulator and prior
+emulator_file = joinpath(data_save_directory, "emulators.jld2")
+emulator_gp = load(emulator_file)["emulator_gp"]
+prior = load(emulator_file)["prior"]
+# Get random number generator to start where we left off
+rng = load(emulator_file)["rng"]
+
+# We will also need a suitable value to initiate MCMC. To reduce burn-in, we will use the
+# final ensemble mean from EKI.
+calibrate_file = joinpath(data_save_directory, "calibrate_results.jld2")
+ensemble_kalman_process = load(calibrate_file)["eki"]
+
+## Markov chain Monte Carlo (MCMC)
+# Here, we set up an MCMC sampler, using the MarkovChainMonteCarlo API. The MCMC will be run in the unconstrained
+# space, for computational efficiency. First, we need to find a suitable starting point, ideally one that is near
+# the posterior distribution. We start the MCMC from the final ensemble mean from EKI as this will increase the
+# chance of acceptance near the start of the chain, and reduce burn-in time.
+init_sample = EKP.get_u_mean_final(ensemble_kalman_process)
+println("initial parameters: ", init_sample)
+
+# Create MCMC from the wrapper: we will use a random walk Metropolis-Hastings MCMC (RWMHSampling()).
+# We need to provide the API with the observations (y_obs), priors (prior) and our emulator (emulator_gp).
+# The emulator is used because it is cheap to evaluate so we can generate many MCMC samples.
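+# (Sketch of the call below: MCMCWrapper(sampling algorithm, data, prior, emulator),
+# where RWMHSampling() is the random walk algorithm named above and init_params sets
+# the starting sample of the chain.)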
+mcmc = MCMCWrapper(RWMHSampling(), y_obs, prior, emulator_gp; init_params = init_sample) +# First let's run a short chain to determine a good step size +new_step = optimize_stepsize(mcmc; rng = rng, init_stepsize = 0.1, N = 2000, discard_initial = 0) + +# Now begin the actual MCMC +println("Begin MCMC - with step size ", new_step) # 0.4 +chain = MarkovChainMonteCarlo.sample(mcmc, 100_000; rng = rng, stepsize = new_step, discard_initial = 2_000) + +# We can print summary statistics of the MCMC chain +display(chain) + +# Note that these values are provided in the unconstrained space. The vertical shift +# seems reasonable, but the amplitude is not. This is because the amplitude is constrained to be +# positive, but the MCMC is run in the unconstrained space. We can transform to the real +# constrained space and re-calculate these values. + +# Extract posterior samples and plot +posterior = MarkovChainMonteCarlo.get_posterior(mcmc, chain) + +# Back to constrained coordinates +constrained_posterior = + Emulators.transform_unconstrained_to_constrained(prior, MarkovChainMonteCarlo.get_distribution(posterior)) + +constrained_amp = vec(constrained_posterior["amplitude"]) +constrained_vshift = vec(constrained_posterior["vert_shift"]) + +println("Amplitude mean: ", mean(constrained_amp)) +println("Amplitude std: ", std(constrained_amp)) +println("Vertical Shift mean: ", mean(constrained_vshift)) +println("Vertical Shift std: ", std(constrained_vshift)) + +# We can quickly plot priors and posterior using built-in capabilities +p = plot(prior, fill = :lightgray, rng = rng) +plot!(posterior, fill = :darkblue, alpha = 0.5, rng = rng, size = (800, 200)) +savefig(p, joinpath(data_save_directory, "sinusoid_posterior_GP.png")) + +# This shows the posterior distribution has collapsed around the true values for theta. +# Note, these are marginal distributions but this is a multi-dimensional problem with a +# multi-dimensional posterior. Marginal distributions do not show us how parameters co-vary, +# so we also plot the 2D posterior distribution. + +# Plot 2D histogram (in constrained space) +amp_lims = (0, 6) # manually set limits based on our priors +vshift_lims = (-6, 10) + +hist2d = histogram2d( + constrained_amp, + constrained_vshift, + colorbar = :false, + xlims = amp_lims, + ylims = vshift_lims, + xlabel = "Amplitude", + ylabel = "Vertical Shift", +) + +# Let's also plot the marginal distributions along the top and the right hand +# panels. We can plot the prior and marginal posteriors as histograms. 
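+# (Prior samples are drawn in the unconstrained space and mapped back through the
+# same transform as the posterior samples; 10^4 draws is an arbitrary but cheap
+# choice for smooth histograms.)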
+prior_samples = sample(rng, prior, Int(1e4)) +constrained_prior_samples = EKP.transform_unconstrained_to_constrained(prior, prior_samples) + +tophist = histogram( + constrained_prior_samples[1, :], + bins = 100, + normed = true, + fill = :lightgray, + legend = :false, + lab = "Prior", + yaxis = :false, + xlims = amp_lims, + xlabel = "Amplitude", +) +histogram!( + tophist, + constrained_amp, + bins = 50, + normed = true, + fill = :darkblue, + alpha = 0.5, + legend = :false, + lab = "Posterior", +) +righthist = histogram( + constrained_prior_samples[2, :], + bins = 100, + normed = true, + fill = :lightgray, + orientation = :h, + ylim = vshift_lims, + xlims = (0, 1.4), + xaxis = :false, + legend = :false, + lab = :false, + ylabel = "Vertical Shift", +) + +histogram!( + righthist, + constrained_vshift, + bins = 50, + normed = true, + fill = :darkblue, + alpha = 0.5, + legend = :false, + lab = :false, + orientation = :h, +) + +layout = @layout [ + tophist{0.8w, 0.2h} _ + hist2d{0.8w, 0.8h} righthist{0.2w, 0.8h} +] + +plot_all = plot( + tophist, + hist2d, + righthist, + layout = layout, + size = (600, 600), + legend = :true, + guidefontsize = 14, + tickfontsize = 12, + legendfontsize = 12, +) + +savefig(plot_all, joinpath(data_save_directory, "sinusoid_MCMC_hist_GP.png")) + + +### MCMC Sampling using Random Features Emulator + +# We could repeat the above process with the random features (RF) emulator in place of the GP +# emulator. We hope to see similar results, since our RF emulator should be a good approximation +# to the GP emulator. + +emulator_random_features = load(emulator_file)["emulator_random_features"] +mcmc = MCMCWrapper(RWMHSampling(), y_obs, prior, emulator_random_features; init_params = init_sample) +new_step = optimize_stepsize(mcmc; init_stepsize = 0.1, N = 2000, discard_initial = 0) + +println("Begin MCMC - with step size ", new_step) # 0.4 +chain = MarkovChainMonteCarlo.sample(mcmc, 100_000; stepsize = new_step, discard_initial = 2_000) + +# We can print summary statistics of the MCMC chain +display(chain) + +# The output of the random features MCMC is almost identical. Again, these are in the unconstrained space +# so we need to transform to the real (constrained) space and re-calculate these values. + +# Extract posterior samples and plot +posterior = MarkovChainMonteCarlo.get_posterior(mcmc, chain) + +# Back to constrained coordinates +constrained_posterior = + Emulators.transform_unconstrained_to_constrained(prior, MarkovChainMonteCarlo.get_distribution(posterior)) + +constrained_amp = vec(constrained_posterior["amplitude"]) +constrained_vshift = vec(constrained_posterior["vert_shift"]) + +println("Amplitude mean: ", mean(constrained_amp)) +println("Amplitude std: ", std(constrained_amp)) +println("Vertical shift mean: ", mean(constrained_vshift)) +println("Vertical shift std: ", std(constrained_vshift)) + +# These numbers are very similar to our GP results. We can also check the posteriors look similar +# using the same plotting functions as before. +p = plot(prior, fill = :lightgray, rng = rng) +plot!(posterior, fill = :darkblue, alpha = 0.5, rng = rng, size = (800, 200)) +savefig(p, joinpath(data_save_directory, "sinusoid_posterior_RF.png")) + +# Plot 2D histogram (in constrained space) +# Using the same set up as before, with the same xlims, ylims. 
+hist2d = histogram2d( + constrained_amp, + constrained_vshift, + colorbar = :false, + xlims = amp_lims, + ylims = vshift_lims, + xlabel = "Amplitude", + ylabel = "Vertical Shift", +) + +# As before, we will plot the marginal distributions for both prior and posterior +# We will use the same prior samples generated for the GP histogram. +tophist = histogram( + constrained_prior_samples[1, :], + bins = 100, + normed = true, + fill = :lightgray, + legend = :false, + lab = "Prior", + yaxis = :false, + xlims = amp_lims, + xlabel = "Amplitude", +) +histogram!( + tophist, + constrained_amp, + bins = 50, + normed = true, + fill = :darkblue, + alpha = 0.5, + legend = :false, + lab = "Posterior", +) +righthist = histogram( + constrained_prior_samples[2, :], + bins = 100, + normed = true, + fill = :lightgray, + orientation = :h, + ylim = vshift_lims, + xlims = (0, 1.4), + xaxis = :false, + legend = :false, + lab = :false, + ylabel = "Vertical Shift", +) + +histogram!( + righthist, + constrained_vshift, + bins = 50, + normed = true, + fill = :darkblue, + alpha = 0.5, + legend = :false, + lab = :false, + orientation = :h, +) + +layout = @layout [ + tophist{0.8w, 0.2h} _ + hist2d{0.8w, 0.8h} righthist{0.2w, 0.8h} +] + +plot_all = plot( + tophist, + hist2d, + righthist, + layout = layout, + size = (600, 600), + legend = :true, + guidefontsize = 14, + tickfontsize = 12, + legendfontsize = 12, +) + +savefig(plot_all, joinpath(data_save_directory, "sinusoid_MCMC_hist_RF.png")) + +# It is reassuring to see that this method is robust to the choice of emulator. The MCMC using +# both GP and RF emulators give very similar posterior distributions. diff --git a/examples/Sinusoid/sinusoid_setup.jl b/examples/Sinusoid/sinusoid_setup.jl new file mode 100644 index 000000000..897ae8a3f --- /dev/null +++ b/examples/Sinusoid/sinusoid_setup.jl @@ -0,0 +1,149 @@ +# # [Setup](@id sinusoid-example) + +################################################################################################ +################################################################################################ +#### Sinusoid example #### +#### This is a simple example of how to use the CalibrateEmulateSample.jl package. #### +#### We use a simple model for a sinusoid with 2 input parameters and 2 observables. #### +#### We follow the steps (1) Calibrate, using Ensemble Kalman Inversion, (2) Emulate, using #### +#### both Gaussian process and random features and (3) Sample, using Markov chain Monte #### +#### Carlo. Each of these steps are run in separate scripts. For this example to work, we #### +#### should run the scripts in the following order: #### +#### (1) calibrate.jl #### +#### (2) emulate.jl #### +#### (3) sample.jl #### +#### #### +#### #### +#### Sinusoid Model #### +#### This example closely follows the sinusoidal model in EnsembleKalmanProcess.jl #### +#### https://clima.github.io/EnsembleKalmanProcesses.jl/stable/literated/sinusoid_example/ #### +#### In this example we have a model that produces a sinusoid #### +#### ``f(A, v) = A \sin(\phi + t) + v, \forall t \in [0,2\pi]``, with a random #### +#### phase ``\phi``. Given an initial guess of the parameters as #### +#### ``A^* \sim \mathcal{N}(2,1)`` and ``v^* \sim \mathcal{N}(0,25)``, our goal is #### +#### to estimate the parameters from a noisy observation of the maximum, minimum, #### +#### and mean of the true model output. 
#### +#### #### +#### #### +################################################################################################ +################################################################################################ + +### Model set up +# This file deals with setting up the model and generating pseudo-observations. + +# First, we load the packages we need: +using LinearAlgebra, Random +using Plots +using JLD2 +using Statistics, Distributions + +# Export functions for use later +export model +export G + +## Setting up the model and data for our inverse problem + +# Directories +# Save the output for the emulate, sample steps. +# Output figure save directory +example_directory = @__DIR__ +data_save_directory = joinpath(example_directory, "output") +if !isdir(data_save_directory) + mkdir(data_save_directory) +end + +# We define a model which generates a sinusoid given parameters ``\theta``: an +# amplitude and a vertical shift. We will estimate these parameters from data. +# The model adds a random phase shift upon evaluation. +function model(amplitude, vert_shift; rng = Random.GLOBAL_RNG) + # Define x-axis + dt = 0.01 + trange = 0:dt:(2 * pi + dt) + # Set phi + phi = 2 * pi * rand(rng) + return amplitude * sin.(trange .+ phi) .+ vert_shift +end + +# We will define a "true" amplitude and vertical shift, to generate some pseudo-observations. + +# Generate observations using "true" parameters, amplitude_true and vert_shift_true +# and user defined observation covariance matrix, Γ (start with diagonal covariance matrix, +# e.g. Γ = 0.1 * I) +function generate_obs(amplitude_true, vert_shift_true, Γ; rng = Random.GLOBAL_RNG) + theta_true = [amplitude_true, vert_shift_true] + # Generate the "true" signal for these parameters + signal_true = model(amplitude_true, vert_shift_true; rng = rng) + # Plot + p = plot( + signal_true, + color = :black, + linewidth = 3, + label = "True signal", + guidefontsize = 14, + tickfontsize = 12, + legendfontsize = 12, + ) + + # We will observe properties of the signal that inform us about the amplitude and vertical + # position. These properties will be the range (the difference between the maximum and the minimum), + # which is informative about the amplitude of the sinusoid, and the mean, which is informative + # about the vertical shift. 
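+    # (For a noise-free sinusoid these observables are essentially analytic: the
+    # range is ~2*amplitude and the mean is ~vert_shift, so y ≈ [2A, v]; a handy
+    # sanity check on the values computed below.)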
+ y1_true = maximum(signal_true) - minimum(signal_true) + y2_true = mean(signal_true) + # Add true observables to plot in red + hline!( + [y2_true], + color = :red, + style = :dash, + linewidth = 2, + label = "True mean: " * string(round(y2_true, digits = 1)), + ) + plot!( + [argmax(signal_true), argmax(signal_true)], + [minimum(signal_true), maximum(signal_true)], + arrows = :both, + color = :red, + linewidth = 2, + label = "True range: " * string(round(y1_true, digits = 1)), + ) + + # However, our observations are typically not noise-free, so we add some white noise to our + # observables, defined by the covariance matrix Γ (usually assumed diagonal) + dim_output = 2 + white_noise = MvNormal(zeros(dim_output), Γ) + y_obs = [y1_true, y2_true] .+ rand(rng, white_noise) + y1_obs = y_obs[1] + y2_obs = y_obs[2] + # Add noisy observables to plot in blue + hline!( + [y2_obs], + color = :blue, + style = :dash, + linewidth = 1, + label = "Observed mean: " * string(round(y2_obs, digits = 2)), + ) + plot!( + [argmax(signal_true) + 15, argmax(signal_true) + 15], + [y2_obs - y1_obs / 2, y2_obs + y1_obs / 2], + arrows = :both, + color = :blue, + linewidth = 1, + label = "Observed range: " * string(round(y1_obs, digits = 2)), + ) + + savefig(p, joinpath(data_save_directory, "sinusoid_true_vs_observed_signal.png")) + + # Save our observations and the parameters that generated them + println("Observations:", y_obs) + + save(joinpath(data_save_directory, "observations.jld2"), "y_obs", y_obs, "theta_true", theta_true, "Γ", Γ) + return y_obs +end + +# It will be helpful for us to define a function ``G(\theta)``, which returns these observables +# (the range and the mean) of the sinusoid given a parameter vector. +function G(theta; rng = Random.GLOBAL_RNG) + amplitude, vert_shift = theta + sincurve = model(amplitude, vert_shift; rng = rng) + return [maximum(sincurve) - minimum(sincurve), mean(sincurve)] +end diff --git a/examples/deprecated/Cloudy/Cloudy_example.jl b/examples/deprecated/Cloudy/Cloudy_example.jl deleted file mode 100644 index 75bc41e65..000000000 --- a/examples/deprecated/Cloudy/Cloudy_example.jl +++ /dev/null @@ -1,296 +0,0 @@ -# Reference the in-tree version of CalibrateEmulateSample on Julias load path - -include(joinpath(@__DIR__, "..", "ci", "linkfig.jl")) -include(joinpath(@__DIR__, "GModel.jl")) # Import the module that runs Cloudy - -# Import modules -using Distributions # probability distributions and associated functions -using StatsBase -using LinearAlgebra -using StatsPlots -using GaussianProcesses -using Plots -using Random - -# Import Calibrate-Emulate-Sample modules -using CalibrateEmulateSample.EnsembleKalmanProcesses -using CalibrateEmulateSample.Emulators -using CalibrateEmulateSample.MarkovChainMonteCarlo -using CalibrateEmulateSample.Utilities -using CalibrateEmulateSample.ParameterDistributions -using CalibrateEmulateSample.DataContainers -using CalibrateEmulateSample.Observations - -# This example requires Cloudy to be installed. -using Cloudy -const PDistributions = Cloudy.ParticleDistributions - -################################################################################ -# # -# Cloudy Calibrate-Emulate-Sample Example # -# # -# # -# This example uses Cloudy, a microphysics model that simulates the # -# coalescence of cloud droplets into bigger drops, to demonstrate how # -# the full Calibrate-Emulate-Sample pipeline can be used for Bayesian # -# learning and uncertainty quantification of parameters, given some # -# observations. 
# -# # -# Specifically, this examples shows how to learn parameters of the # -# initial cloud droplet mass distribution, given observations of some # -# moments of that mass distribution at a later time, after some of the # -# droplets have collided and become bigger drops. # -# # -# In this example, Cloudy is used in a "perfect model" (aka "known # -# truth") setting, which means that the "observations" are generated by # -# Cloudy itself, by running it with the true parameter values. In more # -# realistic applications, the observations will come from some external # -# measurement system. # -# # -# The purpose is to show how to do parameter learning using # -# Calibrate-Emulate-Sample in a simple (and highly artificial) setting. # -# # -# For more information on Cloudy, see # -# https://github.com/CliMA/Cloudy.jl.git # -# # -################################################################################ - - -rng_seed = 41 -Random.seed!(rng_seed) - -output_directory = joinpath(@__DIR__, "output") -if !isdir(output_directory) - mkdir(output_directory) -end - -### -### Define the (true) parameters and their priors -### - -# Define the parameters that we want to learn -# We assume that the true particle mass distribution is a Gamma distribution -# with parameters N0_true, θ_true, k_true -param_names = ["N0", "θ", "k"] -n_param = length(param_names) - -N0_true = 300.0 # number of particles (scaling factor for Gamma distribution) -θ_true = 1.5597 # scale parameter of Gamma distribution -k_true = 0.0817 # shape parameter of Gamma distribution -params_true = [N0_true, θ_true, k_true] -# Note that dist_true is a Cloudy distribution, not a Distributions.jl -# distribution -dist_true = PDistributions.Gamma(N0_true, θ_true, k_true) - - -### -### Define priors for the parameters we want to learn -### - -# Define constraints -lbound_N0 = 0.4 * N0_true -lbound_θ = 1.0e-1 -lbound_k = 1.0e-4 -c1 = bounded_below(lbound_N0) -c2 = bounded_below(lbound_θ) -c3 = bounded_below(lbound_k) -constraints = [[c1], [c2], [c3]] - -# We choose to use normal distributions to represent the prior distributions of -# the parameters in the transformed (unconstrained) space. i.e log coordinates -d1 = Parameterized(Normal(4.5, 1.0)) #truth is 5.19 -d2 = Parameterized(Normal(0.0, 2.0)) #truth is 0.378 -d3 = Parameterized(Normal(-1.0, 1.0))#truth is -2.51 -distributions = [d1, d2, d3] - -param_names = ["N0", "θ", "k"] - -priors = ParameterDistribution(distributions, constraints, param_names) - -### -### Define the data from which we want to learn the parameters -### - -data_names = ["M0", "M1", "M2"] -moments = [0.0, 1.0, 2.0] -n_moments = length(moments) - - -### -### Model settings -### - -# Collision-coalescence kernel to be used in Cloudy -coalescence_coeff = 1 / 3.14 / 4 / 100 -kernel_func = x -> coalescence_coeff -kernel = Cloudy.KernelTensors.CoalescenceTensor(kernel_func, 0, 100.0) - -# Time period over which to run Cloudy -tspan = (0.0, 1.0) - - -### -### Generate (artificial) truth samples -### Note: The observables y are related to the parameters θ by: -### y = G(x1, x2) + η -### - -g_settings_true = GModel.GSettings(kernel, dist_true, moments, tspan) -gt = GModel.run_G( - params_true, - g_settings_true, - PDistributions.update_params, - PDistributions.moment, - Cloudy.Sources.get_int_coalescence, -) -n_samples = 100 -yt = zeros(length(gt), n_samples) -# In a perfect model setting, the "observational noise" represent the internal -# model variability. 
Since Cloudy is a purely deterministic model, there is no -# straightforward way of coming up with a covariance structure for this internal -# model variability. We decide to use a diagonal covariance, with entries -# (variances) largely proportional to their corresponding data values, gt. -Γy = convert(Array, Diagonal([100.0, 5.0, 30.0])) -μ = zeros(length(gt)) - -# Add noise -for i in 1:n_samples - yt[:, i] = gt .+ rand(MvNormal(μ, Γy)) -end - -truth = Observations.Observation(yt, Γy, data_names) -truth_sample = truth.mean -### -### Calibrate: Ensemble Kalman Inversion -### - - -N_ens = 50 # number of ensemble members -N_iter = 8 # number of EKI iterations -# initial parameters: N_params x N_ens -initial_params = EnsembleKalmanProcesses.construct_initial_ensemble(priors, N_ens; rng_seed = 6) -ekiobj = EnsembleKalmanProcesses.EnsembleKalmanProcess( - initial_params, - truth_sample, - truth.obs_noise_cov, - Inversion(), - Δt = 0.1, -) - - -# Initialize a ParticleDistribution with dummy parameters. The parameters -# will then be set in run_G_ensemble -dummy = 1.0 -dist_type = PDistributions.Gamma(dummy, dummy, dummy) -g_settings = GModel.GSettings(kernel, dist_type, moments, tspan) - -# EKI iterations -for i in 1:N_iter - params_i = mapslices(x -> transform_unconstrained_to_constrained(priors, x), get_u_final(ekiobj); dims = 1) - g_ens = GModel.run_G_ensemble( - params_i, - g_settings, - PDistributions.update_params, - PDistributions.moment, - Cloudy.Sources.get_int_coalescence, - ) - EnsembleKalmanProcesses.update_ensemble!(ekiobj, g_ens) -end - -# EKI results: Has the ensemble collapsed toward the truth? -transformed_params_true = transform_constrained_to_unconstrained(priors, params_true) -println("True parameters (transformed): ") -println(transformed_params_true) - -println("\nEKI results:") -println(mean(get_u_final(ekiobj), dims = 2)) - - -### -### Emulate: Gaussian Process Regression -### - -gppackage = Emulators.GPJL() -pred_type = Emulators.YType() -gauss_proc = GaussianProcess( - gppackage; - kernel = nothing, # use default squared exponential kernel - prediction_type = pred_type, - noise_learn = false, -) - -# Get training points -input_output_pairs = Utilities.get_training_points(ekiobj, N_iter) -emulator = Emulator(gauss_proc, input_output_pairs, obs_noise_cov = Γy, normalize_inputs = true) -optimize_hyperparameters!(emulator) - -# Check how well the Gaussian Process regression predicts on the -# true parameters -y_mean, y_var = Emulators.predict(emulator, reshape(transformed_params_true, :, 1); transform_to_real = true) -println("GP prediction on true parameters: ") -println(vec(y_mean)) -println("true data: ") -println(truth.mean) - - -### -### Sample: Markov Chain Monte Carlo -### - -# initial values -u0 = vec(mean(get_inputs(input_output_pairs), dims = 2)) -println("initial parameters: ", u0) - -# First let's run a short chain to determine a good step size -yt_sample = truth_sample -mcmc = MCMCWrapper(RWMHSampling(), yt_sample, priors, emulator; init_params = u0) -new_step = optimize_stepsize(mcmc; init_stepsize = 0.1, N = 2000, discard_initial = 0) - -# Now begin the actual MCMC -println("Begin MCMC - with step size ", new_step) -chain = MarkovChainMonteCarlo.sample(mcmc, 100_000; stepsize = new_step, discard_initial = 1_000) -posterior = MarkovChainMonteCarlo.get_posterior(mcmc, chain) - -post_mean = mean(posterior) -post_cov = cov(posterior) -println("posterior mean") -println(post_mean) -println("posterior covariance") -println(post_cov) - -# Plot the posteriors 
-
-# Plot the posteriors together with the priors and the true parameter values
-# (in the transformed/unconstrained space)
-n_params = length(get_name(posterior))
-
-gr(size = (800, 600))
-
-for idx in 1:n_params
-    if idx == 1
-        xs = collect(range(5.15, stop = 5.25, length = 1000))
-    elseif idx == 2
-        xs = collect(range(0.0, stop = 0.5, length = 1000))
-    elseif idx == 3
-        xs = collect(range(-3.0, stop = -2.0, length = 1000))
-    else
-        throw("not implemented")
-    end
-
-    label = "true " * param_names[idx]
-    posterior_samples = dropdims(get_distribution(posterior)[param_names[idx]], dims = 1)
-    histogram(
-        posterior_samples,
-        bins = 100,
-        normed = true,
-        fill = :slategray,
-        thickness_scaling = 2.0,
-        lab = "posterior",
-        legend = :outertopright,
-    )
-    prior_dist = get_distribution(mcmc.prior)[param_names[idx]]
-    plot!(xs, prior_dist, w = 2.6, color = :blue, lab = "prior")
-    plot!([transformed_params_true[idx]], seriestype = "vline", w = 2.6, lab = label)
-    title!(param_names[idx])
-    figpath = joinpath(output_directory, "posterior_" * param_names[idx] * ".png")
-    StatsPlots.savefig(figpath)
-    linkfig(figpath)
-end
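
(Aside: the GModel.jl module deleted below wraps Cloudy in the generic "ensemble forward map" pattern that `run_G_ensemble` implements. A self-contained sketch of that pattern with a toy forward map `G`; nothing here is Cloudy-specific, and `map_ensemble` is a hypothetical name.)

# apply a forward map G column-by-column over an N_parameters × N_ensemble array
function map_ensemble(G::Function, params::AbstractMatrix)
    g_first = G(params[:, 1]) # fixes the output length
    g_ens = zeros(length(g_first), size(params, 2))
    g_ens[:, 1] = g_first
    for i in 2:size(params, 2)
        g_ens[:, i] = G(params[:, i])
    end
    return g_ens
end

map_ensemble(u -> [sum(u), prod(u)], randn(3, 20)) # returns a 2 × 20 array
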
-""" -function run_G_ensemble( - params::Array{FT, 2}, - settings::GSettings{FT}, - update_params, - moment, - get_src; - rng_seed = 42, -) where {FT <: AbstractFloat} - - N_ens = size(params, 2) # params is N_ens x N_params - n_moments = length(settings.moments) - g_ens = zeros(n_moments, N_ens) - - Random.seed!(rng_seed) - for i in 1:N_ens - # run the model with the current parameters, i.e., map θ to G(θ) - g_ens[:, i] = run_G(params[:, i], settings, update_params, moment, get_src) - end - - return g_ens -end - - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -- `u` - parameter vector of length *N\\_parameters*. -- `settings` - a [GSetttings](@ref) struct. - -Returns `g_u` = *G(u)*, a vector of length *N\\_data*. -""" -function run_G(u::Array{FT, 1}, settings::GSettings{FT}, update_params, moment, get_src) where {FT <: AbstractFloat} - - # generate the initial distribution - dist = update_params(settings.dist, u) - - # Numerical parameters - tol = FT(1e-7) - - # Make sure moments are up to date. mom0 is the initial condition for the - # ODE problem - moments = settings.moments - moments_init = fill(NaN, length(moments)) - for (i, mom) in enumerate(moments) - moments_init[i] = moment(dist, convert(FT, mom)) - end - - # Set up ODE problem: dM/dt = f(M,p,t) - rhs(M, p, t) = get_src(M, dist, settings.kernel) - prob = ODEProblem(rhs, moments_init, settings.tspan) - # Solve the ODE - sol = solve(prob, CVODE_BDF(), alg_hints = [:stiff], reltol = tol, abstol = tol) - # Return moments at last time step - moments_final = vcat(sol.u'...)[end, :] - - return moments_final -end - -end # module diff --git a/examples/deprecated/Cloudy/Manifest.toml b/examples/deprecated/Cloudy/Manifest.toml deleted file mode 100644 index 46ec9e740..000000000 --- a/examples/deprecated/Cloudy/Manifest.toml +++ /dev/null @@ -1,1531 +0,0 @@ -# This file is machine-generated - editing it directly is not advised - -[[AbstractFFTs]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "051c95d6836228d120f5f4b984dd5aba1624f716" -uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" -version = "0.5.0" - -[[Adapt]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "af92965fb30777147966f58acb05da51c5616b5f" -uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" -version = "3.3.3" - -[[ArgTools]] -uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" - -[[ArnoldiMethod]] -deps = ["LinearAlgebra", "Random", "StaticArrays"] -git-tree-sha1 = "f87e559f87a45bece9c9ed97458d3afe98b1ebb9" -uuid = "ec485272-7323-5ecc-a04f-4719b315124d" -version = "0.1.0" - -[[Arpack]] -deps = ["Arpack_jll", "Libdl", "LinearAlgebra"] -git-tree-sha1 = "2ff92b71ba1747c5fdd541f8fc87736d82f40ec9" -uuid = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97" -version = "0.4.0" - -[[Arpack_jll]] -deps = ["Libdl", "OpenBLAS_jll", "Pkg"] -git-tree-sha1 = "e214a9b9bd1b4e1b4f15b22c0994862b66af7ff7" -uuid = "68821587-b530-5797-8361-c406ea357684" -version = "3.5.0+3" - -[[ArrayInterface]] -deps = ["Compat", "IfElse", "LinearAlgebra", "Requires", "SparseArrays", "Static"] -git-tree-sha1 = "1ee88c4c76caa995a885dc2f22a5d548dfbbc0ba" -uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" -version = "3.2.2" - -[[ArrayLayouts]] -deps = ["FillArrays", "LinearAlgebra"] -git-tree-sha1 = "951c3fc1ff93497c88fb1dfa893f4de55d0b38e3" -uuid = "4c555306-a7a7-4459-81d9-ec55ddd5c99a" -version = "0.3.8" - -[[Artifacts]] -uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" - -[[AxisAlgorithms]] -deps = ["LinearAlgebra", "Random", "SparseArrays", "WoodburyMatrices"] -git-tree-sha1 = "66771c8d21c8ff5e3a93379480a2307ac36863f7" -uuid = 
"13072b0f-2c55-5437-9ae7-d433b7a33950" -version = "1.0.1" - -[[BandedMatrices]] -deps = ["ArrayLayouts", "FillArrays", "LinearAlgebra", "Random", "SparseArrays"] -git-tree-sha1 = "eaf98fa821ab26c5825fa3a054db735755c335da" -uuid = "aae01518-5342-5314-be14-df237901396f" -version = "0.15.15" - -[[Base64]] -uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" - -[[BinDeps]] -deps = ["Libdl", "Pkg", "SHA", "URIParser", "Unicode"] -git-tree-sha1 = "1289b57e8cf019aede076edab0587eb9644175bd" -uuid = "9e28174c-4ba2-5203-b857-d8d62c4213ee" -version = "1.0.2" - -[[BinaryProvider]] -deps = ["Libdl", "Logging", "SHA"] -git-tree-sha1 = "ecdec412a9abc8db54c0efc5548c64dfce072058" -uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232" -version = "0.5.10" - -[[BitTwiddlingConvenienceFunctions]] -deps = ["Static"] -git-tree-sha1 = "bc1317f71de8dce26ea67fcdf7eccc0d0693b75b" -uuid = "62783981-4cbd-42fc-bca8-16325de8dc4b" -version = "0.1.1" - -[[BoundaryValueDiffEq]] -deps = ["BandedMatrices", "DiffEqBase", "FiniteDiff", "ForwardDiff", "LinearAlgebra", "NLsolve", "Reexport", "SparseArrays"] -git-tree-sha1 = "fe34902ac0c3a35d016617ab7032742865756d7d" -uuid = "764a87c0-6b3e-53db-9096-fe964310641d" -version = "2.7.1" - -[[Bzip2_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "19a35467a82e236ff51bc17a3a44b69ef35185a2" -uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0" -version = "1.0.8+0" - -[[CEnum]] -git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9" -uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" -version = "0.4.1" - -[[CPUSummary]] -deps = ["Hwloc", "IfElse", "Static"] -git-tree-sha1 = "87b0c9c6ee0124d6c1f4ce8cb035dcaf9f90b803" -uuid = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9" -version = "0.1.6" - -[[Cairo_jll]] -deps = ["Artifacts", "Bzip2_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "JLLWrappers", "LZO_jll", "Libdl", "Pixman_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll", "Zlib_jll", "libpng_jll"] -git-tree-sha1 = "4b859a208b2397a7a623a03449e4636bdb17bcf2" -uuid = "83423d85-b0ee-5818-9007-b63ccbeb887a" -version = "1.16.1+1" - -[[Calculus]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "f641eb0a4f00c343bbc32346e1217b86f3ce9dad" -uuid = "49dc2e85-a5d0-5ad3-a950-438e2897f1b9" -version = "0.5.1" - -[[CanonicalTraits]] -deps = ["MLStyle"] -git-tree-sha1 = "f959d0e7164fb0262b02abecb93cf42b9a9f3188" -uuid = "a603d957-0e48-4f86-8fbd-0b7bc66df689" -version = "0.2.4" - -[[ChainRulesCore]] -deps = ["Compat", "LinearAlgebra", "SparseArrays"] -git-tree-sha1 = "926870acb6cbcf029396f2f2de030282b6bc1941" -uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" -version = "1.11.4" - -[[ChangesOfVariables]] -deps = ["ChainRulesCore", "LinearAlgebra", "Test"] -git-tree-sha1 = "bf98fa45a0a4cee295de98d4c1462be26345b9a1" -uuid = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" -version = "0.1.2" - -[[CloseOpenIntervals]] -deps = ["ArrayInterface", "Static"] -git-tree-sha1 = "7b8f09d58294dc8aa13d91a8544b37c8a1dcbc06" -uuid = "fb6a15b2-703c-40df-9091-08a04967cfa9" -version = "0.1.4" - -[[Cloudy]] -deps = ["Coverage", "DifferentialEquations", "DocStringExtensions", "ForwardDiff", "HCubature", "LinearAlgebra", "Optim", "PyPlot", "SpecialFunctions", "TaylorSeries", "Test"] -git-tree-sha1 = "7cb3ce76f5a63d17eb358b6030f7664931c75857" -uuid = "9e3b23bb-e7cc-4b94-886c-65de2234ba87" -version = "0.1.0" - -[[Clustering]] -deps = ["Distances", "LinearAlgebra", "NearestNeighbors", "Printf", "SparseArrays", "Statistics", "StatsBase"] -git-tree-sha1 = "75479b7df4167267d75294d14b58244695beb2ac" -uuid = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" 
-version = "0.14.2" - -[[ColorSchemes]] -deps = ["ColorTypes", "Colors", "FixedPointNumbers", "Random"] -git-tree-sha1 = "a851fec56cb73cfdf43762999ec72eff5b86882a" -uuid = "35d6a980-a343-548e-a6ea-1d62b119f2f4" -version = "3.15.0" - -[[ColorTypes]] -deps = ["FixedPointNumbers", "Random"] -git-tree-sha1 = "024fe24d83e4a5bf5fc80501a314ce0d1aa35597" -uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f" -version = "0.11.0" - -[[Colors]] -deps = ["ColorTypes", "FixedPointNumbers", "Reexport"] -git-tree-sha1 = "417b0ed7b8b838aa6ca0a87aadf1bb9eb111ce40" -uuid = "5ae59095-9a9b-59fe-a467-6f913c188581" -version = "0.12.8" - -[[Combinatorics]] -git-tree-sha1 = "08c8b6831dc00bfea825826be0bc8336fc369860" -uuid = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" -version = "1.0.2" - -[[CommonSolve]] -git-tree-sha1 = "68a0743f578349ada8bc911a5cbd5a2ef6ed6d1f" -uuid = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2" -version = "0.2.0" - -[[CommonSubexpressions]] -deps = ["MacroTools", "Test"] -git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7" -uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" -version = "0.3.0" - -[[Compat]] -deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] -git-tree-sha1 = "44c37b4636bc54afac5c574d2d02b625349d6582" -uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" -version = "3.41.0" - -[[CompilerSupportLibraries_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" - -[[Conda]] -deps = ["Downloads", "JSON", "VersionParsing"] -git-tree-sha1 = "6cdc8832ba11c7695f494c9d9a1c31e90959ce0f" -uuid = "8f4d0f93-b110-5947-807f-2305c1781a2d" -version = "1.6.0" - -[[ConstructionBase]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "f74e9d5388b8620b4cee35d4c5a618dd4dc547f4" -uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9" -version = "1.3.0" - -[[Contour]] -deps = ["StaticArrays"] -git-tree-sha1 = "9f02045d934dc030edad45944ea80dbd1f0ebea7" -uuid = "d38c429a-6771-53c6-b99e-75d170b6e991" -version = "0.5.7" - -[[Coverage]] -deps = ["CoverageTools", "HTTP", "JSON", "LibGit2", "MbedTLS"] -git-tree-sha1 = "991f606da16a3ab900d5cad64e90cede27678ab2" -uuid = "a2441757-f6aa-5fb2-8edb-039e3f45d037" -version = "1.4.0" - -[[CoverageTools]] -git-tree-sha1 = "cc5595feb314d3b226ed765a001a40ca451ad687" -uuid = "c36e975a-824b-4404-a568-ef97ca766997" -version = "1.3.0" - -[[DataAPI]] -git-tree-sha1 = "cc70b17275652eb47bc9e5f81635981f13cea5c8" -uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" -version = "1.9.0" - -[[DataStructures]] -deps = ["Compat", "InteractiveUtils", "OrderedCollections"] -git-tree-sha1 = "3daef5523dd2e769dad2365274f760ff5f282c7d" -uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" -version = "0.18.11" - -[[DataValueInterfaces]] -git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" -uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" -version = "1.0.0" - -[[DataValues]] -deps = ["DataValueInterfaces", "Dates"] -git-tree-sha1 = "d88a19299eba280a6d062e135a43f00323ae70bf" -uuid = "e7dc6d0d-1eca-5fa6-8ad6-5aecde8b7ea5" -version = "0.4.13" - -[[Dates]] -deps = ["Printf"] -uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" - -[[DelayDiffEq]] -deps = ["ArrayInterface", "DataStructures", "DiffEqBase", "LinearAlgebra", "Logging", "NonlinearSolve", "OrdinaryDiffEq", "Printf", "RecursiveArrayTools", "Reexport", "UnPack"] -git-tree-sha1 = "1f4022b5b378c627c44212ea6f19ea36d7f9d3e3" -uuid = 
"bcd4f6db-9728-5f36-b5f7-82caef46ccdb" -version = "5.32.4" - -[[DelimitedFiles]] -deps = ["Mmap"] -uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" - -[[DiffEqBase]] -deps = ["ArrayInterface", "ChainRulesCore", "DataStructures", "DocStringExtensions", "FastBroadcast", "FunctionWrappers", "IterativeSolvers", "LabelledArrays", "LinearAlgebra", "Logging", "MuladdMacro", "NonlinearSolve", "Parameters", "Printf", "RecursiveArrayTools", "RecursiveFactorization", "Reexport", "Requires", "SciMLBase", "Setfield", "SparseArrays", "StaticArrays", "Statistics", "SuiteSparse", "ZygoteRules"] -git-tree-sha1 = "9d312bb0b7c8ace440a71c64330cf1bea0ade0c8" -uuid = "2b5f629d-d688-5b77-993f-72d75c75574e" -version = "6.70.0" - -[[DiffEqCallbacks]] -deps = ["DataStructures", "DiffEqBase", "ForwardDiff", "LinearAlgebra", "NLsolve", "OrdinaryDiffEq", "Parameters", "RecipesBase", "RecursiveArrayTools", "SciMLBase", "StaticArrays"] -git-tree-sha1 = "4fba4f6bc9b8dfa96019ac4b72ac33ba56c07d87" -uuid = "459566f4-90b8-5000-8ac3-15dfb0a30def" -version = "2.20.0" - -[[DiffEqDiffTools]] -deps = ["LinearAlgebra", "SparseArrays", "StaticArrays"] -git-tree-sha1 = "b992345a39b4d9681342ae795a8dacc100730182" -uuid = "01453d9d-ee7c-5054-8395-0335cb756afa" -version = "0.14.0" - -[[DiffEqFinancial]] -deps = ["DiffEqBase", "DiffEqNoiseProcess", "LinearAlgebra", "Markdown", "RandomNumbers"] -git-tree-sha1 = "db08e0def560f204167c58fd0637298e13f58f73" -uuid = "5a0ffddc-d203-54b0-88ba-2c03c0fc2e67" -version = "2.4.0" - -[[DiffEqJump]] -deps = ["ArrayInterface", "Compat", "DataStructures", "DiffEqBase", "FunctionWrappers", "LightGraphs", "LinearAlgebra", "PoissonRandom", "Random", "RandomNumbers", "RecursiveArrayTools", "Reexport", "StaticArrays", "TreeViews", "UnPack"] -git-tree-sha1 = "9f47b8ae1c6f2b172579ac50397f8314b460fcd9" -uuid = "c894b116-72e5-5b58-be3c-e6d8d4ac2b12" -version = "7.3.1" - -[[DiffEqNoiseProcess]] -deps = ["DiffEqBase", "Distributions", "LinearAlgebra", "PoissonRandom", "Random", "Random123", "RandomNumbers", "RecipesBase", "RecursiveArrayTools", "Requires", "ResettableStacks", "StaticArrays", "Statistics"] -git-tree-sha1 = "edc9464d28048d672dc1781cfb9e575b4f7bbd50" -uuid = "77a26b50-5914-5dd7-bc55-306e6241c503" -version = "5.5.2" - -[[DiffEqPhysics]] -deps = ["DiffEqBase", "DiffEqCallbacks", "ForwardDiff", "LinearAlgebra", "Printf", "Random", "RecipesBase", "RecursiveArrayTools", "Reexport", "StaticArrays"] -git-tree-sha1 = "8f23c6f36f6a6eb2cbd6950e28ec7c4b99d0e4c9" -uuid = "055956cb-9e8b-5191-98cc-73ae4a59e68a" -version = "3.9.0" - -[[DiffResults]] -deps = ["StaticArrays"] -git-tree-sha1 = "c18e98cba888c6c25d1c3b048e4b3380ca956805" -uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" -version = "1.0.3" - -[[DiffRules]] -deps = ["LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"] -git-tree-sha1 = "d529c45b4d95f1ff43640166959360095b1580f8" -uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" -version = "1.7.0" - -[[DifferentialEquations]] -deps = ["BoundaryValueDiffEq", "DelayDiffEq", "DiffEqBase", "DiffEqCallbacks", "DiffEqFinancial", "DiffEqJump", "DiffEqNoiseProcess", "DiffEqPhysics", "DimensionalPlotRecipes", "LinearAlgebra", "MultiScaleArrays", "OrdinaryDiffEq", "ParameterizedFunctions", "Random", "RecursiveArrayTools", "Reexport", "SteadyStateDiffEq", "StochasticDiffEq", "Sundials"] -git-tree-sha1 = "ececc535bd2aa55a520131d955639288704e3851" -uuid = "0c46a032-eb83-5123-abaf-570d42b7fbaa" -version = "6.18.0" - -[[DimensionalPlotRecipes]] -deps = ["LinearAlgebra", "RecipesBase"] -git-tree-sha1 = 
"af883a26bbe6e3f5f778cb4e1b81578b534c32a6" -uuid = "c619ae07-58cd-5f6d-b883-8f17bd6a98f9" -version = "1.2.0" - -[[Distances]] -deps = ["LinearAlgebra", "SparseArrays", "Statistics", "StatsAPI"] -git-tree-sha1 = "3258d0659f812acde79e8a74b11f17ac06d0ca04" -uuid = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" -version = "0.10.7" - -[[Distributed]] -deps = ["Random", "Serialization", "Sockets"] -uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" - -[[Distributions]] -deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns"] -git-tree-sha1 = "9c41285c57c6e0d73a21ed4b65f6eec34805f937" -uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" -version = "0.23.8" - -[[DocStringExtensions]] -deps = ["LibGit2"] -git-tree-sha1 = "b19534d1895d702889b219c382a6e18010797f0b" -uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -version = "0.8.6" - -[[Downloads]] -deps = ["ArgTools", "LibCURL", "NetworkOptions"] -uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" - -[[EarCut_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "3f3a2501fa7236e9b911e0f7a588c657e822bb6d" -uuid = "5ae413db-bbd1-5e63-b57d-d24a61df00f5" -version = "2.2.3+0" - -[[ElasticArrays]] -deps = ["Adapt"] -git-tree-sha1 = "a0fcc1bb3c9ceaf07e1d0529c9806ce94be6adf9" -uuid = "fdbdab4c-e67f-52f5-8c3f-e7b388dad3d4" -version = "1.2.9" - -[[ElasticPDMats]] -deps = ["LinearAlgebra", "MacroTools", "PDMats"] -git-tree-sha1 = "5157c93fe9431a041e4cd84265dfce3d53a52323" -uuid = "2904ab23-551e-5aed-883f-487f97af5226" -version = "0.2.2" - -[[Expat_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "b3bfd02e98aedfa5cf885665493c5598c350cd2f" -uuid = "2e619515-83b5-522b-bb60-26c02a35a201" -version = "2.2.10+0" - -[[ExponentialUtilities]] -deps = ["ArrayInterface", "LinearAlgebra", "Printf", "Requires", "SparseArrays"] -git-tree-sha1 = "1b873816d2cfc8c0fcb1edcb08e67fdf630a70b7" -uuid = "d4d017d3-3776-5f7e-afef-a10c40355c18" -version = "1.10.2" - -[[FFMPEG]] -deps = ["FFMPEG_jll"] -git-tree-sha1 = "b57e3acbe22f8484b4b5ff66a7499717fe1a9cc8" -uuid = "c87230d0-a227-11e9-1b43-d7ebe4e7570a" -version = "0.4.1" - -[[FFMPEG_jll]] -deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "JLLWrappers", "LAME_jll", "Libdl", "Ogg_jll", "OpenSSL_jll", "Opus_jll", "Pkg", "Zlib_jll", "libass_jll", "libfdk_aac_jll", "libvorbis_jll", "x264_jll", "x265_jll"] -git-tree-sha1 = "d8a578692e3077ac998b50c0217dfd67f21d1e5f" -uuid = "b22a6f82-2f65-5046-a5b2-351ab43fb4e5" -version = "4.4.0+0" - -[[FFTW]] -deps = ["AbstractFFTs", "FFTW_jll", "IntelOpenMP_jll", "Libdl", "LinearAlgebra", "MKL_jll", "Reexport"] -git-tree-sha1 = "8fda0934cb99db617171f7296dc361f4d6fa5424" -uuid = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" -version = "1.3.0" - -[[FFTW_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "c6033cc3892d0ef5bb9cd29b7f2f0331ea5184ea" -uuid = "f5851436-0d7a-5f13-b9de-f02708fd171a" -version = "3.3.10+0" - -[[FastBroadcast]] -deps = ["LinearAlgebra", "Polyester", "Static"] -git-tree-sha1 = "0f8ef5dcb040dbb9edd98b1763ac10882ee1ff03" -uuid = "7034ab61-46d4-4ed7-9d0f-46aef9175898" -version = "0.1.12" - -[[FastClosures]] -git-tree-sha1 = "acebe244d53ee1b461970f8910c235b259e772ef" -uuid = "9aa1b823-49e4-5ca5-8b0f-3971ec8bab6a" -version = "0.3.2" - -[[FastGaussQuadrature]] -deps = ["LinearAlgebra", "SpecialFunctions"] -git-tree-sha1 = "6ea5f7b4aecce0e3a14ca1da03f62f86148c8fa3" -uuid = "442a2c76-b920-505d-bb47-c5924d526838" -version = "0.4.5" - -[[FillArrays]] -deps 
= ["LinearAlgebra", "Random", "SparseArrays"] -git-tree-sha1 = "4863cbb7910079369e258dee4add9d06ead5063a" -uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" -version = "0.8.14" - -[[FiniteDiff]] -deps = ["ArrayInterface", "LinearAlgebra", "Requires", "SparseArrays", "StaticArrays"] -git-tree-sha1 = "8b3c09b56acaf3c0e581c66638b85c8650ee9dca" -uuid = "6a86dc24-6348-571c-b903-95158fe2bd41" -version = "2.8.1" - -[[FixedPointNumbers]] -deps = ["Statistics"] -git-tree-sha1 = "335bfdceacc84c5cdf16aadc768aa5ddfc5383cc" -uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" -version = "0.8.4" - -[[Fontconfig_jll]] -deps = ["Artifacts", "Bzip2_jll", "Expat_jll", "FreeType2_jll", "JLLWrappers", "Libdl", "Libuuid_jll", "Pkg", "Zlib_jll"] -git-tree-sha1 = "21efd19106a55620a188615da6d3d06cd7f6ee03" -uuid = "a3f928ae-7b40-5064-980b-68af3947d34b" -version = "2.13.93+0" - -[[Formatting]] -deps = ["Printf"] -git-tree-sha1 = "8339d61043228fdd3eb658d86c926cb282ae72a8" -uuid = "59287772-0a20-5a39-b81b-1366585eb4c0" -version = "0.4.2" - -[[ForwardDiff]] -deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "LogExpFunctions", "NaNMath", "Preferences", "Printf", "Random", "SpecialFunctions", "StaticArrays"] -git-tree-sha1 = "2b72a5624e289ee18256111657663721d59c143e" -uuid = "f6369f11-7733-5829-9624-2563aa707210" -version = "0.10.24" - -[[FreeType2_jll]] -deps = ["Artifacts", "Bzip2_jll", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] -git-tree-sha1 = "87eb71354d8ec1a96d4a7636bd57a7347dde3ef9" -uuid = "d7e528f0-a631-5988-bf34-fe36492bcfd7" -version = "2.10.4+0" - -[[FriBidi_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "aa31987c2ba8704e23c6c8ba8a4f769d5d7e4f91" -uuid = "559328eb-81f9-559d-9380-de523a88c83c" -version = "1.0.10+0" - -[[FunctionWrappers]] -git-tree-sha1 = "241552bc2209f0fa068b6415b1942cc0aa486bcc" -uuid = "069b7b12-0de2-55c6-9aab-29f3d0a68a2e" -version = "1.1.2" - -[[Future]] -deps = ["Random"] -uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" - -[[GR]] -deps = ["Base64", "DelimitedFiles", "HTTP", "JSON", "LinearAlgebra", "Printf", "Random", "Serialization", "Sockets", "Test", "UUIDs"] -git-tree-sha1 = "cd0f34bd097d4d5eb6bbe01778cf8a7ed35f29d9" -uuid = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71" -version = "0.52.0" - -[[GaussianProcesses]] -deps = ["Distances", "Distributions", "ElasticArrays", "ElasticPDMats", "FastGaussQuadrature", "ForwardDiff", "LinearAlgebra", "Optim", "PDMats", "Printf", "ProgressMeter", "Random", "RecipesBase", "ScikitLearnBase", "SpecialFunctions", "StaticArrays", "Statistics", "StatsFuns"] -git-tree-sha1 = "9cf8ba8037e332b1be14c71e549143e68c42a22d" -uuid = "891a1506-143c-57d2-908e-e1f8e92e6de9" -version = "0.12.4" - -[[GeneralizedGenerated]] -deps = ["CanonicalTraits", "DataStructures", "JuliaVariables", "MLStyle"] -git-tree-sha1 = "7dd404baf79b28f117917633f0cc1d2976c1fd9f" -uuid = "6b9d7cbe-bcb9-11e9-073f-15a7a543e2eb" -version = "0.2.8" - -[[GeometryBasics]] -deps = ["EarCut_jll", "IterTools", "LinearAlgebra", "StaticArrays", "StructArrays", "Tables"] -git-tree-sha1 = "4136b8a5668341e58398bb472754bff4ba0456ff" -uuid = "5c1252a2-5f33-56bf-86c9-59e7332b4326" -version = "0.3.12" - -[[GeometryTypes]] -deps = ["ColorTypes", "FixedPointNumbers", "LinearAlgebra", "StaticArrays"] -git-tree-sha1 = "d796f7be0383b5416cd403420ce0af083b0f9b28" -uuid = "4d00f742-c7ba-57c2-abde-4428a4b178cb" -version = "0.8.5" - -[[Gettext_jll]] -deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "XML2_jll"] -git-tree-sha1 = 
"9b02998aba7bf074d14de89f9d37ca24a1a0b046" -uuid = "78b55507-aeef-58d4-861c-77aaff3498b1" -version = "0.21.0+0" - -[[Glib_jll]] -deps = ["Artifacts", "Gettext_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Libiconv_jll", "Libmount_jll", "PCRE_jll", "Pkg", "Zlib_jll"] -git-tree-sha1 = "a32d672ac2c967f3deb8a81d828afc739c838a06" -uuid = "7746bdde-850d-59dc-9ae8-88ece973131d" -version = "2.68.3+2" - -[[Graphite2_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "344bf40dcab1073aca04aa0df4fb092f920e4011" -uuid = "3b182d85-2403-5c21-9c21-1e1f0cc25472" -version = "1.3.14+0" - -[[Grisu]] -git-tree-sha1 = "53bb909d1151e57e2484c3d1b53e19552b887fb2" -uuid = "42e2da0e-8278-4e71-bc24-59509adca0fe" -version = "1.0.2" - -[[HCubature]] -deps = ["Combinatorics", "DataStructures", "LinearAlgebra", "QuadGK", "StaticArrays"] -git-tree-sha1 = "134af3b940d1ca25b19bc9740948157cee7ff8fa" -uuid = "19dc6840-f33b-545b-b366-655c7e3ffd49" -version = "1.5.0" - -[[HTTP]] -deps = ["Base64", "Dates", "IniFile", "MbedTLS", "Sockets"] -git-tree-sha1 = "c7ec02c4c6a039a98a15f955462cd7aea5df4508" -uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" -version = "0.8.19" - -[[HarfBuzz_jll]] -deps = ["Artifacts", "Cairo_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "Graphite2_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg"] -git-tree-sha1 = "129acf094d168394e80ee1dc4bc06ec835e510a3" -uuid = "2e76f6c2-a576-52d4-95c1-20adfe4de566" -version = "2.8.1+1" - -[[HostCPUFeatures]] -deps = ["BitTwiddlingConvenienceFunctions", "IfElse", "Libdl", "Static"] -git-tree-sha1 = "8f0dc80088981ab55702b04bba38097a44a1a3a9" -uuid = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0" -version = "0.1.5" - -[[Hwloc]] -deps = ["Hwloc_jll"] -git-tree-sha1 = "92d99146066c5c6888d5a3abc871e6a214388b91" -uuid = "0e44f5e4-bd66-52a0-8798-143a42290a1d" -version = "2.0.0" - -[[Hwloc_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "3395d4d4aeb3c9d31f5929d32760d8baeee88aaf" -uuid = "e33a78d0-f292-5ffc-b300-72abe9b543c8" -version = "2.5.0+0" - -[[IfElse]] -git-tree-sha1 = "debdd00ffef04665ccbb3e150747a77560e8fad1" -uuid = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" -version = "0.1.1" - -[[Inflate]] -git-tree-sha1 = "f5fc07d4e706b84f72d54eedcc1c13d92fb0871c" -uuid = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9" -version = "0.1.2" - -[[IniFile]] -deps = ["Test"] -git-tree-sha1 = "098e4d2c533924c921f9f9847274f2ad89e018b8" -uuid = "83e8ac13-25f8-5344-8a64-a9f2b223428f" -version = "0.5.0" - -[[IntelOpenMP_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "d979e54b71da82f3a65b62553da4fc3d18c9004c" -uuid = "1d5cc7b8-4909-519e-a0f8-d0f5ad9712d0" -version = "2018.0.3+2" - -[[InteractiveUtils]] -deps = ["Markdown"] -uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" - -[[Interpolations]] -deps = ["AxisAlgorithms", "ChainRulesCore", "LinearAlgebra", "OffsetArrays", "Random", "Ratios", "Requires", "SharedArrays", "SparseArrays", "StaticArrays", "WoodburyMatrices"] -git-tree-sha1 = "b15fc0a95c564ca2e0a7ae12c1f095ca848ceb31" -uuid = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" -version = "0.13.5" - -[[InverseFunctions]] -deps = ["Test"] -git-tree-sha1 = "a7254c0acd8e62f1ac75ad24d5db43f5f19f3c65" -uuid = "3587e190-3f89-42d0-90ee-14403ec27112" -version = "0.1.2" - -[[IrrationalConstants]] -git-tree-sha1 = "7fd44fd4ff43fc60815f8e764c0f352b83c49151" -uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" -version = "0.1.1" - -[[IterTools]] -git-tree-sha1 = "fa6287a4469f5e048d763df38279ee729fbd44e5" -uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e" 
-version = "1.4.0" - -[[IterativeSolvers]] -deps = ["LinearAlgebra", "Printf", "Random", "RecipesBase", "SparseArrays"] -git-tree-sha1 = "1169632f425f79429f245113b775a0e3d121457c" -uuid = "42fd0dbc-a981-5370-80f2-aaf504508153" -version = "0.9.2" - -[[IteratorInterfaceExtensions]] -git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" -uuid = "82899510-4779-5014-852e-03e436cf321d" -version = "1.0.0" - -[[JLLWrappers]] -deps = ["Preferences"] -git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e" -uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" -version = "1.3.0" - -[[JSON]] -deps = ["Dates", "Mmap", "Parsers", "Unicode"] -git-tree-sha1 = "8076680b162ada2a031f707ac7b4953e30667a37" -uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" -version = "0.21.2" - -[[JuliaVariables]] -deps = ["MLStyle", "NameResolution"] -git-tree-sha1 = "49fb3cb53362ddadb4415e9b73926d6b40709e70" -uuid = "b14d175d-62b4-44ba-8fb7-3064adc8c3ec" -version = "0.2.4" - -[[KernelDensity]] -deps = ["Distributions", "DocStringExtensions", "FFTW", "Interpolations", "StatsBase"] -git-tree-sha1 = "591e8dc09ad18386189610acafb970032c519707" -uuid = "5ab0869b-81aa-558d-bb23-cbf5423bbe9b" -version = "0.6.3" - -[[LAME_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "f6250b16881adf048549549fba48b1161acdac8c" -uuid = "c1c5ebd0-6772-5130-a774-d5fcae4a789d" -version = "3.100.1+0" - -[[LZO_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "e5b909bcf985c5e2605737d2ce278ed791b89be6" -uuid = "dd4b983a-f0e5-5f8d-a1b7-129d4a5fb1ac" -version = "2.10.1+0" - -[[LaTeXStrings]] -git-tree-sha1 = "f2355693d6778a178ade15952b7ac47a4ff97996" -uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" -version = "1.3.0" - -[[LabelledArrays]] -deps = ["ArrayInterface", "ChainRulesCore", "LinearAlgebra", "MacroTools", "StaticArrays"] -git-tree-sha1 = "3609bbf5feba7b22fb35fe7cb207c8c8d2e2fc5b" -uuid = "2ee39098-c373-598a-b85f-a56591580800" -version = "1.6.7" - -[[Latexify]] -deps = ["Formatting", "InteractiveUtils", "LaTeXStrings", "MacroTools", "Markdown", "Printf", "Requires"] -git-tree-sha1 = "864527aa4d14c893fb8c51d48ef314410c88c7b9" -uuid = "23fbe1c1-3f47-55db-b15f-69d7ec21a316" -version = "0.13.5" - -[[LayoutPointers]] -deps = ["ArrayInterface", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static"] -git-tree-sha1 = "83b56449c39342a47f3fcdb3bc782bd6d66e1d97" -uuid = "10f19ff3-798f-405d-979b-55457f8fc047" -version = "0.1.4" - -[[LibCURL]] -deps = ["LibCURL_jll", "MozillaCACerts_jll"] -uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" - -[[LibCURL_jll]] -deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] -uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" - -[[LibGit2]] -deps = ["Base64", "NetworkOptions", "Printf", "SHA"] -uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" - -[[LibSSH2_jll]] -deps = ["Artifacts", "Libdl", "MbedTLS_jll"] -uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" - -[[Libdl]] -uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" - -[[Libffi_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "0b4a5d71f3e5200a7dff793393e09dfc2d874290" -uuid = "e9f186c6-92d2-5b65-8a66-fee21dc1b490" -version = "3.2.2+1" - -[[Libgcrypt_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgpg_error_jll", "Pkg"] -git-tree-sha1 = "64613c82a59c120435c067c2b809fc61cf5166ae" -uuid = "d4300ac3-e22c-5743-9152-c294e39db1e4" -version = "1.8.7+0" - -[[Libgpg_error_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "c333716e46366857753e273ce6a69ee0945a6db9" 
-uuid = "7add5ba3-2f88-524e-9cd5-f83b8a55f7b8" -version = "1.42.0+0" - -[[Libiconv_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "42b62845d70a619f063a7da093d995ec8e15e778" -uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" -version = "1.16.1+1" - -[[Libmount_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "9c30530bf0effd46e15e0fdcf2b8636e78cbbd73" -uuid = "4b2f31a3-9ecc-558c-b454-b3730dcb73e9" -version = "2.35.0+0" - -[[Libuuid_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "7f3efec06033682db852f8b3bc3c1d2b0a0ab066" -uuid = "38a345b3-de98-5d2b-a5d3-14cd9215e700" -version = "2.36.0+0" - -[[LightGraphs]] -deps = ["ArnoldiMethod", "DataStructures", "Distributed", "Inflate", "LinearAlgebra", "Random", "SharedArrays", "SimpleTraits", "SparseArrays", "Statistics"] -git-tree-sha1 = "432428df5f360964040ed60418dd5601ecd240b6" -uuid = "093fc24a-ae57-5d10-9952-331d41423f4d" -version = "1.3.5" - -[[LineSearches]] -deps = ["LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "Printf"] -git-tree-sha1 = "f27132e551e959b3667d8c93eae90973225032dd" -uuid = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" -version = "7.1.1" - -[[LinearAlgebra]] -deps = ["Libdl"] -uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" - -[[LogExpFunctions]] -deps = ["ChainRulesCore", "ChangesOfVariables", "DocStringExtensions", "InverseFunctions", "IrrationalConstants", "LinearAlgebra"] -git-tree-sha1 = "e5718a00af0ab9756305a0392832c8952c7426c1" -uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" -version = "0.3.6" - -[[Logging]] -uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" - -[[LoopVectorization]] -deps = ["ArrayInterface", "CPUSummary", "CloseOpenIntervals", "DocStringExtensions", "HostCPUFeatures", "IfElse", "LayoutPointers", "LinearAlgebra", "OffsetArrays", "PolyesterWeave", "Requires", "SIMDDualNumbers", "SLEEFPirates", "Static", "ThreadingUtilities", "UnPack", "VectorizationBase"] -git-tree-sha1 = "9e10579c154f785b911d9ceb96c33fcc1a661171" -uuid = "bdcacae8-1622-11e9-2a5c-532679323890" -version = "0.12.99" - -[[MKL_jll]] -deps = ["IntelOpenMP_jll", "Libdl", "Pkg"] -git-tree-sha1 = "eb540ede3aabb8284cb482aa41d00d6ca850b1f8" -uuid = "856f044c-d86e-5d09-b602-aeab76dc8ba7" -version = "2020.2.254+0" - -[[MLStyle]] -git-tree-sha1 = "594e189325f66e23a8818e5beb11c43bb0141bcd" -uuid = "d8e11817-5142-5d16-987a-aa16d5891078" -version = "0.4.10" - -[[MacroTools]] -deps = ["Markdown", "Random"] -git-tree-sha1 = "3d3e902b31198a27340d0bf00d6ac452866021cf" -uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" -version = "0.5.9" - -[[ManualMemory]] -git-tree-sha1 = "9cb207b18148b2199db259adfa923b45593fe08e" -uuid = "d125e4d3-2237-4719-b19c-fa641b8a4667" -version = "0.1.6" - -[[Markdown]] -deps = ["Base64"] -uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" - -[[MbedTLS]] -deps = ["Dates", "MbedTLS_jll", "Random", "Sockets"] -git-tree-sha1 = "1c38e51c3d08ef2278062ebceade0e46cefc96fe" -uuid = "739be429-bea8-5141-9913-cc70e7f3736d" -version = "1.0.3" - -[[MbedTLS_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" - -[[Measures]] -git-tree-sha1 = "e498ddeee6f9fdb4551ce855a46f54dbd900245f" -uuid = "442fdcdd-2543-5da2-b0f3-8c86c306513e" -version = "0.3.1" - -[[Missings]] -deps = ["DataAPI"] -git-tree-sha1 = "bf210ce90b6c9eed32d25dbcae1ebc565df2687f" -uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" -version = "1.0.2" - -[[Mmap]] -uuid = "a63ad114-7e13-5084-954f-fe012c677804" - -[[ModelingToolkit]] -deps = ["DiffEqBase", "DiffRules", "Distributed", 
"DocStringExtensions", "GeneralizedGenerated", "Latexify", "LinearAlgebra", "MacroTools", "NaNMath", "SafeTestsets", "SparseArrays", "SpecialFunctions", "StaticArrays", "TreeViews", "Unitful"] -git-tree-sha1 = "2da0c371ce983b02afd6a957ba3c6b0aa977a3b3" -uuid = "961ee093-0014-501f-94e3-6117800e7a78" -version = "2.0.0" - -[[MozillaCACerts_jll]] -uuid = "14a3606d-f60d-562e-9121-12d972cd8159" - -[[MuladdMacro]] -git-tree-sha1 = "c6190f9a7fc5d9d5915ab29f2134421b12d24a68" -uuid = "46d2c3a1-f734-5fdb-9937-b9b9aeba4221" -version = "0.2.2" - -[[MultiScaleArrays]] -deps = ["DiffEqBase", "FiniteDiff", "ForwardDiff", "LinearAlgebra", "OrdinaryDiffEq", "Random", "RecursiveArrayTools", "SparseDiffTools", "Statistics", "StochasticDiffEq", "TreeViews"] -git-tree-sha1 = "03647373a31d9bde3382a31cca11f528b4d6bd5b" -uuid = "f9640e96-87f6-5992-9c3b-0743c6a49ffa" -version = "1.9.0" - -[[MultivariateStats]] -deps = ["Arpack", "LinearAlgebra", "SparseArrays", "Statistics", "StatsBase"] -git-tree-sha1 = "8d958ff1854b166003238fe191ec34b9d592860a" -uuid = "6f286f6a-111f-5878-ab1e-185364afe411" -version = "0.8.0" - -[[NLSolversBase]] -deps = ["Calculus", "DiffEqDiffTools", "DiffResults", "Distributed", "ForwardDiff"] -git-tree-sha1 = "f1b8ed89fa332f410cfc7c937682eb4d0b361521" -uuid = "d41bc354-129a-5804-8e4c-c37616107c6c" -version = "7.5.0" - -[[NLsolve]] -deps = ["Distances", "LineSearches", "LinearAlgebra", "NLSolversBase", "Printf", "Reexport"] -git-tree-sha1 = "019f12e9a1a7880459d0173c182e6a99365d7ac1" -uuid = "2774e3e8-f4cf-5e23-947b-6d7e65073b56" -version = "4.5.1" - -[[NaNMath]] -git-tree-sha1 = "f755f36b19a5116bb580de457cda0c140153f283" -uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" -version = "0.3.6" - -[[NameResolution]] -deps = ["PrettyPrint"] -git-tree-sha1 = "1a0fa0e9613f46c9b8c11eee38ebb4f590013c5e" -uuid = "71a1bf82-56d0-4bbc-8a3c-48b961074391" -version = "0.1.5" - -[[NearestNeighbors]] -deps = ["Distances", "StaticArrays"] -git-tree-sha1 = "16baacfdc8758bc374882566c9187e785e85c2f0" -uuid = "b8a86587-4115-5ab1-83bc-aa920d37bbce" -version = "0.4.9" - -[[NetworkOptions]] -uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" - -[[NonlinearSolve]] -deps = ["ArrayInterface", "FiniteDiff", "ForwardDiff", "IterativeSolvers", "LinearAlgebra", "RecursiveArrayTools", "RecursiveFactorization", "Reexport", "SciMLBase", "Setfield", "StaticArrays", "UnPack"] -git-tree-sha1 = "8dc3be3e9edf976a3e79363b3bd2ad776a627c31" -uuid = "8913a72c-1f9b-4ce2-8d82-65094dcecaec" -version = "0.3.12" - -[[Observables]] -git-tree-sha1 = "fe29afdef3d0c4a8286128d4e45cc50621b1e43d" -uuid = "510215fc-4207-5dde-b226-833fc4488ee2" -version = "0.4.0" - -[[OffsetArrays]] -deps = ["Adapt"] -git-tree-sha1 = "043017e0bdeff61cfbb7afeb558ab29536bbb5ed" -uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" -version = "1.10.8" - -[[Ogg_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "887579a3eb005446d514ab7aeac5d1d027658b8f" -uuid = "e7412a2a-1a6e-54c0-be00-318e2571c051" -version = "1.3.5+1" - -[[OpenBLAS_jll]] -deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] -uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" - -[[OpenSSL_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "15003dcb7d8db3c6c857fda14891a539a8f2705a" -uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" -version = "1.1.10+0" - -[[Optim]] -deps = ["Compat", "FillArrays", "LineSearches", "LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "PositiveFactorizations", "Printf", "SparseArrays", "StatsBase"] -git-tree-sha1 = 
"62054d469d3631960e3f472ceb8624be5b11c34d" -uuid = "429524aa-4258-5aef-a3af-852621145aeb" -version = "0.20.6" - -[[Opus_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "51a08fb14ec28da2ec7a927c4337e4332c2a4720" -uuid = "91d4177d-7536-5919-b921-800302f37372" -version = "1.3.2+0" - -[[OrderedCollections]] -git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c" -uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" -version = "1.4.1" - -[[OrdinaryDiffEq]] -deps = ["Adapt", "ArrayInterface", "DataStructures", "DiffEqBase", "DocStringExtensions", "ExponentialUtilities", "FastClosures", "FiniteDiff", "ForwardDiff", "LinearAlgebra", "Logging", "MacroTools", "MuladdMacro", "NLsolve", "RecursiveArrayTools", "Reexport", "SparseArrays", "SparseDiffTools", "StaticArrays", "UnPack"] -git-tree-sha1 = "e9f977a3119e7bfb3bfaeb3daa354f38e9baf76f" -uuid = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" -version = "5.55.1" - -[[PCRE_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "b2a7af664e098055a7529ad1a900ded962bca488" -uuid = "2f80f16e-611a-54ab-bc61-aa92de5b98fc" -version = "8.44.0+0" - -[[PDMats]] -deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse", "Test"] -git-tree-sha1 = "95a4038d1011dfdbde7cecd2ad0ac411e53ab1bc" -uuid = "90014a1f-27ba-587c-ab20-58faa44d9150" -version = "0.10.1" - -[[ParameterizedFunctions]] -deps = ["DataStructures", "DiffEqBase", "Latexify", "LinearAlgebra", "ModelingToolkit", "Reexport"] -git-tree-sha1 = "291279c720121d7f5c6a145726bb94da79e9b42c" -uuid = "65888b18-ceab-5e60-b2b9-181511a3b968" -version = "5.6.0" - -[[Parameters]] -deps = ["OrderedCollections", "UnPack"] -git-tree-sha1 = "34c0e9ad262e5f7fc75b10a9952ca7692cfc5fbe" -uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a" -version = "0.12.3" - -[[Parsers]] -deps = ["Dates"] -git-tree-sha1 = "d7fa6237da8004be601e19bd6666083056649918" -uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "2.1.3" - -[[Pixman_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "b4f5d02549a10e20780a24fce72bea96b6329e29" -uuid = "30392449-352a-5448-841d-b1acce4e97dc" -version = "0.40.1+0" - -[[Pkg]] -deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] -uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" - -[[PlotThemes]] -deps = ["PlotUtils", "Requires", "Statistics"] -git-tree-sha1 = "a3a964ce9dc7898193536002a6dd892b1b5a6f1d" -uuid = "ccf2f8ad-2431-5c83-bf29-c5338b663b6a" -version = "2.0.1" - -[[PlotUtils]] -deps = ["ColorSchemes", "Colors", "Dates", "Printf", "Random", "Reexport", "Statistics"] -git-tree-sha1 = "68604313ed59f0408313228ba09e79252e4b2da8" -uuid = "995b91a9-d308-5afd-9ec6-746e21dbc043" -version = "1.1.2" - -[[Plots]] -deps = ["Base64", "Contour", "Dates", "FFMPEG", "FixedPointNumbers", "GR", "GeometryBasics", "GeometryTypes", "JSON", "LinearAlgebra", "Measures", "NaNMath", "PlotThemes", "PlotUtils", "Printf", "REPL", "Random", "RecipesBase", "RecipesPipeline", "Reexport", "Requires", "Showoff", "SparseArrays", "Statistics", "StatsBase", "UUIDs"] -git-tree-sha1 = "c499e18bbeab024f6de0e0ae285554d153eeb5c5" -uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" -version = "1.6.8" - -[[PoissonRandom]] -deps = ["Random", "Statistics", "Test"] -git-tree-sha1 = "44d018211a56626288b5d3f8c6497d28c26dc850" -uuid = "e409e4f3-bfea-5376-8464-e040bb5c01ab" -version = "0.4.0" - -[[Polyester]] -deps = ["ArrayInterface", "BitTwiddlingConvenienceFunctions", "CPUSummary", 
"IfElse", "ManualMemory", "PolyesterWeave", "Requires", "Static", "StrideArraysCore", "ThreadingUtilities"] -git-tree-sha1 = "3c44fc250c04352839cea8d5b9d94bcb7b3de420" -uuid = "f517fe37-dbe3-4b94-8317-1923a5111588" -version = "0.6.2" - -[[PolyesterWeave]] -deps = ["BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "Static", "ThreadingUtilities"] -git-tree-sha1 = "a3ff99bf561183ee20386aec98ab8f4a12dc724a" -uuid = "1d0040c9-8b98-4ee7-8388-3f51789ca0ad" -version = "0.1.2" - -[[PositiveFactorizations]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "17275485f373e6673f7e7f97051f703ed5b15b20" -uuid = "85a6dd25-e78a-55b7-8502-1745935b8125" -version = "0.2.4" - -[[Preferences]] -deps = ["TOML"] -git-tree-sha1 = "2cf929d64681236a2e074ffafb8d568733d2e6af" -uuid = "21216c6a-2e73-6563-6e65-726566657250" -version = "1.2.3" - -[[PrettyPrint]] -git-tree-sha1 = "632eb4abab3449ab30c5e1afaa874f0b98b586e4" -uuid = "8162dcfd-2161-5ef2-ae6c-7681170c5f98" -version = "0.2.0" - -[[Printf]] -deps = ["Unicode"] -uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" - -[[ProgressMeter]] -deps = ["Distributed", "Printf"] -git-tree-sha1 = "afadeba63d90ff223a6a48d2009434ecee2ec9e8" -uuid = "92933f4c-e287-5a05-a399-4b506db050ca" -version = "1.7.1" - -[[PyCall]] -deps = ["Conda", "Dates", "Libdl", "LinearAlgebra", "MacroTools", "Serialization", "VersionParsing"] -git-tree-sha1 = "71fd4022ecd0c6d20180e23ff1b3e05a143959c2" -uuid = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" -version = "1.93.0" - -[[PyPlot]] -deps = ["Colors", "LaTeXStrings", "PyCall", "Sockets", "Test", "VersionParsing"] -git-tree-sha1 = "14c1b795b9d764e1784713941e787e1384268103" -uuid = "d330b81b-6aea-500a-939a-2ce795aea3ee" -version = "2.10.0" - -[[QuadGK]] -deps = ["DataStructures", "LinearAlgebra"] -git-tree-sha1 = "78aadffb3efd2155af139781b8a8df1ef279ea39" -uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" -version = "2.4.2" - -[[REPL]] -deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] -uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" - -[[Random]] -deps = ["Serialization"] -uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" - -[[Random123]] -deps = ["Libdl", "Random", "RandomNumbers"] -git-tree-sha1 = "0e8b146557ad1c6deb1367655e052276690e71a3" -uuid = "74087812-796a-5b5d-8853-05524746bad3" -version = "1.4.2" - -[[RandomNumbers]] -deps = ["Random", "Requires"] -git-tree-sha1 = "043da614cc7e95c703498a491e2c21f58a2b8111" -uuid = "e6cf234a-135c-5ec9-84dd-332b85af5143" -version = "1.5.3" - -[[Ratios]] -deps = ["Requires"] -git-tree-sha1 = "01d341f502250e81f6fec0afe662aa861392a3aa" -uuid = "c84ed2f1-dad5-54f0-aa8e-dbefe2724439" -version = "0.4.2" - -[[RecipesBase]] -git-tree-sha1 = "6bf3f380ff52ce0832ddd3a2a7b9538ed1bcca7d" -uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" -version = "1.2.1" - -[[RecipesPipeline]] -deps = ["Dates", "NaNMath", "PlotUtils", "RecipesBase"] -git-tree-sha1 = "4a325c9bcc2d8e62a8f975b9666d0251d53b63b9" -uuid = "01d81517-befc-4cb6-b9ec-a95719d0359c" -version = "0.1.13" - -[[RecursiveArrayTools]] -deps = ["ArrayInterface", "ChainRulesCore", "DocStringExtensions", "LinearAlgebra", "RecipesBase", "Requires", "StaticArrays", "Statistics", "ZygoteRules"] -git-tree-sha1 = "00bede2eb099dcc1ddc3f9ec02180c326b420ee2" -uuid = "731186ca-8d62-57ce-b412-fbd966d074cd" -version = "2.17.2" - -[[RecursiveFactorization]] -deps = ["LinearAlgebra", "LoopVectorization"] -git-tree-sha1 = "2e1a88c083ebe8ba69bc0b0084d4b4ba4aa35ae0" -uuid = "f2c3362d-daeb-58d1-803e-2bc74f2840b4" -version = "0.1.13" - -[[Reexport]] -deps = ["Pkg"] -git-tree-sha1 = 
"7b1d07f411bc8ddb7977ec7f377b97b158514fe0" -uuid = "189a3867-3050-52da-a836-e630ba90ab69" -version = "0.2.0" - -[[Requires]] -deps = ["UUIDs"] -git-tree-sha1 = "8f82019e525f4d5c669692772a6f4b0a58b06a6a" -uuid = "ae029012-a4dd-5104-9daa-d747884805df" -version = "1.2.0" - -[[ResettableStacks]] -deps = ["StaticArrays"] -git-tree-sha1 = "256eeeec186fa7f26f2801732774ccf277f05db9" -uuid = "ae5879a3-cd67-5da8-be7f-38c6eb64a37b" -version = "1.1.1" - -[[Rmath]] -deps = ["Random", "Rmath_jll"] -git-tree-sha1 = "bf3188feca147ce108c76ad82c2792c57abe7b1f" -uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa" -version = "0.7.0" - -[[Rmath_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "68db32dff12bb6127bac73c209881191bf0efbb7" -uuid = "f50d1b31-88e8-58de-be2c-1cc44531875f" -version = "0.3.0+0" - -[[SHA]] -uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" - -[[SIMDDualNumbers]] -deps = ["ForwardDiff", "IfElse", "SLEEFPirates", "VectorizationBase"] -git-tree-sha1 = "62c2da6eb66de8bb88081d20528647140d4daa0e" -uuid = "3cdde19b-5bb0-4aaf-8931-af3e248e098b" -version = "0.1.0" - -[[SIMDTypes]] -git-tree-sha1 = "330289636fb8107c5f32088d2741e9fd7a061a5c" -uuid = "94e857df-77ce-4151-89e5-788b33177be4" -version = "0.1.0" - -[[SLEEFPirates]] -deps = ["IfElse", "Static", "VectorizationBase"] -git-tree-sha1 = "1410aad1c6b35862573c01b96cd1f6dbe3979994" -uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa" -version = "0.6.28" - -[[SafeTestsets]] -deps = ["Test"] -git-tree-sha1 = "36ebc5622c82eb9324005cc75e7e2cc51181d181" -uuid = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" -version = "0.0.1" - -[[SciMLBase]] -deps = ["ArrayInterface", "CommonSolve", "ConstructionBase", "Distributed", "DocStringExtensions", "IteratorInterfaceExtensions", "LinearAlgebra", "Logging", "RecipesBase", "RecursiveArrayTools", "StaticArrays", "Statistics", "Tables", "TreeViews"] -git-tree-sha1 = "c61870a745fb9a468649d9efdd05c18d30e6a6e2" -uuid = "0bca4576-84f4-4d90-8ffe-ffa030f20462" -version = "1.24.0" - -[[ScikitLearnBase]] -deps = ["LinearAlgebra", "Random", "Statistics"] -git-tree-sha1 = "7877e55c1523a4b336b433da39c8e8c08d2f221f" -uuid = "6e75b9c4-186b-50bd-896f-2d2496a4843e" -version = "0.5.0" - -[[SentinelArrays]] -deps = ["Dates", "Random"] -git-tree-sha1 = "244586bc07462d22aed0113af9c731f2a518c93e" -uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c" -version = "1.3.10" - -[[Serialization]] -uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" - -[[Setfield]] -deps = ["ConstructionBase", "Future", "MacroTools", "Requires"] -git-tree-sha1 = "fca29e68c5062722b5b4435594c3d1ba557072a3" -uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46" -version = "0.7.1" - -[[SharedArrays]] -deps = ["Distributed", "Mmap", "Random", "Serialization"] -uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" - -[[Showoff]] -deps = ["Dates", "Grisu"] -git-tree-sha1 = "ee010d8f103468309b8afac4abb9be2e18ff1182" -uuid = "992d4aef-0814-514b-bc4d-f2e9a6c4116f" -version = "0.3.2" - -[[SimpleTraits]] -deps = ["InteractiveUtils", "MacroTools"] -git-tree-sha1 = "5d7e3f4e11935503d3ecaf7186eac40602e7d231" -uuid = "699a6c99-e7fa-54fc-8d76-47d257e15c1d" -version = "0.9.4" - -[[Sockets]] -uuid = "6462fe0b-24de-5631-8697-dd941f90decc" - -[[SortingAlgorithms]] -deps = ["DataStructures"] -git-tree-sha1 = "b3363d7460f7d098ca0912c69b082f75625d7508" -uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" -version = "1.0.1" - -[[SparseArrays]] -deps = ["LinearAlgebra", "Random"] -uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" - -[[SparseDiffTools]] -deps = ["Adapt", "ArrayInterface", "Compat", "DataStructures", 
"FiniteDiff", "ForwardDiff", "LightGraphs", "LinearAlgebra", "Requires", "SparseArrays", "VertexSafeGraphs"] -git-tree-sha1 = "be20320958ccd298c98312137a5ebe75a654ebc8" -uuid = "47a9eef4-7e08-11e9-0b38-333d64bd3804" -version = "1.13.2" - -[[SpecialFunctions]] -deps = ["BinDeps", "BinaryProvider", "Libdl"] -git-tree-sha1 = "3bdd374b6fd78faf0119b8c5d538788dbf910c6e" -uuid = "276daf66-3868-5448-9aa4-cd146d93841b" -version = "0.8.0" - -[[Static]] -deps = ["IfElse"] -git-tree-sha1 = "7f5a513baec6f122401abfc8e9c074fdac54f6c1" -uuid = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" -version = "0.4.1" - -[[StaticArrays]] -deps = ["LinearAlgebra", "Random", "Statistics"] -git-tree-sha1 = "da4cf579416c81994afd6322365d00916c79b8ae" -uuid = "90137ffa-7385-5640-81b9-e52037218182" -version = "0.12.5" - -[[Statistics]] -deps = ["LinearAlgebra", "SparseArrays"] -uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" - -[[StatsAPI]] -git-tree-sha1 = "d88665adc9bcf45903013af0982e2fd05ae3d0a6" -uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0" -version = "1.2.0" - -[[StatsBase]] -deps = ["DataAPI", "DataStructures", "LinearAlgebra", "LogExpFunctions", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"] -git-tree-sha1 = "51383f2d367eb3b444c961d485c565e4c0cf4ba0" -uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" -version = "0.33.14" - -[[StatsFuns]] -deps = ["Rmath", "SpecialFunctions"] -git-tree-sha1 = "ced55fd4bae008a8ea12508314e725df61f0ba45" -uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c" -version = "0.9.7" - -[[StatsPlots]] -deps = ["Clustering", "DataStructures", "DataValues", "Distributions", "Interpolations", "KernelDensity", "LinearAlgebra", "MultivariateStats", "Observables", "Plots", "RecipesBase", "RecipesPipeline", "Reexport", "StatsBase", "TableOperations", "Tables", "Widgets"] -git-tree-sha1 = "e1e5ed9669d5521d4bbdd4fab9f0945a0ffceba2" -uuid = "f3b207a7-027a-5e70-b257-86293d7955fd" -version = "0.14.30" - -[[SteadyStateDiffEq]] -deps = ["DiffEqBase", "DiffEqCallbacks", "LinearAlgebra", "NLsolve", "Reexport", "SciMLBase"] -git-tree-sha1 = "3e057e1f9f12d18cac32011aed9e61eef6c1c0ce" -uuid = "9672c7b4-1e72-59bd-8a11-6ac3964bc41f" -version = "1.6.6" - -[[StochasticDiffEq]] -deps = ["Adapt", "ArrayInterface", "DataStructures", "DiffEqBase", "DiffEqJump", "DiffEqNoiseProcess", "DocStringExtensions", "FillArrays", "FiniteDiff", "ForwardDiff", "LinearAlgebra", "Logging", "MuladdMacro", "NLsolve", "OrdinaryDiffEq", "Random", "RandomNumbers", "RecursiveArrayTools", "Reexport", "SparseArrays", "SparseDiffTools", "StaticArrays", "UnPack"] -git-tree-sha1 = "45b59a5bd9665fe678c0372d7026321df28769d8" -uuid = "789caeaf-c7a9-5a7d-9973-96adeb23e2a0" -version = "6.40.0" - -[[StrideArraysCore]] -deps = ["ArrayInterface", "CloseOpenIntervals", "IfElse", "LayoutPointers", "ManualMemory", "Requires", "SIMDTypes", "Static", "ThreadingUtilities"] -git-tree-sha1 = "12cf3253ebd8e2a3214ae171fbfe51e7e8d8ad28" -uuid = "7792a7ef-975c-4747-a70f-980b88e8d1da" -version = "0.2.9" - -[[StructArrays]] -deps = ["Adapt", "DataAPI", "Tables"] -git-tree-sha1 = "44b3afd37b17422a62aea25f04c1f7e09ce6b07f" -uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" -version = "0.5.1" - -[[SuiteSparse]] -deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"] -uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" - -[[SuiteSparse_jll]] -deps = ["Artifacts", "Libdl", "OpenBLAS_jll"] -uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" - -[[Sundials]] -deps = ["CEnum", "DataStructures", "DiffEqBase", "Libdl", "LinearAlgebra", "Logging", 
"Reexport", "SparseArrays", "Sundials_jll"] -git-tree-sha1 = "a8bbdf8e19058d1cf69c29ebb4514e9bcaa2305f" -uuid = "c3572dad-4567-51f8-b174-8c6c989267f4" -version = "4.9.0" - -[[Sundials_jll]] -deps = ["CompilerSupportLibraries_jll", "Libdl", "OpenBLAS_jll", "Pkg", "SuiteSparse_jll"] -git-tree-sha1 = "013ff4504fc1d475aa80c63b455b6b3a58767db2" -uuid = "fb77eaff-e24c-56d4-86b1-d163f2edb164" -version = "5.2.0+1" - -[[TOML]] -deps = ["Dates"] -uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" - -[[TableOperations]] -deps = ["SentinelArrays", "Tables", "Test"] -git-tree-sha1 = "e383c87cf2a1dc41fa30c093b2a19877c83e1bc1" -uuid = "ab02a1b2-a7df-11e8-156e-fb1833f50b87" -version = "1.2.0" - -[[TableTraits]] -deps = ["IteratorInterfaceExtensions"] -git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39" -uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" -version = "1.0.1" - -[[Tables]] -deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "TableTraits", "Test"] -git-tree-sha1 = "bb1064c9a84c52e277f1096cf41434b675cd368b" -uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" -version = "1.6.1" - -[[Tar]] -deps = ["ArgTools", "SHA"] -uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" - -[[TaylorSeries]] -deps = ["InteractiveUtils", "LinearAlgebra", "Markdown", "Requires", "SparseArrays"] -git-tree-sha1 = "66f4d1993bae49eeba21a1634b5f65782585a42c" -uuid = "6aa5eb33-94cf-58f4-a9d0-e4b2c4fc25ea" -version = "0.10.13" - -[[Test]] -deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] -uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[[ThreadingUtilities]] -deps = ["ManualMemory"] -git-tree-sha1 = "884539ba8c4584a3a8173cb4ee7b61049955b79c" -uuid = "8290d209-cae3-49c0-8002-c8c24d57dab5" -version = "0.4.7" - -[[TreeViews]] -deps = ["Test"] -git-tree-sha1 = "8d0d7a3fe2f30d6a7f833a5f19f7c7a5b396eae6" -uuid = "a2a6695c-b41b-5b7d-aed9-dbfdeacea5d7" -version = "0.3.0" - -[[URIParser]] -deps = ["Unicode"] -git-tree-sha1 = "53a9f49546b8d2dd2e688d216421d050c9a31d0d" -uuid = "30578b45-9adc-5946-b283-645ec420af67" -version = "0.4.1" - -[[UUIDs]] -deps = ["Random", "SHA"] -uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" - -[[UnPack]] -git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b" -uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" -version = "1.0.2" - -[[Unicode]] -uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" - -[[Unitful]] -deps = ["ConstructionBase", "Dates", "LinearAlgebra", "Random"] -git-tree-sha1 = "b95e0b8a8d1b6a6c3e0b3ca393a7a285af47c264" -uuid = "1986cc42-f94f-5a68-af5c-568840ba703d" -version = "1.10.1" - -[[VectorizationBase]] -deps = ["ArrayInterface", "CPUSummary", "HostCPUFeatures", "Hwloc", "IfElse", "LayoutPointers", "Libdl", "LinearAlgebra", "SIMDTypes", "Static"] -git-tree-sha1 = "6e261bff5c9f2537776165dea3067df9de4440cf" -uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" -version = "0.21.23" - -[[VersionParsing]] -git-tree-sha1 = "e575cf85535c7c3292b4d89d89cc29e8c3098e47" -uuid = "81def892-9a0e-5fdd-b105-ffc91e053289" -version = "1.2.1" - -[[VertexSafeGraphs]] -deps = ["LightGraphs"] -git-tree-sha1 = "b9b450c99a3ca1cc1c6836f560d8d887bcbe356e" -uuid = "19fa3120-7c27-5ec5-8db8-b0b0aa330d6f" -version = "0.1.2" - -[[Widgets]] -deps = ["Colors", "Dates", "Observables", "OrderedCollections"] -git-tree-sha1 = "80661f59d28714632132c73779f8becc19a113f2" -uuid = "cc8bc4a8-27d6-5769-a93b-9d913e69aa62" -version = "0.6.4" - -[[WoodburyMatrices]] -deps = ["LinearAlgebra", "SparseArrays"] -git-tree-sha1 = "de67fa59e33ad156a590055375a30b23c40299d3" -uuid = 
"efce3f68-66dc-5838-9240-27a6d6f5f9b6" -version = "0.5.5" - -[[XML2_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "Zlib_jll"] -git-tree-sha1 = "1acf5bdf07aa0907e0a37d3718bb88d4b687b74a" -uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a" -version = "2.9.12+0" - -[[XSLT_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgcrypt_jll", "Libgpg_error_jll", "Libiconv_jll", "Pkg", "XML2_jll", "Zlib_jll"] -git-tree-sha1 = "91844873c4085240b95e795f692c4cec4d805f8a" -uuid = "aed1982a-8fda-507f-9586-7b0439959a61" -version = "1.1.34+0" - -[[Xorg_libX11_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxcb_jll", "Xorg_xtrans_jll"] -git-tree-sha1 = "5be649d550f3f4b95308bf0183b82e2582876527" -uuid = "4f6342f7-b3d2-589e-9d20-edeb45f2b2bc" -version = "1.6.9+4" - -[[Xorg_libXau_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "4e490d5c960c314f33885790ed410ff3a94ce67e" -uuid = "0c0b7dd1-d40b-584c-a123-a41640f87eec" -version = "1.0.9+4" - -[[Xorg_libXdmcp_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "4fe47bd2247248125c428978740e18a681372dd4" -uuid = "a3789734-cfe1-5b06-b2d0-1dd0d9d62d05" -version = "1.1.3+4" - -[[Xorg_libXext_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] -git-tree-sha1 = "b7c0aa8c376b31e4852b360222848637f481f8c3" -uuid = "1082639a-0dae-5f34-9b06-72781eeb8cb3" -version = "1.3.4+4" - -[[Xorg_libXrender_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] -git-tree-sha1 = "19560f30fd49f4d4efbe7002a1037f8c43d43b96" -uuid = "ea2f1a96-1ddc-540d-b46f-429655e07cfa" -version = "0.9.10+4" - -[[Xorg_libpthread_stubs_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "6783737e45d3c59a4a4c4091f5f88cdcf0908cbb" -uuid = "14d82f49-176c-5ed1-bb49-ad3f5cbd8c74" -version = "0.1.0+3" - -[[Xorg_libxcb_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "XSLT_jll", "Xorg_libXau_jll", "Xorg_libXdmcp_jll", "Xorg_libpthread_stubs_jll"] -git-tree-sha1 = "daf17f441228e7a3833846cd048892861cff16d6" -uuid = "c7cfdc94-dc32-55de-ac96-5a1b8d977c5b" -version = "1.13.0+3" - -[[Xorg_xtrans_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "79c31e7844f6ecf779705fbc12146eb190b7d845" -uuid = "c5fb5394-a638-5e4d-96e5-b29de1b5cf10" -version = "1.4.0+3" - -[[Zlib_jll]] -deps = ["Libdl"] -uuid = "83775a58-1f1d-513f-b197-d71354ab007a" - -[[ZygoteRules]] -deps = ["MacroTools"] -git-tree-sha1 = "8c1a8e4dfacb1fd631745552c8db35d0deb09ea0" -uuid = "700de1a5-db45-46bc-99cf-38207098b444" -version = "0.2.2" - -[[libass_jll]] -deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "HarfBuzz_jll", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] -git-tree-sha1 = "5982a94fcba20f02f42ace44b9894ee2b140fe47" -uuid = "0ac62f75-1d6f-5e53-bd7c-93b484bb37c0" -version = "0.15.1+0" - -[[libfdk_aac_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "daacc84a041563f965be61859a36e17c4e4fcd55" -uuid = "f638f0a6-7fb0-5443-88ba-1cc74229b280" -version = "2.0.2+0" - -[[libpng_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] -git-tree-sha1 = "94d180a6d2b5e55e447e2d27a29ed04fe79eb30c" -uuid = "b53b4c65-9356-5827-b1ea-8c7a1a84506f" -version = "1.6.38+0" - -[[libvorbis_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Ogg_jll", "Pkg"] -git-tree-sha1 = "c45f4e40e7aafe9d086379e5578947ec8b95a8fb" -uuid = "f27f6e37-5d2b-51aa-960f-b287f2bc3b7a" -version = "1.3.7+0" - -[[nghttp2_jll]] -deps = 
["Artifacts", "Libdl"] -uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" - -[[p7zip_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" - -[[x264_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "4fea590b89e6ec504593146bf8b988b2c00922b2" -uuid = "1270edf5-f2f9-52d2-97e9-ab00b5d0237a" -version = "2021.5.5+0" - -[[x265_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "ee567a171cce03570d77ad3a43e90218e38937a9" -uuid = "dfaa095f-4041-5dcd-9319-2fabd8486b76" -version = "3.5.0+0" diff --git a/src/CalibrateEmulateSample.jl b/src/CalibrateEmulateSample.jl index f5956b175..63a47b23b 100644 --- a/src/CalibrateEmulateSample.jl +++ b/src/CalibrateEmulateSample.jl @@ -10,9 +10,9 @@ module CalibrateEmulateSample using Distributions, Statistics, LinearAlgebra, DocStringExtensions # imported modules from EKP. -import EnsembleKalmanProcesses: EnsembleKalmanProcesses, ParameterDistributions, Observations, DataContainers +import EnsembleKalmanProcesses: EnsembleKalmanProcesses, ParameterDistributions, DataContainers -export EnsembleKalmanProcesses, ParameterDistributions, Observations, DataContainers +export EnsembleKalmanProcesses, ParameterDistributions, DataContainers # Internal deps, light external deps diff --git a/src/Emulator.jl b/src/Emulator.jl index 135d726d5..1da72a3bc 100644 --- a/src/Emulator.jl +++ b/src/Emulator.jl @@ -81,6 +81,20 @@ end get_machine_learning_tool(emulator::Emulator) = emulator.machine_learning_tool # Constructor for the Emulator Object +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Positional Arguments + - `machine_learning_tool` ::MachineLearningTool, + - `input_output_pairs` ::PairedDataContainer +Keyword Arguments + - `obs_noise_cov`: A matrix/uniform scaling to provide the observational noise covariance of the data - used for data processing (default `nothing`), + - `normalize_inputs`: Normalize the inputs to be unit Gaussian, in the smallest full-rank space of the data (default `true`), + - `standardize_outputs`: Standardize outputs with by dividing by a vector of provided factors (default `false`), + - `standardize_outputs_factors`: If standardizing, the provided dim_output-length vector of factors, + - `decorrelate`: Apply (truncated) SVD to the outputs. Predictions are returned in the decorrelated space, (default `true`) + - `retained_svd_frac`: The cumulative sum of singular values retained after output SVD truncation (default 1.0 - no truncation) +""" function Emulator( machine_learning_tool::MachineLearningTool, input_output_pairs::PairedDataContainer{FT}; @@ -102,6 +116,8 @@ function Emulator( if obs_noise_cov !== nothing err2 = "obs_noise_cov must be of size ($output_dim, $output_dim), got $(size(obs_noise_cov))" size(obs_noise_cov) == (output_dim, output_dim) || throw(ArgumentError(err2)) + else + @warn "The covariance of the observational noise (a.k.a obs_noise_cov) is useful for data processing. Large approximation errors can occur without it. If possible, please provide it using the keyword obs_noise_cov." end @info "test here - after checks" @@ -210,7 +226,6 @@ function predict( # [1.] normalize normalized_new_inputs = normalize(emulator, new_inputs) - # [2.] predict. Note: ds = decorrelated, standard ds_outputs, ds_output_var = predict(emulator.machine_learning_tool, normalized_new_inputs, mlt_kwargs...) 
@@ -302,6 +317,7 @@ function calculate_normalization(inputs::VOrM) where {VOrM <: AbstractVecOrMat} svd_in = svd(input_cov) sqrt_inv_sv = 1 ./ sqrt.(svd_in.S[1:rank(input_cov)]) normalization = Diagonal(sqrt_inv_sv) * svd_in.Vt[1:rank(input_cov), :] #non-square + @info "reducing input dimension from $(size(input_cov,1)) to $(rank(input_cov)) during low-rank normalization" end return normalization end diff --git a/src/GaussianProcess.jl b/src/GaussianProcess.jl index 8b803e957..b25907d5f 100644 --- a/src/GaussianProcess.jl +++ b/src/GaussianProcess.jl @@ -113,6 +113,10 @@ function build_models!( # Number of models (We are fitting one model per output dimension, as data is decorrelated) models = gp.models + if length(gp.models) > 0 # check to see if gp already contains models + @warn "GaussianProcess already built. skipping..." + return + end N_models = size(output_values, 1) #size(transformed_data)[1] @@ -228,6 +232,11 @@ function build_models!( # Number of models (We are fitting one model per output dimension, as data is decorrelated) models = gp.models + if length(gp.models) > 0 # check to see if gp already contains models + @warn "GaussianProcess already built. skipping..." + return + end + N_models = size(output_values, 1) #size(transformed_data)[1] if gp.kernel === nothing diff --git a/src/MarkovChainMonteCarlo.jl b/src/MarkovChainMonteCarlo.jl index e7a862c28..dd84f8bef 100644 --- a/src/MarkovChainMonteCarlo.jl +++ b/src/MarkovChainMonteCarlo.jl @@ -96,8 +96,8 @@ AdvancedMH.logratio_proposal_density( function _get_proposal(prior::ParameterDistribution) # *only* use covariance of prior, not full distribution - Σ = ParameterDistributions.cov(prior) - return AdvancedMH.RandomWalkProposal(MvNormal(zeros(size(Σ)[1]), Σ)) + Σsqrt = sqrt(ParameterDistributions.cov(prior)) # rt_cov * MVN(0,I) avoids the posdef errors for MVN in Julia Distributions + return AdvancedMH.RandomWalkProposal(Σsqrt * MvNormal(zeros(size(Σsqrt)[1]), I)) end """ @@ -298,10 +298,11 @@ function AbstractMCMC.bundle_samples( ) # Turn all the transitions into a vector-of-vectors. vals = [vcat(t.params, t.log_density, t.accepted) for t in ts] - # Check if we received any parameter names. if ismissing(param_names) param_names = [Symbol(:param_, i) for i in 1:length(keys(ts[1].params))] + # elseif length(param_names) < length(keys(ts[1].params)) # in case bug with MV names, Chains still needs one name per dist. + # param_names = [Symbol(:param_, i) for i in 1:length(keys(ts[1].params))] else # Generate new array to be thread safe. param_names = Symbol.(param_names) @@ -310,9 +311,9 @@ function AbstractMCMC.bundle_samples( # Bundle everything up and return a MCChains.Chains struct. return MCMCChains.Chains( - vals, - vcat(param_names, internal_names), - (parameters = param_names, internals = internal_names); + vals, # current state information as vec-of-vecs + vcat(param_names, internal_names), # parameter names which get converted to symbols + (parameters = param_names, internals = internal_names); # name map (one needs to be called parameters = ...) start = discard_initial + 1, thin = thinning, ) @@ -350,6 +351,8 @@ function AbstractMCMC.bundle_samples( # Check if we received any parameter names. if ismissing(param_names) param_names = [Symbol(:param_, i) for i in 1:length(keys(ts[1][1].params))] + # elseif length(param_names) < length(keys(ts[1][1].params)) # in case bug with MV names, Chains still needs one name per dist.
+ # param_names = [Symbol(:param_, i) for i in 1:length(keys(ts[1][1].params))] else # Generate new array to be thread safe. param_names = Symbol.(param_names) @@ -427,9 +430,20 @@ function MCMCWrapper( obs_sample = to_decorrelated(obs_sample, em) log_posterior_map = EmulatorPosteriorModel(prior, em, obs_sample) mh_proposal_sampler = MetropolisHastingsSampler(mcmc_alg, prior) + + # parameter names are needed for every dimension in an MCMCChains object for diagnostics, + # so create the duplicates here + dd = get_dimensions(prior) + if all(dd .== 1) # i.e. if dd == [1, 1, 1, 1, 1], all params are univariate + param_names = get_name(prior) + else # use multiplicity to keep parameter names informative + pn = get_name(prior) + param_names = reduce(vcat, [(pn[k] * "_") .* map(x -> string(x), 1:dd[k]) for k in 1:length(pn)]) + end + sample_kwargs = (; # set defaults here :init_params => deepcopy(init_params), - :param_names => get_name(prior), + :param_names => param_names, :discard_initial => burnin, :chain_type => MCMCChains.Chains, ) @@ -587,14 +601,21 @@ function get_posterior(mcmc::MCMCWrapper, chain::MCMCChains.Chains) p_names = get_name(mcmc.prior) p_slices = batch(mcmc.prior) flat_constraints = get_all_constraints(mcmc.prior) - # live in same space as prior - p_constraints = [flat_constraints[slice] for slice in p_slices] # Cast data in chain to a ParameterDistribution object. Data layout in Chain is an # (N_samples x n_params x n_chains) AxisArray, so samples are in rows. p_chain = Array(Chains(chain, :parameters)) # discard internal/diagnostic data p_samples = [Samples(p_chain[:, slice, 1], params_are_columns = false) for slice in p_slices] + # live in same space as prior + # check for a function distribution by testing whether the distribution is nested + p_constraints = [ + !isa(get_distribution(mcmc.prior)[pn], ParameterDistribution) ?
# if not func-dist + flat_constraints[slice] : # constraints are slice + get_all_constraints(get_distribution(mcmc.prior)[pn]) # get constraints of nested dist + for (pn, slice) in zip(p_names, p_slices) + ] + # distributions created as atoms and pieced together posterior_distribution = combine_distributions([ ParameterDistribution(ps, pc, pn) for (ps, pc, pn) in zip(p_samples, p_constraints, p_names) diff --git a/src/RandomFeature.jl b/src/RandomFeature.jl index 3caa6117e..a9e229702 100644 --- a/src/RandomFeature.jl +++ b/src/RandomFeature.jl @@ -3,6 +3,7 @@ using RandomFeatures const RF = RandomFeatures using EnsembleKalmanProcesses const EKP = EnsembleKalmanProcesses +using EnsembleKalmanProcesses.Localizers using ..ParameterDistributions using ..Utilities using StableRNGs @@ -14,7 +15,7 @@ export SeparableKernel, NonseparableKernel export get_input_cov_structure, get_output_cov_structure, get_cov_structure export get_eps export rank -export shrinkage_cov +export shrinkage_cov, nice_cov abstract type RandomFeatureInterface <: MachineLearningTool end @@ -430,8 +431,8 @@ function calculate_mean_cov_and_coeffs( l::ForVM, regularization::MorUSorD, n_features::Int, - n_train::Int, - n_test::Int, + train_idx::VV, + test_idx::VV, batch_sizes::Union{Dict{S, Int}, Nothing}, io_pairs::PairedDataContainer, decomp_type::S, @@ -443,6 +444,7 @@ function calculate_mean_cov_and_coeffs( RFI <: RandomFeatureInterface, RNG <: AbstractRNG, ForVM <: Union{AbstractFloat, AbstractVecOrMat}, + VV <: AbstractVector, S <: AbstractString, MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, M <: AbstractMatrix{<:AbstractFloat}, @@ -451,14 +453,15 @@ function calculate_mean_cov_and_coeffs( } # split data into train/test - itrain = get_inputs(io_pairs)[:, 1:n_train] - otrain = get_outputs(io_pairs)[:, 1:n_train] + itrain = get_inputs(io_pairs)[:, train_idx] + otrain = get_outputs(io_pairs)[:, train_idx] io_train_cost = PairedDataContainer(itrain, otrain) - itest = get_inputs(io_pairs)[:, (n_train + 1):end] - otest = get_outputs(io_pairs)[:, (n_train + 1):end] + itest = get_inputs(io_pairs)[:, test_idx] + otest = get_outputs(io_pairs)[:, test_idx] input_dim = size(itrain, 1) output_dim = size(otrain, 1) n_test = size(itest, 2) + # build and fit the RF rfm = RFM_from_hyperparameters( rfi, @@ -488,20 +491,20 @@ function calculate_mean_cov_and_coeffs( # sizes (output_dim x n_test), (output_dim x output_dim x n_test) ## TODO - the theory states that the following should be set: - # scaled_coeffs = sqrt(1 / (n_features)) * RF.Methods.get_coeffs(fitted_features) - # However the convergence is much improved with setting this to zero: - scaled_coeffs = 0 - + scaled_coeffs = sqrt(1 / (n_features)) * RF.Methods.get_coeffs(fitted_features) + #scaled_coeffs = 1e-3 * rand(n_features)#overwrite with noise... 
+ # (previously, convergence was much improved by setting this to zero:) + #scaled_coeffs = 0 if decomp_type == "cholesky" chol_fac = RF.Methods.get_decomposition(RF.Methods.get_feature_factors(fitted_features)).L + complexity = 2 * sum(log(chol_fac[i, i]) for i in 1:size(chol_fac, 1)) else svd_singval = RF.Methods.get_decomposition(RF.Methods.get_feature_factors(fitted_features)).S complexity = sum(log, svd_singval) # note this is log(abs(det)) end - complexity = sqrt(abs(complexity)) #abs can introduce nonconvexity, - + complexity = sqrt(complexity) return scaled_coeffs, complexity end @@ -540,6 +543,56 @@ function shrinkage_cov(sample_mat::AA) where {AA <: AbstractMatrix} end +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Calculate the empirical covariance, additionally applying the Noise Informed Covariance Estimator (NICE) of Vishny et al. (2024). +""" +function nice_cov(sample_mat::AA, n_samples = 400, δ::FT = 1.0) where {AA <: AbstractMatrix, FT <: Real} + + n_sample_cov = size(sample_mat, 2) + Γ = cov(sample_mat, dims = 2) + + bd_tol = 1e8 * eps() + + v = sqrt.(diag(Γ)) + V = Diagonal(v) #stds + V_inv = inv(V) + corr = clamp.(V_inv * Γ * V_inv, -1 + bd_tol, 1 - bd_tol) # full corr + + # parameter sweep over the exponents + max_exponent = 2 * 5 # must be even + interp_steps = 100 + # find the variability in the corr coeff matrix entries + std_corrs = approximate_corr_std.(corr, n_sample_cov, n_samples) # Found in EKP.Localizers !! slowest part of code -> could speed up by precomputing an interpolation of [-1,1] + + std_tol = sqrt(sum(std_corrs .^ 2)) + α_min_exceeded = [max_exponent] + for α in 2:2:max_exponent # even exponents give a PSD + corr_psd = corr .^ (α + 1) # abs not needed as α even + # find the first exponent that exceeds the noise tolerance in norm + if norm(corr_psd - corr) > δ * std_tol + α_min_exceeded[1] = α + break + end + end + corr_psd = corr .^ α_min_exceeded[1] + corr_psd_prev = corr .^ (α_min_exceeded[1] - 2) # previous PSD correction + + for α in LinRange(1.0, 0.0, interp_steps) + corr_interp = ((1 - α) * (corr_psd_prev) + α * corr_psd) .* corr + if norm(corr_interp - corr) < δ * std_tol + corr[:, :] = corr_interp #update the correlation matrix block + break + end + end + out = posdef_correct(V * corr * V) # rebuild the cov matrix + @info "NICE-adjusted covariance condition number: $(cond(out))" + return out + +end + + """ $(DocStringExtensions.TYPEDSIGNATURES) @@ -552,23 +605,26 @@ function estimate_mean_and_coeffnorm_covariance( rfi::RFI, rng::RNG, l::ForVM, regularization::MorUSorD, n_features::Int, - n_train::Int, - n_test::Int, + train_idx::VV, + test_idx::VV, batch_sizes::Union{Dict{S, Int}, Nothing}, io_pairs::PairedDataContainer, n_samples::Int, decomp_type::S, multithread_type::TullioThreading; repeats::Int = 1, + cov_correction = "shrinkage", ) where { RFI <: RandomFeatureInterface, RNG <: AbstractRNG, ForVM <: Union{AbstractFloat, AbstractVecOrMat}, + VV <: AbstractVector, S <: AbstractString, MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, } output_dim = size(get_outputs(io_pairs), 1) + n_test = length(test_idx) means = zeros(output_dim, n_samples, n_test) mean_of_covs = zeros(output_dim, output_dim, n_test) @@ -577,7 +633,7 @@ function estimate_mean_and_coeffnorm_covariance( buffer = zeros(n_test, output_dim, n_features) complexity = zeros(1, n_samples) coeffl2norm = zeros(1, n_samples) - println("estimate cov with " * string(n_samples * repeats) * " iterations...") + println("estimate cov with " * string(n_samples) * " iterations...") for i in
ProgressBar(1:n_samples) for j in 1:repeats @@ -587,8 +643,8 @@ function estimate_mean_and_coeffnorm_covariance( l, regularization, n_features, - n_train, - n_test, + train_idx, + test_idx, batch_sizes, io_pairs, decomp_type, @@ -625,9 +681,11 @@ function estimate_mean_and_coeffnorm_covariance( end sample_mat = vcat(blockmeans, coeffl2norm, complexity) - shrinkage = true - if shrinkage + + if cov_correction == "shrinkage" Γ = shrinkage_cov(sample_mat) + elseif cov_correction == "nice" + Γ = nice_cov(sample_mat) else Γ = cov(sample_mat, dims = 2) end @@ -652,8 +710,8 @@ function calculate_ensemble_mean_and_coeffnorm( lvecormat::VorM, regularization::MorUSorD, n_features::Int, - n_train::Int, - n_test::Int, + train_idx::VV, + test_idx::VV, batch_sizes::Union{Dict{S, Int}, Nothing}, io_pairs::PairedDataContainer, decomp_type::S, @@ -663,6 +721,7 @@ function calculate_ensemble_mean_and_coeffnorm( RFI <: RandomFeatureInterface, RNG <: AbstractRNG, VorM <: AbstractVecOrMat, + VV <: AbstractVector, S <: AbstractString, MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, } @@ -673,6 +732,7 @@ function calculate_ensemble_mean_and_coeffnorm( end N_ens = size(lmat, 2) output_dim = size(get_outputs(io_pairs), 1) + n_test = length(test_idx) means = zeros(output_dim, N_ens, n_test) mean_of_covs = zeros(output_dim, output_dim, n_test) @@ -682,7 +742,7 @@ function calculate_ensemble_mean_and_coeffnorm( moc_tmp = similar(mean_of_covs) mtmp = zeros(output_dim, n_test) - println("calculating " * string(N_ens * repeats) * " ensemble members...") + println("calculating " * string(N_ens) * " ensemble members...") for i in ProgressBar(1:N_ens) for j in collect(1:repeats) @@ -694,8 +754,8 @@ function calculate_ensemble_mean_and_coeffnorm( l, regularization, n_features, - n_train, - n_test, + train_idx, + test_idx, batch_sizes, io_pairs, decomp_type, @@ -739,25 +799,27 @@ function estimate_mean_and_coeffnorm_covariance( l::ForVM, regularization::MorUSorD, n_features::Int, - n_train::Int, - n_test::Int, + train_idx::VV, + test_idx::VV, batch_sizes::Union{Dict{S, Int}, Nothing}, io_pairs::PairedDataContainer, n_samples::Int, decomp_type::S, multithread_type::EnsembleThreading; repeats::Int = 1, + cov_correction = "shrinkage", ) where { RFI <: RandomFeatureInterface, RNG <: AbstractRNG, ForVM <: Union{AbstractFloat, AbstractVecOrMat}, + VV <: AbstractVector, S <: AbstractString, MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, } output_dim = size(get_outputs(io_pairs), 1) - - println("estimate cov with " * string(n_samples * repeats) * " iterations...") + n_test = length(test_idx) + println("estimate cov with " * string(n_samples) * " iterations...") nthreads = Threads.nthreads() rng_seed = randperm(rng, 10^5)[1] # dumb way to get a random integer in 1:10^5 @@ -786,8 +848,8 @@ function estimate_mean_and_coeffnorm_covariance( l, regularization, n_features, - n_train, - n_test, + train_idx, + test_idx, batch_sizes, io_pairs, decomp_type, @@ -833,9 +895,11 @@ function estimate_mean_and_coeffnorm_covariance( sample_mat = vcat(blockmeans, coeffl2norm, complexity) - shrinkage = true - if shrinkage + + if cov_correction == "shrinkage" Γ = shrinkage_cov(sample_mat) + elseif cov_correction == "nice" + Γ = nice_cov(sample_mat) else Γ = cov(sample_mat, dims = 2) end @@ -858,8 +922,8 @@ function calculate_ensemble_mean_and_coeffnorm( lvecormat::VorM, regularization::MorUSorD, n_features::Int, - n_train::Int, - n_test::Int, + train_idx::VV, + test_idx::VV, batch_sizes::Union{Dict{S, Int}, Nothing}, 
io_pairs::PairedDataContainer, decomp_type::S, @@ -869,6 +933,7 @@ function calculate_ensemble_mean_and_coeffnorm( RFI <: RandomFeatureInterface, RNG <: AbstractRNG, VorM <: AbstractVecOrMat, + VV <: AbstractVector, S <: AbstractString, MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, } @@ -879,10 +944,10 @@ function calculate_ensemble_mean_and_coeffnorm( end N_ens = size(lmat, 2) output_dim = size(get_outputs(io_pairs), 1) + n_test = length(test_idx) - - println("calculating " * string(N_ens * repeats) * " ensemble members...") + println("calculating " * string(N_ens) * " ensemble members...") nthreads = Threads.nthreads() c_list = [zeros(1, N_ens) for i in 1:nthreads] @@ -909,8 +974,8 @@ function calculate_ensemble_mean_and_coeffnorm( l, regularization, n_features, - n_train, - n_test, + train_idx, + test_idx, batch_sizes, io_pairs, decomp_type, diff --git a/src/ScalarRandomFeature.jl b/src/ScalarRandomFeature.jl index 05a9c9987..a946fd9c8 100644 --- a/src/ScalarRandomFeature.jl +++ b/src/ScalarRandomFeature.jl @@ -30,6 +30,8 @@ struct ScalarRandomFeatureInterface{S <: AbstractString, RNG <: AbstractRNG, KST feature_decomposition::S "dictionary of options for hyperparameter optimizer" optimizer_options::Dict{S} + "diagnostics from optimizer" + optimizer::Vector end """ @@ -72,7 +74,7 @@ $(DocStringExtensions.TYPEDSIGNATURES) gets the rng field """ -get_rng(srfi::ScalarRandomFeatureInterface) = srfi.rng +EKP.get_rng(srfi::ScalarRandomFeatureInterface) = srfi.rng """ $(DocStringExtensions.TYPEDSIGNATURES) @@ -95,6 +97,13 @@ gets the optimizer_options field """ get_optimizer_options(srfi::ScalarRandomFeatureInterface) = srfi.optimizer_options +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +gets the optimizer field +""" +get_optimizer(srfi::ScalarRandomFeatureInterface) = srfi.optimizer + """ $(DocStringExtensions.TYPEDSIGNATURES) @@ -120,6 +129,8 @@ Constructs a `ScalarRandomFeatureInterface <: MachineLearningTool` interface for - "multithread": how to multithread. "ensemble" (default) threads across ensemble members "tullio" threads random feature matrix algebra - "accelerator": use EKP accelerators (default is no acceleration) - "verbose" => false, verbose optimizer statements + - "cov_correction" => "shrinkage", type of conditioning to improve estimated covariance (Ledoit and Wolf 2003), also "nice" for (Vishny, Morzfeld et al. 2024) + - "n_cross_val_sets" => 2, the train fraction creates train-test data subsets (5 at the default "train_fraction" of 0.8); 'n_cross_val_sets' of these are stacked in the loss function. If set to 0, train = test on the full data provided, ignoring "train_fraction".
""" function ScalarRandomFeatureInterface( n_features::Int, @@ -144,16 +155,19 @@ function ScalarRandomFeatureInterface( # default optimizer settings optimizer_opts = Dict( "prior" => prior, #the hyperparameter_prior - "n_ensemble" => max(ndims(prior) + 1, 10), #number of ensemble - "n_iteration" => 5, # number of eki iterations - "scheduler" => EKP.DataMisfitController(), # Adaptive timestepping, - "cov_sample_multiplier" => 2.0, # multiplier for samples to estimate covariance in optimization scheme + "n_ensemble" => min(10 * ndims(prior), 100), #number of ensemble + "n_iteration" => 10, # number of eki iterations + "scheduler" => EKP.DataMisfitController(terminate_at = 1000), # Adaptive timestepping, + "cov_sample_multiplier" => 10.0, # multiplier for samples to estimate covariance in optimization scheme "inflation" => 1e-4, # additive inflation ∈ [0,1] with 0 being no inflation "train_fraction" => 0.8, # 80:20 train - test split "n_features_opt" => n_features, # number of features for the optimization "multithread" => "ensemble", # instead of "tullio" "verbose" => false, # verbose optimizer statements - "accelerator" => EKP.DefaultAccelerator(), # acceleration with momentum + "accelerator" => EKP.NesterovAccelerator(), # acceleration with momentum + "localization" => EKP.Localizers.NoLocalization(), # localization / sample error correction for small ensembles + "cov_correction" => "shrinkage", # type of conditioning to improve estimated covariance + "n_cross_val_sets" => 2, # if >1 do cross validation, else if 0 do no data splitting and no training fraction ) if !isnothing(optimizer_options) @@ -180,6 +194,7 @@ function ScalarRandomFeatureInterface( kernel_structure, feature_decomposition, optimizer_opts, + [], ) end @@ -191,6 +206,7 @@ function hyperparameter_distribution_from_flat( M = zeros(input_dim) #scalar output U = hyperparameters_from_flat(x, input_dim, kernel_structure) + if !isposdef(U) println("U not posdef - correcting") U = posdef_correct(U) @@ -305,37 +321,68 @@ function build_models!( rfms = get_rfms(srfi) + if length(rfms) > 0 + @warn "ScalarRandomFeatureInterface already built. skipping..." + return + end fitted_features = get_fitted_features(srfi) n_features = get_n_features(srfi) batch_sizes = get_batch_sizes(srfi) rng = get_rng(srfi) decomp_type = get_feature_decomposition(srfi) optimizer_options = get_optimizer_options(srfi) - + optimizer = get_optimizer(srfi) # empty vector # Optimize features with EKP for each output dim # [1.] Split data into test/train 80/20 - train_fraction = optimizer_options["train_fraction"] - n_train = Int(floor(train_fraction * n_data)) - n_test = n_data - n_train + idx_shuffle = randperm(rng, n_data) + n_cross_val_sets = Int(optimizer_options["n_cross_val_sets"]) n_features_opt = optimizer_options["n_features_opt"] - idx_shuffle = randperm(rng, n_data) + train_idx = [] + test_idx = [] + n_train = 0 + n_test = 0 + if n_cross_val_sets == 0 + push!(train_idx, idx_shuffle) + push!(test_idx, idx_shuffle) + n_cross_val_sets = 1 # now just pretend there is one partition for looping purposes + n_train = n_data + n_test = n_data + else + train_fraction = optimizer_options["train_fraction"] + n_train = Int(floor(train_fraction * n_data)) + n_test = n_data - n_train + + if n_test * n_cross_val_sets > n_data + throw( + ArgumentError( + "train/test split produces cross validation test sets of size $(n_test), out of $(n_data). \"n_cross_val_sets\" optimizer_options keyword < $(Int(floor(n_data/n_test))). 
Received $n_cross_val_sets", + ), + ) + end + + + for i in 1:n_cross_val_sets + tmp = idx_shuffle[((i - 1) * n_test + 1):(i * n_test)] + push!(test_idx, tmp) + push!(train_idx, setdiff(collect(1:n_data), tmp)) + end + end + + #regularization = I = 1.0 in scalar case regularization = I - @info ( "hyperparameter learning for $n_rfms models using $n_train training points, $n_test validation points and $n_features_opt features" ) + n_iteration = optimizer_options["n_iteration"] + diagnostics = zeros(n_iteration, n_rfms) for i in 1:n_rfms - - io_pairs_opt = PairedDataContainer( - input_values[:, idx_shuffle], - reshape(output_values[i, idx_shuffle], 1, size(output_values, 2)), - ) + io_pairs_opt = PairedDataContainer(input_values, reshape(output_values[i, :], 1, size(output_values, 2))) multithread = optimizer_options["multithread"] if multithread == "ensemble" @@ -345,7 +392,7 @@ function build_models!( else throw( ArgumentError( - "Unknown optimizer option for multithreading, please choose from \"tullio\" (allows Tullio.jl to control threading in RandomFeatures.jl, or \"loops\" (threading optimization loops)", + "Unknown optimizer option for multithreading, please choose from \"tullio\" (allows Tullio.jl to control threading in RandomFeatures.jl), or \"ensemble\" (threading is done over the ensemble)", ), ) end @@ -365,47 +412,58 @@ function build_models!( μ_hp = transform_unconstrained_to_constrained(prior, mean(prior)) cov_sample_multiplier = optimizer_options["cov_sample_multiplier"] + cov_correction = optimizer_options["cov_correction"] n_cov_samples_min = n_test + 2 n_cov_samples = Int(floor(n_cov_samples_min * max(cov_sample_multiplier, 0.0))) + observation_vec = [] + for cv_idx in 1:n_cross_val_sets + internal_Γ, approx_σ2 = estimate_mean_and_coeffnorm_covariance( + srfi, + rng, + μ_hp, + regularization, + n_features_opt, + train_idx[cv_idx], + test_idx[cv_idx], + batch_sizes, + io_pairs_opt, + n_cov_samples, + decomp_type, + multithread_type, + cov_correction = cov_correction, + ) + Γ = internal_Γ + Γ[1:n_test, 1:n_test] += regularization # + approx_σ2 + Γ[(n_test + 1):end, (n_test + 1):end] += I + if !isposdef(Γ) + Γ = posdef_correct(Γ) + end + data = vcat(get_outputs(io_pairs_opt)[test_idx[cv_idx]], 0.0, 0.0) - internal_Γ, approx_σ2 = estimate_mean_and_coeffnorm_covariance( - srfi, - rng, - μ_hp, - regularization, - n_features_opt, - n_train, - n_test, - batch_sizes, - io_pairs_opt, - n_cov_samples, - decomp_type, - multithread_type, - ) - Γ = internal_Γ - Γ[1:n_test, 1:n_test] += regularization # + approx_σ2 - Γ[(n_test + 1):end, (n_test + 1):end] += I - + push!( + observation_vec, + EKP.Observation(Dict("names" => "$(cv_idx)", "samples" => data[:], "covariances" => Γ)), + ) + end + observation = combine_observations(observation_vec) # [3.] 
set up EKP optimization n_ensemble = optimizer_options["n_ensemble"] n_iteration = optimizer_options["n_iteration"] opt_verbose_flag = optimizer_options["verbose"] scheduler = optimizer_options["scheduler"] accelerator = optimizer_options["accelerator"] + localization = optimizer_options["localization"] initial_params = construct_initial_ensemble(rng, prior, n_ensemble) - min_complexity = n_features_opt * log(regularization.λ) - min_complexity = sqrt(abs(min_complexity)) - data = vcat(get_outputs(io_pairs_opt)[(n_train + 1):end], 0.0, min_complexity) ekiobj = EKP.EnsembleKalmanProcess( initial_params, - data, - Γ, + observation, Inversion(), scheduler = scheduler, rng = rng, accelerator = accelerator, verbose = opt_verbose_flag, + localization_method = localization, ) err = zeros(n_iteration) @@ -414,24 +472,28 @@ function build_models!( #get parameters: lvec = transform_unconstrained_to_constrained(prior, get_u_final(ekiobj)) + g_ens = zeros(n_cross_val_sets * (n_test + 2), n_ensemble) + for cv_idx in 1:n_cross_val_sets + + g_ens_tmp, _ = calculate_ensemble_mean_and_coeffnorm( + srfi, + rng, + lvec, + regularization, + n_features_opt, + train_idx[cv_idx], + test_idx[cv_idx], + batch_sizes, + io_pairs_opt, + decomp_type, + multithread_type, + ) + g_ens[((cv_idx - 1) * (n_test + 2) + 1):(cv_idx * (n_test + 2)), :] = g_ens_tmp + end - g_ens, _ = calculate_ensemble_mean_and_coeffnorm( - srfi, - rng, - lvec, - regularization, - n_features_opt, - n_train, - n_test, - batch_sizes, - io_pairs_opt, - decomp_type, - multithread_type, - ) inflation = optimizer_options["inflation"] if inflation > 0 - terminated = - EKP.update_ensemble!(ekiobj, g_ens, additive_inflation = true, use_prior_cov = true, s = inflation) # small regularizing inflation + terminated = EKP.update_ensemble!(ekiobj, g_ens, additive_inflation = true, s = inflation) # small regularizing inflation else terminated = EKP.update_ensemble!(ekiobj, g_ens) # small regularizing inflation end @@ -440,8 +502,8 @@ function build_models!( end err[i] = get_error(ekiobj)[end] #mean((params_true - mean(params_i,dims=2)).^2) - end + diagnostics[:, i] = copy(err) # [5.] extract optimal hyperparameters hp_optimal = get_ϕ_mean_final(prior, ekiobj)[:] @@ -483,7 +545,9 @@ function build_models!( push!(rfms, rfm_i) push!(fitted_features, fitted_features_i) + end + push!(optimizer, diagnostics) end diff --git a/src/Utilities.jl b/src/Utilities.jl index ca937c1a8..1d3e34a20 100644 --- a/src/Utilities.jl +++ b/src/Utilities.jl @@ -5,13 +5,11 @@ using LinearAlgebra using Statistics using StatsBase using Random -using ..Observations using ..EnsembleKalmanProcesses EnsembleKalmanProcess = EnsembleKalmanProcesses.EnsembleKalmanProcess using ..DataContainers export get_training_points -export get_obs_sample export orig2zscore export zscore2orig """ @@ -50,33 +48,6 @@ function get_training_points( return training_points end - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Return a random sample from the observations, for use in the MCMC. - - - `rng` - optional RNG object used to pick random sample; defaults to `Random.GLOBAL_RNG`. - - `obs` - Observation struct with the observations (extract will pick one - of the sample observations to train). - - `rng_seed` - optional kwarg; if provided, used to re-seed `rng` before sampling. 
-""" -function get_obs_sample( - rng::Random.AbstractRNG, - obs::Observation; - rng_seed::Union{IT, Nothing} = nothing, -) where {IT <: Int} - # Ensuring reproducibility of the sampled parameter values: - # re-seed the rng *only* if we're given a seed - if rng_seed !== nothing - rng = Random.seed!(rng, rng_seed) - end - row_idxs = StatsBase.sample(rng, axes(obs.samples, 1), 1; replace = false, ordered = false) - return obs.samples[row_idxs...] -end -# first arg optional; defaults to GLOBAL_RNG (as in Random, StatsBase) -get_obs_sample(obs::Observation; kwargs...) = get_obs_sample(Random.GLOBAL_RNG, obs; kwargs...) - function orig2zscore(X::AbstractVector{FT}, mean::AbstractVector{FT}, std::AbstractVector{FT}) where {FT} # Compute the z scores of a vector X using the given mean # and std diff --git a/src/VectorRandomFeature.jl b/src/VectorRandomFeature.jl index 96a0ef231..686cd1a20 100644 --- a/src/VectorRandomFeature.jl +++ b/src/VectorRandomFeature.jl @@ -9,7 +9,8 @@ export get_rfms, get_output_dim, get_rng, get_kernel_structure, - get_optimizer_options + get_optimizer_options, + get_optimizer """ $(DocStringExtensions.TYPEDEF) @@ -44,6 +45,8 @@ struct VectorRandomFeatureInterface{S <: AbstractString, RNG <: AbstractRNG, KST feature_decomposition::S "dictionary of options for hyperparameter optimizer" optimizer_options::Dict + "diagnostics from optimizer" + optimizer::Vector end """ @@ -93,7 +96,7 @@ $(DocStringExtensions.TYPEDSIGNATURES) Gets the rng field """ -get_rng(vrfi::VectorRandomFeatureInterface) = vrfi.rng +EKP.get_rng(vrfi::VectorRandomFeatureInterface) = vrfi.rng """ $(DocStringExtensions.TYPEDSIGNATURES) @@ -126,6 +129,13 @@ get_optimizer_options(vrfi::VectorRandomFeatureInterface) = vrfi.optimizer_optio """ $(DocStringExtensions.TYPEDSIGNATURES) +gets the optimizer field +""" +get_optimizer(vrfi::VectorRandomFeatureInterface) = vrfi.optimizer + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + Constructs a `VectorRandomFeatureInterface <: MachineLearningTool` interface for the `RandomFeatures.jl` package for multi-input and multi-output emulators. - `n_features` - the number of random features - `input_dim` - the dimension of the input space @@ -148,6 +158,8 @@ Constructs a `VectorRandomFeatureInterface <: MachineLearningTool` interface for - "multithread": how to multithread. "ensemble" (default) threads across ensemble members "tullio" threads random feature matrix algebra - "accelerator": use EKP accelerators (default is no acceleration) - "verbose" => false, verbose optimizer statements to check convergence, priors and optimal parameters. + - "cov_correction" => "shrinkage", type of conditioning to improve estimated covariance (Ledoit Wolfe 03), also "nice" for (Vishny, Morzfeld et al. 2024) + - "n_cross_val_sets" => 2, train fraction creates (default 5) train-test data subsets, then use 'n_cross_val_sets' of these stacked in the loss function. If set to 0, train=test on the full data provided ignoring "train_fraction". 
""" function VectorRandomFeatureInterface( @@ -178,9 +190,9 @@ function VectorRandomFeatureInterface( #Optimization Defaults optimizer_opts = Dict( "prior" => prior, #the hyperparameter_prior (note scalings have already been applied) - "n_ensemble" => max(ndims(prior) + 1, 10), #number of ensemble - "n_iteration" => 5, # number of eki iterations - "scheduler" => EKP.DataMisfitController(), # Adaptive timestepping + "n_ensemble" => min(10 * ndims(prior), 100), #number of ensemble + "n_iteration" => 10, # number of eki iterations + "scheduler" => EKP.DataMisfitController(terminate_at = 1000), # Adaptive timestepping "cov_sample_multiplier" => 10.0, # multiplier for samples to estimate covariance in optimization scheme "tikhonov" => 0, # tikhonov regularization parameter if >0 "inflation" => 1e-4, # additive inflation ∈ [0,1] with 0 being no inflation @@ -189,7 +201,9 @@ function VectorRandomFeatureInterface( "multithread" => "ensemble", # instead of "tullio" "verbose" => false, # verbose optimizer statements "localization" => EKP.Localizers.NoLocalization(), # localization / sample error correction for small ensembles - "accelerator" => EKP.DefaultAccelerator(), # acceleration with momentum + "accelerator" => EKP.NesterovAccelerator(), # acceleration with momentum + "cov_correction" => "shrinkage", # type of conditioning to improve estimated covariance + "n_cross_val_sets" => 2, # if set to 0, removes data split. i.e takes train & test to be the same data set ) if !isnothing(optimizer_options) @@ -218,6 +232,7 @@ function VectorRandomFeatureInterface( kernel_structure, feature_decomposition, optimizer_opts, + [], ) end @@ -354,11 +369,17 @@ function build_models!( n_hp = calculate_n_hyperparameters(input_dim, output_dim, kernel_structure) rfms = get_rfms(vrfi) + if length(rfms) > 0 + @warn "VectorRandomFeatureInterface already built. skipping..." + return + end + fitted_features = get_fitted_features(vrfi) n_features = get_n_features(vrfi) batch_sizes = get_batch_sizes(vrfi) decomp_type = get_feature_decomposition(vrfi) optimizer_options = get_optimizer_options(vrfi) + optimizer = get_optimizer(vrfi) multithread = optimizer_options["multithread"] if multithread == "ensemble" multithread_type = EnsembleThreading() @@ -385,46 +406,62 @@ function build_models!( # Optimize feature cholesky factors with EKP # [1.] Split data into test/train (e.g. 80/20) - train_fraction = optimizer_options["train_fraction"] - n_train = Int(floor(train_fraction * n_data)) # 20% split - n_test = n_data - n_train n_features_opt = optimizer_options["n_features_opt"] + idx_shuffle = randperm(rng, n_data) + n_cross_val_sets = Int(optimizer_options["n_cross_val_sets"]) + + train_idx = [] + test_idx = [] + if n_cross_val_sets == 0 + push!(train_idx, idx_shuffle) + push!(test_idx, idx_shuffle) + n_cross_val_sets = 1 # now just pretend there is one partition for looping purposes + n_train = n_data + n_test = n_data + else + train_fraction = optimizer_options["train_fraction"] + n_train = Int(floor(train_fraction * n_data)) # 20% split + n_test = n_data - n_train + + if n_test * n_cross_val_sets > n_data + throw( + ArgumentError( + "train/test split produces cross validation test sets of size $(n_test), out of $(n_data). \"n_cross_val_sets\" optimizer_options keyword < $(Int(floor(n_data/n_test))). 
Received $n_cross_val_sets", + ), + ) + end + + for i in 1:n_cross_val_sets + tmp = idx_shuffle[((i - 1) * n_test + 1):(i * n_test)] + push!(test_idx, tmp) + push!(train_idx, setdiff(collect(1:n_data), tmp)) + end + end @info ( "hyperparameter learning using $n_train training points, $n_test validation points and $n_features_opt features" ) - # regularization_matrix = nothing when we use scaled SVD to decorrelate the space, # in this setting, noise = I if regularization_matrix === nothing regularization = I else - reg_mat = regularization_matrix + # think of the regularization_matrix as the observational noise covariance, or a related quantity if !isposdef(regularization_matrix) regularization = posdef_correct(regularization_matrix) println("RF regularization matrix is not positive definite, correcting") else - # think of the regularization_matrix as the observational noise covariance, or a related quantity - regularization = exp((1 / output_dim) * sum(log.(eigvals(reg_mat)))) * I #i.e. det(M)^{1/output_dim} I - - #regularization = reg_mat #using the full p.d. tikhonov exp. EXPENSIVE, and challenge get complexity terms + regularization = regularization_matrix end - end - - idx_shuffle = randperm(rng, n_data) - - io_pairs_opt = PairedDataContainer( - input_values[:, idx_shuffle], - reshape(output_values[:, idx_shuffle], :, size(output_values, 2)), - ) + end # [2.] Estimate covariance at mean value μ_hp = transform_unconstrained_to_constrained(prior, mean(prior)) cov_sample_multiplier = optimizer_options["cov_sample_multiplier"] - + cov_correction = optimizer_options["cov_correction"] if nameof(typeof(kernel_structure)) == :SeparableKernel if nameof(typeof(get_output_cov_structure(kernel_structure))) == :DiagonalFactor n_cov_samples_min = n_test + 2 # diagonal case @@ -435,67 +472,63 @@ function build_models!( n_cov_samples_min = (n_test * output_dim + 2) end n_cov_samples = Int(floor(n_cov_samples_min * max(cov_sample_multiplier, 0.0))) - - internal_Γ, approx_σ2 = estimate_mean_and_coeffnorm_covariance( - vrfi, - rng, - μ_hp, # take mean values - regularization, - n_features_opt, - n_train, - n_test, - batch_sizes, - io_pairs_opt, - n_cov_samples, - decomp_type, - multithread_type, - ) - + observation_vec = [] tikhonov_opt_val = optimizer_options["tikhonov"] - if tikhonov_opt_val == 0 - # Build the covariance - Γ = internal_Γ - Γ[1:(n_test * output_dim), 1:(n_test * output_dim)] += regularization # + approx_σ2 - Γ[(n_test * output_dim + 1):end, (n_test * output_dim + 1):end] += I - - #in diag case we have data logdet = λ^m, in non diag case we have logdet(Λ^) to match the different reg matrices. - min_complexity = - isa(regularization, UniformScaling) ? n_features_opt * log(regularization.λ) : - n_features_opt / output_dim * 2 * sum(log.(diag(cholesky(regularization).L))) - min_complexity = sqrt(abs(min_complexity)) - - - data = vcat(reshape(get_outputs(io_pairs_opt)[:, (n_train + 1):end], :, 1), 0.0, min_complexity) #flatten data - - elseif tikhonov_opt_val > 0 - # augment the state to add tikhonov - outsize = size(internal_Γ, 1) - Γ = zeros(outsize + n_hp, outsize + n_hp) - Γ[1:outsize, 1:outsize] = internal_Γ - Γ[1:(n_test * output_dim), 1:(n_test * output_dim)] += approx_σ2 + regularization - Γ[(n_test * output_dim + 1):outsize, (n_test * output_dim + 1):outsize] += I - - Γ[(outsize + 1):end, (outsize + 1):end] = tikhonov_opt_val .* cov(prior) - - #TODO the min complexity here is not the correct object in the non-diagonal case - min_complexity = - isa(regularization, UniformScaling) ? 
n_features_opt * log(regularization.λ) : - n_features_opt / output_dim * 2 * sum(log.(diag(cholesky(regularization).L))) - min_complexity = sqrt(abs(min_complexity)) - - data = vcat( - reshape(get_outputs(io_pairs_opt)[:, (n_train + 1):end], :, 1), - 0.0, - min_complexity, - zeros(size(Γ, 1) - outsize, 1), - ) #flatten data with additional zeros - else - throw( - ArgumentError( - "Tikhonov parameter must be non-negative, instead received tikhonov_opt_val=$tikhonov_opt_val", - ), + for cv_idx in 1:n_cross_val_sets + internal_Γ, approx_σ2 = estimate_mean_and_coeffnorm_covariance( + vrfi, + rng, + μ_hp, # take mean values + regularization, + n_features_opt, + train_idx[cv_idx], + test_idx[cv_idx], + batch_sizes, + input_output_pairs, + n_cov_samples, + decomp_type, + multithread_type, + cov_correction = cov_correction, ) + + if tikhonov_opt_val == 0 + # Build the covariance + Γ = internal_Γ + Γ[1:(n_test * output_dim), 1:(n_test * output_dim)] += + isa(regularization, UniformScaling) ? regularization : kron(I(n_test), regularization) # + approx_σ2 + Γ[(n_test * output_dim + 1):end, (n_test * output_dim + 1):end] += I + data = vcat(reshape(get_outputs(input_output_pairs)[:, test_idx[cv_idx]], :, 1), 0.0, 0.0) #flatten data + + elseif tikhonov_opt_val > 0 + # augment the state to add tikhonov + outsize = size(internal_Γ, 1) + Γ = zeros(outsize + n_hp, outsize + n_hp) + Γ[1:outsize, 1:outsize] = internal_Γ + Γ[1:(n_test * output_dim), 1:(n_test * output_dim)] += kron(I(n_test), regularization) # block diag regularization + Γ[(n_test * output_dim + 1):outsize, (n_test * output_dim + 1):outsize] += I + + Γ[(outsize + 1):end, (outsize + 1):end] = tikhonov_opt_val .* cov(prior) + + data = vcat( + reshape(get_outputs(input_output_pairs)[:, test_idx[cv_idx]], :, 1), + 0.0, + 0.0, + zeros(size(Γ, 1) - outsize, 1), + ) #flatten data with additional zeros + else + throw( + ArgumentError( + "Tikhonov parameter must be non-negative, instead received tikhonov_opt_val=$tikhonov_opt_val", + ), + ) + end + if !isposdef(Γ) + Γ = posdef_correct(Γ) + end + push!(observation_vec, EKP.Observation(Dict("names" => "$(cv_idx)", "samples" => data[:], "covariances" => Γ))) + end + observation = combine_observations(observation_vec) # [3.] 
set up EKP optimization n_ensemble = optimizer_options["n_ensemble"] # minimal ensemble size n_hp, @@ -505,16 +538,12 @@ function build_models!( localization = optimizer_options["localization"] accelerator = optimizer_options["accelerator"] - if !isposdef(Γ) - Γ = posdef_correct(Γ) - end initial_params = construct_initial_ensemble(rng, prior, n_ensemble) ekiobj = EKP.EnsembleKalmanProcess( initial_params, - data[:], - Γ, + observation, Inversion(), scheduler = scheduler, rng = rng, @@ -529,34 +558,44 @@ function build_models!( #get parameters: lvec = get_ϕ_final(prior, ekiobj) - g_ens, _ = calculate_ensemble_mean_and_coeffnorm( - vrfi, - rng, - lvec, - regularization, - n_features_opt, - n_train, - n_test, - batch_sizes, - io_pairs_opt, - decomp_type, - multithread_type, - ) if tikhonov_opt_val > 0 - # augment with the computational parameters (u not ϕ) - uvecormat = get_u_final(ekiobj) - if isa(uvecormat, AbstractVector) - umat = reshape(uvecormat, 1, :) - else - umat = uvecormat + g_ens = zeros(n_cross_val_sets * (output_dim * n_test + input_dim + 2), n_ensemble) + else + g_ens = zeros(n_cross_val_sets * (output_dim * n_test + 2), n_ensemble) + end + for cv_idx in 1:n_cross_val_sets + g_ens_tmp, _ = calculate_ensemble_mean_and_coeffnorm( + vrfi, + rng, + lvec, + regularization, + n_features_opt, + train_idx[cv_idx], + test_idx[cv_idx], + batch_sizes, + input_output_pairs, + decomp_type, + multithread_type, + ) + if tikhonov_opt_val > 0 + # augment with the computational parameters (u not ϕ) + uvecormat = get_u_final(ekiobj) + if isa(uvecormat, AbstractVector) + umat = reshape(uvecormat, 1, :) + else + umat = uvecormat + end + + g_ens_tmp = vcat(g_ens_tmp, umat) end - g_ens = vcat(g_ens, umat) + g_ens[((cv_idx - 1) * (output_dim * n_test + 2) + 1):(cv_idx * (output_dim * n_test + 2)), :] = g_ens_tmp + end + inflation = optimizer_options["inflation"] if inflation > 0 - terminated = - EKP.update_ensemble!(ekiobj, g_ens, additive_inflation = true, use_prior_cov = true, s = inflation) # small regularizing inflation + terminated = EKP.update_ensemble!(ekiobj, g_ens, additive_inflation = true, s = inflation) # small regularizing inflation else terminated = EKP.update_ensemble!(ekiobj, g_ens) # small regularizing inflation end @@ -567,6 +606,7 @@ function build_models!( err[i] = get_error(ekiobj)[end] #mean((params_true - mean(params_i,dims=2)).^2) end + push!(optimizer, err) # [5.] 
extract optimal hyperparameters hp_optimal = get_ϕ_mean_final(prior, ekiobj)[:] diff --git a/test/GaussianProcess/runtests.jl b/test/GaussianProcess/runtests.jl index e25b51998..3d9abca71 100644 --- a/test/GaussianProcess/runtests.jl +++ b/test/GaussianProcess/runtests.jl @@ -62,6 +62,7 @@ using CalibrateEmulateSample.DataContainers @test gp1.prediction_type == pred_type @test gp1.alg_reg_noise == 1e-4 + em1 = Emulator( gp1, iopairs, @@ -71,6 +72,24 @@ using CalibrateEmulateSample.DataContainers retained_svd_frac = 1.0, ) + @test_logs (:warn,) (:warn,) Emulator( + gp1, + iopairs, + obs_noise_cov = nothing, + normalize_inputs = false, + standardize_outputs = false, + retained_svd_frac = 1.0, + ) # check that gp1 does not get more models added under second call + Emulator( + gp1, + iopairs, + obs_noise_cov = nothing, + normalize_inputs = false, + standardize_outputs = false, + retained_svd_frac = 1.0, + ) + @test length(gp1.models) == 1 + Emulators.optimize_hyperparameters!(em1) μ1, σ1² = Emulators.predict(em1, new_inputs) @@ -122,6 +141,24 @@ using CalibrateEmulateSample.DataContainers ) @info " post emulator pre optimize" + @test_logs (:warn,) (:warn,) Emulator( + gp3, + iopairs, + obs_noise_cov = nothing, + normalize_inputs = false, + standardize_outputs = false, + retained_svd_frac = 1.0, + ) + Emulator( + gp3, + iopairs, + obs_noise_cov = nothing, + normalize_inputs = false, + standardize_outputs = false, + retained_svd_frac = 1.0, + ) + @test length(gp3.models) == 1 # check that gp3 does not get more models added under repeated calls + Emulators.optimize_hyperparameters!(em3) @info " post optimize pre predict" diff --git a/test/MarkovChainMonteCarlo/runtests.jl b/test/MarkovChainMonteCarlo/runtests.jl index 3b1c1ec75..e65346305 100644 --- a/test/MarkovChainMonteCarlo/runtests.jl +++ b/test/MarkovChainMonteCarlo/runtests.jl @@ -34,6 +34,33 @@ function test_prior() return ParameterDistribution(prior_dist, prior_constraint, prior_name) end +function test_prior_mv() + ### Define prior + return constrained_gaussian("u_mv", -1.0, 6.0, -Inf, Inf, repeats = 10) +end + +function test_data_mv(; rng_seed = 41, n = 20, var_y = 0.05, input_dim = 10, rest...) 
+ # Seed for pseudo-random number generator + rng = Random.MersenneTwister(rng_seed) + n = 40 # number of training points (overrides the kwarg default) + x = 1 / input_dim * π * rand(rng, Float64, (input_dim, n)) # predictors/features: input_dim × n + σ2_y = reshape([var_y], 1, 1) + y = sin(norm(x)) .+ rand(rng, Normal(0, σ2_y[1]), (1, n)) # predictands/targets: 1 × n + + return y, σ2_y, PairedDataContainer(x, y, data_are_columns = true), rng +end +function test_gp_mv(y, σ2_y, iopairs::PairedDataContainer; norm_factor = nothing) + gppackage = GPJL() + pred_type = YType() + # Construct kernel: + # Squared exponential kernel (note that hyperparameters are on log scale) + # with observational noise + gp = GaussianProcess(gppackage; noise_learn = true, prediction_type = pred_type) + em = Emulator(gp, iopairs; obs_noise_cov = σ2_y) + Emulators.optimize_hyperparameters!(em) + return em +end + function test_gp_1(y, σ2_y, iopairs::PairedDataContainer; norm_factor = nothing) gppackage = GPJL() pred_type = YType() @@ -117,6 +144,17 @@ end @test isapprox(test_obs, (obs_sample ./ sqrt(σ2_y[1, 1])); atol = 1e-2) end + @testset "MV priors" begin + # 10D dist with 1 name, just build the wrapper for test + prior_mv = test_prior_mv() + y_mv, σ2_y_mv, iopairs_mv, rng_mv = test_data(input_dim = 10) + init_params = repeat([0.0], 10) + obs_sample = obs_sample # scalar or Vector -> Vector + em_mv = test_gp_mv(y_mv, σ2_y_mv, iopairs_mv) + mcmc = MCMCWrapper(RWMHSampling(), obs_sample, prior_mv, em_mv; init_params = init_params) + + end + @testset "Sine GP & RW Metropolis" begin em_1 = test_gp_1(y, σ2_y, iopairs) new_step, posterior_mean_1 = mcmc_test_template(prior, σ2_y, em_1; mcmc_params...)
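# ---- editor's note (not part of the diff): the MCMCWrapper change earlier in this diff
# duplicates parameter names per dimension, so the 10-D "u_mv" prior in the testset above
# yields chain columns "u_mv_1", ..., "u_mv_10". A minimal sketch of that naming rule,
# mirroring the reduce/vcat logic added in src/MarkovChainMonteCarlo.jl:
pn = ["u_mv"]   # get_name(prior)
dd = [10]       # get_dimensions(prior)
param_names = reduce(vcat, [(pn[k] * "_") .* map(x -> string(x), 1:dd[k]) for k in 1:length(pn)])
# param_names == ["u_mv_1", "u_mv_2", ..., "u_mv_10"]
# ---- end editor's note --------------------------------------------------------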
- @test isapprox(new_step, 0.25; atol = 0.25) + @test isapprox(new_step, 0.75; atol = 0.6) # difference between mean_1 and ground truth comes from MCMC convergence and GP sampling @test isapprox(posterior_mean_1, π / 2; atol = 4e-1) diff --git a/test/RandomFeature/runtests.jl b/test/RandomFeature/runtests.jl index 6867040ee..b9ee270d1 100644 --- a/test/RandomFeature/runtests.jl +++ b/test/RandomFeature/runtests.jl @@ -7,7 +7,6 @@ using CalibrateEmulateSample.Emulators using CalibrateEmulateSample.DataContainers using CalibrateEmulateSample.EnsembleKalmanProcesses using CalibrateEmulateSample.ParameterDistributions -using RandomFeatures seed = 10101010 rng = Random.MersenneTwister(seed) @@ -119,6 +118,12 @@ rng = Random.MersenneTwister(seed) good_cov = shrinkage_cov(samples) @test (cond(good_cov) < 1.1) && ((good_cov[1] < 1.2) && (good_cov[1] > 0.8)) + # test NICE utility + samples = rand(MvNormal(zeros(100), I), 20) + # normal condition number should be huge around 10^18 + # nice cov will have improved conditioning, does not perform as well at this task as shrinking so has looser bounds + good_cov = nice_cov(samples) + @test (cond(good_cov) < 100) && ((good_cov[1] < 2.0) && (good_cov[1] > 0.2)) end @@ -137,16 +142,18 @@ rng = Random.MersenneTwister(seed) optimizer_options = Dict( "prior" => prior, - "n_ensemble" => max(ndims(prior) + 1, 10), - "n_iteration" => 5, - "scheduler" => DataMisfitController(), + "n_ensemble" => min(10 * ndims(prior), 100), + "n_iteration" => 10, + "scheduler" => DataMisfitController(terminate_at = 1000), "n_features_opt" => n_features, - "cov_sample_multiplier" => 2.0, + "cov_sample_multiplier" => 10.0, "inflation" => 1e-4, "train_fraction" => 0.8, "multithread" => "ensemble", - "accelerator" => DefaultAccelerator(), + "accelerator" => NesterovAccelerator(), "verbose" => false, + "cov_correction" => "shrinkage", + "n_cross_val_sets" => 2, ) srfi = ScalarRandomFeatureInterface( @@ -165,8 +172,6 @@ rng = Random.MersenneTwister(seed) @test get_input_dim(srfi) == input_dim @test get_rng(srfi) == rng @test get_kernel_structure(srfi) == kernel_structure - @test get_optimizer_options(srfi) == optimizer_options - # check defaults srfi2 = ScalarRandomFeatureInterface(n_features, input_dim) @test get_batch_sizes(srfi2) === nothing @@ -174,12 +179,13 @@ rng = Random.MersenneTwister(seed) @test get_kernel_structure(srfi2) == SeparableKernel(cov_structure_from_string("lowrank", input_dim), OneDimFactor()) - # currently the "scheduler" doesn't always satisfy X() = X(), bug so we need to remove this for now + # Some structs don't satisfy X == X so removed for now for key in keys(optimizer_options) - if !(key ∈ ["scheduler", "prior", "n_ensemble"]) + if !(key ∈ ["scheduler", "prior", "n_ensemble", "accelerator"]) @test get_optimizer_options(srfi2)[key] == optimizer_options[key] # we just set the defaults above end end + end @testset "VectorRandomFeatureInterface" begin @@ -199,18 +205,20 @@ rng = Random.MersenneTwister(seed) optimizer_options = Dict( "prior" => prior, - "n_ensemble" => max(ndims(prior) + 1, 10), - "n_iteration" => 5, - "scheduler" => DataMisfitController(), + "n_ensemble" => min(10 * ndims(prior), 100), + "n_iteration" => 10, + "scheduler" => DataMisfitController(terminate_at = 1000), "cov_sample_multiplier" => 10.0, "n_features_opt" => n_features, "tikhonov" => 0, "inflation" => 1e-4, "train_fraction" => 0.8, "multithread" => "ensemble", - "accelerator" => DefaultAccelerator(), + "accelerator" => NesterovAccelerator(), "verbose" => false, "localization" => 
EnsembleKalmanProcesses.Localizers.NoLocalization(), + "cov_correction" => "shrinkage", + "n_cross_val_sets" => 2, ) #build interfaces @@ -244,12 +252,15 @@ rng = Random.MersenneTwister(seed) cov_structure_from_string("lowrank", output_dim), ) + # exclude some structs where X == X not true for key in keys(optimizer_options) - if !(key ∈ ["scheduler", "prior", "n_ensemble"]) + if !(key ∈ ["scheduler", "prior", "n_ensemble", "accelerator"]) @test get_optimizer_options(vrfi2)[key] == optimizer_options[key] # we just set the defaults above + end end + end @testset "RF within Emulator: 1D -> 1D" begin # Training data input_dim = 1 output_dim = 1 - n = 40 # number of training points + n = 50 # number of training points x = reshape(2.0 * π * rand(n), 1, n) # unif(0,2π) predictors/features: 1 x n obs_noise_cov = 0.05^2 * I y = reshape(sin.(x) + 0.05 * randn(n)', 1, n) # predictands/targets: 1 x n iopairs = PairedDataContainer(x, y, data_are_columns = true) - ntest = 40 + ntest = 50 new_inputs = reshape(2.0 * π * rand(ntest), 1, ntest) new_outputs = sin.(new_inputs) # RF parameters n_features = 100 - eps = 1e-8 + eps = 1.0 # more reg needed here for some reason... scalar_ks = SeparableKernel(DiagonalFactor(eps), OneDimFactor()) # Diagonalize input (ARD-type kernel) + + eps = 1e-8 # revert to the smaller regularization for the vector kernel vector_ks = SeparableKernel(DiagonalFactor(eps), CholeskyFactor()) # Diagonalize input (ARD-type kernel) # Scalar RF options to mimic squared-exp ARD kernel - srfi = ScalarRandomFeatureInterface(n_features, input_dim, kernel_structure = scalar_ks, rng = rng) + n_features = 100 + srfi = ScalarRandomFeatureInterface( + n_features, + input_dim, + kernel_structure = scalar_ks, + rng = rng, + optimizer_options = Dict("n_cross_val_sets" => 0), + ) # Vector RF options to mimic squared-exp ARD kernel (in 1D) - vrfi = VectorRandomFeatureInterface(n_features, input_dim, output_dim, kernel_structure = vector_ks, rng = rng) + vrfi = VectorRandomFeatureInterface( + n_features, + input_dim, + output_dim, + kernel_structure = vector_ks, + rng = rng, + optimizer_options = Dict("n_cross_val_sets" => 0), + ) # build emulators em_srfi = Emulator(srfi, iopairs, obs_noise_cov = obs_noise_cov) + n_srfi = length(get_rfms(srfi)) em_vrfi = Emulator(vrfi, iopairs, obs_noise_cov = obs_noise_cov) + n_vrfi = length(get_rfms(vrfi)) + + # test bad case + optimizer_options = Dict("multithread" => "bad_option") + + srfi_bad = ScalarRandomFeatureInterface( + n_features, + input_dim, + kernel_structure = scalar_ks, + rng = rng, + optimizer_options = optimizer_options, + ) + @test_throws ArgumentError Emulator(srfi_bad, iopairs) + + # test under repeats + @test_logs (:warn,) Emulator(srfi, iopairs, obs_noise_cov = obs_noise_cov) + Emulator(srfi, iopairs, obs_noise_cov = obs_noise_cov) + @test length(get_rfms(srfi)) == n_srfi + @test_logs (:warn,) Emulator(vrfi, iopairs, obs_noise_cov = obs_noise_cov) + Emulator(vrfi, iopairs, obs_noise_cov = obs_noise_cov) + @test length(get_rfms(vrfi)) == n_vrfi + # just see if it prints something @test_logs (:info,) Emulators.optimize_hyperparameters!(em_srfi) @@ -300,6 +350,9 @@ rng = Random.MersenneTwister(seed) @test isapprox.(norm(μv - new_outputs), 0, atol = tol_μ) @test all(isapprox.(vec(σv²), 0.05^2 * ones(ntest), atol = 1e-2)) + + + end @testset "RF within Emulator: 2D -> 2D" begin # Generate training data diff --git a/test/Utilities/runtests.jl b/test/Utilities/runtests.jl index 54e1ad7c9..7fe46f522 100644 ---
a/test/Utilities/runtests.jl +++ b/test/Utilities/runtests.jl @@ -4,7 +4,6 @@ using Statistics using LinearAlgebra using CalibrateEmulateSample.Utilities -using CalibrateEmulateSample.Observations using CalibrateEmulateSample.EnsembleKalmanProcesses using CalibrateEmulateSample.DataContainers @@ -15,10 +14,6 @@ using CalibrateEmulateSample.DataContainers arr = vcat([i * ones(3)' for i in 1:5]...) arr_t = permutedims(arr, (2, 1)) - data_names = ["d1", "d2", "d3"] - obs = Observation(arr_t, data_names) #data must be columns as default - sample = get_obs_sample(rng, obs) - @test sample == [5.0, 5.0, 5.0] mean_arr = dropdims(mean(arr, dims = 1), dims = 1) std_arr = dropdims(std(arr, dims = 1), dims = 1)
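Editor's note: to close, a short demonstration of the new `nice_cov` estimator on the same rank-deficient setup the added test uses. This is a minimal sketch, assuming `nice_cov` is exported from `CalibrateEmulateSample.Emulators` (as the test additions above imply):

using LinearAlgebra, Statistics, Distributions
using CalibrateEmulateSample.Emulators

samples = rand(MvNormal(zeros(100), I), 20)  # 100-dim state estimated from only 20 samples
raw = cov(samples, dims = 2)                 # rank-deficient: condition number ~ 1e18
corrected = nice_cov(samples)                # NICE damps spurious correlations while keeping PSD structure
@show cond(raw) cond(corrected)              # the corrected estimate is far better conditioned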