diff --git a/.github/ISSUE_TEMPLATE/ask-a-question-about-gcpy.md b/.github/ISSUE_TEMPLATE/ask-a-question-about-gcpy.md deleted file mode 100644 index 1952c846..00000000 --- a/.github/ISSUE_TEMPLATE/ask-a-question-about-gcpy.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -name: Ask a question about GCPy -about: Template for users to ask general questions about GCPy -title: "[QUESTION]" -labels: question -assignees: '' - ---- - -## Type your question below: diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000..6fbbafb9 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: false +contact_links: + - name: GCPy user manual + url: https://gcpy.readthedocs.io/en/stable + about: Click this link to read the GCPy user manual. diff --git a/.github/ISSUE_TEMPLATE/new-feature-or-discussion.md b/.github/ISSUE_TEMPLATE/new-feature-or-discussion.md new file mode 100644 index 00000000..cfc7383e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/new-feature-or-discussion.md @@ -0,0 +1,17 @@ +--- +name: Request a new GCPy feature or start a discussion +about: Use this form to request a new GCPy feature or start a discussion +--- + +### Name and Institution (Required) + +Name: +Institution: + +### Confirm you have reviewed the following documentation + +- [ ] [Contributing guidelines](https://gcpy.readthedocs.io/en/stable/reference/CONTRIBUTING.html) + +### New GCPy feature or discussion + +Please provide a clear and concise overview of the discussion topic or new feature requested. diff --git a/.github/ISSUE_TEMPLATE/question-issue.md b/.github/ISSUE_TEMPLATE/question-issue.md new file mode 100644 index 00000000..8f4990fb --- /dev/null +++ b/.github/ISSUE_TEMPLATE/question-issue.md @@ -0,0 +1,20 @@ +--- +name: Ask a question about or report an issue with GCPy +about: Use this form to ask a question about GCPy or to report an issue +--- + +### Name and Institution (Required) + +Name: +Institution: + +### Confirm you have reviewed the following documentation + +- [ ] [Support guidelines](https://gcpy.readthedocs.io/en/stable/reference/SUPPORT.html) +- [ ] [User manual](https://gcpy.readthedocs.io/en/stable) +- [ ] [Current and past Github issues](https://github.com/geoschem/gcpy/issues) + +### Description of your issue or question + +Please provide as much detail as possible. Always include the GCPy version number and any relevant configuration and log files. + diff --git a/.github/ISSUE_TEMPLATE/report-a-gcpy-bug-or-issue.md b/.github/ISSUE_TEMPLATE/report-a-gcpy-bug-or-issue.md deleted file mode 100644 index 70437385..00000000 --- a/.github/ISSUE_TEMPLATE/report-a-gcpy-bug-or-issue.md +++ /dev/null @@ -1,36 +0,0 @@ ---- -name: Report a GCPy bug or issue -about: Template for users to report bugs or issues with GCPy -title: "[BUG/ISSUE]" -labels: bug -assignees: '' - ---- - -## Describe the bug -Provide a clear and concise description of the bug. - -## To Reproduce -List the steps to reproduce the bug or issue below: - -```python -...add your python code snippets here -``` - -## Expected behavior -A clear and concise description of what you expected to happen. - -## Required information: -Run the following commands from an interactive Python session: -```python ->>> import gcpy ->>> gcpy.show_versions() -``` -and -``` -...paste the output from gcpy.show_versions() here ... -``` -This will display information about your system and the versions of Python packages that GCPy depends on. 
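(For reference, the version information requested above can be gathered with a short snippet like the one below. This is a minimal sketch: `gcpy.show_versions()` is the call documented in the template, while `gcpy.__version__` is assumed to be defined by the package.)

```python
import gcpy

# GCPy version number to quote in the issue report
# (assumes the package exposes a __version__ attribute)
print("GCPy version:", gcpy.__version__)

# Print the versions of Python and of the packages that GCPy depends on
gcpy.show_versions()
```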
- -## Additional context -Provide any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/request-a-new-feature.md b/.github/ISSUE_TEMPLATE/request-a-new-feature.md deleted file mode 100644 index eaee01f7..00000000 --- a/.github/ISSUE_TEMPLATE/request-a-new-feature.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -name: Request a new GCPy feature -about: Template for users to request new features in GCPy -title: "[FEATURE REQUEST]" -labels: feature -assignees: '' - ---- - -## Add your feature request below: diff --git a/.github/ISSUE_TEMPLATE/start-a-gcpy-discussion.md b/.github/ISSUE_TEMPLATE/start-a-gcpy-discussion.md deleted file mode 100644 index b0748509..00000000 --- a/.github/ISSUE_TEMPLATE/start-a-gcpy-discussion.md +++ /dev/null @@ -1,28 +0,0 @@ ---- -name: Start an extended discussion about a GCPy-related topic -about: This template allows users to open an extended discussion about a particular GCPy topic. -title: "[DISCUSSION]" -labels: discussion -assignees: '' - ---- - -# Start a discussion about a GCPy-related topic - -This issue template is intended for extended discussions about open GCPy topics, both technical and scientific. Please use one of the other issue templates for: - -* [Submitting a GCPy bug or technical issue](https://github.com/geoschem/gcpy/issues/new?assignees=&labels=&template=report-a-bug-or-technical-issue.md&title=%5BBUG%2FISSUE%5D); -* [Asking a question about GCPy](https://github.com/geoschem/gcpy/issues/new?assignees=&labels=&template=ask-a-question-about-geos-chem.md&title=%5BQUESTION%5D); and/or -* [Requesting a new GCPy feature](https://github.com/geoschem/gcpy/issues/new?assignees=&labels=&template=request-a-new-feature.md&title=%5BFEATURE+REQUEST%5D.) - -Feel free to edit the section headers below accordingly - ------- YOU CAN REMOVE ALL TEXT ABOVE THIS LINE IF YOU WISH ------ - -## Overview - -## Additional information - -## Action items - -## Status updates diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000..b9d1f604 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,24 @@ +### Name and Institution (Required) + +Name: +Institution: + +### Confirm you have reviewed the following documentation + +- [ ] [Contributing guidelines](https://gcpy.readthedocs.io/en/stable/reference/CONTRIBUTING.html) + +### Describe the update + +Please provide a clear and concise overview of the update. + +### Expected changes + +Please provide details on how this update will impact GCPy output and include plots or tables as needed. + +### Reference(s) + +If this is a science update, please provide a literature citation. + +### Related Github Issue(s) + +Please link to the corresponding Github issue here. If fixing a bug, there should be an issue describing it with steps to reproduce. diff --git a/.github/create-a-gcpy-pull-request.md b/.github/create-a-gcpy-pull-request.md deleted file mode 100644 index c902a2fe..00000000 --- a/.github/create-a-gcpy-pull-request.md +++ /dev/null @@ -1,58 +0,0 @@ ---- -name: Submit updates to GCPy with a pull request -about: This template instructs users to submit a GEOS-Chem pull request -title: "[PULL REQUEST]" -labels: '' -assignees: '' - ---- - -# GCPy Update Submission Form - -Please fill out this form to submit an update for consideration in GEOS-Chem. Contact the [GEOS-Chem Support Team](http://wiki.geos-chem.org/GEOS-Chem_Support_Team) with any questions. 
- -Please also see our [Submitting updates for inclusion into GEOS-Chem](http://wiki.geos-chem.org/Submitting_updates_for_inclusion_in_GEOS-Chem) wiki page for more information. - -## Brief description of this update: -e.g. Our update does XYZ, which is necessary because ... - -## Purpose of this update: - -* [ ] Bug fix -* [ ] New feature -* [ ] Structural update -* [ ] Benchmarking update -* [ ] Documentation update -* [ ] Other (please specify) - - -## Developers and their affiliations: -* e,g, Jane Smith (Institution X) - - -## Lead developer's email address: -* e.g. janesmith@myemailaddress.com - - -## Reference(s) (if applicable): -* e.g. Smith et al, _Title of our research study_, Journal ABC, vol. XX, pp. YY-ZZ, 2019. DOI:xxxx/xxxxxxx - - -## This update impacts (check all that apply): - -* [ ] GCPy source code -* [ ] Input data -* [ ] Other (please specify) - - -## Tests conducted to validate this update: - -* [ ] Unit Test (e.g. via pytest) -* [ ] Created plots to verify that everything looked as expected -* [ ] Other (please specify) - - -## Update submitted as: -* [ ] Github pull request (recommended) -* [ ] Git patch file -* [ ] Input data \ No newline at end of file diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 00000000..75649061 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,82 @@ +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +# +# ******** NOTE ******** +# We have attempted to detect the languages in your repository. Please check +# the `language` matrix defined below to confirm you have the correct set of +# supported CodeQL languages. +# +name: "CodeQL" + +on: + push: + branches: [ "main", "dev" ] + pull_request: + # The branches below must be a subset of the branches above + branches: [ "main", "dev" ] + schedule: + - cron: '21 14 * * 2' + +jobs: + analyze: + name: Analyze + # Runner size impacts CodeQL analysis time. To learn more, please see: + # - https://gh.io/recommended-hardware-resources-for-running-codeql + # - https://gh.io/supported-runners-and-hardware-resources + # - https://gh.io/using-larger-runners + # Consider using larger runners for possible analysis time improvements. + runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} + timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [ 'python' ] + # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ] + # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both + # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both + # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v2 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. 
+ # Prefix the list here with "+" to use these queries and those in the config file. + + # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + # queries: security-extended,security-and-quality + + + # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v2 + + # ℹ️ Command-line programs to run using the OS shell. + # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun + + # If the Autobuild fails above, remove it and uncomment the following three lines; + # modify them (or add more) to build your code. If your project requires a custom build, + # please refer to the example below for guidance. + + # - run: | + # echo "Run, Build Application using script" + # ./location_of_script_within_repo/buildscript.sh + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v2 + with: + category: "/language:${{matrix.language}}" diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml new file mode 100644 index 00000000..0f0e51eb --- /dev/null +++ b/.github/workflows/pylint.yml @@ -0,0 +1,23 @@ +name: Pylint + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9", "3.10", "3.11"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pylint + - name: Analysing the code with pylint + run: | + pylint $(git ls-files '*.py') diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml new file mode 100644 index 00000000..d858af41 --- /dev/null +++ b/.github/workflows/stale.yml @@ -0,0 +1,27 @@ +# This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time. +# +# You can adjust the behavior by modifying this file. +# For more information, see: +# https://github.com/actions/stale +name: Mark stale issues and pull requests + +on: + schedule: + - cron: '32 22 * * *' + +jobs: + stale: + + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + + steps: + - uses: actions/stale@v5 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + stale-issue-message: 'Stale issue message' + stale-pr-message: 'Stale pull request message' + stale-issue-label: 'no-issue-activity' + stale-pr-label: 'no-pr-activity' diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..7c8d284c --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,38 @@ +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details +# +# NOTE: This is now needed in order to prevent builds from failing due +# to Python package issues at the ReadTheDocs site.
For more info, see: +# https://github.com/readthedocs/readthedocs.org/issues/10290 +# -- Bob Yantosca (10 May 2023) + +# Required +version: 2 + +## Set the version of Python and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.7" + nodejs: "14" + rust: "1.55" + golang: "1.17" + +# Build documentation in the docs/ directory with Sphinx +sphinx: + builder: html + configuration: docs/source/conf.py + +# If using Sphinx, optionally build your docs in additional formats such as PDF +formats: all + +# Optionally declare the Python requirements required to build your docs +python: + install: + - requirements: docs/requirements.txt + +# Tell RTD to clone all submodules +submodules: + include: all + recursive: true diff --git a/AUTHORS.txt b/AUTHORS.txt index 0307945b..2a045f87 100644 --- a/AUTHORS.txt +++ b/AUTHORS.txt @@ -1,13 +1,366 @@ -List of GCPy developers (29 Sep 2022) +List of Developers for GEOS-Chem, HEMCO, and Related Software, +including GCPy (11 Jul 2023) =============================================================================== -Liam Bindle -Will Downs -Sebastian Eastham -Lucas Estrada -Haipeng Lin -Lizzie Lundgren -Daniel Rothenberg -Melissa Sulprizio -Bob Yantosca -Jiawei Zhuang +ADAMS, Peter +ALEXANDER, Becky +ALVARADO, Matthew +AMOS, Helen +ANGOT, Helene +AUSMEEL, Stina +AUVRAY, Marion +BALASUS, Nicholas +BARAY, Sabour +BARKLEY, Michael +BARRET, Steven +BARRET, Brice +BATES, Kelvin +BEY, Isabelle +BINDLE, Liam +BOERSMA, Folkert +BOJJAGANI, Sreekanth +BONILLA, Eimy +BOVY, Benoit +BOWMAN, Kevin +BOYS, Brian +BRATTICH, Erika +BREIDER, Tom +BREWER, Jared +BROWNE, Ellie +BROWN-STEINER, Benjamin +BUKOSA, Beata +BUTENHOFF, Christopher +CADY-PEREIRA, Karen +CAMERON-SMITH, Philip +CAMPBELL, Patrick +CAO, Guofeng +CAO, Hansen +CAO, Liangzhong +CAO, Yi +CAPPS, Shannon +CAROUGE, Claire +CARTER, Therese (Tess) +CAZORLA, Maria +CHALIYAKUNNEL, Sreelekha +CHAN MILLER, Christopher +CHEN, Han +CHEN, Jing +CHEN, Long +CHEN, Qi +CHEN, Qianjie +CHEN, Xin +CHEN, Yang +CHOI, Yunsoo +CHRISTIAN, Kenneth +CLUNE, Tom +COOPER, Matt +CORBITT, Bess +CROFT, Betty +CRUZ, Xochitl +CURCI, Gabriele +DACHS, Jordi +DAVILA, Yanko +DELWICHE, Kyle +DENG, Feng +DESOUZA, Priyanka +DESSERVETTAZ, Max +DEUTSCHER, Nicholas +DI PIERRO, Maurizio +DLAMINI, Thandolwethu +DOMMERGUE, Aurelien +DOWNS, Will +DRURY, Easan +DUNCAN, Bryan +EASTHAM, Sebastian +ELLIS, Raluca +ENBERG, Joseph +ESTRADA, Lucas +EVANS, Mathew +FAIRLIE, T. 
Duncan +FARINA, Sal +FAYE, Vivian +FEINBERG, Ari +FIELD, Brendan +FIORE, Arlene +FISCHER, Emily +FISHER, Jenny +FITE, Charley +FLEMING, Eric +FLO GAYA, Judit +FORD, Bonne +FRANCIS, Timmy +FREESE, Lyssa +FRIEDMAN, Carey +FRITZ, Thibaud +FU, Tzung-May +FU, Joshua +GANTT, Brett +GE, Cui +GEDDES, Jeffrey +GIANG, Amanda +GIANNAKOPOULOS, Christos +GOUNIA, Harriet +GRAEF, Edward +GREENSLADE, Jesse +GU, Xiaoguang +HAMMER, Melanie +HASKINS, Jessica +HE, Cenlin +HE, Jourdan +HEALD, Colette +HENDERSON, Barron +HENZE, Daven +HO, Wai-Lo +HORDIICHUK, Myroslav +HOLMES, Christopher +HOROWITZ, Hannah +HOSSENI, Zahra +HU, Lu +HUANG, Jianping +HUANG, Jiayue +IKEDA, Kohei +JACOB, Daniel +JACOBSON, Andy +JACOBSON, Marc +JAEGLE, Lyatt +JANSEN, Ruud +JIANG, Weiyuan +JIANG, Zhe +JIN, Lixu +JISKRA, Martin +JOHNSON, Matthew +JONES, Dylan +JUNG, Jaegun +KASIBHATLA, Prasad +HUDMAN KAY, Rynda +KELLER, Christoph +KELP, Makoto +KIKUCHI, Tetsuro +KIM, Hyeonmin +KIM, Sungshik +KITWIROON, Nutthida +KODROS, Jack +KOO, Jamin +KOPACZ, Monika +KOPLITZ, Shannon +KOUATCHOU, Jules +KREMSER, Stefanie +LACEY, Forrest +LAMENCUSA, Carmen +LAMSAL, Lok +LAPINA, Kateryna +LARSON, Kelsey +LARY, David +LATIMER, Robyn +LE SAGER, Philippe +LEE, Chulkyu +LEE, Colin +LEE, Meemong +LEHMANN, Ralph +LEIBENSPERGER, Eric +LERICHE, Maud +LEUNG, Fok-Yan +LI, Bengang +LI, Chi +LI, Ke +LI, Qinbin +LI, Xianglan +LI, Yanshun +LIAO, K.J. +LIANG, Qing +LIAO, Hong +LIN, Haipeng +LIN, Jintai +LIN, Shian-Jiann +LINFORD, John +LIU, Hongyu +LIU, Jane +LIU, Junhua +LIU, Pengfei +LIU, Tina +LIU, Yang +LIU, Ying +LOGAN, Jennifer +LONG, Mike +LU, Xiao +LUNDGREN, Lizzie +LUO, Gan +MAASAKKERS, J. D. (Bram) +MACINTYRE, Helen +MACKENZIE, Dave +MAHIEU, Emmanuel +MAO, Jingqiu +MARAIS, Eloise +MARKS, Marguerite +MARKUS, Arjen +MARTIN, Randall +MARVIN, Maggie +MCDUFFIE, Erin +MCGUFFIN, Dana +MCLINDEN, Chris +MENG, Hongjian +MENG, Jun +MESKHIDZE, Nicholas +MIATSELSKAYA, Natallia +MICKLEY, Loretta +MILLET, Dylan +MING, Kai +MOCH, Jonathan +MOORE, Neil +MOORING, Todd +MORENA, Jessica +MORIN, Gabriel +MORRIS, Eleanor +MU, Mingquan +MURPHY, Killian +MURRAY, Lee +NAM, Jun +NASSAR, Ray +NATHAN, Brian +NENES, Athanasios +NEVISON, Cynthia +NIELSEN, Eric +NOTHOLT, Justus +O'DELL, Kate +OKAMOTO, Sachiko +PAGE, Neil +PAI, Sidhant +PALMER, Paul +PARAZOO, Nick +PARELLA, Justin +PARK, Rokjin +PARRINGTON, Mark +PAULOT, Fabien +PHILIP, Sajeev +PICKETT-HEAPS, Christopher +PIERCE, Jeff +PORTER, Will +POUND, Ryan +PRATHER, Michael +PROTONATARIOU, Anna +PUTMAN, Bill +PYE, Havala +QURESHI, Asif +RAMNARINE, Emily +RANDERSON, James +RANDLES, Cynthia +RAVETTA, Francois +REINHART, Wes +RIDLEY, David +RIZZA, Umberto +ROTHENBERG, Daniel +ROY, Eric +SAFIEDDINE, Sarah +SANDU, Adrian +SANTILLANA, Mauricio +SAUVAGE, Bastien +SCARPELLI, Tia +SCHIFERL, Luke +SCHMIDT, Johan +SCHULTZ, Martin +SCHWANTES, Rebecca +SEBASTIANELLI, Paolo +SELIN, Noelle +SEYMOUR, Michael +SHAH, Viral +SHAO, Jingyuan +SHEN, Lu +SHERWEN, Tomas +SHUTTER, Joshua +SILVA, Sam +SMITH-DOWNEY, Nicole +SOERENSEN, Anne Laerke +SOFEN, Eric +SONG, Shaojie +SONG, Yu +SPRACKLEN, Dominick +STANEVICH, Ilya +STEENROD, Stephen +STERN, Rebecca +STEVENS, Robin +STREETS, David +STRODE, Sarah +SU, Flora +SULPRIZIO, Melissa +SUN, Minmin +SUNDERLAND, Elsie +SUNTHARALINGAM, Parvada +SURL, Luke +SUTHERLAND, Bethany +SUTO, Kimito +SZELAG, Monika +TAI, Amos +TANIMOTO, Hiroshi +TANNAHILL, John +TAO, Shu +THACKRAY, Colin +THOMPSON, Matt +THONAT, Thibaud +TIAN, Rong +TOMBROU, Maria +TRAVIS, Katherine +TRAYANOV, Atanas +TRIVITAYANURAK, Win +TRUJILLO, Corey 
+TUCCELLA, Paolo +TURNER, Alexander +TURQUETY, Solene +TZOMPA-SOSA, Zitely +UNGER, Nadine +VAL MARTIN, Maria +VAN DONKELAAR, Aaron +VAROTSOS, Kostas +VINKEN, Geert +VITT, Francis +WAGSTROM, Kristina +WALKER, Thomas +WANG, James +WANG, Jun +WANG, Qiaoqiao +WANG, Xuan +WANG, Xiaoli +WANG, Yuhang +WANG, Yuting +WANG, Yuxuan +WANG, Zifa +WECHT, Kevin +WEIDNER, Richard +WEISENSTEIN, Debra +WELLS, Kelley +WENG, Hongjian +WESTERVELT, Dan +WHALEY, Cynthia +WIEDINMYER, Christine +WILD, Oliver +WOHLTMANN, Ingo +WU, Shiliang +XIAO, Yaping +XU, Junwei +YAN, Yingying +YANNETTI, Matt +YANTOSCA, Bob +YATTEAU, Jack +YUMIMOTO, Keiya +YU, Karen +YU, Fangqun +YU, Xingpei +YUE, Xu +YUE, Xuyan +ZATKO, Maria +ZHAI, Shixian +ZHAI, Shuting +ZHANG, Bingqing +ZHANG, Bo +ZHANG, Chi +ZHANG, Dandan +ZHANG, Hongliang +ZHANG, Jiawei +ZHANG, Li +ZHANG, Lin +ZHANG, Qiang +ZHANG, Yanxu +ZHU, Lei +ZHU, Liye +ZHUANG, Jiawei +ZHUANG, Quanlai +ZOOGMAN, Peter diff --git a/CHANGELOG.md b/CHANGELOG.md index e8737750..681badee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,94 @@ All notable changes to GCPy will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.4.0] - 2023-11-20 +### Added +- Added C2H2 and C2H4 to `emission_species.yml` +- Updated `species_database.yml` for consistency with GEOS-Chem 14.2.0 +- Added `.github/ISSUE_TEMPLATE/config.yml` file w/ GitHub issue options +- Added `CONTRIBUTING.md` and `SUPPORT.md`, replacing `docs/source/Contributing.rst` and `docs/source/Report_Request.rst` +- Added option to pass the benchmark type to plotting routines +- Updated `AUTHORS.txt` as of Apr 2023 (concurrent w/ GEOS-Chem 14.2.0) +- Added ReadTheDocs badge in `README.md` +- Added `.readthedocs.yaml` to configure ReadTheDocs builds +- Added cloud benchmarking YAML configuration files to `benchmark/cloud` folder +- Added `README.md` files in `gcpy/benchmark` directory structure +- Added `benchmark/modules/benchmark_models_vs_obs.py` script +- Added `benchmark/modules/GC_72_vertical_levels.csv` file +- Added `multi_index_lat` keyword to `reshape_MAPL_CS` function in `gcpy/util.py` +- Added FURA to `emission_species.yml` and `benchmark_categories.yml` +- Added new routine `format_number_for_table` in `gcpy/util.py` +- Added module `gcpy/cstools.py` with utility functions for cubed-sphere grids +- Added new routine `verify_variable_type` in `gcpy/util.py` +- Added BrSALA and BrSALC to `emission_species.yml` +- Added `options:n_cores` to all benchmark YAML config files +- Added `__init__.py` files in subfolders of `gcpy/gcpy` +- `gcpy/benchmark/modules/*.py` scripts are now chmod 644 +- Added `ENCODING = "UTF-8"` to `gcpy/constants.py` +- Added statement `from dask.array import Array as DaskArray` in `gcpy/plot.py` +- Added SLURM run script `gcpy/benchmark/benchmark_slurm.sh` +- Added `gcpy/plot/gcpy_plot_style` style sheet for title and label default settings +- Added new cubed-sphere grid inquiry functions to `gcpy/cstools.py` +- Added functions `get_ilev_coord` and `get_lev_coord` to `gcpy/grid.py` +- Added `tk` package to `docs/environment_files/environment.yml` + +### Changed +- Simplified the GitHub issue templates into two options: `new-feature-or-discussion.md` and `question-issue.md` +- The GitHub PR template is now
named `.github/PULL_REQUEST_TEMPLATE.md` +- Updated badge links in `README.md` +- Construct ops budget table filename without using the `label` argument +- Renamed TransportTracers species in `benchmark_categories.yml`, `run_1yr_tt_benchmark.py`, and in documentation +- YAML files in `benchmark/` have been moved to `benchmark/config` +- Models vs. O3 obs plots are now arranged by site latitude from north to south +- Routine `print_totals` now prints small and/or large numbers in scientific notation +- Truncate names in benchmark & emissions tables to improve readability +- Added TransportTracers species names to `gcpy/emissions_*.yml` files +- Now pass `n_job=config["options"]["n_cores"]` to benchmark plotting routines +- Renamed script `benchmark.py` to `benchmark_funcs.py` to remove a name collision +- Folder `gcpy/benchmark` is now `gcpy/gcpy/benchmark` +- Folder `benchmark/modules` is now `gcpy/gcpy/benchmark/modules` +- Folder `gcpy/examples` is now `gcpy/gcpy/examples` +- Pass `sys.argv` to the `main()` routine of `run_benchmark.py`, `compare_diags.py` +- Updated `docs/environment_files/environment.yml` for MambaForge (also added `gridspec`) +- Now use `pypdf` instead of `PyPDF2` in `plot.py` and `util.py` +- Applied coding suggestions made by `pylint` where possible +- Abstracted deeply nested code from `six_plot` into separate functions (in `plot.py`) +- Added `main()` routine to `gcpy/file_regrid.py`; also added updates suggested by Pylint +- Fixed broken regridding code in `gcpy/file_regrid.py`; also refactored for clarity +- Rewrote `Regridding.rst` page; confirmed that regridding examples work properly +- Now allow `plot_val` to be of type `dask.array.Array` in `plot.py` routines `six_plot` and `single_panel` +- Now add `if` statements to turn off `Parallel()` commands when `n_jobs==1` (illustrated below).
+- Do not hardwire fontsize in `gcpy/plot.py`; get defaults from `gcpy_plot_style` +- `gcpy/plot.py` has been split up into smaller modules in the `gcpy/plot` folder +- Updated and cleaned up code in `gcpy/regrid.py` +- Example scripts `plot_single_level` and `plot_comparisons` can now accept command-line arguments +- Example scripts `plot_single_level.py`, `plot_comparisons.py`, `compare_diags.py` now handle GCHP restart files properly +- Now specify the X11 backend by setting the `MPLBACKEND` environment variable + +### Fixed +- Generalized test for GCHP or GCClassic restart file in `regrid_restart_file.py` +- Fixed bug in transport tracer benchmark mass conservation table file write +- Routine `create_display_name` now splits on only the first `_` in species & diag names +- Prevent plot panels from overlapping in six-panel plots +- Prevent colorbar tick labels from overlapping in dynamic-range ratio plots +- Updated `seaborn` plot style names to conform to the latest matplotlib +- Set `lev:positive` and/or `ilev:positive` properly in `regrid_restart_file.py` and `file_regrid.py` +- Prevent overwriting of `lev` coord in `file_regrid.py` at netCDF write time +- Fixed bug in option to allow different units when making comparison plots + +### Removed +- Removed `gchp_is_pre_13_1` arguments & code from benchmarking routines +- Removed `is_pre_13_1` tags from `*_benchmark.yml` config files +- Removed `benchmark_emission_totals.ipynb`, which is obsolete +- Replaced `gcpy/benchmark/README` with `README.md` +- Removed `gcpy_test_dir` option from `examples/diagnostics/compare_diags.*` +- Removed `docs/environment_files/gchp_regridding.yml` environment file +- Removed `gcpy/gcpy/benchmark/plot_driver.sh` +- Made benchmark configuration files consistent ## [1.3.3] -- 2023-03-09 ### Added - Updated installation documentation, we now recommend users to create @@ -13,6 +101,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - New functions in `benchmark.py` and `util.py` to facilitate printing of the species/emissions/inventories that differ between Dev & Ref versions. - Added new RTD documentation for installing Conda 4.12.0 with Miniconda - Added GCHP regridding environment file `docs/environment_files/gchp_regridding.yml` +- Added new benchmark type CH4Benchmark ### Changed - Applied cleanup suggestions from pylint to `benchmark.py`, `util.py`, `plot.py`, `oh_metrics.py`, `ste_flux.py` @@ -48,6 +137,3 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Removed obsolete environment.yml files (@yantosca) - Added requirements.yml to docs folder for Sphinx/RTD documentation (@yantosca) - New regridding script `regrid_restart_file.py` (@liambindle) + ### Changed - Fixed several issues in benchmarking scripts (@laestrada, @lizziel, @yantosca) - Fixed bug in `budget_ox.py`; The drydep loss of Ox for GCHP was 12x too high @@ -288,16 +378,3 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Support for plotting benchmark output for both GEOS-Chem Classic (lat/lon data) and GCHP (cubed-sphere data). The first official release version of GCPy, v1.0.0, will correspond with the release of GEOS-Chem 13.0.0.
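To illustrate the `Parallel()` bypass flagged as "illustrated below" in the 1.4.0 "Changed" entries above, here is a minimal sketch of the pattern. The function names are hypothetical; this is not the actual GCPy code.

```python
from joblib import Parallel, delayed

def make_one_plot(dataset):
    """Stand-in for a single plotting operation."""
    return f"plotted {dataset}"

def make_all_plots(datasets, n_jobs=1):
    """Create one plot per dataset, running in serial when n_jobs == 1."""
    if n_jobs == 1:
        # Serial path: skip joblib entirely, which avoids worker startup
        # overhead and makes debugging and profiling much simpler
        return [make_one_plot(dataset) for dataset in datasets]
    # Parallel path: dispatch one task per dataset across n_jobs workers
    return Parallel(n_jobs=n_jobs)(
        delayed(make_one_plot)(dataset) for dataset in datasets
    )
```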
- - -## [Unreleased] - -### Added - -### Changed - -### Deprecated - -### Fixed - -### Removed diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..057c5a8a --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,52 @@ +# Contributing Guidelines + +Thank you for looking into contributing to GCPy! GEOS-Chem is a grass-roots model that relies on contributions from community members like you. Whether you're new to GEOS-Chem or a longtime user, you're a valued member of the community, and we want you to feel empowered to contribute. + +## We use GitHub and ReadTheDocs +We use GitHub to host the GCPy source code, to track issues, user questions, and feature requests, and to accept pull requests: [https://github.com/geoschem/gcpy](https://github.com/geoschem/gcpy). Please help out as you can in response to issues and user questions. + +GCPy documentation can be found at [gcpy.readthedocs.io](https://gcpy.readthedocs.io). + +## When should I submit updates? + +Submit bug fixes right away, as these will be given the highest priority. Please see "Support Guidelines" for more information. + +The practical aspects of submitting code updates are listed below. + +## How can I submit updates? +We use [GitHub Flow](https://guides.github.com/introduction/flow/index.html), so all changes happen through pull requests. This workflow is [described here](https://guides.github.com/introduction/flow/index.html). + +As the author you are responsible for: +- Testing your changes +- Updating the user documentation (if applicable) +- Supporting issues and questions related to your changes + +### Process for submitting code updates + + 1. Create or log into your [GitHub](https://github.com/) account. + 2. [Fork the GCPy repository](https://help.github.com/articles/fork-a-repo/) into your GitHub account. + 3. Clone your fork of the GCPy repository to your computer system. + 4. Add your modifications into a [new branch](https://git-scm.com/book/en/v2/Git-Branching-Branches-in-a-Nutshell) off the **main** branch. + 5. Test your update thoroughly and make sure that it works. + 6. Review the coding conventions and checklists for code and data updates listed below. + 7. Create a [pull request in GitHub](https://help.github.com/articles/creating-a-pull-request/). + 8. The [GEOS-Chem Support Team](https://wiki.geos-chem.org/GEOS-Chem_Support_Team) will add your updates into the development branch for an upcoming GCPy version. + 9. If the benchmark simulations reveal a problem with your update, the GCST will request that you take further corrective action. + +### Coding conventions +GCPy includes contributions from many people and multiple organizations. Therefore, some inconsistent conventions are inevitable, but we ask that you do your best to be consistent with nearby code. + +### Checklist for submitting code updates + + 1. Include thorough comments in all submitted code. + 2. Include full citations for references at the top of relevant source code modules (see the example sketch below). + 3. Remove extraneous code updates (e.g. testing options, other science). + +## How can I request a new feature? +We accept feature requests through issues on GitHub. To request a new feature, **[open a new issue](https://github.com/geoschem/gcpy/issues/new/choose)** and select the feature request template. Please include all the information that might be relevant, including the motivation for the feature. + +## How can I report a bug? +Please see **[Support Guidelines](https://gcpy.readthedocs.io/en/stable/reference/SUPPORT.html)**.
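As an illustration of item 2 in the contributing checklist above, a module header carrying a full citation might look like the following sketch. The module name is hypothetical, and the placeholder reference echoes the example format from the retired pull request form.

```python
"""
compute_xyz.py: Computes a hypothetical XYZ diagnostic from GEOS-Chem output.

Reference:
    Smith et al., "Title of our research study", Journal ABC,
    vol. XX, pp. YY-ZZ, 2019. DOI: xxxx/xxxxxxx
"""
```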
+ +## Where can I ask for help? +Please see **[Support Guidelines](https://gcpy.readthedocs.io/en/stable/reference/SUPPORT.html)** diff --git a/LICENSE.txt b/LICENSE.txt index ac6f5163..a43bf7e3 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,8 +1,9 @@ -License Agreement for GCPy and related developments -(The MIT "Expat" License, http://opensource.org/licenses/MIT) -============================================================================== +############################################################################ +### License Agreement for GCPy and Related Developments ### +### (The MIT License, http://opensource.org/licenses/MIT) ### +############################################################################ -Copyright (c) 2017-2022 GCPy Developers +Copyright (c) 2017-2023 GCPy Developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -22,17 +23,17 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ------------------------------------------------------------------------------- -NOTE: Please see the AUTHORS.txt file (in this folder) for the list of -GCPy Developers. This list will be updated frequently, such as at -each major GCPy release. ------------------------------------------------------------------------------- - + NOTE: Please see the AUTHORS.txt file (in this folder) for the list of + Developers of GEOS-Chem, HEMCO and Related Software. This list will + be updated frequently, such as at each major GEOS-Chem release. -Licenses for Python packages used by GCPy: -============================================================================== +############################################################################# +### License agreements for third-party code packages ### +### used by GCPy and Related Developments ### +############################################################################# Cartopy: +----------------------------------------------------------------------------- Cartopy code @@ -63,6 +64,7 @@ Cartopy: matplotlib: +----------------------------------------------------------------------------- License agreement for matplotlib versions 1.3.0 and later ========================================================= @@ -115,6 +117,7 @@ matplotlib: Agreement. NumPy: +----------------------------------------------------------------------------- NumPy license Copyright © 2005-2019, NumPy Developers. @@ -150,6 +153,7 @@ NumPy: SciPy: +------------------------------------------------------------------------------ SciPy license Copyright © 2001, 2002 Enthought, Inc. @@ -188,43 +192,49 @@ SciPy: xarray: +----------------------------------------------------------------------------- xarray is available under the open source Apache License. http://www.apache.org/licenses/LICENSE-2.0.html esmpy: +----------------------------------------------------------------------------- Licensed under the University of Illinois-NCSA License. 
https://opensource.org/licenses/NCSA xESMF: +------------------------------------------------------------------------------ MIT License Copyright (c) 2017 Jiawei Zhuang - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without restriction + including without limitation the rights to use, copy, modify, + merge, publish, distribute, sublicense, and/or sell copies of the + Software, and to permit persons to whom the Software is furnished + to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. pandas: +----------------------------------------------------------------------------- BSD 3-Clause License @@ -261,6 +271,7 @@ pandas: sphinx: +----------------------------------------------------------------------------- BSD 2-Clause License diff --git a/README.md b/README.md index 183b6cbb..010e3098 100644 --- a/README.md +++ b/README.md @@ -1,42 +1,67 @@ -[![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/geoschem/gcpy/blob/master/LICENSE.txt) - # GCPy: Python toolkit for GEOS-Chem -**GCPy** is a Python-based toolkit containing useful functions for working specifically with the GEOS-Chem model of atmospheric chemistry and composition. +

+<!-- Badge links: DOI badge, ReadTheDocs badge -->
-GCPy aims to build on the well-established scientific Python technical stack, leveraging tools like cartopy and xarray to simplify the task of working with model output and performing atmospheric chemistry analyses. +**GCPy** is a Python-based toolkit containing useful functions for working specifically with the GEOS-Chem model of atmospheric chemistry and composition. + +**GCPy** aims to build on the well-established scientific Python technical stack, leveraging tools like **cartopy**, **numpy**, and **xarray** to simplify the task of working with GEOS-Chem model output and performing atmospheric chemistry analyses. ## What GCPy was intended to do: -1. Produce plots and tables from GEOS-Chem output using simple function calls. -2. Generate the standard evaluation plots and tables from GEOS-Chem benchmark output. -3. Obtain GEOS-Chem's horizontal/vertical grid information. -4. Implement GCHP-specific regridding functionalities (e.g. cubed-sphere to lat-lon regridding) -5. Provide example scripts for creating specific types of plots or analysis from GEOS-Chem output. +1. Produce plots and tables from [GEOS-Chem](https://geos-chem.readthedocs.io) output using simple function calls. +2. Generate the standard evaluation plots and tables for GEOS-Chem benchmark simulations. +3. Obtain GEOS-Chem's horizontal and vertical grid information. +4. Implement [GCHP](https://gchp.readthedocs.io)-specific regridding functionalities (e.g. cubed-sphere to lat-lon regridding). +5. Provide example scripts for creating specific types of plots or analysis from GEOS-Chem output. +6. Provide user-submitted scripts for specific applications related to GEOS-Chem and [HEMCO](https://hemco.readthedocs.io). -## What GCPY was not intended to do: +## What GCPy was not intended to do: 1. General NetCDF file modification: (crop a domain, extract some variables): - * Use [xarray](http://xarray.pydata.org) instead. - * Also see [our *Working with netCDF data files* wiki page](http://wiki.geos-chem.org/Working_with_netCDF_data_files). + * Instead, use netCDF tools such as: + * [xarray](http://xarray.pydata.org) + * [netCDF Operators (NCO)](https://nco.sourceforge.net) + * [Climate Data Operators (CDO)](https://mpimet.mpg.de/cdo) + * Also see our [*Work with netCDF files* guide](https://geos-chem.readthedocs.io/en/latest/geos-chem-shared-docs/supplemental-guides/netcdf-guide.html) at [geos-chem.readthedocs.io](https://geos-chem.readthedocs.io). + 2. Statistical analysis: - * Use [scipy](http://www.scipy.org)/[scikit-learn](https://scikit-learn.org) tools instead -3. Machine Learning: - * Use the standard machine learning utilities ([pytorch](https://pytorch.org), [tensorflow](https://www.tensorflow.org), [julia](https://julialang.org), etc.) + * Instead, use statistical tools such as: + * [scipy](http://www.scipy.org) + * [scikit-learn](https://scikit-learn.org) + * [R](https://r-project.org) + * etc. +3. Machine Learning: + * Instead, use machine learning tools such as: + * [pytorch](https://pytorch.org) + * [tensorflow](https://www.tensorflow.org) + * [julia](https://julialang.org) + * etc. ## Documentation: For more information on installing and using GCPy, visit the official documentation at [gcpy.readthedocs.io](https://gcpy.readthedocs.io/). - ## License -GCPy is distributed under the MIT license. Please read the license documents LICENSE.txt and AUTHORS.txt, which are located in the root folder. +GCPy is distributed under the MIT license.
Please see the [GCPy license agreement](https://github.com/geoschem/gcpy/blob/dev/LICENSE.txt) and [List of GCPy developers](https://github.com/geoschem/gcpy/blob/dev/AUTHORS.txt) for more information. + +## Requesting support +To report a bug or suggest a new feature, please see our [Support +Guidelines](https://github.com/geoschem/gcpy/blob/dev/SUPPORT.md). -## Contact +## Submitting new features -To contact us, please [open a new issue on the issue tracker connected to this repository](https://github.com/geoschem/gcpy/issues/new/choose). You can ask a question, report a bug, or request a new feature. +If you are interested in submitting code to GCPy, please see our +[Contributing Guidelines](https://github.com/geoschem/gcpy/blob/dev/CONTRIBUTING.md). diff --git a/SUPPORT.md b/SUPPORT.md new file mode 100644 index 00000000..5c53953f --- /dev/null +++ b/SUPPORT.md @@ -0,0 +1,25 @@ +# Support Guidelines + +GCPy support is maintained by the **GEOS-Chem Support Team (GCST)**, which is based jointly at Harvard University and Washington University in St. Louis. + +We track bugs, user questions, and feature requests through **[GitHub issues](https://github.com/geoschem/gcpy/issues)**. Please help out as you can in response to issues and user questions. + +## How to report a bug +We use GitHub to track issues. To report a bug, **[open a new issue](https://github.com/geoschem/gcpy/issues/new/choose)**. Please include your name, institution, and all relevant information, such as simulation log files and instructions for replicating the bug. + +## Where can I ask for help? +We use GitHub issues to support user questions. To ask a question, **[open a new issue](https://github.com/geoschem/gcpy/issues/new/choose)** and select the question template. Please include your name and institution in the issue. + +## What type of support can I expect? + +We will be happy to assist you in resolving bugs and technical issues that arise when using GCPy. User support and outreach is an important part of our mission to support the [International GEOS-Chem User Community](https://geoschem.github.io/geos-chem-people-projects-map/). + +Even though we can assist in several ways, we cannot possibly do everything. We rely on users being resourceful and willing to try to resolve problems on their own to the greatest extent possible. + +If you have a science question rather than a technical question, you should contact the relevant [GEOS-Chem Working Group(s)](https://geos-chem.seas.harvard.edu/geos-working-groups) directly. But if you do not know whom to ask, you may open a new issue (see "Where can I ask for help?" above) and we will be happy to direct your question to the appropriate person(s). + +## How to submit changes +Please see **[Contributing Guidelines](https://gcpy.readthedocs.io/en/stable/reference/CONTRIBUTING.html)**. + +## How to request an enhancement +Please see **[Contributing Guidelines](https://gcpy.readthedocs.io/en/stable/reference/CONTRIBUTING.html)**. diff --git a/benchmark/README b/benchmark/README deleted file mode 100644 index 25668355..00000000 --- a/benchmark/README +++ /dev/null @@ -1 +0,0 @@ -This directory contains development materials for GEOS-Chem benchmarking.
\ No newline at end of file diff --git a/benchmark/benchmark_emission_totals.ipynb b/benchmark/benchmark_emission_totals.ipynb deleted file mode 100644 index fa2adbfe..00000000 --- a/benchmark/benchmark_emission_totals.ipynb +++ /dev/null @@ -1,159 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "

Create tables of emission totals from benchmark output
\n", - "\n", - "This notebook illustrates how to generate the table of emissions totals from two benchmark simulations. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Import packages\n", - "import os\n", - "import os.path\n", - "import gcpy\n", - "import xarray as xr\n", - "from json import load as json_load_file" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Define the two data sets to be compared. \"Ref\" (or \"Reference\") is the prior dataset, and \"Dev\" (or \"Development\") is the current dataset." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Root folder (change for your system)\n", - "dir = '/n/home09/ryantosca/GC/python/data/benchmark'\n", - "\n", - "# Load the Ref dataset\n", - "reffile = os.path.join(dir, 'Ref', 'HEMCO.diagnostics.201607010000.nc')\n", - "refstr = '12.1.1'\n", - "refdata = xr.open_dataset(reffile)\n", - "refdata" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Load the Dev dataset\n", - "devfile = os.path.join(dir, 'Dev', 'HEMCO.diagnostics.201607010000.nc')\n", - "devstr = '12.2.0'\n", - "devdata = xr.open_dataset(devfile)\n", - "devdata" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Load a JSON file containing (1) the list of species for which emissions totals are desired, and (2) the units (e.g. \"Tg, \"Tg C\") in which totals for each species will be reported." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Path to JSON file with emission species properties\n", - "# This is in the gcpy/benchmark folder\n", - "species_path = os.path.join(os.path.dirname(gcpy.benchmark.__file__), 'emission_species.json')\n", - "\n", - "# Load the JSON file into a Python dictionary\n", - "species = json_load_file(open(species_path))\n", - "\n", - "# Print the dictionary \n", - "species" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Print the table of total emissions by species to a text file" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# File path for emissions totals table\n", - "file_emis_totals = os.path.join(dir, 'output', '{}_emission_totals.txt'.format(devstr))\n", - "\n", - "# Number of seconds in the averaging period\n", - "# 1-month benchmark simulation (July 2016) = 86400 seconds * 31 days\n", - "interval = 86400.0 * 31.0\n", - "\n", - "# Write totals to file\n", - "gcpy.create_total_emissions_table(refdata, refstr, devdata, devstr, species, file_emis_totals, interval, template=\"Emis{}_\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Print the table of emissions for each species by inventory." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# File path for inventory totals table\n", - "file_inv_totals = os.path.join(dir, 'output', '{}_inventory_totals.txt'.format(devstr))\n", - "\n", - "# Write totals to file\n", - "gcpy.create_total_emissions_table(refdata, refstr, devdata, devstr, species, file_inv_totals, interval, template=\"Inv{}_\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/benchmark/plot_driver.sh b/benchmark/plot_driver.sh deleted file mode 100755 index b30b67a9..00000000 --- a/benchmark/plot_driver.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash - -#SBATCH -c 12 -#SBATCH -N 1 -#SBATCH -t 0-4:00 -#SBATCH -p huce_intel -#SBATCH --mem=50000 -#SBATCH --mail-type=END - -#============================================================================ -# This us a sample SLURM script that you can use to submit -# the run_1mo_benchmark.py or the run_1yr_benchmark.py -# script to a computational queue. -# -# You can modify the SLURM parameters above for your setup. -#============================================================================ - -# Apply all bash initialization settings -. ~/.bashrc - -# Make sure to set multiple threads; Joblib will use multiple -# cores to parallelize certain plotting operations. -export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK -export OMP_STACKSIZE=500m - -# Turn on Python environment (edit for your setup) -conda activate gcpy - -# Uncomment this line to make 1-month benchmark plots & tables -./run_benchmark.py 1mo_benchmark.yml > plots_1mo.log - -# Uncomment this line to make 1-year benchmark plots & tables -# ./run_benchmark.py 1yr_fullchem_benchmark.yml > plots_1yr_fullchem.log - -# Uncomment this line to make 1-year TransportTracers plots & tables -# ./run_benchmark.py 1yr_tt_benchmark.yml > plots_1yr_tt.log - -# Turn off python environment -conda deactivate - -exit 0 - diff --git a/docs/environment_files/environment.yml b/docs/environment_files/environment.yml index a9497bf1..f5fc704d 100644 --- a/docs/environment_files/environment.yml +++ b/docs/environment_files/environment.yml @@ -1,41 +1,59 @@ +# ====================================================================== +# GCPy environment file +# +# Recommended installation: with Mambaforge +# $ mamba env create -n gcpy_env --file=/path/to/gcpy/environment.yml +# +# Some package versions are not the most recent, but these +# have been proven to work together. 
(Bob Yantosca, 14 Aug 2023) +# ====================================================================== name: gcpy_env channels: - conda-forge - - defaults + - nodefaults dependencies: - - cartopy=0.19.0.post1 # Geospatial data processing + - awscli # Utilities for AWS cloud + - cartopy # Geospatial data processing + - cf_xarray # CF conventions for xarray + - dask # Parallel library; backend for xarray + - gridspec # Define Earth System Model grids + - ipython # Interactive Python (used by Jupyter) + - joblib # Parallelize python code + - jupyter # Jupyter Notebook + - matplotlib # Creates plots and visualizations + - netcdf4 # Python wrapper for netCDF + - netcdf-fortran # Python wrapper for netCDF-Fortran + - numpy # Optimized mathematical functions + - pandas # Tables/timeseries manipulation + - pip # Install packages from PyPi + - pylint # Python linter + - pyproj # Python map projections library + - python<3.10 # Any python version prior to 3.10 + - pypdf # PDF utilities (bookmarks, etc.) + - recommonmark # Dependency for Sphinx + - requests # HTTP library + - scipy # Scientific python package + - sparselt>=0.1.3 # Regridding earth system model data + - tabulate # Pretty-printing for column data + - tk # Tcl/tk library + - xarray # Read data from netCDF etc files + # + # NOTE: These packages need to be pegged at specific versions + # in order to avoid an ImportError. + # -- Bob Yantosca (14 Aug 2023) + # - esmf==8.1.1 # Earth system modeling framework - esmpy==8.1.1 # Python wrapper for ESMF - - netcdf4==1.6.0 # Python wrapper for netCDF - - netcdf-fortran==4.5.4 # Python wrapper for netCDF-Fortran - - python==3.9.6 # Python version 3.9.6 - - pip==21.2.1 # Install packages from PyPi - - sparselt>=0.1.3 # Regridding earth system model data - xesmf==0.5.1 # Universal regridder - - pip: - - awscli>=1.22.83 # Utilities for AWS cloud - - cf_xarray==0.7.4 # CF conventions for xarray - - dask==2021.7.1 # Parallel library; backend for xarray - - docutils==0.16 # Convert text to other formats - - h5netcdf==0.11.0 # Python interface to netCDF4/HDF5 - - h5py==3.3.0 # Python interface to HDF5 - - matplotlib==3.4.2 # Creates plots and visualizations - - ipython==8.11.0 # Interactive Python (used by Jupyter) - - jinja2==3.0.3 # Dependency for Sphinx - - joblib==1.0.1 # Parallelize python code - - jupyter==1.0.0 # Jupyter Notebook - - numpy==1.21.1 # Optimized mathematical functions - - pandas==1.3.1 # Tables/timeseries manipulation - - pycodestyle==2.9.1 # Python style checker (formerly PEP8) - - pylint==2.15.3 # Python linter - - pypdf2==1.26.0 # PDF utilities (bookmarks, etc.) - - recommonmark==0.7.1 # Dependency for Sphinx - - requests==2.26.0 # HTTP library - - scipy==1.7.0 # Scientific python package - - sphinx==3.5.4 # Generate ReadTheDocs output - - sphinx-autoapi==1.9.0 # Sphinx autodoc style documentation - - sphinx-autobuild==2021.3.14 # Build ReadTheDos live in browser - - sphinxcontrib-bibtex==2.2.0 # ReadTheDocs bibliography style - - sphinx_rtd_theme==0.5.2 # ReadTheDocs HTML theme files - - tabulate==0.8.9 # Pretty-printing for column data - - xarray==0.17.0 # Read data from netCDF etc files + # + # NOTE: These packages need to be pegged at specific versions + # or else the ReadTheDocs output won't render properly. 
+ # -- Bob Yantosca (14 Aug 2023) + # + - docutils==0.16 # Convert text to other formats + - jinja2==3.0.3 # Dependency for Sphinx + - sphinx==3.5.4 # Generate ReadTheDocs output + - sphinx-autoapi==1.9.0 # Sphinx autodoc style documentation + - sphinx-autobuild==2021.3.14 # Build ReadTheDocs live in browser + - sphinxcontrib-bibtex==2.2.0 # ReadTheDocs bibliography style + - sphinx_rtd_theme==0.5.2 # ReadTheDocs HTML theme files diff --git a/docs/environment_files/gchp_regridding.yml b/docs/environment_files/gchp_regridding.yml deleted file mode 100644 index ee163daa..00000000 --- a/docs/environment_files/gchp_regridding.yml +++ /dev/null @@ -1,12 +0,0 @@ -name: gchp_regridding -channels: - - conda-forge -dependencies: - - python=3.9 - - esmf - - gridspec - - numpy - - requests - - sparselt - - xarray - - xesmf diff --git a/docs/source/About-GCPy.rst b/docs/source/About-GCPy.rst index c5a463ef..f10d8002 100644 --- a/docs/source/About-GCPy.rst +++ b/docs/source/About-GCPy.rst @@ -1,16 +1,22 @@ +.. |br| raw:: html + +   <br/>
+ .. _about: ########## About GCPy ########## -**GCPy** is a Python-based toolkit containing useful functions for -working specifically with the GEOS-Chem model of atmospheric chemistry -and composition. +:program:`GCPy` is a Python-based toolkit containing useful functions for +working specifically with the :program:`GEOS-Chem` model of +atmospheric chemistry and composition. -GCPy aims to build on the well-established scientific Python technical -stack, leveraging tools like cartopy and xarray to simplify the task of -working with model output and performing atmospheric chemistry analyses. +GCPy aims to build on the well-established scientific +Python technical stack, leveraging tools like :program:`cartopy`, +:program:`numpy`, and :program:`xarray` to simplify the task of +working with GEOS-Chem model output and performing atmospheric +chemistry analyses. .. _about-what-gcpy-does: @@ -18,37 +24,77 @@ working with model output and performing atmospheric chemistry analyses. What GCPy was intended to do ============================ -#. Produce plots and tables from GEOS-Chem output using simple function +#. Produce plots and tables from `GEOS-Chem + <https://geos-chem.readthedocs.io>`_ output using simple function calls. #. Generate the standard evaluation plots and tables from GEOS-Chem - benchmark output. + benchmark simulations. -#. Obtain GEOS-Chem's horizontal/vertical grid information. -#. Implement GCHP-specific regridding functionalities (e.g. cubed-sphere - to lat-lon regridding). +#. Obtain GEOS-Chem's horizontal and vertical grid information. +#. Implement `GCHP <https://gchp.readthedocs.io>`_-specific regridding + functionalities (e.g. cubed-sphere to lat-lon regridding). #. Provide example scripts for creating specific types of plots or analysis from GEOS-Chem output. +#. Provide user-submitted scripts for specific applications related to + GEOS-Chem and `HEMCO <https://hemco.readthedocs.io>`_. .. _about-what-gcpy-doesnt-do: ================================ -What GCPY was not intended to do +What GCPy was not intended to do ================================ #. General NetCDF file modification: (crop a domain, extract some variables): - - Use `xarray <http://xarray.pydata.org>`__ instead. - - Also see `Work with netCDF data + - Instead, use netCDF tools such as: + + - `xarray <http://xarray.pydata.org>`_ + - `netCDF Operators (NCO) <https://nco.sourceforge.net>`_ + - `Climate Data Operators (CDO) <https://mpimet.mpg.de/cdo>`_ + + - Also see our `Work with netCDF data + <https://geos-chem.readthedocs.io/en/latest/geos-chem-shared-docs/supplemental-guides/netcdf-guide.html>`_ - at the GEOS-Chem ReadTheDocs site. + guide at `geos-chem.readthedocs.io + <https://geos-chem.readthedocs.io>`_. #. Statistical analysis: - - Use `scipy <http://www.scipy.org>`__ and `scikit-learn - <https://scikit-learn.org>`__ tools instead. + - Instead, use statistical tools such as: + + - `scipy <http://www.scipy.org>`_ + - `scikit-learn <https://scikit-learn.org>`_ + - `R <https://r-project.org>`_ + - etc. #. Machine Learning: - - Use the standard machine learning utilities - (`pytorch <https://pytorch.org>`__, - `tensorflow <https://www.tensorflow.org>`__, - `julia <https://julialang.org>`__, etc.). + - Instead, use machine learning tools such as: + + - `pytorch <https://pytorch.org>`_ + - `tensorflow <https://www.tensorflow.org>`_ + - `julia <https://julialang.org>`_ + - etc. + +======= +License +======= + +GCPy is distributed under the `MIT license +<http://opensource.org/licenses/MIT>`_. Please see the `GCPy license +agreement <https://github.com/geoschem/gcpy/blob/dev/LICENSE.txt>`_ +and `List of GCPy developers +<https://github.com/geoschem/gcpy/blob/dev/AUTHORS.txt>`_ for more +information. + +================== +Requesting support +================== + +To report a bug or suggest a new feature, please see our `Support +Guidelines <https://github.com/geoschem/gcpy/blob/dev/SUPPORT.md>`_. + +======================= +Submitting new features +======================= + +If you are interested in submitting code to GCPy, please see our +`Contributing Guidelines <https://github.com/geoschem/gcpy/blob/dev/CONTRIBUTING.md>`_. diff --git a/docs/source/Benchmarking.rst b/docs/source/Benchmarking.rst new file mode 100644 index 00000000..1d6b2f7a --- /dev/null +++ b/docs/source/Benchmarking.rst @@ -0,0 +1,1553 @@ +.. |br| raw:: html + +   <br/>
+
+.. _bmk:
+
+############
+Benchmarking
+############
+
+The GEOS-Chem Support Team uses GCPy to produce comparison plots and
+summary tables from GEOS-Chem benchmark simulations.  In this chapter
+we will describe this capability of GCPy.
+
+.. _bmk-scripts:
+
+======================================
+Location of benchmark plotting scripts
+======================================
+
+The source code for creating benchmark plots is located in the
+:file:`gcpy/benchmark` directory tree.
+
+.. table:: **Contents of the gcpy/benchmark directory**
+
+   +-------------------------+--------------------------------------------+
+   | File or folder          | Description                                |
+   +=========================+============================================+
+   | ``run_benchmark.py``    | Benchmark driver script                    |
+   +-------------------------+--------------------------------------------+
+   | ``benchmark_slurm.sh``  | Bash script to submit ``run_benchmark.py`` |
+   |                         | as a SLURM batch job                       |
+   +-------------------------+--------------------------------------------+
+   | ``cloud/``              | Directory containing template config files |
+   |                         | (in YAML format) for 1-hour and 1-month    |
+   |                         | benchmark plot jobs on the AWS cloud.      |
+   +-------------------------+--------------------------------------------+
+   | ``config/``             | Directory containing editable config files |
+   |                         | (in YAML format) for 1-month and 1-year    |
+   |                         | benchmark plot jobs.                       |
+   +-------------------------+--------------------------------------------+
+   | ``__init__.py``         | Python import script                       |
+   +-------------------------+--------------------------------------------+
+   | ``modules/``            | Contains Python modules imported into the  |
+   |                         | ``run_benchmark.py`` script.               |
+   +-------------------------+--------------------------------------------+
+   | ``README.md``           | Readme file in Markdown format             |
+   +-------------------------+--------------------------------------------+
+
+.. note::
+
+   As of this writing, the benchmarking scripts still use several
+   :ref:`plotting ` and :ref:`tabling
+   ` functions from module
+   :file:`gcpy.benchmark_funcs`.  We are currently in the process of
+   moving the functions contained in :file:`gcpy.benchmark_funcs` to
+   the :file:`gcpy/benchmark/modules` directory.
+
+.. _bmk-steps:
+
+===============================
+Steps to create benchmark plots
+===============================
+
+Follow these instructions to create comparison plots and summary
+tables from GEOS-Chem benchmark simulations.
+
+#. Copy a configuration file from the :file:`gcpy/benchmark/config`
+   directory.
+
+   In this example we will use the configuration file that will create
+   plots from 1-year full-chemistry benchmark
+   simulations.  (Configuration files for other benchmark types have a
+   similar layout.)
+
+   .. code-block:: console
+
+      $ cp /path/to/GCPy/gcpy/benchmark/config/1yr_fullchem_benchmark.yml .
+
+   |br|
+
+#. Edit the :literal:`paths` section of the configuration file to
+   specify the proper directory paths for your system.
+
+   .. code-block:: yaml
+
+      # Configuration for 1-year FullChemBenchmark
+      #
+      # paths:
+      #   main_dir:     High-level directory containing ref & dev rundirs
+      #   results_dir:  Directory where plots/tables will be created
+      #   weights_dir:  Path to regridding weights
+      #   spcdb_dir:    Folder in which the species_database.yml file is
+      #                 located.  If set to "default", then will look for
+      #                 species_database.yml in one of the Dev rundirs.
+      #   obs_data_dir: Path to observational data (for models vs obs plots)
+      #
+      paths:
+        main_dir: /path/to/benchmark/main/dir   # EDIT AS NEEDED
+        results_dir: /path/to/BenchmarkResults  # EDIT AS NEEDED
+        weights_dir: /n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/data/ExtData/GCHP/RegriddingWeights
+        spcdb_dir: default
+        obs_data_dir: /path/to/observational/data
+
+   |br|
+
+#. Edit the :literal:`data` section to specify the directories (and
+   labels) for the Ref and Dev versions for GEOS-Chem Classic and GCHP.
+
+   .. code-block:: yaml
+
+      #
+      # data: Contains configurations for ref and dev runs
+      #   version:         Version string (must not contain spaces)
+      #   dir:             Path to run directory
+      #   outputs_subdir:  Subdirectory w/ GEOS-Chem diagnostic files
+      #   restarts_subdir: Subdirectory w/ GEOS-Chem restarts
+      #   bmk_start:       Simulation start date (YYYY-MM-DDThh:mm:ss)
+      #   bmk_end:         Simulation end date (YYYY-MM-DDThh:mm:ss)
+      #   resolution:      GCHP resolution string
+      #
+      data:
+        ref:
+          gcc:
+            version: GCC_ref
+            dir: GCC_ref
+            outputs_subdir: OutputDir
+            restarts_subdir: Restarts
+            bmk_start: "2019-01-01T00:00:00"
+            bmk_end: "2020-01-01T00:00:00"
+          gchp:
+            version: GCHP_ref
+            dir: GCHP_ref
+            outputs_subdir: OutputDir
+            restarts_subdir: Restarts
+            bmk_start: "2019-01-01T00:00:00"
+            bmk_end: "2020-01-01T00:00:00"
+            is_pre_14.0: False
+            resolution: c24
+        dev:
+          gcc:
+            version: GCC_dev
+            dir: GCC_dev
+            outputs_subdir: OutputDir
+            restarts_subdir: Restarts
+            bmk_start: "2019-01-01T00:00:00"
+            bmk_end: "2020-01-01T00:00:00"
+          gchp:
+            version: GCHP_dev
+            dir: GCHP_dev
+            outputs_subdir: OutputDir
+            restarts_subdir: Restarts
+            bmk_start: "2019-01-01T00:00:00"
+            bmk_end: "2020-01-01T00:00:00"
+            is_pre_14.0: False
+            resolution: c24
+
+   |br|
+
+#. Edit the :literal:`comparisons` section to specify the types of
+   comparisons you would like to perform.
+
+   .. code-block:: yaml
+
+      #
+      # comparisons: Specifies the comparisons to perform.
+      #
+      comparisons:
+        gcc_vs_gcc:
+          run: True
+          dir: GCC_version_comparison
+          tables_subdir: Tables
+        gchp_vs_gcc:
+          run: True
+          dir: GCHP_GCC_comparison
+          tables_subdir: Tables
+        gchp_vs_gchp:
+          run: True
+          dir: GCHP_version_comparison
+          tables_subdir: Tables
+        gchp_vs_gcc_diff_of_diffs:
+          run: True
+          dir: GCHP_GCC_diff_of_diffs
+
+#. Edit the :literal:`outputs` section to select the plots and tables
+   that you would like to generate.
+
+   .. code-block:: yaml
+
+      #
+      # outputs: Specifies the plots and tables to generate
+      #
+      outputs:
+        plot_conc: True
+        plot_emis: True
+        emis_table: True
+        plot_jvalues: True
+        plot_aod: True
+        mass_table: True
+        ops_budget_table: False
+        aer_budget_table: True
+        Ox_budget_table: True
+        ste_table: True  # GCC only
+        OH_metrics: True
+        plot_models_vs_obs: True
+        plot_options:
+          by_spc_cat: True
+          by_hco_cat: True
+
+   |br|
+
+#. Edit the :literal:`n_cores` setting if you wish to change the
+   number of computational cores to use.  If not, leave
+   :literal:`n_cores` set to :literal:`-1`, which will use as many
+   cores as possible.
+
+   .. code-block:: yaml
+
+      #
+      # n_cores: Specify the number of cores to use.
+      #  -1: Use $OMP_NUM_THREADS cores
+      #  -2: Use $OMP_NUM_THREADS - 1 cores
+      #  -N: Use $OMP_NUM_THREADS - (N-1) cores
+      #   1: Disable parallelization (use a single core)
+      #
+      n_cores: -1
+
+   |br|
+
+#. Run the :file:`run_benchmark.py` script.  You may do this in two
+   ways:
+
+   #. Direct execution from the command line:
+
+      .. code-block:: console
+
+         (gcpy_env) $ python -m gcpy.benchmark.run_benchmark 1yr_fullchem_benchmark.yml
+
+   #. Batch execution with the SLURM scheduler.
First, copy the
+      :file:`benchmark_slurm.sh` script to your current directory:
+
+      .. code-block:: console
+
+         (gcpy_env) $ cp /path/to/GCPy/gcpy/benchmark/benchmark_slurm.sh .
+
+      Next, edit your local copy of :file:`benchmark_slurm.sh` to
+      specify your SLURM partition name, number of cores, the name of
+      your Python environment, and the configuration file to use.
+
+      .. code-block:: bash
+
+         #!/bin/bash
+
+         #SBATCH -c 8
+         #SBATCH -N 1
+         #SBATCH -t 0-4:00
+         #SBATCH -p seas_compute,shared
+         #SBATCH --mem=100000
+         #SBATCH --mail-type=END
+
+         #============================================================================
+         # This is a sample SLURM script that you can use to run the GCPy
+         # benchmark plotting code as a SLURM batch job.
+         #
+         # You can modify the SLURM parameters above for your setup.
+         #
+         # Tip: Using fewer cores can reduce the amount of memory required.
+         #============================================================================
+
+         # Apply all bash initialization settings
+         . ~/.bashrc
+
+         # Make sure to set multiple threads; Joblib will use multiple
+         # cores to parallelize certain plotting operations.
+         export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
+         export OMP_STACKSIZE=500m
+
+         # Turn on Python environment (edit for your setup)
+         mamba activate gcpy_env
+
+         # Specify a YAML file with benchmark options
+         # Uncomment the file that you wish:
+         #config="1mo_benchmark.yml"
+         config="1yr_fullchem_benchmark.yml"
+         #config="1yr_tt_benchmark.yml"
+
+         # Call the run_benchmark script to make the plots
+         python -m gcpy.benchmark.run_benchmark "${config}" > benchmark.log 2>&1
+
+         # Turn off Python environment
+         mamba deactivate
+
+         exit 0
+
+      Lastly, start the SLURM batch execution with this command:
+
+      .. code-block:: console
+
+         $ sbatch benchmark_slurm.sh
+
+.. _bmk-funcs-plot:
+
+============================
+Benchmark plotting functions
+============================
+
+Module :code:`gcpy.benchmark_funcs` contains several functions for
+creating plots and tables from GEOS-Chem benchmark simulations.  The
+specific outputs generated have been requested by the `GEOS-Chem
+Steering Committee `_ in
+order to facilitate comparing benchmark output from different model
+versions.
+
+In this section, we will describe functions that create comparison
+plots from GEOS-Chem benchmark simulation output.  The functions to
+create summary tables will be described :ref:`in a separate section
+`.
+
+.. note::
+
+   We are working towards moving all benchmark-related source code to
+   the :file:`gcpy/benchmark/` directory tree.  For the time being,
+   the :file:`benchmark_funcs.py` script is located in the
+   :file:`/path/to/GCPy/gcpy/` directory.
+
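+As a quick orientation, the sketch below shows how one of these
+plotting functions might be invoked from an interactive Python
+session.  All directory paths and version labels here are
+hypothetical placeholders; substitute values for your own setup.
+
+.. code-block:: python
+
+   # Minimal sketch (hypothetical paths and version labels).
+   # As noted above, these functions currently live in gcpy.benchmark_funcs.
+   from gcpy.benchmark_funcs import make_benchmark_conc_plots
+
+   make_benchmark_conc_plots(
+       "/path/to/GCC_ref/OutputDir/GEOSChem.SpeciesConc.20190701_0000z.nc4",
+       "GCC_ref",
+       "/path/to/GCC_dev/OutputDir/GEOSChem.SpeciesConc.20190701_0000z.nc4",
+       "GCC_dev",
+       dst="./benchmark",      # folder where PDF files will be created
+       overwrite=True,
+   )
+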
+.. table:: **Functions creating comparison plots from benchmark
+           simulation output**
+
+   +-----------------------------------------------+----------------------------------------------+
+   | Function                                      | Type of 6-panel comparison plot created      |
+   +===============================================+==============================================+
+   | ``make_benchmark_aod_plots()``                | Comparison plots for aerosol optical depth   |
+   +-----------------------------------------------+----------------------------------------------+
+   | ``make_benchmark_conc_plots()``               | Species concentration                        |
+   +-----------------------------------------------+----------------------------------------------+
+   | ``make_benchmark_emis_plots()``               | Emissions (by species and category)          |
+   +-----------------------------------------------+----------------------------------------------+
+   | ``make_benchmark_jvalue_plots()``             | Comparison plots for J-values (photolysis)   |
+   +-----------------------------------------------+----------------------------------------------+
+   | ``make_benchmark_wetdep_plots()``             | Comparison plots for species wet deposition  |
+   +-----------------------------------------------+----------------------------------------------+
+
+The functions listed above create comparison plots of most GEOS-Chem
+output variables divided into specific categories, e.g. species
+categories such as :literal:`Aerosols` or :literal:`Bromine` for the
+:literal:`SpeciesConcVV` diagnostic.  In each category, these functions
+create single-level PDFs for the surface and 500 hPa, as well as zonal
+mean PDFs for the entire atmosphere and for the stratosphere only
+(defined as 1-100 hPa).  For :code:`make_benchmark_emis_plots()`, only
+single-level plots at the surface are produced.  All of these plotting
+functions include bookmarks within the generated PDFs that point to
+the pages containing each plotted quantity.  Thus these functions serve
+as tools for quickly creating comprehensive plots comparing two
+GEOS-Chem runs.  These functions are used to create the publicly
+available plots for 1-month and 1-year benchmarks of new versions of
+GEOS-Chem.
+
+Many of the plotting functions listed above use pre-defined lists of
+variables in YAML files.  If one dataset includes a variable but the
+other dataset does not, the data for that variable in the latter
+dataset will be considered to be NaN and will be plotted as such.
+
+.. _bmk-funcs-plot-aod:
+
+make_benchmark_aod_plots
+------------------------
+
+This function creates column optical depth plots using the Aerosols
+diagnostic output.
+
+.. code-block:: python
+
+   def make_benchmark_aod_plots(
+       ref,
+       refstr,
+       dev,
+       devstr,
+       varlist=None,
+       dst="./benchmark",
+       subdst=None,
+       cmpres=None,
+       overwrite=False,
+       verbose=False,
+       log_color_scale=False,
+       sigdiff_files=None,
+       weightsdir='.',
+       n_job=-1,
+       time_mean=False,
+       spcdb_dir=os.path.dirname(__file__)
+   ):
+       """
+       Creates PDF files containing plots of column aerosol optical
+       depths (AODs) for model benchmarking purposes.
+
+       Args:
+           ref: str
+               Path name for the "Ref" (aka "Reference") data set.
+           refstr: str
+               A string to describe ref (e.g. version number)
+           dev: str
+               Path name for the "Dev" (aka "Development") data set.
+               This data set will be compared against the "Reference"
+               data set.
+           devstr: str
+               A string to describe dev (e.g. version number)
+
+       Keyword Args (optional):
+           varlist: list of str
+               List of AOD variables to plot.  If not passed, then all
+               AOD variables common to both Dev and Ref will be plotted.
+               Use the varlist argument to restrict the number of
+               variables plotted to the pdf file when debugging.
+               Default value: None
+           dst: str
+               A string denoting the destination folder where a
+               PDF file containing plots will be written.
+               Default value: ./benchmark.
+           subdst: str
+               A string denoting the sub-directory of dst where PDF
+               files containing plots will be written.  In practice,
+               subdst is only needed for the 1-year benchmark output,
+               and denotes a date string (such as "Jan2016") that
+               corresponds to the month that is being plotted.
+               Default value: None
+           cmpres: string
+               Grid resolution at which to compare ref and dev data, e.g. '1x1.25'
+           overwrite: bool
+               Set this flag to True to overwrite files in the
+               destination folder (specified by the dst argument).
+               Default value: False.
+           verbose: bool
+               Set this flag to True to print extra informational output.
+               Default value: False
+           log_color_scale: bool
+               Set this flag to True to enable plotting data (not diffs)
+               on a log color scale.
+               Default value: False
+           sigdiff_files: list of str
+               Filenames that will contain the list of quantities having
+               significant differences in the column AOD plots.
+               These lists are needed in order to fill out the benchmark
+               approval forms.
+               Default value: None
+           weightsdir: str
+               Directory in which to place (and possibly reuse) xESMF regridder
+               netCDF files.
+               Default value: '.'
+           n_job: int
+               Defines the number of simultaneous workers for parallel plotting.
+               Set to 1 to disable parallel plotting.  Value of -1 allows the
+               application to decide.
+               Default value: -1
+           spcdb_dir: str
+               Directory of species_database.yml file
+               Default value: Directory of GCPy code repository
+           time_mean : bool
+               Determines if we should average the datasets over time
+               Default value: False
+       """
+
+.. _bmk-funcs-plot-conc:
+
+make_benchmark_conc_plots
+-------------------------
+
+This function creates species concentration plots using the
+SpeciesConc diagnostic output by default.  In particular:
+
+- This function is the only benchmark plotting function that supports
+  diff-of-diffs plotting, in which 4 datasets are passed and the
+  differences between two groups of :literal:`Ref` datasets vs. two
+  groups of :literal:`Dev` datasets are plotted (typically used for
+  comparing changes in GCHP vs. changes in GEOS-Chem Classic across
+  model versions). |br|
+  |br|
+
+- This is also the only benchmark plotting function that sends plots
+  to separate folders based on category (as denoted by the
+  plot_by_spc_cat flag).  The full list of species categories is
+  denoted in `benchmark_categories.yml
+  `_
+  (included in GCPy). |br|
+  |br|
+
+- In this function, parallelization occurs at the species category
+  level.  In all other functions, parallelization occurs within calls
+  to :code:`compare_single_level()` and :code:`compare_zonal_mean()`.
+
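+For example, a diff-of-diffs comparison might be set up as in the
+sketch below, where the "Ref" group holds two GEOS-Chem Classic
+versions and the "Dev" group holds the corresponding GCHP versions.
+All paths and version labels are hypothetical placeholders.
+
+.. code-block:: python
+
+   from gcpy.benchmark_funcs import make_benchmark_conc_plots
+
+   make_benchmark_conc_plots(
+       "/path/to/GCC_ref/SpeciesConc.nc4",    # Ref group, first dataset
+       "GCC_dev - GCC_ref",
+       "/path/to/GCHP_ref/SpeciesConc.nc4",   # Dev group, first dataset
+       "GCHP_dev - GCHP_ref",
+       second_ref="/path/to/GCC_dev/SpeciesConc.nc4",
+       second_dev="/path/to/GCHP_dev/SpeciesConc.nc4",
+       dst="./benchmark/diff_of_diffs",
+   )
+
+The full function signature and its arguments are listed below.
+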
+.. code-block:: python
+
+   def make_benchmark_conc_plots(
+       ref,
+       refstr,
+       dev,
+       devstr,
+       dst="./benchmark",
+       subdst=None,
+       overwrite=False,
+       verbose=False,
+       collection="SpeciesConc",
+       benchmark_type="FullChemBenchmark",
+       cmpres=None,
+       plot_by_spc_cat=True,
+       restrict_cats=[],
+       plots=["sfc", "500hpa", "zonalmean"],
+       use_cmap_RdBu=False,
+       log_color_scale=False,
+       sigdiff_files=None,
+       normalize_by_area=False,
+       cats_in_ugm3=["Aerosols", "Secondary_Organic_Aerosols"],
+       areas=None,
+       refmet=None,
+       devmet=None,
+       weightsdir='.',
+       n_job=-1,
+       second_ref=None,
+       second_dev=None,
+       time_mean=False,
+       spcdb_dir=os.path.dirname(__file__)
+   ):
+       """
+       Creates PDF files containing plots of species concentration
+       for model benchmarking purposes.
+
+       Args:
+           ref: str
+               Path name for the "Ref" (aka "Reference") data set.
+           refstr: str
+               A string to describe ref (e.g. version number)
+           dev: str
+               Path name for the "Dev" (aka "Development") data set.
+               This data set will be compared against the "Reference"
+               data set.
+           devstr: str
+               A string to describe dev (e.g. version number)
+
+       Keyword Args (optional):
+           dst: str
+               A string denoting the destination folder where a PDF
+               file containing plots will be written.
+               Default value: ./benchmark
+           subdst: str
+               A string denoting the sub-directory of dst where PDF
+               files containing plots will be written.  In practice,
+               subdst is only needed for the 1-year benchmark output,
+               and denotes a date string (such as "Jan2016") that
+               corresponds to the month that is being plotted.
+               Default value: None
+           overwrite: bool
+               Set this flag to True to overwrite files in the
+               destination folder (specified by the dst argument).
+               Default value: False
+           verbose: bool
+               Set this flag to True to print extra informational output.
+               Default value: False
+           collection: str
+               Name of collection to use for plotting.
+               Default value: "SpeciesConc"
+           benchmark_type: str
+               A string denoting the type of benchmark output to plot, options are
+               FullChemBenchmark, TransportTracersBenchmark, or CH4Benchmark.
+               Default value: "FullChemBenchmark"
+           cmpres: string
+               Grid resolution at which to compare ref and dev data, e.g. '1x1.25'
+           plot_by_spc_cat: bool
+               Set this flag to False to send plots to one file rather
+               than separate file per category.
+               Default value: True
+           restrict_cats: list of strings
+               List of benchmark categories in benchmark_categories.yml to make
+               plots for.  If empty, plots are made for all categories.
+               Default value: empty
+           plots: list of strings
+               List of plot types to create.
+               Default value: ['sfc', '500hpa', 'zonalmean']
+           log_color_scale: bool
+               Set this flag to True to enable plotting data (not diffs)
+               on a log color scale.
+               Default value: False
+           normalize_by_area: bool
+               Set this flag to true to enable normalization of data
+               by surface area (i.e. kg s-1 --> kg s-1 m-2).
+               Default value: False
+           cats_in_ugm3: list of str
+               List of benchmark categories to convert to ug/m3
+               Default value: ["Aerosols", "Secondary_Organic_Aerosols"]
+           areas: dict of xarray DataArray:
+               Grid box surface areas in m2 on Ref and Dev grids.
+               Default value: None
+           refmet: str
+               Path name for ref meteorology
+               Default value: None
+           devmet: str
+               Path name for dev meteorology
+               Default value: None
+           sigdiff_files: list of str
+               Filenames that will contain the lists of species having
+               significant differences in the 'sfc', '500hpa', and
+               'zonalmean' plots.  These lists are needed in order to
+               fill out the benchmark approval forms.
+               Default value: None
+           weightsdir: str
+               Directory in which to place (and possibly reuse) xESMF regridder
+               netCDF files.
+               Default value: '.'
+           n_job: int
+               Defines the number of simultaneous workers for parallel plotting.
+               Set to 1 to disable parallel plotting.  Value of -1 allows the
+               application to decide.
+               Default value: -1
+           second_ref: str
+               Path name for a second "Ref" (aka "Reference") data set for
+               diff-of-diffs plotting.  This dataset should have the same model
+               type and grid as ref.
+               Default value: None
+           second_dev: str
+               Path name for a second "Dev" (aka "Development") data set for
+               diff-of-diffs plotting.  This dataset should have the same model
+               type and grid as dev.
+               Default value: None
+           spcdb_dir: str
+               Directory of species_database.yml file
+               Default value: Directory of GCPy code repository
+           time_mean : bool
+               Determines if we should average the datasets over time
+               Default value: False
+       """
+
+.. _bmk-funcs-plot-emis:
+
+make_benchmark_emis_plots
+-------------------------
+
+This function generates plots of total emissions using output from
+:file:`HEMCO_diagnostics.*` (for GEOS-Chem Classic) and/or
+:file:`GCHP.Emissions.*` output files.
+
+.. code-block:: python
+
+   def make_benchmark_emis_plots(
+       ref,
+       refstr,
+       dev,
+       devstr,
+       dst="./benchmark",
+       subdst=None,
+       plot_by_spc_cat=False,
+       plot_by_hco_cat=False,
+       benchmark_type="FullChemBenchmark",
+       cmpres=None,
+       overwrite=False,
+       verbose=False,
+       flip_ref=False,
+       flip_dev=False,
+       log_color_scale=False,
+       sigdiff_files=None,
+       weightsdir='.',
+       n_job=-1,
+       time_mean=False,
+       spcdb_dir=os.path.dirname(__file__)
+   ):
+       """
+       Creates PDF files containing plots of emissions for model
+       benchmarking purposes.  This function is compatible with benchmark
+       simulation output only.  It is not compatible with transport tracers
+       emissions diagnostics.
+
+       Args:
+           ref: str
+               Path name for the "Ref" (aka "Reference") data set.
+           refstr: str
+               A string to describe ref (e.g. version number)
+           dev: str
+               Path name for the "Dev" (aka "Development") data set.
+               This data set will be compared against the "Reference"
+               data set.
+           devstr: str
+               A string to describe dev (e.g. version number)
+
+       Keyword Args (optional):
+           dst: str
+               A string denoting the destination folder where
+               PDF files containing plots will be written.
+               Default value: ./benchmark
+           subdst: str
+               A string denoting the sub-directory of dst where PDF
+               files containing plots will be written.  In practice,
+               subdst is only needed for the 1-year benchmark output,
+               and denotes a date string (such as "Jan2016") that
+               corresponds to the month that is being plotted.
+               Default value: None
+           plot_by_spc_cat: bool
+               Set this flag to True to separate plots into PDF files
+               according to the benchmark species categories (e.g. Oxidants,
+               Aerosols, Nitrogen, etc.)  These categories are specified
+               in the YAML file benchmark_species.yml.
+               Default value: False
+           plot_by_hco_cat: bool
+               Set this flag to True to separate plots into PDF files
+               according to HEMCO emissions categories (e.g. Anthro,
+               Aircraft, Bioburn, etc.)
+               Default value: False
+           benchmark_type: str
+               A string denoting the type of benchmark output to plot, options are
+               FullChemBenchmark, TransportTracersBenchmark, or CH4Benchmark.
+               Default value: "FullChemBenchmark"
+           cmpres: string
+               Grid resolution at which to compare ref and dev data, e.g. '1x1.25'
+           overwrite: bool
+               Set this flag to True to overwrite files in the
+               destination folder (specified by the dst argument).
+               Default value: False
+           verbose: bool
+               Set this flag to True to print extra informational output.
+               Default value: False
+           flip_ref: bool
+               Set this flag to True to reverse the vertical level
+               ordering in the "Ref" dataset (in case "Ref" starts
+               from the top of atmosphere instead of the surface).
+               Default value: False
+           flip_dev: bool
+               Set this flag to True to reverse the vertical level
+               ordering in the "Dev" dataset (in case "Dev" starts
+               from the top of atmosphere instead of the surface).
+               Default value: False
+           log_color_scale: bool
+               Set this flag to True to enable plotting data (not diffs)
+               on a log color scale.
+               Default value: False
+           sigdiff_files: list of str
+               Filenames that will contain the lists of species having
+               significant differences in the 'sfc', '500hpa', and
+               'zonalmean' plots.  These lists are needed in order to
+               fill out the benchmark approval forms.
+               Default value: None
+           weightsdir: str
+               Directory in which to place (and possibly reuse) xESMF regridder
+               netCDF files.
+               Default value: '.'
+           n_job: int
+               Defines the number of simultaneous workers for parallel plotting.
+               Set to 1 to disable parallel plotting.
+               Value of -1 allows the application to decide.
+               Default value: -1
+           spcdb_dir: str
+               Directory of species_database.yml file
+               Default value: Directory of GCPy code repository
+           time_mean : bool
+               Determines if we should average the datasets over time
+               Default value: False
+
+       Remarks:
+           (1) If both plot_by_spc_cat and plot_by_hco_cat are
+               False, then all emission plots will be placed into the
+               same PDF file.
+
+           (2) Emissions that are 3-dimensional will be plotted as
+               column sums.
+       """
+
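+A typical call might look like the sketch below (the HEMCO diagnostic
+file paths and version labels are hypothetical placeholders):
+
+.. code-block:: python
+
+   from gcpy.benchmark_funcs import make_benchmark_emis_plots
+
+   make_benchmark_emis_plots(
+       "/path/to/GCC_ref/OutputDir/HEMCO_diagnostics.201907010000.nc",
+       "GCC_ref",
+       "/path/to/GCC_dev/OutputDir/HEMCO_diagnostics.201907010000.nc",
+       "GCC_dev",
+       plot_by_hco_cat=True,   # one PDF per HEMCO emissions category
+       dst="./benchmark",
+   )
+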
+.. _bmk-funcs-plot-jvalue:
+
+make_benchmark_jvalue_plots
+---------------------------
+
+This function generates plots of J-values using the :literal:`JValues`
+GEOS-Chem output files.
+
+.. code-block:: python
+
+   def make_benchmark_jvalue_plots(
+       ref,
+       refstr,
+       dev,
+       devstr,
+       varlist=None,
+       dst="./benchmark",
+       subdst=None,
+       local_noon_jvalues=False,
+       cmpres=None,
+       plots=["sfc", "500hpa", "zonalmean"],
+       overwrite=False,
+       verbose=False,
+       flip_ref=False,
+       flip_dev=False,
+       log_color_scale=False,
+       sigdiff_files=None,
+       weightsdir='.',
+       n_job=-1,
+       time_mean=False,
+       spcdb_dir=os.path.dirname(__file__)
+   ):
+       """
+       Creates PDF files containing plots of J-values for model
+       benchmarking purposes.
+
+       Args:
+           ref: str
+               Path name for the "Ref" (aka "Reference") data set.
+           refstr: str
+               A string to describe ref (e.g. version number)
+           dev: str
+               Path name for the "Dev" (aka "Development") data set.
+               This data set will be compared against the "Reference"
+               data set.
+           devstr: str
+               A string to describe dev (e.g. version number)
+
+       Keyword Args (optional):
+           varlist: list of str
+               List of J-value variables to plot.  If not passed,
+               then all J-value variables common to both dev
+               and ref will be plotted.  The varlist argument can be
+               a useful way of restricting the number of variables
+               plotted to the pdf file when debugging.
+               Default value: None
+           dst: str
+               A string denoting the destination folder where a
+               PDF file containing plots will be written.
+               Default value: ./benchmark.
+           subdst: str
+               A string denoting the sub-directory of dst where PDF
+               files containing plots will be written.  In practice,
+               subdst is only needed for the 1-year benchmark output,
+               and denotes a date string (such as "Jan2016") that
+               corresponds to the month that is being plotted.
+               Default value: None
+           local_noon_jvalues: bool
+               Set this flag to plot local noon J-values.  This will
+               divide all J-value variables by the JNoonFrac counter,
+               which is the fraction of the time that it was local noon
+               at each location.
+               Default value: False
+           cmpres: string
+               Grid resolution at which to compare ref and dev data, e.g. '1x1.25'
+           plots: list of strings
+               List of plot types to create.
+               Default value: ['sfc', '500hpa', 'zonalmean']
+           overwrite: bool
+               Set this flag to True to overwrite files in the
+               destination folder (specified by the dst argument).
+               Default value: False.
+           verbose: bool
+               Set this flag to True to print extra informational output.
+               Default value: False
+           flip_ref: bool
+               Set this flag to True to reverse the vertical level
+               ordering in the "Ref" dataset (in case "Ref" starts
+               from the top of atmosphere instead of the surface).
+               Default value: False
+           flip_dev: bool
+               Set this flag to True to reverse the vertical level
+               ordering in the "Dev" dataset (in case "Dev" starts
+               from the top of atmosphere instead of the surface).
+               Default value: False
+           log_color_scale: bool
+               Set this flag to True if you wish to enable plotting data
+               (not diffs) on a log color scale.
+               Default value: False
+           sigdiff_files: list of str
+               Filenames that will contain the lists of J-values having
+               significant differences in the 'sfc', '500hpa', and
+               'zonalmean' plots.  These lists are needed in order to
+               fill out the benchmark approval forms.
+               Default value: None
+           weightsdir: str
+               Directory in which to place (and possibly reuse) xESMF regridder
+               netCDF files.
+               Default value: '.'
+           n_job: int
+               Defines the number of simultaneous workers for parallel plotting.
+               Set to 1 to disable parallel plotting.  Value of -1 allows the
+               application to decide.
+               Default value: -1
+           spcdb_dir: str
+               Directory of species_database.yml file
+               Default value: Directory of GCPy code repository
+           time_mean : bool
+               Determines if we should average the datasets over time
+               Default value: False
+
+       Remarks:
+            Will create 4 files containing J-value plots:
+               (1)  Surface values
+               (2)  500 hPa values
+               (3a) Full-column zonal mean values.
+               (3b) Stratospheric zonal mean values
+            These can be toggled on/off with the plots keyword argument.
+
+            At present, we do not yet have the capability to split the
+            plots up into separate files per category (e.g. Oxidants,
+            Aerosols, etc.).  This is primarily due to the fact that
+            we archive J-values from GEOS-Chem for individual species
+            but not family species.  We could attempt to add this
+            functionality later if there is sufficient demand.
+       """
+
+.. _bmk-funcs-plot-wetdep:
+
+make_benchmark_wetdep_plots
+---------------------------
+
+This function generates plots of wet deposition using
+:literal:`WetLossConv` and :literal:`WetLossLS` GEOS-Chem output files.
+It is currently primarily used for 1-Year Transport Tracer benchmarks,
+plotting values for the wet deposition species defined in
+`benchmark_categories.yml
+`_
+(included in GCPy).
+
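+A call to this function might look like the following sketch
+(hypothetical paths and version labels):
+
+.. code-block:: python
+
+   from gcpy.benchmark_funcs import make_benchmark_wetdep_plots
+
+   make_benchmark_wetdep_plots(
+       "/path/to/TT_ref/OutputDir/GEOSChem.WetLossConv.20190101_0000z.nc4",
+       "TT_ref",
+       "/path/to/TT_dev/OutputDir/GEOSChem.WetLossConv.20190101_0000z.nc4",
+       "TT_dev",
+       collection="WetLossConv",
+       benchmark_type="TransportTracersBenchmark",
+       dst="./benchmark",
+   )
+
+The full function signature is:
+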
+.. code-block:: python
+
+   def make_benchmark_wetdep_plots(
+       ref,
+       refstr,
+       dev,
+       devstr,
+       collection,
+       dst="./benchmark",
+       cmpres=None,
+       datestr=None,
+       overwrite=False,
+       verbose=False,
+       benchmark_type="TransportTracersBenchmark",
+       plots=["sfc", "500hpa", "zonalmean"],
+       log_color_scale=False,
+       normalize_by_area=False,
+       areas=None,
+       refmet=None,
+       devmet=None,
+       weightsdir='.',
+       n_job=-1,
+       time_mean=False,
+       spcdb_dir=os.path.dirname(__file__)
+   ):
+       """
+       Creates PDF files containing plots of species wet deposition
+       for model benchmarking purposes.
+
+       Args:
+           ref: str
+               Path name for the "Ref" (aka "Reference") data set.
+           refstr: str
+               A string to describe ref (e.g. version number)
+           dev: str
+               Path name for the "Dev" (aka "Development") data set.
+               This data set will be compared against the "Reference"
+               data set.
+           devstr: str
+               A string to describe dev (e.g. version number)
+           collection: str
+               String name of collection to plot comparisons for.
+
+       Keyword Args (optional):
+           dst: str
+               A string denoting the destination folder where a PDF
+               file containing plots will be written.
+               Default value: ./benchmark
+           datestr: str
+               A string with date information to be included in both the
+               plot pdf filename and as a destination folder subdirectory
+               for writing plots
+               Default value: None
+           benchmark_type: str
+               A string denoting the type of benchmark output to plot, options are
+               FullChemBenchmark, TransportTracersBenchmark, or CH4Benchmark.
+               Default value: "FullChemBenchmark"
+           overwrite: bool
+               Set this flag to True to overwrite files in the
+               destination folder (specified by the dst argument).
+               Default value: False.
+           verbose: bool
+               Set this flag to True to print extra informational output.
+               Default value: False.
+           plots: list of strings
+               List of plot types to create.
+               Default value: ['sfc', '500hpa', 'zonalmean']
+           normalize_by_area: bool
+               Set this flag to true to enable normalization of data
+               by surface area (i.e. kg s-1 --> kg s-1 m-2).
+               Default value: False
+           areas: dict of xarray DataArray:
+               Grid box surface areas in m2 on Ref and Dev grids.
+               Default value: None
+           refmet: str
+               Path name for ref meteorology
+               Default value: None
+           devmet: str
+               Path name for dev meteorology
+               Default value: None
+           n_job: int
+               Defines the number of simultaneous workers for parallel plotting.
+               Set to 1 to disable parallel plotting.  Value of -1 allows the
+               application to decide.
+               Default value: -1
+           spcdb_dir: str
+               Directory of species_database.yml file
+               Default value: Directory of GCPy code repository
+           time_mean : bool
+               Determines if we should average the datasets over time
+               Default value: False
+       """
+
+.. _bmk-funcs-table:
+
+===========================
+Benchmark tabling functions
+===========================
+
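+The tabling functions follow the same calling pattern as the plotting
+functions described above.  For instance, a global mass table could be
+generated with a call like this sketch (the restart-file paths and
+version labels are hypothetical placeholders):
+
+.. code-block:: python
+
+   from gcpy.benchmark_funcs import make_benchmark_mass_tables
+
+   make_benchmark_mass_tables(
+       "/path/to/GCC_ref/Restarts/GEOSChem.Restart.20200101_0000z.nc4",
+       "GCC_ref",
+       "/path/to/GCC_dev/Restarts/GEOSChem.Restart.20200101_0000z.nc4",
+       "GCC_dev",
+       dst="./benchmark/Tables",
+       overwrite=True,
+   )
+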
+.. table:: **Functions creating summary tables from benchmark
+           simulation output**
+
+   +-----------------------------------------------+----------------------------------------------+
+   | Function                                      | Type of summary table created                |
+   +===============================================+==============================================+
+   | ``make_benchmark_aerosol_tables()``           | Global aerosol burdens (1yr benchmarks only) |
+   +-----------------------------------------------+----------------------------------------------+
+   | ``make_benchmark_emis_tables()``              | Emissions (by species & inventory)           |
+   +-----------------------------------------------+----------------------------------------------+
+   | ``make_benchmark_mass_tables()``              | Total mass of each species                   |
+   +-----------------------------------------------+----------------------------------------------+
+   | ``make_benchmark_mass_accumulation_tables()`` | Mass accumulation for each species           |
+   +-----------------------------------------------+----------------------------------------------+
+   | ``make_benchmark_mass_conservation_table()``  | Total mass of a single species at hourly     |
+   |                                               | intervals (to check mass conservation)       |
+   +-----------------------------------------------+----------------------------------------------+
+   | ``make_benchmark_oh_metrics()``               | Global OH metrics (mean OH, CH4 lifetime,    |
+   |                                               | methylchloroform lifetime)                   |
+   +-----------------------------------------------+----------------------------------------------+
+   | ``make_benchmark_operations_budget()``        | Total mass of each species after each        |
+   |                                               | operation (transport, mixing, etc.)          |
+   +-----------------------------------------------+----------------------------------------------+
+
+The functions listed above create summary tables for quantities such as
+total mass of species, total mass of emissions, and OH metrics.
+
+Many of these functions use pre-defined lists of variables in YAML
+files.  If one dataset includes a variable but the other dataset does
+not, the data for that variable in the latter dataset will be
+considered to be NaN and will be reported as such.
+
+.. _bmk-funcs-table-aer:
+
+make_benchmark_aerosol_tables
+-----------------------------
+
+This function creates tables of global aerosol budgets and burdens from GEOS-Chem
+1-year full-chemistry benchmark simulation output.
+
+.. code-block:: python
+
+   def make_benchmark_aerosol_tables(
+       devdir,
+       devlist_aero,
+       devlist_spc,
+       devlist_met,
+       devstr,
+       year,
+       days_per_mon,
+       dst='./benchmark',
+       overwrite=False,
+       is_gchp=False,
+       spcdb_dir=os.path.dirname(__file__)
+   ):
+       """
+       Compute FullChemBenchmark aerosol budgets & burdens
+
+       Args:
+           devdir: str
+               Path to development ("Dev") data directory
+           devlist_aero: list of str
+               List of Aerosols collection files (different months)
+           devlist_spc: list of str
+               List of SpeciesConc collection files (different months)
+           devlist_met: list of str
+               List of meteorology collection files (different months)
+           devstr: str
+               Descriptive string for datasets (e.g. version number)
+           year: str
+               The year of the benchmark simulation (e.g. '2016').
+           days_per_mon: list of int
+               List of number of days per month for all months
+
+       Keyword Args (optional):
+           dst: str
+               Directory where budget tables will be created.
+               Default value: './benchmark'
+           overwrite: bool
+               Overwrite burden & budget tables?
+               Default value: False
+           is_gchp: bool
+               Whether datasets are for GCHP
+               Default value: False
+           spcdb_dir: str
+               Directory of species_database.yml file
+               Default value: Directory of GCPy code repository
+       """
+
+.. _bmk-funcs-table-emis:
+
+make_benchmark_emis_tables
+--------------------------
+
+This function creates tables of emissions (by species and by
+inventory) from the output of GEOS-Chem benchmark simulations.
+
+.. code-block:: python
+
+   def make_benchmark_emis_tables(
+       reflist,
+       refstr,
+       devlist,
+       devstr,
+       dst="./benchmark",
+       benchmark_type="FullChemBenchmark",
+       refmet=None,
+       devmet=None,
+       overwrite=False,
+       ref_interval=[2678400.0],
+       dev_interval=[2678400.0],
+       spcdb_dir=os.path.dirname(__file__)
+   ):
+       """
+       Creates a text file containing emission totals by species and
+       category for benchmarking purposes.
+
+       Args:
+           reflist: list of str
+               List with the path names of the emissions file or files
+               (multiple months) that will constitute the "Ref"
+               (aka "Reference") data set.
+           refstr: str
+               A string to describe ref (e.g. version number)
+           devlist: list of str
+               List with the path names of the emissions file or files
+               (multiple months) that will constitute the "Dev"
+               (aka "Development") data set
+           devstr: str
+               A string to describe dev (e.g. version number)
+
+       Keyword Args (optional):
+           dst: str
+               A string denoting the destination folder where the file
+               containing emissions totals will be written.
+               Default value: ./benchmark
+           benchmark_type: str
+               A string denoting the type of benchmark output to plot, options are
+               FullChemBenchmark, TransportTracersBenchmark or CH4Benchmark.
+               Default value: "FullChemBenchmark"
+           refmet: str
+               Path name for ref meteorology
+               Default value: None
+           devmet: str
+               Path name for dev meteorology
+               Default value: None
+           overwrite: bool
+               Set this flag to True to overwrite files in the
+               destination folder (specified by the dst argument).
+               Default value: False
+           ref_interval: list of float
+               The length of the ref data interval in seconds.  By default, interval
+               is set to [2678400.0], which is the number of seconds in July
+               (our 1-month benchmarking month).
+               Default value: [2678400.0]
+           dev_interval: list of float
+               The length of the dev data interval in seconds.  By default, interval
+               is set to [2678400.0], which is the number of seconds in July
+               (our 1-month benchmarking month).
+               Default value: [2678400.0]
+           spcdb_dir: str
+               Directory of species_database.yml file
+               Default value: Directory of GCPy code repository
+       """
+
+.. _bmk-funcs-table-mass:
+
+make_benchmark_mass_tables
+--------------------------
+
+This function creates tables of total mass for species in two
+different GEOS-Chem benchmark simulations.
+
+.. code-block:: python
+
+   def make_benchmark_mass_tables(
+       ref,
+       refstr,
+       dev,
+       devstr,
+       varlist=None,
+       dst="./benchmark",
+       subdst=None,
+       overwrite=False,
+       verbose=False,
+       label="at end of simulation",
+       spcdb_dir=os.path.dirname(__file__),
+       ref_met_extra=None,
+       dev_met_extra=None
+   ):
+       """
+       Creates a text file containing global mass totals by species and
+       category for benchmarking purposes.
+
+       Args:
+           ref: str
+               Pathname that will constitute
+               the "Ref" (aka "Reference") data set.
+           refstr: str
+               A string to describe ref (e.g. version number)
+           dev: str
+               Pathname that will constitute
+               the "Dev" (aka "Development") data set.  The "Dev"
+               data set will be compared against the "Ref" data set.
+           devstr: str
+               A string to describe dev (e.g. version number)
+
+       Keyword Args (optional):
+           varlist: list of str
+               List of variables to include in the list of totals.
+               If omitted, then all variables that are found in either
+               "Ref" or "Dev" will be included.  The varlist argument
+               can be a useful way of reducing the number of
+               variables during debugging and testing.
+               Default value: None
+           dst: str
+               A string denoting the destination folder where the file
+               containing mass totals will be written.
+               Default value: ./benchmark
+           subdst: str
+               A string denoting the sub-directory of dst where PDF
+               files containing plots will be written.  In practice,
+               subdst is only needed for the 1-year benchmark output,
+               and denotes a date string (such as "Jan2016") that
+               corresponds to the month that is being plotted.
+               Default value: None
+           overwrite: bool
+               Set this flag to True to overwrite files in the
+               destination folder (specified by the dst argument).
+               Default value: False
+           verbose: bool
+               Set this flag to True to print extra informational output.
+               Default value: False.
+           spcdb_dir: str
+               Directory of species_database.yml file
+               Default value: Directory of GCPy code repository
+           ref_met_extra: str
+               Path to ref Met file containing area data for use with restart files
+               which do not contain the Area variable.
+               Default value: ''
+           dev_met_extra: str
+               Path to dev Met file containing area data for use with restart files
+               which do not contain the Area variable.
+               Default value: ''
+       """
+
+.. _bmk-funcs-table-accum:
+
+make_benchmark_mass_accumulation_tables
+---------------------------------------
+
+This function creates tables of mass accumulation over time for species in two
+different GEOS-Chem benchmark simulations.
+
+.. code-block:: python
+
+   def create_mass_accumulation_table(
+       refdatastart,
+       refdataend,
+       refstr,
+       refperiodstr,
+       devdatastart,
+       devdataend,
+       devstr,
+       devperiodstr,
+       varlist,
+       met_and_masks,
+       label,
+       trop_only=False,
+       outfilename="GlobalMassAccum_TropStrat.txt",
+       verbose=False,
+       spcdb_dir=os.path.dirname(__file__)
+   ):
+       """
+       Creates a table of global mass accumulation for a list of species in
+       two data sets.  The data sets, which typically represent output from two
+       different model versions, are usually contained in netCDF data files.
+
+       Args:
+           refdatastart: xarray Dataset
+               The first data set to be compared (aka "Reference"),
+               at the start of the simulation period.
+           refdataend: xarray Dataset
+               The first data set to be compared (aka "Reference"),
+               at the end of the simulation period.
+           refstr: str
+               A string that can be used to identify refdata
+               (e.g. a model version number or other identifier).
+           refperiodstr: str
+               Ref simulation period start and end
+           devdatastart: xarray Dataset
+               The second data set to be compared (aka "Development"),
+               at the start of the simulation period.
+           devdataend: xarray Dataset
+               The second data set to be compared (aka "Development"),
+               at the end of the simulation period.
+           devstr: str
+               A string that can be used to identify the data set specified
+               by devfile (e.g. a model version number or other identifier).
+           devperiodstr: str
+               Dev simulation period start and end
+           varlist: list of strings
+               List of species concentration variable names to include
+               in the list of global totals.
+           met_and_masks: dict of xarray DataArray
+               Dictionary containing the meteorological variables and
+               masks for the Ref and Dev datasets.
+           label: str
+               Label to go in the header string.  Can be used to
+               pass the month & year.
+
+       Keyword Args (optional):
+           trop_only: bool
+               Set this switch to True if you wish to print totals
+               only for the troposphere.
+               Default value: False (i.e. print whole-atmosphere totals).
+           outfilename: str
+               Name of the text file which will contain the table of
+               mass accumulation totals.
+               Default value: "GlobalMassAccum_TropStrat.txt"
+           verbose: bool
+               Set this switch to True if you wish to print out extra
+               informational messages.
+               Default value: False
+           spcdb_dir: str
+               Directory of species_database.yml file
+               Default value: Directory of GCPy code repository
+
+       Remarks:
+           This method is mainly intended for model benchmarking purposes,
+           rather than as a general-purpose tool.
+
+           Species properties (such as molecular weights) are read from a
+           YAML file called "species_database.yml".
+       """
+
+.. _bmk-funcs-table-cons:
+
+make_benchmark_mass_conservation_table
+--------------------------------------
+
+This function creates a timeseries table of the global mass of the
+:literal:`PassiveTracer` species.  Usually used with output from
+1-year TransportTracers benchmark simulations.
+
+.. code-block:: python
+
+   def make_benchmark_mass_conservation_table(
+       datafiles,
+       runstr,
+       dst="./benchmark",
+       overwrite=False,
+       areapath=None,
+       spcdb_dir=os.path.dirname(__file__)
+   ):
+       """
+       Creates a text file containing global mass of the PassiveTracer
+       from Transport Tracer simulations across a series of restart files.
+
+       Args:
+           datafiles: list of str
+               Path names of restart files.
+           runstr: str
+               Name to put in the filename and header of the output file
+
+       Keyword Args (optional):
+           dst: str
+               A string denoting the destination folder where the file
+               containing mass totals will be written.
+               Default value: "./benchmark"
+           overwrite: bool
+               Set this flag to True to overwrite files in the
+               destination folder (specified by the dst argument).
+               Default value: False
+           areapath: str
+               Path to a restart file containing surface area data.
+               Default value: None
+           spcdb_dir: str
+               Path to the species_database.yml
+               Default value: points to gcpy/gcpy folder
+       """
+
+.. _bmk-funcs-table-oh:
+
+make_benchmark_oh_metrics
+-------------------------
+
+This function generates a table of OH metrics from GEOS-Chem benchmark
+simulation output.
+
+.. code-block:: python
+
+   def make_benchmark_oh_metrics(
+       ref,
+       refmet,
+       refstr,
+       dev,
+       devmet,
+       devstr,
+       dst="./benchmark",
+       overwrite=False,
+   ):
+       """
+       Creates a text file containing metrics of global mean OH, MCF lifetime,
+       and CH4 lifetime for benchmarking purposes.
+
+       Args:
+           ref: str
+               Path name of "Ref" (aka "Reference") data set file.
+           refmet: str
+               Path name of ref meteorology data set.
+           refstr: str
+               A string to describe ref (e.g. version number)
+           dev: str
+               Path name of "Dev" (aka "Development") data set file.
+               The "Dev" data set will be compared against the "Ref" data set.
+           devmet: str
+               Path name of dev meteorology data set.
+           devstr: str
+               A string to describe dev (e.g. version number)
+
+       Keyword Args (optional):
+           dst: str
+               A string denoting the destination folder where the file
+               containing OH metrics will be written.
+               Default value: "./benchmark"
+           overwrite: bool
+               Set this flag to True to overwrite files in the
+               destination folder (specified by the dst argument).
+               Default value: False
+       """
+
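+A call to this function might look like the sketch below; the
+ConcAfterChem and StateMet file paths and the version labels are
+hypothetical placeholders:
+
+.. code-block:: python
+
+   from gcpy.benchmark_funcs import make_benchmark_oh_metrics
+
+   make_benchmark_oh_metrics(
+       "/path/to/GCC_ref/OutputDir/GEOSChem.ConcAfterChem.20190701_0000z.nc4",
+       "/path/to/GCC_ref/OutputDir/GEOSChem.StateMet.20190701_0000z.nc4",
+       "GCC_ref",
+       "/path/to/GCC_dev/OutputDir/GEOSChem.ConcAfterChem.20190701_0000z.nc4",
+       "/path/to/GCC_dev/OutputDir/GEOSChem.StateMet.20190701_0000z.nc4",
+       "GCC_dev",
+       dst="./benchmark/Tables",
+   )
+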
+.. _bmk-funcs-table-ops:
+
+make_benchmark_operations_budget
+--------------------------------
+
+Creates a table with the change in species mass after each GEOS-Chem
+operation, using output from GEOS-Chem benchmark simulations.
+
+.. code-block:: python
+
+   def make_benchmark_operations_budget(
+       refstr,
+       reffiles,
+       devstr,
+       devfiles,
+       ref_interval,
+       dev_interval,
+       benchmark_type=None,
+       label=None,
+       col_sections=["Full", "Trop", "PBL", "Strat"],
+       operations=[
+           "Chemistry", "Convection", "EmisDryDep",
+           "Mixing", "Transport", "WetDep"
+       ],
+       compute_accum=True,
+       compute_restart=False,
+       require_overlap=False,
+       dst='.',
+       species=None,
+       overwrite=True,
+       verbose=False,
+       spcdb_dir=os.path.dirname(__file__)
+   ):
+       """
+       Prints the "operations budget" (i.e. change in mass after
+       each operation) from a GEOS-Chem benchmark simulation.
+
+       Args:
+           refstr: str
+               Label denoting the "Ref" version
+           reffiles: list of str
+               List of files to read from the "Ref" version.
+           devstr: str
+               Label denoting the "Dev" version
+           devfiles: list of str
+               List of files to read from the "Dev" version.
+           ref_interval: float
+               Number of seconds in the "Ref" diagnostic interval.
+           dev_interval: float
+               Number of seconds in the "Dev" diagnostic interval.
+
+       Keyword Args (optional):
+           benchmark_type: str
+               A string denoting the type of benchmark output to plot, options are
+               FullChemBenchmark, TransportTracersBenchmark, or CH4Benchmark.
+               Default value: None
+           label: str
+               Contains the date or date range for each dataframe title.
+               Default value: None
+           col_sections: list of str
+               List of column sections to calculate global budgets for.  May
+               include Strat even though not calculated in GEOS-Chem, but Full
+               and Trop must also be present to calculate Strat.
+               Default value: ["Full", "Trop", "PBL", "Strat"]
+           operations: list of str
+               List of operations to calculate global budgets for.  Accumulation
+               should not be included.  It will automatically be calculated if
+               all GEOS-Chem budget operations are passed and optional arg
+               compute_accum is True.
+               Default value: ["Chemistry","Convection","EmisDryDep",
+                               "Mixing","Transport","WetDep"]
+           compute_accum: bool
+               Optionally turn on/off accumulation calculation.  If True, will
+               only compute accumulation if all six GEOS-Chem operations budgets
+               are computed.  Otherwise a message will be printed warning that
+               accumulation will not be calculated.
+               Default value: True
+           compute_restart: bool
+               Optionally turn on/off calculation of mass change based on restart
+               file.  Only functional for "Full" column section.
+               Default value: False
+           require_overlap: bool
+               Whether to calculate budgets for only species that are present in
+               both Ref and Dev.
+               Default value: False
+           dst: str
+               Directory where plots & tables will be created.
+               Default value: '.' (directory in which function is called)
+           species: list of str
+               List of species for which budgets will be created.
+               Default value: None (all species)
+           overwrite: bool
+               Denotes whether to overwrite existing budget file.
+               Default value: True
+           verbose: bool
+               Set this switch to True if you wish to print out extra
+               informational messages.
+               Default value: False
+       """
+
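+A sketch of a typical call is shown below.  The Budget diagnostic
+file paths, version labels, and interval values (2678400 s, i.e. the
+number of seconds in July) are hypothetical placeholders:
+
+.. code-block:: python
+
+   from glob import glob
+   from gcpy.benchmark_funcs import make_benchmark_operations_budget
+
+   ref_files = sorted(glob("/path/to/GCC_ref/OutputDir/GEOSChem.Budget.*.nc4"))
+   dev_files = sorted(glob("/path/to/GCC_dev/OutputDir/GEOSChem.Budget.*.nc4"))
+
+   make_benchmark_operations_budget(
+       "GCC_ref",
+       ref_files,
+       "GCC_dev",
+       dev_files,
+       2678400.0,              # ref_interval
+       2678400.0,              # dev_interval
+       benchmark_type="FullChemBenchmark",
+       label="Jul2019",
+       dst="./benchmark/Tables",
+   )
+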
diff --git a/docs/source/Contributing.rst b/docs/source/Contributing.rst
deleted file mode 100644
index d95660b8..00000000
--- a/docs/source/Contributing.rst
+++ /dev/null
@@ -1,23 +0,0 @@
-.. _contributing:
-
-##################
-Contribute to GCPy
-##################
-
-We welcome new code additions to GCPy in the form of `pull requests
-`_.  If you have an example you
-would like to add to this ReadTheDocs site, you can add it to the
-:file:`examples` folder in the GCPy repository and submit a pull
-request with this added file.  If you would like to suggest
-changes to the documentation on this site, you can do so by
-describing your changes in a Github issue or by directly editing
-the source ReST files included in the GCPy repository and
-submitting a pull request with your changes.
-
-We do not currently have an automated testing pipeline operational for
-GCPy.  We ask that you test any changes by plotting / tabling relevant
-diagnostics using the :file:`run_benchmark.py` plotting scripts
-included in the :file:`benchmark` folder of the repository, then
-verifying your results against the results of the same script using an
-unchanged version of GCPy.  Any further testing before finalizing your
-pull request is greatly appreciated.
diff --git a/docs/source/Getting-Started-with-GCPy.rst b/docs/source/Getting-Started-with-GCPy.rst
index a21040cc..6fe74133 100644
--- a/docs/source/Getting-Started-with-GCPy.rst
+++ b/docs/source/Getting-Started-with-GCPy.rst
@@ -14,277 +14,302 @@ Installing GCPy
 ============
 Requirements
 ============
 
-:program:`GCPy` is currently supported for Linux and MacOS operating
-systems.  Due to a reliance on several packages without Windows
-support, **GCPy is not currently supported for Windows**.  You will
-receive an error message if you attempt to use GCPy on Windows.
-
-.. tip::
-
-   Windows 11 (and some later builds of Windows 10) support the
-   `Windows Subsystem for Linux (WSL)
-   `_.  If your
-   Windows version is WSL-compatible, you can install GCPy into a
-   Linux instance (such as Ubuntu 22.04) running under Windows.  At
-   present, this is the only way to use GCPy locally on a Windows
-   computer.
-
-The only essential software you need before installing GCPy is a
-distribution of the :program:`Conda` package manager.  This is used to
-create a Python environment for GCPy containing all of its software
-dependences, including what version of Python you use.  You must
-using GCPy with Python version 3.9.
-
-You can check if you already have Conda installed by running the
-following command:
+:program:`GCPy` is currently supported on the following platforms:
 
-.. code-block:: console
+#. Linux (x86_64)
+#. Windows Subsystem for Linux (running in Microsoft Windows 11)
+#. MacOS
 
-   $ conda --version
+To install GCPy, you will need:
 
-.. attention::
+- **EITHER** a distribution of the :program:`Mamba` package manager
+- **OR** a distribution of the :program:`Conda` package manager.
 
-   You must use Conda 4.12.0 or earlier to install GCPy and its
-   dependencies.  Newer versions of Conda than this will install
-   Python package versions that are incompatible with GCPy.  See
-   :ref:`Installing Conda 4.12.0 with Miniconda `
-   below.
+:program:`Mamba` is a fast drop-in replacement for the
+widely-used :program:`Conda` package manager.  We recommend using
+:program:`Mamba` to create a Python environment for GCPy.  This
+environment will contain a version of the Python interpreter
+(in this case, Python 3.9) plus packages upon which GCPy depends.
-In the future we hope to be able to resolve this installation issue
-so that you can use the latest Conda version.
+.. note::
 
-If Conda is not already installed, you must use :program:`Miniconda`
-to install Conda 4.12.0.  Miniconda is a minimal installer for Conda
-that generally includes many fewer packages in the base environment
-than are available for download.  This provides a lightweight Conda
-installation from which you can create custom Python environments with
-whatever Python packages you wish to use, including an environment
-with GCPy dependencies.
+   If your system has an existing :program:`Conda` installation, and/or
+   you do not wish to upgrade from :program:`Conda` to
+   :program:`Mamba`, you may create the Python environment for GCPy
+   with :program:`Conda`.  See the following sections for detailed
+   instructions.
 
-.. _conda412_install:
+.. _requirements-mamba:
 
-============================================
-Steps to install Conda 4.12.0 with Miniconda
-============================================
+Check if Mamba is installed
+---------------------------
 
-If you already have a Conda version prior to 4.12.0 installed on your
-system, you may skip this step and proceed to the section entitled
-:ref:`gcpy_install`.
+Check if you already have :program:`Mamba` on your system:
 
-If you need to install Conda 4.12.0, follow these steps:
+.. code-block:: console
 
-#. Download the Miniconda installer script for your operating system
-   as shown below.  The script will install Conda version 4.12.0 using
-   Python 3.9.
+   $ mamba --version
 
-   **Linux (x86_64 CPUs)**
+If :program:`Mamba` has been installed, you will see output similar to this:
 
-   .. code-block:: console
+.. code-block:: console
 
-      $ wget https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-Linux-x86_64.sh
+   mamba version X.Y.Z
+   conda version A.B.C
 
-   **MacOS (M1 CPUs)**
+If you see this output, you may skip ahead to the :ref:`gcpy-install`
+section.
 
-   .. code-block:: console
+.. _requirements-conda:
 
-      $ wget https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-MacOSX-arm64.sh
+Check if Conda is installed
+---------------------------
 
-   **MacOS (x86_64 CPUs)**
+If your system does not have :program:`Mamba` installed, check if
+:program:`Conda` is already present on your system:
 
-   .. code-block:: console
+.. code-block:: console
+
+   $ conda --version
 
-      $ wget https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-MacOSX-x86_64.sh
+If a :program:`Conda` version exists, you will see its version number
+printed to the screen:
 
-   .. tip::
+.. code-block:: console
 
-      If you do not have :program:`wget` installed on MacOS, you can
-      download it with the :program:`Homebrew` package manager:
+   conda version A.B.C
 
-      .. code-block::
+If neither :program:`Conda` nor :program:`Mamba` is installed, we
+recommend installing the :program:`Mamba` package manager yourself.
+Please proceed to the :ref:`mamba-install` section for instructions.
 
-         $ brew install wget
+.. _requirements-conda-older:
 
-   In the steps that follow, we will walk through installation using
-   the Linux installer script.  The steps are the same for MacOS; just
-   substitute the appropriate MacOS script name for the Linux script
-   name in steps 2 and 3 below. |br|
-   |br|
+Additional setup for older Conda versions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+If your :program:`Conda` version is earlier than 23.7, you will need
+to do the following additional steps.
 
-#. Change the permission of the Miniconda installer script so that it
-   is executable:
+.. code-block:: console
 
-   .. code-block:: console
+   $ conda install -n base conda-libmamba-solver
+   $ conda config --set solver libmamba
 
-      $ chmod 755 Miniconda3-py39_4.12.0-Linux-x86_64.sh
+This will install the fast :program:`Mamba` environment solver into
+your :program:`Conda` base environment.  Using the :program:`Mamba`
+solver within :program:`Conda` will speed up the Python environment
+creation considerably.
 
-   |br|
+.. note::
 
-#. Run the Miniconda installer script.
+   The :program:`Mamba` environment solver is used by default in
+   :program:`Conda` 23.7 and later.
 
-   .. code-block:: console
+You may now skip ahead to the :ref:`gcpy-install` section.
 
-      $ ./Miniconda3-py39_4.12.0-Linux-x86_64.sh
+.. _mamba-install:
 
-   |br|
+==================
+Install MambaForge
+==================
+
+We recommend installing the :program:`MambaForge` distribution, which
+is a full implementation of :program:`Mamba` (as opposed to the
+minimal :program:`MicroMamba` distribution).
 
-#. Accept the license agreement.
+Follow the instructions below to install :program:`MambaForge`:
 
-   When the installer script starts, you will be prompted to accept
-   the Miniconda license agreement:
+MacOS
+-----
+
+#. Install :program:`MambaForge` with `Homebrew `_:
 
    .. code-block:: console
 
-      Welcome to Miniconda3 py39_4.12.0
+      $ brew install mambaforge
 
-      In order to continue the installation process, please review the license
-      agreement.
-      Please, press ENTER to continue
-      >>>
+   |br|
 
-   When you press :literal:`ENTER`, you will see the license agreement
-   in all of its gory legalese detail.  Press the space bar repeatedly
-   to scroll down ot the end.  You will then see this prompt:
+#. Initialize :program:`Mamba` for your shell.  Type one of the
+   following commands:
 
    .. code-block:: console
 
-      Do you accept the license terms? [yes|no]
-      [no] >>>
+      $ mamba init bash    # If you use the bash shell (recommended!)
+      $ mamba init zsh     # If you use the zsh shell
+      $ mamba init fish    # If you use the fish shell
+
+   :program:`Mamba` will add some code to your :file:`~/.bash_profile`
+   startup script that will tell your shell where to look for
+   Python environments.
 
-   Type :literal:`yes` and hit :literal:`ENTER` to accept. |br|
-   |br|
+#. Exit your current terminal session and open a new terminal
+   session.  This will apply the changes.
 
-#. Specify the installation path.
+You may now skip ahead to the :ref:`gcpy-install` section.
 
-   You will then be prompted to provide a directory path for the
-   installation:
+
+Linux and Windows Subsystem for Linux
+-------------------------------------
+
+#. Download the :program:`MambaForge` installer script from the
+   `conda-forge GitHub releases page
+   `_:
 
    .. code-block:: console
 
-      Miniconda3 will now be installed into this location:
-      /home/YOUR-USERNAME/miniconda3
+      $ wget https://github.com/conda-forge/miniforge/releases/download/23.3.1-0/Mambaforge-23.3.1-0-Linux-x86_64.sh
 
-      - Press ENTER to confirm the location
-      - Press CTRL-C to abort the installation
-      - Or specify a different location below
+   This will download the :program:`MambaForge` installer script
+   :file:`Mambaforge-23.3.1-0-Linux-x86_64.sh` to your computer.
 
-      [/home/YOUR-USERNAME/miniconda3] >>>
+   .. note::
 
-   Press :literal:`ENTER` to continue, or specify a new path and then
-   press :literal:`ENTER`.
+      As of this writing (August 2023), the latest
+      :program:`MambaForge` version is :literal:`23.3.1-0`.
If you + find that the version has since been updated, simply replace the + version number :literal:`23.3.1-0` in the above command with the + most recent version number. - .. tip:: + |br| - If a previous Conda installation is already installed to the - default path, you may choose to delete the previous installation - folder, or install Conda 4.12.0 to a different path. +#. Change the permission of the :program:`MambaForge` installer script + so that it is executable. + + .. code-block:: console + + $ chmod 755 Mambaforge-23.3.1-0-Linux-x86_64.sh - The script will then start installing the Conda 4.12.0 package - manager. |br| |br| +#. Execute the :program:`Mambaforge` installer script. + + .. code-block:: + + $ ./Mambaforge-23.3.1-0-Linux-x86_64.sh -#. Specify post-installation options. + To update an older version of :program:`Mamba`, add the + :literal:`-u` option to the above command. |br| + |br| - You will see this text at the bottom of the screen printout upon - successful installation: +#. Review and accept the license agreement. .. code-block:: console - Preparing transaction: done - Executing transaction: done - installation finished. - Do you wish the installer to initialize Miniconda3 - by running conda init? [yes|no] - [no] >>> + In order to continue the installation process, please review the license + agreement. + Please, press ENTER to continue + >>> - Type :literal:`yes` and press :literal:`ENTER`. You will see - output similar to this: + Press :literal:`ENTER` and then :literal:`SPACE` until you reach + the end of the license agreement. Then you will be asked: .. code-block:: console - no change /home/bob/miniconda3/condabin/conda - no change /home/bob/miniconda3/bin/conda - no change /home/bob/miniconda3/bin/conda-env - no change /home/bob/miniconda3/bin/activate - no change /home/bob/miniconda3/bin/deactivate - no change /home/bob/miniconda3/etc/profile.d/conda.sh - no change /home/bob/miniconda3/etc/fish/conf.d/conda.fish - no change /home/bob/miniconda3/shell/condabin/Conda.psm1 - no change /home/bob/miniconda3/shell/condabin/conda-hook.ps1 - no change /home/bob/miniconda3/lib/python3.9/site-packages/xontrib/conda.xsh - no change /home/bob/miniconda3/etc/profile.d/conda.csh - no change /home/bob/.bashrc - No action taken. - If you'd prefer that conda's base environment not be activated on startup, - set the auto_activate_base parameter to false: - - conda config --set auto_activate_base false - - Thank you for installing Miniconda3! + Do you accept the license terms? [yes|no] + [no] >>> + Type :literal:`yes` and hit :literal:`ENTER`. |br| |br| -#. Disable the base Conda environment from being activated at startup - Close the terminal window that you used to install Conda 4.12.0 and - open a new terminal window. You will see this prompt: +#. Specify the root installation path for :program:`MambaForge`. + + .. code-block:: + + Mambaforge will now be installed into this location: + /home/YOUR-USER-NAME/mambaforge + + - Press ENTER to confirm the location + - Press CTRL-C to abort the installation + - Or specify a different location below + [/home/YOUR-USER-NAME/mambaforge] >>> + + In most cases, it should be OK to accept the default installation + location. But on some systems, users may be encouraged to install + software into a different location (e.g. if there is a faster + filesystem available than the home directory filesystem). + Consult your sysadmin or IT staff if you are unsure where to + install :program:`MambaForge`. 
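+   If you are weighing candidate locations, one quick way to compare
+   their free space from Python uses only the standard library. This
+   is a minimal sketch; the candidate paths below are hypothetical
+   examples, so substitute your own:
+
+   .. code-block:: python
+
+      import os
+      import shutil
+
+      # Compare free space (in GiB) at candidate install locations.
+      # These paths are hypothetical placeholders; use your own.
+      for path in ("/home", "/scratch"):
+          if os.path.exists(path):
+              free_gib = shutil.disk_usage(path).free / 2**30
+              print(f"{path}: {free_gib:.1f} GiB free")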
+ + Press the :literal:`ENTER` key to accept the default installation + path or type a new path and then press :literal:`ENTER`. .. code-block:: console - (base) $ + :program:`MambaForge` will download and install Python software + packages into the :file:`pkgs` subfolder of the root + installation path. Similarly, when you :ref:`create Python + environments `, these will be installed to the + :file:`envs` subfolder of the root installation path. + + |br| - By default, Conda will open the :literal:`base` environment each - time that you open a new terminal window. to disable this - behavior, type: +#. You may see this warning: .. code-block:: console - (base) $ conda config --set auto_activate_base false + WARNING: + You currently have a PYTHONPATH environment variable set. This may cause + unexpected behavior when running the Python interpreter in Mambaforge. + For best results, please verify that your PYTHONPATH only points to + directories of packages that are compatible with the Python interpreter + in Mambaforge: /home/YOUR-USER-NAME/mambaforge + + As long as your :envvar:`PYTHONPATH` environment variable only + contains the path to the root-level GCPy folder, you may safely + ignore this. (More on :envvar:`PYTHONPATH` in the :ref:`next + section `.) |br| + |br| - The next time you open a terminal window, you will just see the - regular prompt, such as; +#. Tell the installer to initialize :program:`MambaForge`. .. code-block:: console - $ + Do you wish the installer to initialize Mambaforge + by running conda init? [yes|no] + [no] >>> - (or whatever you have defined your prompt to be in your startup scripts). + Type :literal:`yes` and then :literal:`ENTER`. The installer + script will add some code to your :file:`~/.bashrc` system startup + file that will tell your shell where to find Python + environments. |br| + |br| -Now that you have installed Conda 4.12.0, you may proceed to creating -a new Conda environment for GCPy, as shown below. -.. _gcpy_install: +#. Exit your current terminal session. Start a new terminal session + to apply the updates. You are now ready to install GCPy. -========================================== -Steps to install GCPy and its dependencies -========================================== +.. _gcpy-install: -#. Install Conda if it is not already installed. +================================= +Install GCPy and its dependencies +================================= - If Conda 4.12.0 or prior is already installed on your system, you - may skip this step and proceed to the section entitled - :ref:`conda412_install`. |br| - |br| +Once you have made sure that :program:`Mamba` (or :program:`Conda`) is +present on your system, you may create a Python environment for GCPy. +Follow these steps: -#. Download the GCPy source code. +#. **Download the GCPy source code.** Create and go to the directory in which you would like to store GCPy. In - this example we will store GCPy in a :file:`python/packages` - subdirectory in your home directory, but you can store it wherever - you wish. You can also name the GCPy download whatever you want. In - this example the GCPy directory is called :file:`GCPy`. + this example we will store GCPy in your :file:`$HOME/python/` + path, but you can store it wherever you wish. You can also name + the GCPy download whatever you want. In this example the GCPy + directory is called :file:`GCPy`. .. 
code-block:: console - $ cd $HOME/python/packages + $ cd $HOME/python $ git clone https://github.com/geoschem/gcpy.git GCPy $ cd GCPy |br| -#. Create a new Python virtual environment for GCPy. +#. **Create a new Python virtual environment for GCPy.** A Python virtual environment is a named set of Python installs, e.g. packages, that are independent of other virtual @@ -292,54 +317,93 @@ Steps to install GCPy and its dependencies maintain a set of package dependencies compatible with GCPy without interfering with Python packages you use for other work. You can create a Python virtual environment from anywhere on your - system. It will be stored in your Conda installation rather than - the directory from which you create it. + system. It will be stored in your :program:`Mamba` (or + :program:`Conda`) installation rather than the directory from which + you create it. You can create a Python virtual environment using a file that lists all packages and their versions to be included in the environment. - GCPy includes such as file, environment.yml, located in the + GCPy includes such a file, :file:`environment.yml`, located in the top-level directory of the package. - Run the following command at the command prompt to create a virtual + Run one of the following commands at the command prompt to create a virtual environment for use with GCPy. You can name the environment whatever you wish. This example names it :file:`gcpy_env`. .. code-block:: console - $ conda env create -n gcpy_env --file=environment.yml + $ mamba env create -n gcpy_env --file=environment.yml # If using Mamba - Once successfully created you can load the environment by running the - following command, specifying the name of your environment. + $ conda env create -n gcpy_env --file=environment.yml # If using Conda + + A list of packages to be downloaded will be displayed. A + confirmation message will ask you if you really wish to install all + of the listed packages. Type :command:`Y` to proceed or + :command:`n` to abort. + + Once successfully created you can activate the environment with + one of these commands: .. code-block:: console - $ conda activate gcpy_env + $ mamba activate gcpy_env # If using Mamba - To exit the environment do the following: + $ conda activate gcpy_env # If using Conda + + To exit the environment, use one of these commands: .. code-block:: console - $ conda deactivate + $ mamba deactivate # If using Mamba + + $ conda deactivate # If using Conda |br| -#. Add GCPy to Python path. +#. **Add GCPy to** :envvar:`PYTHONPATH` The environment variable :envvar:`PYTHONPATH` specifies the - locations of Python libraries on your system that are not included - in your conda environment. If GCPy is included in - :envvar:`PYTHONPATH` then Python will recognize its existence - when you try to use. Add the following line to your startup script, - e.g. :file:`.bashrc`, and edit the path to where you are storing - GCPy. + locations of Python libraries on your system that were not + installed by :program:`Mamba`. + + Add the path of your GCPy source code folder to your :file:`~/.bashrc` file: .. code-block:: bash - PYTHONPATH=$PYTHONPATH:$HOME/python/packages/GCPy + export PYTHONPATH=$PYTHONPATH:$HOME/python/GCPy + + and then use + + .. code-block:: console + + $ source ~/.bashrc + + to apply the change. |br| + |br| + +#. **Set the** :envvar:`MPLBACKEND` **environment variable** + + The environment variable :envvar:`MPLBACKEND` specifies the X11 + backend that the Matplotlib package will use to render plots to the + screen. 
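+   Once you have added the export line described below, you can
+   confirm from Python that Matplotlib picked up the setting. This
+   quick check is a sketch that uses only the standard
+   :code:`matplotlib.get_backend()` call:
+
+   .. code-block:: python
+
+      import matplotlib
+
+      # Report the plotting backend that Matplotlib will use; with
+      # MPLBACKEND=tkagg exported in your shell, this should report
+      # the Tk backend (e.g. "TkAgg").
+      print(matplotlib.get_backend())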
+ + Add this line to your :file:`~/.bashrc` file on your local PC/Mac + and on any remote computer systems where you will use GCPy: + + .. code-block:: bash + + export MPLBACKEND=tkagg + + And then use: + + .. code-block:: console + + $ source ~/.bashrc to apply the change. |br| |br| -#. Perform a simple test. +#. **Perform a simple test:** Run the following commands in your terminal to check if the installation was successful. @@ -348,8 +412,8 @@ Steps to install GCPy and its dependencies $ source $HOME/.bashrc # Alternatively close and reopen your terminal $ echo $PYTHONPATH # Check it contains path to your GCPy clone - $ conda activate gcpy_env - $ conda list # Check it contains contents of gcpy env file + $ mamba activate gcpy_env + $ mamba list # Check it contains contents of gcpy env file $ python >>> import gcpy @@ -371,7 +435,7 @@ latest available. .. code-block:: console - $ cd $HOME/python/packages/GCPy + $ cd $HOME/python/GCPy $ git fetch -p $ git checkout main $ git pull @@ -380,7 +444,7 @@ You can also checkout an older version by doing the following: .. code-block:: console - $ cd $HOME/python/packages/GCPy + $ cd $HOME/python/GCPy $ git fetch -p $ git tag $ git checkout tags/version_you_want @@ -390,6 +454,6 @@ commands to then update your virtual environment: .. code-block:: console - $ source activate gcpy_env - $ cd $HOME/python/packages/GCPy - $ conda env update --file environment.yml --prune + $ mamba activate gcpy_env + $ cd $HOME/python/GCPy + $ mamba env update --file environment.yml --prune diff --git a/docs/source/Guide-to-Useful-Capabilities.rst b/docs/source/Guide-to-Useful-Capabilities.rst index 09b095c7..473adb5c 100644 --- a/docs/source/Guide-to-Useful-Capabilities.rst +++ b/docs/source/Guide-to-Useful-Capabilities.rst @@ -1,4 +1,3 @@ - .. _capabilities: ######################## @@ -11,7 +10,7 @@ function documentation. .. _capabilities-spatial: ================ -Spatial Plotting +Spatial plotting ================ One hallmark of GCPy is easy-to-use spatial plotting of GEOS-Chem @@ -23,28 +22,29 @@ mean for all layers of the atmosphere. .. _capabilities-spatial-single: -Single Panel Plots +Single panel plots ------------------ -Single panel plots are generated through the -:file:`plot.single_panel()` function. :file:`plot.single_panel()` uses -Matplotlib and Cartopy plotting capabilities while handling certain -behind the scenes operations that are necessary for plotting GEOS-Chem -data, particularly for cubed-sphere and/or zonal mean data. +Single panel plots are generated through the :file:`single_panel()` +function (located in module :file:`gcpy.plot.single_panel`). This +function uses Matplotlib and Cartopy plotting capabilities while +handling certain behind the scenes operations that are necessary for +plotting GEOS-Chem data, particularly for cubed-sphere and/or zonal +mean data. .. code:: python import xarray as xr - import gcpy.plot as gcplot import matplotlib.pyplot as plt + from gcpy.plot.single_panel import single_panel # Read data ds = xr.open_dataset( 'GEOSChem.Restart.20160701_0000z.nc4' ) - # plot surface Ozone over the North Pacific - gcplot.single_panel( + # Plot surface Ozone over the North Pacific + single_panel( ds['SpeciesRst_O3'].isel(lev=0), title='Surface Ozone over the North Pacific', extent=[80, -90, -10, 60] @@ -56,8 +56,8 @@ data, particularly for cubed-sphere and/or zonal mean data. .. 
code:: python - #plot global zonal mean of Ozone - gcplot.single_panel( + # Plot global zonal mean of Ozone + single_panel( ds['SpeciesRst_O3'], plot_type='zonal_mean', title='Global Zonal Mean of Ozone' @@ -68,15 +68,16 @@ data, particularly for cubed-sphere and/or zonal mean data. .. image:: _static/images/single\_panel\_zonal\_mean.png :align: center -`Click here `__ for an example single panel plotting script. -`Click here `__ for detailed documentation for ``single_panel()``. +:ref:`Click here ` for an example single panel plotting script. +:ref:`Click here ` for detailed documentation for +:code:`single_panel()`. .. _capabilities-spatial-sixpanel: -Six Panel Comparison Plots +Six-panel comparison plots -------------------------- -Six panel plots are used to compare results across two different model +Six-panel plots are used to compare results across two different model runs. Single level and zonal mean plotting options are both available. The two model runs do not need to be the same resolution or even the same grid type (GEOS-Chem Classic and GCHP output can be mixed at will). @@ -84,8 +85,9 @@ same grid type (GEOS-Chem Classic and GCHP output can be mixed at will). .. code:: python import xarray as xr - import gcpy.plot as gcplot import matplotlib.pyplot as plt + from gcpy.plot.compare_single_level import compare_single_level + from gcpy.plot.compare_zonal_mean import compare_zonal_mean # Read data gcc_ds = xr.open_dataset( @@ -95,8 +97,8 @@ same grid type (GEOS-Chem Classic and GCHP output can be mixed at will). 'GCHP.SpeciesConc.20160716_1200z.nc4' ) - #Plot comparison of surface ozone over the North Pacific - gcplot.compare_single_level( + # Plot comparison of surface ozone over the North Pacific + compare_single_level( gcc_ds, 'GEOS-Chem Classic', gchp_ds, @@ -109,11 +111,12 @@ same grid type (GEOS-Chem Classic and GCHP output can be mixed at will). .. image:: _static/images/six\_panel\_single\_level.png :align: center + :width: 80% .. code:: python - #Plot comparison of global zonal mean ozone - gcplot.compare_zonal_mean( + # Plot comparison of global zonal mean ozone + compare_zonal_mean( gcc_ds, 'GEOS-Chem Classic', gchp_ds, @@ -123,34 +126,34 @@ same grid type (GEOS-Chem Classic and GCHP output can be mixed at will). plt.show() .. image:: _static/images/six\_panel\_zonal\_mean.png + :align: center + :width: 80% -`Click here `__ for an example six panel plotting script. -`Click here `__ -for complete documentation for ``compare_single_level()`` and ``compare_zonal_mean()``. +:ref:`Click here ` for an example six panel plotting +script. :ref:`Click here ` for complete documentation +for :code:`compare_single_level()` and :code:`compare_zonal_mean()`. .. _capabilities-spatial-benchmark: -Comprehensive Benchmark Plotting +Comprehensive benchmark plotting -------------------------------- The GEOS-Chem Support Team uses comprehensive plotting functions from -:file:`benchmark.py` to generate full plots of benchmark +module :file:`gcpy.benchmark_funcs` to generate full plots of benchmark diagnostics. Functions like -:file:`benchmark.make_benchmark_conc_plots` by default create plots -for every variable in a given collection -(e.g. :literal:`SpeciesConc`) at multiple vertical levels (surface, -500hPa, zonal mean) and divide plots into separate folders based on -category (e.g. Chlorine, Aerosols). The GCST uses full benchmark -plotting / table scripts similar to `this example -`__ to produce plots and tables for official -model benchmarks. 
Full documentation for the benchmark plotting -functions can be found -`here `__. +:ref:`gcpy.benchmark_funcs.make_benchmark_conc_plots() +` by default create plots for every variable +in a given collection (e.g. :literal:`SpeciesConc`) at multiple +vertical levels (surface, 500hPa, zonal mean) and divide plots into +separate folders based on category (e.g. Chlorine, Aerosols). The +GEOS-Chem Support Team uses benchmark plotting and tabling +scripts (described in our :ref:`Benchmarking ` chapter) to +produce plots and tables for official model benchmarks. .. _capabilities-table: ============== -Table Creation +Table creation ============== GCPy has several dedicated functions for tabling GEOS-Chem output data @@ -159,7 +162,7 @@ used for model benchmarking purposes. .. _capabilities-table-budget: -Budget Tables +Budget tables ------------- Currently, budget tables can be created for "operations" (table shows @@ -168,39 +171,35 @@ the GEOS-Chem :literal:`Budget` diagnostics) or in overall averages for different aerosols or the Transport Tracers simulation. Operations budget tables are created using the -:file:`benchmark.make_benchmark_operations_budget` function and appear as -follows: +:ref:`gcpy.benchmark_funcs.make_benchmark_operations_budget() +` function and appear as follows: .. image:: _static/images/budget\_table.png :align: center -Full documentation for operations budget table creation can be found -`here `__. - .. _capabilities-tables-mass: -Mass Tables +Mass tables ----------- -The :file:`benchmark.make_benchmark_mass_tables` function uses species -concentrations and info from meteorology files to generate the total -mass of species in certain segments of the atmosphere (currently -global or only the troposphere). An example table is shown below: +The :ref:`gcpy.benchmark_funcs.make_benchmark_mass_tables() +` function uses species concentrations and info +from meteorology files to generate the total mass of species in +certain segments of the atmosphere (currently global or only the +troposphere). An example table is shown below: .. image:: _static/images/mass\_table.png :align: center -Full documentation for mass table creation can be found -`here `__. - .. _capabilities-tables-emissions: -Emissions Tables +Emissions tables ---------------- -The :file:`benchmark.make_benchmark_emis_tables` function creates -tables of total emissions categorized by species or by -inventory. Examples of both emissions table types are shown below: +The :ref:`gcpy.benchmark_funcs.make_benchmark_emis_tables() +` function creates tables of total emissions +categorized by species or by inventory. Examples of both emissions +table types are shown below: .. image:: _static/images/emissions\_totals.png :align: center @@ -208,9 +207,6 @@ inventory. Examples of both emissions table types are shown below: .. image:: _static/images/inventory\_totals.png :align: center -Full documentation for emissions table creation can be found `here -`__. - .. _capabilities-regridding: ========== Regridding ========== .. 
_capabilities-regridding-rules: -General Regridding Rules +General regridding rules ------------------------ -GCPy supports regridding between all horizontal GEOS-Chem grid types, including -latitude/longitude grids (the grid format of GEOS-Chem Classic), +GCPy supports regridding between all horizontal GEOS-Chem grid types, +including latitude/longitude grids (the grid format of GEOS-Chem Classic), standard cubed-sphere (the standard grid format of GCHP), and stretched-grid (an optional grid format in GCHP). GCPy contains several horizontal regridding functions built off of xESMF. GCPy automatically handles most regridding needs when plotting GEOS-Chem data. -:file:`gcpy.file_regrid` allows you to regrid GEOS-Chem Classic files between -different grid resolutions and can be called from the command line or as a -function. +:ref:`gcpy.file_regrid() ` allows you to regrid +GEOS-Chem Classic and GCHP files between different grid resolutions +and can be called from the command line or as a function. -:file:`gcpy.regrid_restart_file` allows you to regrid GCHP files between -between different grid resolutions and grid types (standard and stretched -cubed-sphere grids), and can be called from the command line. +:ref:`gcpy.regrid_restart_file ` allows you to regrid +GCHP files between different grid resolutions and grid +types (standard and stretched cubed-sphere grids), and can be +called from the command line. The 72-level and 47-level vertical grids are pre-defined in GCPy. Other vertical grids can also be defined if you provide `the A @@ -244,12 +241,13 @@ and B coefficients of the hybrid vertical grid `__. When plotting data of differing grid types or horizontal resolutions -using :file:`compare_single_level` or :file:`compare_zonal_mean`, you +using :ref:`compare_single_level() ` +or :ref:`compare_zonal_mean() `, you can specify a comparison resolution using the :literal:`cmpres` argument. This resolution will be used for the difference panels in each plot (the bottom four panels rather than the top two raw data panels). If you do not specify a comparison resolution, GCPy will automatically choose one. -For more extensive regridding information, visit the `detailed -regridding documentation `__. +For more extensive regridding information, visit the :ref:`detailed +regridding documentation `. diff --git a/docs/source/Plotting.rst b/docs/source/Plotting.rst index 45e73585..2496a0e9 100644 --- a/docs/source/Plotting.rst +++ b/docs/source/Plotting.rst @@ -4,54 +4,423 @@ Plotting ######## -This page describes in depth the plotting capabilities of GCPy, +This page describes in depth the general plotting capabilities of GCPy, including possible argument values for every plotting function. -.. _plot-csl-and-czm: +For information about GCPy functions that are specific to the +GEOS-Chem benchmark workflow, please see our :ref:`Benchmarking ` +chapter. -=========================================== -compare_single_level and compare_zonal_mean -=========================================== +.. _plot-six-panel: -:code:`gcpy.plot.compare_single_level()` and -:code:`gcpy.plot.compare_zonal_mean()` both generate six panel -plots comparing variables between two datasets. They share significant -structural overlap both in output appearance and code -implementation. This section gives an overview of the components of -the plots generated by these functions, their shared arguments, and -features unique to each function. +========================== +Six-panel comparison plots +========================== -.. 
_plot-csl-and-czm-shared: -Shared structure ---------------- +The functions listed below generate six-panel plots comparing +variables between two datasets: ++-----------------------------+------------------------------------+ +| Plotting function | Located in GCPy module | ++=============================+====================================+ +| ``compare_single_level()`` | ``gcpy.plot.compare_single_level`` | ++-----------------------------+------------------------------------+ +| ``compare_zonal_mean()`` | ``gcpy.plot.compare_zonal_mean`` | ++-----------------------------+------------------------------------+ Both :code:`compare_single_level()` and :code:`compare_zonal_mean()` generate a six panel plot for each variable passed. These plots can either be saved to PDFs or generated sequentially for visualization in the Matplotlib GUI using :code:`matplotlib.pyplot.show()`. Each plot uses data passed from a reference (:literal:`Ref`) dataset -and a development (:literal:`Dev`) dataset. +and a development (:literal:`Dev`) dataset. Both functions share +significant structural overlap both in output appearance and code +implementation. + +You can import these routines into your code with these statements: + +.. code-block:: python + + from gcpy.plot.compare_single_level import compare_single_level + from gcpy.plot.compare_zonal_mean import compare_zonal_mean Each panel has a title describing the type of panel, a colorbar for the values plotted in that panel, and the units of the data plotted in that panel. The upper two panels of each plot show actual values from -the Ref (left) and Dev (right) datasets for a given variable. The -middle two panels show the difference (:literal:`Dev - Ref`) between -the values in the Dev dataset and the values in the Ref dataset. The -left middle panel uses a full dynamic color map, while the right -middle panel caps the color map at the 5th and 95th percentiles. The -bottom two panels show the ratio (:literal:`Dev/Ref`) between the -values in the Dev dataset and the values in the Ref Dataset. -The left bottom panel uses a full dynamic color map, while the right -bottom panel caps the color map at 0.5 and 2.0. +the :literal:`Ref` (left) and :literal:`Dev` (right) datasets for a +given variable. The middle two panels show the difference +(:literal:`Dev - Ref`) between the values in the :literal:`Dev` +dataset and the values in the :literal:`Ref` dataset. The left middle +panel uses a full dynamic color map, while the right middle panel caps +the color map at the 5th and 95th percentiles. The bottom two panels +show the ratio (:literal:`Dev/Ref`) between the values in the Dev +dataset and the values in the Ref Dataset. The left bottom panel uses +a full dynamic color map, while the right bottom panel caps the color +map at 0.5 and 2.0. + +.. _plot-csl: + +Function :code:`compare_single_level` +------------------------------------- + +The :code:`compare_single_level` function takes the following +arguments: + +.. 
code-block:: python + + def compare_single_level( + refdata, + refstr, + devdata, + devstr, + varlist=None, + ilev=0, + itime=0, + refmet=None, + devmet=None, + weightsdir='.', + pdfname="", + cmpres=None, + match_cbar=True, + normalize_by_area=False, + enforce_units=True, + convert_to_ugm3=False, + flip_ref=False, + flip_dev=False, + use_cmap_RdBu=False, + verbose=False, + log_color_scale=False, + extra_title_txt=None, + extent=None, + n_job=-1, + sigdiff_list=None, + second_ref=None, + second_dev=None, + spcdb_dir=os.path.dirname(__file__), + sg_ref_path='', + sg_dev_path='', + ll_plot_func='imshow', + **extra_plot_args + ): + """ + Create single-level 3x2 comparison map plots for variables common + in two xarray Datasets. Optionally save to PDF. + + Args: + refdata: xarray dataset + Dataset used as reference in comparison + refstr: str + String description for reference data to be used in plots + devdata: xarray dataset + Dataset used as development in comparison + devstr: str + String description for development data to be used in plots + + Keyword Args (optional): + varlist: list of strings + List of xarray dataset variable names to make plots for + Default value: None (will compare all common variables) + ilev: integer + Dataset level dimension index using 0-based system. + Indexing is ambiguous when plotting differing vertical grids + Default value: 0 + itime: integer + Dataset time dimension index using 0-based system + Default value: 0 + refmet: xarray dataset + Dataset containing ref meteorology + Default value: None + devmet: xarray dataset + Dataset containing dev meteorology + Default value: None + weightsdir: str + Directory path for storing regridding weights + Default value: None (will create/store weights in + current directory) + pdfname: str + File path to save plots as PDF + Default value: Empty string (will not create PDF) + cmpres: str + String description of grid resolution at which + to compare datasets + Default value: None (will compare at highest resolution + of ref and dev) + match_cbar: bool + Set this flag to True if you wish to use the same colorbar + bounds for the Ref and Dev plots. + Default value: True + normalize_by_area: bool + Set this flag to True if you wish to normalize the Ref + and Dev raw data by grid area. Input ref and dev datasets + must include AREA variable in m2 if normalizing by area. + Default value: False + enforce_units: bool + Set this flag to True to force an error if Ref and Dev + variables have different units. + Default value: True + convert_to_ugm3: bool + Whether to convert data units to ug/m3 for plotting. + Default value: False + flip_ref: bool + Set this flag to True to flip the vertical dimension of + 3D variables in the Ref dataset. + Default value: False + flip_dev: bool + Set this flag to True to flip the vertical dimension of + 3D variables in the Dev dataset. + Default value: False + use_cmap_RdBu: bool + Set this flag to True to use a blue-white-red colormap + for plotting the raw data in both the Ref and Dev datasets. + Default value: False + verbose: bool + Set this flag to True to enable informative printout. + Default value: False + log_color_scale: bool + Set this flag to True to plot data (not diffs) + on a log color scale. + Default value: False + extra_title_txt: str + Specifies extra text (e.g. a date string such as "Jan2016") + for the top-of-plot title. + Default value: None + extent: list + Defines the extent of the region to be plotted in form + [minlon, maxlon, minlat, maxlat]. 
+ Default value plots extent of input grids. + Default value: [-1000, -1000, -1000, -1000] + n_job: int + Defines the number of simultaneous workers for parallel + plotting. Set to 1 to disable parallel plotting. + Value of -1 allows the application to decide. + Default value: -1 + sigdiff_list: list of str + Returns a list of all quantities having significant + differences (where |max(fractional difference)| > 0.1). + Default value: None + second_ref: xarray Dataset + A dataset of the same model type / grid as refdata, + to be used in diff-of-diffs plotting. + Default value: None + second_dev: xarray Dataset + A dataset of the same model type / grid as devdata, + to be used in diff-of-diffs plotting. + Default value: None + spcdb_dir: str + Directory containing species_database.yml file. + Default value: Path of GCPy code repository + sg_ref_path: str + Path to NetCDF file containing stretched-grid info + (in attributes) for the ref dataset + Default value: '' (will not be read in) + sg_dev_path: str + Path to NetCDF file containing stretched-grid info + (in attributes) for the dev dataset + Default value: '' (will not be read in) + ll_plot_func: str + Function to use for lat/lon single level plotting with + possible values 'imshow' and 'pcolormesh'. imshow is much + faster but is slightly displaced when plotting from + dateline to dateline and/or pole to pole. + Default value: 'imshow' + extra_plot_args: various + Any extra keyword arguments are passed through the + plotting functions to be used in calls to pcolormesh() (CS) + or imshow() (Lat/Lon). + """ + +and generates a comparison plot such as: + +.. image:: _static/images/six\_panel\_single\_level.png + :align: center + :width: 80% + +.. _plot-czm: + +Function :code:`compare_zonal_mean` +----------------------------------- + +.. code-block:: python + + def compare_zonal_mean( + refdata, + refstr, + devdata, + devstr, + varlist=None, + itime=0, + refmet=None, + devmet=None, + weightsdir='.', + pdfname="", + cmpres=None, + match_cbar=True, + pres_range=None, + normalize_by_area=False, + enforce_units=True, + convert_to_ugm3=False, + flip_ref=False, + flip_dev=False, + use_cmap_RdBu=False, + verbose=False, + log_color_scale=False, + log_yaxis=False, + extra_title_txt=None, + n_job=-1, + sigdiff_list=None, + second_ref=None, + second_dev=None, + spcdb_dir=os.path.dirname(__file__), + sg_ref_path='', + sg_dev_path='', + ref_vert_params=None, + dev_vert_params=None, + **extra_plot_args + ): + """ + Creates 3x2 comparison zonal-mean plots for variables + common in two xarray Datasets. Optionally save to PDF. 
+ + Args: + refdata: xarray dataset + Dataset used as reference in comparison + refstr: str + String description for reference data to be used in plots + devdata: xarray dataset + Dataset used as development in comparison + devstr: str + String description for development data to be used in plots + + Keyword Args (optional): + varlist: list of strings + List of xarray dataset variable names to make plots for + Default value: None (will compare all common 3D variables) + itime: integer + Dataset time dimension index using 0-based system + Default value: 0 + refmet: xarray dataset + Dataset containing ref meteorology + Default value: None + devmet: xarray dataset + Dataset containing dev meteorology + Default value: None + weightsdir: str + Directory path for storing regridding weights + Default value: None (will create/store weights in + current directory) + pdfname: str + File path to save plots as PDF + Default value: Empty string (will not create PDF) + cmpres: str + String description of grid resolution at which + to compare datasets + Default value: None (will compare at highest resolution + of Ref and Dev) + match_cbar: bool + Set this flag to True to use the same colorbar bounds + for both Ref and Dev plots. + Default value: True + pres_range: list of two integers + Pressure range of levels to plot [hPa]. The vertical axis + will span the outer pressure edges of levels that contain + pres_range endpoints. + Default value: [0, 2000] + normalize_by_area: bool + Set this flag to True to normalize raw data in both + Ref and Dev datasets by grid area. Input ref and dev + datasets must include AREA variable in m2 if normalizing + by area. + Default value: False + enforce_units: bool + Set this flag to True to force an error if the variables in + the Ref and Dev datasets have different units. + Default value: True + convert_to_ugm3: bool + Whether to convert data units to ug/m3 for plotting. + Default value: False + flip_ref: bool + Set this flag to True to flip the vertical dimension of + 3D variables in the Ref dataset. + Default value: False + flip_dev: bool + Set this flag to True to flip the vertical dimension of + 3D variables in the Dev dataset. + Default value: False + use_cmap_RdBu: bool + Set this flag to True to use a blue-white-red colormap for + plotting raw reference and development datasets. + Default value: False + verbose: bool + Set this flag to True to enable informative printout. + Default value: False + log_color_scale: bool + Set this flag to True to enable plotting data (not diffs) + on a log color scale. + Default value: False + log_yaxis: bool + Set this flag to True if you wish to create zonal mean + plots with a log-pressure Y-axis. + Default value: False + extra_title_txt: str + Specifies extra text (e.g. a date string such as "Jan2016") + for the top-of-plot title. + Default value: None + n_job: int + Defines the number of simultaneous workers for parallel + plotting. Set to 1 to disable parallel plotting. + Value of -1 allows the application to decide. + Default value: -1 + sigdiff_list: list of str + Returns a list of all quantities having significant + differences (where |max(fractional difference)| > 0.1). + Default value: None + second_ref: xarray Dataset + A dataset of the same model type / grid as refdata, + to be used in diff-of-diffs plotting. + Default value: None + second_dev: xarray Dataset + A dataset of the same model type / grid as devdata, + to be used in diff-of-diffs plotting. 
+ Default value: None + spcdb_dir: str + Directory containing species_database.yml file. + Default value: Path of GCPy code repository + sg_ref_path: str + Path to NetCDF file containing stretched-grid info + (in attributes) for the ref dataset + Default value: '' (will not be read in) + sg_dev_path: str + Path to NetCDF file containing stretched-grid info + (in attributes) for the dev dataset + Default value: '' (will not be read in) + ref_vert_params: list(AP, BP) of list-like types + Hybrid grid parameter A in hPa and B (unitless). + Needed if ref grid is not 47 or 72 levels. + Default value: None + dev_vert_params: list(AP, BP) of list-like types + Hybrid grid parameter A in hPa and B (unitless). + Needed if dev grid is not 47 or 72 levels. + Default value: None + extra_plot_args: various + Any extra keyword arguments are passed through the + plotting functions to be used in calls to pcolormesh() + (CS) or imshow() (Lat/Lon). + """ + +and generates a comparison plot such as: + +.. image:: _static/images/six\_panel\_zonal\_mean.png + :align: center + :width: 80% + +.. _plot-shared: + +Shared structure +---------------- Both :code:`compare_single_level()` and :code:`compare_zonal_mean()` have four positional (required) arguments. .. option:: refdata : xarray.Dataset Dataset used as reference in comparison @@ -70,33 +439,16 @@ Arguments: String description for development data to be used in plots OR list containing [dev1str, dev2str] for diff-of-diffs plots -:option:`refstr` and :option:`devstr` title the top two panels of +:option:`refstr` and :option:`devstr` title the top two panels of each six panel plot. -A basic script that calls :code:`compare_zonal_mean()` or -:code:`compare_single_level()` looks like: +Functions :code:`compare_single_level()` and +:code:`compare_zonal_mean()` share many arguments. Some of these +arguments are plotting options that change the format of the plots: -.. code-block:: python - - #!/usr/bin/env python - - import xarray as xr - import gcpy.plot as gcplot - import matplotlib.pyplot as plt - - file1 = '/path/to/ref' - file2 = '/path/to/dev' - ds1 = xr.open_dataset(file1) - ds2 = xr.open_dataset(file2) - gcplot.compare_zonal_mean(ds1, 'Ref run', ds2, 'Dev run') - #gcplot.compare_single_level(ds1, 'Ref run', ds2, 'Dev run') - plt.show() - -:code`compare_single_level()` and :code:`compare_zonal_mean()` also -share many keyword arguments. Some of these arguments are plotting -options that change the format of the plots, e.g. choosing to convert -units to ug/m\ :sup:`3`, which are automatically handled if you do not -specify a value for that argument. +For example, you may wish to convert units to ug/m\ :sup:`3` when +generating comparison plots of aerosol species. Activate this +option by setting the keyword argument :literal:`convert_to_ugm3=True`. Other arguments are necessary to achieve a correct plot depending on the format of :literal:`refdata` and :literal:`devdata` and require @@ -105,1097 +457,258 @@ specify if one of the datasets should be flipped vertically if Z coordinates in that dataset do not denote decreasing pressure as Z index increases, otherwise the vertical coordinates between your two datasets may be misaligned and result in an undesired plotting -outcome. +outcome. This may be done by setting the boolean options +:literal:`flip_ref=True` and/or :literal:`flip_dev=True`. The :literal:`n_job` argument governs the parallel plotting settings of :code:`compare_single_level()` and :code:`compare_zonal_mean()`. 
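+For example, the sketch below caps parallel plotting at four workers
+while writing the pages to a PDF. The input paths are hypothetical
+placeholders; :literal:`pdfname` and :literal:`n_job` are the keyword
+arguments documented above:
+
+.. code-block:: python
+
+   import xarray as xr
+   from gcpy.plot.compare_zonal_mean import compare_zonal_mean
+
+   # Hypothetical paths to Ref and Dev model output
+   ref = xr.open_dataset("/path/to/ref/GEOSChem.SpeciesConc.nc4")
+   dev = xr.open_dataset("/path/to/dev/GEOSChem.SpeciesConc.nc4")
+
+   # Write plots to a PDF using at most 4 parallel workers; parallel
+   # plot creation only takes effect when saving to PDF.
+   compare_zonal_mean(
+       ref, "Ref run",
+       dev, "Dev run",
+       pdfname="zonal_mean_comparison.pdf",
+       n_job=4,
+   )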
-GCPy uses the joblib library to create plots in parallel. Due to +GCPy uses the JobLib library to create plots in parallel. Due to limitations with matplotlib, this parallelization creates plots (pages) in parallel rather than individual panels on a single page. Parallel plot creation is not enabled when you do not save to a -PDF. The default value of :literal:`n_job=-1` allows the function call to -automatically scale up to, at most, the number of cores available on -your system. On systems with higher (12+) core counts, the max number of cores is not typically reached because of the process handling mechanics of joblib. -However, on lower-end systems with lower core counts or less available memory, it is advantageous to use :literal:`n_job` to limit the max number of processes. - -Shared keyword arguments: -~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. option:: varlist : list of str - - List of xarray dataset variable names to make plots for - - Default value: None (will compare all common variables) - -.. option:: itime : int - - Dataset time dimension index using 0-based system. Can only plot - values from one time index in a single function call. - - Default value: 0 - -.. option:: refmet : xarray.Dataset - - Dataset containing ref meteorology. Needed for area-based - normalizations / ug/m3 unit conversions. - - Default value: None - -.. option:: devmet : xarray.Dataset - - Dataset containing dev meteorology. Needed for area-based - normalizations and/or ug/m3 unit conversions. - - Default value: None - -.. option:: weightsdir : str - - Directory path for storing regridding weight files generated by - xESMF. - - Default value: None (will create/store weights in current directory) - -.. option:: pdfname : str - - File path to save plots as PDF. - - Default value: Empty string (will not create PDF) - -.. option:: cmpres : str - - String description of grid resolution at which to compare - datasets. The possible formats are 'int' (e.g. '48' for c48) for - a cubed-sphere resolution or 'latxlon' (e.g. '4x5') for a - lat/lon resolution. - - Default value: None (will compare at highest resolution of Ref - and Dev) - -.. option:: match_cbar : bool - - Set this flag to True to use same the colorbar bounds for both - Ref and Dev plots. This only applies to the top two panels of - each plot. - - Default value: True - -.. option:: normalize_by_area : bool - - Set this flag to True to to normalize raw data in both Ref and - Dev datasets by grid area. Either input ref and dev datasets - must include AREA variable in m2 if normalizing by area, or - refmet and devmet datasets must include Met_AREAM2 variable. - - Default value: False - -.. option:: enforce_units : bool - - Set this flag to True force an error if the variables in the Ref - and Dev datasets have different units. - - Default value: True - -.. option:: convert_to_ugm3 : bool - - Whether to convert data units to ug/m3 for plotting. refmet and - devmet cannot be None if converting to ug/m3. - - Default value: False - -.. option:: flip_ref : bool - - Set this flag to True to flip the vertical dimension of 3D - variables in the Ref dataset. - - Default value: False - -.. option:: flip_dev : bool - - Set this flag to True to flip the vertical dimension of 3D - variables in the Dev dataset. - - Default value: False - -.. option:: use_cmap_RdBu : bool - - Set this flag to True to use a blue-white-red colormap for - plotting raw ref and dev data (the top two panels). - - Default value: False - -.. 
option:: verbose : bool - - Set this flag to True to enable informative printout. - - Default value: False - -.. option:: log_color_scale : bool - - Set this flag to True to enable plotting data (only the top two - panels, not diffs) on a log color scale. - - Default value: False - -.. option:: extra_title_txt : str - - Specifies extra text (e.g. a date string such as "Jan2016") - for the top-of-plot title. - - Default value: None - -.. option:: n_job : int - - Defines the number of simultaneous workers for parallel - plotting. Only applicable when saving to PDF. - Set to 1 to disable parallel plotting. Value of -1 allows the - application to decide. - - Default value: -1 - -.. option:: sigdiff_list : list of str - - Returns a list of all quantities having significant - differences (where \|max(fractional difference)\| > 0.1). - - Default value: [] - -.. option:: second_ref : xarray.Dataset - - A dataset of the same model type / grid as refdata, to be used - in diff-of-diffs plotting. - - Default value: None - -.. option:: second_dev : xarray.Dataset - - A dataset of the same model type / grid as devdata, to be used - in diff-of-diffs plotting. - - Default value: None - -.. option:: spcdb_dir : str - - Directory containing species_database.yml file. This file is - used for unit conversions to ug/m3. GEOS-Chem run directories - include a copy of this file which may be more up-to-date than - the version included with GCPy. - - Default value: Path of GCPy code repository - -.. option:: sg_ref_path : str - - Path to NetCDF file containing stretched-grid info (in - attributes) for the ref dataset. - - Default value: '' (will not be read in) - -.. option:: sg_dev_path : str +PDF. The default value of :literal:`n_job=-1` allows the function call +to automatically scale up to, at most, the number of cores available +on your system. - Path to NetCDF file containing stretched-grid info (in - attributes) for the dev dataset. +.. note:: - Default value: '' (will not be read in) + On systems with higher (12+) core counts, the maximum number of + cores is not typically reached because of the process handling + mechanics of JobLib. However, on lower-end systems with lower core + counts or less available memory, it is advantageous to use + :literal:`n_job` to limit the max number of processes. -compare_single_level --------------------- + Due to how Python handles memory management on Linux systems, using + more cores may result in memory not returned to the system after + the plots are created. Requesting fewer cores with + :literal:`n_job` may help to avoid this situation. -.. code-block:: python - - def compare_single_level(refdata, refstr, devdata, devstr, - varlist=None, ilev=0, itime=0, - refmet=None, devmet=None, weightsdir='.', - pdfname="", cmpres=None, match_cbar=True, - normalize_by_area=False, enforce_units=True, - convert_to_ugm3=False, flip_ref=False, flip_dev=False, - use_cmap_RdBu=False, verbose=False, log_color_scale=False, - extra_title_txt=None, extent = [-1000, -1000, -1000, -1000], - n_job=-1, sigdiff_list=[], second_ref=None, second_dev=None, - spcdb_dir=os.path.dirname(__file__), sg_ref_path='', sg_dev_path='', - ll_plot_func='imshow', **extra_plot_args - ): - - -:code:`compare_single_level()` features several keyword arguments that -are not relevant to :code:`compare_zonal_mean()`, including specifying -which level to plot, the lat/lon extent of the plots, and which -underlying :code:`matplotlib.plot` function to use for plotting. 
- -Function-specific keyword arguments: -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. option:: ilev : int - - Dataset level dimension index using 0-based system - - Default value: 0 - -.. option:: extent : list of float - - Defines the extent of the region to be plotted in form - [minlon, maxlon, minlat, maxlat]. Default value plots extent of input grids. - - Default value: [-1000, -1000, -1000, -1000] - -.. option:: ll_plot_func : str - - Function to use for lat/lon single level plotting with possible - values 'imshow' and 'pcolormesh'. imshow is much faster but is - slightly displaced when plotting from dateline to dateline - and/or pole to pole. +.. _plot-six-panel-example: - Default value: 'imshow' +Example script +-------------- -.. option:: **extra_plot_args - - Any extra keyword arguments are passed through the plotting - functions to be used in calls to :code:`pcolormesh()` (CS) or - :code:`imshow()` (Lat/Lon). - -compare_zonal_mean ------------------- - -.. code-block:: python - - def compare_zonal_mean(refdata, refstr, devdata, devstr, - varlist=None, itime=0, refmet=None, devmet=None, - weightsdir='.', pdfname="", cmpres=None, - match_cbar=True, pres_range=[0, 2000], - normalize_by_area=False, enforce_units=True, - convert_to_ugm3=False, flip_ref=False, flip_dev=False, - use_cmap_RdBu=False, verbose=False, log_color_scale=False, - log_yaxis=False, extra_title_txt=None, n_job=-1, sigdiff_list=[], - second_ref=None, second_dev=None, spcdb_dir=os.path.dirname(__file__), - sg_ref_path='', sg_dev_path='', ref_vert_params=[[],[]], - dev_vert_params=[[],[]], **extra_plot_args - ): - -:code:`compare_zonal_mean()` features several keyword arguments that -are not relevant to :code:`compare_single_level()`, including -specifying the pressure range to plot (defaulting to the complete -atmosphere), whether the y-axis of the plots (pressure) should be in -log format, and hybrid vertical grid parameters to pass if one or more -of Ref and Dev do not use the typical 72-level or 47-level grids. - -Function-specific keyword arguments: -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. option:: pres_range : list of ints - - Pressure range of levels to plot [hPa]. The vertical axis will - span the outer pressure edges of levels that contain pres_range - endpoints. - - Default value: [0,2000] - -.. option:: log_yaxis : bool - - Set this flag to True if you wish to create zonal mean - plots with a log-pressure Y-axis. - - Default value: False - -.. option:: ref_vert_params : list of list-like types - - Hybrid grid parameter A in hPa and B (unitless). Needed if ref - grid is not 47 or 72 levels. - - Default value: [[], []] - -.. option:: dev_vert_params : list of list-like types - - Hybrid grid parameter A in hPa and B (unitless). Needed if dev - grid is not 47 or 72 levels. - - Default value: [[], []] - -.. option:: **extra_plot_args - - Any extra keyword arguments are passed through the plotting - functions to be used in calls to :code:`pcolormesh()`. - -============ -Single_panel -============ - -.. 
code-block:: python - - def single_panel(plot_vals, ax=None, plot_type="single_level", - grid={}, gridtype="", title="fill",comap=WhGrYlRd, - norm=[],unit="",extent=(None, None, None, None), - masked_data=None,use_cmap_RdBu=False, - log_color_scale=False, add_cb=True, - pres_range=[0, 2000], pedge=np.full((1, 1), -1), - pedge_ind=np.full((1,1), -1), log_yaxis=False, - xtick_positions=[], xticklabels=[], proj=ccrs.PlateCarree(), - sg_path='', ll_plot_func="imshow", vert_params=[[],[]], - pdfname="", return_list_of_plots=False **extra_plot_args - ): - - -:code:`gcpy.plot.single_panel()` is used to create plots containing -only one panel of GEOS-Chem data. This function is used within -:code:`compare_single_level()` and :code:`compare_zonal_mean()` to -generate each panel plot. It can also be called directly on its own to -quickly plot GEOS-Chem data in zonal mean or single level format. +Here is a basic script that calls both :code:`compare_zonal_mean()` and +:code:`compare_single_level()`: .. code-block:: python #!/usr/bin/env python import xarray as xr - import gcpy.plot as gcplot import matplotlib.pyplot as plt + from gcpy.plot.compare_single_level import compare_single_level + from gcpy.plot.compare_zonal_mean import compare_zonal_mean - ds = xr.open_dataset('GEOSChem.SpeciesConc.20160701_0000z.nc4') - #get surface ozone - plot_data = ds['SpeciesConc_O3'].isel(lev=0) - - gcplot.single_panel(plot_data) + file1 = '/path/to/ref' + file2 = '/path/to/dev' + ds1 = xr.open_dataset(file1) + ds2 = xr.open_dataset(file2) + compare_zonal_mean(ds1, 'Ref run', ds2, 'Dev run') + plt.show() + compare_single_level(ds1, 'Ref run', ds2, 'Dev run') plt.show() -Currently :code:`single_panel()` expects data with a 1-length ( or -non-existent) time dimension, as well as a 1-length or non-existent Z -dimension for single level plotting, so you'll need to do some -pre-processing of your input data as shown in the above code snippet. - -:code:`single_panel()` contains a few amenities to help with plotting -GEOS-Chem data, including automatic grid detection for lat/lon or -standard cubed-sphere xarray :code:`DataArray`-s. You can also pass NumPy -arrays to plot, though you'll need to manually pass grid info in this -case. - -Arguments: ----------- - -In addition to the specific arguments listed below, any other keyword -arguments will be forwarded to :code:`matplotlib.pyplot.imshow()` / -:code:`matplotlib.pyplot.pcolormesh()`. - -.. option:: plot_vals : xarray.DataArray or numpy array - - Single data variable GEOS-Chem output to plot - -.. option:: ax : matplotlib axes - - Axes object to plot information - - Default value: None (Will create a new axes) - -.. option:: plot_type : str - - Either "single_level" or "zonal_mean" - - Default value: "single_level" - -.. option:: grid : dict - - Dictionary mapping plot_vals to plottable coordinates - - Default value: {} (will attempt to read grid from plot_vals) - -.. option:: gridtype : str - - "ll" for lat/lon or "cs" for cubed-sphere - - Default value: "" (will automatically determine from grid) - -.. option:: title : str - - Title to put at top of plot - - Default value: "fill" (will use name attribute of plot_vals - if available) - -.. option:: comap : matplotlib Colormap - - Colormap for plotting data values - - Default value: WhGrYlRd - -.. option:: norm : list - - List with range [0..1] normalizing color range for matplotlib methods - - Default value: [] (will determine from plot_vals) - -.. 
option:: unit : str - - Units of plotted data - - Default value: "" (will use units attribute of plot_vals if available) - -.. option:: extent : tuple (minlon, maxlon, minlat, maxlat) - - Describes minimum and maximum latitude and longitude of input data - - Default value: (None, None, None, None) (Will use full extent - of plot_vals if plot is single level. - -.. option:: masked_data : numpy array - - Masked area for avoiding near-dateline cubed-sphere plotting issues - - Default value: None (will attempt to determine from plot_vals) - -.. option:: use_cmap_RdBu : bool - - Set this flag to True to use a blue-white-red colormap - - Default value: False - -.. option:: log_color_scale : bool - - Set this flag to True to use a log-scale colormap - - Default value: False - -.. option:: add_cb : bool - - Set this flag to True to add a colorbar to the plot - - Default value: True - -.. option:: pres_range : list of int - - Range from minimum to maximum pressure for zonal mean plotting - - Default value: [0, 2000] (will plot entire atmosphere) - -.. option:: pedge : numpy array - - Edge pressures of vertical grid cells in plot_vals for zonal mean plotting - - Default value: np.full((1, 1), -1) (will determine automatically) - -.. option:: pedge_ind : numpy array - - Index of edge pressure values within pressure range in - plot_vals for zonal mean plotting - - Default value: np.full((1, 1), -1) (will determine automatically) - -.. option:: log_yaxis : bool - - Set this flag to True to enable log scaling of pressure in - zonal mean plots - - Default value: False - -.. option:: xtick_positions : list of float - - Locations of lat/lon or lon ticks on plot - - Default value: [] (will place automatically for zonal mean plots) - -.. option:: xticklabels : list of str - - Labels for lat/lon ticks - - Default value: [] (will determine automatically from xtick_positions) - -.. option:: sg_path : str - - Path to NetCDF file containing stretched-grid info (in - attributes) for plot_vals - - Default value: '' (will not be read in) - -.. option:: ll_plot_func : str - - Function to use for lat/lon single level plotting with - possible values 'imshow' and 'pcolormesh'. - imshow is much faster but is slightly displaced when plotting - from dateline to dateline and/or pole to pole. - - Default value: 'imshow' - -.. option:: vert_params : list(AP, BP) of list-like types - - Hybrid grid parameter A in hPa and B (unitless). Needed if - grid is not 47 or 72 levels. - - Default value: [[], []] - -.. option:: pdfname : str - - File path to save plots as PDF - - Default value: "" (will not create PDF) - -.. option:: extra_plot_args : various - - Any extra keyword arguments are passed to calls to - pcolormesh() (CS) or imshow() (Lat/Lon). - - -Function-specific return value: -------------------------------- - -:code:`single_panel()` returns the following object: - -.. option:: plot : matplotlib plot - - Plot object created from input - -.. _plot-bmk: - -============================ -Benchmark Plotting Functions -============================ - -:code:`gcpy.benchmark` contains several functions for plotting -GEOS-Chem output in formats requested by the GEOS-Chem Steering -Committee. The primary use of these functions is to create plots of -most GEOS-Chem output variables divided into specific categories, -e.g. species categories such as Aerosols or Bromine for the -SpeciesConc diagnostic. 
In each category, these functions create -single level PDFs for the surface and 500hPa and zonal mean PDFs for -the entire a tmosphere and only the stratosphere (defined a 1-100hPa). -For :code:`make_benchmark_emis_plots()`, only single level plots at -the surface are produced. -All of these plotting functions include bookmarks within the generated PDFs that point to the pages containing each plotted quantity. -Thus these functions serve as tools for quickly creating comprehensive plots comparing two GEOS-Chem runs. These functions are used to create -the publicly available plots for 1-month and 1-year benchmarks of new versions of GEOS-Chem. - -Many of these functions use pre-defined (via YAML files included in GCPy) lists of variables. If one dataset includes a variable but the other dataset does not, -the data for that variable in the latter dataset will be considered to be NaN and will be plotted as such. - -Shared structure of benchmark functions ---------------------------------------- - -Each of the :code:`gcpy.benchmark.make_benchmark_*_plots()` functions -requires 4 arguments to specify the ref and dev datasets. - -Shared arguments: -~~~~~~~~~~~~~~~~~ - -.. option:: ref: str - - Path name for the "Ref" (aka "Reference") data set. - -.. option:: refstr : str - - A string to describe ref (e.g. version number) - -.. option:: dev : str - - Path name for the "Dev" (aka "Development") data set. - This data set will be compared against the "Reference" data set. - -.. option:: devstr : str - - A string to describe dev (e.g. version number) - -Note that the :literal:`ref` and :literal:`dev` arguments in -:code:`make_benchmark_*_plots()` are the paths to NetCDF files, rather -than xarray Datasets as in :code:`compare_single_level()` and -:code:`compare_zonal_mean()`. The :code:`make_benchmark_*_plots()` -functions internally open these files as xarray Datasets and pass -those datasets to :code:`compare_single_level()` and -:code:`compare_zonal_mean()`. - -The benchmark plotting functions share several keyword -arguments. Keyword arguments that do not share the same purpose across -benchmark plotting functions have :literal:`NOTE:` in the description. - -Shared keyword arguments: -~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. option:: dst : str - - A string denoting the destination folder where a - PDF file containing plots will be written. - - Default value: ./benchmark. - -.. option:: subdst : str - - A string denoting the sub-directory of dst where PDF - files containing plots will be written. In practice, - subdst is only needed for the 1-year benchmark output, - and denotes a date string (such as "Jan2016") that - corresponds to the month that is being plotted. - NOTE: Not available in wetdep_plots - - Default value: None - -.. option:: overwrite : bool - - Set this flag to True to overwrite previously created files in the - destination folder (specified by the dst argument). - - Default value: False. - -.. option:: verbose : bool - - Set this flag to True to print extra informational output. - - Default value: False. - -.. option:: log_color_scale: bool - - Set this flag to True to enable plotting data (the top two panels - of each plot, not diffs) on a log color scale. - - Default value: False - -.. option:: sigdiff_files : list of str - - Filenames that will contain the list of quantities having - significant differences between datasets. Three files are used: - one for surface, one for 500hPa, and one for zonal mean. - These lists are needed in order to fill out the benchmark - approval forms. 
- - .. note:: Not available in wetdep_plots - - Default value: None - -.. option:: spcdb_dir : str - - Directory containing species_database.yml file. This file is - used for unit conversions to ug/m3. GEOS-Chem run directories - include a copy of this file which may be more up-to-date than - the version included with GCPy. - - Default value: Path of GCPy code repository - -.. option:: weightsdir : str - - Directory in which to place (and possibly reuse) xESMF regridder - netCDF files. - - Default value: '.' - -.. option:: n_job : int - - Defines the number of simultaneous workers for parallel plotting. - Set to 1 to disable parallel plotting. Value of -1 allows the - application to decide. - - .. note:: - - In :code:`make_benchmark_conc_plots()`, parallelization occurs - at the species category level. In all other functions, - parallelization occurs within calls to :code:`compare_single_level()` - and :code:`compare_zonal_mean()`. - - Default value: -1 in :code:`make_benchmark_conc_plots`, 1 in all others - -make_benchmark_aod_plots ------------------------- - -.. code-block:: python - - def make_benchmark_aod_plots(ref, refstr, dev, devstr, varlist=None, - dst="./benchmark", subdst=None, overwrite=False, verbose=False, - log_color_scale=False, sigdiff_files=None, weightsdir='.', n_job=-1, - spcdb_dir=os.path.dirname(__file__) - ): - - """ - Creates PDF files containing plots of column aerosol optical - depths (AODs) for model benchmarking purposes. - """ - -Function-specific keyword args: - -.. option:: varlist : list of str - - List of AOD variables to plot. If not passed, then all - AOD variables common to both Dev and Ref will be plotted. - Use the varlist argument to restrict the number of - variables plotted to the pdf file when debugging. - - Default value: None - - -This function creates column optical depth plots using the Aerosols diagnostic output. - -make_benchmark_conc_plots -------------------------- - -.. code-block:: python - - def make_benchmark_conc_plots(ref, refstr, dev, devstr, dst="./benchmark", - subdst=None, overwrite=False, verbose=False, collection="SpeciesConc", - benchmark_type="FullChemBenchmark", plot_by_spc_cat=True, restrict_cats=[], - plots=["sfc", "500hpa", "zonalmean"], use_cmap_RdBu=False, log_color_scale=False, - sigdiff_files=None, normalize_by_area=False, cats_in_ugm3=["Aerosols", "Secondary_Organic_Aerosols"], - areas=None, refmet=None, devmet=None, weightsdir='.', n_job=-1, second_ref=None - second_dev=None, spcdb_dir=os.path.dirname(__file__) - ): - - """ - Creates PDF files containing plots of species concentration - for model benchmarking purposes. - """ - -Function-specific keyword arguments: -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. option:: collection : str - - Name of collection to use for plotting. - - Default value: "SpeciesConc" - -.. option:: benchmark_type: str - - A string denoting the type of benchmark output to plot, - either FullChemBenchmark or TransportTracersBenchmark. - - Default value: "FullChemBenchmark" - -.. option:: plot_by_spc_cat: logical - - Set this flag to False to send plots to one file rather - than separate file per category. - - Default value: True - -.. option:: restrict_cats : list of str - - List of benchmark categories in benchmark_categories.yml to make - plots for. If empty, plots are made for all categories. - - Default value: empty - -.. option:: plots : list of str - - List of plot types to create. - - Default value: ['sfc', '500hpa', 'zonalmean'] - -.. 
option:: normalize_by_area: bool - - Set this flag to true to enable normalization of data - by surfacea area (i.e. kg s-1 --> kg s-1 m-2). - - Default value: False - -.. option:: cats_in_ugm3: list of str - - List of benchmark categories to to convert to ug/m3 - - Default value: ["Aerosols", "Secondary_Organic_Aerosols"] - -.. option:: areas : dict of xarray DataArray: - - Grid box surface areas in m2 on Ref and Dev grids. - - Default value: None - -.. option:: refmet : str - - Path name for ref meteorology - - Default value: None - -.. option:: devmet : str - - Path name for dev meteorology - - Default value: None - -.. option:: second_ref: str - - Path name for a second "Ref" (aka "Reference") data set for - diff-of-diffs plotting. This dataset should have the same model - type and grid as ref. - - Default value: None - -.. option:: second_dev: str - - Path name for a second "Ref" (aka "Reference") data set for - diff-of-diffs plotting. This dataset should have the same model - type and grid as ref. - - Default value: None - - -This function creates species concentration plots using the -:literal:`SpeciesConc` diagnostic output by default. This function is the -only benchmark plotting function that supports diff-of-diffs plotting, -in which 4 datasets are passed and the differences between two groups -of Ref datasets vs. two groups of Dev datasets is plotted (typically -used for comparing changes in GCHP vs. changes in GEOS-Chem Classic -across model versions). This is also the only benchmark plotting -function that sends plots to separate folders based on category -(as denoted by the :literal:`plot_by_spc_cat` flag). The full list of -species categories is denoted in :file:`benchmark_categories.yml` -(included in GCPy) as follows: - -.. code-block:: python +.. 
_plot-single-panel: - """ - FullChemBenchmark: - Aerosols: - Dust: DST1, DST2, DST3, DST4 - Inorganic: NH4, NIT, SO4 - OC_BC: BCPI, BCPO, OCPI, OCPO - SOA: Complex_SOA, Simple_SOA - Sea_Salt: AERI, BrSALA, BrSALC, ISALA, ISALC, NITs, - SALA, SALAAL, SALACL, SALC, SALCAL, SALCCL, SO4s - Bromine: Bry, BrOx, Br, Br2, BrCl, BrNO2, BrNO3, BrO, - CH3Br, CH2Br2, CHBr3, HOBr, HBr - Chlorine: Cly, ClOx, Cl, ClO, Cl2, Cl2O2, ClOO, ClNO2, ClNO3, - CCl4, CFCs, CH3Cl, CH2Cl2, CH3CCl3, CHCl3, HOCl, HCl, Halons, HCFCs, OClO - Iodine: Iy, IxOy, I, I2, IBr, ICl, IO, ION, IONO2, CH3I, CH2I2, - CH2ICl, CH2IBr, HI, HOI, OIO - Nitrogen: NOy, NOx, HNO2, HNO3, HNO4, MPAN, NIT, 'NO', NO2, NO3, - N2O5, MPN, PAN, PPN, N2O, NHx, NH3, NH4, MENO3, ETNO3, IPRNO3, NPRNO3 - Oxidants: O3, CO, OH, NOx - Primary_Organics: - Alcohols: EOH, MOH - Biogenics: ISOP, MTPA, MTPO, LIMO - HCs: ALK4, BENZ, CH4, C2H6, C3H8, PRPE, TOLU, XYLE - ROy: H2O2, H, H2, H2O, HO2, O1D, OH, RO2 - Secondary_Organic_Aerosols: - Complex_SOA: TSOA0, TSOA1, TSOA2, TSOA3, ASOA1, ASOA2, ASOA3, - ASOAN, TSOG0, TSOG1, TSOG2, TSOG3, ASOG1, ASOG2, ASOG3 - Isoprene_SOA: INDIOL, LVOCOA, SOAIE, SOAGX - Simple_SOA: SOAP, SOAS - Secondary_Organics: - Acids: ACTA - Aldehydes: ALD2, CH2O, HPALDs, MACR - Epoxides: IEPOX - Ketones: ACET, MEK, MVK - Nitrates: ISOPN - Other: GLYX, HCOOH, MAP, RCHO - Peroxides: MP - Sulfur: SOx, DMS, OCS, SO2, SO4 - TransportTracersBenchmark: - RnPbBeTracers: Rn222, Pb210, Pb210Strat, Be7, Be7Strat, Be10, Be10Strat - PassiveTracers: PassiveTracer, SF6Tracer, CH3ITracer, COAnthroEmis25dayTracer, - COAnthroEmis50dayTracer, COUniformEmis25dayTracer, GlobEmis90dayTracer, - NHEmis90dayTracer, SHEmis90dayTracer +================== +Single panel plots +================== - """ +Function :code:`single_panel()` (contained in GCPy module +:code:`gcpy.plot.single_panel`) is used to create plots containing +only one panel of GEOS-Chem data. This function is used within +:code:`compare_single_level()` and :code:`compare_zonal_mean()` to +generate each panel plot. It can also be called directly on its +own to quickly plot GEOS-Chem data in zonal mean or single level +format. +.. _plot-single-panel-func: -make_benchmark_emis_plots +Function: :code:`single_panel` +------------------------------ +Function :code:`single_panel()` accepts the following arguments: .. code-block:: python - def make_benchmark_emis_plots(ref, refstr, dev, devstr, dst="./benchmark", - subdst=None, plot_by_spc_cat=False, plot_by_hco_cat=False, overwrite=False, - verbose=False, flip_ref=False, flip_dev=False, log_color_scale=False, - sigdiff_files=None, weightsdir='.', n_job=-1, spcdb_dir=os.path.dirname(__file__) + def single_panel( + plot_vals, + ax=None, + plot_type="single_level", + grid=None, + gridtype="", + title="fill", + comap=WhGrYlRd, + norm=None, + unit="", + extent=None, + masked_data=None, + use_cmap_RdBu=False, + log_color_scale=False, + add_cb=True, + pres_range=None, + pedge=np.full((1, 1), -1), + pedge_ind=np.full((1, 1), -1), + log_yaxis=False, + xtick_positions=None, + xticklabels=None, + proj=ccrs.PlateCarree(), + sg_path='', + ll_plot_func="imshow", + vert_params=None, + pdfname="", + weightsdir='.', + vmin=None, + vmax=None, + return_list_of_plots=False, + **extra_plot_args ): - """ - Creates PDF files containing plots of emissions for model - benchmarking purposes. This function is compatible with benchmark - simulation output only. It is not compatible with transport tracers - emissions diagnostics. 
- - Remarks: - -------- - (1) If both plot_by_spc_cat and plot_by_hco_cat are - False, then all emission plots will be placed into the - same PDF file. - - (2) Emissions that are 3-dimensional will be plotted as - column sums. - """ - -Function-specific keyword args: -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. option:: plot_by_spc_cat : bool - - Set this flag to True to separate plots into PDF files - according to the benchmark species categories (e.g. Oxidants, - Aerosols, Nitrogen, etc.) These categories are specified - in the YAML file benchmark_species.yml. - - Default value: False - -.. option:: plot_by_hco_cat : bool - - Set this flag to True to separate plots into PDF files - according to HEMCO emissions categories (e.g. Anthro, - Aircraft, Bioburn, etc.) - - Default value: False - -.. option:: flip_ref : bool - - Set this flag to True to reverse the vertical level - ordering in the "Ref" dataset (in case "Ref" starts - from the top of atmosphere instead of the surface). + """ + Core plotting routine -- creates a single plot panel. + + Args: + plot_vals: xarray.DataArray, numpy.ndarray, or dask.array.Array + Single data variable GEOS-Chem output to plot + + Keyword Args (Optional): + ax: matplotlib axes + Axes object to plot information + Default value: None (Will create a new axes) + plot_type: str + Either "single_level" or "zonal_mean" + Default value: "single_level" + grid: dict + Dictionary mapping plot_vals to plottable coordinates + Default value: {} (will attempt to read grid from plot_vals) + gridtype: str + "ll" for lat/lon or "cs" for cubed-sphere + Default value: "" (will automatically determine from grid) + title: str + Title to put at top of plot + Default value: "fill" (will use name attribute of plot_vals + if available) + comap: matplotlib Colormap + Colormap for plotting data values + Default value: WhGrYlRd + norm: list + List with range [0..1] normalizing color range for matplotlib + methods. Default value: None (will determine from plot_vals) + unit: str + Units of plotted data + Default value: "" (will use units attribute of plot_vals + if available) + extent: tuple (minlon, maxlon, minlat, maxlat) + Describes minimum and maximum latitude and longitude of input + data. Default value: None (Will use full extent of plot_vals + if plot is single level). + masked_data: numpy array + Masked area for avoiding near-dateline cubed-sphere plotting + issues Default value: None (will attempt to determine from + plot_vals) + use_cmap_RdBu: bool + Set this flag to True to use a blue-white-red colormap + Default value: False + log_color_scale: bool + Set this flag to True to use a log-scale colormap + Default value: False + add_cb: bool + Set this flag to True to add a colorbar to the plot + Default value: True + pres_range: list(int) + Range from minimum to maximum pressure for zonal mean + plotting. Default value: [0, 2000] (will plot entire + atmosphere) + pedge: numpy array + Edge pressures of vertical grid cells in plot_vals + for zonal mean plotting. Default value: np.full((1, 1), -1) + (will determine automatically) + pedge_ind: numpy array + Index of edge pressure values within pressure range in + plot_vals for zonal mean plotting. + Default value: np.full((1, 1), -1) (will determine + automatically) + log_yaxis: bool + Set this flag to True to enable log scaling of pressure in + zonal mean plots. 
Default value: False + xtick_positions: list(float) + Locations of lat/lon or lon ticks on plot + Default value: None (will place automatically for + zonal mean plots) + xticklabels: list(str) + Labels for lat/lon ticks + Default value: None (will determine automatically from + xtick_positions) + proj: cartopy projection + Projection for plotting data + Default value: ccrs.PlateCarree() + sg_path: str + Path to NetCDF file containing stretched-grid info + (in attributes) for plot_vals. + Default value: '' (will not be read in) + ll_plot_func: str + Function to use for lat/lon single level plotting with + possible values 'imshow' and 'pcolormesh'. imshow is much + faster but is slightly displaced when plotting from dateline + to dateline and/or pole to pole. Default value: 'imshow' + vert_params: list(AP, BP) of list-like types + Hybrid grid parameter A in hPa and B (unitless). Needed if + grid is not 47 or 72 levels. Default value: None + pdfname: str + File path to save plots as PDF + Default value: "" (will not create PDF) + weightsdir: str + Directory path for storing regridding weights + Default value: "." (will store regridding files in + current directory) + vmin: float + minimum for colorbars + Default value: None (will use plot value minimum) + vmax: float + maximum for colorbars + Default value: None (will use plot value maximum) + return_list_of_plots: bool + Return plots as a list. This is helpful if you are using + a cubedsphere grid and would like access to all 6 plots + Default value: False + extra_plot_args: various + Any extra keyword arguments are passed to calls to + pcolormesh() (CS) or imshow() (Lat/Lon). + + Returns: + plot: matplotlib plot + Plot object created from input + """ + +Function :code:`single_panel()` expects data with a 1-length (or +non-existent) :literal:`T` (time) dimension, as well as a +1-length or non-existent :literal:`Z` (vertical level) dimension. - Default value: False - -.. option:: flip_dev : bool - - Set this flag to True to reverse the vertical level - ordering in the "Dev" dataset (in case "Dev" starts - from the top of atmosphere instead of the surface). - - Default value: False - - -This function generates plots of total emissions using output from ``HEMCO_diagnostics`` (for GEOS-Chem Classic) and/or ``GCHP.Emissions`` output files. - - -make_benchmark_jvalue_plots ---------------------------- - -.. code-block:: python - - def make_benchmark_jvalue_plots(ref, refstr, dev, devstr, varlist=None, - dst="./benchmark", subdst=None, local_noon_jvalues=False, - plots=["sfc", "500hpa", "zonalmean"],overwrite=False, verbose=False, - flip_ref=False, flip_dev=False, log_color_scale=False, sigdiff_files=None, - weightsdir='.', n_job=-1, spcdb_dir=os.path.dirname(__file__) - ): - """ - Creates PDF files containing plots of J-values for model - benchmarking purposes. - - Remarks: - -------- - Will create 4 files containing J-value plots: - (1 ) Surface values - (2 ) 500 hPa values - (3a) Full-column zonal mean values. - (3b) Stratospheric zonal mean values - These can be toggled on/off with the plots keyword argument. - - At present, we do not yet have the capability to split the - plots up into separate files per category (e.g. Oxidants, - Aerosols, etc.). This is primarily due to the fact that - we archive J-values from GEOS-Chem for individual species - but not family species. We could attempt to add this - functionality later if there is sufficient demand. - """ - - -Function-specific keyword args: -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. 
option:: varlist : list of str - - List of J-value variables to plot. If not passed, - then all J-value variables common to both dev - and ref will be plotted. The varlist argument can be - a useful way of restricting the number of variables - plotted to the pdf file when debugging. - - Default value: None - -.. option:: local_noon_jvalues : bool - - Set this flag to plot local noon J-values. This will - divide all J-value variables by the JNoonFrac counter, - which is the fraction of the time that it was local noon - at each location. - - Default value: False - -.. option:: plots : list of strings - - List of plot types to create. - - Default value: ['sfc', '500hpa', 'zonalmean'] - -.. option:: flip_ref : bool - - Set this flag to True to reverse the vertical level - ordering in the "Ref" dataset (in case "Ref" starts - from the top of atmosphere instead of the surface). - - Default value: False - -.. option:: flip_dev : bool - - Set this flag to True to reverse the vertical level - ordering in the "Dev" dataset (in case "Dev" starts - from the top of atmosphere instead of the surface). - - Default value: False - - -This function generates plots of J-values using the -:literal:`JValues` GEOS-Chem output files. +:code:`single_panel()` contains a few amenities to help with plotting +GEOS-Chem data, including automatic grid detection for lat/lon or +standard cubed-sphere xarray :code:`DataArray`-s. You can also pass NumPy +arrays to plot, though you'll need to manually pass grid info in this +case (with the :literal:`gridtype`, :literal:`pedge`, and +:literal:`pedge_ind` keyword arguments). -make_benchmark_wetdep_plots --------------------------- +The sample script below shows how you can plot data at a single level and +timestep from an :literal:`xarray.DataArray` object. .. code-block:: python - def make_benchmark_wetdep_plots(ref, refstr, dev, devstr, collection, - dst="./benchmark", datestr=None, overwrite=False, verbose=False, - benchmark_type="TransportTracersBenchmark", plots=["sfc", "500hpa", "zonalmean"], - log_color_scale=False, normalize_by_area=False, areas=None, refmet=None, - devmet=None, weightsdir='.', n_job=-1, spcdb_dir=os.path.dirname(__file__) - ): - """ - Creates PDF files containing plots of species concentration - for model benchmarking purposes. - """ - -Function-specific keyword args: -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. option:: datestr : str - - A string with date information to be included in both the - plot pdf filename and as a destination folder subdirectory - for writing plots - - Default value: None - -.. option:: benchmark_type: str - - A string denoting the type of benchmark output to plot, - either FullChemBenchmark or TransportTracersBenchmark. - - Default value: "FullChemBenchmark" - -.. option:: plots : list of strings - - List of plot types to create. - - Default value: ['sfc', '500hpa', 'zonalmean'] - -.. option:: normalize_by_area: bool - - Set this flag to true to enable normalization of data - by surfacea area (i.e. kg s-1 --> kg s-1 m-2). - - Default value: False - -.. option:: areas : dict of xarray DataArray: - - Grid box surface areas in m2 on Ref and Dev grids. - - Default value: None - -.. option:: refmet : str - - Path name for ref meteorology - - Default value: None - -.. 
option:: devmet : str - - Path name for dev meteorology + #!/usr/bin/env python - Default value: None + import xarray as xr + import matplotlib.pyplot as plt + from gcpy.plot.single_panel import single_panel -This function generates plots of wet deposition using -:literal:`WetLossConv` and :literal:`WetLossLS` GEOS-Chem output files. -It is currently primarily used for 1-Year Transport Tracer benchmarks, -plotting values for the following species as defined in -:file:`benchmark_categories.yml`: + # Read data from a file into an xr.Dataset object + dset = xr.open_dataset('GEOSChem.SpeciesConc.20160701_0000z.nc4') -.. code-block:: python + # Extract ozone (v/v) from the xr.Dataset object, + # for time=0 (aka first timestep) and lev=0 (aka surface) + sfc_o3 = dset['SpeciesConcVV_O3'].isel(time=0).isel(lev=0) - """ - WetLossConv: Pb210, Pb210Strat, Be7, Be7Strat, Be10, Be10Strat - WetLossLS: Pb210, Pb210Strat, Be7, Be7Strat, Be10, Be10Strat - """ + # Plot the data! + single_panel(sfc_o3) + plt.show() diff --git a/docs/source/Regridding.rst b/docs/source/Regridding.rst index 1e82238d..4a4eac4e 100644 --- a/docs/source/Regridding.rst +++ b/docs/source/Regridding.rst @@ -8,42 +8,77 @@ Regridding ########## -This page describes the regridding capabilities of GCPy. GCPy -currently supports regridding of data from GEOS-Chem restarts and -output NetCDF files. Regridding is supported across any horizontal -resolution and any grid type available in GEOS-Chem, including lat/lon -(global or non-global), global standard cubed-sphere, and global -stretched-grid. GCPy also supports arbitrary vertical regridding -across different vertical resolutions. - -Regridding with GCPy is currently undergoing an overhaul. As of the current -release, regridding is split into two different categories - regridding -GEOS-Chem Classic format files (lat/lon), and regridding GCHP format files -(standard cubed-sphere, stretched cubed-sphere). +:program:`GCPy` currently supports regridding of data from: + +#. GEOS-Chem Classic restart files +#. GEOS-Chem Classic diagnostic files +#. GCHP restart files +#. GCHP diagnostic files +#. HEMCO restart files +#. HEMCO diagnostic files +#. As well as any netCDF file adhering to `COARDS + `_ + or `CF `_ conventions. + +Regridding is supported across any horizontal resolution and any grid +type available in GEOS-Chem, including lat/lon (global or non-global), +global standard cubed-sphere, and global stretched-grid. GCPy also +supports arbitrary vertical regridding across different vertical +resolutions. + +Regridding with GCPy is currently undergoing an overhaul. As of the +current release, regridding is split into two different +categories: + +#. Regridding between lat-lon grids using regridding weights computed + on the fly by GCPy, and +#. Regridding either lat-lon or cubed-sphere using regridding weights + computed as a preprocessing step. + +The latter method may be used for creating GCHP standard grid +and stretched grid restart files from either GCHP or GEOS-Chem Classic +restart files. .. _regrid-classic: -==================================== -Regridding Files - GEOS-Chem Classic -==================================== +=============================== +Using Online Regridding Weights +=============================== + +You can regrid existing GEOS-Chem restart or diagnostic files using +GCPy function :code:`gcpy.file_regrid`. 
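+For reference, here is a minimal sketch (with hypothetical, placeholder +file paths) of calling :code:`file_regrid` from Python; the keyword +names follow the function signature shown below: + +.. code-block:: python + + from gcpy.file_regrid import file_regrid + + # Regrid a 4x5 GEOS-Chem Classic file to 2x2.5 + # (replace the placeholder paths with your own files) + file_regrid( + fin='/path/to/file_4x5.nc4', + fout='/path/to/file_2x25.nc4', + dim_format_in='classic', + dim_format_out='classic', + ll_res_out='2x2.5', + ) 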
This function can be called +directly from the command line (:ref:`see the examples below +<regrid-classic-example>`) or from a Python script or +interpreter (:code:`gcpy.file_regrid.file_regrid()`), as sketched +above. + +.. note:: + + For regridding to or from GCHP stretched-grid restart files, we + recommend using the :ref:`offline regridding weights method + <regrid-gchp>`. -You can regrid existing GEOS-Chem Classic restart or output diagnostic files -between lat/lon resolutions using :code:`gcpy.file_regrid`. -:code:`gcpy.file_regrid` can either be called directly from the command line -using :code:`python -m gcpy.file_regrid` or as a function -(:code:`gcpy.file_regrid.file_regrid()`) from a Python script or interpreter. The syntax of :code:`file_regrid` is as follows: .. code-block:: python - def file_regrid(fin, fout, dim_format_in, dim_format_out, ll_res_out='0x0'): - """ - Regrids an input file to a new horizontal grid specification and saves it - as a new file. - """ - -Required Arguments: ------------------- + def file_regrid( + fin, + fout, + dim_format_in, + dim_format_out, + cs_res_out=0, + ll_res_out='0x0', + sg_params_in=None, + sg_params_out=None, + vert_params_out=None, + ): + """ + Regrids an input file to a new horizontal grid specification + and saves it as a new file. + """ + +gcpy.file_regrid required arguments: +------------------------------------ .. option:: fin : str @@ -55,169 +90,322 @@ Required Arguments: .. option:: dim_format_in : str - Format of the input file's dimensions (set this to 'classic' - denoting - a GEOS-Chem Classic file with a lat/lon grid) + Format of the input file's dimensions. Accepted values are: + + - :literal:`classic`: For GEOS-Chem Classic restart & diagnostic files + - :literal:`checkpoint`: For GCHP checkpoint & restart files + - :literal:`diagnostic`: For GCHP diagnostic files .. option:: dim_format_out : str - Format of the output file's dimensions (set this to 'classic' - denoting - a GEOS-Chem Classic file with a lat/lon grid) + Format of the output file's dimensions. Accepted values are: + + - :literal:`classic`: For GEOS-Chem Classic restart & diagnostic files + - :literal:`checkpoint`: For GCHP checkpoint & restart files + - :literal:`diagnostic`: For GCHP diagnostic files + +gcpy.file_regrid optional arguments: +------------------------------------ + +.. option:: sg_params_in : list of float + + Stretching parameters (:literal:`stretch-factor`, + :literal:`target-longitude`, :literal:`target-latitude`) for the + input grid. Only needed when the data contained in file + :option:`fin` is on a GCHP stretched grid. + + Default value: :literal:`[1.0, 170.0, -90.0]` -Optional arguments: ------------------- +.. option:: sg_params_out : list of float + + Stretching parameters (:literal:`stretch-factor`, + :literal:`target-longitude`, :literal:`target-latitude`) for the + output grid. Only needed when the data to be contained in file + :option:`fout` is to be placed on a GCHP stretched grid. + + Default value: :literal:`[1.0, 170.0, -90.0]` + +.. option:: cs_res_out : int + + Cubed-sphere resolution of the output dataset. Only needed when + the data to be contained in file :option:`fout` is to be placed + on a GCHP cubed-sphere grid. + + Default value: :code:`0` .. option:: ll_res_out : str - The lat/lon resolution of the output dataset. + The lat/lon resolution of the output dataset. Only needed when + the data to be contained in file :option:`fout` is to be placed + on a GEOS-Chem Classic lat-lon grid. + + Default value: :code:`"0x0"`. +.. 
option:: vert_params_out : list of float + + Hybrid grid parameter :math:`A` (in :literal:`hPa`) and :math:`B` + (:literal:`unitless`), returned in list format: :code:`[A, B]` + + Default value: :code:`None` + +.. _regrid-classic-example: + +Examples +-------- - Default value: '0x0' +As stated previously, you can call +:code:`gcpy.file_regrid.file_regrid()` from a Python script, or from +the command line. Here we shall focus on command-line examples. -There is now only one grid format supported for regridding files using the -:code:`gcpy.file_regrid` method: :literal:`classic`. You must specify -:literal:`classic` as the value of both :code:`dim_format_in` and -:code:`dim_format_out`, as well as specifying a resolution as the value of -:code:`ll_res_out`. +#. Regrid a 4x5 GEOS-Chem Classic restart or diagnostic file to a + GEOS-Chem Classic 2x2.5 file: -As stated previously, you can either call -:code:`file_regrid.file_regrid()` directly or call it from the command -line using :code:`python -m gcpy.file_regrid ARGS`. An example command -line call (separated by line for readability) for regridding a 2x2.5 lat/lon -restart file to a 4x5 lat/lon grid looks like: + .. code-block:: -.. code-block:: + $ python -m gcpy.file_regrid \ + --filein /path/to/file_4x5.nc4 \ + --dim_format_in classic \ + --fileout /path/to/file_2x25.nc4 \ + --ll_res_out 2x2.5 \ + --dim_format_out classic - python -m gcpy.file_regrid \ - --filein initial_GEOSChem_rst.2x2.5.nc \ - --dim_format_in classic \ - --fileout GEOSChem_rst.4x5.nc \ - --ll_res_out 4x5 \ - --dim_format_out classic + |br| + +#. Regrid a 4x5 GEOS-Chem Classic restart or diagnostic file to a + GCHP C24 restart file: + + .. code-block:: + + $ python -m gcpy.file_regrid \ + --filein /path/to/file_4x5.nc4 \ + --dim_format_in classic \ + --fileout /path/to/file_c24.nc4 \ + --cs_res_out 24 \ + --dim_format_out checkpoint + + |br| + +#. Regrid a GCHP C48 restart file to a GCHP stretched grid C48 restart + file. The stretch parameters are: + + - stretch-factor: 5 + - target-longitude: -72 + - target-latitude: 41 + + .. code-block:: + + $ python -m gcpy.file_regrid \ + --filein /path/to/file_c48.nc4 \ + --dim_format_in checkpoint \ + --fileout /path/to/file_c48_sg.nc4 \ + --cs_res_out 48 \ + --dim_format_out checkpoint \ + --sg_params_out 5 -72 41 + + |br| + +#. Regrid the GCHP stretched grid C48 restart file from Example 3 + above to a GCHP C24 diagnostic file. + + .. code-block:: + + $ python -m gcpy.file_regrid \ + --filein /path/to/file_c48_sg.nc4 \ + --sg_params_in 5 -72 41 \ + --dim_format_in checkpoint \ + --fileout /path/to/file_c24.nc4 \ + --cs_res_out 24 \ + --dim_format_out diagnostic .. _regrid-gchp: -======================= -Regridding Files - GCHP -======================= +================================ +Using Offline Regridding Weights +================================ + +This approach requires generating regridding weights using the Python +packages `gridspec `_ and +`sparselt `_. Regridding with +:literal:`GCPy`, :literal:`gridspec` and :literal:`sparselt` is a +three-stage process: + +#. Create grid specifications for the source and target grids using + :literal:`gridspec`. +#. Create regridding weights for the transformation using + :literal:`ESMF_RegridWeightGen`. +#. Run the regridding operation using the :code:`regrid_restart_file` + submodule of GCPy. + +.. note:: -GCHP regridding is where the first steps of the overhaul in GCPy regridding have -happened. 
We are moving towards an integrated approach for all GEOS-Chem grid -types using `gridspec `_ and -`sparselt `_. For now, this is only -supported for GCHP grid formats, but in a later GCPy this will be the single -method for regridding all GEOS-Chem grid formats. + As of GCPy 1.4.0, the :ref:`default GCPy environment + ` (aka :literal:`gcpy_env`) now contains + :literal:`gridspec` and :literal:`sparselt` packages. You no + longer need to use the separate :literal:`gchp_regridding` + environment as in prior versions. -Currently, this method is only available from the command line. The syntax of -:code:`regrid_restart_file` is as follows: .. _regrid-gchp-args: -Required Arguments: ------------------- +gcpy.regrid_restart_file required arguments: +-------------------------------------------- + +There are three arguments required by the GCPy function +:literal:`regrid_restart_file`: .. option:: file_to_regrid : str - The GCHP restart file to be regridded + The GEOS-Chem Classic or GCHP data file to be regridded. .. option:: regridding_weights_file : str - Regridding weights to be used in the regridding transformation, generated - by :literal:`ESMF_RegridWeightGen` + Regridding weights to be used in the regridding transformation, + generated by :literal:`ESMF_RegridWeightGen` .. option:: template_file : str - The GCHP restart file to use as a template for the regridded restart - file - attributes, dimensions, and variables for the output file will be - taken from this template. Typically this will be the same file as the file - you are regridding! + The GC-Classic or GCHP restart file to use as a template for the + regridded restart file. Attributes, dimensions, and variables + for the output file will be taken from this template. + +gcpy.regrid_restart_file optional arguments: +-------------------------------------------- -Optional arguments: ------------------- +There are four optional arguments, all of which are for regridding to a +stretched cubed-sphere grid. .. option:: --stretched-grid : switch - A switch to indicate that the target grid is a stretched cubed-sphere grid + A switch to indicate that the target grid is a stretched + cubed-sphere grid. .. option:: --stretch-factor : float - The grid stretching factor for the target stretched grid. Only takes - effect when :code:`--stretched-grid` is set. See the - `GCHP documentation `_ - for more information + The grid stretching factor for the target stretched grid. Only + takes effect when :code:`--stretched-grid` is set. See the + `GCHP documentation + `_ + for more information. Make sure this value exactly matches the + value you plan to use in GCHP configuration file + :file:`setCommonRunSettings.sh`. .. option:: --target-latitude : float - The latitude of the centre point for stretching the target grid. Only - takes effect when :code:`--stretched-grid` is set. See the - `GCHP documentation `_ - for more information + The latitude of the centre point for stretching the target + grid. Only takes effect when :code:`--stretched-grid` is + set. See the `GCHP documentation + `_ + for more information. Make sure this value exactly matches the + value you plan to use in GCHP configuration file + :file:`setCommonRunSettings.sh`. .. option:: --target-longitude : float - The longitude of the centre point for stretching the target - grid. Only - takes effect when :code:`--stretched-grid` is set. See the - `GCHP documentation `_ - for more information + The longitude of the centre point for stretching the target + grid. 
Only takes effect when :code:`--stretched-grid` is + set. See the `GCHP documentation `_ + for more information. Make sure this value exactly matches the + value you plan to use in GCHP configuration file + :file:`setCommonRunSettings.sh`. -.. _regrid-gchp-firsttime: -First Time Setup ----------------- -Until GCPy contains a complete regridding implementation that works for all -GEOS-Chem grid formats, we recommend that you create a small -`conda `_ environment in which to carry out -your GCHP regridding. +.. _regrid-gchp-example-1: -The following conda `environment file `_ -will get you set up with an environment for regridding with -:literal:`gridspec` and :literal:`sparselt`: +Example 1: Standard Lat-Lon to Cubed-Sphere Regridding +------------------------------------------------------ -.. code-block:: yaml +This example will show regridding a GC-Classic 4x5 restart file to a +GCHP c24 restart file. - name: gchp_regridding - channels: - - conda-forge - dependencies: - - python=3.9 - - esmf - - gridspec - - numpy - - requests - - sparselt - - xarray - - xesmf +#. Activate your GCPy environment. -.. tip:: + .. code-block:: console - For your convenience, we have placed a copy of the above - environment file at the path - :file:`docs/environment/gchp_regridding.yml`. + $ mamba activate gcpy_env # Or whatever your environment's name is -After installing and switching to this new conda environment, you should have -the :literal:`gridspec` commands available to you at the command line. + |br| -.. _regrid-gchp-procedure: + +#. Create a lat-lon source grid specification using + :code:`gridspec-create`. -Regridding ---------- + .. code-block:: console -Regridding with :literal:`gridspec` and :literal:`sparselt` is a three stage -process: + $ gridspec-create latlon --pole-centered --half-polar 46 72 -#. Create grid specifications for the source and target grids using - :literal:`gridspec` + This will produce 1 file: :file:`regular_lat_lon_46x72.nc`. |br| + |br| -#. Create regridding weights for the transformation using - :literal:`ESMF_RegridWeightGen` +#. Create a target grid specification using :code:`gridspec-create`. + .. code-block:: console -#. Run the regridding operation using the new :code:`regrid_restart_file` - submodule of GCPy + $ gridspec-create gcs 24 + + This will produce 7 files: :file:`c24_gridspec.nc` and + :file:`c24.tile[1-6].nc` |br| + |br| + +#. Create the regridding weights for the regridding transformation + (46x72 to C24) using :code:`ESMF_RegridWeightGen`. -Standard Cubed-Sphere Regridding -------------------------------- + .. code-block:: console + + $ ESMF_RegridWeightGen \ + --source regular_lat_lon_46x72.nc \ + --destination c24_gridspec.nc \ + --method conserve \ + --weight 46x72_to_c24_weights.nc + + This will produce a log file, :file:`PET0.RegridWeightGen.Log`, and our + regridding weights, :file:`46x72_to_c24_weights.nc` |br| + |br| + +#. Use the grid weights produced in previous steps to complete the + regridding. + + .. code-block:: console + + $ python -m gcpy.regrid_restart_file \ + GEOSChem.Restart.20190701_0000z.nc4 \ + 46x72_to_c24_weights.nc \ + GEOSChem.Restart.20190701_0000z.c24_old.nc4 + The arguments to :code:`gcpy.regrid_restart_file` + :ref:`are described above <regrid-gchp-args>`. In this example + (lat-lon to cubed-sphere) we need to use a GEOS-Chem Classic + restart file as the file to be regridded and a GCHP restart file as + the template file. + + .. 
note:: + + The resolution of the template file does not matter as long as it + contains all of the variables and attributes that you wish to + include in the regridded restart file. + + After running :code:`gcpy.regrid_restart_file`, a single restart file + named :file:`new_restart_file.nc` will be created. You can rename + this file and use it to initialize your GCHP C24 simulation. |br| + |br| + +#. Deactivate your GCPy environment when finished. + + .. code-block:: console + + $ mamba deactivate + +.. _regrid-gchp-example-2: + +Example 2: Standard Cubed-Sphere to Cubed-Sphere Regridding +----------------------------------------------------------- We will use the example of regridding the out-of-the-box -:literal:`GEOSChem.Restart.20190701_0000z.c48.nc4` restart file from C48 to -C60 to demonstrate the standard cubed-sphere regridding process: +:file:`GEOSChem.Restart.20190701_0000z.c48.nc4` restart file from +C48 to C60 to demonstrate the standard cubed-sphere regridding process: + +#. Activate your GCPy environment. + + .. code-block:: console + + $ mamba activate gcpy_env # Or whatever your environment's name is + + |br| #. Create a source grid specification using :code:`gridspec-create`. @@ -225,8 +413,9 @@ C60 to demonstrate the standard cubed-sphere regridding process: $ gridspec-create gcs 48 - This will produce 7 files - :literal:`c48_gridspec.nc` and - :literal:`c48.tile[1-6].nc` + This will produce 7 files: :literal:`c48_gridspec.nc` and + :literal:`c48.tile[1-6].nc` |br| + |br| #. Create a target grid specification using :code:`gridspec-create`. @@ -234,42 +423,68 @@ C60 to demonstrate the standard cubed-sphere regridding process: $ gridspec-create gcs 60 - Again, this will produce 7 files - :literal:`c60_gridspec` and - :literal:`c60.tile[1-6].nc` + Again, this will produce 7 files: :literal:`c60_gridspec.nc` and + :literal:`c60.tile[1-6].nc` |br| + |br| -#. Create the regridding weights for the regridding transformation using - :code:`ESMF_RegridWeightGen`. +#. Create the regridding weights for the regridding transformation + (C48 to C60) using :code:`ESMF_RegridWeightGen`. .. code-block:: console - $ ESMF_RegridWeightGen \ - --source c48_gridspec.nc \ - --destination c60_gridspec.nc \ - --method conserve \ - --weight c48_to_c60_weights.nc + $ ESMF_RegridWeightGen \ + --source c48_gridspec.nc \ + --destination c60_gridspec.nc \ + --method conserve \ + --weight c48_to_c60_weights.nc - This will produce a log file, :literal:`PET0.RegridWeightGen.Log`, and our - regridding weights, :literal:`c48_to_c60_weights.nc` + This will produce a log file, :file:`PET0.RegridWeightGen.Log`, + and our regridding weights, :file:`c48_to_c60_weights.nc` |br| + |br| -#. Finally, use the grid weights produced in step 3 to complete the regridding. You will need to activate your GCPy python environment for this step. +#. Use the grid weights produced in earlier steps to complete the regridding. .. code-block:: console - $ python -m gcpy.regrid_restart_file \ - GEOSChem.Restart.20190701_0000z.c48.nc4 \ - c48_to_c60_weights.nc \ - GEOSChem.Restart.20190701_0000z.c48.nc4 + $ python -m gcpy.regrid_restart_file \ + GEOSChem.Restart.20190701_0000z.c48.nc4 \ + c48_to_c60_weights.nc \ + GEOSChem.Restart.20190701_0000z.c48.nc4 - This will produce a single file, :literal:`new_restart_file.nc`, regridded - from C48 to C60, that you can rename and use as you please. + The arguments to :code:`gcpy.regrid_restart_file` + :ref:`are described above `. 
Because we are + regridding from one cubed-sphere grid to another cubed-sphere grid, + we can use the file to be regridded as the template file. -Stretched Cubed-Sphere Regridding --------------------------------- + After running :code:`gcpy.regrid_restart_file`, a single restart + file named :file:`new_restart_file.nc` will be created. You can + rename this file as you wish and use it for your GCHP C60 + simulation. |br| + |br| -We will use the example of regridding the out-of-the-box -:literal:`GEOSChem.Restart.20190701_0000z.c48.nc4` restart file from C48 to -a C120 base resolution stretched grid with a stretch factor of 4.0 over Bermuda -to demonstrate the stretched cubed-sphere regridding process: +#. Deactivate your GCPy environment when you have finished. + + .. code-block:: console + + $ mamba deactivate + +Example 3: Standard to Stretched Cubed-Sphere Regridding +-------------------------------------------------------- + +This example regrids the out-of-the-box c48 restart file +(:file:`GEOSChem.Restart.20190701_0000z.c48.nc4`) from a standard +cubed-sphere grid to a stretched grid. The base resolution will remain +the same at c48. The regridded file will have a stretch factor of 4.0 +over Bermuda, which means a regional grid resolution of c192 (4 +times 48) in that area. + +#. Activate your GCPy environment: + + .. code-block:: console + + $ mamba activate gcpy_env # Or whatever your environment's name is + + |br| #. Create a source grid specification using :code:`gridspec-create`. @@ -277,61 +492,90 @@ to demonstrate the stretched cubed-sphere regridding process: $ gridspec-create gcs 48 - This will produce 7 files - :literal:`c48_gridspec.nc` and - :literal:`c48.tile[1-6].nc` + This will produce 7 files: :file:`c48_gridspec.nc` and + :file:`c48.tile[1-6].nc` |br| + |br| #. Create a target grid specification using :code:`gridspec-create`. + This will be for the stretched grid. .. code-block:: console - $ gridspec-create sgcs 120 -s 4.0 -t 32.0 -64.0 + $ gridspec-create sgcs 48 -s 4.0 -t 32.0 -64.0 - Here, the :code:`-s` option denotes the stretch factor and the :code:`-t` - option denotes the latitude / longitude of the centre point of the grid - stretch. + Here, the :code:`-s` option denotes the stretch factor and the + :code:`-t` option denotes the latitude / longitude of the centre + point of the grid stretch. - Again, this will produce 7 files - :literal:`c120_..._gridspec.nc` and - :literal:`c120_..._tile[1-6].nc`, where :literal:`...` denotes randomly - generated characters. + Again, this will produce 7 files: :file:`c48_..._gridspec.nc` and + :file:`c48_..._tile[1-6].nc`, where :file:`...` denotes randomly + generated characters. Be sure to look for these since you will need + them in the next step. |br| + |br| -#. Create the regridding weights for the regridding transformation using - :code:`ESMF_RegridWeightGen`, replacing :literal:`c120_..._gridspec.nc` - with the actual name of the file created in the previous step. +#. Create the regridding weights for the regridding transformation + (C48 to C48-stretched) using :code:`ESMF_RegridWeightGen`, + replacing :file:`c48_..._gridspec.nc` with the actual name of the + file created in the previous step. An example is shown below. .. 
code-block:: console - $ ESMF_RegridWeightGen \ - --source c48_gridspec.nc \ - --destination c120_..._gridspec.nc \ - --method conserve \ - --weight c48_to_c120_stretched_weights.nc + $ ESMF_RegridWeightGen \ + --source c48_gridspec.nc \ + --destination c48_s4d00_tdtdqp9ktebm5_gridspec.nc \ + --method conserve \ + --weight c48_to_c48_stretched_weights.nc - This will produce a log file, :literal:`PET0.RegridWeightGen.Log`, and our - regridding weights, :literal:`c48_to_c120_stretched_weights.nc` + This will produce a log file, :file:`PET0.RegridWeightGen.Log`, and our + regridding weights, :file:`c48_to_c48_stretched_weights.nc` |br| + |br| -#. Finally, use the grid weights produced in step 3 to complete the regridding. - You will need to switch to your GCPy python environment for this step. +#. Use the grid weights produced in earlier steps to complete the + regridding. .. code-block:: console - $ python -m gcpy.regrid_restart_file \ - --stretched-grid \ - --stretch-factor 4.0 \ - --target-latitude 32.0 \ - --target-longitude -64.0 \ - GEOSChem.Restart.20190701_0000z.c48.nc4 \ - c48_to_c120_stretched_weights.nc \ - GEOSChem.Restart.20190701_0000z.c48.nc4 - - This will produce a single file, :literal:`new_restart_file.nc`, regridded - from C48 to C120, with a stretch factor of 4.0 over 32.0N, -64.0E, that you - can rename and use as you please. It is generally a good idea to rename the - file to include the grid resolution, stretch factor, and target lat/lon for - easy reference. + $ python -m gcpy.regrid_restart_file \ + --stretched-grid \ + --stretch-factor 4.0 \ + --target-latitude 32.0 \ + --target-longitude -64.0 \ + GEOSChem.Restart.20190701_0000z.c48.nc4 \ + c48_to_c48_stretched_weights.nc \ + GEOSChem.Restart.20190701_0000z.c48.nc4 + + The arguments to :code:`gcpy.regrid_restart_file` + :ref:`are described above <regrid-gchp-args>`. Because we are + regridding from one cubed-sphere grid to another cubed-sphere grid, + we can use the file to be regridded as the template file. + + This will produce a single file, :literal:`new_restart_file.nc`, + regridded from C48 standard to C48 stretched with a stretch factor + of 4.0 over 32.0N, -64.0E, that you can rename and use as you + please. + + .. tip:: + + It is generally a good idea to rename the file to include + the grid resolution, stretch factor, and target lat/lon for easy + reference. You can copy it somewhere to keep long-term and link to + it from the GCHP Restarts subdirectory in the run directory. + + .. code-block:: console + + $ mv new_restart_file.nc GEOSChem.Restart.20190701_0000z.c48.s4_32N_64E.nc + + You can also easily reference the file's stretch parameters by + looking at the global attributes in the file. When using the + file as a restart file in GCHP, make sure that you use the exact + same parameters in both the file's global attributes and GCHP + configuration file :file:`setCommonRunSettings.sh`. + +#. Deactivate your GCPy environment when you have finished. .. code-block:: console - $ mv new_restart_file.nc GEOSChem.Restart.20190701_0000z.c120.s4_32N_64E.nc + $ mamba deactivate .. _regrid-plot: =============================== Regridding for Plotting in GCPy =============================== -When plotting in GCPy (e.g. through :code:`compare_single_level()` or -:code:`compare_zonal_mean()`), the vast majority of regridding is -handled internally. 
You can optionally request a specific -horizontal comparison resolution in :code:`compare_single_level()`` -and :code:`compare_zonal_mean()`. Note that all regridding in these -plotting functions only applies to the comparison panels (not the top -two panels which show data directly from each dataset). There are only -two scenarios where you will need to pass extra information to GCPy to -help it determine grids and to regrid when plotting. +When plotting in GCPy (e.g. through +:code:`gcpy.compare_single_level()` or +:code:`gcpy.compare_zonal_mean()`), the vast majority of regridding is +handled internally. You can optionally request a specific horizontal +comparison resolution in :code:`compare_single_level()` and +:code:`compare_zonal_mean()`. Note that all regridding in these +plotting functions only applies to the comparison panels (not +the top two panels which show data directly from each dataset). There +are only two scenarios where you will need to pass extra information +to GCPy to help it determine grids and to regrid when plotting. Pass stretched-grid file paths ------------------------------ Stretched-grid parameters cannot currently be automatically determined from grid coordinates. If you are plotting stretched-grid data in -:code:`compare_single_level()` or :code:`compare_zonal_mean()` (even -if regridding to another format), you need to use the -:code:`sg_ref_path` or :code:`sg_dev_path` arguments to pass the path -of your original stretched-grid restart file to GCPy. -If using :code:`single_panel()`, pass the file path using -:code:`sg_path`. Stretched-grid restart files created using GCPy -contain the specified stretch factor, target longitude, and -target latitude in their metadata. Currently, output files from -stretched-grid runs of GCHP do not contain any metadata that specifies -the stretched-grid used. +:code:`gcpy.compare_single_level()` or +:code:`gcpy.compare_zonal_mean()` (even if regridding to another +format), you need to use the :code:`sg_ref_path` or +:code:`sg_dev_path` arguments to pass the path of your original +stretched-grid restart file to GCPy. If using :code:`single_panel()`, +pass the file path using :code:`sg_path`. Stretched-grid restart files +created using GCPy contain the specified stretch factor, target +longitude, and target latitude in their metadata. Currently, output +files from stretched-grid runs of GCHP do not contain any metadata +that specifies the stretched-grid used. Pass vertical grid parameters for non-72/47-level grids ------------------------------------------------------- @@ -381,7 +626,7 @@ Automatic regridding decision process ------------------------------------- When you do not specify a horizontal comparison resolution using the -:code:`cmpres` argument in :code:`compare_single_level()` and +:code:`cmpres` argument in :code:`gcpy.compare_single_level()` and :code:`compare_zonal_mean()`, GCPy follows several steps to determine what comparison resolution it should use: diff --git a/docs/source/Report_Request.rst b/docs/source/Report_Request.rst deleted file mode 100644 index 56811f8f..00000000 --- a/docs/source/Report_Request.rst +++ /dev/null @@ -1,10 +0,0 @@ -Report a Problem or Request a Feature -===================================== - -If you encounter an error when using GCPy or if any documentation is unclear, you should -`open a new issue on the GCPy Github page `__. -Pre-defined templates exist for asking a question or reporting a bug / issue. 
- -We are open to adding new functionality to GCPy as requested by its userbase. Some requested functionality -may be better suited to example scripts rather than direct code additions to GCPy. In that case, we can add -examples to the `Example Scripts` section of this ReadTheDocs site. diff --git a/docs/source/Single_panel b/docs/source/Single_panel deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/source/Single_panel.rst b/docs/source/Single_panel.rst index 3a7d45d1..37e1d213 100644 --- a/docs/source/Single_panel.rst +++ b/docs/source/Single_panel.rst @@ -1,77 +1,123 @@ +.. _single-panel: + +This example script may also be found at `gcpy/examples/plotting/plot_single_panel.py `_. + +##################### Single Panel Plotting -===================== +##################### .. code-block:: python - #!/usr/bin/env python """ Global and Regional Single Panel Plots -------------------------------------- - This example script demonstrates the core single panel plotting capabilities of GCPy, - including global and regional single level plots as well as global zonal mean plots. - The example data described here is in lat/lon format, but the same code works equally - well for cubed-sphere (GCHP) data. + This example script demonstrates the core single panel plotting + capabilities of GCPy, including global and regional single level plots + as well as global zonal mean plots. + + The example data described here is in lat/lon format, but the same code + works equally well for cubed-sphere (GCHP) data. + + For full documentation on the plotting capabilities of GCPy + (including full argument lists), please see the GCPy documentation + at https://gcpy.readthedocs.io. """ - - #xarray allows us to read in any NetCDF file, the format of most GEOS-Chem diagnostics, - #as an xarray Dataset import xarray as xr - ds = xr.open_dataset('GEOSChem.Restart.20160701_0000z.nc4') - - #You can easily view the variables available for plotting using xarray. - #Each of these variables has its own xarray DataArray within the larger Dataset container. - print(ds.data_vars) - - #Most variables have some sort of prefix; in this example all variables are - #prefixed with 'SpeciesRst_'. We'll select the DataArray for ozone. - da = ds.SpeciesRst_O3 - - #Printing a DataArray gives a summary of the dimensions and attributes of the data. - print(da) - #This Restart file has a time dimension of size 1, with 72 vertical levels, - #46 latitude indicies, and 72 longitude indices. - import gcpy.plot as gcplot - - """ - Single level plots - ------------------ - """ - #gcpy.single_panel is the core plotting function of GCPy, able to create a one panel zonal mean or - #single level plot. Here we will create a single level plot of ozone at ~500 hPa. - #We must manually index into the level that we want to plot (index 22 in the standard 72-layer - #and 47-layer GMAO vertical grids). - slice_500 = da.isel(lev=22) - - #single_panel has many arguments which can be optionally specified. The only argument you must always - #pass to a call to single_panel is the DataArray that you want to plot. 
- #By default, the created plot includes a colorbar with units read from the DataArray, an automatic title - #(the data variable name in the DataArray), and an extent equivalent to the full lat/lon extent of the DataArray import matplotlib.pyplot as plt - gcplot.single_panel(slice_500) - plt.show() - - #You can specify a specific area of the globe you would like plotted using the 'extent' argument, - #which uses the format [min_longitude, max_longitude, min_latitude, max_latitude] with bounds [-180, 180, -90, 90] - gcplot.single_panel(slice_500, extent=[50, -90, -10, 60]) - plt.show() - - #Other commonly used arguments include specifying a title and a colormap (defaulting to a White-Green-Yellow-Red colormap) - #You can find more colormaps at https://matplotlib.org/tutorials/colors/colormaps.html - gcplot.single_panel(slice_500, title='500mb Ozone over the North Pacific', comap = plt.cm.viridis, - log_color_scale=True, extent=[80, -90, -10, 60]) - plt.show() - - """ - Zonal Mean Plotting - ------------------- - """ - - #Use the plot_type argument to specify zonal_mean plotting - gcplot.single_panel(da, plot_type="zonal_mean") - plt.show() - - #You can specify pressure ranges in hPa for zonal mean plot (by default every vertical level is plotted) - gcplot.single_panel(da, pres_range=[0, 100], log_yaxis=True, log_color_scale=True) - plt.show() - + from gcpy.plot.single_panel import single_panel + + + def main(): + """ + Example routine to create single panel plots. + """ + + # xarray allows us to read in any NetCDF file, the format of + # most GEOS-Chem diagnostics, as an xarray Dataset + dset = xr.open_dataset('GEOSChem.Restart.20160701_0000z.nc4') + + # You can easily view the variables available for plotting + # using xarray. Each of these variables has its own xarray + # DataArray within the larger Dataset container. + print(dset.data_vars) + + # Most variables have some sort of prefix; in this example all + # variables are prefixed with 'SpeciesRst_'. We'll select the + # DataArray for ozone. + darr = dset.SpeciesRst_O3 + + # Printing a DataArray gives a summary of the dimensions and attributes + # of the data. + print(darr) + + # This Restart file has a time dimension of size 1, with 72 vertical levels, + # 46 latitude indices, and 72 longitude indices. + + + # ================== + # Single-level Plots + # ================== + + # gcpy.single_panel is the core plotting function of GCPy, able to + # create a one panel zonal mean or single level plot. Here we will + # create a single level plot of ozone at ~500 hPa. We must manually + # index into the level that we want to plot (index 22 in the standard + # 72-layer and 47-layer GMAO vertical grids). + slice_500 = darr.isel(lev=22) + + # single_panel has many arguments which can be optionally specified. + # The only argument you must always pass to a call to single_panel is + # the DataArray that you want to plot. 
+ # By default, the created plot includes a colorbar with units
+ # read from the DataArray, an automatic title (the data variable
+ # name in the DataArray), and an extent equivalent to the full
+ # lat/lon extent of the DataArray
+ single_panel(slice_500)
+ plt.show()
+
+ # You can specify a specific area of the globe you would like plotted
+ # using the 'extent' argument, which uses the format [min_longitude,
+ # max_longitude, min_latitude, max_latitude] with bounds
+ # [-180, 180, -90, 90]
+ single_panel(slice_500, extent=[50, -90, -10, 60])
+ plt.show()
+
+ # Other commonly used arguments include specifying a title and a
+ # colormap (defaulting to a White-Green-Yellow-Red colormap).
+ # You can find more colormaps at
+ # https://matplotlib.org/tutorials/colors/colormaps.html
+ single_panel(
+ slice_500,
+ title='500mb Ozone over the North Pacific',
+ comap=plt.get_cmap("viridis"),
+ log_color_scale=True,
+ extent=[80, -90, -10, 60]
+ )
+ plt.show()
+
+ # ===================
+ # Zonal Mean Plotting
+ # ===================
+
+ # Use the plot_type argument to specify zonal_mean plotting
+ single_panel(
+ darr,
+ plot_type="zonal_mean"
+ )
+ plt.show()
+
+ # You can specify pressure ranges in hPa for zonal mean plots
+ # (by default every vertical level is plotted)
+ single_panel(
+ darr,
+ pres_range=[0, 100],
+ log_yaxis=True,
+ log_color_scale=True
+ )
+ plt.show()
+
+
+
+ # Only execute when we run as a standalone script
+ if __name__ == '__main__':
+ main()
diff --git a/docs/source/Six_panel.rst b/docs/source/Six_panel.rst
index 240a787c..553cc0f9 100644
--- a/docs/source/Six_panel.rst
+++ b/docs/source/Six_panel.rst
@@ -1,61 +1,119 @@
+.. _six-panel:
+
+##################
 Six Panel Plotting
-==================
+##################

-.. code-block:: python
+This example script may also be found at `gcpy/examples/plotting/plot_comparisons.py `_.

+.. code-block:: python
 #!/usr/bin/env python
 """
 Six Panel Comparison Plots
 --------------------------------------
- This example script demonstrates the comparitive plotting capabilities of GCPy,
- including single level plots as well as global zonal mean plots.
- These comparison plots are frequently used to evaluate results from different runs / versions
- of GEOS-Chem, but can also be used to compare results from different points in one run that
- are stored in separate xarray datasets.
- The example data described here is in lat/lon format, but the same code works equally
- well for cubed-sphere (GCHP) data.
+ This example script demonstrates the comparative plotting
+ capabilities of GCPy, including single level plots as well as
+ global zonal mean plots. These comparison plots are frequently
+ used to evaluate results from different runs / versions of
+ GEOS-Chem, but can also be used to compare results from different
+ points in one run that are stored in separate xarray datasets.
+
+ The example data described here is in lat/lon format, but the same
+ code works equally well for cubed-sphere (GCHP) data.
 """
-
- #xarray allows us to read in any NetCDF file, the format of most GEOS-Chem diagnostics,
- #as an xarray Dataset
 import xarray as xr
- ref_ds = xr.open_dataset('first_run/GEOSChem.Restart.20160801_0000z.nc4')
- dev_ds = xr.open_dataset('second_run/GEOSChem.Restart.20160801_0000z.nc4')
-
- import gcpy.plot as gcplot
-
- """
- Single level plots
- ------------------
- """
-
- #compare_single_level generates sets of six panel plots for data at a specified level in your datasets.
- #By default, the level at index 0 (likely the surface) is plotted. Here we will plot data at ~500 hPa,
- #which is located at index 21 in the standard 72-level and 47-level GMAO vertical grids.
- ilev=21
-
- #You likely want to look at the same variables across both of your datasets. If a variable is in
- #one dataset but not the other, the plots will show NaN values for the latter.
- #You can pass variable names in a list to these comparison plotting functions (otherwise all variables will plot).
- varlist = ['SpeciesRst_O3', 'SpeciesRst_CO2']
-
- #compare_single_level has many arguments which can be optionally specified. The first four arguments are required.
- #They specify your first xarray Dataset, the name of your first dataset, your second xarray Dataset, and the name of
- #your second dataset. Here we will also pass a specific level and the names of the variables you want to plot.
 import matplotlib.pyplot as plt
- gcplot.compare_single_level(ref_ds, 'Dataset 1', dev_ds, 'Dataset 2', ilev=ilev, varlist=varlist)
- plt.show()
-
- #Using plt.show(), you can view the plots interactively. You can also save out the plots to a PDF.
- gcplot.compare_single_level(ref_ds, 'Dataset 1', dev_ds, 'Dataset 2', ilev=ilev, varlist=varlist, pdfname='single_level.pdf')
-
- """
- Zonal Mean Plotting
- -------------------
- """
- #compare_zonal_mean generates sets of six panel plots containing zonal mean data across your dataset.
- #compare_zonal_mean shares many of the same arguments as compare_single_level.
- #You can specify pressure ranges in hPa for zonal mean plotting (by default every vertical level is plotted)
- gcplot.compare_zonal_mean(ref_ds, 'Dataset 1', dev_ds, 'Dataset 2', pres_range=[0, 100], varlist=varlist, pdfname='zonal_mean.pdf')
-
+ from gcpy.constants import skip_these_vars
+ from gcpy.plot.compare_single_level import compare_single_level
+ from gcpy.plot.compare_zonal_mean import compare_zonal_mean
+
+
+ def main():
+ """
+ Example function to create six-panel comparison plots.
+ """
+
+ # xarray allows us to read in any NetCDF file, the format of most
+ # GEOS-Chem diagnostics, as an xarray Dataset
+ #
+ # The skip_these_vars list avoids trying to read certain
+ # GCHP variables that cause data read issues.
+ ref_ds = xr.open_dataset(
+ 'first_run/GEOSChem.Restart.20160801_0000z.nc4',
+ drop_variables=skip_these_vars
+ )
+ dev_ds = xr.open_dataset(
+ 'second_run/GEOSChem.Restart.20160801_0000z.nc4',
+ drop_variables=skip_these_vars
+ )
+
+ # ==================
+ # Single level plots
+ # ==================
+
+ # compare_single_level generates sets of six panel plots for
+ # data at a specified level in your datasets. By default, the
+ # level at index 0 (likely the surface) is plotted. Here we will
+ # plot data at ~500 hPa, which is located at index 21 in the
+ # standard 72-level and 47-level GMAO vertical grids.
+ ilev = 21
+
+ # You likely want to look at the same variables across both of
+ # your datasets. If a variable is in one dataset but not the other,
+ # the plots will show NaN values for the latter. You can pass
+ # variable names in a list to these comparison plotting functions
+ # (otherwise all variables will plot).
+ varlist = ['SpeciesRst_O3', 'SpeciesRst_CO2']
+
+ # compare_single_level has many arguments which can be optionally
+ # specified. The first four arguments are required. They specify
+ # your first xarray Dataset, the name of your first dataset,
+ # your second xarray Dataset, and the name of your second dataset.
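+ # (An illustrative aside, not part of the original example: if you
+ # are unsure which variables the two datasets share, you could build
+ # varlist from their intersection with plain Python set operations,
+ # for instance:
+ # varlist = sorted(set(ref_ds.data_vars) & set(dev_ds.data_vars))
+ # Variables present in only one dataset would otherwise plot as NaN.)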
+ # Here we will also pass a specific level and the names of the + # variables you want to plot. + compare_single_level( + ref_ds, + 'Dataset 1', + dev_ds, + 'Dataset 2', + ilev=ilev, + varlist=varlist + ) + plt.show() + + # Using plt.show(), you can view the plots interactively. + # You can also save out the plots to a PDF. + compare_single_level( + ref_ds, + 'Dataset 1', + dev_ds, + 'Dataset 2', + ilev=ilev, + varlist=varlist, + pdfname='single_level.pdf' + ) + + # ================== + # Zonal Mean Plots + # ================== + + # compare_zonal_mean generates sets of six panel plots containing + # zonal mean data across your dataset. compare_zonal_mean shares + # many of the same arguments as compare_single_level. You can + # specify pressure ranges in hPa for zonal mean plotting (by + # default every vertical level is plotted) + compare_zonal_mean( + ref_ds, + 'Dataset 1', + dev_ds, + 'Dataset 2', + pres_range=[0, 100], + varlist=varlist, + pdfname='zonal_mean.pdf' + ) + + + # Only execute when we run as a standalone script + if __name__ == '__main__': + main() diff --git a/docs/source/Tabling.rst b/docs/source/Tabling.rst deleted file mode 100644 index d09d940f..00000000 --- a/docs/source/Tabling.rst +++ /dev/null @@ -1,436 +0,0 @@ -.. _table: - -####### -Tabling -####### - -This page describes the tabling capabilities of GCPy, including -possible argument values for every tabling function. These functions are primarily used for model benchmarking purposes. All tables are printed to text files. - - -================ -Emissions tables -================ - -.. code-block:: python - - - def make_benchmark_emis_tables(reflist, refstr, devlist, - devstr, dst="./benchmark", refmet=None, devmet=None, - overwrite=False, ref_interval=[2678400.0], dev_interval=[2678400.0], - spcdb_dir=os.path.dirname(__file__) - ): - """ - Creates a text file containing emission totals by species and - category for benchmarking purposes. - """ - -Arguments: ----------- - -.. option:: reflist: list of str - - List with the path names of the emissions file or files - (multiple months) that will constitute the "Ref" - (aka "Reference") data set. - -.. option:: refstr : str - - A string to describe ref (e.g. version number) - -.. option:: devlist : list of str - - List with the path names of the emissions file or files - (multiple months) that will constitute the "Dev" - (aka "Development") data set - -.. option:: devstr : str - - A string to describe dev (e.g. version number) - - -Keyword arguments: ------------------- - -.. option:: dst : str - - A string denoting the destination folder where the file - containing emissions totals will be written. - - Default value: ./benchmark - -.. option:: refmet : str - - Path name for ref meteorology - - Default value: None - -.. option:: devmet : str - - Path name for dev meteorology - - Default value: None - -.. option:: overwrite : bool - - Set this flag to True to overwrite files in the - destination folder (specified by the dst argument). - - Default value: False - -.. option:: ref_interval : list of float - - The length of the ref data interval in seconds. By default, interval - is set to [2678400.0], which is the number of seconds in July - (our 1-month benchmarking month). - - Default value: [2678400.0] - -.. option:: dev_interval : list of float - - The length of the dev data interval in seconds. By default, interval - is set to [2678400.0], which is the number of seconds in July - (our 1-month benchmarking month). - - Default value: [2678400.0] - -.. 
option:: spcdb_dir : str - - Directory of species_datbase.yml file - - Default value: Directory of GCPy code repository - - -:code:`gcpy.benchmark.make_benchmark_emis_tables()` generates tables -of total emissions categorized by species or by inventory. These -tables contain total global emissions over the lengths of the Ref and -Dev datasets, as well as the differences between totals across the two -datasets. Passing a list of datasets as Ref or Dev (e.g. multiple -months of emissions files) will result in printing totals emissions -summed across all files in the list. Make sure to update the -literal:`ref_interval` and/or :literal:`dev_interval` arguments if you -pass input that does not correspond with 1 31 day month. - -=========== -Mass Tables -=========== - -.. code-block:: python - - def make_benchmark_mass_tables(ref, refstr, dev, devstr, - varlist=None, dst="./benchmark", subdst=None, overwrite=False, - verbose=False, label="at end of simulation", - spcdb_dir=os.path.dirname(__file__), - ref_met_extra='', dev_met_extra='' - ): - """ - Creates a text file containing global mass totals by species and - category for benchmarking purposes. - """ - -Arguments: ----------- - -.. option:: reflist : str - - Pathname that will constitute - the "Ref" (aka "Reference") data set. - -.. option:: refstr : str - - A string to describe ref (e.g. version number) - -.. option:: dev : list of str - - Pathname that will constitute - the "Dev" (aka "Development") data set. The "Dev" - data set will be compared against the "Ref" data set. - -.. option:: devstr : str - - A string to describe dev (e.g. version number) - - -Keyword arguments: ------------------- - -.. option:: varlist : list of str - - List of variables to include in the list of totals. - If omitted, then all variables that are found in either - "Ref" or "Dev" will be included. The varlist argument - can be a useful way of reducing the number of - variables during debugging and testing. - - Default value: None - -.. option:: dst : str - - A string denoting the destination folder where the file - containing emissions totals will be written. - - Default value: ./benchmark - -.. option:: subdst : str - - A string denoting the sub-directory of dst where PDF - files containing plots will be written. In practice, - subdst is only needed for the 1-year benchmark output, - and denotes a date string (such as "Jan2016") that - corresponds to the month that is being plotted. - - Default value: None - -.. option:: overwrite : bool - - Set this flag to True to overwrite files in the - destination folder (specified by the dst argument). - - Default value: False - -.. option:: verbose : bool - - Set this flag to True to print extra informational output. - - Default value: False. - -.. option:: spcdb_dir : str - - Directory of species_datbase.yml file - - Default value: Directory of GCPy code repository - -.. option:: ref_met_extra : str - - Path to ref Met file containing area data for use with restart files - which do not contain the Area variable. - Default value : '' - -.. option:: dev_met_extra : str - - Path to dev Met file containing area data for use with restart files - which do not contain the Area variable. - - Default value: '' - - -:code:`gcpy.benchmark.make_benchmark_mass_tables` is used to create -global mass tables of GEOS-Chem species from a -:literal:`Restart` file. This function will create one table of total -mass by species from the earth's surface to the top of the -stratosphere and one table for only the troposphere. 
-The tables contain total mass for each of the ref and dev datasets in -Gg, as well as absolute and percentage difference between the two -datasets. If your restart files do not contain an Area variable -(:literal:`AREA` for GEOS-Chem Classic or :literal:`Met_AREAM2` for -GCHP) then you will need to use the :literal:`ref_met_extra` and/or -:literal:`dev_met_extra` arguments to pass the paths of NetCDF files -containing the corresponding area variables (usually contained in -meteorology diagnostic output). - -======================== -Operations Budget Tables -======================== - -.. code-block:: python - - def make_benchmark_operations_budget(refstr, reffiles, devstr, - devfiles, ref_interval, dev_interval, benchmark_type=None, - label=None, col_sections=["Full", "Trop", "PBL", "Strat"], - operations=["Chemistry","Convection","EmisDryDep","Mixing", - "Transport","WetDep"], compute_accum=True, - require_overlap=False, dst='.', species=None, overwrite=True - ): - """ - Prints the "operations budget" (i.e. change in mass after - each operation) from a GEOS-Chem benchmark simulation. - """ - - -Arguments: ----------- - -.. option:: refstr : str - - Labels denoting the "Ref" versions - -.. option:: reffiles : list of str - - Lists of files to read from the "Ref" version. - -.. option:: devstr : str - - Labels denoting the "Dev" versions - -.. option:: devfiles : list of str - - Lists of files to read from "Dev" version. - -.. option:: interval : float - - Number of seconds in the diagnostic interval. - - -Keyword arguments: ------------------- - -.. option:: benchmark_type : str - - "TransportTracersBenchmark" or "FullChemBenchmark". - - Default value: None - -.. option:: label : str - - Contains the date or date range for each dataframe title. - - Default value: None - -.. option:: col_sections : list of str - - List of column sections to calculate global budgets for. May - include Strat eventhough not calculated in GEOS-Chem, but Full - and Trop must also be present to calculate Strat. - - Default value: ["Full", "Trop", "PBL", "Strat"] - -.. option:: operations : list of str - - List of operations to calculate global budgets for. Accumulation - should not be included. It will automatically be calculated if - all GEOS-Chem budget operations are passed and optional arg - compute_accum is True. - - Default value: ["Chemistry","Convection","EmisDryDep", - "Mixing","Transport","WetDep"] - -.. option:: compute_accum : bool - - Optionally turn on/off accumulation calculation. If True, will - only compute accumulation if all six GEOS-Chem operations budgets - are computed. Otherwise a message will be printed warning that - accumulation will not be calculated. - - Default value: True - -.. option:: require_overlap : bool - - Whether to calculate budgets for only species that are present in - both Ref or Dev. - - Default value: False - -.. option:: dst : str - - Directory where plots & tables will be created. - - Default value: '.' (directory in which function is called) - -.. option:: species : list of str - - List of species for which budgets will be created. - - Default value: None (all species) - -.. option:: overwrite : bool - - Denotes whether to overwrite existing budget file. - - Default value: True - -:code:`gcpy.benchmark.make_benchmark_operations_budget()` creates -tables of budgets for species separated by model operation. The tables -show budgets for each of the ref and dev datasets in Gg, as well as -absolute and percentage difference between the two datasets. 
-Note that total accumulation across all operations will only be -printed if you set :code:`compute_accum==True` and -all operations are included in :literal:`operations`. Note also that -when using the non-local mixing scheme (default), :literal:`'Mixing'` -includes emissions and dry deposition applied below the -PBL. :literal:`'EmisDryDep'` therefore only captures fluxes above the -PBL. When using full mixing, :literal:`'Mixing'` and -:literal:`'EmisDryDep'` are fully separated. - -=========================== -Aerosol Budgets and Burdens -=========================== - -.. code-block:: python - - def make_benchmark_aerosol_tables(devdir, devlist_aero, devlist_spc, - devlist_met, devstr, year, days_per_mon, dst='./benchmark', - overwrite=False, is_gchp=False, spcdb_dir=os.path.dirname(__file__) - ): - """ - Compute FullChemBenchmark aerosol budgets & burdens - """ - - -Arguments: ----------- - -.. option:: devdir: str - - Path to development ("Dev") data directory - -.. option:: devlist_aero : list of str - - List of Aerosols collection files (different months) - -.. option:: devlist_spc : list of str - - List of SpeciesConc collection files (different months) - -.. option:: devlist_met : list of str - - List of meteorology collection files (different months) - -.. option:: devstr : str - - Descriptive string for datasets (e.g. version number) - -.. option:: year : str - - The year of the benchmark simulation (e.g. '2016'). - -.. option:: days_per_mon : list of int - - List of number of days per month for all months - - -Keyword arguments: ------------------- - -.. option:: dst : str - - Directory where budget tables will be created. - - Default value: './benchmark' - -.. option:: overwrite : bool - - Overwrite burden & budget tables? (default=True) - - Default value: False - -.. option:: is_gchp : bool - - Whether datasets are for GCHP - - Default value: False - -.. option:: spcdb_dir : str - - Directory of species_datbase.yml file - - Default value: Directory of GCPy code repository - - -:code:`gcpy.benchmark.make_benchmark_aerosol_tables()` generates two -different tables using output from a single dataset. One -contains annual mean aerosol burdens in Tg in the stratosphere, -troposphere, and combined stratosphere and troposphere. The other -table shows annual global mean AOD in the stratosphere, troposphere, -and combined stratosphere and troposphere. Aerosol species used are -pre-defined in :code:`aod_species.yml`: BCPI, OCPI, SO4, DST1, SALA, -and SALC. diff --git a/docs/source/benchmark_plotting.rst b/docs/source/benchmark_plotting.rst deleted file mode 100644 index e2fd80e4..00000000 --- a/docs/source/benchmark_plotting.rst +++ /dev/null @@ -1,146 +0,0 @@ -############################ -Benchmark Plotting / Tabling -############################ - -Below is an example configuration file used to input the desired -options for the comprehensive benchmark comparison script -:code:`run_benchmark.py`. Additional configuration file examples can -be found in the :file:`benchmarks` directory of GCpy. - -The :file:`run_benchmark.py` script allows one to perform benchmark -comparisons between any simulation duration supplied in the -configuration file provided the ref and dev simulations time periods -match. Additionally, if the durations specified are exactly one year, -then the corresponding :literal:`bmk_type` specialty comparison script -will be run (either :file:`run_1yr_fullchem_benchmark.py` or -:file:`run_1yr_tt_benchmark.py`). 
Any other duration will run the standard -suite of benchmark comparisons. - -To generate plots from a 1-month benchmark simulation, you would call -:file:`run_benchmark.py` as follows: - -.. code-block:: console - - (gcpy_env) $ run_benchmark.py 1mo_benchmark.yml - -Where :file:`1mo_benchmark.yml` contains the following inputs: - -.. code-block:: yaml - - &--- - # ===================================================================== - # Benchmark configuration file (**EDIT AS NEEDED**) - # customize in the following manner: - # (1) Edit the path variables so that they point to folders w/ model data - # (2) Edit the version strings for each benchmark simulation - # (3) Edit the switches that turn on/off creating of plots and tables - # (4) If necessary, edit labels for the dev and ref versions - # Note: When doing GCHP vs GCC comparisions gchp_dev will be compared - # to gcc_dev (not gcc_ref!). This ensures consistency in version names - # when doing GCHP vs GCC diff-of-diffs (mps, 6/27/19) - # ===================================================================== - # - # Configuration for 1 month FullChemBenchmark - # - # paths: - # main_dir: High-level directory containing ref & dev rundirs - # results_dir: Directory where plots/tables will be created - # weights_dir: Path to regridding weights - # spcdb_dir: Folder in which the species_database.yml file is - # located. If set to "default", then will look for - # species_database.yml in one of the Dev rundirs. - # - paths: - main_dir: /n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/geos-chem/validation/gcpy_test_data/1mon - results_dir: /path/to/BenchmarkResults - weights_dir: /n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/gcdata/ExtData/GCHP/RegriddingWeights - spcdb_dir: default - # - # data: Contains configurations for ref and dev runs - # version: Version string (must not contain spaces) - # dir: Path to run directory - # outputs_subdir: Subdirectory w/ GEOS-Chem diagnostic files - # restarts_subdir: Subdirectory w/ GEOS-Chem restarts - # bmk_start: Simulation start date (YYYY-MM-DDThh:mm:ss) - # bmk_end: Simulation end date (YYYY-MM-DDThh:mm:ss) - # resolution: GCHP resolution string - # - data: - ref: - gcc: - version: GCC_ref - dir: GCC_ref - outputs_subdir: OutputDir - restarts_subdir: Restarts - bmk_start: "2019-07-01T00:00:00" - bmk_end: "2019-08-01T00:00:00" - gchp: - version: GCHP_ref - dir: GCHP_ref - outputs_subdir: OutputDir - restarts_subdir: Restarts - bmk_start: "2019-07-01T00:00:00" - bmk_end: "2019-08-01T00:00:00" - is_pre_13.1: False - is_pre_14.0: False - resolution: c24 - dev: - gcc: - version: GCC_dev - dir: GCC_dev - outputs_subdir: OutputDir - restarts_subdir: Restarts - bmk_start: "2019-07-01T00:00:00" - bmk_end: "2019-08-01T00:00:00" - gchp: - version: GCHP_dev - dir: GCHP_dev - outputs_subdir: OutputDir - restarts_subdir: Restarts - bmk_start: "2019-07-01T00:00:00" - bmk_end: "2019-08-01T00:00:00" - is_pre_13.1: False - is_pre_14.0: False - resolution: c24 - # - # options: Specify the types of comparisons to perform - # - options: - bmk_type: FullChemBenchmark - gcpy_test: True # Specify if this is a gcpy test validation run - comparisons: - gcc_vs_gcc: - run: True # True to run this comparison - dir: GCC_version_comparison - tables_subdir: Tables - gchp_vs_gcc: - run: True - dir: GCHP_GCC_comparison - tables_subdir: Tables - gchp_vs_gchp: - run: True - dir: GCHP_version_comparison - tables_subdir: Tables - gchp_vs_gcc_diff_of_diffs: - run: True - dir: GCHP_GCC_diff_of_diffs - # - # outputs: Types of output 
to generate (plots/tables) - # - outputs: - plot_conc: True - plot_emis: True - emis_table: True - plot_jvalues: True - plot_aod: True - mass_table: True - ops_budget_table: False - OH_metrics: True - ste_table: True # GCC only - plot_options: # Plot concentrations and emissions by category? - by_spc_cat: True - by_hco_cat: True - -YAML configuration files for 1-year benchmarks -(:file:`1yr_fullchem_benchmark.yml`, :file:`1yr_tt_benchmark.yml`) are -also provided in the :file:`benchmarks` folder. diff --git a/docs/source/bpch_to_nc.rst b/docs/source/bpch_to_nc.rst deleted file mode 100644 index d7dd9325..00000000 --- a/docs/source/bpch_to_nc.rst +++ /dev/null @@ -1,188 +0,0 @@ -Convert BPCH to NetCDF -====================== - -.. code-block:: python - - - #!/usr/bin/env python - ''' - Example script that illustrates how to create a netCDF file - from an old GEOS-Chem binary punch ("bpch") file. - ''' - - # Imports - import gcpy - import xarray as xr - import xbpch as xb - import warnings - - # Suppress harmless run-time warnings (mostly about underflow in division) - warnings.filterwarnings('ignore', category=RuntimeWarning) - warnings.filterwarnings('ignore', category=UserWarning) - - # ---------------------------------------------------------------------- - # User configurable settings (EDIT THESE ACCORDINGLY) - # ---------------------------------------------------------------------- - - # Name of Bpch file - bpchfile = '/path/to/bpch/file' - - # tracerinfo.dat and diaginfo,dat fiels - tinfo_file = '/path/to/tracerinfo.dat' - dinfo_file = '/path/to/diaginfo.dat' - - # Name of netCDF file - ncfile = '/path/to/netcdf/file' - - # Date string for the time:units attribute - datestr = 'YYYY-MM-DD' - - # Number of seconds in the diagnostic interval (assume 1-month) - interval = 86400.0 * 31.0 - - # ---------------------------------------------------------------------- - # Open the bpch file and save it into an xarray Dataset object - # NOTE: For best results, also specify the corresponding - # tracerinfo.dat diaginfo.dat metadata files. - # ---------------------------------------------------------------------- - try: - ds = xb.open_bpchdataset(filename=bpchfile, - tracerinfo_file=tinfo_file, - diaginfo_file=dinfo_file) - except FileNotFoundError: - print('Could not find file {}'.format(bpchfile)) - raise - - # ---------------------------------------------------------------------- - # Further manipulate the Dataset - # ---------------------------------------------------------------------- - - # Transpose the order of the xarray Dataset object read by - # xbpch so that its dimensions will be in the same order as - # Dataset objects read from netCDF files. - ds = ds.transpose() - - # Convert the bpch variable names to the same naming - # convention as the netCDF ("History") diagnostics. 
- ds = gcpy.convert_bpch_names_to_netcdf_names(ds) - - # xbpch does not include a time dimension, so we'll add one here - coords = ds.coords - coords['time'] = 0.0 - - # ------------------------------------------------------------------ - # Further edit variable attributes - # ------------------------------------------------------------------ - for v in ds.data_vars.keys(): - - # Append time to the data array - ds[v] = xr.concat([ds[v]], 'time') - - # Add long_name attribute for COARDS netCDF compliance - ds[v].attrs['long_name'] = ds[v].attrs['full_name'] - - # Remove some extraneous attributes that xbpch sets - del ds[v].attrs['name'] - del ds[v].attrs['full_name'] - del ds[v].attrs['scale_factor'] - del ds[v].attrs['hydrocarbon'] - del ds[v].attrs['tracer'] - del ds[v].attrs['category'] - del ds[v].attrs['chemical'] - del ds[v].attrs['original_shape'] - del ds[v].attrs['origin'] - del ds[v].attrs['number'] - del ds[v].attrs['molwt'] - del ds[v].attrs['C'] - - # Make the units attribute consistent with the units - # attribute from the GEOS-Chem History diagnostics - # NOTE: There probably is a more Pythonic way to code - # this, but this will work for sure. - if 'ug/m3' in ds[v].units: - ds[v].attrs['units'] = 'ug m-3' - if 'ug Celsius/m3' in ds[v].units: - ds[v].attrs['units'] = 'ug C m-3' - if 'count/cm3' in ds[v].units: - ds[v].attrs['units'] = 'molec m-3' - if 'cm/s' in ds[v].units: - ds[v].attrs['units'] = 'cm s-1' - if 'count/cm2/s' in ds[v].units: - ds[v].attrs['units'] = 'molec cm-2 s-1' - if 'kg/m2s' in ds[v].units: - ds[v].attrs['units'] = 'kg m-2 s-1' - if 'kg/m2/s' in ds[v].units: - ds[v].attrs['units'] = 'kg m-2 s-1' - if 'kg/s' in ds[v].units: - ds[v].attrs['units'] = 'kg s-1' - if 'W/m2' in ds[v].units: - ds[v].attrs['units'] = 'W m-2' - if 'm/s' in ds[v].units: - ds[v].attrs['units'] = 'm s-1' - if 'Pa/s' in ds[v].units: - ds[v].attrs['units'] = 'Pa s-1' - if 'g/kg' in ds[v].units: - ds[v].attrs['units'] = 'g kg-1' - if v.strip() == 'TotalOC': - ds[v].attrs['units'] = 'ug m-3' - if v.strip() in [ 'HO2concAfterChem']: - ds[v].attrs['units'] = 'ppb' - if v.strip() in ['O1DconcAfterChem', - 'O3PconcAfterChem', - 'OHconcAfterChem']: - ds[v].attrs['units'] = 'molec cm-3' - if v.strip() in ['Loss_CO', 'Prod_CO', - 'Loss_Ox', 'Prod_Ox', 'Prod_SO4']: - ds[v].attrs['units'] = 'molec/cm3/s' - if v.strip() in 'Met_CLDTOPS': - ds[v].attrs['units'] = 'level' - if v.strip() in 'Met_PHIS': - ds[v].attrs['units'] = 'm2 s-1' - if v.strip() in ['Met_PRECCON', 'Met_PRECTOT']: - ds[v].attrs['units'] = 'kg m-2 s-1' - if v.strip() in 'Met_AVGW': - ds[v].attrs['units'] = 'vol vol-1' - if v.strip() in 'Met_AIRNUMDEN': - ds[v].attrs['units'] = 'molec cm-3' - if v.strip() in ['ProdCOfromCH4', 'ProdCOfromNMVOC']: - ds[v].attrs['units'] = 'molec cm-3 s-1' - - # Convert these prodloss diagnostics from kg (bpch) to kg/s - # to be consistent with the GEOS-Chem History diagnostics - # NOTE: Assume a 1-month interval ( - if v.strip() in ['ProdSO4fromH2O2inCloud', 'ProdSO4fromO3inCloud', - 'ProdSO4fromO2inCloudMetal', 'ProdSO4fromO3inSeaSalt', - 'ProdSO4fromHOBrInCloud', 'ProdSO4fromSRO3', - 'ProdSO4fromSRHObr', 'ProdSO4fromO3s']: - ds[v].attrs['units'] = 'kg S s-1' - ds[v] = ds[v] / interval - if v.strip() in ['LossHNO3onSeaSalt']: - ds[v].attrs['units'] = 'kg s-1' - ds[v] = ds[v] / interval - - # ------------------------------------------------------------------ - # Edit attributes for coordinate dimensions - # ------------------------------------------------------------------ - - # Time - 
ds['time'].attrs['long_name'] = 'time'
- ds['time'].attrs['units'] = \
- 'hours since {} 00:00:00.00 UTC'.format(datestr)
- ds['time'].attrs['calendar'] = 'standard'
- ds['time'].attrs['axis'] = 'T'
-
- # "lon", "lat", "lev"
- ds['lon'].attrs['axis'] = 'X'
- ds['lat'].attrs['axis'] = 'Y'
- ds['lev'].attrs['axis'] = 'Z'
- ds['lev'].attrs['units'] = 'level'
-
- # Global title
- ds.attrs['title'] = 'Created by bpch2nc.py'
- ds.attrs['conventions'] = 'COARDS'
- ds.attrs['references'] = 'www.geos-chem.org; wiki.geos-chem.org'
-
- # ------------------------------------------------------------------
- # Create the netCDF file
- # ------------------------------------------------------------------
- ds.to_netcdf(ncfile)
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 7552a838..53840721 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -22,7 +22,7 @@ author = 'GEOS-Chem Support Team'
 # The full version, including alpha/beta/rc tags
-release = '1.3.3'
+release = '1.4.0'
 # -- General configuration ---------------------------------------------------
diff --git a/docs/source/index.rst b/docs/source/index.rst
index cf6cdbcc..1c38079b 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -25,26 +25,24 @@ For documentation on setting up and running GEOS-Chem please see our
 .. toctree::
 :maxdepth: 4
 :caption: Usage Details:
-
+
 Plotting
- Tabling
 Regridding
+ Benchmarking
 .. toctree::
 :maxdepth: 1
 :caption: Example Scripts:
-
+
 Six_panel
 Single_panel
- benchmark_plotting
 plot_timeseries
- bpch_to_nc
 .. toctree::
 :maxdepth: 1
 :caption: Help & Reference:
-
- Report_Request
- Contributing
+
+ reference/CONTRIBUTING.md
+ reference/SUPPORT.md
 editing_these_docs
 Release_guide
diff --git a/docs/source/plot_timeseries.rst b/docs/source/plot_timeseries.rst
index be60ce53..f228e533 100644
--- a/docs/source/plot_timeseries.rst
+++ b/docs/source/plot_timeseries.rst
@@ -1,8 +1,12 @@
+.. _plot-timeseries:
+
+###############
 Plot Timeseries
-===============
+###############

-.. code-block:: python
+This example script may also be found at `gcpy/examples/timeseries/plot_timeseries.py `_.

+.. code-block:: python
 #!/usr/bin/env python
 '''
@@ -14,19 +18,19 @@ Plot Timeseries
 Page 1:
 -------
- O3 from the first model layer (from the "SpeciesConc"
- diagnostic collection is) plotted in blue.
+ O3 from the first model layer (from the "SpeciesConc"
+ diagnostic collection) is plotted in blue.
- O3 at 10 meter height (from the "SpeciesConc_10m"
- diagnostic collection) is plotted in red.
+ O3 at 10 meter height (from the "SpeciesConc_10m"
+ diagnostic collection) is plotted in red.
 Page 2:
 -------
- HNO3 from the first model layer (from the SpeciesConc
- diagnostic collection is) plotted in blue.
+ HNO3 from the first model layer (from the SpeciesConc
+ diagnostic collection) is plotted in blue.
- HNO3 at 10 meter height (from the SpeciesConc_10m
- diagnostic collection) is plotted in red.
+ HNO3 at 10 meter height (from the SpeciesConc_10m
+ diagnostic collection) is plotted in red.
 You can of course modify this for your own particular applications.
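+
+ A quick sketch of the overall flow (added here as a summary; both
+ functions are defined later in this script):
+
+ ds = read_geoschem_data(path_to_data, collections)
+ plot_timeseries_data(ds, site_coords) # writes O3_and_HNO3.pdf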
@@ -38,15 +42,16 @@ Plot Timeseries ''' # Imports - import gcpy.constants as gcon import os + import warnings import numpy as np import matplotlib.dates as mdates import matplotlib.ticker as mticker import matplotlib.pyplot as plt from matplotlib.backends.backend_pdf import PdfPages import xarray as xr - import warnings + from gcpy import constants + # Tell matplotlib not to look for an X-window, as we are plotting to # a file and not to the screen. This will avoid some warning messages. @@ -58,292 +63,292 @@ Plot Timeseries def find_files_in_dir(path, substrs): - ''' - Returns a list of all files in a directory that match one or more - substrings. - - Args: - ----- - path : str - Path to the directory in which to search for files. - - substrs : list of str - List of substrings used in the search for files. - - Returns: - -------- - file_list : list of str - List of files in the directory (specified by path) - that match all substrings (specified in substrs). - ''' - - # Initialize - file_list = [] - - # Walk through the given data directory. Then for each file found, - # add it to file_list if it matches text in search_list. - for root, directory, files in os.walk(path): - for f in files: - for s in substrs: - if s in f: - file_list.append(os.path.join(root, f)) - - # Return an alphabetically sorted list of files - file_list.sort() - return file_list - - - def find_value_index(seq, val): - ''' - Finds the index of a numpy array that is close to a value. - - Args: - ----- - seq : numpy ndarray - An array of numeric values. - - val : number - The value to search for in seq. - - Returns: - -------- - result : integer - The index of seq that has a value closest to val. - - Remarks: - -------- - This algorithm was found on this page: - https://stackoverflow.com/questions/48900977/find-all-indexes-of-a-numpy-array-closest-to-a-value - ''' - r = np.where(np.diff(np.sign(seq - val)) != 0) - idx = r + (val - seq[r]) / (seq[r + np.ones_like(r)] - seq[r]) - idx = np.append(idx, np.where(seq == val)) - idx = np.sort(idx) - result = np.round(idx) - - # NOTE: xarray needs integer values, so convert here! - return int(result[0]) - - - def read_geoschem_data(path, collections): - ''' - Returns an xarray Dataset containing timeseries data. - - Args: - ----- - path : str - Directory path where GEOS-Chem diagnostic output - files may be found. - - collections: list of str - List of GEOS-Chem collections. Files for these - collections will be read into the xarray Dataset. - - Returns: - -------- - ds : xarray Dataset - A Dataset object containing the GEOS-Chem diagnostic - output corresponding to the collections that were - specified. - ''' - - # Get a list of variables that GCPy should not read. - # These are mostly variables introduced into GCHP with the MAPL v1.0.0 - # update. These variables contain either repeated or non-standard - # dimensions that can cause problems in xarray when combining datasets. 
- skip_vars = gcon.skip_these_vars - - # Find all files in the given - file_list = find_files_in_dir(path, collections) - - # Return a single xarray Dataset containing data from all files - # NOTE: Need to add combine="nested" for xarray 0.15 and higher - v = xr.__version__.split(".") - if int(v[0]) == 0 and int(v[1]) >= 15: - return xr.open_mfdataset(file_list, - drop_variables=skip_vars, - combine="nested", - concat_dim=None) - else: - return xr.open_mfdataset(file_list, - drop_variables=skip_vars) - - - def plot_timeseries_data(ds, site_coords): - ''' - Plots a timseries of data at a given (lat,lon) location. - - Args: - ----- - ds : xarray Dataset - Dataset containing GEOS-Chem timeseries data. - - site_coords : tuple - Contains the coordinate (lat, lon) of a site location - at which the timeseries data will be plotted. - ''' - - # ---------------------------------------------------------------------- - # Get the GEOS-Chem data for O3 and HNO3 corresponding to the - # location of the observational station. We will save these into - # xarray DataArray objects, which we'll need for plotting. - # - # YOU CAN EDIT THIS FOR YOUR OWN PARTICULAR APPLICATION! - # ---------------------------------------------------------------------- - - # Find the indices corresponding to the site lon and lat - lat_idx = find_value_index(ds.lat.values, site_coords[0]) - lon_idx = find_value_index(ds.lon.values, site_coords[1]) - - # Save O3 from the first level (~60m height) (ppb) into a DataArray - O3_L1 = ds['SpeciesConc_O3'].isel(lon=lon_idx, lat=lat_idx, lev=0) - O3_L1 *= 1.0e9 - O3_L1.attrs['units'] = 'ppbv' - - # Save O3 @ 10m height into a DataArray - O3_10m = ds['SpeciesConc10m_O3'].isel(lon=lon_idx, lat=lat_idx) - O3_10m *= 1.0e9 - O3_10m.attrs['units'] = 'ppbv' - - # Save HNO3 from the first level (~60m height) into a DataArray - HNO3_L1 = ds['SpeciesConc_HNO3'].isel(lon=lon_idx, lat=lat_idx, lev=0) - HNO3_L1 *= 1.0e9 - HNO3_L1.attrs['units'] = 'ppbv' - - # Save HNO3 @ 10m height into a DataArray - HNO3_10m = ds['SpeciesConc10m_HNO3'].isel(lon=lon_idx, lat=lat_idx) - HNO3_10m *= 1.0e9 - HNO3_10m.attrs['units'] = 'ppbv' - - # ---------------------------------------------------------------------- - # Create a PDF file of the plots - # ---------------------------------------------------------------------- - - # Get min & max days of the plot span (for setting the X-axis range). - # To better center the plot, add a cushion of 12 hours on either end. 
- time = ds['time'].values - datemin = np.datetime64(time[0]) - np.timedelta64(12, 'h') - datemax = np.datetime64(time[-1]) + np.timedelta64(12, 'h') - - # Define a PDF object so that we can save the plots to PDF - pdf = PdfPages('O3_and_HNO3.pdf') - - # Loop over number of desired pages (in this case, 2) - for i in range(0, 2): - - # Create a new figure: 1 plot per page, 2x as wide as high - figs, ax0 = plt.subplots(1, 1, figsize=[12, 6]) - - # ----------------------------- - # Plot O3 on the first page - # ----------------------------- - if i == 0: - - # 1st model level - O3_L1.plot.line(ax=ax0, x='time', color='blue', - marker='o', label='O3 from 1st model level', - linestyle='-') - - # 10 mheight - O3_10m.plot.line(ax=ax0, x='time', color='red', - marker='x', label='O3 at 10m height', - linestyle='-') - - # Set title (has to be after the line plots are drawn) - ax0.set_title('O3 from the 1st model level and at 10m height') - - # Set Y-axis minor tick marks at every 2 ppb (5 intervals) - ax0.yaxis.set_minor_locator(mticker.AutoMinorLocator(5)) - - # Set y-axis title - ax0.set_ylabel('O3 (ppbv)') - - # ----------------------------- - # Plot HNO3 on the second page - # ----------------------------- - if i == 1: - - # 1st model level - HNO3_L1.plot.line(ax=ax0, x='time', color='blue', - marker='o', label='HNO3 from 1st model level', - linestyle='-') - - # 10m height - HNO3_10m.plot.line(ax=ax0, x='time', color='red', - marker='x', label='HNO3 at 10m height', - linestyle='-') - - # Set title (has to be after the line plots are drawn - ax0.set_title('HNO3 from the 1st model level and at 10m height') - - # Set Y-axis minor tick marks at every 0.05 ppb (4 intervals) - ax0.yaxis.set_minor_locator(mticker.AutoMinorLocator(4)) - - # Set y-axis title - ax0.set_ylabel('HNO3 (ppbv)') - - # ----------------------------- - # Set general plot parameters - # ----------------------------- - - # Add the plot legend - ax0.legend() - - # Set the X-axis range - ax0.set_xlim(datemin, datemax) - - # Set the X-axis major tickmarks - locator = mdates.DayLocator() - formatter = mdates.DateFormatter('%d') - ax0.xaxis.set_major_locator(locator) - ax0.xaxis.set_major_formatter(formatter) - - # Set X-axis minor tick marks at noon of each day - # (i.e. split up the major interval into 2 bins) - ax0.xaxis.set_minor_locator(mticker.AutoMinorLocator(2)) - - # Don't rotate the X-axis jtick labels - ax0.xaxis.set_tick_params(rotation=0) + ''' + Returns a list of all files in a directory that match one or more + substrings. + + Args: + ----- + path : str + Path to the directory in which to search for files. + + substrs : list of str + List of substrings used in the search for files. + + Returns: + -------- + file_list : list of str + List of files in the directory (specified by path) + that match all substrings (specified in substrs). + ''' + + # Initialize + file_list = [] + + # Walk through the given data directory. Then for each file found, + # add it to file_list if it matches text in search_list. + for root, directory, files in os.walk(path): + for f in files: + for s in substrs: + if s in f: + file_list.append(os.path.join(root, f)) + + # Return an alphabetically sorted list of files + file_list.sort() + return file_list + + + def find_value_index(seq, val): + ''' + Finds the index of a numpy array that is close to a value. + + Args: + ----- + seq : numpy ndarray + An array of numeric values. + + val : number + The value to search for in seq. 
+
+ Returns:
+ --------
+ result : integer
+ The index of seq that has a value closest to val.
+
+ Remarks:
+ --------
+ This algorithm was found on this page:
+ https://stackoverflow.com/questions/48900977/find-all-indexes-of-a-numpy-array-closest-to-a-value
+ '''
+ r = np.where(np.diff(np.sign(seq - val)) != 0)
+ idx = r + (val - seq[r]) / (seq[r + np.ones_like(r)] - seq[r])
+ idx = np.append(idx, np.where(seq == val))
+ idx = np.sort(idx)
+ result = np.round(idx)
+
+ # NOTE: xarray needs integer values, so convert here!
+ return int(result[0])
+
+
+ def read_geoschem_data(path, collections):
+ '''
+ Returns an xarray Dataset containing timeseries data.
+
+ Args:
+ -----
+ path : str
+ Directory path where GEOS-Chem diagnostic output
+ files may be found.
+
+ collections: list of str
+ List of GEOS-Chem collections. Files for these
+ collections will be read into the xarray Dataset.
+
+ Returns:
+ --------
+ ds : xarray Dataset
+ A Dataset object containing the GEOS-Chem diagnostic
+ output corresponding to the collections that were
+ specified.
+ '''
+
+ # Get a list of variables that GCPy should not read.
+ # These are mostly variables introduced into GCHP with the MAPL v1.0.0
+ # update. These variables contain either repeated or non-standard
+ # dimensions that can cause problems in xarray when combining datasets.
+ skip_vars = constants.skip_these_vars
+
+ # Find all files in the given path
+ file_list = find_files_in_dir(path, collections)
+
+ # Return a single xarray Dataset containing data from all files
+ # NOTE: Need to add combine="nested" for xarray 0.15 and higher
+ v = xr.__version__.split(".")
+ if int(v[0]) == 0 and int(v[1]) >= 15:
+ return xr.open_mfdataset(file_list,
+ drop_variables=skip_vars,
+ combine="nested",
+ concat_dim=None)
+ else:
+ return xr.open_mfdataset(file_list,
+ drop_variables=skip_vars)
+
+
+ def plot_timeseries_data(ds, site_coords):
+ '''
+ Plots a timeseries of data at a given (lat,lon) location.
+
+ Args:
+ -----
+ ds : xarray Dataset
+ Dataset containing GEOS-Chem timeseries data.
+
+ site_coords : tuple
+ Contains the coordinate (lat, lon) of a site location
+ at which the timeseries data will be plotted.
+ '''
+
+ # ----------------------------------------------------------------------
+ # Get the GEOS-Chem data for O3 and HNO3 corresponding to the
+ # location of the observational station. We will save these into
+ # xarray DataArray objects, which we'll need for plotting.
+ #
+ # YOU CAN EDIT THIS FOR YOUR OWN PARTICULAR APPLICATION!
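+ #
+ # (An illustrative aside, not part of the original script: xarray can
+ # also select the nearest grid box directly, which could replace the
+ # find_value_index helper above. A hedged one-line sketch:
+ # dr = ds['SpeciesConc_O3'].sel(lat=site_coords[0], lon=site_coords[1], method='nearest')
+ # )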
+ # ----------------------------------------------------------------------
+
+ # Find the indices corresponding to the site lon and lat
+ lat_idx = find_value_index(ds.lat.values, site_coords[0])
+ lon_idx = find_value_index(ds.lon.values, site_coords[1])
+
+ # Save O3 from the first level (~60m height) (ppb) into a DataArray
+ O3_L1 = ds['SpeciesConc_O3'].isel(lon=lon_idx, lat=lat_idx, lev=0)
+ O3_L1 *= 1.0e9
+ O3_L1.attrs['units'] = 'ppbv'
+
+ # Save O3 @ 10m height into a DataArray
+ O3_10m = ds['SpeciesConc10m_O3'].isel(lon=lon_idx, lat=lat_idx)
+ O3_10m *= 1.0e9
+ O3_10m.attrs['units'] = 'ppbv'
+
+ # Save HNO3 from the first level (~60m height) into a DataArray
+ HNO3_L1 = ds['SpeciesConc_HNO3'].isel(lon=lon_idx, lat=lat_idx, lev=0)
+ HNO3_L1 *= 1.0e9
+ HNO3_L1.attrs['units'] = 'ppbv'
+
+ # Save HNO3 @ 10m height into a DataArray
+ HNO3_10m = ds['SpeciesConc10m_HNO3'].isel(lon=lon_idx, lat=lat_idx)
+ HNO3_10m *= 1.0e9
+ HNO3_10m.attrs['units'] = 'ppbv'
+
+ # ----------------------------------------------------------------------
+ # Create a PDF file of the plots
+ # ----------------------------------------------------------------------
+
+ # Get min & max days of the plot span (for setting the X-axis range).
+ # To better center the plot, add a cushion of 12 hours on either end.
+ time = ds['time'].values
+ datemin = np.datetime64(time[0]) - np.timedelta64(12, 'h')
+ datemax = np.datetime64(time[-1]) + np.timedelta64(12, 'h')
+
+ # Define a PDF object so that we can save the plots to PDF
+ pdf = PdfPages('O3_and_HNO3.pdf')
+
+ # Loop over number of desired pages (in this case, 2)
+ for i in range(0, 2):
+
+ # Create a new figure: 1 plot per page, 2x as wide as high
+ figs, ax0 = plt.subplots(1, 1, figsize=[12, 6])
+
+ # -----------------------------
+ # Plot O3 on the first page
+ # -----------------------------
+ if i == 0:
+
+ # 1st model level
+ O3_L1.plot.line(ax=ax0, x='time', color='blue',
+ marker='o', label='O3 from 1st model level',
+ linestyle='-')
+
+ # 10 m height
+ O3_10m.plot.line(ax=ax0, x='time', color='red',
+ marker='x', label='O3 at 10m height',
+ linestyle='-')
+
+ # Set title (has to be after the line plots are drawn)
+ ax0.set_title('O3 from the 1st model level and at 10m height')
+
+ # Set Y-axis minor tick marks at every 2 ppb (5 intervals)
+ ax0.yaxis.set_minor_locator(mticker.AutoMinorLocator(5))
+
+ # Set y-axis title
+ ax0.set_ylabel('O3 (ppbv)')
+
+ # -----------------------------
+ # Plot HNO3 on the second page
+ # -----------------------------
+ if i == 1:
+
+ # 1st model level
+ HNO3_L1.plot.line(ax=ax0, x='time', color='blue',
+ marker='o', label='HNO3 from 1st model level',
+ linestyle='-')
+
+ # 10m height
+ HNO3_10m.plot.line(ax=ax0, x='time', color='red',
+ marker='x', label='HNO3 at 10m height',
+ linestyle='-')
+
+ # Set title (has to be after the line plots are drawn)
+ ax0.set_title('HNO3 from the 1st model level and at 10m height')
+
+ # Set Y-axis minor tick marks at every 0.05 ppb (4 intervals)
+ ax0.yaxis.set_minor_locator(mticker.AutoMinorLocator(4))
+
+ # Set y-axis title
+ ax0.set_ylabel('HNO3 (ppbv)')
+
+ # -----------------------------
+ # Set general plot parameters
+ # -----------------------------
+
+ # Add the plot legend
+ ax0.legend()
+
+ # Set the X-axis range
+ ax0.set_xlim(datemin, datemax)
+
+ # Set the X-axis major tickmarks
+ locator = mdates.DayLocator()
+ formatter = mdates.DateFormatter('%d')
+ ax0.xaxis.set_major_locator(locator)
+ ax0.xaxis.set_major_formatter(formatter)
+
+ # Set X-axis minor tick marks at noon of each day
+ # (i.e. split up the major interval into 2 bins)
+ ax0.xaxis.set_minor_locator(mticker.AutoMinorLocator(2))
+
+ # Don't rotate the X-axis tick labels
+ ax0.xaxis.set_tick_params(rotation=0)
+
+ # Center the X-axis tick labels
+ for tick in ax0.xaxis.get_major_ticks():
+ tick.label1.set_horizontalalignment('center')
+
+ # Set X-axis and Y-axis labels
+ ax0.set_xlabel('Day of July (and August) 2016')
+
+ # -----------------------------
+ # Save this page to PDF
+ # -----------------------------
+ pdf.savefig(figs)
+ plt.close(figs)
- # Center the X-axis tick labels
- for tick in ax0.xaxis.get_major_ticks():
- tick.label1.set_horizontalalignment('center')
+ # ----------------------------------------------------------------------
+ # Save the PDF file to disk
+ # ----------------------------------------------------------------------
+ pdf.close()
- # Set X-axis and Y-axis labels
- ax0.set_xlabel('Day of July (and August) 2016')
-
- # -----------------------------
- # Save this page to PDF
- # -----------------------------
- pdf.savefig(figs)
- plt.close(figs)
- # ----------------------------------------------------------------------
- # Save the PDF file to disk
- # ----------------------------------------------------------------------
- pdf.close()
+ def main():
+ '''
+ Main program.
+ '''
+ # Path where the data files live
+ # (YOU MUST EDIT THIS FOR YOUR OWN PARTICULAR APPLICATION!)
+ path_to_data = '/path/to/GEOS-Chem/diagnostic/data/files'
+ # Get a list of files in the ConcAboveSfc and SpeciesConc collections
+ # (YOU CAN EDIT THIS FOR YOUR OWN PARTICULAR APPLICATION!)
+ collections = ['ConcAboveSfc', 'SpeciesConc']
- def main():
- '''
- Main program.
- '''
- # Path where the data files live
- # (YOU MUST EDIT THIS FOR YUR OWN PARTICULAR APPLICATION!)
- path_to_data = '/path/to/GEOS-Chem/diagnostic/data/files'
-
- # Get a list of files in the ConcAboveSfc and SpeciesConc collections
- # (YOU CAN EDIT THIS FOR YOUR OWN PARTICULAR APPLICATION!)
- collections = ['ConcAboveSfc', 'SpeciesConc']
-
- # Read GEOS-Chem data into an xarray Dataset
- ds = read_geoschem_data(path_to_data, collections)
+ # Read GEOS-Chem data into an xarray Dataset
+ ds = read_geoschem_data(path_to_data, collections)
- # Plot timeseries data at Centerville, AL (32.94N, 87.18W)
- # (YOU CAN EDIT THIS FOR YOUR OWN PARTICULAR APPLICATION!)
+ site_coords = (32.94, -87.18) + plot_timeseries_data(ds, site_coords) - if __name__ == "__main__": - main() + if __name__ == "__main__": + main() diff --git a/docs/source/reference/CONTRIBUTING.md b/docs/source/reference/CONTRIBUTING.md new file mode 120000 index 00000000..c97564d9 --- /dev/null +++ b/docs/source/reference/CONTRIBUTING.md @@ -0,0 +1 @@ +../../../CONTRIBUTING.md \ No newline at end of file diff --git a/docs/source/reference/SUPPORT.md b/docs/source/reference/SUPPORT.md new file mode 120000 index 00000000..7811d359 --- /dev/null +++ b/docs/source/reference/SUPPORT.md @@ -0,0 +1 @@ +../../../SUPPORT.md \ No newline at end of file diff --git a/examples/plotting/plot_comparisons.py b/examples/plotting/plot_comparisons.py deleted file mode 100755 index 426f8ecd..00000000 --- a/examples/plotting/plot_comparisons.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python -""" -Six Panel Comparison Plots --------------------------------------- -This example script demonstrates the comparitive plotting capabilities of GCPy, -including single level plots as well as global zonal mean plots. -These comparison plots are frequently used to evaluate results from different runs / versions -of GEOS-Chem, but can also be used to compare results from different points in one run that -are stored in separate xarray datasets. -The example data described here is in lat/lon format, but the same code works equally -well for cubed-sphere (GCHP) data. -""" - -#xarray allows us to read in any NetCDF file, the format of most GEOS-Chem diagnostics, -#as an xarray Dataset -import xarray as xr -ref_ds = xr.open_dataset('first_run/GEOSChem.Restart.20160801_0000z.nc4') -dev_ds = xr.open_dataset('second_run/GEOSChem.Restart.20160801_0000z.nc4') - -import gcpy.plot as gcplot - -""" -Single level plots ------------------- -""" - -#compare_single_level generates sets of six panel plots for data at a specified level in your datasets. -#By default, the level at index 0 (likely the surface) is plotted. Here we will plot data at ~500 hPa, -#which is located at index 21 in the standard 72-level and 47-level GMAO vertical grids. -ilev=21 - -#You likely want to look at the same variables across both of your datasets. If a variable is in -#one dataset but not the other, the plots will show NaN values for the latter. -#You can pass variable names in a list to these comparison plotting functions (otherwise all variables will plot). -varlist = ['SpeciesRst_O3', 'SpeciesRst_CO2'] - -#compare_single_level has many arguments which can be optionally specified. The first four arguments are required. -#They specify your first xarray Dataset, the name of your first dataset, your second xarray Dataset, and the name of -#your second dataset. Here we will also pass a specific level and the names of the variables you want to plot. -import matplotlib.pyplot as plt -gcplot.compare_single_level(ref_ds, 'Dataset 1', dev_ds, 'Dataset 2', ilev=ilev, varlist=varlist) -plt.show() - -#Using plt.show(), you can view the plots interactively. You can also save out the plots to a PDF. -gcplot.compare_single_level(ref_ds, 'Dataset 1', dev_ds, 'Dataset 2', ilev=ilev, varlist=varlist, pdfname='single_level.pdf') - -""" -Zonal Mean Plotting -------------------- -""" -#compare_zonal_mean generates sets of six panel plots containing zonal mean data across your dataset. -#compare_zonal_mean shares many of the same arguments as compare_single_level. 
-#You can specify pressure ranges in hPa for zonal mean plotting (by default every vertical level is plotted) -gcplot.compare_zonal_mean(ref_ds, 'Dataset 1', dev_ds, 'Dataset 2', pres_range=[0, 100], varlist=varlist, pdfname='zonal_mean.pdf') - diff --git a/examples/plotting/plot_single_panel.py b/examples/plotting/plot_single_panel.py deleted file mode 100755 index fd2b0a47..00000000 --- a/examples/plotting/plot_single_panel.py +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/env python -""" -Global and Regional Single Panel Plots --------------------------------------- -This example script demonstrates the core single panel plotting capabilities of GCPy, -including global and regional single level plots as well as global zonal mean plots. -The example data described here is in lat/lon format, but the same code works equally -well for cubed-sphere (GCHP) data. -For full documentation on the plotting capabilities of GCPy (including full argument lists), -please see the GCPy Wiki at https://github.com/geoschem/gcpy/wiki -""" - -#xarray allows us to read in any NetCDF file, the format of most GEOS-Chem diagnostics, -#as an xarray Dataset -import xarray as xr -ds = xr.open_dataset('GEOSChem.Restart.20160701_0000z.nc4') - -#You can easily view the variables available for plotting using xarray. -#Each of these variables has its own xarray DataArray within the larger Dataset container. -print(ds.data_vars) - -#Most variables have some sort of prefix; in this example all variables are -#prefixed with 'SpeciesRst_'. We'll select the DataArray for ozone. -da = ds.SpeciesRst_O3 - -#Printing a DataArray gives a summary of the dimensions and attributes of the data. -print(da) -#This Restart file has a time dimension of size 1, with 72 vertical levels, -#46 latitude indicies, and 72 longitude indices. -import gcpy.plot as gcplot - -""" -Single level plots ------------------- -""" -#gcpy.single_panel is the core plotting function of GCPy, able to create a one panel zonal mean or -#single level plot. Here we will create a single level plot of ozone at ~500 hPa. -#We must manually index into the level that we want to plot (index 22 in the standard 72-layer -#and 47-layer GMAO vertical grids). -slice_500 = da.isel(lev=22) - -#single_panel has many arguments which can be optionally specified. The only argument you must always -#pass to a call to single_panel is the DataArray that you want to plot. 
-#By default, the created plot includes a colorbar with units read from the DataArray, an automatic title -#(the data variable name in the DataArray), and an extent equivalent to the full lat/lon extent of the DataArray -import matplotlib.pyplot as plt -gcplot.single_panel(slice_500) -plt.show() - -#You can specify a specific area of the globe you would like plotted using the 'extent' argument, -#which uses the format [min_longitude, max_longitude, min_latitude, max_latitude] with bounds [-180, 180, -90, 90] -gcplot.single_panel(slice_500, extent=[50, -90, -10, 60]) -plt.show() - -#Other commonly used arguments include specifying a title and a colormap (defaulting to a White-Green-Yellow-Red colormap) -#You can find more colormaps at https://matplotlib.org/tutorials/colors/colormaps.html -gcplot.single_panel(slice_500, title='500mb Ozone over the North Pacific', comap = plt.cm.viridis, - log_color_scale=True, extent=[80, -90, -10, 60]) -plt.show() - -""" -Zonal Mean Plotting -------------------- -""" - -#Use the plot_type argument to specify zonal_mean plotting -gcplot.single_panel(da, plot_type="zonal_mean") -plt.show() - -#You can specify pressure ranges in hPa for zonal mean plot (by default every vertical level is plotted) -gcplot.single_panel(da, pres_range=[0, 100], log_yaxis=True, log_color_scale=True) -plt.show() - diff --git a/examples/timeseries/mda8_o3_timeseries.py b/examples/timeseries/mda8_o3_timeseries.py deleted file mode 100755 index 353d8c1b..00000000 --- a/examples/timeseries/mda8_o3_timeseries.py +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env python -""" -MDA8 Timeseries Calculations -======================================== - -A common statistic used when constructing standards for air quality -criteria pollutants is to look at the ranked distribution of the -daily maxima of rolling 8-hour averages of a substance, or MDA8 for -short. -""" -# Author: Daniel Rothenberg -# Version: June 1, 2017 - -import matplotlib.pyplot as plt -plt.style.use(['seaborn-talk', 'seaborn-ticks']) - -import pandas as pd -import xarray as xr - - -# Read hourly data -InFile = 'GEOSChem.Hourly_SfcO3.2017.nc' -ds = xr.open_dataset(InFile) -o3_data = ds['SpeciesConc_O3'] - -# Compute the 8-hour rolling averages for ozone -avg_8hr_o3 = (o3_data.rolling(time=8, min_periods=6).mean()) - -# By default, this takes the last timestamp in a rolling interval; i.e. the -# timestamps correspond to the preceding 8 hours. We want them to refer to -# the proeding 8 hours, so we can adjust them using datetime arithmetic -times_np = avg_8hr_o3.time.values -times_pd = pd.to_datetime(times_np) - pd.Timedelta('8h') -avg_8hr_o3.time.values[:] = times_pd - -# Finally, aggregate by calendar day and compute the maxima of the set of -# 8-hour averages for each day -mda8_o3 = avg_8hr_o3.resample(time='D').max(dim='time') -mda8_o3.name='mda8_o3' - -# Save output to new netCDF file -mda8_o3.to_netcdf('GEOSChem.MDA8_O3.20170.nc', 'w', format='NETCDF4', - encoding={'lat': {'_FillValue': None}, - 'lon': {'_FillValue': None}, - 'time': {'_FillValue': None}, - 'mda8_o3': {'_FillValue': None}}) - -# Select data for one specific location, near Boston -boston_mda8_o3 = mda8_o3.sel(lon=-71., lat=42., method='nearest') -boston_o3 = o3_data.sel(lon=-71., lat=42., method='nearest') - -# Plot both the original (hourly) and MDA* timeseries on the same plot. 
-fig = plt.figure(figsize=(9, 3)) -ax = fig.add_subplot(111) -boston_o3.plot(ax=ax, color='k') -ax.stem(boston_mda8_o3.time.values, boston_mda8_o3.data, - ':r', markerfmt='ro') -ax.set_ylim(0) - -import matplotlib.dates as mdates -ax.xaxis.set_major_formatter(mdates.DateFormatter("%h %d")) -for tick in ax.xaxis.get_majorticklabels(): - tick.set_horizontalalignment('center') - -ax.set_xlabel("") -ax.set_ylabel("(MDA8) O$_3$ [ppb]") - -plt.show() diff --git a/examples/working_with_files/regrid_restart_ll_to_cs.py b/examples/working_with_files/regrid_restart_ll_to_cs.py deleted file mode 100755 index 7fd1f542..00000000 --- a/examples/working_with_files/regrid_restart_ll_to_cs.py +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env python -""" -Regrids a 4x5 GEOS-Chem Classic restart file to cubed-sphere resolutions. -""" - -# Imports -from os.path import join -import numpy as np -import xarray as xr -import sparselt.esmf -import sparselt.xr - -# Path to regridding weights (EDIT AS NEEDED) -weights_dir="/path/to/regridding/weights/" - -# List of simulation types (EDIT AS NEEDED) -simulation_list = ["carboncycle"] - -# List of months (EDIT AS NEEDED) -month_list = ["01", "07"] - -# List of cubed-sphere grids (EDIT AS NEEDED) -cubed_sphere_grid_list = ["c24", "c48", "c90", "c180", "c360"] - -# Preserves all global and variable attributes -with xr.set_options(keep_attrs=True): - - # Loop over simulation types - for sim in simulation_list: - - # Loop over months - for mm in month_list: - - # Read input data - infile = f"GEOSChem.Restart.{sim}.2019{mm}01_0000z.nc4" - print(f"Reading {infile}") - ds_in = xr.open_dataset(infile) - - # Rename GCClassic "SpeciesRst_" prefix to GCHP "SPC_" prefix - old_to_new_names = {} - for v in ds_in.data_vars.keys(): - if "SpeciesRst_" in v: - new_name = v.replace("SpeciesRst_", "SPC_") - old_to_new_names[v] = new_name - ds_in = ds_in.rename(old_to_new_names) - - # Loop over cubed-sphere grids - for cs in cubed_sphere_grid_list: - - # Number of grid points per side - cs_res = int(cs[1:]) - - # Regridding transform file - regrid_file = f"regrid_weights_latlon46x72_to_{cs}.nc" - weights_file = join(weights_dir, regrid_file) - - # Create a linear transform object from the regridding - # weights file for the combination of source and target - # horizontal resolutions. NOTE: GCHP restart files use - # a grid where lat = 6*cs_res. 
- transform = sparselt.esmf.load_weights( - weights_file, - input_dims=[('lat', 'lon'), (46, 72)], - output_dims=[('lat', 'lon'), (6*cs_res, cs_res)] - ) - - # Regrid to cubed-sphere - ds_out = sparselt.xr.apply(transform, ds_in) - - # Redefine coordinate arrays to be consistent - # with GCHP restart file expectations - coords_dict = { - "lon": np.arange(1, cs_res+1, dtype=np.float64), - "lat": np.arange(1, 6*cs_res+1, dtype=np.float64), - "lev": np.arange(1, 73, dtype=np.float64), - } - ds_out = ds_out.assign_coords(coords_dict) - - # Write to output resolution - outfile = f"GEOSChem.Restart.{sim}.2015{mm}01_0000z.{cs}.nc4" - print(f"Writing {outfile}") - ds_out.to_netcdf(outfile) - - # Cleanup - del transform - del ds_out - - # Cleanup - del ds_in diff --git a/examples/yaml/.gitignore b/examples/yaml/.gitignore deleted file mode 100644 index 314f02b1..00000000 --- a/examples/yaml/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.txt \ No newline at end of file diff --git a/examples/yaml/species2wiki.py b/examples/yaml/species2wiki.py deleted file mode 100755 index f319a4cf..00000000 --- a/examples/yaml/species2wiki.py +++ /dev/null @@ -1,306 +0,0 @@ -#!/usr/bin/env python - -""" -species2wiki.py: Creates a MediaWiki table from a GEOS-Chem -species database file in YAML format. This prints out information -for the table - -Calling sequence: ------------------ -./species2wiki.py species # Creates wiki table w/ general metadata -./species2wiki.py henry # Creates wiki table w/ Henry's law metadata -./species2wiki.py wetdep # Creates wiki table w/ wetdep metadata -./species2wiki.py drydep # Creates wiki table w/ drydep metadata -""" - -# Imports -import os -import sys -import yaml - -# ====================================================================== -# Configurables (MUST EDIT) -# ====================================================================== - -# YAML file to read -yaml_file = '../../gcpy/species_database.yml' - -# ====================================================================== -# Methods -# ====================================================================== - -def print_species_table(metadata): - """ - Extracts general metadatas from the species database and - creates a text file in MediaWiki table format. - - Args: - ----- - metadata : dict - Dictionary with species metadata - """ - - # Fields to print - keys_to_print = [ - "Formula", "FullName", "MW_g", "Gas/Aer", "Is_Kpp", - "Is_Advected", "Is_DryDep", "Is_WetDep", "Is_Photolysis" - ] - - # Output file - wiki_table_file = "wiki_species_table.txt" - - # Open file - with open(wiki_table_file, "w") as f: - - # Loop over species names - for s in metadata.keys(): - - # Skip anchors for other variables - if "_PROP" in s: - continue - - # Print the species name as the first column - spc_db = metadata[s] - print('\n|-valign="top"', file=f) - print("|{}".format(s), file=f) - - # Loop over other tags of the species database - # Special handling for MW_g - for t in keys_to_print: - if "EmMW_g" in t or "MolecRatio" in t: - pass - - elif "MW_g" in t: - if t in spc_db.keys(): - print("|{}".format(spc_db[t]), end="", file=f) - if "EmMW_g" in t and "MolecRatio" in t: - print("
({}, {}C)".format( - spc_db["EmMW_g"], - spc_db["MolecRatio"] - ), file=f) - else: - print(file=f) - else: - print("| -", file=f) - - elif "Gas/Aer" in t: - if "Is_Gas" in spc_db.keys(): - print("|Gas", file=f) - elif "Is_Aerosol" in spc_db.keys(): - print("|Aer", file=f) - - elif "Is_" in t: - if t in spc_db.keys(): - if spc_db[t] is True: - print("|X", file=f) - else: - print("| -", file=f) - else: - print("| -", file=f) - - else: - if t in spc_db.keys(): - print("|{}".format(spc_db[t]), file=f) - else: - print("| -", file=f) - - - # Close the file - f.close() - - -def print_henry_table(metadata): - """ - Extracts Henry's law metadata from the species database and - creates a text file in MediaWiki table format. - - Args: - ----- - metadata : dict - Dictionary with species metadata - """ - - # Fields to print -- for "GEOS-Chem Species" table on the wiki - keys_to_print = ["FullName", "DD_Hstar", "Henry_K0", "Henry_CR"] - - # Output file - wiki_table_file = "wiki_henry_table.txt" - - # Open file - with open(wiki_table_file, "w") as f: - - # Loop over species names - for s in metadata.keys(): - - # Skip anchors for other variables - if "_PROP" in s: - continue - - # Print the species name as the first column - spc_db = metadata[s] - print('\n|-valign="top"', file=f) - print("|{}".format(s), file=f) - - # Loop over other tags of the species database - for t in keys_to_print: - if t in spc_db.keys(): - print("|{}".format(spc_db[t]), file=f) - else: - print("| -", file=f) - - # Close the file - f.close() - - -def print_drydep_table(metadata): - """ - Extracts drydep metadata from the species database and - creates a text file in MediaWiki table format. - - Args: - ----- - metadata : dict - Dictionary with species metadata - """ - - # Fields to print -- for "GEOS-Chem Species" table on the wiki - keys_to_print = [ - "MW_g", "EmMW_g", "MolecRatio", "Radius", "Density", - "DD_AeroDryDep", "DD_DustDryDep", "DD_DvzAerSnow", - "DD_DvzMinVal", "DD_Hstar", "DD_F0" - ] - - # Output file - wiki_table_file = "wiki_drydep_table.txt" - - # Open file - with open(wiki_table_file, "w") as f: - - # Loop over species names - for s in metadata.keys(): - - # Skip anchors for other variables - if "_PROP" in s: - continue - - # Print the species name as the first column - spc_db = metadata[s] - print('\n|-valign="top"', file=f) - print("|{}".format(s), file=f) - - # Loop over other tags of the species database - for t in keys_to_print: - - if "DD_DvzMinVal" in t: - if t in spc_db.keys(): - print("|{} snow
{} land".format( - spc_db[t][0], - spc_db[t][1] - ), file=f) - else: - print("| -", file=f) - - else: - if t in spc_db.keys(): - print("|{}".format(spc_db[t]), file=f) - else: - print("| -", file=f) - - # Close the file - f.close() - - -def print_wetdep_table(metadata): - """ - Extracts wetdep metadata from the species database and - creates a text file in MediaWiki table format. - - Args: - ----- - metadata : dict - Dictionary with species metadata - """ - - # Fields to print -- for "GEOS-Chem Species" table on the wiki - keys_to_print = [ - "MW_g", "EmMW_g", "MolecRatio", "WD_CoarseAer", "WD_AerScafEff", - "WD_KcScaleFac", "WD_RainoutEff", "WD_RetFactor" - ] - - # Output file - wiki_table_file = "wiki_wetdep_table.txt" - - # Open file - with open(wiki_table_file, "w") as f: - - # Loop over species names - for s in metadata.keys(): - - # Skip anchors for other variables - if "_PROP" in s: - continue - - # Print the species name as the first column - spc_db = metadata[s] - print('\n|-valign="top"', file=f) - print("|{}".format(s), file=f) - - # Loop over other tags of the species database - for t in keys_to_print: - - if "WD_KcScaleFac" in t or "WD_RainoutEff" in t: - if t in spc_db.keys(): - print("|{}".format(spc_db[t][0]), file=f) - print("|{}".format(spc_db[t][1]), file=f) - print("|{}".format(spc_db[t][2]), file=f) - else: - print("| -", file=f) - print("| -", file=f) - print("| -", file=f) - - else: - if t in spc_db.keys(): - print("|{}".format(spc_db[t]), file=f) - else: - print("| -", file=f) - - # Close the file - f.close() - - -def main(): - """ - Main program. Parses arguments and calls the proper routine - to create the table in MediaWiki format - """ - - # Parse arguments - n_args = len(sys.argv) - if n_args == 0 or n_args > 2: - msg = "Usage: species2wiki.py [species|henry|wetdep|drydep]" - raise ValueError(msg) - table = sys.argv[1].upper() - - # Read the YAML file into a dict - try: - metadata = yaml.load(open(yaml_file), Loader=yaml.FullLoader) - except FileNotFoundError: - msg = "Could not find filename: {}".format(filename) - raise FileNotFoundError(msg) - - # Print the selected table in MediaWiki format - if "SPECIES" in table: - print_species_table(metadata) - elif "HENRY" in table: - print_henry_table(metadata) - elif "WETDEP" in table: - print_wetdep_table(metadata) - elif "DRYDEP" in table: - print_drydep_table(metadata) - else: - msg = "Argument must be one of species|henry|wetdep|drydep!" - raise ValueError(msg) - - -if __name__ == "__main__": - main() diff --git a/gcpy/__init__.py b/gcpy/__init__.py index 8d4d6e0d..62335aba 100644 --- a/gcpy/__init__.py +++ b/gcpy/__init__.py @@ -1,28 +1,27 @@ -''' -GCPY initialization script. Imports nested packages for convenience. -''' +""" +GCPy import script +""" -# Figure this out later -try: - from ._version import __version__ -except ImportError: - raise ImportError('gcpy was not properly installed; some functionality ' - 'may be not work. If installing from source code, ' - 'please re-install in place by running\n' - '$ pip install -e .' 
- '\nElse, please reinstall using your package manager.')
+from .benchmark import *
+from .examples import *
-from .util import *
-from .date_time import *
-from .units import *
-from .ste_flux import *
-from .regrid import *
-from .plot import *
-from .oh_metrics import *
-from .mean_oh_from_logs import *
-from .grid import *
-from .constants import *
+from .append_grid_corners import *
+from .benchmark_funcs import *
+from .budget_ox import *
 from .budget_tt import *
-from .benchmark import *
+from .constants import *
+from .cstools import *
+from .date_time import *
 from .file_regrid import *
+from .grid import *
 from .grid_stretching_transforms import *
+from .mean_oh_from_logs import *
+from .oh_metrics import *
+from .plot import *
+from .raveller_1D import *
+from .regrid import *
+from .regrid_restart_file import *
+from .ste_flux import *
+from .units import *
+from .util import *
+from ._version import *
diff --git a/gcpy/_version.py b/gcpy/_version.py
index 09ca992f..85a24b43 100644
--- a/gcpy/_version.py
+++ b/gcpy/_version.py
@@ -1,2 +1,2 @@
-__version__ = '1.3.2'
+__version__ = '1.4.0'
diff --git a/gcpy/benchmark/README.md b/gcpy/benchmark/README.md
new file mode 100644
index 00000000..5942456e
--- /dev/null
+++ b/gcpy/benchmark/README.md
@@ -0,0 +1,11 @@
+# README.md (gcpy/benchmark)
+
+This directory contains development materials for GEOS-Chem benchmarking.
+
+| File or folder | Description |
+| -------------- | ----------- |
+| `cloud/` | Contains template config files for benchmarks on the AWS cloud platform. |
+| `config/` | Contains configuration files with options for 1-month and 1-year benchmarks. |
+| `modules/` | Contains scripts for creating 1-year benchmarks. These are imported by the `run_benchmark.py` script. |
+| `plot_driver.sh` | Script to submit the `run_benchmark.py` script to a computational queue using the SLURM scheduler. |
+| `run_benchmark.py` | Driver script for GEOS-Chem benchmarks (1-hour, 1-month, 1-year). |
diff --git a/gcpy/benchmark/__init__.py b/gcpy/benchmark/__init__.py
new file mode 100644
index 00000000..53968276
--- /dev/null
+++ b/gcpy/benchmark/__init__.py
@@ -0,0 +1,6 @@
+"""
+GCPy import script
+"""
+from .modules import *
+
+from .run_benchmark import *
diff --git a/gcpy/benchmark/benchmark_slurm.sh b/gcpy/benchmark/benchmark_slurm.sh
new file mode 100755
index 00000000..7b8992d0
--- /dev/null
+++ b/gcpy/benchmark/benchmark_slurm.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+#SBATCH -c 8
+#SBATCH -N 1
+#SBATCH -t 0-4:00
+#SBATCH -p seas_compute,shared
+#SBATCH --mem=100000
+#SBATCH --mail-type=END
+
+#============================================================================
+# This is a sample SLURM script that you can use to run the GCPy
+# benchmark plotting code as a SLURM batch job.
+#
+# You can modify the SLURM parameters above for your setup.
+#
+# Tip: Using fewer cores can reduce the amount of memory required.
+#============================================================================
+
+# Apply all bash initialization settings
+. ~/.bashrc
+
+# Make sure to set multiple threads; Joblib will use multiple
+# cores to parallelize certain plotting operations.
+export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
+export OMP_STACKSIZE=500m
+
+# Turn on Python environment (edit for your setup)
+mamba activate gcpy_env
+
+# Specify a YAML file with benchmark options
+# Uncomment the file that you wish to use:
+#config="1mo_benchmark.yml"
+config="1yr_fullchem_benchmark.yml"
+#config="1yr_tt_benchmark.yml"
+
+# Call the run_benchmark script to make the plots
+python -m gcpy.benchmark.run_benchmark "${config}" > benchmark.log 2>&1
+
+# Turn off the Python environment
+mamba deactivate
+
+exit 0
+
diff --git a/gcpy/benchmark/cloud/README.md b/gcpy/benchmark/cloud/README.md
new file mode 100644
index 00000000..d351f7a0
--- /dev/null
+++ b/gcpy/benchmark/cloud/README.md
@@ -0,0 +1,8 @@
+# README.md for gcpy/benchmark/cloud
+
+This folder contains configuration files for benchmarks that run on the Amazon Web Services cloud platform.
+
+| File | Description |
+| -------------------------- | ------------- |
+| template.1hr_benchmark.yml | Template configuration file for 1-hour benchmarks |
+| template.1mo_benchmark.yml | Template configuration file for 1-month benchmarks |
diff --git a/gcpy/benchmark/cloud/template.1hr_benchmark.yml b/gcpy/benchmark/cloud/template.1hr_benchmark.yml
new file mode 100644
index 00000000..257d22a5
--- /dev/null
+++ b/gcpy/benchmark/cloud/template.1hr_benchmark.yml
@@ -0,0 +1,130 @@
+---
+# =====================================================================
+# Benchmark configuration file (**EDIT AS NEEDED**)
+# customize in the following manner:
+#
+# (1) Edit the path variables so that they point to folders
+#     containing model data
+# (2) Edit the version strings for each benchmark simulation
+# (3) Edit the switches that turn on/off creating of plots and
+#     tables as well as other plotting options
+# (4) If necessary, edit labels for the dev and ref versions
+#
+# Note: When doing GCHP vs GCC comparisons gchp_dev will be compared
+# to gcc_dev (not gcc_ref!). This ensures consistency in version names
+# when doing GCHP vs GCC diff-of-diffs.
+# =====================================================================
+#
+# Configuration for 1-hour FullChemBenchmark
+#
+# paths:
+#   main_dir:    High-level directory containing ref & dev rundirs
+#   results_dir: Directory where plots/tables will be created
+#   weights_dir: Path to regridding weights
+#   spcdb_dir:   Folder in which the species_database.yml file is
+#                located.  If set to "default", then will look for
+#                species_database.yml in one of the Dev rundirs.
+#
+paths:
+  main_dir: ${GEOSCHEM_BENCHMARK_WORKING_DIR}
+  results_dir: BenchmarkResults
+  weights_dir: ${GEOSCHEM_BENCHMARK_WORKING_DIR}/weights
+  spcdb_dir: default
+#
+# data: Contains configurations for ref and dev runs
+#   version:         Version string (must not contain spaces)
+#   dir:             Path to run directory
+#   outputs_subdir:  Subdirectory w/ GEOS-Chem diagnostic files
+#   restarts_subdir: Subdirectory w/ GEOS-Chem restarts
+#   bmk_start:       Simulation start date (YYYY-MM-DDThh:mm:ss)
+#   bmk_end:         Simulation end date (YYYY-MM-DDThh:mm:ss)
+#   resolution:      GCHP resolution string
+#
+data:
+  ref:
+    gcc:
+      version: ${GEOSCHEM_BENCHMARK_REF_PRIMARY_KEY}
+      dir: ref-gcc/run-directory
+      outputs_subdir: OutputDir
+      restarts_subdir: Restarts
+      bmk_start: "2019-07-01T00:00:00"
+      bmk_end: "2019-07-01T01:00:00"
+    gchp:
+      version: ${GEOSCHEM_BENCHMARK_REF_PRIMARY_KEY}
+      dir: ref-gchp/run-directory
+      outputs_subdir: OutputDir
+      restarts_subdir: Restarts
+      bmk_start: "2019-07-01T00:00:00"
+      bmk_end: "2019-07-01T01:00:00"
+      is_pre_14.0: False
+      resolution: c24
+  dev:
+    gcc:
+      version: ${GEOSCHEM_BENCHMARK_DEV_PRIMARY_KEY}
+      dir: dev-gcc/run-directory
+      outputs_subdir: OutputDir
+      restarts_subdir: Restarts
+      bmk_start: "2019-07-01T00:00:00"
+      bmk_end: "2019-07-01T01:00:00"
+    gchp:
+      version: ${GEOSCHEM_BENCHMARK_DEV_PRIMARY_KEY}
+      dir: dev-gchp/run-directory
+      outputs_subdir: OutputDir
+      restarts_subdir: Restarts
+      bmk_start: "2019-07-01T00:00:00"
+      bmk_end: "2019-07-01T01:00:00"
+      is_pre_14.0: False
+      resolution: c24
+#
+# options: Customizes the benchmark plot output
+#
+options:
+  #
+  # bmk_type: Specifies the type of benchmark
+  #
+  bmk_type: FullChemBenchmark
+  #
+  # comparisons: Specifies the comparisons to perform.
+  #
+  comparisons:
+    gcc_vs_gcc:
+      run: False
+      dir: GCC_version_comparison
+      tables_subdir: Tables
+    gchp_vs_gcc:
+      run: False
+      dir: GCHP_GCC_comparison
+      tables_subdir: Tables
+    gchp_vs_gchp:
+      run: False
+      dir: GCHP_version_comparison
+      tables_subdir: Tables
+    gchp_vs_gcc_diff_of_diffs:
+      run: False
+      dir: GCHP_GCC_diff_of_diffs
+  #
+  # outputs: Specifies the plots and tables to generate
+  #
+  outputs:
+    plot_conc: False
+    plot_emis: False
+    emis_table: True
+    plot_jvalues: False
+    plot_aod: False
+    mass_table: True
+    mass_accum_table: False
+    ops_budget_table: False
+    OH_metrics: True
+    ste_table: True  # GCC only
+    summary_table: True
+    plot_options:
+      by_spc_cat: True
+      by_hco_cat: True
+  #
+  # n_cores: Specify the number of cores to use.
+  #  -1: Use $OMP_NUM_THREADS cores
+  #  -2: Use $OMP_NUM_THREADS - 1 cores
+  #  -N: Use $OMP_NUM_THREADS - (N-1) cores
+  #   1: Disable parallelization (use a single core)
+  #
+  n_cores: -1
diff --git a/gcpy/benchmark/cloud/template.1mo_benchmark.yml b/gcpy/benchmark/cloud/template.1mo_benchmark.yml
new file mode 100644
index 00000000..fad5d6d0
--- /dev/null
+++ b/gcpy/benchmark/cloud/template.1mo_benchmark.yml
@@ -0,0 +1,130 @@
+---
+# =====================================================================
+# Benchmark configuration file (**EDIT AS NEEDED**)
+# customize in the following manner:
+#
+# (1) Edit the path variables so that they point to folders
+#     containing model data
+# (2) Edit the version strings for each benchmark simulation
+# (3) Edit the switches that turn on/off creating of plots and
+#     tables as well as other plotting options
+# (4) If necessary, edit labels for the dev and ref versions
+#
+# Note: When doing GCHP vs GCC comparisons gchp_dev will be compared
+# to gcc_dev (not gcc_ref!).
This ensures consistency in version names +# when doing GCHP vs GCC diff-of-diffs. +# ===================================================================== +# +# Configuration for 1-month FullChemBenchmark +# +# paths: +# main_dir: High-level directory containing ref & dev rundirs +# results_dir: Directory where plots/tables will be created +# weights_dir: Path to regridding weights +# spcdb_dir: Folder in which the species_database.yml file is +# located. If set to "default", then will look for +# species_database.yml in one of the Dev rundirs. +# +paths: + main_dir: ${GEOSCHEM_BENCHMARK_WORKING_DIR} + results_dir: BenchmarkResults + weights_dir: ${GEOSCHEM_BENCHMARK_WORKING_DIR}/weights + spcdb_dir: default +# +# data: Contains configurations for ref and dev runs +# version: Version string (must not contain spaces) +# dir: Path to run directory +# outputs_subdir: Subdirectory w/ GEOS-Chem diagnostic files +# restarts_subdir: Subdirectory w/ GEOS-Chem restarts +# bmk_start: Simulation start date (YYYY-MM-DDThh:mm:ss) +# bmk_end: Simulation end date (YYYY-MM-DDThh:mm:ss) +# resolution: GCHP resolution string +# +data: + ref: + gcc: + version: ${GEOSCHEM_BENCHMARK_REF_PRIMARY_KEY} + dir: ref-gcc/run-directory + outputs_subdir: OutputDir + restarts_subdir: Restarts + bmk_start: "2019-07-01T00:00:00" + bmk_end: "2019-08-01T00:00:00" + gchp: + version: ${GEOSCHEM_BENCHMARK_REF_PRIMARY_KEY} + dir: ref-gchp/run-directory + outputs_subdir: OutputDir + restarts_subdir: Restarts + bmk_start: "2019-07-01T00:00:00" + bmk_end: "2019-08-01T00:00:00" + is_pre_14.0: False + resolution: c24 + dev: + gcc: + version: ${GEOSCHEM_BENCHMARK_DEV_PRIMARY_KEY} + dir: dev-gcc/run-directory + outputs_subdir: OutputDir + restarts_subdir: Restarts + bmk_start: "2019-07-01T00:00:00" + bmk_end: "2019-08-01T00:00:00" + gchp: + version: ${GEOSCHEM_BENCHMARK_DEV_PRIMARY_KEY} + dir: dev-gchp/run-directory + outputs_subdir: OutputDir + restarts_subdir: Restarts + bmk_start: "2019-07-01T00:00:00" + bmk_end: "2019-08-01T00:00:00" + is_pre_14.0: False + resolution: c24 +# +# options: Customizes the benchmark plot output +# +options: + # + # bmk_type: Specifies the type of benchmark + # + bmk_type: FullChemBenchmark + # + # comparisons: Specifies the comparisons to perform. + # + comparisons: + gcc_vs_gcc: + run: False + dir: GCC_version_comparison + tables_subdir: Tables + gchp_vs_gcc: + run: False + dir: GCHP_GCC_comparison + tables_subdir: Tables + gchp_vs_gchp: + run: False + dir: GCHP_version_comparison + tables_subdir: Tables + gchp_vs_gcc_diff_of_diffs: + run: False + dir: GCHP_GCC_diff_of_diffs + # + # outputs: Specifies the plots and tables to generate + # + outputs: + plot_conc: True + plot_emis: True + emis_table: True + plot_jvalues: True + plot_aod: True + mass_table: True + mass_accum_table: False + ops_budget_table: False + OH_metrics: True + ste_table: True # GCC only + summary_table: True + plot_options: + by_spc_cat: True + by_hco_cat: True + # + # n_cores: Specify the number of cores to use. 
+ # -1: Use $OMP_NUM_THREADS cores + # -2: Use $OMP_NUM_THREADS - 1 cores + # -N: Use $OMP_NUM_THREADS - (N-1) cores + # 1: Disable parallelization (use a single core) + # + n_cores: -1 diff --git a/benchmark/1mo_benchmark.yml b/gcpy/benchmark/config/1mo_benchmark.yml similarity index 71% rename from benchmark/1mo_benchmark.yml rename to gcpy/benchmark/config/1mo_benchmark.yml index 3a15f591..637f9145 100644 --- a/benchmark/1mo_benchmark.yml +++ b/gcpy/benchmark/config/1mo_benchmark.yml @@ -1,17 +1,21 @@ -&--- +--- # ===================================================================== # Benchmark configuration file (**EDIT AS NEEDED**) # customize in the following manner: -# (1) Edit the path variables so that they point to folders w/ model data +# +# (1) Edit the path variables so that they point to folders +# containing model data # (2) Edit the version strings for each benchmark simulation -# (3) Edit the switches that turn on/off creating of plots and tables +# (3) Edit the switches that turn on/off creating of plots and +# tables as well as other plotting options # (4) If necessary, edit labels for the dev and ref versions +# # Note: When doing GCHP vs GCC comparisions gchp_dev will be compared # to gcc_dev (not gcc_ref!). This ensures consistency in version names -# when doing GCHP vs GCC diff-of-diffs (mps, 6/27/19) +# when doing GCHP vs GCC diff-of-diffs. # ===================================================================== # -# Configuration for 1 month FullChemBenchmark +# Configuration for 1-month FullChemBenchmark # # paths: # main_dir: High-level directory containing ref & dev rundirs @@ -22,9 +26,9 @@ # species_database.yml in one of the Dev rundirs. # paths: - main_dir: /n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/geos-chem/validation/gcpy_test_data/1mon + main_dir: /path/to/benchmark/main/dir results_dir: /path/to/BenchmarkResults - weights_dir: /n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/gcdata/ExtData/GCHP/RegriddingWeights + weights_dir: /n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/data/ExtData/GCHP/RegriddingWeights spcdb_dir: default # # data: Contains configurations for ref and dev runs @@ -43,7 +47,7 @@ data: dir: GCC_ref outputs_subdir: OutputDir restarts_subdir: Restarts - bmk_start: "2019-07-01T00:00:00" + bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-08-01T00:00:00" gchp: version: GCHP_ref @@ -52,7 +56,6 @@ data: restarts_subdir: Restarts bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-08-01T00:00:00" - is_pre_13.1: False is_pre_14.0: False resolution: c24 dev: @@ -61,43 +64,47 @@ data: dir: GCC_dev outputs_subdir: OutputDir restarts_subdir: Restarts - bmk_start: "2019-07-01T00:00:00" + bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-08-01T00:00:00" gchp: version: GCHP_dev dir: GCHP_dev outputs_subdir: OutputDir restarts_subdir: Restarts - bmk_start: "2019-07-01T00:00:00" + bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-08-01T00:00:00" - is_pre_13.1: False is_pre_14.0: False resolution: c24 # -# options: Specify the types of comparisons to perform -# +# options: Customizes the benchmark plot output +# options: + # + # bmk_type: Specifies the type of benchmark + # bmk_type: FullChemBenchmark - gcpy_test: True # Specify if this is a gcpy test validation run + # + # comparisons: Specifies the comparisons to perform. 
+ # comparisons: - gcc_vs_gcc: - run: True # True to run this comparison + gcc_vs_gcc: + run: True dir: GCC_version_comparison tables_subdir: Tables - gchp_vs_gcc: + gchp_vs_gcc: run: True - dir: GCHP_GCC_comparison + dir: GCHP_GCC_comparison tables_subdir: Tables - gchp_vs_gchp: + gchp_vs_gchp: run: True dir: GCHP_version_comparison tables_subdir: Tables - gchp_vs_gcc_diff_of_diffs: + gchp_vs_gcc_diff_of_diffs: run: True dir: GCHP_GCC_diff_of_diffs -# -# outputs: Types of output to generate (plots/tables) -# + # + # outputs: Specifies the plots and tables to generate + # outputs: plot_conc: True plot_emis: True @@ -105,10 +112,19 @@ options: plot_jvalues: True plot_aod: True mass_table: True + mass_accum_table: False ops_budget_table: False OH_metrics: True ste_table: True # GCC only summary_table: True - plot_options: # Plot concentrations and emissions by category? + plot_options: by_spc_cat: True by_hco_cat: True + # + # n_cores: Specify the number of cores to use. + # -1: Use $OMP_NUM_THREADS cores + # -2: Use $OMP_NUM_THREADS - 1 cores + # -N: Use $OMP_NUM_THREADS - (N-1) cores + # 1: Disable parallelization (use a single core) + # + n_cores: -1 diff --git a/benchmark/1yr_ch4_benchmark.yml b/gcpy/benchmark/config/1yr_ch4_benchmark.yml similarity index 70% rename from benchmark/1yr_ch4_benchmark.yml rename to gcpy/benchmark/config/1yr_ch4_benchmark.yml index a83ba5e2..29af5bfa 100644 --- a/benchmark/1yr_ch4_benchmark.yml +++ b/gcpy/benchmark/config/1yr_ch4_benchmark.yml @@ -2,16 +2,20 @@ # ===================================================================== # Benchmark configuration file (**EDIT AS NEEDED**) # customize in the following manner: -# (1) Edit the path variables so that they point to folders w/ model data +# +# (1) Edit the path variables so that they point to folders +# containing model data # (2) Edit the version strings for each benchmark simulation -# (3) Edit the switches that turn on/off creating of plots and tables +# (3) Edit the switches that turn on/off creating of plots and +# tables as well as other plotting options # (4) If necessary, edit labels for the dev and ref versions +# # Note: When doing GCHP vs GCC comparisions gchp_dev will be compared # to gcc_dev (not gcc_ref!). This ensures consistency in version names -# when doing GCHP vs GCC diff-of-diffs (mps, 6/27/19) +# when doing GCHP vs GCC diff-of-diffs. # ===================================================================== # -# Configuration for 1yr CH4Benchmark +# Configuration for 1-year CH4Benchmark # # paths: # main_dir: High-level directory containing ref & dev rundirs @@ -22,8 +26,8 @@ # species_database.yml in one of the Dev rundirs. 
# paths: - main_dir: /n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/geos-chem/validation/gcpy_test_data/1yr_fullchem - results_dir: /path/to/BenchmarkResults + main_dir: /path/to/benchmark/main/dir # EDIT AS NEEDED + results_dir: /path/to/BenchmarkResults # EDIT AS NEEDED weights_dir: /n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/data/ExtData/GCHP/RegriddingWeights spcdb_dir: default # @@ -52,12 +56,11 @@ data: restarts_subdir: Restarts bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" - is_pre_13.1: False # for gcpy_test_data, edit if needed - is_pre_14.0: False # for gcpy_test_data, edit if needed - resolution: c24 # for gcpy_test_data, edit if needed + is_pre_14.0: False + resolution: c24 dev: gcc: - version: GCC_dev + version: GCC_dev dir: GCC_dev outputs_subdir: OutputDir restarts_subdir: Restarts @@ -70,34 +73,38 @@ data: restarts_subdir: Restarts bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" - is_pre_13.1: False # for gcpy_test_data, edit if needed - is_pre_14.0: False # for gcpy_test_data, edit if needed - resolution: c24 # for gcpy_test_data, edit if needed + is_pre_14.0: False + resolution: c24 +# +# options: Customizes the benchmark plot output # -# options: Specify the types of comparisons to perform -# options: + # + # bmk_type: Specifies the type of benchmark + # bmk_type: CH4Benchmark - gcpy_test: False # Specify if this is a gcpy test validation run + # + # comparisons: Specifies the comparisons to perform. + # comparisons: - gcc_vs_gcc: - run: True # True to run this comparison + gcc_vs_gcc: + run: True dir: GCC_version_comparison tables_subdir: Tables - gchp_vs_gcc: - run: False - dir: GCHP_GCC_comparison + gchp_vs_gcc: + run: True + dir: GCHP_GCC_comparison tables_subdir: Tables - gchp_vs_gchp: - run: False + gchp_vs_gchp: + run: True dir: GCHP_version_comparison tables_subdir: Tables gchp_vs_gcc_diff_of_diffs: run: False dir: GCHP_GCC_diff_of_diffs -# -# outputs: Types of output to generate (plots/tables) -# + # + # outputs: Specifies the plots and tables to generate + # outputs: plot_conc: True plot_emis: True @@ -113,3 +120,11 @@ options: plot_options: by_spc_cat: True by_hco_cat: True + # + # n_cores: Specify the number of cores to use. + # -1: Use $OMP_NUM_THREADS cores + # -2: Use $OMP_NUM_THREADS - 1 cores + # -N: Use $OMP_NUM_THREADS - (N-1) cores + # 1: Disable parallelization (use a single core) + # + n_cores: -1 diff --git a/benchmark/1yr_fullchem_benchmark.yml b/gcpy/benchmark/config/1yr_fullchem_benchmark.yml similarity index 61% rename from benchmark/1yr_fullchem_benchmark.yml rename to gcpy/benchmark/config/1yr_fullchem_benchmark.yml index 747a0226..8f86efae 100644 --- a/benchmark/1yr_fullchem_benchmark.yml +++ b/gcpy/benchmark/config/1yr_fullchem_benchmark.yml @@ -1,31 +1,37 @@ -&--- +--- # ===================================================================== # Benchmark configuration file (**EDIT AS NEEDED**) # customize in the following manner: -# (1) Edit the path variables so that they point to folders w/ model data +# +# (1) Edit the path variables so that they point to folders +# containing model data # (2) Edit the version strings for each benchmark simulation -# (3) Edit the switches that turn on/off creating of plots and tables +# (3) Edit the switches that turn on/off creating of plots and +# tables as well as other plotting options # (4) If necessary, edit labels for the dev and ref versions +# # Note: When doing GCHP vs GCC comparisions gchp_dev will be compared # to gcc_dev (not gcc_ref!). 
This ensures consistency in version names -# when doing GCHP vs GCC diff-of-diffs (mps, 6/27/19) +# when doing GCHP vs GCC diff-of-diffs. # ===================================================================== # -# Configuration for 1yr FullChemBenchmark +# Configuration for 1-year FullChemBenchmark # # paths: -# main_dir: High-level directory containing ref & dev rundirs -# results_dir: Directory where plots/tables will be created -# weights_dir: Path to regridding weights -# spcdb_dir: Folder in which the species_database.yml file is -# located. If set to "default", then will look for -# species_database.yml in one of the Dev rundirs. +# main_dir: High-level directory containing ref & dev rundirs +# results_dir: Directory where plots/tables will be created +# weights_dir: Path to regridding weights +# spcdb_dir: Folder in which the species_database.yml file is +# located. If set to "default", then will look for +# species_database.yml in one of the Dev rundirs. +# obs_data_dir: Path to observational data (for models vs obs plots) # paths: - main_dir: /n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/geos-chem/validation/gcpy_test_data/1yr_fullchem + main_dir: /path/to/benchmark/main/dir results_dir: /path/to/BenchmarkResults weights_dir: /n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/data/ExtData/GCHP/RegriddingWeights spcdb_dir: default + obs_data_dir: /path/to/observational/data # # data: Contains configurations for ref and dev runs # version: Version string (must not contain spaces) @@ -42,7 +48,7 @@ data: version: GCC_ref dir: GCC_ref outputs_subdir: OutputDir - restarts_subdir: restarts + restarts_subdir: Restarts bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" gchp: @@ -52,15 +58,14 @@ data: restarts_subdir: Restarts bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" - is_pre_13.1: False # for gcpy_test_data, edit if needed - is_pre_14.0: True # for gcpy_test_data, edit if needed - resolution: c48 # for gcpy_test_data, edit if needed + is_pre_14.0: False + resolution: c24 dev: gcc: version: GCC_dev dir: GCC_dev outputs_subdir: OutputDir - restarts_subdir: restarts + restarts_subdir: Restarts bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" gchp: @@ -70,34 +75,38 @@ data: restarts_subdir: Restarts bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" - is_pre_13.1: False # for gcpy_test_data, edit if needed - is_pre_14.0: False # for gcpy_test_data, edit if needed - resolution: c24 # for gcpy_test_data, edit if needed + is_pre_14.0: False + resolution: c24 +# +# options: Customizes the benchmark plot output # -# options: Specify the types of comparisons to perform -# options: + # + # bmk_type: Specifies the type of benchmark + # bmk_type: FullChemBenchmark - gcpy_test: True # Specify if this is a gcpy test validation run + # + # comparisons: Specifies the comparisons to perform. 
+ # comparisons: - gcc_vs_gcc: - run: True # True to run this comparison + gcc_vs_gcc: + run: True dir: GCC_version_comparison tables_subdir: Tables - gchp_vs_gcc: + gchp_vs_gcc: run: True - dir: GCHP_GCC_comparison + dir: GCHP_GCC_comparison tables_subdir: Tables - gchp_vs_gchp: + gchp_vs_gchp: run: True dir: GCHP_version_comparison tables_subdir: Tables gchp_vs_gcc_diff_of_diffs: run: True dir: GCHP_GCC_diff_of_diffs -# -# outputs: Types of output to generate (plots/tables) -# + # + # outputs: Specifies the plots and tables to generate + # outputs: plot_conc: True plot_emis: True @@ -110,6 +119,15 @@ options: Ox_budget_table: True ste_table: True # GCC only OH_metrics: True + plot_models_vs_obs: True plot_options: by_spc_cat: True by_hco_cat: True + # + # n_cores: Specify the number of cores to use. + # -1: Use $OMP_NUM_THREADS cores + # -2: Use $OMP_NUM_THREADS - 1 cores + # -N: Use $OMP_NUM_THREADS - (N-1) cores + # 1: Disable parallelization (use a single core) + # + n_cores: -1 diff --git a/benchmark/1yr_tt_benchmark.yml b/gcpy/benchmark/config/1yr_tt_benchmark.yml similarity index 71% rename from benchmark/1yr_tt_benchmark.yml rename to gcpy/benchmark/config/1yr_tt_benchmark.yml index d4b2f4ef..ffc01b1a 100644 --- a/benchmark/1yr_tt_benchmark.yml +++ b/gcpy/benchmark/config/1yr_tt_benchmark.yml @@ -1,17 +1,21 @@ -&--- +--- # ===================================================================== # Benchmark configuration file (**EDIT AS NEEDED**) # customize in the following manner: -# (1) Edit the path variables so that they point to folders w/ model data +# +# (1) Edit the path variables so that they point to folders +# containing model data # (2) Edit the version strings for each benchmark simulation -# (3) Edit the switches that turn on/off creating of plots and tables +# (3) Edit the switches that turn on/off creating of plots and +# tables as well as other plotting options # (4) If necessary, edit labels for the dev and ref versions +# # Note: When doing GCHP vs GCC comparisions gchp_dev will be compared # to gcc_dev (not gcc_ref!). This ensures consistency in version names -# when doing GCHP vs GCC diff-of-diffs (mps, 6/27/19) +# when doing GCHP vs GCC diff-of-diffs. # ===================================================================== # -# Configuration for 1 year TransportTracersBenchmark +# Configuration for 1-year TransportTracersBenchmark # # paths: # main_dir: High-level directory containing ref & dev rundirs @@ -22,7 +26,7 @@ # species_database.yml in one of the Dev rundirs. 
# paths: - main_dir: /n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/geos-chem/validation/gcpy_test_data/1yr_transporttracer + main_dir: /path/to/benchmark/main/dir results_dir: /path/to/BenchmarkResults weights_dir: /n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/data/ExtData/GCHP/RegriddingWeights spcdb_dir: default @@ -42,7 +46,7 @@ data: version: GCC_ref dir: GCC_ref outputs_subdir: OutputDir - restarts_subdir: restarts + restarts_subdir: Restarts bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" gchp: @@ -52,15 +56,14 @@ data: restarts_subdir: Restarts bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" - is_pre_13.1: True # for gcpy_test_data, edit if needed - is_pre_14.0: True # for gcpy_test_data, edit if needed - resolution: c48 # for gcpy_test_data, edit if needed + is_pre_14.0: False + resolution: c24 dev: gcc: - version: GCC_dev + version: GCC_dev dir: GCC_dev outputs_subdir: OutputDir - restarts_subdir: restarts + restarts_subdir: Restarts bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" gchp: @@ -70,15 +73,19 @@ data: restarts_subdir: Restarts bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" - is_pre_13.1: True # for gcpy_test_data, edit if needed - is_pre_14.0: True # for gcpy_test_data, edit if needed - resolution: c48 # for gcpy_test_data, edit if needed + is_pre_14.0: False + resolution: c24 # -# options: Specify the types of comparisons to perform +# options: Customizes the benchmark plot output # options: + # + # bmk_type: Specifies the type of benchmark + # bmk_type: TransportTracersBenchmark - gcpy_test: True + # + # comparisons: Specifies the comparisons to perform. + # comparisons: gcc_vs_gcc: run: True @@ -92,14 +99,25 @@ options: run: True dir: GCHP_version_comparison tables_subdir: Tables - # GCHP vs GCC diff of diffs not included in 1-yr tt benchmark -# -# outputs: Types of output to generate (plots/tables) -# + gchp_vs_gcc_diff_of_diffs: + run: False + dir: GCHP_GCC_diff_of_diffs + # + # outputs: Specifies the plots and tables to generate + # outputs: plot_conc: True plot_wetdep: True rnpbbe_budget: True operations_budget: False + mass_table: True ste_table: True cons_table: True + # + # n_cores: Specify the number of cores to use. + # -1: Use $OMP_NUM_THREADS cores + # -2: Use $OMP_NUM_THREADS - 1 cores + # -N: Use $OMP_NUM_THREADS - (N-1) cores + # 1: Disable parallelization (use a single core) + # + n_cores: -1 diff --git a/gcpy/benchmark/config/README.md b/gcpy/benchmark/config/README.md new file mode 100644 index 00000000..11aeacd3 --- /dev/null +++ b/gcpy/benchmark/config/README.md @@ -0,0 +1,10 @@ +# README.md (gcpy/benchmark/config) + +This directory contains configuration files (in YAML format) that control options for 1-month and 1-year benchmarks. These files are used as input to the `run_benchmark.py` script (located one directory level above). 
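+
+For example, a typical invocation (mirroring the call made in
+`benchmark_slurm.sh`, one directory level above) is:
+
+```
+python -m gcpy.benchmark.run_benchmark 1mo_benchmark.yml
+```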
+
+| File | Description |
+| ---- | ----------- |
+| `1mo_benchmark.yml` | Options for creating 1-month benchmark plots and tables (fullchem simulation) |
+| `1yr_ch4_benchmark.yml` | Options for creating 1-year benchmark plots and tables (CH4 simulation) |
+| `1yr_fullchem_benchmark.yml` | Options for creating 1-year benchmark plots and tables (fullchem simulation) |
+| `1yr_tt_benchmark.yml` | Options for creating 1-year benchmark plots and tables (TransportTracers simulation) |
diff --git a/gcpy/benchmark/modules/GC_72_vertical_levels.csv b/gcpy/benchmark/modules/GC_72_vertical_levels.csv
new file mode 100644
index 00000000..6016af57
--- /dev/null
+++ b/gcpy/benchmark/modules/GC_72_vertical_levels.csv
@@ -0,0 +1,73 @@
+Lev,Eta Edge,Eta Mid,Altitude (km),Pressure (hPa)
+1,,0.9925,0.058,1005.650
+2,,0.977456,0.189,990.408
+3,,0.96237,0.32,975.122
+4,,0.947285,0.454,959.837
+5,,0.9322,0.589,944.553
+6,,0.917116,0.726,929.268
+7,,0.902031,0.864,913.984
+8,,0.886948,1.004,898.701
+9,,0.871864,1.146,883.418
+10,,0.856781,1.29,868.135
+11,,0.841698,1.436,852.852
+12,,0.826616,1.584,837.570
+13,,0.809021,1.759,819.743
+14,,0.7864,1.988,796.822
+15,,0.761265,2.249,771.354
+16,,0.736134,2.517,745.890
+17,,0.711006,2.792,720.429
+18,,0.685878,3.074,694.969
+19,,0.654471,3.439,663.146
+20,,0.61679,3.896,624.967
+21,,0.579115,4.375,586.793
+22,,0.541449,4.879,548.628
+23,,0.503795,5.413,510.475
+24,,0.466153,5.98,472.335
+25,,0.428528,6.585,434.212
+26,,0.390927,7.237,396.112
+27,,0.353349,7.943,358.038
+28,,0.309854,8.846,313.966
+29,,0.263587,9.936,267.087
+30,,0.223772,11.021,226.745
+31,,0.190061,12.086,192.587
+32,,0.161513,13.134,163.661
+33,,0.137287,14.17,139.115
+34,,0.116695,15.198,118.250
+35,,0.099191,16.222,100.514
+36,,0.084313,17.243,85.439
+37,,0.0716,18.269,72.558
+38,,0.060682,19.309,61.496
+39,,0.051326,20.364,52.016
+40,,0.043326,21.438,43.910
+41,,0.036499,22.531,36.993
+42,,0.030673,23.648,31.089
+43,,0.025699,24.794,26.049
+44,,0.021467,25.971,21.761
+45,,0.017878,27.18,18.124
+46,,0.014844,28.423,15.050
+47,,0.012287,29.701,12.460
+48,,0.010141,31.015,10.285
+49,,0.008336,32.372,8.456
+50,,0.006818,33.782,6.918
+51,,0.005548,35.244,5.632
+52,,0.004492,36.759,4.562
+53,,0.003619,38.328,3.677
+54,,0.0029,39.951,2.948
+55,,0.002312,41.627,2.353
+56,,0.001834,43.355,1.868
+57,,0.001446,45.134,1.476
+58,,0.001135,46.962,1.160
+59,,0.000886,48.835,0.907
+60,,0.000687,50.754,0.706
+61,,0.000529,52.716,0.546
+62,,0.000405,54.717,0.420
+63,,0.000308,56.752,0.322
+64,,0.000232,58.816,0.245
+65,,0.000173,60.902,0.185
+66,,0.000128,63.004,0.140
+67,,0.000093,65.115,0.105
+68,,0.000067,67.243,0.078
+69,,0.000046,69.44,0.057
+70,,0.00003,71.812,0.040
+71,,0.000016,74.594,0.026
+72,,0.000005,78.146,0.015
diff --git a/gcpy/benchmark/modules/README.md b/gcpy/benchmark/modules/README.md
new file mode 100644
index 00000000..0dead945
--- /dev/null
+++ b/gcpy/benchmark/modules/README.md
@@ -0,0 +1,9 @@
+# README.md (gcpy/benchmark/modules)
+
+This directory contains scripts for creating 1-year benchmark plots.  These are imported by the `run_benchmark.py` script in the parent folder.
+
+| File | Description |
+| ---- | ----------- |
+| `benchmark_models_vs_obs.py` | Routines for creating plots of model outputs vs. observations. |
+| `run_1yr_fullchem_benchmark.py` | Routines for creating plots and tables for 1-year fullchem benchmarks. |
+| `run_1yr_tt_benchmark.py` | Routines for creating plots and tables for 1-year TransportTracers benchmarks. |
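+
+As a quick sketch (assuming GCPy is installed as a package), these
+modules can also be imported directly:
+
+```python
+from gcpy.benchmark.modules import benchmark_models_vs_obs
+```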
diff --git a/gcpy/benchmark/modules/__init__.py b/gcpy/benchmark/modules/__init__.py
new file mode 100644
index 00000000..6a6f3f80
--- /dev/null
+++ b/gcpy/benchmark/modules/__init__.py
@@ -0,0 +1,3 @@
+"""
+GCPy import script
+"""
diff --git a/gcpy/benchmark/modules/benchmark_models_vs_obs.py b/gcpy/benchmark/modules/benchmark_models_vs_obs.py
new file mode 100644
index 00000000..06b0b07a
--- /dev/null
+++ b/gcpy/benchmark/modules/benchmark_models_vs_obs.py
@@ -0,0 +1,1114 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+gcpy/benchmark/modules/benchmark_models_vs_obs.py
+
+Python functions to plot modeled data from 1-year fullchem benchmark
+simulations against observations for the year 2019.  At present, only
+O3 plots are supported, but this can be extended in the future.
+
+Author: Matt Rowlinson
+
+Linted with PyLint and incorporated into GCPy
+by Bob Yantosca
+"""
+import os
+import glob
+from datetime import datetime, timedelta
+from matplotlib.backends.backend_pdf import PdfPages
+from matplotlib.figure import Figure
+import matplotlib.pyplot as plt
+import pandas as pd
+import numpy as np
+import xarray as xr
+from gcpy.constants import skip_these_vars
+from gcpy.util import verify_variable_type, dataset_reader, make_directory
+from gcpy.cstools import extract_grid, find_index, is_cubed_sphere
+
+def read_nas(
+    input_file,
+    verbose=False,
+):
+    """
+    Reads NASA Ames data files from EBAS (https://ebas-data.nilu.no).
+    Creates a data frame of O3 values converted to ppb and a dictionary
+    with key site information (name, lat, lon, altitude).
+
+    Args:
+    -----
+    input_file : str
+        Path to data file with observational data (e.g. sonde data).
+
+    Keyword Args:
+    -------------
+    verbose : bool
+        Toggles verbose printout on (True) or off (False).
+        Default value: False
+
+    Returns:
+    --------
+    obs_dataframe : pandas DataFrame
+        Dataframe containing observational data from input_file.
+
+    obs_site_coords : dict
+        Dictionary containing formatted site name: lon, lat and altitude.
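+
+    Example:
+    --------
+    A hypothetical call (the input file name is assumed here):
+
+    >>> obs_dataframe, obs_site_coords = read_nas(
+    ...     "obs_data/XY0001R.2019.nas", verbose=True
+    ... )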
+ """ + verify_variable_type(input_file, str) + + if verbose: + print(f"read_nas: Reading {input_file}") + + with open(input_file, encoding='UTF-8') as the_file: + header = np.array( + [next(the_file) for x in range(155) ] + ) + n_hdr = int( + header[0].split(' ')[0] + ) + st_ymd = header[6].split(' ') + st_ymd = list( + filter( + None, + st_ymd + ) + ) + start_date = datetime( + int(st_ymd[0]), + int(st_ymd[1]), + int(st_ymd[2]) + ) + for line in header: + if 'Station name' in line: + site = line.split(':')[1:] + site = '_'.join(site).replace('\n','').\ + replace(' ',' ').replace('/','-') + site = site.replace('Atmospheric Observatory','') + site = site.replace(' Research Station','') + elif 'Station longitude:' in line: + lon = float(line.split(' ')[-1].replace('\n','')) + elif 'Station latitude:' in line: + lat = float(line.split(' ')[-1].replace('\n','')) + elif 'Station altitude:' in line: + alt = float(line.split(' ')[-2].replace('\n','')) + + file_hdr = np.loadtxt( + input_file, + skiprows=n_hdr + ) + obs_dataframe = pd.DataFrame( + file_hdr, + index=file_hdr[:,0] + ) + obs_dataframe, qcflag = find_times( + obs_dataframe, + start_date + ) + obs_dataframe = pd.DataFrame( + { + 'Value': obs_dataframe.values/1.99532748, + 'Flag': qcflag + }, + index=obs_dataframe.index + ) + obs_dataframe = obs_dataframe[obs_dataframe.Flag == 0.000] + obs_dataframe = obs_dataframe.loc['2019'] + obs_dataframe = obs_dataframe.resample('H').mean() + obs_dataframe = pd.DataFrame( + { + site: obs_dataframe.Value + }, + index=obs_dataframe.index + ) + obs_site_coords = { site: + { + 'lon': lon, + 'lat': lat, + 'alt': alt + } + } + + return obs_dataframe, obs_site_coords + + +def read_observational_data( + path, + verbose +): + """ + Reads the observational O3 data from EBAS + (taken from https://ebas-data.nilu.no/ on 15/05/2023) + + Loops over all data files (in NASA/Ames format) within + a folder and concatenates them into a single DataFrame. + + Args: + ----- + path : str + Path to the observational data directory + + verbose : bool + Toggles verbose printout on (True) or off (False). + Default value: False + + Returns: + -------- + obs_dataframe : pandas DataFrame + DataFrame object with the observational data (i.e. station + names, data, metadata). + + obs_site_coords : dict + Dictionary with coordinates of each observation site + """ + verify_variable_type(path, str) + + first = True + obs_site_coords = {} + dataframe = None + for infile in sorted(glob.glob(f"{path}/*nas")): + obs_dataframe, xyz = read_nas( + infile, + verbose=verbose + ) + if first: + dataframe = obs_dataframe + obs_site_coords.update(xyz) + first = False + else: + dataframe = pd.concat( + [dataframe, obs_dataframe], + axis=1 + ) + obs_site_coords.update(xyz) + + # If dataframe0 is undefined, the loop didn't execute... so throw error + if dataframe is None: + raise ValueError(f"Could not find data in {path}!") + + obs_dataframe = dataframe.groupby( + dataframe.columns, + axis=1 + ).max() + + return obs_dataframe, obs_site_coords + + +def read_model_data( + filepaths, + varname, + verbose=False, +): + """ + Reads model data from a netCDF file. Adds special handling to look + for species concentrations variable names starting with either + "SpeciesConcVV" or "SpeciesConc". This is necessary for backwards + compatitbility with GEOS-Chem output prior to version 14.1.0. + + Args: + ----- + filepaths : list of str + List of data files to read. + + varname : str or list of str + Variable name(s) to read from data files. 
+
+    Keyword Args:
+    -------------
+    verbose : bool
+        Toggles verbose output on (True) or off (False).
+        Default value: False
+
+    Returns:
+    --------
+    dataarray : xarray DataArray
+        DataArray object containing data read from files
+        specified by the filepaths argument.
+    """
+
+    # Read the Ref and Dev model data
+    reader = dataset_reader(
+        multi_files=True,
+        verbose=verbose,
+    )
+
+    # Set temporary variable name for use below
+    varname_tmp = varname
+
+    # First try reading the data as-is
+    try:
+        dataset = reader(
+            filepaths,
+            drop_variables=skip_these_vars,
+            data_vars=[varname_tmp]
+        ).load()
+
+    # If we encounter a ValueError, it may be because the data is
+    # older and may have e.g. SpeciesConc fields instead of
+    # SpeciesConcVV fields.  Reset varname_tmp and try again.
+    except ValueError:
+        varname_tmp = varname_tmp.replace("VV", "")
+        dataset = reader(
+            filepaths,
+            drop_variables=skip_these_vars,
+            data_vars=[varname_tmp]
+        ).load()
+
+        # Rename to the original name to avoid confusion with data
+        # from GEOS-Chem versions prior to 14.1.0
+        with xr.set_options(keep_attrs=True):
+            dataset = dataset.rename({varname_tmp: varname})
+
+    # If we fail again, then throw an error!
+    except (FileNotFoundError, OSError, IOError) as exc:
+        msg = f"read_model_data: Could not read data for {varname}!"
+        raise type(exc)(msg) from exc
+
+    # Create a DataArray object and convert to ppbv (if necessary)
+    with xr.set_options(keep_attrs=True):
+        dataarray = dataset[varname]
+        if "mol mol-1" in dataarray.attrs["units"]:
+            dataarray.values *= 1.0e9
+            dataarray.attrs["units"] = "ppbv"
+
+    return dataarray
+
+
+def find_times(
+    obs_dataframe,
+    start_time
+):
+    """
+    Converts timestamps in NASA Ames data files to python datetime
+    objects.  Sets the DataFrame index to the new datetime array.
+
+    Args:
+    -----
+    obs_dataframe : pandas DataFrame
+        DataFrame with O3 values from GAW site
+
+    start_time : datetime
+        Reference start time for timestamp taken from NASA Ames file
+
+    Returns:
+    --------
+    obs_dataframe: pandas DataFrame
+        O3 in ppbV with datetime index
+
+    qcflag : pandas Dataframe
+        QC flag with datetime index
+    """
+    end_time = obs_dataframe[obs_dataframe.columns[1]]
+    time_x = []
+
+    for index in range(len(end_time)):
+        time_x.append(start_time + timedelta(days=end_time.values[index]))
+
+    obs_dataframe.index = time_x
+    qcflag = obs_dataframe[obs_dataframe.columns[-1]]
+    obs_dataframe = obs_dataframe[obs_dataframe.columns[2]]
+
+    return obs_dataframe, qcflag
+
+
+def get_nearest_model_data_to_obs_cs(
+    gc_data,
+    gc_cs_grid,
+    gc_level_alts_m,
+    lon_value,
+    lat_value,
+    alt_value
+):
+    """
+    Returns GEOS-Chem model data (on a cubed-sphere grid) at the
+    grid box closest to an observation site location.
+
+    Args:
+    -----
+    gc_data : xarray DataArray
+        GEOS-Chem output to be processed
+
+    gc_cs_grid: xarray Dataset
+        Coordinate arrays defining the cubed-sphere grid.
+
+    gc_level_alts_m: pandas Series
+        Altitudes of GEOS-Chem levels in meters
+
+    lon_value : float
+        GAW site longitude
+
+    lat_value : float
+        GAW site latitude
+
+    alt_value : float
+        GAW site altitude
+
+    Returns:
+    --------
+    dataframe: pandas.DataFrame
+        Model data closest to the observation site.
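+
+    Example:
+    --------
+    A hypothetical lookup (the site coordinates are assumed; they
+    mirror the example site used elsewhere in this repository):
+
+    >>> dframe = get_nearest_model_data_to_obs_cs(
+    ...     gc_data, gc_cs_grid, gc_level_alts_m,
+    ...     lon_value=-87.18, lat_value=32.94, alt_value=100.0
+    ... )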
+    """
+    verify_variable_type(gc_data, xr.DataArray)
+    verify_variable_type(gc_cs_grid, xr.Dataset)
+    verify_variable_type(gc_level_alts_m, pd.Series)
+
+    # Prevent the latitude from getting too close to the N or S poles
+    lat_value = max(min(lat_value, 89.75), -89.75)
+
+    # Indices (nf, yInd, xInd) of box nearest to observation site
+    cs_indices = find_index(
+        lat_value,
+        lon_value,
+        gc_cs_grid
+    )
+
+    # Index of nearest vertical level to observation site
+    z_idx = (
+        np.abs(
+            gc_level_alts_m.values - float(alt_value)
+        )
+    ).argmin()
+
+    return gc_data.isel(
+        nf=cs_indices[0, 0],
+        Ydim=cs_indices[1, 0],
+        Xdim=cs_indices[2, 0],
+        lev=z_idx
+    ).to_dataframe()
+
+
+def get_nearest_model_data_to_obs_ll(
+        gc_data,
+        gc_cs_grid,
+        gc_level_alts_m,
+        lon_value,
+        lat_value,
+        alt_value
+):
+    """
+    Returns GEOS-Chem model data (on a lat-lon grid) at the
+    grid box closest to an observation site location.
+
+    Args:
+    -----
+    gc_data : xarray DataArray
+        GEOS-Chem output to be processed
+
+    gc_cs_grid : NoneType
+        Dummy variable (needed to make the argument list the
+        same as in get_nearest_model_data_to_obs_cs).
+
+    gc_level_alts_m : pandas Series
+        Altitudes of GEOS-Chem levels in meters
+
+    lon_value : float
+        GAW site longitude
+
+    lat_value : float
+        GAW site latitude
+
+    alt_value : float
+        GAW site altitude
+
+    Returns:
+    --------
+    dataframe : pandas.DataFrame
+        Model data closest to the observation site.
+    """
+    verify_variable_type(gc_data, xr.DataArray)
+    verify_variable_type(gc_cs_grid, type(None))
+    verify_variable_type(gc_level_alts_m, pd.Series)
+
+    # Indices of the grid box nearest to the observation site
+    x_idx = (
+        np.abs(
+            gc_data.lon.values - float(lon_value)
+        )
+    ).argmin()
+
+    y_idx = (
+        np.abs(
+            gc_data.lat.values - float(lat_value)
+        )
+    ).argmin()
+
+    z_idx = (
+        np.abs(
+            gc_level_alts_m.values - float(alt_value)
+        )
+    ).argmin()
+
+    return gc_data.isel(
+        lon=x_idx,
+        lat=y_idx,
+        lev=z_idx
+    ).to_dataframe()
+
+
+def which_finder_function(
+        data
+):
+    """
+    Returns the function that will be used to get the model data nearest
+    to the observation site.  The function that is returned depends on
+    whether the model grid is lat-lon or cubed-sphere, as different
+    handling needs to be applied to each grid.
+
+    Args:
+    -----
+    data : xarray.DataArray or xarray.Dataset
+        Model data
+
+    Returns:
+    --------
+    A reference to the function that will read the data, depending
+    on whether the data is placed on a cubed-sphere grid or on
+    a lat-lon grid.
+    """
+    verify_variable_type(data, (xr.DataArray, xr.Dataset))
+
+    if is_cubed_sphere(data):
+        return get_nearest_model_data_to_obs_cs
+
+    return get_nearest_model_data_to_obs_ll
+
+
+def get_geoschem_level_metadata(
+        filename=None,
+        search_key=None,
+        verbose=False,
+):
+    """
+    Reads a comma-separated variable (.csv) file with GEOS-Chem vertical
+    level metadata and returns it in a pandas DataFrame object.
+
+    Keyword Args:
+    -------------
+    filename : str
+        Name of the comma-separated variable file to read.
+        Default value: GC_72_vertical_levels.csv, located in the
+        same folder as this module.
+
+    search_key : str
+        If present, will return only the metadata column that
+        matches this value.
+        Default value: None
+
+    verbose : bool
+        Toggles verbose printout on (True) or off (False).
+        Default value: False
+
+    Returns:
+    --------
+    metadata : pandas DataFrame
+        Metadata for each of the GEOS-Chem vertical levels.
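+
+    Example:
+    --------
+    A short sketch of how the level altitudes are obtained (this
+    mirrors the usage in make_benchmark_models_vs_obs_plots below;
+    the "Altitude (km)" column name is taken from the bundled
+    GC_72_vertical_levels.csv file):
+
+    >>> gc_level_alts_m = get_geoschem_level_metadata(
+    ...     search_key="Altitude (km)"
+    ... ) * 1.0e3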
+    """
+    if filename is None:
+        filename = os.path.join(
+            os.path.dirname(__file__),
+            "GC_72_vertical_levels.csv"
+        )
+
+    try:
+        if verbose:
+            print(f"get_geoschem_level_metadata: Reading {filename}")
+        metadata = pd.read_csv(filename)
+    except (IOError, OSError, FileNotFoundError) as exc:
+        msg = f"Could not read GEOS-Chem level metadata in {filename}!"
+        raise type(exc)(msg) from exc
+
+    if search_key is None:
+        return metadata
+    return metadata[search_key]
+
+
+def prepare_data_for_plot(
+        obs_dataframe,
+        obs_site_coords,
+        obs_site_name,
+        ref_dataarray,
+        ref_cs_grid,
+        dev_dataarray,
+        dev_cs_grid,
+        gc_level_alts_m,
+        varname="SpeciesConcVV_O3",
+        **kwargs,
+):
+    """
+    Prepares data for passing to routine plot_single_station as follows:
+
+    (1) Computes the monthly mean of observations at the given
+        station site.
+    (2) Returns the GEOS-Chem Ref and Dev data at the grid box closest
+        to the given station site.
+    (3) Creates the top-of-plot title for the given station site.
+
+    Args:
+    -----
+    obs_dataframe : pandas DataFrame
+        Observations at each station site.
+
+    obs_site_coords : dict
+        Coordinates (lon, lat, alt) for each observation station site.
+
+    obs_site_name : str
+        Name of the observation station site.
+
+    ref_dataarray, dev_dataarray : xarray DataArray
+        Data from the Ref and Dev model versions.
+
+    ref_cs_grid, dev_cs_grid : xarray.Dataset or NoneType
+        Dataset containing the cubed-sphere grid definition for
+        ref_dataarray and dev_dataarray (or None if ref_dataarray or
+        dev_dataarray are not placed on a cubed-sphere grid).
+
+    gc_level_alts_m : pandas Series
+        Altitudes of GEOS-Chem vertical levels in meters
+
+    Keyword Args (Optional)
+    -----------------------
+    varname : str
+        GEOS-Chem diagnostic name for the Ref and Dev model data.
+        Default value: "SpeciesConcVV_O3"
+
+    Returns:
+    --------
+    obs_dataframe : pandas DataFrame
+        Mean observational data at the given station site.
+
+    ref_series, dev_series : pandas Series
+        Data from the Ref and Dev model versions at the
+        closest grid box to the observation station site.
+
+    subplot_title : str
+        Plot title string for the given observation station site.
+
+    subplot_ylabel : str
+        Label for the Y-axis (e.g. species name).
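+
+    Example:
+    --------
+    A hypothetical sketch for a single station site (all inputs are
+    assumed to have been created by the reader routines above; the
+    site name is illustrative only):
+
+    >>> obs_mean, ref_o3, dev_o3, title, ylabel = prepare_data_for_plot(
+    ...     obs_dataframe, obs_site_coords, "Mace Head",
+    ...     ref_dataarray, ref_cs_grid, dev_dataarray, dev_cs_grid,
+    ...     gc_level_alts_m, varname="SpeciesConcVV_O3"
+    ... )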
+    """
+    verify_variable_type(obs_dataframe, pd.DataFrame)
+    verify_variable_type(obs_site_coords, dict)
+    verify_variable_type(obs_site_name, str)
+    verify_variable_type(ref_dataarray, xr.DataArray)
+    verify_variable_type(dev_dataarray, xr.DataArray)
+    verify_variable_type(ref_cs_grid, (xr.Dataset, type(None)))
+    verify_variable_type(dev_cs_grid, (xr.Dataset, type(None)))
+    verify_variable_type(gc_level_alts_m, pd.Series)
+    verify_variable_type(varname, str)
+
+    # Get data from the Ref model closest to the obs site
+    finder_function = which_finder_function(ref_dataarray)
+    ref_dataframe = finder_function(
+        ref_dataarray,
+        ref_cs_grid,
+        gc_level_alts_m,
+        lon_value=round(obs_site_coords[obs_site_name]['lon'], 2),
+        lat_value=round(obs_site_coords[obs_site_name]['lat'], 2),
+        alt_value=round(obs_site_coords[obs_site_name]['alt'], 1)
+    )
+
+    # Get data from the Dev model closest to the obs site
+    finder_function = which_finder_function(dev_dataarray)
+    dev_dataframe = finder_function(
+        dev_dataarray,
+        dev_cs_grid,
+        gc_level_alts_m,
+        lon_value=round(obs_site_coords[obs_site_name]['lon'], 2),
+        lat_value=round(obs_site_coords[obs_site_name]['lat'], 2),
+        alt_value=round(obs_site_coords[obs_site_name]['alt'], 1)
+    )
+
+    # Take the monthly mean of observations for plotting
+    # (since some observation sites have multiple months of data)
+    obs_dataframe = obs_dataframe.resample('M').mean()
+
+    # Create the top title for the subplot for this observation site
+    # (use integer lon & lat values and N/S lat and E/W lon notation)
+    lon = int(round(obs_site_coords[obs_site_name]['lon'], 0))
+    lat = int(round(obs_site_coords[obs_site_name]['lat'], 0))
+    ystr = "S"
+    if lat >= 0:
+        ystr = "N"
+    xstr = "W"
+    if lon >= 0:
+        xstr = "E"
+    lon = abs(lon)
+    lat = abs(lat)
+    subplot_title = \
+        f"{obs_site_name.strip()} ({lat}$^\\circ${ystr},{lon}$^\\circ${xstr})"
+
+    # Y-axis label (i.e. species name)
+    subplot_ylabel = varname.split("_")[1] + " (ppbv)"
+
+    return obs_dataframe, ref_dataframe[varname], dev_dataframe[varname], \
+        subplot_title, subplot_ylabel
+
+
+def plot_single_station(
+        fig,
+        rows_per_page,
+        cols_per_page,
+        subplot_index,
+        subplot_title,
+        subplot_ylabel,
+        obs_dataframe,
+        obs_site_name,
+        ref_series,
+        ref_label,
+        dev_series,
+        dev_label,
+        **kwargs
+):
+    """
+    Plots observation data vs. model data at a single station site.
+
+    Args:
+    -----
+    fig : matplotlib.figure.Figure
+        Matplotlib Figure object containing the plot.
+
+    rows_per_page, cols_per_page : int
+        Number of rows and columns on each page of the plot.
+
+    subplot_index : int
+        Index of the subplot on the page.  Runs from 0 to
+        (cols_per_page * rows_per_page - 1).
+
+    subplot_title, subplot_ylabel : str
+        Top title and y-axis label for each subplot.
+
+    obs_dataframe : pandas DataFrame
+        Observational data.
+
+    obs_site_name : str
+        Name of the observation station site.
+
+    ref_series, dev_series : pandas Series
+        GEOS-Chem data at closest grid box to the observation
+        station site for the Ref and Dev model versions.
+
+    ref_label, dev_label : str
+        Descriptive labels (e.g. version numbers) for the
+        GEOS-Chem Ref and Dev model versions.
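+
+    Example:
+    --------
+    A hypothetical sketch for the first panel of a 3x3 page (inputs
+    as returned by prepare_data_for_plot; labels are placeholders):
+
+    >>> fig = plt.figure(figsize=(11, 8))
+    >>> plot_single_station(
+    ...     fig, 3, 3, 0, title, ylabel,
+    ...     obs_mean, "Mace Head", ref_o3, "GCC_ref",
+    ...     dev_o3, "GCC_dev"
+    ... )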
+    """
+    verify_variable_type(fig, Figure)
+    verify_variable_type(rows_per_page, int)
+    verify_variable_type(cols_per_page, int)
+    verify_variable_type(subplot_index, int)
+    verify_variable_type(subplot_title, str)
+    verify_variable_type(subplot_ylabel, str)
+    verify_variable_type(obs_dataframe, pd.DataFrame)
+    verify_variable_type(ref_series, pd.Series)
+    verify_variable_type(ref_label, str)
+    verify_variable_type(dev_series, pd.Series)
+    verify_variable_type(dev_label, str)
+
+    # Create matplotlib axes object for this subplot
+    # axes_subplot is of type matplotlib.axes_.subplots.AxesSubplot
+    axes_subplot = fig.add_subplot(
+        rows_per_page,
+        cols_per_page,
+        subplot_index + 1,
+    )
+
+    # Set title for top of each frame
+    axes_subplot.set_title(
+        f"{subplot_title}",
+        weight='bold',
+        fontsize=7
+    )
+
+    # Plot observational data
+    axes_subplot.plot(
+        obs_dataframe.index,
+        obs_dataframe[obs_site_name],
+        color='k',
+        marker='^',
+        markersize=4,
+        lw=1,
+        label='Observations'
+    )
+
+    # Plot model data
+    axes_subplot.plot(
+        obs_dataframe.index,
+        ref_series,
+        color='r',
+        marker='o',
+        markersize=3,
+        lw=1,
+        label=ref_label
+    )
+    axes_subplot.plot(
+        obs_dataframe.index,
+        dev_series,
+        color='g',
+        marker='s',
+        markersize=3,
+        lw=1,
+        label=dev_label
+    )
+
+    # Apply y-axis label only if this is a leftmost plot panel
+    if subplot_index == 0 or subplot_index % cols_per_page == 0:
+        axes_subplot.set_ylabel(
+            subplot_ylabel,
+            fontsize=8
+        )
+
+    # Set X-axis and Y-axis ticks and labels
+    axes_subplot.set_xticks(
+        obs_dataframe.index
+    )
+    # NOTE: In newer versions of matplotlib you can pass the
+    # xticklabels keyword to the set_xticks function.  But we need
+    # to set the xticklabels separately for backwards compatibility
+    # with older matplotlib versions. -- Bob Yantosca (06 Jul 2023)
+    axes_subplot.set_xticklabels(
+        ['J','F','M','A','M','J','J','A','S','O','N','D']
+    )
+    axes_subplot.set_ylim(
+        0,
+        80
+    )
+    axes_subplot.set_yticks(
+        [0, 20, 40, 60, 80]
+    )
+    axes_subplot.tick_params(
+        axis='both',
+        which='major',
+        labelsize=6
+    )
+
+
+def plot_one_page(
+        pdf,
+        obs_dataframe,
+        obs_site_coords,
+        obs_site_names,
+        ref_dataarray,
+        ref_label,
+        ref_cs_grid,
+        dev_dataarray,
+        dev_label,
+        dev_cs_grid,
+        gc_level_alts_m,
+        rows_per_page=3,
+        cols_per_page=3,
+        varname="SpeciesConcVV_O3",
+        **kwargs
+):
+    """
+    Plots a single page of models vs. observations.
+
+    Args:
+    -----
+    pdf : matplotlib.backends.backend_pdf.PdfPages
+        PDF object to which this page of plots will be saved.
+
+    obs_dataframe : pandas DataFrame
+        Observations at each station site.
+
+    obs_site_coords : dict
+        Coordinates (lon, lat, alt) for each observation station site.
+
+    obs_site_names : list of str
+        Names of observation station sites that fit onto a single page.
+
+    ref_dataarray, dev_dataarray : xarray DataArray
+        Data from the Ref and Dev model versions.
+
+    ref_label, dev_label : str
+        Labels describing the Ref and Dev datasets (e.g. version numbers)
+
+    ref_cs_grid, dev_cs_grid : xarray.Dataset or NoneType
+        Dataset containing the cubed-sphere grid definition for
+        ref_dataarray and dev_dataarray (or None if ref_dataarray or
+        dev_dataarray are not placed on a cubed-sphere grid).
+
+    gc_level_alts_m : pandas Series
+        Altitudes of GEOS-Chem vertical levels in meters
+
+    Keyword Args:
+    -------------
+    rows_per_page, cols_per_page : int
+        Number of rows and columns to plot on a single page.
+        Default values: 3 rows, 3 columns
+
+    varname : str
+        Variable name for GEOS-Chem diagnostic data.
+        Default value: "SpeciesConcVV_O3"
+
+    verbose : bool
+        Toggles verbose printout on (True) or off (False).
+        Default value: False
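+
+    Example:
+    --------
+    A hypothetical sketch (at most rows_per_page * cols_per_page site
+    names should be passed per call; all other inputs are assumed to
+    be as in plot_models_vs_obs below, and the output path is
+    illustrative only):
+
+    >>> pdf = PdfPages("models_vs_obs_page1.pdf")
+    >>> plot_one_page(
+    ...     pdf, obs_dataframe, obs_site_coords, site_names[0:9],
+    ...     ref_dataarray, "GCC_ref", ref_cs_grid,
+    ...     dev_dataarray, "GCC_dev", dev_cs_grid,
+    ...     gc_level_alts_m
+    ... )
+    >>> pdf.close()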
+    """
+    verify_variable_type(obs_dataframe, pd.DataFrame)
+    verify_variable_type(obs_site_coords, dict)
+    verify_variable_type(obs_site_names, list)
+    verify_variable_type(ref_dataarray, xr.DataArray)
+    verify_variable_type(ref_label, str)
+    verify_variable_type(ref_cs_grid, (xr.Dataset, type(None)))
+    verify_variable_type(dev_dataarray, xr.DataArray)
+    verify_variable_type(dev_label, str)
+    verify_variable_type(dev_cs_grid, (xr.Dataset, type(None)))
+    verify_variable_type(gc_level_alts_m, pd.Series)
+
+    # Define a new matplotlib.figure.Figure object for this page
+    # Landscape width: 11" x 8"
+    fig = plt.figure(figsize=(11, 8))
+    fig.tight_layout()
+
+    # Loop over all of the stations that fit on the page
+    for subplot_index, obs_site_name in enumerate(obs_site_names):
+
+        # Find the model Ref & Dev data closest to the observation
+        # station site.  Also take the monthly mean of observations.
+        obs_dataframe, \
+        ref_series, dev_series, \
+        subplot_title, subplot_ylabel \
+        = prepare_data_for_plot(
+            obs_dataframe,                       # pandas.DataFrame
+            obs_site_coords,                     # dict
+            obs_site_name,                       # str
+            ref_dataarray,                       # xarray.DataArray
+            ref_cs_grid,                         # xarray.Dataset or None
+            dev_dataarray,                       # xarray.DataArray
+            dev_cs_grid,                         # xarray.Dataset or None
+            gc_level_alts_m,                     # pandas.Series
+            varname=varname,                     # str
+            **kwargs
+        )
+
+        # Plot models vs. observation for a single station site
+        plot_single_station(
+            fig,                                 # matplotlib.figure.Figure
+            rows_per_page,                       # int
+            cols_per_page,                       # int
+            subplot_index,                       # int
+            subplot_title,                       # str
+            subplot_ylabel,                      # str
+            obs_dataframe,                       # pandas.Dataframe
+            obs_site_name,                       # str
+            ref_series,                          # pandas.Series
+            ref_label,                           # str
+            dev_series,                          # pandas.Series
+            dev_label,                           # str
+            **kwargs
+        )
+
+    # Add extra spacing around plots
+    plt.subplots_adjust(
+        hspace=0.4,
+        top=0.9
+    )
+
+    # Add top-of-page legend
+    plt.legend(
+        ncol=3,
+        bbox_to_anchor=(0.5, 0.98),
+        bbox_transform=fig.transFigure,
+        loc='upper center'
+    )
+
+    # Save this page to the PDF file
+    pdf.savefig(fig)
+
+
+def plot_models_vs_obs(
+        obs_dataframe,
+        obs_site_coords,
+        ref_dataarray,
+        ref_label,
+        dev_dataarray,
+        dev_label,
+        gc_level_alts_m,
+        varname="SpeciesConcVV_O3",
+        dst="./benchmark",
+        **kwargs
+):
+    """
+    Plots models vs. observations using a 3 rows x 3 column layout.
+
+    Args:
+    -----
+    obs_dataframe : pandas DataFrame
+        Observations at each station site.
+
+    obs_site_coords : dict
+        Coordinates (lon, lat, alt) for each observation station site.
+
+    ref_dataarray, dev_dataarray : xarray DataArray
+        Data from the Ref and Dev model versions.
+
+    ref_label, dev_label : str
+        Labels describing the Ref and Dev datasets (e.g. version numbers)
+
+    gc_level_alts_m : pandas Series
+        Altitudes of GEOS-Chem vertical levels in meters
+
+    Keyword Args:
+    -------------
+    varname : str
+        Variable name for GEOS-Chem diagnostic data.
+        Default value: "SpeciesConcVV_O3"
+
+    dst : str
+        Root folder where output will be created.
+        Default value: "./benchmark"
+
+    verbose : bool
+        Toggles verbose printout on (True) or off (False).
+        Default value: False
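+
+    Note:
+    -----
+    The output is a multi-page PDF named
+    models_vs_obs.surface.<species>.pdf under dst, where <species>
+    is parsed from varname (e.g. models_vs_obs.surface.O3.pdf for
+    the default "SpeciesConcVV_O3").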
+    """
+    verify_variable_type(obs_dataframe, pd.DataFrame)
+    verify_variable_type(obs_site_coords, dict)
+    verify_variable_type(ref_dataarray, xr.DataArray)
+    verify_variable_type(ref_label, str)
+    verify_variable_type(dev_dataarray, xr.DataArray)
+    verify_variable_type(dev_label, str)
+    verify_variable_type(gc_level_alts_m, pd.Series)
+
+    # Get the cubed-sphere grid definitions for Ref & Dev
+    # (will be returned as "None" for lat/lon grids)
+    ref_cs_grid = extract_grid(ref_dataarray)
+    dev_cs_grid = extract_grid(dev_dataarray)
+
+    # Figure setup
+    plt.style.use('seaborn-darkgrid')
+    rows_per_page = 3
+    cols_per_page = 3
+    plots_per_page = rows_per_page * cols_per_page
+
+    # Open the plot as a PDF document
+    pdf_file = f"{dst}/models_vs_obs.surface.{varname.split('_')[1]}.pdf"
+    pdf = PdfPages(pdf_file)
+
+    # Sort station sites into N-to-S latitude order according to:
+    # https://www.geeksforgeeks.org/python-sort-nested-dictionary-by-key/
+    # NOTE: obs_site_names will be a list of (site name, coords) tuples
+    obs_site_names = sorted(
+        obs_site_coords.items(),
+        key=lambda x: x[1]['lat'],
+        reverse=True
+    )
+
+    # Keep only the site names (i.e. the first element of each tuple)
+    obs_site_names = [tpl[0] for tpl in obs_site_names]
+
+    # Loop over the number of obs sites that fit on a page
+    for start in range(0, len(obs_site_names), plots_per_page):
+        end = start + plots_per_page - 1
+
+        # Plot obs sites that fit on a single page
+        plot_one_page(
+            pdf,                                 # PdfPages
+            obs_dataframe,                       # pandas.DataFrame
+            obs_site_coords,                     # dict
+            obs_site_names[start:end+1],         # list of str
+            ref_dataarray,                       # xarray.DataArray
+            ref_label,                           # str
+            ref_cs_grid,                         # xarray.Dataset or NoneType
+            dev_dataarray,                       # xarray.DataArray
+            dev_label,                           # str
+            dev_cs_grid,                         # xarray.Dataset or NoneType
+            gc_level_alts_m,                     # pandas.Series
+            rows_per_page=rows_per_page,         # int
+            cols_per_page=cols_per_page,         # int
+            varname=varname,                     # str
+            **kwargs
+        )
+
+    # Close the PDF file after all pages are plotted.
+    pdf.close()
+
+
+def make_benchmark_models_vs_obs_plots(
+        obs_filepaths,
+        ref_filepaths,
+        ref_label,
+        dev_filepaths,
+        dev_label,
+        varname="SpeciesConcVV_O3",
+        dst="./benchmark",
+        verbose=False,
+        overwrite=False
+):
+    """
+    Driver routine to create model vs. observation plots.
+
+    Args:
+    -----
+    obs_filepaths : str or list
+        Path(s) to the observational data.
+
+    ref_filepaths, dev_filepaths : str or list
+        Path(s) to the Ref and Dev model versions to be compared.
+
+    ref_label, dev_label : str
+        Descriptive labels (e.g. for version numbers) for the
+        Ref and Dev model data.
+
+    Keyword Args (optional):
+    ------------------------
+    varname : str
+        Variable name for model data to be plotted against
+        observations.  Default value: "SpeciesConcVV_O3".
+
+    dst : str
+        Path to the root folder where plots will be created.
+        Default value: "./benchmark"
+
+    verbose : bool
+        Toggles verbose printout on (True) or off (False).
+        Default value: False
+
+    overwrite : bool
+        Toggles whether plots should be overwritten (True)
+        or not (False).
+        Default value: False
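+
+    Example:
+    --------
+    A hypothetical standalone call (paths and version labels are
+    placeholders; ref_files and dev_files would normally come from
+    gcpy.util.get_filepaths, as in the benchmark driver scripts):
+
+    >>> make_benchmark_models_vs_obs_plots(
+    ...     "/path/to/ebas_o3_data", ref_files, "GCC_14.0.0",
+    ...     dev_files, "GCC_14.1.0", dst="./benchmark",
+    ...     overwrite=True
+    ... )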
+    """
+    verify_variable_type(obs_filepaths, (str, list))
+    verify_variable_type(ref_filepaths, (str, list))
+    verify_variable_type(ref_label, str)
+    verify_variable_type(dev_filepaths, (str, list))
+    verify_variable_type(dev_label, str)
+
+    # Create the destination folder
+    make_directory(
+        dst,
+        overwrite=overwrite
+    )
+
+    # Get altitude [m] of GEOS-Chem level edges
+    gc_level_alts_m = \
+        get_geoschem_level_metadata(
+            search_key="Altitude (km)"
+        ) * 1.0e3
+
+    # Read the observational data
+    obs_dataframe, obs_site_coords = read_observational_data(
+        obs_filepaths,
+        verbose=verbose
+    )
+
+    # Read the model data
+    ref_dataarray = read_model_data(
+        ref_filepaths,
+        varname=varname
+    )
+    dev_dataarray = read_model_data(
+        dev_filepaths,
+        varname=varname
+    )
+
+    # Plot data vs observations
+    plot_models_vs_obs(
+        obs_dataframe,                           # pandas.DataFrame
+        obs_site_coords,                         # dict
+        ref_dataarray,                           # xarray.DataArray
+        ref_label,                               # str
+        dev_dataarray,                           # xarray.DataArray
+        dev_label,                               # str
+        gc_level_alts_m,                         # pandas.Series
+        varname=varname,                         # str
+        dst=dst,                                 # str
+        verbose=verbose                          # bool
+    )
diff --git a/benchmark/modules/run_1yr_fullchem_benchmark.py b/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py
old mode 100755
new mode 100644
similarity index 82%
rename from benchmark/modules/run_1yr_fullchem_benchmark.py
rename to gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py
index 5cb9bb0a..7321c50a
--- a/benchmark/modules/run_1yr_fullchem_benchmark.py
+++ b/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py
@@ -25,11 +25,7 @@
    (3) Make sure the /path/to/gcpy/benchmark is in your PYTHONPATH
        shell environment variable.

-    (4) If you wish to use the gcpy test data, then set "gcpy_test: True"
-        in 1yr_tt_benchmark.yml. If you wish to use actual GEOS-Chem
-        output data, then set "gcpy_test: False".
-
-    (5) Type at the command line
+    (4) Type at the command line

        ./run_benchmark.py 1yr_fullchem_benchmark.yml

@@ -47,7 +43,7 @@

    https://github.com/ipython/ipython/issues/10627

-This script corresponds with GCPy 1.3.2. Edit this version ID if releasing
+This script corresponds with GCPy 1.4.0. Edit this version ID if releasing
a new version of GCPy.
"""

@@ -60,15 +56,13 @@
from shutil import copyfile
from calendar import monthrange
import numpy as np
-#import xarray as xr
from joblib import Parallel, delayed
from gcpy.util import get_filepath, get_filepaths
import gcpy.ste_flux as ste
import gcpy.oh_metrics as oh
import gcpy.budget_ox as ox
-from gcpy import benchmark as bmk
-#from gcpy.grid import get_input_res
-
+from gcpy import benchmark_funcs as bmk
+import gcpy.benchmark.modules.benchmark_models_vs_obs as mvo

# Tell matplotlib not to look for an X-window
os.environ["QT_QPA_PLATFORM"] = "offscreen"

@@ -233,6 +227,17 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev):
    #gchp_vs_gcc_budgetdir = os.path.join(gchp_vs_gcc_resultsdir, "Budget")
    #gchp_vs_gchp_budgetdir = os.path.join(gchp_vs_gchp_resultsdir, "Budget")

+    # Models vs.
observations directories + gcc_vs_gcc_models_vs_obs_dir = os.path.join( + gcc_vs_gcc_resultsdir, "ModelVsObs" + ) + gchp_vs_gcc_models_vs_obs_dir = os.path.join( + gchp_vs_gcc_resultsdir, "ModelVsObs" + ) + gchp_vs_gchp_models_vs_obs_dir = os.path.join( + gchp_vs_gchp_resultsdir, "ModelVsObs" + ) + # ====================================================================== # Plot title strings # ====================================================================== @@ -244,7 +249,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gchp_vs_gchp_devstr = config["data"]["dev"]["gchp"]["version"] diff_of_diffs_refstr = bmk.diff_of_diffs_toprow_title(config, "gcc") diff_of_diffs_devstr = bmk.diff_of_diffs_toprow_title(config, "gchp") - + ######################################################################## ### THE REST OF THESE SETTINGS SHOULD NOT NEED TO BE CHANGED ### ######################################################################## @@ -256,27 +261,18 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): # Get days per month and seconds per month for ref sec_per_month_ref = np.zeros(12) days_per_month_ref = np.zeros(12) - for t in range(12): - days_per_month_ref[t] = monthrange(int(bmk_year_ref), t + 1)[1] - sec_per_month_ref[t] = days_per_month_ref[t] * 86400.0 + for mon in range(12): + days_per_month_ref[mon] = monthrange(int(bmk_year_ref), mon + 1)[1] + sec_per_month_ref[mon] = days_per_month_ref[mon] * 86400.0 # Get all months array of start datetimes for benchmark year bmk_start_ref = np.datetime64(bmk_year_ref + "-01-01") - bmk_end_ref = np.datetime64("{}-01-01".format(int(bmk_year_ref) + 1)) + bmk_end_ref = np.datetime64(f"{int(bmk_year_ref) + 1}-01-01") all_months_ref = np.arange( bmk_start_ref, bmk_end_ref, step=np.timedelta64(1, "M"), dtype="datetime64[M]" ) all_months_gchp_ref = all_months_ref - # Reset all months datetime array if GCHP ref is legacy filename format. - # Legacy format uses time-averaging period mid-point not start. - if config["data"]["ref"]["gchp"]["is_pre_13.1"]: - all_months_gchp_ref = np.zeros(12, dtype="datetime64[h]") - for t in range(12): - middle_hr = int(days_per_month_ref[t] * 24 / 2) - delta = np.timedelta64(middle_hr, "h") - all_months_gchp_ref[t] = all_months_ref[t].astype("datetime64[h]") + delta - # Get subset of month datetimes and seconds per month for only benchmark months bmk_mons_ref = all_months_ref[bmk_mon_inds] bmk_mons_gchp_ref = all_months_gchp_ref[bmk_mon_inds] @@ -292,27 +288,18 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): # Get days per month and seconds per month for dev sec_per_month_dev = np.zeros(12) days_per_month_dev = np.zeros(12) - for t in range(12): - days_per_month_dev[t] = monthrange(int(bmk_year_dev), t + 1)[1] - sec_per_month_dev[t] = days_per_month_dev[t] * 86400.0 + for mon in range(12): + days_per_month_dev[mon] = monthrange(int(bmk_year_dev), mon + 1)[1] + sec_per_month_dev[mon] = days_per_month_dev[mon] * 86400.0 # Get all months array of start datetimes for benchmark year bmk_start_dev = np.datetime64(bmk_year_dev + "-01-01") - bmk_end_dev = np.datetime64("{}-01-01".format(int(bmk_year_dev) + 1)) + bmk_end_dev = np.datetime64(f"{int(bmk_year_dev) + 1}-01-01") all_months_dev = np.arange( bmk_start_dev, bmk_end_dev, step=np.timedelta64(1, "M"), dtype="datetime64[M]" ) all_months_gchp_dev = all_months_dev - # Reset all months datetime array if GCHP dev is legacy filename format. - # Legacy format uses time-averaging period mid-point not start. 
- if config["data"]["dev"]["gchp"]["is_pre_13.1"]: - all_months_gchp_dev = np.zeros(12, dtype="datetime64[h]") - for t in range(12): - middle_hr = int(days_per_month_dev[t] * 24 / 2) - delta = np.timedelta64(middle_hr, "h") - all_months_gchp_dev[t] = all_months_dev[t].astype("datetime64[h]") + delta - # Get subset of month datetimes and seconds per month for only benchmark months bmk_mons_dev = all_months_dev[bmk_mon_inds] bmk_mons_gchp_dev = all_months_gchp_dev[bmk_mon_inds] @@ -343,6 +330,8 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): print(" - Table of OH metrics") if config["options"]["outputs"]["ste_table"]: print(" - Table of strat-trop exchange") + if config["options"]["outputs"]["plot_models_vs_obs"]: + print(" - Plots of models vs. observations") print("Comparisons will be made for the following combinations:") if config["options"]["comparisons"]["gcc_vs_gcc"]["run"]: print(" - GCC vs GCC") @@ -361,8 +350,16 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): # ================================================================== # GCC vs GCC filepaths for StateMet collection data # ================================================================== - refmet = get_filepaths(gcc_vs_gcc_refdir, "StateMet", all_months_ref)[0] - devmet = get_filepaths(gcc_vs_gcc_devdir, "StateMet", all_months_dev)[0] + refmet = get_filepaths( + gcc_vs_gcc_refdir, + "StateMet", + all_months_ref + )[0] + devmet = get_filepaths( + gcc_vs_gcc_devdir, + "StateMet", + all_months_dev + )[0] # ================================================================== # GCC vs GCC species concentration plots @@ -405,16 +402,17 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): benchmark_type=bmk_type, overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # -------------------------------------------------------------- # GCC vs GCC species concentration plots: Seasonal # -------------------------------------------------------------- - for t in range(bmk_n_months): - print(f"\nCreating plots for {bmk_mon_strs[t]}") + for mon in range(bmk_n_months): + print(f"\nCreating plots for {bmk_mon_strs[mon]}") # Create plots - mon_ind = bmk_mon_inds[t] + mon_ind = bmk_mon_inds[mon] bmk.make_benchmark_conc_plots( ref[mon_ind], gcc_vs_gcc_refstr, @@ -423,13 +421,14 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): refmet=refmet[mon_ind], devmet=devmet[mon_ind], dst=gcc_vs_gcc_resultsdir, - subdst=bmk_mon_yr_strs_dev[t], + subdst=bmk_mon_yr_strs_dev[mon], weightsdir=config["paths"]["weights_dir"], benchmark_type=bmk_type, plot_by_spc_cat=config["options"]["outputs"][ "plot_options"]["by_spc_cat"], overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -472,23 +471,24 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): benchmark_type=bmk_type, overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # -------------------------------------------------------------- # GCC vs GCC emissions plots: Seasonal # -------------------------------------------------------------- - for t in range(bmk_n_months): - print(f"\nCreating plots for {bmk_mon_strs[t]}") + for mon in range(bmk_n_months): + print(f"\nCreating plots for {bmk_mon_strs[mon]}") # Create plots - mon_ind = bmk_mon_inds[t] + mon_ind = bmk_mon_inds[mon] bmk.make_benchmark_emis_plots( ref[mon_ind], gcc_vs_gcc_refstr, dev[mon_ind], gcc_vs_gcc_devstr, dst=gcc_vs_gcc_resultsdir, - subdst=bmk_mon_yr_strs_dev[t], + 
subdst=bmk_mon_yr_strs_dev[mon], weightsdir=config["paths"]["weights_dir"], plot_by_spc_cat=config["options"]["outputs"][ "plot_options"]["by_spc_cat"], @@ -497,6 +497,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): benchmark_type=bmk_type, overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -566,26 +567,28 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): weightsdir=config["paths"]["weights_dir"], overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # -------------------------------------------------------------- # GCC vs GCC J-value plots: Seasonal # -------------------------------------------------------------- - for t in range(bmk_n_months): - print(f"\nCreating plots for {bmk_mon_strs[t]}") + for mon in range(bmk_n_months): + print(f"\nCreating plots for {bmk_mon_strs[mon]}") # Create plots - mon_ind = bmk_mon_inds[t] + mon_ind = bmk_mon_inds[mon] bmk.make_benchmark_jvalue_plots( ref[mon_ind], gcc_vs_gcc_refstr, dev[mon_ind], gcc_vs_gcc_devstr, dst=gcc_vs_gcc_resultsdir, - subdst=bmk_mon_yr_strs_dev[t], + subdst=bmk_mon_yr_strs_dev[mon], weightsdir=config["paths"]["weights_dir"], overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -623,26 +626,28 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): weightsdir=config["paths"]["weights_dir"], overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # -------------------------------------------------------------- # GCC vs GCC column AOD plots: Seasonal # -------------------------------------------------------------- - for t in range(bmk_n_months): - print(f"\nCreating plots for {bmk_mon_strs[t]}") + for mon in range(bmk_n_months): + print(f"\nCreating plots for {bmk_mon_strs[mon]}") # Create plots - mon_ind = bmk_mon_inds[t] + mon_ind = bmk_mon_inds[mon] bmk.make_benchmark_aod_plots( ref[mon_ind], gcc_vs_gcc_refstr, dev[mon_ind], gcc_vs_gcc_devstr, dst=gcc_vs_gcc_resultsdir, - subdst=bmk_mon_yr_strs_dev[t], + subdst=bmk_mon_yr_strs_dev[mon], weightsdir=config["paths"]["weights_dir"], overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -651,21 +656,21 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): if config["options"]["outputs"]["mass_table"]: print("\n%%% Creating GCC vs. 
GCC mass tables %%%") - def gcc_vs_gcc_mass_table(m): + def gcc_vs_gcc_mass_table(mon): """ - Create mass table for each benchmark month m in parallel + Create mass table for each benchmark month mon in parallel """ # Filepaths refpath = get_filepath( gcc_vs_gcc_refrstdir, "Restart", - bmk_mons_ref[m] + bmk_mons_ref[mon] ) devpath = get_filepath( gcc_vs_gcc_devrstdir, "Restart", - bmk_mons_dev[m] + bmk_mons_dev[mon] ) # Create tables @@ -675,16 +680,22 @@ def gcc_vs_gcc_mass_table(m): devpath, gcc_vs_gcc_devstr, dst=gcc_vs_gcc_tablesdir, - subdst=bmk_mon_yr_strs_dev[m], - label=f"at 01{bmk_mon_yr_strs_dev[m]}", + subdst=bmk_mon_yr_strs_dev[mon], + label=f"at 01{bmk_mon_yr_strs_dev[mon]}", overwrite=True, spcdb_dir=spcdb_dir, ) - # Run in parallel - results = Parallel(n_jobs=-1)( - delayed(gcc_vs_gcc_mass_table)(t) for t in range(bmk_n_months) - ) + # Create tables in parallel + # Turn off parallelization if n_jobs==1 + if config["options"]["n_cores"] != 1: + results = Parallel(n_jobs=config["options"]["n_cores"])( + delayed(gcc_vs_gcc_mass_table)(mon) + for mon in range(bmk_n_months) + ) + else: + for mon in range(bmk_n_months): + results = gcc_vs_gcc_mass_table(mon) # ================================================================== # GCC vs GCC operations budgets tables @@ -692,7 +703,7 @@ def gcc_vs_gcc_mass_table(m): if config["options"]["outputs"]["ops_budget_table"]: print("\n%%% Creating GCC vs. GCC operations budget tables %%%") - def gcc_vs_gcc_ops_budg(m): + def gcc_vs_gcc_ops_budg(mon): """ Create budget table for each benchmark month m in parallel """ @@ -701,12 +712,12 @@ def gcc_vs_gcc_ops_budg(m): refpath = get_filepath( gcc_vs_gcc_refdir, "Budget", - bmk_mons_ref[m] + bmk_mons_ref[mon] ) devpath = get_filepath( gcc_vs_gcc_devdir, "Budget", - bmk_mons_dev[m] + bmk_mons_dev[mon] ) # Create tables @@ -715,17 +726,23 @@ def gcc_vs_gcc_ops_budg(m): refpath, config["data"]["dev"]["gcc"]["version"], devpath, - sec_per_month_ref[m], - sec_per_month_dev[m], + sec_per_month_ref[mon], + sec_per_month_dev[mon], benchmark_type=bmk_type, - label=f"at 01{bmk_mon_yr_strs_dev[m]}", + label=f"at 01{bmk_mon_yr_strs_dev[mon]}", dst=gcc_vs_gcc_tablesdir, ) - # Run in parallel - results = Parallel(n_jobs=-1)( - delayed(gcc_vs_gcc_ops_budg)(t) for t in range(bmk_n_months) - ) + # Create tables in parallel + # Turn off parallelization if n_jobs==1 + if config["options"]["n_cores"] != 1: + results = Parallel(n_jobs=config["options"]["n_cores"])( + delayed(gcc_vs_gcc_ops_budg)(mon) \ + for mon in range(bmk_n_months) + ) + else: + for mon in range(bmk_n_months): + results = gcc_vs_gcc_ops_budg(mon) # ================================================================== # GCC vs GCC aerosols budgets/burdens tables @@ -827,6 +844,37 @@ def gcc_vs_gcc_ops_budg(m): spcdb_dir=spcdb_dir, ) + # ================================================================== + # GCC vs GCC Model vs. Observations plots + # ================================================================== + if config["options"]["outputs"]["plot_models_vs_obs"]: + print("\n%%% Creating GCC vs. GCC models vs. obs. plots %%%") + + # Filepaths + ref = get_filepaths( + gcc_vs_gcc_refdir, + "SpeciesConc", + all_months_ref + )[0] + dev = get_filepaths( + gcc_vs_gcc_devdir, + "SpeciesConc", + all_months_dev + )[0] + + # Plot models vs. 
observations (O3 for now) + mvo.make_benchmark_models_vs_obs_plots( + config["paths"]["obs_data_dir"], + ref, + config["data"]["ref"]["gcc"]["version"], + dev, + config["data"]["dev"]["gcc"]["version"], + dst=gcc_vs_gcc_models_vs_obs_dir, + overwrite=True, + verbose=False + ) + + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Create GCHP vs GCC benchmark plots and tables # @@ -848,7 +896,6 @@ def gcc_vs_gcc_ops_budg(m): "StateMet", all_months_gchp_dev, is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"] )[0] # Get GCHP grid resolution from met collection file @@ -875,9 +922,7 @@ def gcc_vs_gcc_ops_budg(m): gchp_vs_gcc_devdir, "SpeciesConc", all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"][ - "is_pre_13.1"], + is_gchp=True )[0] # Create plots @@ -896,16 +941,17 @@ def gcc_vs_gcc_ops_budg(m): benchmark_type=bmk_type, overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # -------------------------------------------------------------- # GCHP vs GCC species concentration plots: Seasonal # -------------------------------------------------------------- - for t in range(bmk_n_months): - print(f"\nCreating plots for {bmk_mon_strs[t]}") + for mon in range(bmk_n_months): + print(f"\nCreating plots for {bmk_mon_strs[mon]}") # Create plots - mon_ind = bmk_mon_inds[t] + mon_ind = bmk_mon_inds[mon] bmk.make_benchmark_conc_plots( ref[mon_ind], gchp_vs_gcc_refstr, @@ -914,13 +960,14 @@ def gcc_vs_gcc_ops_budg(m): refmet=refmet[mon_ind], devmet=devmet[mon_ind], dst=gchp_vs_gcc_resultsdir, - subdst=bmk_mon_yr_strs_dev[t], + subdst=bmk_mon_yr_strs_dev[mon], weightsdir=config["paths"]["weights_dir"], benchmark_type=bmk_type, plot_by_spc_cat=config["options"]["outputs"][ "plot_options"]["by_spc_cat"], overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ============================================================== @@ -943,8 +990,7 @@ def gcc_vs_gcc_ops_budg(m): gchp_vs_gcc_devdir, "Emissions", all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] # Create plots @@ -962,32 +1008,36 @@ def gcc_vs_gcc_ops_budg(m): "plot_options"]["by_spc_cat"], plot_by_hco_cat=config["options"]["outputs"][ "plot_options"]["by_hco_cat"], + benchmark_type=bmk_type, overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # -------------------------------------------------------------- # GCHP vs GCC emissions plots: Seasonal # -------------------------------------------------------------- - for t in range(bmk_n_months): - print(f"\nCreating plots for {bmk_mon_strs[t]}") + for mon in range(bmk_n_months): + print(f"\nCreating plots for {bmk_mon_strs[mon]}") # Create plots - mon_ind = bmk_mon_inds[t] + mon_ind = bmk_mon_inds[mon] bmk.make_benchmark_emis_plots( ref[mon_ind], gchp_vs_gcc_refstr, dev[mon_ind], gchp_vs_gcc_devstr, dst=gchp_vs_gcc_resultsdir, - subdst=bmk_mon_yr_strs_dev[t], + subdst=bmk_mon_yr_strs_dev[mon], weightsdir=config["paths"]["weights_dir"], plot_by_spc_cat=config["options"]["outputs"][ "plot_options"]["by_spc_cat"], plot_by_hco_cat=config["options"]["outputs"][ "plot_options"]["by_hco_cat"], + benchmark_type=bmk_type, overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -1006,8 +1056,7 @@ def gcc_vs_gcc_ops_budg(m): gchp_vs_gcc_devdir, "Emissions", all_months_gchp_dev, - is_gchp=True, - 
gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] # Create emissions table that spans entire year @@ -1020,6 +1069,7 @@ def gcc_vs_gcc_ops_budg(m): dst=gchp_vs_gcc_resultsdir, ref_interval=sec_per_month_ref, dev_interval=sec_per_month_dev, + benchmark_type=bmk_type, overwrite=True, spcdb_dir=spcdb_dir, ) @@ -1044,8 +1094,7 @@ def gcc_vs_gcc_ops_budg(m): gchp_vs_gcc_devdir, "JValues", all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] # Create plots @@ -1061,26 +1110,28 @@ def gcc_vs_gcc_ops_budg(m): weightsdir=config["paths"]["weights_dir"], overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # -------------------------------------------------------------- # GCHP vs GCC J-values plots: Seasonal # -------------------------------------------------------------- - for t in range(bmk_n_months): - print(f"\nCreating plots for {bmk_mon_strs[t]}") + for mon in range(bmk_n_months): + print(f"\nCreating plots for {bmk_mon_strs[mon]}") # Create plots - mon_ind = bmk_mon_inds[t] + mon_ind = bmk_mon_inds[mon] bmk.make_benchmark_jvalue_plots( ref[mon_ind], gchp_vs_gcc_refstr, dev[mon_ind], gchp_vs_gcc_devstr, dst=gchp_vs_gcc_resultsdir, - subdst=bmk_mon_yr_strs_dev[t], + subdst=bmk_mon_yr_strs_dev[mon], weightsdir=config["paths"]["weights_dir"], overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -1103,9 +1154,7 @@ def gcc_vs_gcc_ops_budg(m): gchp_vs_gcc_devdir, "Aerosols", all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"][ - "is_pre_13.1"], + is_gchp=True )[0] # Create plots @@ -1121,26 +1170,28 @@ def gcc_vs_gcc_ops_budg(m): weightsdir=config["paths"]["weights_dir"], overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # -------------------------------------------------------------- # GCHP vs GCC column AOD plots: Seasonal # -------------------------------------------------------------- - for t in range(bmk_n_months): - print(f"\nCreating plots for {bmk_mon_strs[t]}") + for mon in range(bmk_n_months): + print(f"\nCreating plots for {bmk_mon_strs[mon]}") # Create plots - mon_ind = bmk_mon_inds[t] + mon_ind = bmk_mon_inds[mon] bmk.make_benchmark_aod_plots( ref[mon_ind], gchp_vs_gcc_refstr, dev[mon_ind], gchp_vs_gcc_devstr, dst=gchp_vs_gcc_resultsdir, - subdst=bmk_mon_yr_strs_dev[t], + subdst=bmk_mon_yr_strs_dev[mon], weightsdir=config["paths"]["weights_dir"], overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -1149,7 +1200,7 @@ def gcc_vs_gcc_ops_budg(m): if config["options"]["outputs"]["mass_table"]: print("\n%%% Creating GCHP vs. 
GCC mass tables %%%") - def gchp_vs_gcc_mass_table(m): + def gchp_vs_gcc_mass_table(mon): """ Create mass table for each benchmark month in parallel """ @@ -1158,16 +1209,14 @@ def gchp_vs_gcc_mass_table(m): refpath = get_filepath( gchp_vs_gcc_refrstdir, "Restart", - bmk_mons_dev[m] + bmk_mons_dev[mon] ) devpath = get_filepath( gchp_vs_gcc_devrstdir, "Restart", - bmk_mons_dev[m], + bmk_mons_dev[mon], is_gchp=True, gchp_res=config["data"]["dev"]["gchp"]["resolution"], - gchp_is_pre_13_1=config["data"]["dev"]["gchp"][ - "is_pre_13.1"], gchp_is_pre_14_0=config["data"]["dev"]["gchp"][ "is_pre_14.0"] ) @@ -1180,8 +1229,6 @@ def gchp_vs_gcc_mass_table(m): bmk_end_dev, is_gchp=True, gchp_res=config["data"]["dev"]["gchp"]["resolution"], - gchp_is_pre_13_1=config["data"]["dev"]["gchp"][ - "is_pre_13.1"], gchp_is_pre_14_0=config["data"]["dev"]["gchp"][ "is_pre_14.0"] ) @@ -1193,16 +1240,23 @@ def gchp_vs_gcc_mass_table(m): devpath, gchp_vs_gcc_devstr, dst=gchp_vs_gcc_tablesdir, - subdst=bmk_mon_yr_strs_dev[m], - label=f"at 01{bmk_mon_yr_strs_dev[m]}", + subdst=bmk_mon_yr_strs_dev[mon], + label=f"at 01{bmk_mon_yr_strs_dev[mon]}", overwrite=True, spcdb_dir=spcdb_dir, dev_met_extra=devareapath ) - results = Parallel(n_jobs=-1)( - delayed(gchp_vs_gcc_mass_table)(t) for t in range(bmk_n_months) - ) + # Create tables in parallel + # Turn off parallelization if n_jobs==1 + if config["options"]["n_cores"] != 1: + results = Parallel(n_jobs=config["options"]["n_cores"])( + delayed(gchp_vs_gcc_mass_table)(mon) \ + for mon in range(bmk_n_months) + ) + else: + for mon in range(bmk_n_months): + results = gchp_vs_gcc_mass_table(mon) # ================================================================== # GCHP vs GCC operations budgets tables @@ -1210,7 +1264,7 @@ def gchp_vs_gcc_mass_table(m): if config["options"]["outputs"]["ops_budget_table"]: print("\n%%% Creating GCHP vs. 
GCC operations budget tables %%%") - def gchp_vs_gcc_ops_budg(m): + def gchp_vs_gcc_ops_budg(mon): """ Create operations budgets for each benchmark month m in parallel """ @@ -1219,15 +1273,13 @@ def gchp_vs_gcc_ops_budg(m): refpath = get_filepath( gchp_vs_gcc_refdir, "Budget", - bmk_mons_dev[m] + bmk_mons_dev[mon] ) devpath = get_filepath( gchp_vs_gcc_devdir, "Budget", - bmk_mons_gchp_dev[m], - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"][ - "is_pre_13.1"] + bmk_mons_gchp_dev[mon], + is_gchp=True ) # Create tables @@ -1236,10 +1288,10 @@ def gchp_vs_gcc_ops_budg(m): refpath, config["data"]["dev"]["gchp"]["version"], devpath, - bmk_sec_per_month_dev[m], - bmk_sec_per_month_dev[m], + bmk_sec_per_month_dev[mon], + bmk_sec_per_month_dev[mon], benchmark_type=bmk_type, - label=f"at 01{bmk_mon_yr_strs_dev[m]}", + label=f"at 01{bmk_mon_yr_strs_dev[mon]}", operations=[ "Chemistry", "Convection", @@ -1251,9 +1303,16 @@ def gchp_vs_gcc_ops_budg(m): dst=gchp_vs_gcc_tablesdir, ) - results = Parallel(n_jobs=-1)( - delayed(gchp_vs_gcc_ops_budg)(t) for t in range(bmk_n_months) - ) + # Create tables in parallel + # Turn off parallelization if n_jobs==1 + if config["options"]["n_cores"] != 1: + results = Parallel(n_jobs=config["options"]["n_cores"])( + delayed(gchp_vs_gcc_ops_budg)(mon) \ + for mon in range(bmk_n_months) + ) + else: + for mon in range(bmk_n_months): + results = gchp_vs_gcc_ops_budg(mon) # ================================================================== # GCHP vs GCC aerosol budgets and burdens tables @@ -1266,15 +1325,13 @@ def gchp_vs_gcc_ops_budg(m): gchp_vs_gcc_devdir, "Aerosols", all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] devspc = get_filepaths( gchp_vs_gcc_devdir, "SpeciesConc", all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] # Create tables @@ -1339,8 +1396,7 @@ def gchp_vs_gcc_ops_budg(m): gchp_vs_gcc_devdir, "Metrics", all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] # Create table @@ -1360,6 +1416,37 @@ def gchp_vs_gcc_ops_budg(m): if config["options"]["outputs"]["ste_table"]: print("\n%%% Skipping GCHP vs. GCC Strat-Trop Exchange table %%%") + # ================================================================== + # GCHP vs GCC Model vs. Observations plots + # ================================================================== + if config["options"]["outputs"]["plot_models_vs_obs"]: + print("\n%%% Creating GCHP vs. GCC models vs. obs. plots %%%") + + # Filepaths + ref = get_filepaths( + gchp_vs_gcc_refdir, + "SpeciesConc", + all_months_dev + )[0] + dev = get_filepaths( + gchp_vs_gcc_devdir, + "SpeciesConc", + all_months_gchp_dev, + is_gchp=True + )[0] + + # Plot models vs. 
observations (O3 for now) + mvo.make_benchmark_models_vs_obs_plots( + config["paths"]["obs_data_dir"], + ref, + config["data"]["dev"]["gcc"]["version"], + dev, + config["data"]["dev"]["gchp"]["version"], + dst=gchp_vs_gcc_models_vs_obs_dir, + overwrite=True, + verbose=False + ) + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Create GCHP vs GCHP benchmark plots and tables # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -1372,15 +1459,13 @@ def gchp_vs_gcc_ops_budg(m): gchp_vs_gchp_refdir, "StateMet", all_months_gchp_ref, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["ref"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] devmet = get_filepaths( gchp_vs_gcc_devdir, "StateMet", all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] # Get GCHP grid resolutions from met collection file @@ -1408,15 +1493,13 @@ def gchp_vs_gcc_ops_budg(m): gchp_vs_gchp_refdir, "SpeciesConc", all_months_gchp_ref, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["ref"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] dev = get_filepaths( gchp_vs_gchp_devdir, "SpeciesConc", all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] # Create plots @@ -1438,16 +1521,17 @@ def gchp_vs_gcc_ops_budg(m): "plot_options"]["by_spc_cat"], overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # -------------------------------------------------------------- # GCHP vs GCHP species concentration plots: Seasonal # -------------------------------------------------------------- - for t in range(bmk_n_months): - print(f"\nCreating plots for {bmk_mon_strs[t]}") + for mon in range(bmk_n_months): + print(f"\nCreating plots for {bmk_mon_strs[mon]}") # Create plots - mon_ind = bmk_mon_inds[t] + mon_ind = bmk_mon_inds[mon] bmk.make_benchmark_conc_plots( ref[mon_ind], gchp_vs_gchp_refstr, @@ -1457,13 +1541,14 @@ def gchp_vs_gcc_ops_budg(m): refmet=refmet[mon_ind], devmet=devmet[mon_ind], dst=gchp_vs_gchp_resultsdir, - subdst=bmk_mon_yr_strs_dev[t], + subdst=bmk_mon_yr_strs_dev[mon], weightsdir=config["paths"]["weights_dir"], benchmark_type=bmk_type, plot_by_spc_cat=config["options"]["outputs"][ "plot_options"]["by_spc_cat"], overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -1481,15 +1566,13 @@ def gchp_vs_gcc_ops_budg(m): gchp_vs_gchp_refdir, "Emissions", all_months_gchp_ref, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["ref"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] dev = get_filepaths( gchp_vs_gchp_devdir, "Emissions", all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] # Create plots @@ -1508,18 +1591,20 @@ def gchp_vs_gcc_ops_budg(m): "plot_options"]["by_spc_cat"], plot_by_hco_cat=config["options"]["outputs"][ "plot_options"]["by_hco_cat"], + benchmark_type=bmk_type, overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # -------------------------------------------------------------- # GCHP vs GCHP species concentration plots: Seasonal # -------------------------------------------------------------- - for t in range(bmk_n_months): - print(f"\nCreating plots for {bmk_mon_strs[t]}") + for mon in range(bmk_n_months): + print(f"\nCreating plots for {bmk_mon_strs[mon]}") # Create plots - mon_ind = bmk_mon_inds[t] + mon_ind = bmk_mon_inds[mon] 
bmk.make_benchmark_emis_plots( ref[mon_ind], gchp_vs_gchp_refstr, @@ -1527,14 +1612,16 @@ def gchp_vs_gcc_ops_budg(m): gchp_vs_gchp_devstr, dst=gchp_vs_gchp_resultsdir, cmpres=cmpres, - subdst=bmk_mon_yr_strs_dev[t], + subdst=bmk_mon_yr_strs_dev[mon], weightsdir=config["paths"]["weights_dir"], plot_by_spc_cat=config["options"]["outputs"][ "plot_options"]["by_spc_cat"], plot_by_hco_cat=config["options"]["outputs"][ "plot_options"]["by_hco_cat"], + benchmark_type=bmk_type, overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -1548,15 +1635,13 @@ def gchp_vs_gcc_ops_budg(m): gchp_vs_gchp_refdir, "Emissions", all_months_gchp_ref, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["ref"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] dev = get_filepaths( gchp_vs_gchp_devdir, "Emissions", all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] # Create table @@ -1570,6 +1655,7 @@ def gchp_vs_gcc_ops_budg(m): dst=gchp_vs_gchp_resultsdir, ref_interval=sec_per_month_ref, dev_interval=sec_per_month_dev, + benchmark_type=bmk_type, overwrite=True, spcdb_dir=spcdb_dir, ) @@ -1589,15 +1675,13 @@ def gchp_vs_gcc_ops_budg(m): gchp_vs_gchp_refdir, "JValues", all_months_gchp_ref, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["ref"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] dev = get_filepaths( gchp_vs_gchp_devdir, "JValues", all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] # Create plots @@ -1614,27 +1698,29 @@ def gchp_vs_gcc_ops_budg(m): weightsdir=config["paths"]["weights_dir"], overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # -------------------------------------------------------------- # GCHP vs GCHP J-values plots: Seasonal # -------------------------------------------------------------- - for t in range(bmk_n_months): - print(f"\nCreating plots for {bmk_mon_strs[t]}") + for mon in range(bmk_n_months): + print(f"\nCreating plots for {bmk_mon_strs[mon]}") # Create plots - mon_ind = bmk_mon_inds[t] + mon_ind = bmk_mon_inds[mon] bmk.make_benchmark_jvalue_plots( ref[mon_ind], gchp_vs_gchp_refstr, dev[mon_ind], gchp_vs_gchp_devstr, dst=gchp_vs_gchp_resultsdir, - subdst=bmk_mon_yr_strs_dev[t], + subdst=bmk_mon_yr_strs_dev[mon], cmpres=cmpres, weightsdir=config["paths"]["weights_dir"], overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -1652,15 +1738,13 @@ def gchp_vs_gcc_ops_budg(m): gchp_vs_gchp_refdir, "Aerosols", all_months_gchp_ref, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["ref"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] dev = get_filepaths( gchp_vs_gchp_devdir, "Aerosols", all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] # Create plots @@ -1677,27 +1761,29 @@ def gchp_vs_gcc_ops_budg(m): weightsdir=config["paths"]["weights_dir"], overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # -------------------------------------------------------------- # GCHP vs GCHP column AOD plots: Seasonal # -------------------------------------------------------------- - for t in range(bmk_n_months): - print(f"\nCreating plots for {bmk_mon_strs[t]}") + for mon in range(bmk_n_months): + print(f"\nCreating plots for {bmk_mon_strs[mon]}") # Create plots - mon_ind = 
bmk_mon_inds[t] + mon_ind = bmk_mon_inds[mon] bmk.make_benchmark_aod_plots( ref[mon_ind], gchp_vs_gchp_refstr, dev[mon_ind], gchp_vs_gchp_devstr, dst=gchp_vs_gchp_resultsdir, - subdst=bmk_mon_yr_strs_dev[t], + subdst=bmk_mon_yr_strs_dev[mon], cmpres=cmpres, weightsdir=config["paths"]["weights_dir"], overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -1706,7 +1792,7 @@ def gchp_vs_gcc_ops_budg(m): if config["options"]["outputs"]["mass_table"]: print("\n%%% Creating GCHP vs. GCHP mass tables %%%") - def gchp_vs_gchp_mass_table(m): + def gchp_vs_gchp_mass_table(mon): """ Create mass table for each benchmark month m in parallel """ @@ -1715,11 +1801,9 @@ def gchp_vs_gchp_mass_table(m): refpath = get_filepath( gchp_vs_gchp_refrstdir, "Restart", - bmk_mons_ref[m], + bmk_mons_ref[mon], is_gchp=True, gchp_res=config["data"]["ref"]["gchp"]["resolution"], - gchp_is_pre_13_1=config["data"]["ref"]["gchp"][ - "is_pre_13.1"], gchp_is_pre_14_0=config["data"]["ref"]["gchp"][ "is_pre_14.0"] ) @@ -1728,11 +1812,9 @@ def gchp_vs_gchp_mass_table(m): devpath = get_filepath( gchp_vs_gchp_devrstdir, "Restarts", - bmk_mons_dev[m], + bmk_mons_dev[mon], is_gchp=True, gchp_res=config["data"]["dev"]["gchp"]["resolution"], - gchp_is_pre_13_1=config["data"]["dev"]["gchp"][ - "is_pre_13.1"], gchp_is_pre_14_0=config["data"]["dev"]["gchp"][ "is_pre_14.0"] ) @@ -1745,8 +1827,6 @@ def gchp_vs_gchp_mass_table(m): bmk_end_ref, is_gchp=True, gchp_res=config["data"]["dev"]["gchp"]["resolution"], - gchp_is_pre_13_1=config["data"]["dev"]["gchp"][ - "is_pre_13.1"], gchp_is_pre_14_0=config["data"]["dev"]["gchp"][ "is_pre_14.0"] ) @@ -1756,8 +1836,6 @@ def gchp_vs_gchp_mass_table(m): bmk_end_dev, is_gchp=True, gchp_res=config["data"]["dev"]["gchp"]["resolution"], - gchp_is_pre_13_1=config["data"]["dev"]["gchp"][ - "is_pre_13.1"], gchp_is_pre_14_0=config["data"]["dev"]["gchp"][ "is_pre_14.0"] ) @@ -1769,18 +1847,24 @@ def gchp_vs_gchp_mass_table(m): devpath, gchp_vs_gchp_devstr, dst=gchp_vs_gchp_tablesdir, - subdst=bmk_mon_yr_strs_dev[m], - label=f"at 01{bmk_mon_yr_strs_dev[m]}", + subdst=bmk_mon_yr_strs_dev[mon], + label=f"at 01{bmk_mon_yr_strs_dev[mon]}", overwrite=True, spcdb_dir=spcdb_dir, ref_met_extra=refareapath, dev_met_extra=devareapath ) - # Run in parallel - results = Parallel(n_jobs=-1)( - delayed(gchp_vs_gchp_mass_table)(t) for t in range(bmk_n_months) - ) + # Create tables in parallel + # Turn off parallelization if n_jobs==1 + if config["options"]["n_cores"] != 1: + results = Parallel(n_jobs=config["options"]["n_cores"])( + delayed(gchp_vs_gchp_mass_table)(mon) \ + for mon in range(bmk_n_months) + ) + else: + for mon in range(bmk_n_months): + results = gchp_vs_gchp_mass_table(mon) # ================================================================== # GCHP vs GCHP operations budgets tables @@ -1789,7 +1873,7 @@ def gchp_vs_gchp_mass_table(m): print("\n%%% Creating GCHP vs. 
GCHP operations budget tables %%%") # Diagnostic collections to read - def gchp_vs_gchp_ops_budg(m): + def gchp_vs_gchp_ops_budg(mon): """ Creates operations budgets for each benchmark month m in parallel """ @@ -1798,18 +1882,14 @@ def gchp_vs_gchp_ops_budg(m): refpath = get_filepath( gchp_vs_gchp_refdir, "Budget", - bmk_mons_gchp_ref[m], - is_gchp=True, - gchp_is_pre_13_1=config["data"]["ref"]["gchp"][ - "is_pre_13.1"], + bmk_mons_gchp_ref[mon], + is_gchp=True ) devpath = get_filepath( gchp_vs_gchp_devdir, "Budget", - bmk_mons_gchp_dev[m], - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"][ - "is_pre_13.1"], + bmk_mons_gchp_dev[mon], + is_gchp=True ) # Compute tables @@ -1818,10 +1898,10 @@ def gchp_vs_gchp_ops_budg(m): refpath, config["data"]["dev"]["gchp"]["version"], devpath, - bmk_sec_per_month_ref[m], - bmk_sec_per_month_dev[m], + bmk_sec_per_month_ref[mon], + bmk_sec_per_month_dev[mon], benchmark_type=bmk_type, - label=f"at 01{bmk_mon_yr_strs_dev[m]}", + label=f"at 01{bmk_mon_yr_strs_dev[mon]}", operations=[ "Chemistry", "Convection", @@ -1833,10 +1913,16 @@ def gchp_vs_gchp_ops_budg(m): dst=gchp_vs_gchp_tablesdir, ) - # Run in parallel - results = Parallel(n_jobs=-1)( - delayed(gchp_vs_gchp_ops_budg)(t) for t in range(bmk_n_months) - ) + # Create tables in parallel + # Turn off parallelization if n_jobs==1 + if config["options"]["n_cores"] != 1: + results = Parallel(n_jobs=config["options"]["n_cores"])( + delayed(gchp_vs_gchp_ops_budg)(mon) \ + for mon in range(bmk_n_months) + ) + else: + for mon in range(bmk_n_months): + results = gchp_vs_gchp_ops_budg(mon) # ================================================================== # GCHP vs GCHP aerosol budgets and burdens tables @@ -1849,15 +1935,13 @@ def gchp_vs_gchp_ops_budg(m): gchp_vs_gchp_devdir, "Aerosols", all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] devspc = get_filepaths( gchp_vs_gchp_devdir, "SpeciesConc", all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] # Create tables @@ -1906,15 +1990,13 @@ def gchp_vs_gchp_ops_budg(m): gchp_vs_gchp_refdir, "Metrics", all_months_gchp_ref, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["ref"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] dev = get_filepaths( gchp_vs_gchp_devdir, "Metrics", all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] # Create the OH Metrics table @@ -1934,6 +2016,38 @@ def gchp_vs_gchp_ops_budg(m): if config["options"]["outputs"]["ste_table"]: print("\n%%% Skipping GCHP vs. GCHP Strat-Trop Exchange table %%%") + # ================================================================== + # GCHP vs GCHP Model vs. Observations plots + # ================================================================== + if config["options"]["outputs"]["plot_models_vs_obs"]: + print("\n%%% Creating GCHP vs. GCHP models vs. obs. plots %%%") + + # Filepaths + ref = get_filepaths( + gchp_vs_gchp_refdir, + "SpeciesConc", + all_months_gchp_ref, + is_gchp=True + )[0] + dev = get_filepaths( + gchp_vs_gchp_devdir, + "SpeciesConc", + all_months_gchp_dev, + is_gchp=True + )[0] + + # Plot models vs. 
observations (O3 for now) + mvo.make_benchmark_models_vs_obs_plots( + config["paths"]["obs_data_dir"], + ref, + config["data"]["ref"]["gchp"]["version"], + dev, + config["data"]["dev"]["gchp"]["version"], + dst=gchp_vs_gchp_models_vs_obs_dir, + overwrite=True, + verbose=False + ) + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Create GCHP vs GCC difference of differences benchmark plots # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -1944,7 +2058,8 @@ def gchp_vs_gchp_ops_budg(m): # -------------------------------------------------------------- - # GCHP vs GCC diff-of-diff species concentration plots: Annual Mean + # GCHP vs GCC diff-of-diff species concentration plots: + # Annual Mean # -------------------------------------------------------------- # Filepaths @@ -1962,15 +2077,13 @@ def gchp_vs_gchp_ops_budg(m): gchp_vs_gchp_refdir, "SpeciesConc", all_months_gchp_ref, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["ref"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] gchp_dev = get_filepaths( gchp_vs_gchp_devdir, "SpeciesConc", all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] # Create plots @@ -1993,23 +2106,24 @@ def gchp_vs_gchp_ops_budg(m): second_dev=gchp_dev, cats_in_ugm3=None, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # -------------------------------------------------------------- # GCHP vs GCC diff-of-diff species concentration plots: Seasonal # -------------------------------------------------------------- - for t in range(bmk_n_months): - print(f"\nCreating plots for {bmk_mon_strs[t]}") + for mon in range(bmk_n_months): + print(f"\nCreating plots for {bmk_mon_strs[mon]}") # Create plots - mon_ind = bmk_mon_inds[t] + mon_ind = bmk_mon_inds[mon] bmk.make_benchmark_conc_plots( gcc_ref[mon_ind], diff_of_diffs_refstr, gchp_ref[mon_ind], diff_of_diffs_devstr, dst=diff_of_diffs_resultsdir, - subdst=bmk_mon_yr_strs_dev[t], + subdst=bmk_mon_yr_strs_dev[mon], weightsdir=config["paths"]["weights_dir"], benchmark_type=bmk_type, plot_by_spc_cat=config["options"]["outputs"][ @@ -2020,6 +2134,7 @@ def gchp_vs_gchp_ops_budg(m): second_dev=gchp_dev[mon_ind], cats_in_ugm3=None, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== diff --git a/benchmark/modules/run_1yr_tt_benchmark.py b/gcpy/benchmark/modules/run_1yr_tt_benchmark.py old mode 100755 new mode 100644 similarity index 80% rename from benchmark/modules/run_1yr_tt_benchmark.py rename to gcpy/benchmark/modules/run_1yr_tt_benchmark.py index 87f4be79..878dad76 --- a/benchmark/modules/run_1yr_tt_benchmark.py +++ b/gcpy/benchmark/modules/run_1yr_tt_benchmark.py @@ -24,11 +24,7 @@ (3) Make sure the /path/to/gcpy/benchmark is in your PYTHONPATH shell environment variable. - (4) If you wish to use the gcpy test data, then set "gcpy_test: True" - in 1yr_tt_benchmark.yml. If you wish to use actual GEOS-Chem - output data, then set "gcpy_test: False". - - (5) Type at the command line + (4) Type at the command line ./run_benchmark.py 1yr_tt_benchmark.yml @@ -46,7 +42,7 @@ https://github.com/ipython/ipython/issues/10627 -This script corresponds with GCPy 1.3.2. Edit this version ID if releasing +This script corresponds with GCPy 1.4.0. Edit this version ID if releasing a new version of GCPy. 
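The mass-table and operations-budget hunks above all replace a hard-coded `Parallel(n_jobs=-1)` with a config-driven choice between parallel and serial execution. A minimal standalone sketch of that pattern, where `make_table`, `n_months`, and `n_cores` are stand-ins for the scripts' own locals and YAML config values:

```python
# Illustrative sketch of the serial/parallel dispatch used above; the
# names here are placeholders, not the scripts' actual variables.
from joblib import Parallel, delayed

def run_monthly(make_table, n_months, n_cores):
    """Run make_table(mon) for each month, in parallel unless n_cores == 1."""
    if n_cores != 1:
        # The worker count now comes from the YAML config, not n_jobs=-1
        return Parallel(n_jobs=n_cores)(
            delayed(make_table)(mon) for mon in range(n_months)
        )
    # Serial fallback: a plain loop, with no worker-process overhead
    return [make_table(mon) for mon in range(n_months)]

print(run_monthly(lambda mon: mon * mon, 4, 2))  # [0, 1, 4, 9]
```

Running serially when a single core is requested also makes tracebacks easier to read when debugging a failing table.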
""" @@ -59,8 +55,9 @@ from shutil import copyfile from calendar import monthrange import numpy as np +from joblib import Parallel, delayed from gcpy.util import get_filepath, get_filepaths -from gcpy import benchmark as bmk +from gcpy import benchmark_funcs as bmk import gcpy.budget_tt as ttbdg import gcpy.ste_flux as ste @@ -123,6 +120,21 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): ) # Diagnostic file directory paths + gcc_vs_gcc_refrstdir = os.path.join( + config["paths"]["main_dir"], + config["data"]["ref"]["gcc"]["dir"], + config["data"]["ref"]["gcc"]["restarts_subdir"] + ) + gchp_vs_gcc_refrstdir = os.path.join( + config["paths"]["main_dir"], + config["data"]["ref"]["gchp"]["dir"], + config["data"]["ref"]["gchp"]["restarts_subdir"] + ) + gchp_vs_gchp_refrstdir = os.path.join( + config["paths"]["main_dir"], + config["data"]["ref"]["gchp"]["dir"], + config["data"]["ref"]["gchp"]["restarts_subdir"] + ) gcc_vs_gcc_devrstdir = os.path.join( config["paths"]["main_dir"], config["data"]["dev"]["gcc"]["dir"], @@ -220,6 +232,13 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): # Dates and times -- Ref data # ====================================================================== + # Get days per month and seconds per month for ref + sec_per_month_ref = np.zeros(12) + days_per_month_ref = np.zeros(12) + for mon in range(12): + days_per_month_ref[mon] = monthrange(int(bmk_year_ref), mon + 1)[1] + sec_per_month_ref[mon] = days_per_month_ref[mon] * 86400.0 + # Get all months array of start datetimes for benchmark year bmk_start_ref = np.datetime64(bmk_year_ref + "-01-01") bmk_end_ref = np.datetime64(f"{int(bmk_year_ref) + 1}-01-01") @@ -231,22 +250,18 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): ) all_months_gchp_ref = all_months_ref + # Get subset of month datetimes and seconds per month + # for only benchmark months + bmk_mons_ref = all_months_ref[bmk_mon_inds] + bmk_mons_gchp_ref = all_months_gchp_ref[bmk_mon_inds] + bmk_sec_per_month_ref = sec_per_month_ref[bmk_mon_inds] + # Compute seconds in the Ref year sec_per_yr_ref = 0 for t in range(12): days_in_mon = monthrange(int(bmk_year_ref), t + 1)[1] sec_per_yr_ref += days_in_mon * 86400.0 - # Overwrite all_months_gchp_ref if GCHP ref is legacy filename format. - # Legacy format uses time-averaging period mid-point not start. - if config["data"]["ref"]["gchp"]["is_pre_13.1"]: - all_months_gchp_ref = np.zeros(12, dtype="datetime64[h]") - for t in range(12): - days_in_mon = monthrange(int(bmk_year_ref), t + 1)[1] - middle_hr = int(days_in_mon * 24 / 2) - delta = np.timedelta64(middle_hr, "h") - all_months_gchp_ref[t] = all_months_ref[t].astype("datetime64[h]") + delta - # ====================================================================== # Dates and times -- Dev data # ====================================================================== @@ -254,6 +269,13 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): # Month/year strings for use in table subdirectories (e.g. 
Jan2016) bmk_mon_yr_strs_dev = [v + bmk_year_dev for v in bmk_mon_strs] + # Get days per month and seconds per month for dev + sec_per_month_dev = np.zeros(12) + days_per_month_dev = np.zeros(12) + for mon in range(12): + days_per_month_dev[mon] = monthrange(int(bmk_year_dev), mon + 1)[1] + sec_per_month_dev[mon] = days_per_month_dev[mon] * 86400.0 + # Get all months array of start datetimes for benchmark year bmk_start_dev = np.datetime64(bmk_year_dev + "-01-01") bmk_end_dev = np.datetime64(f"{int(bmk_year_dev) + 1}-01-01") @@ -265,23 +287,16 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): ) all_months_gchp_dev = all_months_dev + bmk_mons_dev = all_months_dev[bmk_mon_inds] + bmk_mons_gchp_dev = all_months_gchp_dev[bmk_mon_inds] + bmk_sec_per_month_dev = sec_per_month_dev[bmk_mon_inds] + # Compute seconds in the Dev year sec_per_yr_dev = 0 for t in range(12): days_in_mon = monthrange(int(bmk_year_dev), t + 1)[1] sec_per_yr_dev += days_in_mon * 86400.0 - # Overwrite all_months_gchp_ref if GCHP ref is legacy filename format. - # Legacy format uses time-averaging period mid-point not start. - if config["data"]["dev"]["gchp"]["is_pre_13.1"]: - sec_per_yr_dev = 0 - all_months_gchp_dev = np.zeros(12, dtype="datetime64[h]") - for t in range(12): - days_in_mon = monthrange(int(bmk_year_dev), t + 1)[1] - middle_hr = int(days_in_mon * 24 / 2) - delta = np.timedelta64(middle_hr, "h") - all_months_gchp_dev[t] = all_months_dev[t].astype("datetime64[h]") + delta - # ======================================================================= # Print the list of plots & tables to the screen # ======================================================================= @@ -296,6 +311,8 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): print(" - Operations budget table") if config["options"]["outputs"]["ste_table"]: print(" - Table of strat-trop exchange") + if config["options"]["outputs"]["mass_table"]: + print(" - Table of species mass") if config["options"]["outputs"]["cons_table"]: print(" - Table of mass conservation") print("Comparisons will be made for the following combinations:") @@ -332,7 +349,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): print("\n%%% Creating GCC vs. 
GCC concentration plots %%%") # Only plot concentration categories for TransportTracers - restrict_cats = ["RnPbBeTracers", "PassiveTracers"] + restrict_cats = ["RnPbBeTracers", "TransportTracers"] # -------------------------------------------------------------- # GCC vs GCC species concentration plots: Annual mean @@ -368,6 +385,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): restrict_cats=restrict_cats, overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # -------------------------------------------------------------- @@ -390,6 +408,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): restrict_cats=restrict_cats, overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -436,6 +455,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): benchmark_type=bmk_type, overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ---------------------------------------------------------- @@ -457,8 +477,10 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): benchmark_type=bmk_type, overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) + # ================================================================== # GCC vs GCC radionuclides budget tables # ================================================================== @@ -474,6 +496,53 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): spcdb_dir=spcdb_dir, ) + # ================================================================== + # GCC vs GCC mass tables + # ================================================================== + if config["options"]["outputs"]["mass_table"]: + print("\n%%% Creating GCC vs. GCC mass tables %%%") + + def gcc_vs_gcc_mass_table(mon): + """ + Create mass table for each benchmark month mon in parallel + """ + + # Filepaths + refpath = get_filepath( + gcc_vs_gcc_refrstdir, + "Restart", + bmk_mons_ref[mon] + ) + devpath = get_filepath( + gcc_vs_gcc_devrstdir, + "Restart", + bmk_mons_dev[mon] + ) + + # Create tables + bmk.make_benchmark_mass_tables( + refpath, + gcc_vs_gcc_refstr, + devpath, + gcc_vs_gcc_devstr, + dst=gcc_vs_gcc_tablesdir, + subdst=bmk_mon_yr_strs_dev[mon], + label=f"at 01{bmk_mon_yr_strs_dev[mon]}", + overwrite=True, + spcdb_dir=spcdb_dir, + ) + + # Create tables in parallel + # Turn off parallelization if n_jobs==1 + if config["options"]["n_cores"] != 1: + results = Parallel(n_jobs=config["options"]["n_cores"])( + delayed(gcc_vs_gcc_mass_table)(mon) \ + for mon in range(bmk_n_months) + ) + else: + for mon in range(bmk_n_months): + results = gcc_vs_gcc_mass_table(mon) + # ================================================================== # GCC vs GCC operations budgets tables # ================================================================== @@ -559,8 +628,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gchp_vs_gcc_devdir, "StateMet", all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"], + is_gchp=True )[0] # ================================================================== @@ -570,7 +638,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): print("\n%%% Creating GCHP vs. 
GCC concentration plots %%%") # Only plot concentration categories for TransportTracers - restrict_cats = ["RnPbBeTracers", "PassiveTracers"] + restrict_cats = ["RnPbBeTracers", "TransportTracers"] # -------------------------------------------------------------- # GCHP vs GCC species concentration plots: Annual Mean @@ -587,8 +655,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gchp_vs_gcc_devdir, collection, all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] # Create plots @@ -608,6 +675,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): restrict_cats=restrict_cats, overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # -------------------------------------------------------------- @@ -630,6 +698,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): restrict_cats=restrict_cats, overwrite=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -658,9 +727,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gchp_vs_gcc_devdir, collection, all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"][ - "is_pre_13.1"], + is_gchp=True )[0] # Create plots @@ -679,6 +746,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): benchmark_type=bmk_type, normalize_by_area=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ---------------------------------------------------------- @@ -701,6 +769,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): benchmark_type=bmk_type, normalize_by_area=True, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -721,6 +790,56 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): spcdb_dir=spcdb_dir, ) + # ================================================================== + # GCHP vs GCC global mass tables + # ================================================================== + if config["options"]["outputs"]["mass_table"]: + print("\n%%% Creating GCHP vs. 
GCC mass tables %%%") + + def gchp_vs_gcc_mass_table(mon): + """ + Create mass table for each benchmark month mon in parallel + """ + + # Filepaths + refpath = get_filepath( + gchp_vs_gcc_refrstdir, + "Restart", + bmk_mons_dev[mon] + ) + devpath = get_filepath( + gchp_vs_gcc_devrstdir, + "Restart", + bmk_mons_dev[mon], + is_gchp=True, + gchp_res=config["data"]["dev"]["gchp"]["resolution"], + gchp_is_pre_14_0=config["data"]["dev"]["gchp"][ + "is_pre_14.0"] + ) + + # KLUDGE: ewl, bmy, 13 Oct 2022 + # Use last GCHP restart file, which has correct area values + devareapath = get_filepath( + gchp_vs_gcc_devrstdir, + "Restart", + bmk_end_dev, + is_gchp=True, + gchp_res=config["data"]["dev"]["gchp"]["resolution"], + gchp_is_pre_14_0=config["data"]["dev"]["gchp"][ + "is_pre_14.0"] + ) + + # Create tables + bmk.make_benchmark_mass_tables( + refpath, + gchp_vs_gcc_refstr, + devpath, + gchp_vs_gcc_devstr, + dst=gchp_vs_gcc_tablesdir, + subdst=bmk_mon_yr_strs_dev[mon], + label=f"at 01{bmk_mon_yr_strs_dev[mon]}", + overwrite=True, + spcdb_dir=spcdb_dir, + dev_met_extra=devareapath + ) + + # Create tables in parallel + # Turn off parallelization if n_jobs==1 + if config["options"]["n_cores"] != 1: + results = Parallel(n_jobs=config["options"]["n_cores"])( + delayed(gchp_vs_gcc_mass_table)(mon) \ + for mon in range(bmk_n_months) + ) + else: + for mon in range(bmk_n_months): + results = gchp_vs_gcc_mass_table(mon) + # ================================================================== # GCHP vs GCC operations budgets tables # ================================================================== @@ -738,8 +857,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gchp_vs_gcc_devdir, collection, all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] # Make operations budget table @@ -779,15 +897,13 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gchp_vs_gchp_refdir, "StateMet", all_months_gchp_ref, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["ref"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] devmet = get_filepaths( gchp_vs_gchp_devdir, "StateMet", all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] # ================================================================== @@ -797,7 +913,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): print("\n%%% Creating GCHP vs. 
GCHP concentration plots %%%") # Only plot concentration categories for TransportTracers - restrict_cats = ["RnPbBeTracers", "PassiveTracers"] + restrict_cats = ["RnPbBeTracers", "TransportTracers"] # -------------------------------------------------------------- # GCHP vs GCHP species concentration plots: Annual Mean @@ -809,15 +925,13 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gchp_vs_gchp_refdir, collection, all_months_gchp_ref, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["ref"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] dev = get_filepaths( gchp_vs_gchp_devdir, collection, all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"] + is_gchp=True )[0] # Make concentration plots @@ -837,7 +951,8 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): restrict_cats=restrict_cats, overwrite=True, spcdb_dir=spcdb_dir, - cmpres=cmpres + cmpres=cmpres, + n_job=config["options"]["n_cores"] ) # -------------------------------------------------------------- @@ -860,7 +975,8 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): restrict_cats=restrict_cats, overwrite=True, spcdb_dir=spcdb_dir, - cmpres=cmpres + cmpres=cmpres, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -884,17 +1000,13 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gchp_vs_gchp_refdir, collection, all_months_gchp_ref, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["ref"]["gchp"][ - "is_pre_13.1"], + is_gchp=True )[0] dev = get_filepaths( gchp_vs_gchp_devdir, collection, all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"][ - "is_pre_13.1"], + is_gchp=True )[0] # Create plots @@ -914,7 +1026,8 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): benchmark_type=bmk_type, normalize_by_area=True, spcdb_dir=spcdb_dir, - cmpres=cmpres + cmpres=cmpres, + n_job=config["options"]["n_cores"] ) # ---------------------------------------------------------- @@ -937,7 +1050,8 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): benchmark_type=bmk_type, normalize_by_area=True, spcdb_dir=spcdb_dir, - cmpres=cmpres + cmpres=cmpres, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -958,6 +1072,86 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): spcdb_dir=spcdb_dir ) + # ================================================================== + # GCHP vs GCHP global mass tables + # ================================================================== + if config["options"]["outputs"]["mass_table"]: + print("\n%%% Creating GCHP vs. 
GCHP mass tables %%%") + + def gchp_vs_gchp_mass_table(mon): + """ + Create mass table for each benchmark month m in parallel + """ + + # Ref filepaths + refpath = get_filepath( + gchp_vs_gchp_refrstdir, + "Restart", + bmk_mons_ref[mon], + is_gchp=True, + gchp_res=config["data"]["ref"]["gchp"]["resolution"], + gchp_is_pre_14_0=config["data"]["ref"]["gchp"][ + "is_pre_14.0"] + ) + + # Dev filepaths + devpath = get_filepath( + gchp_vs_gchp_devrstdir, + "Restarts", + bmk_mons_dev[mon], + is_gchp=True, + gchp_res=config["data"]["dev"]["gchp"]["resolution"], + gchp_is_pre_14_0=config["data"]["dev"]["gchp"][ + "is_pre_14.0"] + ) + + # KLUDGE: ewl, bmy, 13 Oct 2022 + # Use last GCHP restart file, which has correct area values + refareapath = get_filepath( + gchp_vs_gchp_refrstdir, + "Restart", + bmk_end_ref, + is_gchp=True, + gchp_res=config["data"]["dev"]["gchp"]["resolution"], + gchp_is_pre_14_0=config["data"]["dev"]["gchp"][ + "is_pre_14.0"] + ) + devareapath = get_filepath( + gchp_vs_gchp_devrstdir, + "Restart", + bmk_end_dev, + is_gchp=True, + gchp_res=config["data"]["dev"]["gchp"]["resolution"], + gchp_is_pre_14_0=config["data"]["dev"]["gchp"][ + "is_pre_14.0"] + ) + + # Create tables + bmk.make_benchmark_mass_tables( + refpath, + gchp_vs_gchp_refstr, + devpath, + gchp_vs_gchp_devstr, + dst=gchp_vs_gchp_tablesdir, + subdst=bmk_mon_yr_strs_dev[mon], + label=f"at 01{bmk_mon_yr_strs_dev[mon]}", + overwrite=True, + spcdb_dir=spcdb_dir, + ref_met_extra=refareapath, + dev_met_extra=devareapath + ) + + # Create tables in parallel + # Turn off parallelization if n_jobs==1 + if config["options"]["n_cores"] != 1: + results = Parallel(n_jobs=config["options"]["n_cores"])( + delayed(gchp_vs_gchp_mass_table)(mon) \ + for mon in range(bmk_n_months) + ) + else: + for mon in range(bmk_n_months): + results = gchp_vs_gchp_mass_table(mon) + # ================================================================== # GCHP vs GCHP operations budgets tables # ================================================================== @@ -970,15 +1164,13 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gchp_vs_gchp_refdir, col, all_months_gchp_ref, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["ref"]["gchp"]["is_pre_13.1"], + is_gchp=True )[0] devs = get_filepaths( gchp_vs_gchp_devdir, col, all_months_gchp_dev, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"], + is_gchp=True )[0] # Create table @@ -1054,7 +1246,6 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): all_months_dev, is_gchp=True, gchp_res=config["data"]["dev"]["gchp"]["resolution"], - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"], gchp_is_pre_14_0=config["data"]["dev"]["gchp"]["is_pre_14.0"], )[0] @@ -1068,7 +1259,6 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): bmk_end_dev, is_gchp=True, gchp_res=config["data"]["dev"]["gchp"]["resolution"], - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"], gchp_is_pre_14_0=config["data"]["dev"]["gchp"]["is_pre_14.0"], ) diff --git a/benchmark/run_benchmark.py b/gcpy/benchmark/run_benchmark.py similarity index 82% rename from benchmark/run_benchmark.py rename to gcpy/benchmark/run_benchmark.py index fc776cc8..42d36418 100755 --- a/benchmark/run_benchmark.py +++ b/gcpy/benchmark/run_benchmark.py @@ -17,11 +17,6 @@ ./run_1mo_benchmark.py -To test gcpy, copy this script and the corresponding yaml config file -anywhere you want to run the test. Set gcpy_test to True at the top -of the script. 
Benchmark artifacts will be created locally in new folder -called Plots. - Remarks: By default, matplotlib will try to open an X window for plotting. @@ -36,7 +31,7 @@ https://github.com/ipython/ipython/issues/10627 -This script corresponds with GCPy 1.3.2. Edit this version ID if releasing +This script corresponds with GCPy 1.4.0. Edit this version ID if releasing a new version of GCPy. """ @@ -51,12 +46,14 @@ from datetime import datetime import numpy as np from gcpy.util import get_filepath, read_config_file -import gcpy.ste_flux as ste -import gcpy.oh_metrics as oh -import gcpy.benchmark as bmk +from gcpy import ste_flux as ste +from gcpy import oh_metrics as oh +from gcpy import benchmark_funcs as bmk from gcpy.date_time import add_months, is_full_year -from modules.run_1yr_fullchem_benchmark import run_benchmark as run_1yr_benchmark -from modules.run_1yr_tt_benchmark import run_benchmark as run_1yr_tt_benchmark +from gcpy.benchmark.modules.run_1yr_fullchem_benchmark \ + import run_benchmark as run_1yr_benchmark +from gcpy.benchmark.modules.run_1yr_tt_benchmark \ + import run_benchmark as run_1yr_tt_benchmark # Tell matplotlib not to look for an X-window os.environ["QT_QPA_PLATFORM"] = "offscreen" @@ -194,89 +191,63 @@ def run_benchmark_default(config): # Benchmark output directories # ===================================================================== # Results directories - if config["options"]["gcpy_test"]: - mainresultsdir = os.path.join(".", config["paths"]["results_dir"]) - gcc_vs_gcc_resultsdir = os.path.join( - mainresultsdir, - config["options"]["comparisons"]["gcc_vs_gcc"]["dir"] - ) - gchp_vs_gchp_resultsdir = os.path.join( - mainresultsdir, - config["options"]["comparisons"]["gchp_vs_gchp"]["dir"] - ) - gchp_vs_gcc_resultsdir = os.path.join( - mainresultsdir, - "GCHP_GCC_comparison") - diff_of_diffs_resultsdir = os.path.join( - mainresultsdir, - "GCHP_GCC_diff_of_diffs" - ) - if not os.path.exists(mainresultsdir): - os.mkdir(mainresultsdir) - # Make copy of benchmark script in results directory - curfile = os.path.realpath(__file__) - dest = os.path.join(mainresultsdir, curfile.split("/")[-1]) - if not os.path.exists(dest): - copyfile(curfile, dest) - - else: - gcc_vs_gcc_resultsdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gcc"]["dir"], - config["paths"]["results_dir"], - ) - gchp_vs_gchp_resultsdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["paths"]["results_dir"], - config["options"]["comparisons"]["gchp_vs_gchp"]["dir"], - ) - gchp_vs_gcc_resultsdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["paths"]["results_dir"], - config["options"]["comparisons"]["gchp_vs_gcc"]["dir"], - ) - diff_of_diffs_resultsdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["paths"]["results_dir"], - "GCHP_GCC_diff_of_diffs", - ) - base_gchp_resultsdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["paths"]["results_dir"], - ) - - # make results directories that don't exist - for resdir, plotting_type in zip( - [ - gcc_vs_gcc_resultsdir, - base_gchp_resultsdir, - gchp_vs_gchp_resultsdir, - gchp_vs_gcc_resultsdir, - diff_of_diffs_resultsdir, - ], - [ - config["options"]["comparisons"]["gcc_vs_gcc"]["run"], - config["options"]["comparisons"]["gchp_vs_gcc"]["run"] - or config["options"]["comparisons"]["gchp_vs_gchp"]["run"] - or 
config["options"]["comparisons"]["gchp_vs_gcc_diff_of_diffs"]["run"], - config["options"]["comparisons"]["gchp_vs_gchp"]["run"], - config["options"]["comparisons"]["gchp_vs_gcc"]["run"], - config["options"]["comparisons"]["gchp_vs_gcc_diff_of_diffs"]["run"], - ], - ): - if plotting_type and not os.path.exists(resdir): - os.mkdir(resdir) - if resdir in [gcc_vs_gcc_resultsdir, base_gchp_resultsdir]: - # Make copy of benchmark script in results directory - curfile = os.path.realpath(__file__) - dest = os.path.join(resdir, curfile.split("/")[-1]) - if os.path.exists(dest): - copyfile(curfile, dest) + gcc_vs_gcc_resultsdir = os.path.join( + config["paths"]["main_dir"], + config["data"]["dev"]["gcc"]["dir"], + config["paths"]["results_dir"], + ) + gchp_vs_gchp_resultsdir = os.path.join( + config["paths"]["main_dir"], + config["data"]["dev"]["gchp"]["dir"], + config["paths"]["results_dir"], + config["options"]["comparisons"]["gchp_vs_gchp"]["dir"], + ) + gchp_vs_gcc_resultsdir = os.path.join( + config["paths"]["main_dir"], + config["data"]["dev"]["gchp"]["dir"], + config["paths"]["results_dir"], + config["options"]["comparisons"]["gchp_vs_gcc"]["dir"], + ) + diff_of_diffs_resultsdir = os.path.join( + config["paths"]["main_dir"], + config["data"]["dev"]["gchp"]["dir"], + config["paths"]["results_dir"], + "GCHP_GCC_diff_of_diffs", + ) + base_gchp_resultsdir = os.path.join( + config["paths"]["main_dir"], + config["data"]["dev"]["gchp"]["dir"], + config["paths"]["results_dir"], + ) + # make results directories that don't exist + for resdir, plotting_type in zip( + [ + gcc_vs_gcc_resultsdir, + base_gchp_resultsdir, + gchp_vs_gchp_resultsdir, + gchp_vs_gcc_resultsdir, + diff_of_diffs_resultsdir, + ], + [ + config["options"]["comparisons"]["gcc_vs_gcc"]["run"], + config["options"]["comparisons"]["gchp_vs_gcc"]["run"] + or config["options"]["comparisons"]["gchp_vs_gchp"]["run"] + or config["options"]["comparisons"]["gchp_vs_gcc_diff_of_diffs"]["run"], + config["options"]["comparisons"]["gchp_vs_gchp"]["run"], + config["options"]["comparisons"]["gchp_vs_gcc"]["run"], + config["options"]["comparisons"]["gchp_vs_gcc_diff_of_diffs"]["run"], + ], + ): + if plotting_type and not os.path.exists(resdir): + os.mkdir(resdir) + if resdir in [gcc_vs_gcc_resultsdir, base_gchp_resultsdir]: + # Make copy of benchmark script in results directory + curfile = os.path.realpath(__file__) + dest = os.path.join(resdir, curfile.split("/")[-1]) + if os.path.exists(dest): + copyfile(curfile, dest) + gcc_vs_gcc_tablesdir = os.path.join( gcc_vs_gcc_resultsdir, config["options"]["comparisons"]["gcc_vs_gcc"]["tables_subdir"], @@ -318,16 +289,6 @@ def run_benchmark_default(config): gcc_end_ref_date = np.datetime64(config["data"]["ref"]["gcc"]["bmk_end"]) gchp_end_ref_date = np.datetime64(config["data"]["ref"]["gchp"]["bmk_end"]) - # TODO: remove is_pre_13.1 option with 14.0 release - if config["data"]["ref"]["gchp"]["is_pre_13.1"]: - if add_months(gchp_ref_date, 1) == gchp_end_ref_date: - gchp_ref_date = np.datetime( - config["data"]["ref"]["gchp"]["bmk_start"][0:8] + "16T12:00:00" - ) - else: - print("Error: `is_pre_13.1: True` option only supported for exactly 1 month and 1 year benchmarks") - sys.exit() - # Dev start used in diagnostic filename gcc_dev_date = np.datetime64(config["data"]["dev"]["gcc"]["bmk_start"]) gchp_dev_date = np.datetime64(config["data"]["dev"]["gchp"]["bmk_start"]) @@ -335,29 +296,12 @@ def run_benchmark_default(config): gcc_end_dev_date = np.datetime64(config["data"]["dev"]["gcc"]["bmk_end"]) 
gchp_end_dev_date = np.datetime64(config["data"]["dev"]["gchp"]["bmk_end"]) - # TODO: remove is_pre_13.1 option with 14.0 release - if config["data"]["dev"]["gchp"]["is_pre_13.1"]: - if add_months(gchp_dev_date, 1) == gchp_end_dev_date: - gchp_dev_date = np.datetime( - config["data"]["dev"]["gchp"]["bmk_start"][0:8] + "16T12:00:00" - ) - else: - print("Error: `is_pre_13.1: True` option only supported for exactly 1 month and 1 year benchmarks") - sys.exit() - - # Seconds per month gcc_ref_sec_diff = (gcc_end_ref_date - gcc_ref_date).astype("float64") gchp_ref_sec_diff = (gchp_end_ref_date - gchp_ref_date).astype("float64") gcc_dev_sec_diff = (gcc_end_dev_date - gcc_dev_date).astype("float64") gchp_dev_sec_diff = (gchp_end_dev_date - gchp_dev_date).astype("float64") - # Double gchp sec/month if mid-point timestamp in filename (legacy format) - if config["data"]["ref"]["gchp"]["is_pre_13.1"]: - gchp_ref_sec_diff = gchp_ref_sec_diff * 2 - if config["data"]["dev"]["gchp"]["is_pre_13.1"]: - gchp_dev_sec_diff = gchp_dev_sec_diff * 2 - # ====================================================================== # Significant difference filenames # ====================================================================== @@ -402,6 +346,8 @@ def run_benchmark_default(config): print(" - Table of emissions totals by spc and inventory") if config["options"]["outputs"]["mass_table"]: print(" - Table of species mass") + if config["options"]["outputs"]["mass_accum_table"]: + print(" - Table of species mass accumulation") if config["options"]["outputs"]["OH_metrics"]: print(" - Table of OH metrics") if config["options"]["outputs"]["ste_table"]: @@ -479,11 +425,12 @@ def run_benchmark_default(config): dst=gcc_vs_gcc_resultsdir, weightsdir=config["paths"]["weights_dir"], plot_by_spc_cat=config["options"]["outputs"]["plot_options"][ - "by_spc_cat" - ], + "by_spc_cat"], + benchmark_type=config["options"]["bmk_type"], overwrite=True, sigdiff_files=gcc_vs_gcc_sigdiff, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -505,14 +452,14 @@ def run_benchmark_default(config): dst=gcc_vs_gcc_resultsdir, weightsdir=config["paths"]["weights_dir"], plot_by_spc_cat=config["options"]["outputs"]["plot_options"][ - "by_spc_cat" - ], + "by_spc_cat"], plot_by_hco_cat=config["options"]["outputs"]["plot_options"][ - "by_hco_cat" - ], + "by_hco_cat"], + benchmark_type=config["options"]["bmk_type"], overwrite=True, sigdiff_files=gcc_vs_gcc_sigdiff, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -534,6 +481,7 @@ def run_benchmark_default(config): dst=gcc_vs_gcc_resultsdir, ref_interval=[gcc_ref_sec_diff], dev_interval=[gcc_dev_sec_diff], + benchmark_type=config["options"]["bmk_type"], overwrite=True, spcdb_dir=spcdb_dir, ) @@ -559,6 +507,7 @@ def run_benchmark_default(config): overwrite=True, sigdiff_files=gcc_vs_gcc_sigdiff, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -582,6 +531,7 @@ def run_benchmark_default(config): overwrite=True, sigdiff_files=gcc_vs_gcc_sigdiff, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -605,6 +555,41 @@ def run_benchmark_default(config): spcdb_dir=spcdb_dir, ) + # ================================================================== + # GCC vs GCC global mass accumulation tables 
+ # ================================================================== + if config["options"]["outputs"]["mass_accum_table"]: + print("\n%%% Creating GCC vs. GCC mass accumulation tables %%%") + + # Filepaths for start and end restart files + refs = get_filepath(gcc_vs_gcc_refrst, "Restart", gcc_ref_date) + devs = get_filepath(gcc_vs_gcc_devrst, "Restart", gcc_dev_date) + refe = get_filepath(gcc_vs_gcc_refrst, "Restart", gcc_end_ref_date) + deve = get_filepath(gcc_vs_gcc_devrst, "Restart", gcc_end_dev_date) + + # Get period strings + refs_str = np.datetime_as_string(gcc_ref_date, unit="s") + devs_str = np.datetime_as_string(gcc_dev_date, unit="s") + refe_str = np.datetime_as_string(gcc_end_ref_date, unit="s") + deve_str = np.datetime_as_string(gcc_end_dev_date, unit="s") + refperiod = refs_str + ' - ' + refe_str + devperiod = devs_str + ' - ' + deve_str + + # Create tables + bmk.make_benchmark_mass_accumulation_tables( + refs, + refe, + config["data"]["ref"]["gcc"]["version"], + refperiod, + devs, + deve, + config["data"]["dev"]["gcc"]["version"], + devperiod, + overwrite=True, + dst=gcc_vs_gcc_tablesdir, + spcdb_dir=spcdb_dir, + ) + # ================================================================== # GCC vs GCC operation budgets tables # ================================================================== @@ -757,8 +742,7 @@ def run_benchmark_default(config): gchp_vs_gcc_devdir, "StateMet", gchp_dev_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"], + is_gchp=True ) # Get GCHP grid resolution from met collection file @@ -777,8 +761,7 @@ def run_benchmark_default(config): gchp_vs_gcc_devdir, "SpeciesConc", gchp_dev_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"], + is_gchp=True ) # Create plots @@ -792,11 +775,12 @@ def run_benchmark_default(config): dst=gchp_vs_gcc_resultsdir, weightsdir=config["paths"]["weights_dir"], plot_by_spc_cat=config["options"]["outputs"]["plot_options"][ - "by_spc_cat" - ], + "by_spc_cat"], + benchmark_type=config["options"]["bmk_type"], overwrite=True, sigdiff_files=gchp_vs_gcc_sigdiff, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -811,8 +795,7 @@ def run_benchmark_default(config): gchp_vs_gcc_devdir, "Emissions", gchp_dev_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"], + is_gchp=True ) # Create emissions plots @@ -829,9 +812,11 @@ def run_benchmark_default(config): plot_by_hco_cat=config["options"]["outputs"]["plot_options"][ "by_hco_cat" ], + benchmark_type=config["options"]["bmk_type"], overwrite=True, sigdiff_files=gchp_vs_gcc_sigdiff, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -846,8 +831,7 @@ def run_benchmark_default(config): gchp_vs_gcc_devdir, "Emissions", gchp_dev_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"], + is_gchp=True ) # Create plots @@ -859,6 +843,7 @@ def run_benchmark_default(config): dst=gchp_vs_gcc_resultsdir, ref_interval=[gcc_dev_sec_diff], dev_interval=[gchp_dev_sec_diff], + benchmark_type=config["options"]["bmk_type"], overwrite=True, devmet=devmet, spcdb_dir=spcdb_dir, @@ -876,8 +861,7 @@ def run_benchmark_default(config): gchp_vs_gcc_devdir, "JValues", gchp_dev_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"], + is_gchp=True ) # Create plots @@ -891,6 +875,7 @@ def 
run_benchmark_default(config): overwrite=True, sigdiff_files=gchp_vs_gcc_sigdiff, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -905,8 +890,7 @@ def run_benchmark_default(config): gchp_vs_gcc_devdir, "Aerosols", gchp_dev_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"], + is_gchp=True ) # Create plots @@ -920,6 +904,7 @@ def run_benchmark_default(config): overwrite=True, sigdiff_files=gchp_vs_gcc_sigdiff, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -954,6 +939,63 @@ def run_benchmark_default(config): spcdb_dir=spcdb_dir, ) + # ================================================================== + # GCHP vs GCC global mass accumulation tables + # ================================================================== + if config["options"]["outputs"]["mass_accum_table"]: + print("\n%%% Creating GCHP vs. GCC mass accumulation tables %%%") + + # Filepaths for start and end restart files + refs = get_filepath( + gchp_vs_gcc_refrst, + "Restart", + gcc_dev_date + ) + devs = get_filepath( + gchp_vs_gcc_devrst, + "Restart", + gchp_dev_date, + is_gchp=True, + gchp_res=config["data"]["dev"]["gchp"]["resolution"], + gchp_is_pre_14_0=config["data"]["dev"]["gchp"]["is_pre_14.0"] + ) + refe = get_filepath( + gchp_vs_gcc_refrst, + "Restart", + gcc_end_dev_date + ) + deve = get_filepath( + gchp_vs_gcc_devrst, + "Restart", + gchp_end_dev_date, + is_gchp=True, + gchp_res=config["data"]["dev"]["gchp"]["resolution"], + gchp_is_pre_14_0=config["data"]["dev"]["gchp"]["is_pre_14.0"] + ) + + # Get period strings + refs_str = np.datetime_as_string(gcc_dev_date, unit="s") + devs_str = np.datetime_as_string(gchp_dev_date, unit="s") + refe_str = np.datetime_as_string(gcc_end_dev_date, unit="s") + deve_str = np.datetime_as_string(gchp_end_dev_date, unit="s") + refperiod = refs_str + ' - ' + refe_str + devperiod = devs_str + ' - ' + deve_str + + # Create tables + bmk.make_benchmark_mass_accumulation_tables( + refs, + refe, + config["data"]["dev"]["gcc"]["version"], + refperiod, + devs, + deve, + config["data"]["dev"]["gchp"]["version"], + devperiod, + overwrite=True, + dst=gchp_vs_gcc_tablesdir, + spcdb_dir=spcdb_dir, + ) + # ================================================================== # GCHP vs GCC operations budgets tables # ================================================================== @@ -966,11 +1008,10 @@ def run_benchmark_default(config): gchp_vs_gcc_devdir, "Budget", gchp_dev_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"], + is_gchp=True ) - # Create plots + # Create table bmk.make_benchmark_operations_budget( config["data"]["dev"]["gcc"]["version"], ref, @@ -1003,8 +1044,7 @@ def run_benchmark_default(config): gchp_vs_gcc_devdir, "Metrics", gchp_dev_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"], + is_gchp=True ) # Create table @@ -1097,15 +1137,13 @@ def run_benchmark_default(config): gchp_vs_gchp_refdir, "StateMet", gchp_ref_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["ref"]["gchp"]["is_pre_13.1"], + is_gchp=True ) devmet = get_filepath( gchp_vs_gchp_devdir, "StateMet", gchp_dev_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"], + is_gchp=True ) # Get GCHP grid resolutions from met collection file @@ -1125,15 +1163,13 @@ def run_benchmark_default(config): 
gchp_vs_gchp_refdir, "SpeciesConc", gchp_ref_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["ref"]["gchp"]["is_pre_13.1"], + is_gchp=True ) dev = get_filepath( gchp_vs_gchp_devdir, "SpeciesConc", gchp_dev_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"], + is_gchp=True ) # Create plots @@ -1147,11 +1183,12 @@ def run_benchmark_default(config): dst=gchp_vs_gchp_resultsdir, weightsdir=config["paths"]["weights_dir"], plot_by_spc_cat=config["options"]["outputs"]["plot_options"][ - "by_spc_cat" - ], + "by_spc_cat"], + benchmark_type=config["options"]["bmk_type"], overwrite=True, sigdiff_files=gchp_vs_gchp_sigdiff, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -1165,15 +1202,13 @@ def run_benchmark_default(config): gchp_vs_gchp_refdir, "Emissions", gchp_ref_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["ref"]["gchp"]["is_pre_13.1"], + is_gchp=True ) dev = get_filepath( gchp_vs_gchp_devdir, "Emissions", gchp_dev_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"], + is_gchp=True ) # Create plots @@ -1185,14 +1220,14 @@ def run_benchmark_default(config): dst=gchp_vs_gchp_resultsdir, weightsdir=config["paths"]["weights_dir"], plot_by_spc_cat=config["options"]["outputs"]["plot_options"][ - "by_spc_cat" - ], + "by_spc_cat"], plot_by_hco_cat=config["options"]["outputs"]["plot_options"][ - "by_hco_cat" - ], + "by_hco_cat"], + benchmark_type=config["options"]["bmk_type"], overwrite=True, sigdiff_files=gchp_vs_gchp_sigdiff, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -1206,15 +1241,13 @@ def run_benchmark_default(config): gchp_vs_gchp_refdir, "Emissions", gchp_ref_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["ref"]["gchp"]["is_pre_13.1"], + is_gchp=True ) dev = get_filepath( gchp_vs_gchp_devdir, "Emissions", gchp_dev_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["ref"]["gchp"]["is_pre_13.1"], + is_gchp=True ) # Create tables @@ -1226,6 +1259,7 @@ def run_benchmark_default(config): dst=gchp_vs_gchp_resultsdir, ref_interval=[gchp_ref_sec_diff], dev_interval=[gchp_dev_sec_diff], + benchmark_type=config["options"]["bmk_type"], overwrite=True, refmet=refmet, devmet=devmet, @@ -1243,15 +1277,13 @@ def run_benchmark_default(config): gchp_vs_gchp_refdir, "JValues", gchp_ref_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["ref"]["gchp"]["is_pre_13.1"], + is_gchp=True ) dev = get_filepath( gchp_vs_gchp_devdir, "JValues", gchp_dev_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"], + is_gchp=True ) # Create plots @@ -1265,6 +1297,7 @@ def run_benchmark_default(config): overwrite=True, sigdiff_files=gchp_vs_gchp_sigdiff, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # ================================================================== @@ -1278,15 +1311,13 @@ def run_benchmark_default(config): gchp_vs_gchp_refdir, "Aerosols", gchp_ref_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["ref"]["gchp"]["is_pre_13.1"], + is_gchp=True ) dev = get_filepath( gchp_vs_gchp_devdir, "Aerosols", gchp_dev_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"], + is_gchp=True ) # Create plots @@ -1300,6 +1331,7 @@ def run_benchmark_default(config): overwrite=True, sigdiff_files=gchp_vs_gchp_sigdiff, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) # 
================================================================== @@ -1337,6 +1369,69 @@ def run_benchmark_default(config): spcdb_dir=spcdb_dir, ) + # ================================================================== + # GCHP vs GCHP global mass accumulation tables + # ================================================================== + if config["options"]["outputs"]["mass_accum_table"]: + print("\n%%% Creating GCHP vs. GCHP mass accumulation tables %%%") + + # Filepaths for start and end restart files + refs = get_filepath( + gchp_vs_gchp_refrst, + "Restart", + gchp_ref_date, + is_gchp=True, + gchp_res=config["data"]["ref"]["gchp"]["resolution"], + gchp_is_pre_14_0=config["data"]["ref"]["gchp"]["is_pre_14.0"] + ) + devs = get_filepath( + gchp_vs_gchp_devrst, + "Restart", + gchp_dev_date, + is_gchp=True, + gchp_res=config["data"]["dev"]["gchp"]["resolution"], + gchp_is_pre_14_0=config["data"]["dev"]["gchp"]["is_pre_14.0"] + ) + refe = get_filepath( + gchp_vs_gchp_refrst, + "Restart", + gchp_end_ref_date, + is_gchp=True, + gchp_res=config["data"]["ref"]["gchp"]["resolution"], + gchp_is_pre_14_0=config["data"]["ref"]["gchp"]["is_pre_14.0"] + ) + deve = get_filepath( + gchp_vs_gchp_devrst, + "Restart", + gchp_end_dev_date, + is_gchp=True, + gchp_res=config["data"]["dev"]["gchp"]["resolution"], + gchp_is_pre_14_0=config["data"]["dev"]["gchp"]["is_pre_14.0"] + ) + + # Get period strings + refs_str = np.datetime_as_string(gchp_ref_date, unit="s") + devs_str = np.datetime_as_string(gchp_dev_date, unit="s") + refe_str = np.datetime_as_string(gchp_end_ref_date, unit="s") + deve_str = np.datetime_as_string(gchp_end_dev_date, unit="s") + refperiod = refs_str + ' - ' + refe_str + devperiod = devs_str + ' - ' + deve_str + + # Create tables + bmk.make_benchmark_mass_accumulation_tables( + refs, + refe, + config["data"]["ref"]["gchp"]["version"], + refperiod, + devs, + deve, + config["data"]["dev"]["gchp"]["version"], + devperiod, + overwrite=True, + dst=gchp_vs_gchp_tablesdir, + spcdb_dir=spcdb_dir, + ) + # ================================================================== # GCHP vs GCHP operations budgets tables # ================================================================== @@ -1348,15 +1443,13 @@ def run_benchmark_default(config): gchp_vs_gchp_refdir, "Budget", gchp_ref_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["ref"]["gchp"]["is_pre_13.1"], + is_gchp=True ) dev = get_filepath( gchp_vs_gchp_devdir, "Budget", gchp_dev_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"], + is_gchp=True ) # Create tables @@ -1391,15 +1484,13 @@ def run_benchmark_default(config): gchp_vs_gchp_refdir, "Metrics", gchp_ref_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["ref"]["gchp"]["is_pre_13.1"], + is_gchp=True ) dev = get_filepath( gchp_vs_gchp_devdir, "Metrics", gchp_dev_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"], + is_gchp=True ) # Create table @@ -1466,15 +1557,13 @@ def run_benchmark_default(config): gchp_vs_gchp_refdir, "SpeciesConc", gchp_ref_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["ref"]["gchp"]["is_pre_13.1"], + is_gchp=True ) gchp_dev = get_filepath( gchp_vs_gchp_devdir, "SpeciesConc", gchp_dev_date, - is_gchp=True, - gchp_is_pre_13_1=config["data"]["dev"]["gchp"]["is_pre_13.1"], + is_gchp=True ) # Create diff-of-diff plots for species concentrations @@ -1486,12 +1575,14 @@ def run_benchmark_default(config): diff_of_diffs_devstr, dst=diff_of_diffs_resultsdir, 
weightsdir=config["paths"]["weights_dir"], + benchmark_type=config["options"]["bmk_type"], overwrite=True, use_cmap_RdBu=True, second_ref=gcc_dev, second_dev=gchp_dev, cats_in_ugm3=None, spcdb_dir=spcdb_dir, + n_job=config["options"]["n_cores"] ) @@ -1501,15 +1592,15 @@ def run_benchmark_default(config): print("\n%%%% All requested benchmark plots/tables created! %%%%") -def main(): +def main(argv): """ Driver program. Determines which benchmark script script to call for 1-hour, 1-day, 1-month, or 1-year benchmarks. """ - config_filename = sys.argv[1] if len(sys.argv) == 2 else "1mo_benchmark.yml" + config_filename = argv[1] if len(argv) == 2 else "1mo_benchmark.yml" config = read_config_file(config_filename) choose_benchmark_type(config) if __name__ == "__main__": - main() + main(sys.argv) diff --git a/gcpy/benchmark_categories.yml b/gcpy/benchmark_categories.yml index 70626853..73dd3e49 100644 --- a/gcpy/benchmark_categories.yml +++ b/gcpy/benchmark_categories.yml @@ -197,6 +197,7 @@ FullChemBenchmark: Nitrates: - ISOPN Other: + - FURA - GLYC - GLYX - HCOOH @@ -220,22 +221,26 @@ TransportTracersBenchmark: RnPbBeTracers: - Rn222 - Pb210 - - Pb210Strat + - Pb210s - Be7 - - Be7Strat + - Be7s - Be10 - - Be10Strat - PassiveTracers: - PassiveTracers: + - Be10s + TransportTracers: + TransportTracers: - PassiveTracer - - SF6Tracer - - CH3ITracer - - COAnthroEmis25dayTracer - - COAnthroEmis50dayTracer - - COUniformEmis25dayTracer - - GlobEmis90dayTracer - - NHEmis90dayTracer - - SHEmis90dayTracer + - SF6 + - CH3I + - aoa + - aoa_bl + - aoa_nh + - CO_25 + - CO_50 + - e90 + - e90_n + - e90_s + - st80_25 + - stOX WetLossConv: WetLossConv: - Pb210 diff --git a/gcpy/benchmark.py b/gcpy/benchmark_funcs.py similarity index 83% rename from gcpy/benchmark.py rename to gcpy/benchmark_funcs.py index 20e162ef..e6fe88c6 100644 --- a/gcpy/benchmark.py +++ b/gcpy/benchmark_funcs.py @@ -1,7 +1,6 @@ """ Specific utilities for creating plots from GEOS-Chem benchmark simulations. 
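The new mass-accumulation sections build their `refperiod`/`devperiod` labels with `np.datetime_as_string`, as in this standalone sketch (dates are placeholders):

```python
import numpy as np

refs = np.datetime64("2019-01-01T00:00:00")  # placeholder period start
refe = np.datetime64("2020-01-01T00:00:00")  # placeholder period end

# Render each endpoint at second precision, then join into one label
refs_str = np.datetime_as_string(refs, unit="s")
refe_str = np.datetime_as_string(refe, unit="s")
refperiod = refs_str + " - " + refe_str
print(refperiod)  # 2019-01-01T00:00:00 - 2020-01-01T00:00:00
```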
""" - import os import warnings import itertools @@ -15,22 +14,20 @@ from joblib import Parallel, delayed from tabulate import tabulate from gcpy import util -from gcpy.plot import compare_single_level, compare_zonal_mean from gcpy.regrid import create_regridders from gcpy.grid import get_troposphere_mask from gcpy.units import convert_units -import gcpy.constants as gcon - -# Save warnings format to undo overwriting built into PyPDF2 -warning_format = warnings.showwarning +from gcpy.constants import COL_WIDTH, MW_AIR_g, skip_these_vars, TABLE_WIDTH +from gcpy.plot.compare_single_level import compare_single_level +from gcpy.plot.compare_zonal_mean import compare_zonal_mean # Suppress numpy divide by zero warnings to prevent output spam np.seterr(divide="ignore", invalid="ignore") # YAML files -aod_spc = "aod_species.yml" -emission_spc = "emission_species.yml" -emission_inv = "emission_inventories.yml" +AOD_SPC = "aod_species.yml" +EMISSION_SPC = "emission_species.yml" +EMISSION_INV = "emission_inventories.yml" def create_total_emissions_table( @@ -122,12 +119,8 @@ def create_total_emissions_table( # ================================================================== # Initialization # ================================================================== - - # Make sure refdata and devdata are both xarray Dataset objects - if not isinstance(refdata, xr.Dataset): - raise TypeError("The refdata argument must be an xarray Dataset!") - if not isinstance(devdata, xr.Dataset): - raise TypeError("The devdata argument must be an xarray Dataset!") + util.verify_variable_type(refdata, xr.Dataset) + util.verify_variable_type(devdata, xr.Dataset) # Get ref area [m2] if "AREA" in refdata.data_vars.keys(): @@ -155,7 +148,7 @@ def create_total_emissions_table( # this benchmark.py file is found. 
properties = util.read_config_file( os.path.join( - spcdb_dir, + spcdb_dir, "species_database.yml" ), quiet=True @@ -200,7 +193,7 @@ def create_total_emissions_table( for species_name, target_units in species.items(): # Get a list of emission variable names for each species - diagnostic_template = f"{species_name}" + diagnostic_template = template.replace("{}", species_name) varnames = util.get_emissions_varnames(cvars, diagnostic_template) # Also add variables that might be in either Ref or Dev @@ -244,12 +237,12 @@ def create_total_emissions_table( title3 = f"### Dev = {devstr}" # Print header to file - print("#" * 89, file=f) - print(f"{title1 : <86}{'###'}", file=f) - print(f"{title2 : <86}{'###'}", file=f) - print(f"{title3 : <86}{'###'}", file=f) - print("#" * 89, file=f) - print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'diffs'}", file=f) + print("#" * TABLE_WIDTH, file=f) + print(f"{title1 : <{TABLE_WIDTH-3}}{'###'}", file=f) + print(f"{title2 : <{TABLE_WIDTH-3}}{'###'}", file=f) + print(f"{title3 : <{TABLE_WIDTH-3}}{'###'}", file=f) + print("#" * TABLE_WIDTH, file=f) + print(f"{'' : <{COL_WIDTH-1}}{'Ref' : >{COL_WIDTH}}{'Dev' : >{COL_WIDTH}}{'Dev - Ref' : >{COL_WIDTH}}{'% diff' : >{COL_WIDTH}} {'diffs'}", file=f) # ============================================================= # Loop over all emissions variables corresponding to this @@ -361,7 +354,7 @@ def create_total_emissions_table( refstr, devstr, diff_list), - width=90 + width=TABLE_WIDTH ) def create_global_mass_table( @@ -431,12 +424,8 @@ def create_global_mass_table( # ================================================================== # Initialization # ================================================================== - - # Make sure refdata and devdata are xarray Dataset objects - if not isinstance(refdata, xr.Dataset): - raise TypeError("The refdata argument must be an xarray Dataset!") - if not isinstance(devdata, xr.Dataset): - raise TypeError("The devdata argument must be an xarray Dataset!") + util.verify_variable_type(refdata, xr.Dataset) + util.verify_variable_type(devdata, xr.Dataset) # Make sure required arguments are passed if varlist is None: @@ -480,17 +469,17 @@ def create_global_mass_table( placeholder = "@%% insert diff status here %%@" # Print header to file - print("#" * 89, file=f) - print(f"{title1 : <86}{'###'}", file=f) - print(f"{'###' : <86}{'###'}", file=f) - print(f"{title2 : <86}{'###'}", file=f) - print(f"{title3 : <86}{'###'}", file=f) - print(f"{'###' : <86}{'###'}", file=f) + print("#" * TABLE_WIDTH, file=f) + print(f"{title1 : <{TABLE_WIDTH-3}}{'###'}", file=f) + print(f"{'###' : <{TABLE_WIDTH-3}}{'###'}", file=f) + print(f"{title2 : <{TABLE_WIDTH-3}}{'###'}", file=f) + print(f"{title3 : <{TABLE_WIDTH-3}}{'###'}", file=f) + print(f"{'###' : <{TABLE_WIDTH-3}}{'###'}", file=f) print(f"{placeholder}", file=f) - print("#" * 89, file=f) + print("#" * TABLE_WIDTH, file=f) # Column headers - print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'diffs'}", file=f) + print(f"{'' : <{COL_WIDTH-1}}{'Ref' : >{COL_WIDTH}}{'Dev' : >{COL_WIDTH}}{'Dev - Ref' : >{COL_WIDTH}}{'% diff' : >{COL_WIDTH}} {'diffs'}", file=f) # ================================================================== # Print global masses for all species @@ -592,7 +581,289 @@ def create_global_mass_table( diff_list, fancy_format=True ), - width=100 # Force it not to wrap + width=TABLE_WIDTH + ) + + +def create_mass_accumulation_table( + refdatastart, + refdataend, + refstr, + 
refperiodstr, + devdatastart, + devdataend, + devstr, + devperiodstr, + varlist, + met_and_masks, + label, + trop_only=False, + outfilename="GlobalMassAccum_TropStrat.txt", + verbose=False, + spcdb_dir=os.path.dirname(__file__) +): + """ + Creates a table of global mass accumulation for a list of species in + two data sets. The data sets, which typically represent output from two + different model versions, are usually contained in netCDF data files. + + Args: + refdatastart: xarray Dataset + The first data set to be compared (aka "Reference"), + at the start of the simulation period. + refdataend: xarray Dataset + The first data set to be compared (aka "Reference"), + at the end of the simulation period. + refstr: str + A string that can be used to identify the Ref data + (e.g. a model version number or other identifier). + refperiodstr: str + Ref simulation period start and end + devdatastart: xarray Dataset + The second data set to be compared (aka "Development"), + at the start of the simulation period. + devdataend: xarray Dataset + The second data set to be compared (aka "Development"), + at the end of the simulation period. + devstr: str + A string that can be used to identify the Dev data + (e.g. a model version number or other identifier). + devperiodstr: str + Dev simulation period start and end + varlist: list of strings + List of species concentration variable names to include + in the list of global totals. + met_and_masks: dict of xarray DataArray + Dictionary containing the meteorological variables and + masks for the Ref and Dev datasets. + label: str + Label to go in the header string. Can be used to + pass the month & year. + + Keyword Args (optional): + trop_only: bool + Set this switch to True if you wish to print totals + only for the troposphere. + Default value: False (i.e. print whole-atmosphere totals). + outfilename: str + Name of the text file which will contain the table of + mass accumulation totals. + Default value: "GlobalMassAccum_TropStrat.txt" + verbose: bool + Set this switch to True if you wish to print out extra + informational messages. + Default value: False + spcdb_dir: str + Directory of the species_database.yml file + Default value: Directory of GCPy code repository + + Remarks: + This method is mainly intended for model benchmarking purposes, + rather than as a general-purpose tool. + + Species properties (such as molecular weights) are read from a + YAML file called "species_database.yml". + """ + + # ================================================================== + # Initialization + # ================================================================== + util.verify_variable_type(refdatastart, xr.Dataset) + util.verify_variable_type(refdataend, xr.Dataset) + util.verify_variable_type(devdatastart, xr.Dataset) + util.verify_variable_type(devdataend, xr.Dataset) + + # Make sure required arguments are passed + if varlist is None: + raise ValueError('The "varlist" argument was not passed!') + if met_and_masks is None: + raise ValueError('The "met_and_masks" argument was not passed!') + + # Load a YAML file containing species properties (such as + # molecular weights), which we will need for unit conversions. + # This is located in the "data" subfolder of this current directory.
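The header-printing code in this function (and in `create_global_mass_table` above) now derives every field width from `TABLE_WIDTH` and `COL_WIDTH`. A standalone sketch of the nested f-string format specs; the constant values are illustrative, the real ones come from `gcpy.constants`:

```python
# Illustrative width constants; gcpy.constants defines the real values
TABLE_WIDTH = 105
COL_WIDTH = 21

title1 = "### Example benchmark table header"
print("#" * TABLE_WIDTH)
# The pad width inside each format spec is itself an evaluated expression
print(f"{title1 : <{TABLE_WIDTH-3}}{'###'}")
print("#" * TABLE_WIDTH)
print(f"{'' : <{COL_WIDTH-1}}{'Ref' : >{COL_WIDTH}}{'Dev' : >{COL_WIDTH}}")
```

Parameterizing the widths keeps the banner, body rows, and closing rule aligned if the table dimensions ever change.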
+    properties = util.read_config_file(
+        os.path.join(
+            spcdb_dir,
+            "species_database.yml"
+        ),
+        quiet=True
+    )
+
+    # ==================================================================
+    # Open file for output
+    # ==================================================================
+
+    # Create file
+    try:
+        f = open(outfilename, "w")
+    except (IOError, OSError, FileNotFoundError) as e:
+        raise OSError(f"Could not open {outfilename} for writing!") from e
+
+    # Define a list for differences
+    diff_list = []
+
+    # Title strings
+    title1 = f"### Global mass accumulation (Gg) {label} (Trop + Strat)"
+    if trop_only:
+        title1 = f"### Global mass accumulation (Gg) {label} (Trop only)"
+    title2 = "### Computed as change in instantaneous mass across period"
+    title3 = f"### Ref = {refstr}"
+    title4 = f"### Dev = {devstr}"
+    title5 = f"### Ref period: {refperiodstr}"
+    title6 = f"### Dev period: {devperiodstr}"
+
+    # Write a placeholder to the file that denotes where
+    # the list of species with differences will be written
+    placeholder = "@%% insert diff status here %%@"
+
+    # Print header to file
+    print("#" * TABLE_WIDTH, file=f)
+    print(f"{title1 : <{TABLE_WIDTH-3}}{'###'}", file=f)
+    print(f"{'###' : <{TABLE_WIDTH-3}}{'###'}", file=f)
+    print(f"{title2 : <{TABLE_WIDTH-3}}{'###'}", file=f)
+    print(f"{'###' : <{TABLE_WIDTH-3}}{'###'}", file=f)
+    print(f"{title3 : <{TABLE_WIDTH-3}}{'###'}", file=f)
+    print(f"{title4 : <{TABLE_WIDTH-3}}{'###'}", file=f)
+    print(f"{'###' : <{TABLE_WIDTH-3}}{'###'}", file=f)
+    print(f"{title5 : <{TABLE_WIDTH-3}}{'###'}", file=f)
+    print(f"{title6 : <{TABLE_WIDTH-3}}{'###'}", file=f)
+    print(f"{'###' : <{TABLE_WIDTH-3}}{'###'}", file=f)
+    print(f"{placeholder}", file=f)
+    print("#" * TABLE_WIDTH, file=f)
+
+    # Column headers
+    print(f"{'' : <{COL_WIDTH-1}}{'Ref' : >{COL_WIDTH}}{'Dev' : >{COL_WIDTH}}{'Dev - Ref' : >{COL_WIDTH}}{'% diff' : >{COL_WIDTH}} {'diffs'}", file=f)
+
+    # ==================================================================
+    # Print global masses for all species
+    #
+    # NOTE: By this point, all species will be in both Ref and Dev,
+    # because we have added them in the calling routine
+    # ==================================================================
+    for v in varlist:
+
+        # Get the species name
+        spc_name = v.split("_")[1]
+
+        # Get a list of properties for the given species
+        species_properties = properties.get(spc_name)
+
+        # If no properties are found, then skip to next species
+        if species_properties is None:
+            if verbose:
+                msg = f"No properties found for {spc_name} ... skipping"
+                print(msg)
+            continue
+
+        # Specify target units
+        target_units = "Gg"
+        mol_wt_g = species_properties.get("MW_g")
+        if mol_wt_g is None:
+            if verbose:
+                msg = \
+                    f"No molecular weight found for {spc_name} ... skipping"
+                print(msg)
+            continue
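The placeholder written above is patched once the table body is complete; GCPy does this with util.insert_text_into_file. A hedged, stdlib-only sketch of the same write-then-patch pattern (file name and text illustrative):

```python
# Minimal sketch of the write-then-patch pattern used above.
PLACEHOLDER = "@%% insert diff status here %%@"

with open("table.txt", "w", encoding="UTF-8") as out:
    print("### header", file=out)
    print(PLACEHOLDER, file=out)
    print("### body", file=out)

# Later, once the final text is known, replace the placeholder in place.
with open("table.txt", "r", encoding="UTF-8") as out:
    text = out.read()
with open("table.txt", "w", encoding="UTF-8") as out:
    out.write(text.replace(PLACEHOLDER, "### 2 species differ"))
```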
+        # ==============================================================
+        # Convert units of Ref and save to a DataArray
+        # (or skip if Ref contains NaNs everywhere)
+        # ==============================================================
+        refarrays = refdatastart[v]
+        if not np.isnan(refdatastart[v].values).all():
+            refarrays = convert_units(
+                refarrays,
+                spc_name,
+                species_properties,
+                target_units,
+                area_m2=met_and_masks["Refs_Area"],
+                delta_p=met_and_masks["Refs_Delta_P"],
+                box_height=met_and_masks["Refs_BxHeight"],
+            )
+
+        refarraye = refdataend[v]
+        if not np.isnan(refdataend[v].values).all():
+            refarraye = convert_units(
+                refarraye,
+                spc_name,
+                species_properties,
+                target_units,
+                area_m2=met_and_masks["Refe_Area"],
+                delta_p=met_and_masks["Refe_Delta_P"],
+                box_height=met_and_masks["Refe_BxHeight"],
+            )
+
+        # Accumulation = end minus start (note: this reuses the
+        # start-of-period DataArray as the output container)
+        refarray = refarrays
+        refarray.values = refarraye.values - refarrays.values
+
+        # ==============================================================
+        # Convert units of Dev and save to a DataArray
+        # (or skip if Dev contains NaNs everywhere)
+        # ==============================================================
+        devarrays = devdatastart[v]
+        if not np.isnan(devdatastart[v].values).all():
+            devarrays = convert_units(
+                devarrays,
+                spc_name,
+                species_properties,
+                target_units,
+                area_m2=met_and_masks["Devs_Area"],
+                delta_p=met_and_masks["Devs_Delta_P"],
+                box_height=met_and_masks["Devs_BxHeight"],
+            )
+            #print('devarrays: {}'.format(devarrays.values))
+
+        devarraye = devdataend[v]
+        if not np.isnan(devdataend[v].values).all():
+            devarraye = convert_units(
+                devarraye,
+                spc_name,
+                species_properties,
+                target_units,
+                area_m2=met_and_masks["Deve_Area"],
+                delta_p=met_and_masks["Deve_Delta_P"],
+                box_height=met_and_masks["Deve_BxHeight"],
+            )
+
+        devarray = devarrays
+        devarray.values = devarraye.values - devarrays.values
+
+        # ==============================================================
+        # Print global masses for Ref and Dev
+        # (we will mask out tropospheric boxes in util.print_totals)
+        # ==============================================================
+        # ewl: for now trop_only is always false for accumulation table
+        if trop_only:
+            util.print_totals(
+                refarray,
+                devarray,
+                f,
+                diff_list,
+                masks=met_and_masks
+            )
+        else:
+            util.print_totals(
+                refarray,
+                devarray,
+                f,
+                diff_list
+            )
+
+    # ==================================================================
+    # Cleanup and quit
+    # ==================================================================
+
+    # Close file
+    f.close()
+
+    # Reopen file and replace placeholder text by diff_text
+    util.insert_text_into_file(
+        filename=outfilename,
+        search_text=placeholder,
+        replace_text=diff_list_to_text(
+            refstr,
+            devstr,
+            diff_list,
+            fancy_format=True
+        ),
+        width=TABLE_WIDTH
+    )
@@ -755,8 +1026,8 @@ def make_benchmark_conc_plots(
     reader = util.dataset_reader(time_mean, verbose=verbose)
 
     # Open datasets
-    refds = reader(ref, drop_variables=gcon.skip_these_vars).load()
-    devds = reader(dev, drop_variables=gcon.skip_these_vars).load()
+    refds = reader(ref, drop_variables=skip_these_vars).load()
+    devds = reader(dev, drop_variables=skip_these_vars).load()
 
     # Rename SpeciesConc_ to SpeciesConcVV_ for consistency with new
     # naming introduced in GEOS-Chem 14.1.0
@@ -787,21 +1058,21 @@
     refmetds = None
     devmetds = None
     if refmet:
-        refmetds = reader(refmet, drop_variables=gcon.skip_these_vars).load()
+        refmetds = reader(refmet, drop_variables=skip_these_vars).load()
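The SpeciesConc_ to SpeciesConcVV_ renaming noted in the hunk above is a plain xarray rename; a small sketch (the dataset here is a stand-in):

```python
import xarray as xr

# Build a tiny stand-in dataset with the old variable naming.
ds = xr.Dataset({"SpeciesConc_O3": ("lev", [1.0e-8, 2.0e-8])})

# Rename SpeciesConc_* to SpeciesConcVV_* (GEOS-Chem 14.1.0 convention).
rename_map = {
    name: name.replace("SpeciesConc_", "SpeciesConcVV_")
    for name in ds.data_vars
    if name.startswith("SpeciesConc_")
}
ds = ds.rename(rename_map)
print(list(ds.data_vars))  # ['SpeciesConcVV_O3']
```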
     if devmet:
-        devmetds = reader(devmet, drop_variables=gcon.skip_these_vars).load()
+        devmetds = reader(devmet, drop_variables=skip_these_vars).load()
 
     # Determine if doing diff-of-diffs
     diff_of_diffs = False
     if second_ref is not None and second_dev is not None:
         diff_of_diffs = True
-
+
     # Open second datasets if passed as arguments (used for diff of diffs)
     # Regrid to same horz grid resolution if two refs or two devs do not match.
     if diff_of_diffs:
-        second_refds = reader(second_ref, drop_variables=gcon.skip_these_vars).load()
-        second_devds = reader(second_dev, drop_variables=gcon.skip_these_vars).load()
+        second_refds = reader(second_ref, drop_variables=skip_these_vars).load()
+        second_devds = reader(second_dev, drop_variables=skip_these_vars).load()
 
         print('\nPrinting second_refds (dev of ref for diff-of-diffs)\n')
         print(second_refds)
@@ -1223,13 +1494,18 @@ def createplots(filecat):
             )
         return {filecat: cat_diff_dict}
 
+    # --------------------------------------------
     # Create the plots in parallel
-    results = Parallel(n_jobs=n_job)(
-        delayed(createplots)(filecat) for _, filecat in enumerate(catdict)
-    )
-# # Do not create plots in parallel
-# for _, filecat in enumerate(catdict):
-#     createplots(filecat)
+    # Turn off parallelization if n_job=1
+    if n_job != 1:
+        results = Parallel(n_jobs=n_job)(
+            delayed(createplots)(filecat)
+            for _, filecat in enumerate(catdict)
+        )
+    else:
+        results = []
+        for _, filecat in enumerate(catdict):
+            results.append(createplots(filecat))
+    # --------------------------------------------
 
     dict_sfc = {list(result.keys())[0]: result[list(
         result.keys())[0]]['sfc'] for result in results}
@@ -1430,13 +1706,13 @@ def make_benchmark_emis_plots(
 
     # Ref dataset
     try:
-        refds = reader(ref, drop_variables=gcon.skip_these_vars)
+        refds = reader(ref, drop_variables=skip_these_vars)
     except (OSError, IOError, FileNotFoundError) as e:
         raise e(f"Could not find Ref file: {ref}") from e
 
     # Dev dataset
     try:
-        devds = reader(dev, drop_variables=gcon.skip_these_vars)
+        devds = reader(dev, drop_variables=skip_these_vars)
     except (OSError, IOError, FileNotFoundError) as e:
         raise e(f"Could not find Dev file: {dev}") from e
@@ -1583,8 +1859,18 @@ def createfile_hco_cat(c):
         diff_dict[c] = diff_emis
         return diff_dict
 
-    results = Parallel(n_jobs=n_job)(delayed(createfile_hco_cat)(c)
-                                     for c in emis_cats)
+    # ---------------------------------------
+    # Create plots in parallel
+    # Turn off parallelization if n_job=1
+    if n_job != 1:
+        results = Parallel(n_jobs=n_job)(
+            delayed(createfile_hco_cat)(c)
+            for c in emis_cats
+        )
+    else:
+        results = []
+        for c in emis_cats:
+            results.append(createfile_hco_cat(c))
+    # ---------------------------------------
 
     dict_emis = {list(result.keys())[0]: result[list(result.keys())[0]]
                  for result in results}
@@ -1685,10 +1971,19 @@ def createfile_bench_cat(filecat):
             util.add_nested_bookmarks_to_pdf(
                 pdfname, filecat, emisdict, warninglist)
         return catspc
-    results = Parallel(n_jobs=n_job)(
-        delayed(createfile_bench_cat)(filecat)
-        for i, filecat in enumerate(catdict)
-    )
+
+    #------------------------------------------------
+    # Create plots in parallel
+    # Turn off parallelization if n_job=1
+    if n_job != 1:
+        results = Parallel(n_jobs=n_job)(
+            delayed(createfile_bench_cat)(filecat)
+            for _, filecat in enumerate(catdict)
+        )
+    else:
+        results = []
+        for _, filecat in enumerate(catdict):
+            results.append(createfile_bench_cat(filecat))
+    #------------------------------------------------
 
     allcatspc = [spc for result in results for spc in result]
 
     # Give warning if emissions species is not assigned a benchmark
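The serial fallback introduced in these hunks is a common joblib idiom; here is a self-contained sketch (the worker function is illustrative) in which both branches yield a list of per-item results:

```python
from joblib import Parallel, delayed

def worker(item):
    # Stand-in for a per-category plotting function.
    return {item: item ** 2}

items = [1, 2, 3, 4]
n_job = 1  # e.g. taken from a benchmark configuration file

# Run in parallel unless n_job == 1, in which case run serially;
# both branches return a list of per-item results.
if n_job != 1:
    results = Parallel(n_jobs=n_job)(delayed(worker)(i) for i in items)
else:
    results = [worker(i) for i in items]

print(results)  # [{1: 1}, {2: 4}, {3: 9}, {4: 16}]
```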
@@ -1803,27 +2098,27 @@ def make_benchmark_emis_tables(
     devmetds = None
 
     if LooseVersion(xr.__version__) < LooseVersion("0.15.0"):
-        refds = xr.open_mfdataset(reflist, drop_variables=gcon.skip_these_vars)
-        devds = xr.open_mfdataset(devlist, drop_variables=gcon.skip_these_vars)
+        refds = xr.open_mfdataset(reflist, drop_variables=skip_these_vars)
+        devds = xr.open_mfdataset(devlist, drop_variables=skip_these_vars)
         if refmet is not None:
             refmetds = xr.open_mfdataset(
-                refmet, drop_variables=gcon.skip_these_vars)
+                refmet, drop_variables=skip_these_vars)
         if devmet is not None:
             devmetds = xr.open_mfdataset(
-                devmet, drop_variables=gcon.skip_these_vars)
+                devmet, drop_variables=skip_these_vars)
     else:
         # , combine="nested", concat_dim="time")
-        refds = xr.open_mfdataset(reflist, drop_variables=gcon.skip_these_vars)
+        refds = xr.open_mfdataset(reflist, drop_variables=skip_these_vars)
         # , combine="nested", concat_dim="time")
-        devds = xr.open_mfdataset(devlist, drop_variables=gcon.skip_these_vars)
+        devds = xr.open_mfdataset(devlist, drop_variables=skip_these_vars)
         if refmet is not None:
             # , combine="nested", concat_dim="time")
             refmetds = xr.open_mfdataset(
-                refmet, drop_variables=gcon.skip_these_vars)
+                refmet, drop_variables=skip_these_vars)
         if devmet is not None:
             # , combine="nested", concat_dim="time")
             devmetds = xr.open_mfdataset(
-                devmet, drop_variables=gcon.skip_these_vars)
+                devmet, drop_variables=skip_these_vars)
 
     # ==================================================================
     # Create table of emissions
@@ -1833,7 +2128,7 @@ def make_benchmark_emis_tables(
     spc_dict = util.read_config_file(
         os.path.join(
             os.path.dirname(__file__),
-            emission_spc
+            EMISSION_SPC
         ),
         quiet=True
     )
@@ -1841,7 +2136,7 @@ def make_benchmark_emis_tables(
     inv_dict = util.read_config_file(
         os.path.join(
             os.path.dirname(__file__),
-            emission_inv
+            EMISSION_INV
         ),
         quiet=True
     )
@@ -2032,13 +2327,13 @@ def make_benchmark_jvalue_plots(
 
     # Ref dataset
     try:
-        refds = reader(ref, drop_variables=gcon.skip_these_vars)
+        refds = reader(ref, drop_variables=skip_these_vars)
     except (OSError, IOError, FileNotFoundError) as e:
         raise e(f"Could not find Ref file: {ref}") from e
 
     # Dev dataset
     try:
-        devds = reader(dev, drop_variables=gcon.skip_these_vars)
+        devds = reader(dev, drop_variables=skip_these_vars)
     except (OSError, IOError, FileNotFoundError) as e:
         raise e(f"Could not find Dev file: {dev}") from e
@@ -2411,13 +2706,13 @@ def make_benchmark_aod_plots(
 
     # Read the Ref dataset
     try:
-        refds = reader(ref, drop_variables=gcon.skip_these_vars)
+        refds = reader(ref, drop_variables=skip_these_vars)
     except (OSError, IOError, FileNotFoundError) as e:
         raise e(f"Could not find Ref file: {ref}") from e
 
     # Read the Dev dataset
     try:
-        devds = reader(dev, drop_variables=gcon.skip_these_vars)
+        devds = reader(dev, drop_variables=skip_these_vars)
     except (OSError, IOError, FileNotFoundError) as e:
         raise e(f"Could not find Dev file: {dev}") from e
@@ -2468,7 +2763,7 @@ def make_benchmark_aod_plots(
     newvars = util.read_config_file(
         os.path.join(
             os.path.dirname(__file__),
-            aod_spc
+            AOD_SPC
         ),
         quiet=True
     )
@@ -2710,20 +3005,18 @@ def make_benchmark_mass_tables(
     # Read data
     with warnings.catch_warnings():
         warnings.filterwarnings("ignore", category=xr.SerializationWarning)
-        refds = xr.open_dataset(ref, drop_variables=gcon.skip_these_vars)
-        devds = xr.open_dataset(dev, drop_variables=gcon.skip_these_vars)
+        refds = xr.open_dataset(ref, drop_variables=skip_these_vars)
+        devds = xr.open_dataset(dev, drop_variables=skip_these_vars)
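All of these call sites follow the same xarray pattern: open one or more files while dropping variables known to cause read problems. A minimal sketch (the path and the subset of skipped variables are illustrative; open_mfdataset over multiple files requires dask):

```python
import xarray as xr

# A subset of the variables skipped on read (see skip_these_vars in
# gcpy.constants); the wildcard path below is illustrative.
skip = ["contacts", "cubed_sphere"]

ds = xr.open_mfdataset(
    "OutputDir/GEOSChem.SpeciesConc.*.nc4",
    drop_variables=skip,
)
print(ds.sizes)
```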
 
     # ==================================================================
     # Update GCHP restart dataset (if any)
     # ==================================================================
-    # Ref
-    if any(v.startswith("SPC_") for v in refds.data_vars.keys()):
-        refds = util.rename_and_flip_gchp_rst_vars(refds)
-
-    # Dev
-    if any(v.startswith("SPC_") for v in devds.data_vars.keys()):
-        devds = util.rename_and_flip_gchp_rst_vars(devds)
+    # If the data is from a GCHP restart file, rename variables and
+    # flip levels to match the GEOS-Chem Classic naming and level
+    # conventions.  Otherwise no changes will be made.
+    refds = util.rename_and_flip_gchp_rst_vars(refds)
+    devds = util.rename_and_flip_gchp_rst_vars(devds)
 
     # ==================================================================
     # Make sure that all necessary meteorological variables are found
@@ -2738,7 +3031,7 @@
         ref_area = util.get_area_from_dataset(
             xr.open_dataset(
                 ref_met_extra,
-                drop_variables=gcon.skip_these_vars
+                drop_variables=skip_these_vars
             )
         )
@@ -2749,7 +3042,7 @@
         dev_area = util.get_area_from_dataset(
             xr.open_dataset(
                 dev_met_extra,
-                drop_variables=gcon.skip_these_vars
+                drop_variables=skip_these_vars
             )
         )
@@ -2877,6 +3170,304 @@ def make_benchmark_mass_tables(
     gc.collect()
 
 
+def make_benchmark_mass_accumulation_tables(
+        ref_start,
+        ref_end,
+        refstr,
+        refperiodstr,
+        dev_start,
+        dev_end,
+        devstr,
+        devperiodstr,
+        varlist=None,
+        dst="./benchmark",
+        subdst=None,
+        overwrite=False,
+        verbose=False,
+        label="at end of simulation",
+        spcdb_dir=os.path.dirname(__file__),
+):
+    """
+    Creates a text file containing global mass totals by species and
+    category for benchmarking purposes.
+
+    Args:
+        ref_start: str
+            Pathname of the restart file at the start of the simulation
+            period that will constitute the "Ref" (aka "Reference")
+            data set.
+        ref_end: str
+            Pathname of the restart file at the end of the simulation
+            period that will constitute the "Ref" (aka "Reference")
+            data set.
+        refstr: str
+            A string to describe ref (e.g. version number)
+        refperiodstr: str
+            Ref simulation period start and end
+        dev_start: str
+            Pathname of the restart file at the start of the simulation
+            period that will constitute the "Dev" (aka "Development")
+            data set.  The "Dev" data set will be compared against
+            the "Ref" data set.
+        dev_end: str
+            Pathname of the restart file at the end of the simulation
+            period that will constitute the "Dev" (aka "Development")
+            data set.  The "Dev" data set will be compared against
+            the "Ref" data set.
+        devstr: str
+            A string to describe dev (e.g. version number)
+        devperiodstr: str
+            Dev simulation period start and end
+
+    Keyword Args (optional):
+        varlist: list of str
+            List of variables to include in the list of totals.
+            If omitted, then all variables that are found in either
+            "Ref" or "Dev" will be included.  The varlist argument
+            can be a useful way of reducing the number of
+            variables during debugging and testing.
+            Default value: None
+        dst: str
+            A string denoting the destination folder where the file
+            containing the mass accumulation tables will be written.
+            Default value: ./benchmark
+        subdst: str
+            A string denoting the sub-directory of dst where the
+            tables will be written.  In practice, subdst is only
+            needed for the 1-year benchmark output, and denotes a
+            date string (such as "Jan2016") that corresponds to the
+            month that is being plotted.
+            Default value: None
+        overwrite: bool
+            Set this flag to True to overwrite files in the
+            destination folder (specified by the dst argument).
+            Default value: False
+        verbose: bool
+            Set this flag to True to print extra informational output.
+            Default value: False.
+        label: str
+            Label to go in the header string.  Can be used to
+            pass the month & year.
+            Default value: "at end of simulation"
+        spcdb_dir: str
+            Directory of species_database.yml file
+            Default value: Directory of GCPy code repository
+    """
+
+    # ==================================================================
+    # Define destination directory
+    # ==================================================================
+    if os.path.isdir(dst) and not overwrite:
+        msg = "Directory {} exists. Pass overwrite=True to overwrite " \
+            "files in that directory, if any."
+        msg = msg.format(dst)
+        raise ValueError(msg)
+    if not os.path.isdir(dst):
+        try:
+            os.makedirs(dst)
+        except FileExistsError:
+            pass
+
+    # ==================================================================
+    # Read data from netCDF into Dataset objects
+    # ==================================================================
+
+    print('Creating mass accumulation tables from four restart files:')
+    print('   Ref start: {}'.format(ref_start))
+    print('   Ref end:   {}'.format(ref_end))
+    print('   Dev start: {}'.format(dev_start))
+    print('   Dev end:   {}'.format(dev_end))
+
+    # Read data
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", category=xr.SerializationWarning)
+        refSds = xr.open_dataset(ref_start, drop_variables=skip_these_vars)
+        refEds = xr.open_dataset(ref_end, drop_variables=skip_these_vars)
+        devSds = xr.open_dataset(dev_start, drop_variables=skip_these_vars)
+        devEds = xr.open_dataset(dev_end, drop_variables=skip_these_vars)
+
+    # ==================================================================
+    # Update GCHP restart dataset if needed
+    # ==================================================================
+
+    # If the data is from a GCHP restart file, rename variables and
+    # flip levels to match the GEOS-Chem Classic naming and level
+    # conventions.  Otherwise no changes will be made.
+    refSds = util.rename_and_flip_gchp_rst_vars(refSds)
+    refEds = util.rename_and_flip_gchp_rst_vars(refEds)
+    devSds = util.rename_and_flip_gchp_rst_vars(devSds)
+    devEds = util.rename_and_flip_gchp_rst_vars(devEds)
+
+    # Add area to start restart dataset if area in end but not start
+    # Need to consider area variable names used in both GC-Classic and GCHP
+    # Should put this in a function (todo)
+    refSkeys = refSds.data_vars.keys()
+    refEkeys = refEds.data_vars.keys()
+    devSkeys = devSds.data_vars.keys()
+    devEkeys = devEds.data_vars.keys()
+    areaVars = ["Met_AREAM2", "AREA"]
+    for areaVar in areaVars:
+        if areaVar in refEkeys and areaVar not in refSkeys:
+            refSds[areaVar] = refEds[areaVar]
+        if areaVar in devEkeys and areaVar not in devSkeys:
+            devSds[areaVar] = devEds[areaVar]
+
+    # ==================================================================
+    # Make sure that all necessary meteorological variables are found
+    # ==================================================================
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", category=xr.SerializationWarning)
+
+        # Find the area variable in Ref
+        refs_area = util.get_area_from_dataset(refSds)
+        refe_area = util.get_area_from_dataset(refEds)
+
+        # Find the area variable in Dev
+        devs_area = util.get_area_from_dataset(devSds)
+        deve_area = util.get_area_from_dataset(devEds)
+
+        # Find required meteorological variables in Ref
+        # (or exit with an error if we can't find them)
+        metvar_list = ["Met_DELPDRY", "Met_BXHEIGHT", "Met_TropLev"]
+        refsmet = util.get_variables_from_dataset(refSds, metvar_list)
+        refemet = util.get_variables_from_dataset(refEds, metvar_list)
+        devsmet = util.get_variables_from_dataset(devSds, metvar_list)
+        devemet = util.get_variables_from_dataset(devEds, metvar_list)
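get_troposphere_mask, used just below, conceptually blanks out boxes above the tropopause given Met_TropLev. A hedged sketch of the idea with toy array shapes (assuming the convention that True marks stratospheric boxes to exclude; the real helper also handles time and grid dimensions):

```python
import numpy as np

# Illustrative sizes: 5 levels over 2x2 columns.
n_lev = 5

# Met_TropLev: tropopause model-level index per column (made-up values).
trop_lev = np.array([[3, 2], [4, 3]])

# True where a box lies ABOVE the tropopause (i.e. stratospheric),
# so masked arrays exclude it from tropospheric totals.
lev = np.arange(1, n_lev + 1).reshape(n_lev, 1, 1)
tropmask = lev > trop_lev  # broadcasts to shape (lev, lat, lon)
print(tropmask.shape)      # (5, 2, 2)
```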
+
+    # ==================================================================
+    # Make sure that all necessary species are found
+    # ==================================================================
+
+    # Get lists of variable names in the datasets
+    vardict = util.compare_varnames(refSds, devSds, quiet=(not verbose))
+    commonvars = vardict["commonvars3D"]
+    refonly = vardict['refonly']
+    devonly = vardict['devonly']
+
+    # Narrow down the lists to only include species
+    commonspc = [v for v in commonvars if "SpeciesRst_" in v]
+    refonlyspc = [v for v in refonly if v.startswith('SpeciesRst_')]
+    devonlyspc = [v for v in devonly if v.startswith('SpeciesRst_')]
+
+    # Add ref only species to dev dataset with all nan values
+    if refonlyspc:
+        for v in refonlyspc:
+            devSds[v] = devSds[commonspc[0]]
+            devSds[v].data = np.full(devSds[v].shape, np.nan)
+            devSds[v].attrs['units'] = refSds[v].units
+            devEds[v] = devEds[commonspc[0]]
+            devEds[v].data = np.full(devEds[v].shape, np.nan)
+            devEds[v].attrs['units'] = refEds[v].units
+            commonspc.append(v)
+
+    # Add dev only species to ref dataset with all nan values
+    if devonlyspc:
+        for v in devonlyspc:
+            refSds[v] = refSds[commonspc[0]]
+            refSds[v].data = np.full(refSds[v].shape, np.nan)
+            refSds[v].attrs['units'] = devSds[v].units
+            refEds[v] = refEds[commonspc[0]]
+            refEds[v].data = np.full(refEds[v].shape, np.nan)
+            refEds[v].attrs['units'] = devEds[v].units
+            commonspc.append(v)
+
+    # Set list of variables to print in mass table. If this list was passed
+    # as argument, check that all the vars are now in commonspc to ensure
+    # they are present in both datasets.
+    if varlist:
+        for v in varlist:
+            if v not in commonspc:
+                raise ValueError(
+                    f"{dst} folder error: Variable {v} in varlist passed to "
+                    "make_benchmark_mass_accumulation_tables is not present "
+                    "in Ref and Dev datasets"
+                )
+    else:
+        varlist = commonspc
+
+    # Sort the list of species to be printed alphabetically
+    varlist.sort()
+
+    # ==================================================================
+    # Create the mask arrays for the troposphere for Ref and Dev
+    # ==================================================================
+    refs_tropmask = get_troposphere_mask(refsmet)
+    refe_tropmask = get_troposphere_mask(refemet)
+    devs_tropmask = get_troposphere_mask(devsmet)
+    deve_tropmask = get_troposphere_mask(devemet)
+
+    # ==================================================================
+    # Create a dictionary to hold all of the meteorological
+    # variables and mask variables that we need to pass down
+    # ==================================================================
+    met_and_masks = {
+        "Refs_Area": refs_area,
+        "Refe_Area": refe_area,
+        "Devs_Area": devs_area,
+        "Deve_Area": deve_area,
+        "Refs_Delta_P": refsmet["Met_DELPDRY"],
+        "Refe_Delta_P": refemet["Met_DELPDRY"],
+        "Devs_Delta_P": devsmet["Met_DELPDRY"],
+        "Deve_Delta_P": devemet["Met_DELPDRY"],
+        "Refs_BxHeight": refsmet["Met_BXHEIGHT"],
+        "Refe_BxHeight": refemet["Met_BXHEIGHT"],
+        "Devs_BxHeight": devsmet["Met_BXHEIGHT"],
+        "Deve_BxHeight": devemet["Met_BXHEIGHT"],
+        "Refs_TropMask": refs_tropmask,
+        "Refe_TropMask": refe_tropmask,
+        "Devs_TropMask": devs_tropmask,
+        "Deve_TropMask": deve_tropmask,
+    }
+
+    # ==================================================================
+    # Create global mass accumulation table
+    # ==================================================================
+    if subdst is not None:
+        mass_filename = f"GlobalMassAccumulation_TropStrat_{subdst}.txt"
+    else:
+        mass_filename = 
"GlobalMassAccumulation_TropStrat.txt" + mass_file = os.path.join(dst, mass_filename) + create_mass_accumulation_table( + refSds, + refEds, + refstr, + refperiodstr, + devSds, + devEds, + devstr, + devperiodstr, + varlist, + met_and_masks, + label, + outfilename=mass_file, + verbose=verbose, + spcdb_dir=spcdb_dir + ) + + ## ================================================================== + ## Create tropospheric mass table + ## ================================================================== + #if subdst is not None: + # mass_filename = f"GlobalMassAccumulation_Trop_{subdst}.txt" + #else: + # mass_filename = 'GlobalMassAccumulation_Trop.txt' + #mass_file = os.path.join(dst, mass_filename) + #create_mass_accumulation_table( + # refSds, + # refEds, + # refstr, + # devSds, + # devEds, + # devstr, + # varlist, + # met_and_masks, + # label, + # outfilename=mass_file, + # trop_only=True, + # verbose=verbose, + # spcdb_dir=spcdb_dir + #) + + # ------------------------------------------- + # Clean up + # ------------------------------------------- + del refSds + del refEds + del devSds + del devEds + gc.collect() + + def make_benchmark_oh_metrics( ref, refmet, @@ -2926,10 +3517,10 @@ def make_benchmark_oh_metrics( # Read data from netCDF into Dataset objects # ================================================================== - refds = xr.open_dataset(ref, drop_variables=gcon.skip_these_vars) - devds = xr.open_dataset(dev, drop_variables=gcon.skip_these_vars) - refmetds = xr.open_dataset(refmet, drop_variables=gcon.skip_these_vars) - devmetds = xr.open_dataset(devmet, drop_variables=gcon.skip_these_vars) + refds = xr.open_dataset(ref, drop_variables=skip_these_vars) + devds = xr.open_dataset(dev, drop_variables=skip_these_vars) + refmetds = xr.open_dataset(refmet, drop_variables=skip_these_vars) + devmetds = xr.open_dataset(devmet, drop_variables=skip_these_vars) # ================================================================== # Get tropopause mask @@ -3208,16 +3799,16 @@ def make_benchmark_wetdep_plots( reader = util.dataset_reader(time_mean, verbose=verbose) # Open datasets - refds = reader(ref, drop_variables=gcon.skip_these_vars) - devds = reader(dev, drop_variables=gcon.skip_these_vars) + refds = reader(ref, drop_variables=skip_these_vars) + devds = reader(dev, drop_variables=skip_these_vars) # Open met datasets if passed as arguments refmetds = None devmetds = None if refmet is not None: - refmetds = reader(refmet, drop_variables=gcon.skip_these_vars) + refmetds = reader(refmet, drop_variables=skip_these_vars) if devmet is not None: - devmetds = reader(devmet, drop_variables=gcon.skip_these_vars) + devmetds = reader(devmet, drop_variables=skip_these_vars) # Compute mean of data over the time dimension (if time_mean=True) if time_mean: @@ -3430,7 +4021,7 @@ def make_benchmark_aerosol_tables( # Read the species database spcdb = util.read_config_file( os.path.join( - spcdb_dir, + spcdb_dir, "species_database.yml" ), quiet=True @@ -3440,7 +4031,7 @@ def make_benchmark_aerosol_tables( mw = {} for v in species_list: mw[v] = spcdb[v]["MW_g"] - mw["Air"] = gcon.MW_AIR_g + mw["Air"] = MW_AIR_g # Get the list of relevant AOD diagnostics from a YAML file aod = util.read_config_file( @@ -3472,9 +4063,9 @@ def make_benchmark_aerosol_tables( compat='override', coords='all') ds_spc = xr.open_mfdataset( - devlist_spc, drop_variables=gcon.skip_these_vars) + devlist_spc, drop_variables=skip_these_vars) ds_met = xr.open_mfdataset( - devlist_met, drop_variables=gcon.skip_these_vars) + devlist_met, 
drop_variables=skip_these_vars)
     else:
         ds_aer = xr.open_mfdataset(
             devlist_aero,
             compat='override',
             coords='all')  # ,
         # combine="nested", concat_dim="time")
         ds_spc = xr.open_mfdataset(devlist_spc,
-                                   drop_variables=gcon.skip_these_vars)  # ,
+                                   drop_variables=skip_these_vars)  # ,
         # combine="nested", concat_dim="time")
         ds_met = xr.open_mfdataset(devlist_met,
-                                   drop_variables=gcon.skip_these_vars)  # ,
+                                   drop_variables=skip_these_vars)  # ,
         # combine="nested", concat_dim="time")
 
     # Rename SpeciesConc_ to SpeciesConcVV_ for consistency with new
@@ -3679,11 +4270,13 @@ def make_benchmark_operations_budget(
         operations=["Chemistry", "Convection", "EmisDryDep",
                     "Mixing", "Transport", "WetDep"],
         compute_accum=True,
+        compute_restart=False,
         require_overlap=False,
         dst='.',
         species=None,
         overwrite=True,
-        verbose=False
+        verbose=False,
+        spcdb_dir=os.path.dirname(__file__)
 ):
     """
     Prints the "operations budget" (i.e. change in mass after
@@ -3727,6 +4320,16 @@ def make_benchmark_operations_budget(
         are computed. Otherwise a message will be printed warning that
         accumulation will not be calculated.
         Default value: True
+    compute_restart: bool
+        Optionally turn on/off calculation of mass change based on restart
+        file. Only functional for "Full" column section.
+        Default value: False
     require_overlap: bool
         Whether to calculate budgets for only species that are present in
         both Ref and Dev.
@@ -3779,6 +4382,8 @@ def make_benchmark_operations_budget(
     all_operations = gc_operations
     if compute_accum and len(gc_operations) == 6:
         all_operations = gc_operations + ["ACCUMULATION"]
+    if compute_restart:
+        all_operations = all_operations + ["RESTART"]
     n_ops = len(all_operations)
 
     # Print info
@@ -3792,6 +4397,9 @@ def make_benchmark_operations_budget(
     else:
         print("***Will not compute ACCUMULATION since not all GEOS-Chem"
               " operation budgets will be computed.")
+    if compute_restart:
+        print("*** Will compute RESTART operation as mass change "
+              "based on simulation start and end restart files ***")
 
     # ------------------------------------------
     # Read data
@@ -3802,7 +4410,7 @@ def make_benchmark_operations_budget(
 
     # Read data from disk (either one month or 12 months)
     print('Opening ref and dev data')
-    skip_vars = gcon.skip_these_vars
+    skip_vars = skip_these_vars
     if annual:
         if LooseVersion(xr.__version__) < LooseVersion("0.15.0"):
             ref_ds = xr.open_mfdataset(reffiles, drop_variables=skip_vars)
@@ -3816,6 +4424,8 @@ def make_benchmark_operations_budget(
         ref_ds = xr.open_dataset(reffiles, drop_variables=skip_vars)
         dev_ds = xr.open_dataset(devfiles, drop_variables=skip_vars)
 
+    # TODO: Add section for reading files for computing mass from restart file
+
     # ------------------------------------------
     # Species
     # ------------------------------------------
@@ -4116,6 +4726,111 @@ def make_benchmark_operations_budget(
                 df.loc[dfrow, "Diff"] = diff
                 df.loc[dfrow, "Pct_diff"] = pctdiff
 
+    # ------------------------------------------
+    # Compute mass change in restarts for each column section (if applicable)
+    # ------------------------------------------
+    if compute_restart:
+        print('Computing RESTART operation budgets...')
+
+        # Load a YAML file containing species properties (such as
+        # molecular weights), which we will need for unit conversions.
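The RESTART rows are meant to hold mass change computed from restart files (see the TODOs below); the arithmetic itself is just end-minus-start in consistent units. A sketch with made-up numbers:

```python
import numpy as np

# Illustrative per-box species masses (Gg) from the start- and
# end-of-period restart files (values are made up).
start_mass = np.array([10.0, 20.0, 30.0])
end_mass = np.array([11.0, 19.5, 30.5])

# Accumulation over the period is simply end minus start,
# summed over all grid boxes for a global total.
accum = float(np.sum(end_mass - start_mass))
print(f"Global mass accumulation: {accum:.2f} Gg")  # 1.50 Gg
```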
+        properties = util.read_config_file(
+            os.path.join(
+                spcdb_dir,
+                "species_database.yml"
+            ),
+            quiet=True
+        )
+
+        # Loop over all column sections
+        for col_section in col_sections:
+
+            # Loop over species
+            for i, spc in enumerate(spclist):
+
+                # Keep track of progress
+                if (i + 1) % 50 == 0:
+                    print(f"  {col_section}: species {i + 1} of {n_spc}")
+
+                # Get the accumulation dataframe row to fill. Skip if not
+                # found, or if not the Full column section.
+                dfrow = (df["Column_Section"] == "Full") \
+                    & (df["Species"] == spc) \
+                    & (df["Operation"] == "RESTART")
+                if not any(dfrow):
+                    continue
+
+                # Get ref and dev mass
+
+                # Get species properties for unit conversion. If none, skip.
+                species_properties = properties.get(spc)
+                if species_properties is None:
+                    continue
+                else:
+                    mol_wt_g = species_properties.get("MW_g")
+                    if mol_wt_g is None:
+                        continue
+
+                # Specify target units
+                target_units = "Gg"
+
+                # ==============================================================
+                # Convert units of Ref and save to a DataArray
+                # (or skip if Ref contains NaNs everywhere)
+                # NOTE: refdata, devdata, and met_and_masks are placeholders
+                # here, pending the restart-file reading TODO above.
+                # ==============================================================
+                refarray = refdata[spc]
+                if not np.isnan(refdata[spc].values).all():
+                    refarray = convert_units(
+                        refarray,
+                        spc,
+                        species_properties,
+                        target_units,
+                        area_m2=met_and_masks["Ref_Area"],
+                        delta_p=met_and_masks["Ref_Delta_P"],
+                        box_height=met_and_masks["Ref_BxHeight"],
+                    )
+
+                # ==============================================================
+                # Convert units of Dev and save to a DataArray
+                # (or skip if Dev contains NaNs everywhere)
+                # ==============================================================
+                devarray = devdata[spc]
+                if not np.isnan(devdata[spc].values).all():
+                    devarray = convert_units(
+                        devarray,
+                        spc,
+                        species_properties,
+                        target_units,
+                        area_m2=met_and_masks["Dev_Area"],
+                        delta_p=met_and_masks["Dev_Delta_P"],
+                        box_height=met_and_masks["Dev_BxHeight"],
+                    )
+
+                # Compute ref mass as end mass minus start mass in ref
+                # TODO (refmass is a placeholder until then)
+                refmass = np.nan
+
+                # Compute dev mass as end mass minus start mass in dev
+                # TODO - copy above once complete. The rest should just work.
+                devmass = np.nan
+
+                # Calculate diff and % diff
+                if not np.isnan(refmass) and not np.isnan(devmass):
+                    diff = devmass - refmass
+                    try:
+                        pctdiff = diff / refmass * 100
+                    except BaseException:
+                        pctdiff = np.nan
+                else:
+                    diff = np.nan
+                    pctdiff = np.nan
+
+                # Fill dataframe
+                df.loc[dfrow, "Units_converted"] = units[spc]
+                df.loc[dfrow, "Ref"] = refmass
+                df.loc[dfrow, "Dev"] = devmass
+                df.loc[dfrow, "Diff"] = diff
+                df.loc[dfrow, "Pct_diff"] = pctdiff
+
     # Sanity check write to csv (for testing. Keep commented out otherwise)
     #df.to_csv('df.csv', na_rep='NA')
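The diff / percent-diff guard above is a small reusable pattern; here is a sketch that returns NaN whenever the reference is zero or missing:

```python
import numpy as np

def percent_diff(ref, dev):
    """Return (dev - ref) / ref * 100, or NaN when undefined."""
    if np.isnan(ref) or np.isnan(dev):
        return np.nan
    try:
        return (dev - ref) / ref * 100.0
    except ZeroDivisionError:
        return np.nan

print(percent_diff(10.0, 12.0))  # 20.0
print(percent_diff(0.0, 5.0))    # nan (float division by zero raises)
```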
@@ -4127,10 +4842,7 @@ def make_benchmark_operations_budget(
     util.make_directory(dst, overwrite)
 
     # Print budgets to file
-    if label is not None:
-        filename = f"{dst}/Budgets_After_Operations_{label}.txt"
-    else:
-        filename = f"{dst}/Budgets_After_Operations.txt".format(dst)
+    filename = f"{dst}/Budgets_After_Operations.txt"
     with open(filename, "w+") as f:
         print("#" * 78, file=f)
         if label is not None and benchmark_type is not None:
@@ -4275,7 +4987,7 @@ def make_benchmark_mass_conservation_table(
     # Calculate global mass for the tracer at all restart dates
     # ==================================================================
     for f in datafiles:
-        ds = xr.open_dataset(f, drop_variables=gcon.skip_these_vars)
+        ds = xr.open_dataset(f, drop_variables=skip_these_vars)
 
         # Save date in desired format
         #datestr = str(pd.to_datetime(ds.time.values[0]))
@@ -4288,7 +5000,7 @@
         area = util.get_area_from_dataset(
             xr.open_dataset(
                 areapath,
                 drop_variables=skip_these_vars
             )
         )
@@ -4358,10 +5070,10 @@
     print(' ', file=f)
     print('  Summary', file=f)
     print('  ' + '-' * 30, file=f)
-    print(f"  Max mass = {max_mass : 2.13f} Tg")
-    print(f"  Min mass = {min_mass : 2.13f} Tg")
-    print(f"  Abs diff = {absdiff : >16.3f} g")
-    print(f"  Pct diff = {pctdiff : >16.10f} %")
+    print(f"  Max mass = {max_mass : 2.13f} Tg", file=f)
+    print(f"  Min mass = {min_mass : 2.13f} Tg", file=f)
+    print(f"  Abs diff = {absdiff : >16.3f} g", file=f)
+    print(f"  Pct diff = {pctdiff : >16.10f} %", file=f)
 
     gc.collect()
@@ -4504,7 +5216,7 @@ def create_benchmark_summary_table(
     # Print header to file
     print("#" * 80, file=f)
     print(f"{title1 : <77}{'###'}", file=f)
-    print(f"{'###' : <77}{'###'}", file=f)
+    print(f"{'###' : <77}{'###'}", file=f)
     print(f"{title2 : <77}{'###'}", file=f)
     print(f"{title3 : <77}{'###'}", file=f)
     print("#" * 80, file=f)
@@ -4515,7 +5227,7 @@ def create_benchmark_summary_table(
     # ==================================================================
 
     # Variables to skip
-    skip_vars = gcon.skip_these_vars
+    skip_vars = list(skip_these_vars)
     skip_vars.append("AREA")
 
     # Pick the proper function to read the data
@@ -4624,14 +5336,13 @@ def diff_list_to_text(
         diff_text : str
             String with concatenated list values.
     """
-    if not isinstance(diff_list, list):
-        raise ValueError("Argument 'diff_list' must be a list!")
+    util.verify_variable_type(diff_list, list)
 
     # Use "Dev" and "Ref" for inserting into a header
     if fancy_format:
         refstr = "Ref"
         devstr = "Dev"
-
+
     # Strip out duplicates from diff_list
    # Prepare a message about species differences (or alternate msg)
    diff_list = util.unique_values(diff_list, drop=[None])
    if len(diff_list) > 0:
        diff_text = f"{devstr} and {refstr} show {len(diff_list)} differences"
    else:
        diff_text = f"{devstr} and {refstr} are identical"
 
-    # If we are placing the text in a header,
-    # then trim the length of diff_text to fit.
+    # If we are placing the text in a header, trim the length of diff_text
+    # to fit.  NOTE: TABLE_WIDTH-7 leaves room for the '### ' at the start
+    # of the string and the '###' at the end of the string.
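TABLE_WIDTH and COL_WIDTH (added to gcpy/constants.py below) drive the header layout via parameterized f-string widths, where the width itself is an expression in braces. A compact sketch:

```python
# Parameterized field widths in f-strings: the width can be computed
# at run time. This mirrors the table-header layout used above.
TABLE_WIDTH = 105
COL_WIDTH = 20

title = "### Global mass (Gg)"
print("#" * TABLE_WIDTH)
print(f"{title : <{TABLE_WIDTH-3}}{'###'}")  # pad to width-3, then '###'
print(f"{'' : <{COL_WIDTH-1}}{'Ref' : >{COL_WIDTH}}{'Dev' : >{COL_WIDTH}}")
```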
     if fancy_format:
+        diff_text = f"### {diff_text : <{TABLE_WIDTH-7}}{'###'}"
         diff_text = util.wrap_text(
             diff_text,
-            width=83
+            width=TABLE_WIDTH
         )
-        diff_text = f"### {diff_text : <82}{'###'}"
-
+
     return diff_text.strip()
@@ -4675,12 +5387,8 @@ def diff_of_diffs_toprow_title(config, model):
         title: str
             The plot title string for the diff-of-diffs
     """
-    if not isinstance(config, dict):
-        msg = "The 'config' argument must be of type 'dict`!"
-        raise ValueError(msg)
-    if not isinstance(model, str):
-        msg = "The 'model' argument must be of type 'str'!"
-        raise ValueError(msg)
+    util.verify_variable_type(config, dict)
+    util.verify_variable_type(model, str)
 
     if not "gcc" in model and not "gchp" in model:
         msg = "The 'model' argument must be either 'gcc' or 'gchp'!"
         raise ValueError(msg)
diff --git a/gcpy/budget_tt.py b/gcpy/budget_tt.py
index 14a317f4..49d8d7e8 100644
--- a/gcpy/budget_tt.py
+++ b/gcpy/budget_tt.py
@@ -462,7 +462,10 @@ def annual_average(globvars, ds, collection, conv_factor):
     for spc in globvars.species_list:
 
         # Whole-atmosphere quantity [g] or [g d-1]
-        varname = collection.strip() + "_" + spc
+        if "SpeciesConc" in collection:
+            varname = collection.strip() + "VV_" + spc
+        else:
+            varname = collection.strip() + "_" + spc
         q[spc + "_f"] = ds[varname].values * conv_factor[spc]
 
         # Shape of the data
@@ -626,7 +629,7 @@ def trop_residence_time(globvars):
         result[spc + "_t"] = 0.0
 
         # Concentration [g]
-        var = "SpeciesConc_" + spc
+        var = "SpeciesConcVV_" + spc
         q_cnc = globvars.ds_cnc[var].values * globvars.vv_to_g[spc]
         q_cnc = np.ma.masked_array(q_cnc, globvars.tropmask)
diff --git a/gcpy/constants.py b/gcpy/constants.py
index f2df04d0..8afc484b 100644
--- a/gcpy/constants.py
+++ b/gcpy/constants.py
@@ -48,3 +48,16 @@
     "contacts",
     "cubed_sphere"
 ]
+
+
+# ======================================================================
+# Table and column widths for emissions & mass tables
+# ======================================================================
+TABLE_WIDTH = 105
+COL_WIDTH = 20
+
+# ======================================================================
+# Default encoding for file I/O
+# ======================================================================
+ENCODING = "UTF-8"
+
diff --git a/gcpy/cstools.py b/gcpy/cstools.py
new file mode 100644
index 00000000..a1d5dcac
--- /dev/null
+++ b/gcpy/cstools.py
@@ -0,0 +1,811 @@
+"""
+Contains tools for working with cubed-sphere data.
+
+Originally developed by Liam Bindle and Sebastian Eastham.
+Included into GCPy by Lizzie Lundgren and Bob Yantosca.
+
+Style updates suggested by the "Pylint" python linter have been adopted.
+
+Example:
+    import gcpy
+    c24_grid=gcpy.gen_grid(24)             # Generate C24 grid
+    lat=40.0                               # Specify target latitude
+    lon=150.0                              # Specify target longitude
+    idx=gcpy.find_index(lat,lon,c24_grid)  # Returns numpy ndarray
+                                           # [[3],[7],[1]] which can be
+                                           # used to index data
+                                           # (nf=3, Ydim=7, Xdim=1)
+
+    # Check and use to get data
+    datafile='/n/home/GeosChem.SpeciesConc.20190701_0000z.nc4'
+    import xarray as xr
+    ds=xr.open_dataset(datafile)
+    nf=idx[0,0]
+    Ydim=idx[1,0]
+    Xdim=idx[2,0]
+    ds['lats'].isel(nf=nf,Ydim=Ydim,Xdim=Xdim).item()
+    # prints 38.711082458496094
+    ds['lons'].isel(nf=nf,Ydim=Ydim,Xdim=Xdim).item()
+    # prints 151.61871337890625
+    ds['SpeciesConcVV_O3'].isel(time=0,lev=0,nf=nf,Ydim=Ydim,Xdim=Xdim).item()
+    # prints 2.7790051149167994e-08
+"""
+import numpy as np
+import xarray as xr
+import gcpy
+try:
+    import pyproj
+    import shapely.ops
+    import shapely.geometry
+except ImportError as exc:
+    raise ImportError(
+        "gcpy.cstools needs packages 'pyproj' and 'shapely'!"
+    ) from exc
+
+# Constants
+RAD_TO_DEG = 180.0 / np.pi
+DEG_TO_RAD = np.pi / 180.0
+
+
+def extract_grid(
+        data
+):
+    """
+    Extracts the grid information from an xarray.Dataset object and
+    returns the grid information as a cubed-sphere xarray.Dataset.
+
+    Args:
+    -----
+    data : xarray.Dataset or xarray.DataArray
+        The input dataset
+
+    Returns:
+    --------
+    data_cs: xarray.Dataset or None
+        Same data as in argument "data", but on a cubed-sphere grid.
+        If the data is not placed on a cubed-sphere grid, then
+        this will be returned with the value None.
+    """
+    gcpy.util.verify_variable_type(data, (xr.DataArray, xr.Dataset))
+
+    if not is_cubed_sphere(data):
+        return None
+
+    cs_res = get_cubed_sphere_res(data)
+    return gen_grid(cs_res)
+
+
+def read_gridspec(gs_obj):
+    """
+    Reads a GridSpec object and returns an xarray.Dataset object.
+
+    Args:
+    -----
+    gs_obj : GridSpec
+        The GridSpec object as input
+
+    Returns:
+    --------
+    ds : xarray.Dataset
+        The same data as an xarray.Dataset object.
+    """
+    n_cs = gs_obj._tiles[0].area.shape[0]
+    lon = np.zeros((6, n_cs, n_cs))
+    lon_b = np.zeros((6, n_cs+1, n_cs+1))
+    lat = np.zeros((6, n_cs, n_cs))
+    lat_b = np.zeros((6, n_cs+1, n_cs+1))
+    area = np.zeros((6, n_cs, n_cs))
+    for i in range(6):
+        tile = gs_obj._tiles[i]
+        # lon_b is identical to original definition
+        lon_b[i,...] = tile.supergrid_lons[::2,::2]
+        # lat_b is identical to original definition
+        lat_b[i,...] = tile.supergrid_lats[::2,::2]
+        # lon is NOT identical to original definition
+        lon[i,...] = tile.supergrid_lons[1::2,1::2]
+        # lat is NOT identical to original definition
+        lat[i,...] = tile.supergrid_lats[1::2,1::2]
+        area[i,...] = tile.area[...]
+
+    data = xr.Dataset(
+        data_vars={
+            "area": (['nf','Ydim','Xdim'], area),
+            "lon": (['nf','Ydim','Xdim'], lon),
+            "lat": (['nf','Ydim','Xdim'], lat),
+            "lon_b": (['nf','Ydim_b','Xdim_b'], lon_b),
+            "lat_b": (['nf','Ydim_b','Xdim_b'], lat_b),
+        },
+        coords={
+            "nf": (['nf'], list(range(6))),
+            "Ydim": (['Ydim'], list(range(n_cs))),
+            "Xdim": (['Xdim'], list(range(n_cs))),
+            "Ydim_b": (['Ydim_b'], list(range(n_cs+1))),
+            "Xdim_b": (['Xdim_b'], list(range(n_cs+1))),
+        },
+        attrs={
+            "description": f"c{n_cs:d} grid data"
+        },
+    )
+    return data
+
+
+def face_area(
+        lon_b,
+        lat_b,
+        r_sphere=6.375e6
+):
+    """
+    Calculates area of cubed-sphere grid cells on one face.
+    Inputs must be in degrees. Edge arrays must be shaped [N+1 x N+1].
+
+    Args:
+    -----
+    lon_b, lat_b : list of float
+        Longitude and latitude bounds (degrees)
+
+    Keyword Args (optional):
+    ------------------------
+    r_sphere : float
+        Radius of Earth (meters). Default value: 6.375e6
+
+    Returns:
+    --------
+    cs_area : numpy.ndarray
+        Array of surface area (m2) in each grid box of a
+        cubed-sphere grid face.
+    """
+
+    # Convert inputs to radians
+    lon_b_rad = lon_b * DEG_TO_RAD
+    lat_b_rad = lat_b * DEG_TO_RAD
+
+    r_sq = r_sphere * r_sphere
+    n_cs = lon_b.shape[1] - 1
+
+    # Allocate output array
+    cs_area = np.zeros((n_cs,n_cs))
+
+    # Ordering
+    valid_combo = np.array([[1,2,4],[2,3,1],[3,2,4],[4,1,3]]) - 1
+
+    for i_lon in range(n_cs):
+        for i_lat in range(n_cs):
+            lon_corner = np.zeros(4)
+            lat_corner = np.zeros(4)
+            xyz_corner = np.zeros((4,3))
+            for i_vert in range(4):
+                x_lon = i_lon + (i_vert > 1)
+                x_lat = i_lat + (i_vert == 0 or i_vert == 3)
+                lon_corner[i_vert] = lon_b_rad[x_lon,x_lat]
+                lat_corner[i_vert] = lat_b_rad[x_lon,x_lat]
+            for i_vert in range(4):
+                xyz_corner[i_vert,:] = ll2xyz(
+                    lon_corner[i_vert],
+                    lat_corner[i_vert]
+                )
+            tot_ang = 0.0
+            for i_corner in range(4):
+                curr_combo = valid_combo[i_corner,:]
+                xyz_mini = np.zeros((3,3))
+                for i_mini in range(3):
+                    xyz_mini[i_mini,:] = xyz_corner[curr_combo[i_mini],:]
+                curr_ang = sphere_angle(
+                    xyz_mini[0,:],
+                    xyz_mini[1,:],
+                    xyz_mini[2,:]
+                )
+                tot_ang += curr_ang
+            cs_area[i_lon,i_lat] = r_sq * (tot_ang - (2.0*np.pi))
+
+    return cs_area
+
+
+def ll2xyz(
+        lon_pt,
+        lat_pt
+):
+    """
+    Converts a lon/lat pair (in radians) to Cartesian co-ordinates.
+    This is vectorizable.
+
+    Args:
+    -----
+    lon_pt, lat_pt : float
+        Longitude & latitude in radians.
+
+    Returns:
+    --------
+    [x_pt, y_pt, z_pt] : list of numpy.float64
+        Cartesian vector coordinates (X, Y, Z) normalized to
+        the unit sphere.
+    """
+    x_pt = np.cos(lat_pt) * np.cos(lon_pt)
+    y_pt = np.cos(lat_pt) * np.sin(lon_pt)
+    z_pt = np.sin(lat_pt)
+
+    return [x_pt, y_pt, z_pt]
+
+
+def sphere_angle(
+        e_1,
+        e_2,
+        e_3
+):
+    """
+    Computes the angle between 3 points on a sphere.
+
+    Args:
+    -----
+    e_1 : list of float
+        (x, y, z) Cartesian coordinates at the mid point
+
+    e_2, e_3 : list of float
+        (x, y, z) Cartesian coordinates at points on either side
+        of the midpoint
+
+    Returns:
+    --------
+    angle : float
+        The spherical angle at point e_1.
+    """
+    p_vec = np.ones(3)
+    q_vec = np.ones(3)
+    p_vec[0] = e_1[1]*e_2[2] - e_1[2]*e_2[1]
+    p_vec[1] = e_1[2]*e_2[0] - e_1[0]*e_2[2]
+    p_vec[2] = e_1[0]*e_2[1] - e_1[1]*e_2[0]
+
+    q_vec[0] = e_1[1]*e_3[2] - e_1[2]*e_3[1]
+    q_vec[1] = e_1[2]*e_3[0] - e_1[0]*e_3[2]
+    q_vec[2] = e_1[0]*e_3[1] - e_1[1]*e_3[0]
+
+    ddd = np.sum(p_vec * p_vec) * np.sum(q_vec * q_vec)
+    if ddd <= 0.0:
+        angle = 0.0
+    else:
+        ddd = np.sum(p_vec * q_vec) / np.sqrt(ddd)
+        if np.abs(ddd) > 1.0:
+            angle = np.pi / 2.0
+        else:
+            angle = np.arccos(ddd)
+
+    return angle
+
+
+def grid_area(
+        cs_grid=None,
+        cs_res=None
+):
+    """
+    Return area (m2) for each cell in a cubed-sphere grid
+
+    Args:
+    -----
+    cs_grid : dict
+        Cubed-sphere grid definition as a dict of:
+        {'lat'   : lat midpoints,
+         'lon'   : lon midpoints,
+         'lat_b' : lat edges,
+         'lon_b' : lon edges}
+        where each value has an extra face dimension of length 6.
+
+    cs_res : int
+        Number of grid boxes along a single face of the cubed-sphere
+        grid.  If omitted, it will be inferred from cs_grid.
+
+    Returns:
+    --------
+    grid_area : numpy.ndarray
+        Surface area (m2) for each cell in the cubed-sphere grid.
+        NOTE: Uses GMAO convention, array shape = (6, n, n),
+        where n is the number of cells along a face edge.
+ """ + # Calculate area on a cubed sphere + if cs_res is None: + cs_res = cs_grid['lon_b'].shape[-1] - 1 + elif cs_grid is None: + cs_grid = gcpy.csgrid_GMAO(cs_res) + elif cs_grid is not None and cs_res is not None: + assert cs_res == cs_grid['lon_b'].shape[-1], \ + 'Routine grid_area received inconsistent inputs' + cs_area = np.zeros((6,cs_res,cs_res)) + cs_area[0,:,:] = face_area( + cs_grid['lon_b'][0,:,:], + cs_grid['lat_b'][0,:,:] + ) + for i_face in range(1,6): + cs_area[i_face,:,:] = cs_area[0,:,:].copy() + + return cs_area + + +def gen_grid( + n_cs, + stretch_factor=None, + target_lon=None, + target_lat=None +): + """ + Returns an xarray.Dataset object specifying a cubed-sphere + stretched grid. + + Args: + ----- + n_cs : int + Number of grid boxes along a single face of the cubed-sphere. + + stretch_factor : int + Specifies the stretching factor. Default value: None + + target_lon, target_lat : float + Specifies the longitude and latitude at the center of the + cubed-sphere grid face that will be stretched. + Default values: None, None + + Returns: + -------- + grid : xarray.Dataset + Cubed-sphere grid definition containing the variables: + {'lat' : lat midpoints, + 'lon' : lon midpoints, + 'lat_b' : lat edges, + 'lon_b' : lon edges} + where each value has an extra face dimension of length 6. + """ + if stretch_factor is not None: + cs_temp, _ = gcpy.make_grid_SG( + n_cs, + stretch_factor, + target_lon, + target_lat + ) + else: + cs_temp = gcpy.csgrid_GMAO(n_cs) + + return xr.Dataset( + {'nf': (['nf'], np.array(range(6))), + 'Ydim': (['Ydim'], np.array(range(n_cs))), + 'Xdim': (['Xdim'], np.array(range(n_cs))), + 'Ydim_b': (['Ydim_b'], np.array(range(n_cs+1))), + 'Xdim_b': (['Xdim_b'], np.array(range(n_cs+1))), + 'lat': (['nf','Ydim','Xdim'], cs_temp['lat']), + 'lon': (['nf','Ydim','Xdim'], cs_temp['lon']), + 'lat_b': (['nf','Ydim_b','Xdim_b'], cs_temp['lat_b']), + 'lon_b': (['nf','Ydim_b','Xdim_b'], cs_temp['lon_b']), + 'area': (['nf','Ydim','Xdim'], grid_area(cs_temp)) + } + ) + + +def corners_to_xy( + x_c, + y_c +): + """ + Creates xy coordinates for each grid-box + Developed, tested, and supplied by Liam Bindle. + + Args: + ----- + x_c : numpy.ndarray + Grid-box corner longitudes; array shape = (n+1, n+1), + where n is the cubed-sphere grid size. + + y_c : numpy.ndarray + Grid-box corner longitudes; array shape = (n+1, n+1), + where n is the cubed-sphere grid size. + + Returns: + -------- + x_y : numpy.ndarray + Grid-box cartesian coordinates; array shape = (n, n, 5), + where n is the cubed-sphere grid size. + """ + p_0 = slice(0, -1) + p_1 = slice(1, None) + boxes_x = np.moveaxis( + np.array( + [ + x_c[p_0, p_0], + x_c[p_1, p_0], + x_c[p_1, p_1], + x_c[p_0, p_1], + x_c[p_0, p_0] + ] + ), + 0, -1 + ) + boxes_y = np.moveaxis( + np.array( + [ + y_c[p_0, p_0], + y_c[p_1, p_0], + y_c[p_1, p_1], + y_c[p_0, p_1], + y_c[p_0, p_0] + ] + ), + 0, -1 + ) + return np.moveaxis( + np.array( + [boxes_x, boxes_y] + ), 0, -1 + ) + + +def central_angle( + x_0, + y_0, + x_1, + y_1): + """ + Returns the distance (central angle) between cartesian + coordinates (x_0, y_0) and (x_1, y_1). This is vectorizable. + Developed, tested, and supplied by Liam Bindle. + + Args: + ----- + x_0, y_0 : float + Longitude and latitude (degrees) of coordinates (x_0, y_0). + + x_1, y_1: float + Longitude and latitude (degrees) of coordinates (x_1, y_1). + + Returns: + -------- + distance : float + Distance (degrees) between (x_0, y_0) and (x_1, y_1). 
+ """ + x_0 = x_0 * DEG_TO_RAD + x_1 = x_1 * DEG_TO_RAD + y_0 = y_0 * DEG_TO_RAD + y_1 = y_1 * DEG_TO_RAD + + return np.arccos( + np.sin(y_0) * np.sin(y_1) + \ + np.cos(y_0) * np.cos(y_1) * \ + np.cos(np.abs(x_0 - x_1)) + ) * RAD_TO_DEG + + +def find_index_single( + lat, + lon, + x_centers_flat, + y_centers_flat, + xy_polygon_defs, + cs_size, + latlon_crs, + jitter_size=0.0 +): + """ + Returns the cubed-sphere grid box corresponding to a given + latitude and longitude. Called by routine find_index. + + Args: + ----- + lat, lon : float or list(float) + Latitude and longitude (degrees) of the point for which + cubed-sphere grid indices are desired. + + x_centers_flat, y_centers_flat : float or list(float) + Flattened (i.e. in Fortran column-major notation) arrays + of cubed-sphere xDim and yDim values (degrees). + + xy_polygon_defs : float or list(float) + XY polygon definitions for cubed-sphere grid boxes + (i.e. the output of function corners_to_xy). + + cs_size : int or list(int) + Cubed-sphere grid size (i.e. the number of points along + a face edge). + + latlon_crs : ? + Ouptut of pyproj.Proj("+proj=latlon") + + jitter_size : float + If the point cannot be matched to a cubed-sphere grid box, + then shift longitude by the distance [m] specified in + jitter_size before doing the lookup once more. A nonzero + jitter_size value may be needed when the latitude is close + to +90 or -90. + + Returns: + -------- + nf_cs, xdim_cs, ydim_cs : int, list(float), list(float) + nf_cs is the number of cube-sphere face + xdim_cs (aka XDim) and ydim_cs (aka YDim) are the longitude + and latitude arrays for each cell of the cubed-sphere grid. + """ + # Center on x_find, y_find + x_find = lon + y_find = lat + gnomonic_crs = pyproj.Proj(f'+proj=gnom +lat_0={y_find} +lon_0={x_find}') + + # Generate all distances + distances = central_angle( + x_find, + y_find, + x_centers_flat, + y_centers_flat + ) + four_nearest_indexes = np.argpartition(distances, 4)[:4] + + # Unravel 4 smallest indexes + four_nearest_indexes = np.unravel_index( + four_nearest_indexes, + (6, cs_size, cs_size) + ) + four_nearest_xy = xy_polygon_defs[four_nearest_indexes] + four_nearest_polygons = [ + shapely.geometry.Polygon(polygon_xy) for polygon_xy in four_nearest_xy + ] + + # Transform to gnomonic projection + gno_transform = pyproj.Transformer.from_proj( + latlon_crs, + gnomonic_crs, + always_xy=True + ).transform + four_nearest_polygons_gno = [ + shapely.ops.transform(gno_transform, polygon) \ + for polygon in four_nearest_polygons + ] + + # Figure out which polygon contains the point + xy_find = shapely.geometry.Point(x_find, y_find) + xy_find_gno = shapely.ops.transform(gno_transform, xy_find) + polygon_contains_point = [ + polygon.contains(xy_find_gno) for polygon in four_nearest_polygons_gno + ] + + # If the point cannot be matched (such as can happen near the poles), + # move the longitude by the jitter_size (in meters) and try again. 
+    if np.count_nonzero(polygon_contains_point) == 0:
+        if jitter_size > 0.0:
+            # Return the jittered-lookup result directly so that it
+            # is not overwritten by the index selection below
+            return find_index_single(
+                y_find,
+                x_find+jitter_size,
+                x_centers_flat,
+                y_centers_flat,
+                xy_polygon_defs,
+                cs_size,
+                latlon_crs,
+                jitter_size=0.0
+            )
+        msg = f'Point at {x_find:8.2f} E, {y_find:8.2f} N '
+        msg+= 'could not be matched'
+        raise ValueError(msg)
+
+    # The first will be selected, if more than one
+    polygon_with_point = np.argmax(polygon_contains_point)
+
+    # Get original index
+    nf_cs = four_nearest_indexes[0][polygon_with_point]
+    ydim_cs = four_nearest_indexes[1][polygon_with_point]
+    xdim_cs = four_nearest_indexes[2][polygon_with_point]
+
+    return nf_cs, ydim_cs, xdim_cs
+
+
+def find_index(
+        lat,
+        lon,
+        grid,
+        jitter_size=0.0
+):
+    """
+    Returns the cubed-sphere grid box indices corresponding to
+    given latitude and longitude coordinates.
+
+    Based on a routine developed, tested, and supplied by Liam Bindle.
+
+    Args:
+    -----
+    lat, lon : float
+        Latitude and longitude (degrees) of the point for which
+        cubed-sphere indices are desired.
+
+    grid : xarray.Dataset
+        Cubed-sphere grid definition with the following variables:
+        {'lat'   : lat midpoints,
+         'lon'   : lon midpoints,
+         'lat_b' : lat edges,
+         'lon_b' : lon edges}
+        where each value has an extra face dimension of length 6.
+
+    Keyword Args (optional):
+    ------------------------
+    jitter_size : float
+        If the point cannot be matched to a cubed-sphere grid box,
+        then shift longitude by the distance [m] specified in
+        jitter_size before doing the lookup once more.  A nonzero
+        jitter_size value may be needed when the latitude is close
+        to +90 or -90.  Default value: 0
+
+    Returns:
+    --------
+    ind : numpy.ndarray
+        Array containing (nf, YDim, XDim), where:
+        nf is the cubed-sphere face index (0-5) at (lat, lon);
+        YDim is the cubed-sphere Y (latitude) index at (lat, lon);
+        XDim is the cubed-sphere X (longitude) index at (lat, lon).
+    """
+    gcpy.util.verify_variable_type(grid, xr.Dataset)
+
+    lon_vec = np.asarray(lon)
+    lat_vec = np.asarray(lat)
+    n_find = lon_vec.size
+
+    # Get the corners
+    x_corners = grid['lon_b'].values
+    y_corners = grid['lat_b'].values
+    x_centers = grid['lon'].values
+    y_centers = grid['lat'].values
+    x_centers_flat = x_centers.flatten()
+    y_centers_flat = y_centers.flatten()
+
+    cs_size = x_centers.shape[-1]
+
+    # Generate everything that will be reused
+    # Get XY polygon definitions for grid boxes
+    # 5 (x,y) points defining polygon corners (first and last are same)
+    xy_polygon_defs = np.zeros((6, cs_size, cs_size, 5, 2))
+    for nf_cs in range(6):
+        xy_polygon_defs[nf_cs, ...] = corners_to_xy(
+            x_c=x_corners[nf_cs, :, :],
+            y_c=y_corners[nf_cs, :, :]
+        )
+    latlon_crs = pyproj.Proj("+proj=latlon")
+
+    # Find 4 shortest distances to (x_find, y_find)
+    idx = np.full((3,n_find), 0)
+    for x_find, y_find, i_find in \
+        zip(np.nditer(lon_vec), np.nditer(lat_vec), list(range(n_find))):
+
+        nf_cs, ydim_cs, xdim_cs = find_index_single(
+            y_find,
+            x_find,
+            x_centers_flat,
+            y_centers_flat,
+            xy_polygon_defs,
+            cs_size,
+            latlon_crs,
+            jitter_size=jitter_size
+        )
+        idx[:,i_find] = [nf_cs, ydim_cs, xdim_cs]
+
+    return idx
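Because find_index accepts array-like coordinates, several points can be looked up in one call. A hedged usage sketch mirroring the module docstring (which shows gen_grid and find_index re-exported at the gcpy top level; requires pyproj and shapely):

```python
import numpy as np
import gcpy

# Build a C24 cubed-sphere grid, then look up two points at once.
c24_grid = gcpy.gen_grid(24)
lats = np.array([40.0, -20.0])
lons = np.array([150.0, 30.0])

idx = gcpy.find_index(lats, lons, c24_grid)
# idx has shape (3, 2): rows are (nf, Ydim, Xdim), one column per point.
print(idx.shape)
```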
+
+
+def is_cubed_sphere(
+        data
+):
+    """
+    Given an xarray Dataset or DataArray object, determines if the
+    data is placed on a cubed-sphere grid.
+
+    Args:
+    -----
+    data : xarray.Dataset or xarray.DataArray
+        The input data to be tested
+
+    Returns:
+    --------
+    is_gchp : bool
+        Returns True if data is placed on a cubed-sphere grid,
+        and False otherwise.
+
+    Remarks:
+    --------
+    A cubed-sphere data file has one of the following attributes:
+    (1) A dimension named "nf" (GCHP/GEOS diagnostic files)
+    (2) The lat/lon ratio is exactly 6 (GCHP/GEOS checkpoints)
+    """
+    gcpy.util.verify_variable_type(data, (xr.DataArray, xr.Dataset))
+
+    if is_cubed_sphere_diag_grid(data):
+        return True
+    if is_cubed_sphere_rst_grid(data):
+        return True
+    return False
+
+
+def is_cubed_sphere_diag_grid(data):
+    """
+    Determines if a cubed-sphere grid has History file dimensions
+    (i.e. a dimension named "nf", aka number of grid faces).
+
+    Args:
+    -----
+    data : xarray.DataArray or xarray.Dataset
+        The input data.
+
+    Returns:
+    --------
+    True if the grid has History diagnostic dimensions,
+    False otherwise.
+    """
+    if "nf" in data.dims:
+        return True
+    return False
+
+
+def is_cubed_sphere_rst_grid(data):
+    """
+    Determines if a cubed-sphere grid has restart file dimensions
+    (i.e. lat and lon, with lat = lon*6).
+
+    Args:
+    -----
+    data : xarray.DataArray or xarray.Dataset
+        The input data.
+
+    Returns:
+    --------
+    True if the grid has restart dimensions, False otherwise.
+    """
+    gcpy.util.verify_variable_type(data, (xr.DataArray, xr.Dataset))
+
+    # TODO: Rethink this if we ever end up changing the GC-Classic
+    # restart variables to start with SPC, or if we ever rename the
+    # internal state variables in GCHP.  A more robust back-up check
+    # could be to see if all the lats and lons are integer, since
+    # that will be the case with the GCHP restart file format.
+    # NOTE: Use data.sizes so this works for Dataset and DataArray.
+    if "lat" in data.dims:
+        if data.sizes["lat"] == data.sizes["lon"] * 6:
+            return True
+    if any(var.startswith("SPC_") for var in data.data_vars.keys()):
+        return True
+    return False
+
+
+def get_cubed_sphere_res(data):
+    """
+    Given a Dataset or DataArray object, returns the number of
+    grid cells along one side of the cubed-sphere grid face
+    (e.g. 24 for grid resolution C24, which has 24x24 grid cells
+    per face).
+
+    Args:
+    -----
+    data : xarray.DataArray or xarray.Dataset
+        The input data.
+
+    Returns:
+    --------
+    cs_res : int
+        The cubed-sphere resolution.  Will return 0 if the data
+        is not placed on a cubed-sphere grid.
+    """
+    gcpy.util.verify_variable_type(data, (xr.DataArray, xr.Dataset))
+
+    if not is_cubed_sphere(data):
+        return 0
+    if is_cubed_sphere_rst_grid(data):
+        return data.sizes["lon"]
+    return data.sizes["Xdim"]
+
+
+def is_gchp_lev_positive_down(data):
+    """
+    Determines if GCHP data is arranged vertically from the top of the
+    atmosphere downwards or from the surface upwards, according to:
+
+    (1) Checkpoint files:     lev:positive="down"
+    (2) Emissions collection: lev:positive="down"
+    (3) Other collections:    lev:positive="up"
+
+    Args:
+    -----
+    data : xarray.DataArray or xarray.Dataset
+        The input data
+
+    Returns:
+    --------
+    True if the data is arranged from top-of-atm downwards.
+    False if the data is arranged from the surface upwards.
+ """ + gcpy.util.verify_variable_type(data, (xr.DataArray, xr.Dataset)) + + if is_cubed_sphere_rst_grid(data): + return True + if is_cubed_sphere_diag_grid(data): + emis_vars = [var for var in data.data_vars if var.startswith("Emis")] + if len(emis_vars) > 0: + return True + return False diff --git a/gcpy/emission_inventories.yml b/gcpy/emission_inventories.yml index 075d6488..46eb9e67 100644 --- a/gcpy/emission_inventories.yml +++ b/gcpy/emission_inventories.yml @@ -29,3 +29,5 @@ CH4Benchmark: GEPA: Tg Scarpelli_Canada: Tg Scarpelli_Mexico: Tg +TransportTracersBenchmark: + CEDS: Tg diff --git a/gcpy/emission_species.yml b/gcpy/emission_species.yml index 64121960..8f425679 100644 --- a/gcpy/emission_species.yml +++ b/gcpy/emission_species.yml @@ -5,6 +5,10 @@ FullChemBenchmark: BCPI: Tg BCPO: Tg BENZ: Tg + BrSALA: Tg + BrSALC: Tg + C2H2: Tg + C2H4: Tg C2H6: Tg C3H8: Tg CH2Br2: Tg @@ -18,6 +22,7 @@ FullChemBenchmark: DST4: Tg EOH: Tg ETNO3: Tg + FURA: Tg GLYC: Tg GLYX: Tg HAC: Tg @@ -50,3 +55,22 @@ FullChemBenchmark: pFe: Tg CH4Benchmark: CH4: Tg +TransportTracersBenchmark: +# Rn222: kg +# Pb210: kg +# Pb210s: kg + Be7: kg + Be7s: kg + Be10: kg + Be10s: kg + CH3I: Tg +# CO_25: Tg +# CO_50: Tg +# e90: Tg +# e90_n: Tg +# e90_s: Tg +# nh_5: Tg +# nh_50: Tg + SF6: Tg +# st80_25: Tg +# stOX: Tg diff --git a/examples/README.txt b/gcpy/examples/README.txt similarity index 100% rename from examples/README.txt rename to gcpy/examples/README.txt diff --git a/gcpy/examples/__init__.py b/gcpy/examples/__init__.py new file mode 100644 index 00000000..1d23ef3d --- /dev/null +++ b/gcpy/examples/__init__.py @@ -0,0 +1,11 @@ +""" +GCPy import script +""" +# NOTE: Will fail due to xbpch not being in the library +#from .bpch_to_nc import * +from .diagnostics import * +from .dry_run import * +from .plotting import * +from .timeseries import * +from .working_with_files import * +from .xarray_examples import * diff --git a/gcpy/examples/bpch_to_nc/__init__.py b/gcpy/examples/bpch_to_nc/__init__.py new file mode 100644 index 00000000..430c9569 --- /dev/null +++ b/gcpy/examples/bpch_to_nc/__init__.py @@ -0,0 +1,7 @@ +""" +GCPy import script +""" + +from .bpch2nc import * +from .bpch_tagco_prodloss_to_nc import * + diff --git a/examples/bpch_to_nc/bpch2nc.py b/gcpy/examples/bpch_to_nc/bpch2nc.py similarity index 100% rename from examples/bpch_to_nc/bpch2nc.py rename to gcpy/examples/bpch_to_nc/bpch2nc.py diff --git a/examples/bpch_to_nc/bpch_tagco_prodloss_to_nc.py b/gcpy/examples/bpch_to_nc/bpch_tagco_prodloss_to_nc.py similarity index 100% rename from examples/bpch_to_nc/bpch_tagco_prodloss_to_nc.py rename to gcpy/examples/bpch_to_nc/bpch_tagco_prodloss_to_nc.py diff --git a/examples/diagnostics/README.md b/gcpy/examples/diagnostics/README.md similarity index 100% rename from examples/diagnostics/README.md rename to gcpy/examples/diagnostics/README.md diff --git a/gcpy/examples/diagnostics/__init__.py b/gcpy/examples/diagnostics/__init__.py new file mode 100644 index 00000000..5746e32f --- /dev/null +++ b/gcpy/examples/diagnostics/__init__.py @@ -0,0 +1,6 @@ +""" +GCPy import script +""" + +from .compare_diags import * + diff --git a/examples/diagnostics/compare_diagnostics.ipynb b/gcpy/examples/diagnostics/compare_diagnostics.ipynb similarity index 100% rename from examples/diagnostics/compare_diagnostics.ipynb rename to gcpy/examples/diagnostics/compare_diagnostics.ipynb diff --git a/examples/diagnostics/compare_diags.py b/gcpy/examples/diagnostics/compare_diags.py similarity index 77% rename from 
examples/diagnostics/compare_diags.py rename to gcpy/examples/diagnostics/compare_diags.py index dc64113c..c80d2660 100755 --- a/examples/diagnostics/compare_diags.py +++ b/gcpy/examples/diagnostics/compare_diags.py @@ -2,19 +2,19 @@ """ Example script that can compare diagnostics from two different netCDF collections. Similar to compute_diagnostics.ipynb, but can be used -without having to open a Jupyter notebook. +without having to open a Jupyter notebook. The configuration +parameters are specified in a YAML file whose name is passed +as a command-line argument. """ - -# Imports import os import sys import warnings import numpy as np -import xarray as xr -from yaml import load as yaml_load_file -import gcpy.benchmark as bmk -import gcpy.constants as constants -import gcpy.util as util +from gcpy.util import add_missing_variables, compare_varnames, \ + dataset_reader, read_config_file, rename_and_flip_gchp_rst_vars +from gcpy.constants import skip_these_vars +from gcpy.plot.compare_single_level import compare_single_level +from gcpy.plot.compare_zonal_mean import compare_zonal_mean # Tell matplotlib not to look for an X-window os.environ["QT_QPA_PLATFORM"] = "offscreen" @@ -63,11 +63,8 @@ def read_data(config): Contains Ref and Dev data as xarray Dataset fields. """ - # If we are using the gcpy_test data, use the gcpy_test_dir - if config["options"]["gcpy_test"]: - rootdir = config["paths"]["test_data_dir"] - else: - rootdir = config["paths"]["main_dir"] + # Root data path + rootdir = config["paths"]["main_dir"] # Define paths to Ref & Dev files ref_file = os.path.join( @@ -83,25 +80,37 @@ config["data"]["dev"]["file"] ) + # Function to read the data + reader = dataset_reader( + multi_files=False, + verbose=False + ) + # Read Ref data try: - refdata = xr.open_dataset( + refdata = reader( ref_file, - drop_variables=constants.skip_these_vars - ) - except Exception: + drop_variables=skip_these_vars + ).load() + except FileNotFoundError as exc: msg = "Error reading " + ref_file - raise Exception(msg) + raise FileNotFoundError(msg) from exc # Read Dev data try: - devdata = xr.open_dataset( + devdata = reader( dev_file, - drop_variables=constants.skip_these_vars - ) - except Exception: + drop_variables=skip_these_vars + ).load() + except FileNotFoundError as exc: msg = "Error reading " + dev_file - raise Exception(msg) + raise FileNotFoundError(msg) from exc + + # If the data is from a GCHP restart file, rename variables and + # flip levels to match the GEOS-Chem Classic naming and level + # conventions. Otherwise no changes will be made. + refdata = rename_and_flip_gchp_rst_vars(refdata) + devdata = rename_and_flip_gchp_rst_vars(devdata) # Define dictionary for return data = { @@ -138,7 +147,7 @@ def print_totals_and_diffs(config, refdata, devdata, varlist): if any(x in config["options"]["totals_and_diffs"]["diff_type"] \ for x in ["percent", "pctdiff", "%"]): do_percent_diff = True - + # Determine if we will print to a file do_file = len(filename) > 0 if not do_file and not do_screen: @@ -151,7 +160,7 @@ config["paths"]["plots_dir"], filename ) - f = open(pathname, 'w') + ofile = open(pathname, 'w', encoding="UTF-8") # Percent threshold for reporting differences @@ -165,7 +174,7 @@ diff_label = f"|percent difference| > {threshold} %" line = f"...
Only showing variables with {diff_label}" if do_file: - print(line, file=f) + print(line, file=ofile) else: print(line) @@ -180,20 +189,20 @@ def print_totals_and_diffs(config, refdata, devdata, varlist): diff_label ) if do_file: - print(line, file=f) + print(line, file=ofile) if do_screen: print(line) # Always print nonzero differences, but only print zero differences # if the configuration option "skip_zero_diffs" is False. - for v in varlist: + for var in varlist: # Absolute difference - refsum = np.sum(refdata[v].values) - devsum = np.sum(devdata[v].values) + refsum = np.sum(refdata[var].values) + devsum = np.sum(devdata[var].values) # Absolute difference - absdiff = np.sum(devdata[v].values - refdata[v].values) + absdiff = np.sum(devdata[var].values - refdata[var].values) # Compute percent difference if needed # otherwise we'll use the absolute difference for plotting @@ -205,7 +214,7 @@ def print_totals_and_diffs(config, refdata, devdata, varlist): # Line to be printed line = "{} : {} | {} | {} ".format( - v.ljust(20), + var.ljust(20), str(refsum).ljust(22), str(devsum).ljust(22), diff @@ -214,7 +223,7 @@ def print_totals_and_diffs(config, refdata, devdata, varlist): # Skip small values if np.abs(diff) > threshold: if do_file: - print(line, file=f) + print(line, file=ofile) if do_screen: print(line) @@ -222,13 +231,13 @@ def print_totals_and_diffs(config, refdata, devdata, varlist): else: if not config["options"]["totals_and_diffs"]["skip_small_diffs"]: if do_file: - print(line, file=f) + print(line, file=ofile) if do_screen: print(line) # Close file if do_file: - f.close() + ofile.close() def compare_data(config, data): @@ -248,12 +257,12 @@ def compare_data(config, data): # array of NaN values to refdata. Ditto for devdata. This will # allow us to show that the variable is missing in either # refdata or devdata. 
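To make the add_missing_variables step concrete, here is a small sketch; the variable names are illustrative and the behavior assumed is the one described in the comment above:

```python
# Sketch (assumed behavior): variables present in only one Dataset are
# added to the other as all-NaN arrays, so Ref and Dev end up with a
# common variable list before comparison.
import numpy as np
import xarray as xr
from gcpy.util import add_missing_variables

ref = xr.Dataset({"SpeciesConc_O3": xr.DataArray(np.ones((2, 2)))})
dev = xr.Dataset({"SpeciesConc_CO": xr.DataArray(np.ones((2, 2)))})
[ref, dev] = add_missing_variables(ref, dev)
print(sorted(ref.data_vars))   # both Datasets now list O3 and CO
```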
- [refdata, devdata] = util.add_missing_variables(refdata, devdata) + [refdata, devdata] = add_missing_variables(refdata, devdata) # Get the list of common variable names verbose = config["options"]["verbose"] quiet = not verbose - vardict = util.compare_varnames(refdata, devdata, quiet=quiet) + vardict = compare_varnames(refdata, devdata, quiet=quiet) varlist_level = vardict["commonvars2D"] + vardict["commonvars3D"] varlist_zonal = vardict["commonvars3D"] @@ -263,6 +272,14 @@ def compare_data(config, data): varlist_level = [v for v in varlist_level if v in restrict_vars] varlist_zonal = [v for v in varlist_zonal if v in restrict_vars] + # Determine if we need to flip levels in the vertical + flip_ref = False + flip_dev = False + if "flip_levels" in config["data"]["ref"]: + flip_ref = config["data"]["ref"]["flip_levels"] + if "flip_levels" in config["data"]["dev"]: + flip_dev = config["data"]["dev"]["flip_levels"] + # ================================================================== # Generate the single level comparison plot # ================================================================== @@ -272,15 +289,18 @@ def compare_data(config, data): config["paths"]["plots_dir"], config["options"]["level_plot"]["pdfname"] ) - bmk.compare_single_level( + compare_single_level( refdata, config["data"]["ref"]["label"], devdata, config["data"]["dev"]["label"], + flip_ref=flip_ref, + flip_dev=flip_dev, ilev=config["options"]["level_plot"]["level_to_plot"], varlist=varlist_level, pdfname=pdfname, weightsdir=config["paths"]["weights_dir"], + n_job=config["options"]["n_cores"], verbose=verbose ) @@ -293,22 +313,24 @@ def compare_data(config, data): config["paths"]["plots_dir"], config["options"]["zonal_mean"]["pdfname"] ) - bmk.compare_zonal_mean( + compare_zonal_mean( refdata, config["data"]["ref"]["label"], devdata, config["data"]["dev"]["label"], + flip_ref=flip_ref, + flip_dev=flip_dev, varlist=varlist_zonal, pdfname=pdfname, weightsdir=config["paths"]["weights_dir"], + n_job=config["options"]["n_cores"], verbose=verbose ) # ================================================================== # Print totals for each quantity # ================================================================== - if config["options"]["totals_and_diffs"]["create_table"] or \ - config["options"]["totals_and_diffs"]["print_to_screen"]: + if config["options"]["totals_and_diffs"]["create_table"]: print('... Printing totals and differences') print_totals_and_diffs( config, @@ -318,20 +340,20 @@ def compare_data(config, data): ) -def main(): +def main(argv): """ Main program, reads data and calls compare_data to make plots. """ # Take the config file as the 2nd argument (or use a default) # NOTE: sys.argv[0] is always the program name! 
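For reference, the updated main(argv) signature lets the script be driven programmatically as well as from the command line; a hypothetical sketch (the YAML path is a placeholder):

```python
# Sketch: pass an argv-style list, mimicking
# "python compare_diags.py my_config.yml". A one-element list
# falls back to the default "compare_diags.yml".
from gcpy.examples.diagnostics.compare_diags import main

main(["compare_diags.py", "/path/to/my_config.yml"])
```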
- if len(sys.argv) == 2: - config_file = sys.argv[1] + if len(argv) == 2: + config_file = argv[1] else: config_file = "compare_diags.yml" # Get paths and options from the configuration file - config = util.read_config_file(config_file) + config = read_config_file(config_file) # Create dirs for plots & weights (if necessary) create_dirs(config) @@ -340,7 +362,6 @@ compare_data(config, read_data(config)) +# Only execute when we run as a standalone script if __name__ == "__main__": - main() - - + main(sys.argv) diff --git a/examples/diagnostics/compare_diags.yml b/gcpy/examples/diagnostics/compare_diags.yml similarity index 87% rename from examples/diagnostics/compare_diags.yml rename to gcpy/examples/diagnostics/compare_diags.yml index 864d86ab..69db1377 100644 --- a/examples/diagnostics/compare_diags.yml +++ b/gcpy/examples/diagnostics/compare_diags.yml @@ -3,7 +3,6 @@ paths: main_dir: /path/to/your/data # Add the path to your output here plots_dir: ./Results weights_dir: /n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/gcdata/ExtData/GCHP/RegriddingWeights - test_data_dir: /n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/geos-chem/validation/gcpy_test_data/1mon/ data: ref: @@ -18,7 +17,6 @@ data: file: GEOSChem.SpeciesConc.20190701_0000z.nc4 options: - gcpy_test: True verbose: False restrict_vars: [] level_plot: diff --git a/gcpy/examples/dry_run/__init__.py b/gcpy/examples/dry_run/__init__.py new file mode 100644 index 00000000..81c5a6d0 --- /dev/null +++ b/gcpy/examples/dry_run/__init__.py @@ -0,0 +1,6 @@ +""" +GCPy initialization script: gcpy/examples/dry_run +""" + +from .download_data import * + diff --git a/examples/dry-run/download_data.py b/gcpy/examples/dry_run/download_data.py similarity index 100% rename from examples/dry-run/download_data.py rename to gcpy/examples/dry_run/download_data.py diff --git a/examples/dry-run/download_data.yml b/gcpy/examples/dry_run/download_data.yml similarity index 100% rename from examples/dry-run/download_data.yml rename to gcpy/examples/dry_run/download_data.yml diff --git a/gcpy/examples/plotting/__init__.py b/gcpy/examples/plotting/__init__.py new file mode 100644 index 00000000..b9f7dd64 --- /dev/null +++ b/gcpy/examples/plotting/__init__.py @@ -0,0 +1,5 @@ +""" +GCPy import script +""" +from .plot_single_panel import * +from .plot_comparisons import * diff --git a/gcpy/examples/plotting/plot_comparisons.py b/gcpy/examples/plotting/plot_comparisons.py new file mode 100755 index 00000000..9ddd0561 --- /dev/null +++ b/gcpy/examples/plotting/plot_comparisons.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python +""" +Six Panel Comparison Plots +-------------------------------------- +This example script demonstrates the comparative plotting +capabilities of GCPy, including single level plots as well as +global zonal mean plots. These comparison plots are frequently +used to evaluate results from different runs / versions of +GEOS-Chem, but can also be used to compare results from different +points in one run that are stored in separate xarray datasets. + +The example data described here is in lat/lon format, but the same +code works equally well for cubed-sphere (GCHP) data. + +NOTE: If you are using GCPy from a Mac, set the environment variable: + + export MPLBACKEND="MacOSX" + +Otherwise set: + + export MPLBACKEND="tkagg" + +This will set the proper X11 backend (which is needed to open a plot +window on the screen). There is some incompatibility with the Tcl/Tk +backend "tkagg" in MacOS X operating systems.
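The same backend selection can also be done in code rather than via the environment variable; a minimal sketch (matplotlib.use() must run before pyplot is first imported):

```python
# Sketch: select the plotting backend programmatically.
import matplotlib
matplotlib.use("MacOSX")   # or "TkAgg" on Linux/Windows
import matplotlib.pyplot as plt
```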
+""" +import argparse +import xarray as xr +import matplotlib.pyplot as plt +from gcpy.constants import skip_these_vars +from gcpy.plot.compare_single_level import compare_single_level +from gcpy.plot.compare_zonal_mean import compare_zonal_mean +from gcpy.util import rename_and_flip_gchp_rst_vars + + +def plot_comparisons( + ref, + dev, + varname, + level +): + """ + Example function to create six-panel comparison plots. + + Args: + ----- + ref (str) : Path to the "Ref" data file. + dev (str) : Path to the "Dev" data file. + varname (str) : Variable to plot + level (int) : Level to plot (for single-level comparisons only). + """ + # xarray allows us to read in any NetCDF file, the format of + # GEOS-Chem diagnostics, #as an xarray Dataset + # + # The skip_these_vars list avoids trying to read certain + # GCHP variables that cause data read issues. + ref_ds = xr.open_dataset( + ref, + drop_variables=skip_these_vars + ) + dev_ds = xr.open_dataset( + dev, + drop_variables=skip_these_vars + ) + + # If the data is from a GCHP restart file, rename variables and + # flip levels to match the GEOS-Chem Classic naming and level + # conventions. Otherwise no changes will be made. + ref_ds = rename_and_flip_gchp_rst_vars(ref_ds) + dev_ds = rename_and_flip_gchp_rst_vars(dev_ds) + + # ================== + # Single level plots + # ================== + + # compare_single_level generates sets of six panel plots for + # data at a specified level in your datasets. By default, the + # level at index 0 (likely the surface) is plotted. + # + # You likely want to look at the same variables across both of + # your datasets. If a variable is in one dataset but not the other, + # the plots will show NaN values for the latter. You can pass + # variable names in a list to these comparison plotting functions + # (otherwise all variables will plot). + # + # NOTE: For simplicity, we will just restrict the comparisons + # to a single variable. But you can add as many variables as + # you like to varlist. + varlist = [varname] + + # compare_single_level has many arguments which can be optionally + # specified. The first four arguments are required. They specify + # your first xarray Dataset, the name of your first dataset, + # your second xarray Dataset, and the name of your second dataset. + # Here we will also pass a specific level and the names of the + # variables you want to plot. + compare_single_level( + ref_ds, + 'Ref version', + dev_ds, + 'Dev version', + ilev=level, + varlist=varlist + ) + plt.show() + + # Using plt.show(), you can view the plots interactively. + # You can also save out the plots to a PDF. + compare_single_level( + ref_ds, + 'Ref version', + dev_ds, + 'Dev version', + ilev=level, + varlist=varlist, + pdfname='single_level.pdf' + ) + + # ================== + # Zonal Mean Plots + # ================== + + # compare_zonal_mean generates sets of six panel plots containing + # zonal mean data across your dataset. compare_zonal_mean shares + # many of the same arguments as compare_single_level. 
You can + # specify pressure ranges in hPa for zonal mean plotting (by + # default every vertical level is plotted). + compare_zonal_mean( + ref_ds, + 'Ref version', + dev_ds, + 'Dev version', + pres_range=[0, 100], + varlist=varlist, + pdfname='zonal_mean.pdf' + ) + + +def main(): + """ + Parses command-line arguments and calls plot_comparisons + """ + + # Tell the parser which arguments to look for + parser = argparse.ArgumentParser( + description="Six-panel comparison plotting example program" + ) + parser.add_argument( + "-r", "--ref", + metavar="REF", + type=str, + required=True, + help="path to NetCDF file for the Ref model" + ) + parser.add_argument( + "-d", "--dev", + metavar="DEV", + type=str, + required=True, + help="path to NetCDF file for the Dev model" + ) + parser.add_argument( + "-v", "--varname", + metavar="VAR", + type=str, + required=True, + help="Variable name to plot" + ) + parser.add_argument( + "-l", "--level", + metavar="LEV", + type=int, + required=True, + help="level to plot (single-level plots only), starting at 0" + ) + + # Parse command-line arguments + args = parser.parse_args() + + # Call the plot_comparisons routine + plot_comparisons( + args.ref, + args.dev, + args.varname, + args.level + ) + + +if __name__ == "__main__": + main() diff --git a/gcpy/examples/plotting/plot_single_panel.py b/gcpy/examples/plotting/plot_single_panel.py new file mode 100755 index 00000000..b10f9d1b --- /dev/null +++ b/gcpy/examples/plotting/plot_single_panel.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python +""" +Global and Regional Single Panel Plots +-------------------------------------- +This example script demonstrates the core single panel plotting +capabilities of GCPy, including global and regional single level plots +as well as global zonal mean plots. + +The example data described here is in lat/lon format, but the same code +works equally well for cubed-sphere (GCHP) data. + +For full documentation on the plotting capabilities of GCPy +(including full argument lists), please see the GCPy documentation +at https://gcpy.readthedocs.io. + +NOTE: If you are using GCPy from a Mac, set the environment variable: + + export MPLBACKEND="MacOSX" + +Otherwise set: + + export MPLBACKEND="tkagg" + +This will set the proper X11 backend (which is needed to open a plot +window on the screen). There is some incompatibility with the Tcl/Tk +backend "tkagg" in MacOS X operating systems. +""" +import argparse +import xarray as xr +import matplotlib.pyplot as plt +from gcpy.plot.single_panel import single_panel +from gcpy.util import rename_and_flip_gchp_rst_vars + + +def plot_single_panel(infile, varname, level): + """ + Example routine to create single panel plots. + + Args: + ----- + infile (str) : Name of netCDF file to read. + varname (str) : Name of variable to plot + level (int) : Model level for single-panel plots + in Python notation (starting from 0) + """ + + # xarray allows us to read in any NetCDF file + dset = xr.open_dataset(infile) + + # If the data is from a GCHP restart file, rename variables and + # flip levels to match the GEOS-Chem Classic naming and level + # conventions. Otherwise no changes will be made. + dset = rename_and_flip_gchp_rst_vars(dset) + + # You can easily view the variables available for plotting + # using xarray. Each of these variables has its own xarray + # DataArray within the larger Dataset container. + print(dset.data_vars) + + # Most variables have some sort of prefix; in this example all + # variables are prefixed with 'SpeciesRst_'.
We'll select the + # DataArray for the requested variable. + darr = dset[varname] + + # Printing a DataArray gives a summary of the dimensions and attributes + # of the data. + print(darr) + + # ================== + # Single-level Plots + # ================== + + # single_panel is the core plotting function of GCPy, able to + # create a one panel zonal mean or single level plot. Here we will + # create a single level plot. We must manually index into the level + # (in Python notation, starting from 0). + darr_single_level = darr.isel(lev=level) + + # single_panel has many arguments which can be optionally specified. + # The only argument you must always pass to a call to single_panel is + # the DataArray that you want to plot. By default, the created plot + # includes a colorbar with units read from the DataArray, an + # automatic title (the data variable name in the DataArray), and + # an extent equivalent to the full lat/lon extent of the DataArray + single_panel( + darr_single_level, + title=f"{varname} at level {level}" + ) + plt.show() + + # You can specify a specific area of the globe you would like plotted + # using the 'extent' argument, which uses the format [min_longitude, + # max_longitude, min_latitude, max_latitude] with bounds + # [-180, 180, -90, 90] + single_panel( + darr_single_level, + extent=[50, -90, -10, 60], + title=f"{varname} at level {level} over N. Pacific" + ) + plt.show() + + # Other commonly used arguments include specifying a title and a + # colormap (defaulting to a White-Green-Yellow-Red colormap) + # You can find more colormaps at + # https://matplotlib.org/tutorials/colors/colormaps.html + single_panel( + darr_single_level, + title=f"{varname} at level {level} over N. Pacific, viridis colormap", + comap=plt.get_cmap("viridis"), + log_color_scale=True, + extent=[80, -90, -10, 60] + ) + plt.show() + + # =================== + # Zonal Mean Plotting + # =================== + + # Use the plot_type argument to specify zonal_mean plotting + single_panel( + darr, + plot_type="zonal_mean", + title=f"Zonal mean plot for {varname}, full atmosphere" + ) + plt.show() + + # You can specify pressure ranges in hPa for zonal mean plot + # (by default every vertical level is plotted) + single_panel( + darr, + pres_range=[0, 100], + log_yaxis=True, + log_color_scale=True, + plot_type="zonal_mean", + title=f"Zonal mean plot for {varname}, stratosphere-only" + ) + plt.show() + + +def main(): + """ + Parses command-line arguments and calls plot_single_panel.
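Condensing the walkthrough above into a minimal sketch (the restart filename and species name are hypothetical placeholders):

```python
# Sketch: one single-level plot and one zonal-mean plot via single_panel.
import xarray as xr
import matplotlib.pyplot as plt
from gcpy.plot.single_panel import single_panel

darr = xr.open_dataset("GEOSChem.Restart.20190701_0000z.nc4")["SpeciesRst_O3"]
single_panel(darr.isel(lev=0), title="Surface O3")                # single level
single_panel(darr, plot_type="zonal_mean", pres_range=[0, 100])   # stratosphere
plt.show()
```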
+ """ + + # Tell the parser which arguments to look for + parser = argparse.ArgumentParser( + description="Single-panel plotting example program" + ) + parser.add_argument( + "-i", "--infile", + metavar="INF", + type=str, + required=True, + help="input NetCDF file" + ) + parser.add_argument( + "-v", "--varname", + metavar="VARNAME", + type=str, + required=True, + help="variable to plot" + ) + parser.add_argument( + "-l", "--level", + metavar="LEV", + type=int, + required=True, + help="level to plot (single-panel plots only), starting at 0" + ) + + # Parse command-line arguments + args = parser.parse_args() + + # Call the plot_single_panel routine + plot_single_panel( + args.infile, + args.varname, + args.level + ) + + +if __name__ == "__main__": + main() diff --git a/examples/timeseries/README.txt b/gcpy/examples/timeseries/README.txt similarity index 100% rename from examples/timeseries/README.txt rename to gcpy/examples/timeseries/README.txt diff --git a/gcpy/examples/timeseries/__init__.py b/gcpy/examples/timeseries/__init__.py new file mode 100644 index 00000000..92fd2052 --- /dev/null +++ b/gcpy/examples/timeseries/__init__.py @@ -0,0 +1,6 @@ +""" +GCPy import script +""" +from .mda8_o3_timeseries import * +from .plot_timeseries import * + diff --git a/gcpy/examples/timeseries/mda8_o3_timeseries.py b/gcpy/examples/timeseries/mda8_o3_timeseries.py new file mode 100755 index 00000000..078469bd --- /dev/null +++ b/gcpy/examples/timeseries/mda8_o3_timeseries.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python +""" +MDA8 Timeseries Calculations +======================================== + +A common statistic used when constructing standards for air quality +criteria pollutants is to look at the ranked distribution of the +daily maxima of rolling 8-hour averages of a substance, or MDA8 for +short. +""" +# Author: Daniel Rothenberg +# Version: June 1, 2017 +# +# NOTE: Now placed into a function so that we only execute +# these commands when running as a standalone script. +# (Bob Yantosca, 09 Aug 2023) + +import matplotlib.pyplot as plt +import matplotlib.dates as mdates +import pandas as pd +import xarray as xr +plt.style.use(['seaborn-v0_8-talk', 'seaborn-v0_8-ticks']) + + +def main(): + """ + MDA8 O3 timeseries example + """ + + # Read hourly data ### EDIT THIS IF NECESSARY + InFile = 'GEOSChem.Hourly_SfcO3.2017.nc' + ds = xr.open_dataset(InFile) + o3_data = ds['SpeciesConc_O3'] + + # Compute the 8-hour rolling averages for ozone + avg_8hr_o3 = (o3_data.rolling(time=8, min_periods=6).mean()) + + # By default, this takes the last timestamp in a rolling interval; i.e. the + # timestamps correspond to the preceding 8 hours. 
We want them to refer to + # the following 8 hours, so we can adjust them using datetime arithmetic + times_np = avg_8hr_o3.time.values + times_pd = pd.to_datetime(times_np) - pd.Timedelta('8h') + avg_8hr_o3.time.values[:] = times_pd + + # Finally, aggregate by calendar day and compute the maxima of the set of + # 8-hour averages for each day + mda8_o3 = avg_8hr_o3.resample(time='D').max(dim='time') + mda8_o3.name = 'mda8_o3' + + # Save output to new netCDF file + mda8_o3.to_netcdf( + 'GEOSChem.MDA8_O3.2017.nc', 'w', + format='NETCDF4', + encoding={'lat': {'_FillValue': None}, + 'lon': {'_FillValue': None}, + 'time': {'_FillValue': None}, + 'mda8_o3': {'_FillValue': None}} + ) + + # Select data for one specific location, near Boston + boston_mda8_o3 = mda8_o3.sel(lon=-71., lat=42., method='nearest') + boston_o3 = o3_data.sel(lon=-71., lat=42., method='nearest') + + # Plot both the original (hourly) and MDA8 timeseries on the same plot. + fig = plt.figure(figsize=(9, 3)) + ax = fig.add_subplot(111) + boston_o3.plot(ax=ax, color='k') + ax.stem(boston_mda8_o3.time.values, boston_mda8_o3.data, + ':r', markerfmt='ro') + ax.set_ylim(0) + + ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %d")) + for tick in ax.xaxis.get_majorticklabels(): + tick.set_horizontalalignment('center') + + ax.set_xlabel("") + ax.set_ylabel("(MDA8) O$_3$ [ppb]") + + plt.show() + + +# Only execute when running as a standalone script if __name__ == '__main__': + main() diff --git a/examples/timeseries/plot_timeseries.py b/gcpy/examples/timeseries/plot_timeseries.py similarity index 97% rename from examples/timeseries/plot_timeseries.py rename to gcpy/examples/timeseries/plot_timeseries.py index 2add4027..83f52ce6 100755 --- a/examples/timeseries/plot_timeseries.py +++ b/gcpy/examples/timeseries/plot_timeseries.py @@ -32,15 +32,16 @@ ''' # Imports -import gcpy.constants as gcon import os +import warnings import numpy as np import matplotlib.dates as mdates import matplotlib.ticker as mticker import matplotlib.pyplot as plt from matplotlib.backends.backend_pdf import PdfPages import xarray as xr -import warnings +from gcpy import constants + # Tell matplotlib not to look for an X-window, as we are plotting to # a file and not to the screen. This will avoid some warning messages. @@ -70,7 +71,7 @@ def find_files_in_dir(path, substrs): List of files in the directory (specified by path) that match all substrings (specified in substrs). ''' - + # Initialize file_list = [] @@ -130,9 +131,9 @@ files may be found. collections: list of str - List of GEOS-Chem collections. Files for these + List of GEOS-Chem collections. Files for these collections will be read into the xarray Dataset. - + Returns: -------- ds : xarray Dataset @@ -145,15 +146,15 @@ # These are mostly variables introduced into GCHP with the MAPL v1.0.0 # update. These variables contain either repeated or non-standard # dimensions that can cause problems in xarray when combining datasets.
- skip_vars = gcon.skip_these_vars - - # Find all files in the given - file_list = find_files_in_dir(path, collections) + skip_vars = constants.skip_these_vars + + # Find all files in the given directory + file_list = find_files_in_dir(path, collections) # Return a single xarray Dataset containing data from all files # NOTE: Need to add combine="nested" for xarray 0.15 and higher v = xr.__version__.split(".") - if int(v[0]) == 0 and int(v[1]) >= 15: + if int(v[0]) == 0 and int(v[1]) >= 15: return xr.open_mfdataset(file_list, drop_variables=skip_vars, combine="nested", @@ -329,7 +330,7 @@ def main(): # Get a list of files in the ConcAboveSfc and SpeciesConc collections # (YOU CAN EDIT THIS FOR YOUR OWN PARTICULAR APPLICATION!) collections = ['ConcAboveSfc', 'SpeciesConc'] - + # Read GEOS-Chem data into an xarray Dataset ds = read_geoschem_data(path_to_data, collections) diff --git a/gcpy/examples/working_with_files/__init__.py b/gcpy/examples/working_with_files/__init__.py new file mode 100644 index 00000000..baa06058 --- /dev/null +++ b/gcpy/examples/working_with_files/__init__.py @@ -0,0 +1,7 @@ +""" +GCPy import script +""" +from .add_blank_var_to_restart_file import * +from .concatenate_files import * +from .insert_field_into_restart_file import * +from .regrid_restart_ll_to_cs import * diff --git a/examples/working_with_files/add_blank_var_to_restart_file.py b/gcpy/examples/working_with_files/add_blank_var_to_restart_file.py similarity index 76% rename from examples/working_with_files/add_blank_var_to_restart_file.py rename to gcpy/examples/working_with_files/add_blank_var_to_restart_file.py index 3c06a33e..db8c8c28 100755 --- a/examples/working_with_files/add_blank_var_to_restart_file.py +++ b/gcpy/examples/working_with_files/add_blank_var_to_restart_file.py @@ -10,10 +10,10 @@ add_blank_var.py varname infile outfile """ -from sys import argv import numpy as np import xarray as xr from gcpy.util import create_blank_dataarray +from gcpy.constants import skip_these_vars def add_blank_var_to_ncfile( @@ -42,33 +42,39 @@ """ with xr.set_options(keep_attrs=True): - ds = xr.open_dataset(infile) + dset = xr.open_dataset( + infile, + drop_variables=skip_these_vars + ) if varattrs is None: - varattrs = ds.attrs + varattrs = dset.attrs - da = create_blank_dataarray( + darr = create_blank_dataarray( varname, - ds.sizes, - ds.coords, + dset.sizes, + dset.coords, varattrs, fill_value=0.0, fill_type=np.float32 ) - ds = xr.merge([ds, da]) + dset = xr.merge([dset, darr]) - ds.to_netcdf(outfile) + dset.to_netcdf(outfile) -if __name__ == '__main__': +def main(): + """ + Main program + """ # Name of the blank variable to add (EDIT AS NEEDED) # NOTE: For GCHP, the prefix must be "SPC_" instead of "SpeciesRst_" - varname = "SpeciesRst_PRO2" + var_name = "SpeciesRst_PRO2" # Variable attributes (EDIT AS NEEDED) - varattrs = { + var_attrs = { "MW_g" : "146.98", "long_name" : "Dummy species to track production rate of RO2", "units" : "mol mol-1 dry", @@ -79,8 +85,12 @@ # Add blank variable to restart file (EDIT FILENAMES AS NEEDED) add_blank_var_to_ncfile( - varname, + var_name, 'GEOSChem.Restart.20190701_0000z.nc4', 'new.GEOSChem.Restart.20190701_0000z.nc4', - varattrs=varattrs + varattrs=var_attrs ) + +# Only execute when we run as a standalone script +if __name__ == '__main__': + main() diff --git a/examples/working_with_files/concatenate_files.py b/gcpy/examples/working_with_files/concatenate_files.py similarity index 80% rename from
examples/working_with_files/concatenate_files.py rename to gcpy/examples/working_with_files/concatenate_files.py index 60a9932d..3b910ee1 100755 --- a/examples/working_with_files/concatenate_files.py +++ b/gcpy/examples/working_with_files/concatenate_files.py @@ -17,12 +17,12 @@ ''' # Imports -import gcpy.constants as gcon import os +import warnings import numpy as np import xarray as xr from xarray.coding.variables import SerializationWarning -import warnings +from gcpy import constants # Suppress harmless run-time warnings (mostly about underflow or NaNs) warnings.filterwarnings("ignore", category=RuntimeWarning) @@ -55,25 +55,25 @@ # Walk through the given data directory. Then for each file found, # add it to file_list if it matches text in search_list. - for root, directory, files in os.walk(path): - for f in files: - for s in substrs: - if s in f: - file_list.append(os.path.join(root, f)) + for root, _, files in os.walk(path): + for file_name in files: + for sub_str in substrs: + if sub_str in file_name: + file_list.append(os.path.join(root, file_name)) # Return an alphabetically sorted list of files file_list.sort() return file_list -def replace_nans_with_zeroes(ds, verbose=True): +def replace_nans_with_zeroes(dset, verbose=True): ''' Replaces NaN values with zeroes for each variable within an xarray Dataset. Args: ---- - ds : xarray Dataset + dset : xarray Dataset The input dataset, containing one or more data variables. Keyword Args (optional): @@ -88,7 +88,7 @@ with xr.set_options(keep_attrs=True): # Loop over all variables in the Dataset - for v in ds.data_vars.keys(): + for var in dset.data_vars.keys(): # OPTIONAL STEP: # Xarray will try convert missing values to NaN's, @@ -99,18 +99,21 @@ # replace these with zeros, so that NaNs won't # get read into atmospheric models, etc. # - # NOTE: ds[v].values converts to a numpy ndarray, + # NOTE: dset[var].values converts to a numpy ndarray, # so that you can use numpy functions. - ds[v].where(np.isnan(ds[v].values), other=0.0, drop=False) + dset[var] = dset[var].where( + ~np.isnan(dset[var].values), + other=0.0 + ) # OPTIONAL: Print min & max for each variable # Comment out if you wish if verbose: - print('{} : {} {}'.format( - v, np.min(ds[v].values), np.max(ds[v].values))) + print(f"{var} : {np.min(dset[var].values)} {np.max(dset[var].values)}") # Return the modified Dataset - return ds + return dset def main(): @@ -130,24 +133,24 @@ # These are mostly variables introduced into GCHP with the MAPL v1.0.0 # update. These variables contain either repeated or non-standard # dimensions that can cause problems in xarray when combining datasets.
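As a side note to the replace_nans_with_zeroes helper above: xarray's built-in fillna accomplishes the same thing in one call; a sketch (the filename is a placeholder):

```python
# Sketch: fillna replaces NaNs in every data variable of a Dataset,
# equivalent to the per-variable loop in replace_nans_with_zeroes.
import xarray as xr

with xr.set_options(keep_attrs=True):
    dset = xr.open_dataset("my_input_file.nc")
    dset = dset.fillna(0.0)
```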
- skip_vars = gcon.skip_these_vars - + skip_vars = constants.skip_these_vars + # Look for all the netCDF files in the path file_list = find_files_in_dir(path_to_dir, substrs) # Return a single xarray Dataset containing data from all files # NOTE: Need to add combine="nested" for xarray 0.15 and higher - v = xr.__version__.split(".") - if int(v[0]) == 0 and int(v[1]) >= 15: - ds = xr.open_mfdataset(file_list, + var = xr.__version__.split(".") + if int(var[0]) == 0 and int(var[1]) >= 15: + dset = xr.open_mfdataset(file_list, drop_variables=skip_vars, combine="nested") else: - ds = xr.open_mfdataset(file_list, + dset = xr.open_mfdataset(file_list, drop_variables=skip_vars) # Replace NaN values with zeroes - ds = replace_nans_with_zeroes(ds, verbose=True) + dset = replace_nans_with_zeroes(dset, verbose=True) # Specify the path and filename for the concatenated data # (YOU CAN EDIT THIS) @@ -155,7 +158,9 @@ def main(): outfile = os.path.join(outdir, 'my_concatenated_output_file.nc') # Write concatenated data to a netCDF file - ds.to_netcdf(outfile) + dset.to_netcdf(outfile) + +# Only execute when running as a standalone script if __name__ == "__main__": main() diff --git a/examples/working_with_files/insert_field_into_restart_file.py b/gcpy/examples/working_with_files/insert_field_into_restart_file.py similarity index 69% rename from examples/working_with_files/insert_field_into_restart_file.py rename to gcpy/examples/working_with_files/insert_field_into_restart_file.py index 3e95d232..6b761b17 100755 --- a/examples/working_with_files/insert_field_into_restart_file.py +++ b/gcpy/examples/working_with_files/insert_field_into_restart_file.py @@ -8,10 +8,10 @@ """ # Imports -import gcpy.constants as gcon +import warnings import xarray as xr from xarray.coding.variables import SerializationWarning -import warnings +from gcpy import constants # Suppress harmless run-time warnings (mostly about underflow or NaNs) warnings.filterwarnings("ignore", category=RuntimeWarning) @@ -24,7 +24,7 @@ def main(): """ # Data vars to skip - skip_vars = gcon.skip_these_vars + skip_vars = constants.skip_these_vars # List of dates (EDIT accordingly) file_list = [ @@ -48,39 +48,41 @@ def main(): with xr.set_options(keep_attrs=True): # Loop over dates - for f in file_list: + for file_name in file_list: # Input and output files - infile = '../' + f - outfile = f + infile = '../' + file_name + outfile = file_name print("Creating " + outfile) # Open input file - ds = xr.open_dataset(infile, drop_variables=skip_vars) + dset = xr.open_dataset(infile, drop_variables=skip_vars) # Create a new DataArray from a given species (EDIT ACCORDINGLY) if "GCHP" in infile: - dr = ds["SPC_ETO"] - dr.name = "SPC_ETOO" + darr = dset["SPC_ETO"] + darr.name = "SPC_ETOO" else: - dr = ds["SpeciesRst_ETO"] - dr.name = "SpeciesRst_ETOO" + darr = dset["SpeciesRst_ETO"] + darr.name = "SpeciesRst_ETOO" # Update attributes (EDIT ACCORDINGLY) - dr.attrs["FullName"] = "peroxy radical from ethene" - dr.attrs["Is_Gas"] = "true" - dr.attrs["long_name"] = "Dry mixing ratio of species ETOO" - dr.attrs["MW_g"] = 77.06 + darr.attrs["FullName"] = "peroxy radical from ethene" + darr.attrs["Is_Gas"] = "true" + darr.attrs["long_name"] = "Dry mixing ratio of species ETOO" + darr.attrs["MW_g"] = 77.06 # Merge the new DataArray into the Dataset - ds = xr.merge([ds, dr], compat="override") + dset = xr.merge([dset, darr], compat="override") # Create a new file - ds.to_netcdf(outfile) + dset.to_netcdf(outfile) + + # Free memory by setting dset to a null dataset + dset 
= xr.Dataset() - # Free memory by setting ds to a null dataset - ds = xr.Dataset() +# Only execute when we run as a standalone script if __name__ == "__main__": main() diff --git a/gcpy/examples/working_with_files/regrid_restart_ll_to_cs.py b/gcpy/examples/working_with_files/regrid_restart_ll_to_cs.py new file mode 100755 index 00000000..27138ebd --- /dev/null +++ b/gcpy/examples/working_with_files/regrid_restart_ll_to_cs.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python +""" +Regrids a 4x5 GEOS-Chem Classic restart file to cubed-sphere resolutions. +""" + +# Imports +from os.path import join +import numpy as np +import xarray as xr +import sparselt.esmf +import sparselt.xr + +def main(): + """ + Regrids GEOS-Chem Classic restart files to cubed-sphere grids. + """ + # Path to regridding weights (EDIT AS NEEDED) + weights_dir = "/path/to/regridding/weights/" + + # List of simulation types (EDIT AS NEEDED) + simulation_list = ["carboncycle"] + + # List of months (EDIT AS NEEDED) + month_list = ["01", "07"] + + # List of cubed-sphere grids (EDIT AS NEEDED) + cubed_sphere_grid_list = ["c24", "c48", "c90", "c180", "c360"] + + # Preserves all global and variable attributes + with xr.set_options(keep_attrs=True): + + # Loop over simulation types + for sim in simulation_list: + + # Loop over months + for mm in month_list: + + # Read input data + infile = f"GEOSChem.Restart.{sim}.2019{mm}01_0000z.nc4" + print(f"Reading {infile}") + ds_in = xr.open_dataset(infile) + + # Rename GCClassic "SpeciesRst_" prefix to GCHP "SPC_" prefix + old_to_new_names = {} + for v in ds_in.data_vars.keys(): + if "SpeciesRst_" in v: + new_name = v.replace("SpeciesRst_", "SPC_") + old_to_new_names[v] = new_name + ds_in = ds_in.rename(old_to_new_names) + + # Loop over cubed-sphere grids + for cs in cubed_sphere_grid_list: + + # Number of grid points per side + cs_res = int(cs[1:]) + + # Regridding transform file + regrid_file = f"regrid_weights_latlon46x72_to_{cs}.nc" + weights_file = join(weights_dir, regrid_file) + + # Create a linear transform object from the regridding + # weights file for the combination of source and target + # horizontal resolutions. NOTE: GCHP restart files use + # a grid where lat = 6*cs_res. + transform = sparselt.esmf.load_weights( + weights_file, + input_dims=[('lat', 'lon'), (46, 72)], + output_dims=[('lat', 'lon'), (6*cs_res, cs_res)] + ) + + # Regrid to cubed-sphere + ds_out = sparselt.xr.apply(transform, ds_in) + + # Redefine coordinate arrays to be consistent + # with GCHP restart file expectations + coords_dict = { + "lon": np.arange(1, cs_res+1, dtype=np.float64), + "lat": np.arange(1, 6*cs_res+1, dtype=np.float64), + "lev": np.arange(1, 73, dtype=np.float64), + } + ds_out = ds_out.assign_coords(coords_dict) + + # Write the regridded data to the output file + outfile = f"GEOSChem.Restart.{sim}.2019{mm}01_0000z.{cs}.nc4" + print(f"Writing {outfile}") + ds_out.to_netcdf(outfile) + + # Cleanup + del transform + del ds_out + + # Cleanup + del ds_in + + +# Only execute when we run as a standalone script +if __name__ == '__main__': + main() diff --git a/gcpy/examples/xarray_examples/__init__.py b/gcpy/examples/xarray_examples/__init__.py new file mode 100644 index 00000000..3d4d1320 --- /dev/null +++ b/gcpy/examples/xarray_examples/__init__.py @@ -0,0 +1,6 @@ +""" +GCPy initialization script: gcpy/examples/xarray_examples +""" + +from .
import * + diff --git a/examples/xarray/xarray_overview.ipynb b/gcpy/examples/xarray_examples/xarray_overview.ipynb similarity index 100% rename from examples/xarray/xarray_overview.ipynb rename to gcpy/examples/xarray_examples/xarray_overview.ipynb diff --git a/gcpy/file_regrid.py b/gcpy/file_regrid.py index e27a5c17..41cfea01 100644 --- a/gcpy/file_regrid.py +++ b/gcpy/file_regrid.py @@ -1,107 +1,371 @@ +""" +Regrids data horizontally between lat/lon and/or cubed-sphere grids +(including stretched grids). +""" import argparse import os +import warnings import numpy as np import xarray as xr -try: - import xesmf as xe - from distutils.version import LooseVersion - if LooseVersion(xe.__version__) < LooseVersion("0.2.1"): - raise ImportError( - "file_regrid.py requires xESMF version 0.2.1 or higher.") -except ImportError as e: - print('file_regrid.py requires xESMF version 0.2.1 or higher!\n\nSee the installation ' + \ - 'instructions here: https://xesmf.readthedocs.io/en/latest/installation.html\n') -import pandas as pd - -from gcpy.grid import get_input_res, get_vert_grid, get_grid_extents -from gcpy.regrid import make_regridder_S2S, reformat_dims, make_regridder_L2S, \ - make_regridder_C2L, make_regridder_L2L -from gcpy.util import reshape_MAPL_CS +from gcpy.grid import get_input_res, get_grid_extents, \ + get_ilev_coord, get_lev_coord +from gcpy.regrid import make_regridder_S2S, reformat_dims, \ + make_regridder_L2S, make_regridder_C2L, make_regridder_L2L +from gcpy.util import verify_variable_type +from gcpy.cstools import get_cubed_sphere_res, is_gchp_lev_positive_down + +# Ignore any FutureWarnings +warnings.simplefilter(action="ignore", category=FutureWarning) + def file_regrid( - fin, fout, dim_format_in, dim_format_out, cs_res_out=0, - ll_res_out='0x0', sg_params_in=[1.0, 170.0, -90.0], - sg_params_out=[1.0, 170.0, -90.0], vert_params_out=[[], []]): + filein, + fileout, + dim_format_in, + dim_format_out, + cs_res_out=0, + ll_res_out="0x0", + sg_params_in=None, + sg_params_out=None, + verbose=False, + weightsdir="." +): """ - Regrids an input file to a new horizontal grid specification and saves it - as a new file. + Regrids an input file to a new horizontal grid specification + and saves it as a new file. Args: - fin: str - The input filename - fout: str - The output filename (file will be overwritten if it already exists) - dim_format_in: str - Format of the input file's dimensions (choose from: classic, - checkpoint, diagnostic), where classic denotes lat/lon and - checkpoint / diagnostic are cubed-sphere formats - dim_format_out: str - Format of the output file's dimensions (choose from: classic, - checkpoint, diagnostic), where classic denotes lat/lon - and checkpoint / diagnostic are cubed-sphere formats + ----- + filein: str + The input filename + fileout: str + The output filename (file will be overwritten if it already exists) + dim_format_in: str + Format of the input file's dimensions (choose from: classic, + checkpoint, diagnostic), where classic denotes lat/lon and + checkpoint / diagnostic are cubed-sphere formats + dim_format_out: str + Format of the output file's dimensions (choose from: classic, + checkpoint, diagnostic), where classic denotes lat/lon + and checkpoint / diagnostic are cubed-sphere formats Keyword Args (optional): - cs_res_out: int - The cubed-sphere resolution of the output dataset. - Not used if dim_format_out is classic - Default value: 0 - ll_res_out: str - The lat/lon resolution of the output dataset. 
- Not used if dim_format_out is not classic - Default value: '0x0' - sg_params_in: list[float, float, float] - Input grid stretching parameters - [stretch-factor, target longitude, target latitude]. - Not used if dim_format_in is classic - Default value: [1.0, 170.0, -90.0] (No stretching) - sg_params_out: list[float, float, float] - Output grid stretching parameters - [stretch-factor, target longitude, target latitude]. - Not used if dim_format_out is classic - Default value: [1.0, 170.0, -90.0] (No stretching) - vert_params_out: list(list, list) of list-like types - Hybrid grid parameter A in hPa and B (unitless) in [AP, BP] format. - Needed for lat/lon output if not using full 72-level or 47-level grid - Default value: [[], []] - + ------------------------ + cs_res_out: int + The cubed-sphere resolution of the output dataset. + Not used if dim_format_out is classic + Default value: 0 + ll_res_out: str + The lat/lon resolution of the output dataset. + Not used if dim_format_out is not classic + Default value: "0x0" + sg_params_in: list[float, float, float] + Input grid stretching parameters + [stretch-factor, target longitude, target latitude]. + Not used if dim_format_in is classic. + Default value: [1.0, 170.0, -90.0] (No stretching) + sg_params_out: list[float, float, float] + Output grid stretching parameters + [stretch-factor, target longitude, target latitude]. + Not used if dim_format_out is classic + Default value: [1.0, 170.0, -90.0] (No stretching) + verbose : bool + Toggles verbose output on (True) or off (False). + weightsdir : str + Path to the directory containing regridding weights (or + where weights will be created). Default value: "." + """ + verify_variable_type(filein, str) + verify_variable_type(fileout, str) + verify_variable_type(dim_format_in, str) + verify_variable_type(dim_format_out, str) + + # TODO: Consider renaming checkpoint, classic, diagnostic, + # which may be confusing to users. + + # Error check arguments + valid_formats = ["checkpoint", "classic", "diagnostic"] + if dim_format_in not in valid_formats: + msg = f"Argument 'dim_format_in' must be one of: {valid_formats}!" + raise ValueError(msg) + if dim_format_out not in valid_formats: + msg = f"Argument 'dim_format_out' must be one of: {valid_formats}!" + raise ValueError(msg) + + # Assign default values for optional keywords + if sg_params_in is None: + sg_params_in = [1.0, 170.0, -90.0] + if sg_params_out is None: + sg_params_out = [1.0, 170.0, -90.0] + + # ------------------------------------------------------------------ + # There still seem to be a few issues with regridding to cubed- + # sphere stretched grids. For the time being, stop with error + # if sg_params_in or sg_params_out do not equal the defaults. + # -- Bob Yantosca & Lizzie Lundgren (24 Oct 2023) + if not np.array_equal(sg_params_in, [1.0, 170.0, -90.0]) or \ + not np.array_equal(sg_params_out, [1.0, 170.0, -90.0]): + msg = "Regridding to or from cubed-sphere stretched grids is\n" + \ + "currently not supported. Please use the offline regridding\n" + \ + "method described in the Regridding section of gcpy.readthedocs.io."
+ raise RuntimeError(msg) + # ------------------------------------------------------------------ # Load dataset - ds_in = xr.open_dataset(fin, decode_cf=False) - ds_in = ds_in.load() - time = ds_in.time - cs_res_in = 0 - if dim_format_in != 'classic': - # Reformat dimensions to T, Z, F, Y, X - ds_in = reformat_dims(ds_in, format=dim_format_in, towards_common=True) - - # Drop variables that don't look like fields - non_fields = [ - v for v in ds_in.variables.keys() - if len(set(ds_in[v].dims) - {'T', 'Z', 'F', 'Y', 'X'}) > 0 or - len(ds_in[v].dims) == 0] - ds_in = ds_in.drop(non_fields) + dset = xr.open_dataset( + filein, + decode_cf=False, + engine="netcdf4" + ).load() + cs_res_in = get_cubed_sphere_res(dset) + + # Verbose printout of inputs + if verbose: + print("Inputs to file_regrid.py") + print(f"filein : {filein}") + print(f"dim_format_in : {dim_format_in}") + if "classic" not in dim_format_in: + print(f"sg_params_in : {sg_params_in}") + print(f"fileout : {fileout}") + print(f"dim_format_out : {dim_format_out}") + if "classic" in dim_format_out: + print(f"ll_res_out : {ll_res_out}") + else: + print(f"cs_res_out : {cs_res_out}") + print(f"sg_params_out : {sg_params_out}") + print(f"verbose : {verbose}") + print(f"weightsdir : {weightsdir}") - # Transpose to T, Z, F, Y, X - ds_in = ds_in.transpose('T', 'Z', 'F', 'Y', 'X') + # Make sure all xarray.Dataset global & variable attributes are kept + with xr.set_options(keep_attrs=True): - assert ds_in.dims['X'] == ds_in.dims['Y'] - cs_res_in = ds_in.dims['X'] + # ============================================================== + # Regrid data + # ============================================================== - elif dim_format_in == 'classic' and dim_format_out != 'classic': - ds_in = drop_and_rename_classic_vars(ds_in) + # Save type of data for later restoration + # Avoid using the dtype of GCHP cubed-sphere grid variables + dset_tmp = dset + dtype_orig = np.dtype(dset[list(dset_tmp.data_vars.keys())[-1]]) + dset_tmp = xr.Dataset() + + if dim_format_in != "classic" and dim_format_out != "classic": + + # ---------------------------------------------------------- + # Input grid is CS/SG; Output grid is CS/SG + # ---------------------------------------------------------- + dset = regrid_cssg_to_cssg( + fileout, + dset, + dim_format_in, + sg_params_in, + cs_res_out, + dim_format_out, + sg_params_out, + verbose=verbose, + weightsdir=weightsdir + ) + + elif dim_format_in == "classic" and dim_format_out != "classic": + + # ---------------------------------------------------------- + # Input grid is LL; Output grid is CS/SG + # ---------------------------------------------------------- + dset = regrid_ll_to_cssg( + dset, + cs_res_out, + dim_format_out, + sg_params_out, + verbose=verbose, + weightsdir=weightsdir + ) + + elif dim_format_in != "classic" and dim_format_out == "classic": + + # ---------------------------------------------------------- + # Input grid is CS/SG; Output grid is LL + # ---------------------------------------------------------- + dset = regrid_cssg_to_ll( + dset, + cs_res_in, + dim_format_in, + sg_params_in, + ll_res_out, + verbose=verbose, + weightsdir=weightsdir + ) + + elif dim_format_in == "classic" and dim_format_out == "classic": + + # ---------------------------------------------------------- + # Input grid is LL; Output grid is LL + # ---------------------------------------------------------- + dset = regrid_ll_to_ll( + dset, + ll_res_out, + verbose=verbose, + weightsdir=weightsdir + ) + + # 
============================================================== + # Post-regridding stuff + # ============================================================== + + # Correct precision changes (accidental 32-bit to 64-bit) + # NOTE: Add a workaround to prevent the xr.DataArray.astype + # function from overwriting the "lev" dimension. + dset_tmp = dset.astype( + dtype=dtype_orig, + casting="same_kind", + copy=False + ) + dset = dset_tmp.assign_coords(lev=dset.lev) + + # Write dataset to file + dset.to_netcdf( + fileout, + mode="w", + format="NETCDF4", + engine="netcdf4", + unlimited_dims=["time"], + ) + + # Free memory of the temporary dataset + dset_tmp = xr.Dataset() + + # Print the resulting dataset + if verbose: + print(dset) + + +def prepare_cssg_input_grid( + dset, + dim_format_in +): + """ + Reformats cubed-sphere/stretched grid data to the universal + format and drops non-regriddable fields. + + Args: + ----- + dset : xr.Dataset + Input grid (cubed-sphere or stretched grid) + dim_format_in : str + Either "checkpoint" (for restart files) + or "diagnostic" (for History diagnostic files) + + Returns: + -------- + dset : xr.Dataset + Data with reformatted dimensions and dropped fields + cs_res_in : int + Cubed-sphere/stretched grid resolution + """ + + # Reformat dimensions to "common dimensions" (T, Z, F, Y, X) + dset = reformat_dims( + dset, + dim_format_in, + towards_common=True + ) - # Drop variables that don't look like fields + # NOTE: Don't drop "lons" and "lats" if present. + non_fields = [ + v for v in dset.variables.keys() + if len(set(dset[v].dims) - {"T", "Z", "F", "Y", "X"}) > 0 + or len(dset[v].dims) == 0] + dset_in = dset.drop(non_fields) - # Transpose to T, Z, F, Y, X + dset = dset_in.transpose("T", "Z", "F", "Y", "X") + + assert dset.dims["X"] == dset.dims["Y"] + cs_res_in = dset.dims["X"] + + return dset, cs_res_in + + +def regrid_cssg_to_cssg( + fileout, + dset, + dim_format_in, + sg_params_in, + cs_res_out, + dim_format_out, + sg_params_out, + verbose=False, + weightsdir="." +): + """ + Regrids from the cubed-sphere/stretched grid to a different + cubed-sphere/stretched grid resolution. + + Args: + ----- + fileout : str + File name template + dset : xarray.Dataset + Data on a cubed-sphere/stretched grid + dim_format_in, dim_format_out : str + Input & output grid format ("checkpoint", "diagnostic") + cs_res_out : int + Cubed-sphere grid resolution + sg_params_in, sg_params_out: list[float, float, float] + Input & output grid stretching parameters + [stretch-factor, target longitude, target latitude]. + + Keyword Args (optional): + ------------------------ + verbose : bool + Toggles verbose output on (True) or off (False). + weightsdir : str + Path to the directory containing regridding weights (or + where weights will be created). Default value: "."
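Stepping back from the helper routines, here is a top-level usage sketch for the file_regrid entry point defined above (filenames, resolution, and weights directory are illustrative assumptions):

```python
# Sketch: regrid a GCHP C48 checkpoint (restart) file to a 4x5
# lat-lon file, storing regridding weights under ./weights.
from gcpy.file_regrid import file_regrid

file_regrid(
    filein="GEOSChem.Restart.20190701_0000z.c48.nc4",
    fileout="GEOSChem.Restart.20190701_0000z.4x5.nc4",
    dim_format_in="checkpoint",
    dim_format_out="classic",
    ll_res_out="4x5",
    weightsdir="./weights",
)
```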
+ + Returns: + -------- + dset : xarray.Dataset + Data regridded to the output cubed-sphere/stretched grid + """ + if verbose: + print("file_regrid.py: Regridding from CS/SG to CS/SG") + + # Keep all xarray attributes + with xr.set_options(keep_attrs=True): + + # Flip vertical levels (if necessary) and + # set the lev:positive attribute accordingly + dset = flip_lev_coord_if_necessary( + dset, + dim_format_in=dim_format_in, + dim_format_out=dim_format_out + ) + + # Change CS/SG dimensions to universal format + # and drop non-regriddable variables + dset, cs_res_in = prepare_cssg_input_grid( + dset, + dim_format_in + ) + + # ============================================================== + # Skip regridding if the cubed-sphere grids are identical + # (i.e. same resolution & stretched-grid parameters) + # ============================================================== + if cs_res_in == cs_res_out and \ + np.array_equal(sg_params_in, sg_params_out) and \ + dim_format_in == dim_format_out: + print("Skipping regridding since grid parameters are identical") + + # Put regridded dataset back into a familiar format + dset = dset.rename({ + "y": "Y", + "x": "X", + }) + + return dset - # Make regridders regridders = make_regridder_S2S( cs_res_in, @@ -111,312 +375,1177 @@ def file_regrid( tlat_in=sg_params_in[2], sf_out=sg_params_out[0], tlon_out=sg_params_out[1], - tlat_out=sg_params_out[2]) + tlat_out=sg_params_out[2], + weightsdir=weightsdir + ) + # Save temporary output face files to minimize RAM usage - oface_files = [os.path.join('.',fout+str(x)) for x in range(6)] + oface_files = [os.path.join(".",fileout+str(x)) for x in range(6)] + # For each output face, sum regridded input faces - oface_datasets = [] for oface in range(6): oface_regridded = [] - for iface, regridder in regridders[oface].items(): - ds_iface = ds_in.isel(F=iface) - if 'F' in ds_iface.coords: - ds_iface = ds_iface.drop('F') - oface_regridded.append(regridder(ds_iface, keep_attrs=True)) + for (iface, regridder) in regridders[oface].items(): + dset_iface = dset.isel(F=iface) + if "F" in dset_iface.coords: + dset_iface = dset_iface.drop("F") + oface_regridded.append( + regridder( + dset_iface, + keep_attrs=True + ) + ) oface_regridded = xr.concat( oface_regridded, - dim='intersecting_ifaces').sum( - 'intersecting_ifaces', - keep_attrs=True).expand_dims({'F':[oface]}) + dim="intersecting_ifaces" + ).sum( + "intersecting_ifaces", + keep_attrs=True).expand_dims({"F":[oface]}) oface_regridded.to_netcdf( oface_files[oface], - format='NETCDF4_CLASSIC' + format="NETCDF4", + engine="netcdf4", + mode="w" ) - ds_out=xr.open_mfdataset(oface_files, combine='by_coords', concat_dim='F',engine='netcdf4') + + # Combine face files + dset = xr.open_mfdataset( + oface_files, + combine="nested", + concat_dim="F", + engine="netcdf4" + ) + + # Remove any temporary files + for oface in oface_files: + os.remove(oface) + + # ============================================================== + # Reshape the data if necessary + # ============================================================== + + # Put regridded dataset back into a familiar format + dset = dset.rename({ + "y": "Y", + "x": "X", + }) + # Reformat dimensions from
"common dimension format" + # to CS/GG "checkpoint" or "diagnostics" format + dset = reformat_dims( + dset, + format=dim_format_out, + towards_common=False + ) + + # Rename variables if we are going between different grid types + if "checkpoint" in dim_format_in and "diagnostic" in dim_format_out: + dset = rename_restart_variables( + dset, + towards_gchp=False + ) + if "diagnostic" in dim_format_in and "checkpoint" in dim_format_out: + dset = rename_restart_variables( + dset, + towards_gchp=True + ) + + # Fix names and attributes of of coordinate variables depending + # on the format of the ouptut grid (checkpoint or diagnostic). + dset = adjust_cssg_grid_and_coords( + dset, + dim_format_in, + dim_format_out + ) + + # Save stretched-grid metadata as global attrs + dset = save_cssg_metadata( + dset, + cs_res_out, + dim_format_out, + sg_params_out, + verbose=verbose + ) + + return dset + + +def regrid_cssg_to_ll( + dset, + cs_res_in, + dim_format_in, + sg_params_in, + ll_res_out, + verbose=False, + weightsdir="." +): + """ + Regrids from the cubed-sphere/stretched grid to the lat-lon grid. + + Args: + ----- + dset : xarray.Dataset + Data on a cubed-sphere/stretched grid + cs_res_in : int + Cubed-sphere grid resolution + dim_format_in : str + Input grid format ("checkpoint", "diagnostic") + sg_params_in: list[float, float, float] + Input grid stretching parameters + [stretch-factor, target longitude, target latitude]. + ll_res_out : str + Output grid lat/lon resolution (e.g. "4x5") + + Keyword Args (optional): + ------------------------ + verbose: bool + Toggles verbose printout on (True) or off (False) + weightsdir : str + Path to the directory containing regridding weights (or + where weights will be created). Default value: "." + + Returns: + -------- + dset : xarray.Dataset + Data regridded to the output lat-lon grid + """ + if verbose: + print("file_regrid.py: Regridding from CS/SG to LL") + + with xr.set_options(keep_attrs=True): + + # Flip vertical levels (if necessary) and + # set the lev:positive attribute accordingly + dset = flip_lev_coord_if_necessary( + dset, + dim_format_in=dim_format_in, + dim_format_out="classic" + ) + + # Drop non-regriddable variables (if any) + dset = drop_classic_vars( + dset, + towards_gchp=False + ) + + # Change CS/SG dimensions to universal format + # and drop non-regriddable variables + dset, cs_res_in = prepare_cssg_input_grid( + dset, + dim_format_in + ) + + # Regrid data + regridders = make_regridder_C2L( + cs_res_in, + ll_res_out, + sg_params=sg_params_in, + weightsdir=weightsdir + ) + dset = xr.concat( + [regridders[face](dset.isel(F=face), keep_attrs=True) + for face in range(6)], + dim="F" + ).sum("F", keep_attrs=True) + + # Update dimensions and attributes on the lat-lon grid + dset = dset.rename({ + "T": "time", + "Z": "lev" + }) + + # If regridding from a GCHP checkpoint/restart file, then + # rename variables to adhere GCClassic name conventions. + if "checkpoint" in dim_format_in: + dset = rename_restart_variables( + dset, + towards_gchp=False + ) + + # Save lat/lon coordinate metadata + dset = save_ll_metadata( + dset, + verbose=verbose + ) + + # Drop cubed-sphere variables + if "lons" in dset.data_vars: + dset = dset.drop_vars(["lons"]) + if "lats" in dset.data_vars: + dset = dset.drop_vars(["lats"]) + + return dset + + +def regrid_ll_to_cssg( + dset, + cs_res_out, + dim_format_out, + sg_params_out, + verbose=False, + weightsdir="." +): + """ + Regrids from the lat-lon grid to the cubed-sphere/stretched grid. 
+ + Args: + ----- + dset : xarray.Dataset + Data on a lat/lon grid + cs_res_out : int + Cubed-sphere grid resolution + dim_format_out : str + Either "checkpoint" (for restart files) or + "diagnostic" (for History diagnostic files). + sg_params_out: list[float, float, float] + Output grid stretching parameters + [stretch-factor, target longitude, target latitude]. + + Keyword Args (optional): + ------------------------ + verbose : bool + Toggles verbose output on (True) or off (False). + weightsdir : str + Path to the directory containing regridding weights (or + where weights will be created). Default value: "." + + Returns: + -------- + dset : xarray.Dataset + Data regridded to the output cubed-sphere/stretched-grid + """ + if verbose: + print("file_regrid.py: Regridding from LL to CS/SG") + + with xr.set_options(keep_attrs=True): + + # Flip vertical levels (if necessary) and set lev:positive + dset = flip_lev_coord_if_necessary( + dset, + dim_format_in="classic", + dim_format_out=dim_format_out + ) + + # Drop non-regriddable variables when going from ll -> cs + dset = drop_classic_vars(dset) + + # If regridding to a GCHP checkpoint/restart file, then + # rename variables to adhere to GCHP naming conventions. + if "checkpoint" in dim_format_out: + dset = rename_restart_variables( + dset, + towards_gchp=True + ) + + # Input lat/lon grid resolution + llres_in = get_input_res(dset)[0] + + # Regrid data to CS/SG regridders = make_regridder_L2S( - llres_in, cs_res_out, sg_params=sg_params_out) - ds_out = xr.concat([regridders[face](ds_in, keep_attrs=True) - for face in range(6)], dim='nf') - # flip vertical - ds_out = ds_out.reindex(lev=ds_out.lev[::-1]) - ds_out = ds_out.rename({ - 'y': 'Ydim', - 'x': 'Xdim', + llres_in, + cs_res_out, + sg_params=sg_params_out, + weightsdir=weightsdir + ) + dset = xr.concat( + [regridders[face](dset, keep_attrs=True) for face in range(6)], + dim="nf" + ) + + # Rename dimensions to the "common dimension format" + dset = dset.rename({ + "time": "T", + "lev": "Z", + "nf": "F", + "y": "Y", + "x": "X", + "lat": "Y", + "lon": "X" + }) - # lat, lon are from xESMF which we don't want - ds_out = ds_out.drop(['lat', 'lon']) - - if dim_format_out == 'checkpoint': - # convert to checkpoint format - ds_out = reshape_MAPL_CS(ds_out) - mi = pd.MultiIndex.from_product([ - np.linspace(1, 6, 6), - np.linspace(1, cs_res_out, cs_res_out) - ]) - ds_out = ds_out.assign_coords({'lat': mi}) - ds_out = ds_out.unstack('lat') - - ds_out = ds_out.stack(lat=['lat_level_0', 'lat_level_1']) - ds_out = ds_out.assign_coords({ - 'lat': np.linspace(1, 6 * cs_res_out, 6 * cs_res_out), - 'lon': np.linspace(1, ds_out.lon.size, ds_out.lon.size), - 'lev': np.linspace(ds_out.lev.size, 1, ds_out.lev.size) - }) - ds_out = ds_out.transpose('time', 'lev', 'lat', 'lon') - else: - # convert to diagnostic format - ds_out = ds_out.transpose('time', 'lev', 'nf', 'Ydim', 'Xdim') - ds_out = ds_out.assign_coords({ - 'nf': np.linspace(1, 6, 6), - 'lev': np.linspace(1, 72, 72)}) - print( - 'WARNING: xarray coordinates are not fully implemented for diagnostic format') - - elif dim_format_in != 'classic' and dim_format_out == 'classic': - # SG/CS to LL - regridders = make_regridder_C2L( - cs_res_in, ll_res_out, sg_params=sg_params_in) - ds_out = xr.concat( - [regridders[face](ds_in.isel(F=face), - keep_attrs=True) for face in range(6)], - dim='F').sum( - 'F', keep_attrs=True) - ds_out = ds_out.rename({ - 'T': 'time', - 'Z': 'lev'}) - ds_out = drop_and_rename_classic_vars(ds_out, towards_gchp=False) - ds_out =
ds_out.reindex(lev=ds_out.lev[::-1]) - _, lev_coords, _ = get_vert_grid(ds_out, *vert_params_out) - ds_out = ds_out.assign_coords({'lev': lev_coords}) - ds_out['lat'].attrs = {'long_name': 'Latitude', - 'units': 'degrees_north', - 'axis': 'Y'} - ds_out['lon'].attrs = {'long_name': 'Longitude', - 'units': 'degrees_east', - 'axis': 'X'} - elif dim_format_in == 'classic' and dim_format_out == 'classic': - # ll to ll - in_extent = get_grid_extents(ds_in) + + # Reformat dims from "common dimension format" to "diagnostic" + # (we will convert to "checkpoint" later) + dset = reformat_dims( + dset, + format="diagnostic", + towards_common=False + ) + + # Fix names and attributes of coordinate variables depending + # on the format of the output grid (checkpoint or diagnostic). + # Also convert the "diagnostic" grid to the "checkpoint" grid + # if "checkpoint" output is requested. + dset = adjust_cssg_grid_and_coords( + dset, + dim_format_in="diagnostic", + dim_format_out=dim_format_out + ) + + # Save stretched-grid metadata as global attrs + dset = save_cssg_metadata( + dset, + cs_res_out, + dim_format_out, + sg_params_out, + verbose=verbose + ) + + return dset + + +def regrid_ll_to_ll( + dset, + ll_res_out, + verbose=False, + weightsdir="." +): + """ + Regrids from one lat-lon grid resolution to another. + + Args: + ----- + dset : xarray.Dataset + Data on a lat/lon grid + ll_res_out : str + Output lat-lon grid resolution (e.g. "4x5") + + Keyword Args (optional): + ------------------------ + verbose : bool + Toggles verbose output on (True) or off (False). + weightsdir : str + Path to the directory containing regridding weights (or + where weights will be created). Default value: "." + + Returns: + -------- + dset : xarray.Dataset + Data regridded to the output lat-lon grid.
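+
+    Example:
+    --------
+    A minimal usage sketch (assumes dset holds 4x5 data already opened
+    with xarray):
+
+    >>> dset_2x25 = regrid_ll_to_ll(dset, "2x2.5")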
+ """ + if verbose: + print("file_regrid.py: Regridding from LL to LL") + + with xr.set_options(keep_attrs=True): + + # Get the input & output extents + in_extent = get_grid_extents(dset) out_extent = in_extent - ll_res_in = get_input_res(ds_in)[0] - [lat_in, lon_in] = list(map(float, ll_res_in.split('x'))) - [lat_out, lon_out] = list(map(float, ll_res_out.split('x'))) + ll_res_in = get_input_res(dset)[0] + [lat_in, lon_in] = list(map(float, ll_res_in.split("x"))) + [lat_out, lon_out] = list(map(float, ll_res_out.split("x"))) + # Return if the output & input grids are the same if lat_in == lat_out and lon_in == lon_out: - print('Skipping regridding since grid parameters are identical') - ds_out = ds_in - else: - lon_attrs = ds_in.lon.attrs - lat_attrs = ds_in.lat.attrs - # drop non-regriddable variables - non_fields = [v for v in ds_in.variables.keys( - ) if 'lat' not in ds_in[v].dims and 'lon' not in ds_in[v].dims] - non_fields_ds = ds_in[non_fields] - ds_in = ds_in.drop(non_fields) - - regridder = make_regridder_L2L( - ll_res_in, - ll_res_out, - reuse_weights=True, - in_extent=in_extent, - out_extent=out_extent) - ds_out = regridder(ds_in, keep_attrs=True) - ds_out = ds_out.merge(non_fields_ds) - ds_out['lon'].attrs = lon_attrs - ds_out['lat'].attrs = lat_attrs - ds_out = ds_out.transpose('time', 'lev', 'ilev', 'lat', 'lon') - - if dim_format_in != 'classic' and dim_format_out != 'classic': - # Reformat dimensions to desired output format - ds_out = reformat_dims( - ds_out, - format=dim_format_out, - towards_common=False) - if dim_format_out != 'classic': - # Store stretched-grid parameters as metadata - ds_out.attrs['stretch_factor'] = sg_params_out[0] - ds_out.attrs['target_longitude'] = sg_params_out[1] - ds_out.attrs['target_latitude'] = sg_params_out[2] - ds_out.attrs['cs_res'] = cs_res_out - - ds_out = ds_out.assign_coords({'time': time}) - # correct precision changes (accidental 32-bit to 64-bit) - # save attributes (no longer needed in xarray >=0.16.1) - attrs = ds_out.attrs - data_attrs = {var : ds_out[str(var)].attrs for var in list(ds_out.variables)} - ds_out = ds_out.astype(original_dtype) - for var in list(ds_out.variables): - ds_out[str(var)].attrs = data_attrs[var] - ds_out.attrs = attrs - # Write dataset - ds_out.to_netcdf( - fout, - format='NETCDF4_CLASSIC' - ) - # Print the resulting dataset - print(ds_out) - # Remove any temporary files - for f in oface_files: os.remove(f) + print("Skipping regridding since grid parameters are identical") + return dset + + # Drop non-regriddable variables + non_fields = [ + var for var in dset.variables.keys() \ + if "lat" not in dset[var].dims \ + and "lon" not in dset[var].dims + ] + dset = dset.drop(["lat_bnds", "lon_bnds"]) + non_fields = dset[non_fields] + dset = dset.drop(non_fields) + + # Set the lev:positive attribute accordingly + dset = flip_lev_coord_if_necessary( + dset, + dim_format_in="classic", + dim_format_out="classic" + ) + + # Create the regridder and regrid the data + regridder = make_regridder_L2L( + ll_res_in, + ll_res_out, + reuse_weights=True, + in_extent=in_extent, + out_extent=out_extent, + weightsdir=weightsdir + ) + dset = regridder( + dset, + keep_attrs=True + ) + + # Add the non-regriddable fields back + dset = dset.merge(non_fields) + + # Change order of dimensions + dset = dset.transpose( + "time", "lev", "ilev", "lat", "lon", ... 
+ ) + # Save lat/lon coordinate metadata + dset = save_ll_metadata( + dset, + verbose=verbose + ) -def rename_restart_variables(ds, towards_gchp=True): + return dset + + +def flip_lev_coord_if_necessary( + dset, + dim_format_in, + dim_format_out +): """ - Renames restart variables according to GEOS-Chem Classic and GCHP conventions. + Flips the "lev" and "ilev" coords of an xarray.Dataset in the + vertical depending on the values of dim_format_in and + dim_format_out. Also sets the attributes "lev:positive" and + "ilev:positive" accordingly. Args: - ds: xarray.Dataset - The input dataset + ----- + dset : xarray.Dataset + The input dataset. + dim_format_in : str + Input grid format ("classic", "checkpoint", "diagnostic"). + dim_format_out : str + Output grid format ("classic", "checkpoint", "diagnostic"). - Keyword Args (optional): - towards_gchp: bool - Whether renaming to (True) or from (False) GCHP format - Default value: True + Returns: + ----- + dset : xarray.Dataset + The modified dataset. + + Remarks: + -------- + (1) classic : lev is in ascending order (lev:positive="up" ) + (2) diagnostic : lev is in ascending order* (lev:positive="up" ) + (3) checkpoint : lev is in descending order (lev:positive="down") + + *Except for the Emissions collection, which has lev arranged + in descending order. + + TODO: Make this function more robust for all cases, since GCHP + diagnostics may or may not have lev:positive="up". + """ + verify_variable_type(dset, xr.Dataset) + verify_variable_type(dim_format_in, str) + verify_variable_type(dim_format_out, str) + + # ================================================================== + # Case 1: checkpoint/diagnostic to classic + # lev, ilev need to be in ascending order + # ================================================================== + if dim_format_in != "classic" and dim_format_out == "classic": + + # Flip ilev and set to eta values at interfaces (if necessary) + if "ilev" in dset.coords: + if is_gchp_lev_positive_down(dset): + dset = dset.reindex(ilev=dset.ilev[::-1]) + coord = get_ilev_coord( + n_lev=dset.dims["ilev"], + top_down=False + ) + dset = dset.assign_coords({"ilev": coord}) + dset.ilev.attrs["positive"] = "up" + + # Flip lev and set to eta values at midpoints (if necessary) + if "lev" in dset.coords: + if is_gchp_lev_positive_down(dset): + dset = dset.reindex(lev=dset.lev[::-1]) + coord = get_lev_coord( + n_lev=dset.dims["lev"], + top_down=False + ) + dset = dset.assign_coords({"lev": coord}) + dset.lev.attrs["positive"] = "up" + + return dset + + # ================================================================== + # Case 2: classic/diagnostic to checkpoint + # lev needs to be in descending order (with ascending indices) + # + # TODO: Check for Emissions diagnostic (not a common use case) + # ================================================================== + if dim_format_in != "checkpoint" and dim_format_out == "checkpoint": + + if "lev" in dset.coords: + if not is_gchp_lev_positive_down(dset): + dset = dset.reindex(lev=dset.lev[::-1]) + coord = get_lev_coord( + n_lev=dset.dims["lev"], + gchp_indices=True + ) + dset = dset.assign_coords({"lev": coord}) + dset.lev.attrs["positive"] = "down" + + return dset + + # ================================================================== + # Case 3: classic/checkpoint to diagnostic: + # lev, ilev need to be in ascending order + # + # TODO: Check for Emissions diagnostic (not a common use case) + # ================================================================== + if dim_format_in !=
"diagnostic" and dim_format_out == "diagnostic": + + if "lev" in dset.coords: + if is_gchp_lev_positive_down(dset): + dset = dset.reindex(lev=dset.lev[::-1]) + coord = get_lev_coord( + n_lev=dset.dims["lev"], + gchp_indices=True + ) + dset = dset.assign_coords({"lev": coord}) + dset.lev.attrs["positive"] = "up" + + return dset + + # ================================================================== + # Case 4: checkpoint to checkpoint + # No flipping needed, but add lev:positive="down" + # ================================================================== + if dim_format_in == "checkpoint" and dim_format_out == "checkpoint": + + if "lev" in dset.coords: + dset.lev.attrs["positive"] = "down" + return dset + + return dset + + +def save_ll_metadata( + dset, + verbose=False, +): + """ + Updates the lat-lon coordinate metadata in an xarray.Dataset object. + + Args: + ----- + dset : xarray.Dataset + The input data (on lat-lon grid). + + Keyword Arguments: + ------------------ + verbose : bool + Toggles verbose printout on (True) or off (False) Returns: - xarray.Dataset - Input dataset with variables renamed + -------- + dset : xarray.Dataset + Original data plus updated coordinate metadata. """ + with xr.set_options(keep_attrs=True): - if towards_gchp: - old_str = 'SpeciesRst' - new_str = 'SPC' - else: - old_str = 'SPC' - new_str = 'SpeciesRst' - return ds.rename({name: name.replace(old_str, new_str, 1) - for name in list(ds.data_vars) - if name.startswith(old_str)}) + dset.time.attrs = { + "axis": "T" + } + + dset.lat.attrs = { + "long_name": "Latitude", + "units": "degrees_north", + "axis": "Y" + } + + dset.lon.attrs = { + "long_name": "Longitude", + "units": "degrees_east", + "axis": "X" + } + + if "ilev" in dset.coords: + dset.ilev.attrs["long_name"] = \ + "hybrid level at interfaces ((A/P0)+B)" + dset.ilev.attrs["units"] = "level" + dset.ilev.attrs["axis"] = "Z" + + if "lev" in dset.coords: + dset.lev.attrs["long_name"] = \ + "hybrid level at midpoints ((A/P0)+B)" + dset.lev.attrs["units"] = "level" + dset.lev.attrs["axis"] = "Z" + + if verbose: + print("file_regrid.py: In routine save_ll_metadata:") + print(dset.coords) + + return dset + + +def save_cssg_metadata( + dset, + cs_res_out, + dim_format_out, + sg_params_out, + verbose=False +): + """ + Saves the stretched-grid metadata to an xarray.Dataset object + containing cubed-sphere/stretched grid data. + + Args: + ----- + dset : xarray.Dataset + Data on the stretched grid. + cs_res_out : int + Cubed-sphere grid resolution. + dim_format_out : str + Either "checkpoint" (for restart files) or + "diagnostic" (for History diagnostic files). + sg_params_out: list[float, float, float] + Output grid stretching parameters + [stretch-factor, target longitude, target latitude]. + verbose : bool + Toggles verbose printout on (True) or off (False). + + Returns: + -------- + dset : xarray.Dataset + The original data, plus stretched grid metadata. 
+ """ + if verbose: + print("file_regrid.py: Saving CS/SG coordinate metadata") + + with xr.set_options(keep_attrs=True): + # Stretched-grid global attrs + dset.attrs["stretch_factor"] = sg_params_out[0] + dset.attrs["target_longitude"] = sg_params_out[1] + dset.attrs["target_latitude"] = sg_params_out[2] + dset.attrs["cs_res"] = cs_res_out -def drop_and_rename_classic_vars(ds, towards_gchp=True): + # Special handling for "checkpoint" format + if "checkpoint" in dim_format_out: + if "lon" in dset.dims: + dset.lon.attrs = { + "standard_name": "longitude", + "long_name": "Longitude", + "units": "degrees_east", + "axis": "X" + } + if "lat" in dset.dims: + dset.lat.attrs = { + "standard_name": "latitude", + "long_name": "Latitude", + "units": "degrees_north", + "axis": "Y" + } + + # Special handling for "checkpoint" format + if "diagnostic" in dim_format_out: + if "lons" in dset.dims: + dset.lons.attrs = { + "standard_name": "longitude", + "long_name": "Longitude", + "units": "degrees_east", + "axis": "X" + } + if "lats" in dset.dims: + dset.lats.attrs = { + "standard_name": "la7titude", + "long_name": "Latitude", + "units": "degrees_north", + "axis": "Y" + } + + # ilev:positive is set by flip_lev_coord_if_necessary + if "ilev" in dset.coords: + dset.ilev.attrs["long_name"] = \ + "hybrid level at interfaces ((A/P0)+B)" + dset.ilev.attrs["units"] = "level" + dset.ilev.attrs["axis"] = "Z" + + # lev:positive is set by flip_lev_coord_if_necessary + if "lev" in dset.coords: + dset.lev.attrs["long_name"] = \ + "hybrid level at midpoints ((A/P0)+B)" + dset.lev.attrs["units"] = "level" + dset.lev.attrs["axis"] = "Z" + + return dset + + +def rename_restart_variables( + dset, + towards_gchp=True +): """ - Renames and drops certain restart variables according to GEOS-Chem Classic + Renames restart variables according to GEOS-Chem Classic and GCHP conventions. Args: - ds: xarray.Dataset - The input dataset + ----- + dset : xarray.Dataset + The input dataset. + + Keyword Args (optional): + ------------------------ + towards_gchp: bool + Whether renaming to (True) or from (False) GCHP format + Default value: True + + Returns: + -------- + dset : xarray.Dataset + The modified dataset. 
+ """ + verify_variable_type(dset, xr.Dataset) + + # Keep all xarray attribute settings + with xr.set_options(keep_attrs=True): + + # Dictionary for name replacements + old_to_new = {} + + # ============================================================== + # classic/diagnostic -> checkpoint + # ============================================================== + if towards_gchp: + for var in dset.data_vars.keys(): + if "Met_DELPDRY" in var: + old_to_new[var] = "DELP_DRY" + if var.startswith("Met_"): + old_to_new[var] = var.replace("Met_", "") + if var.startswith("Chem_"): + old_to_new[var] = var.replace("Chem_", "") + if var.startswith("SpeciesRst_"): + old_to_new[var] = var.replace("SpeciesRst_", "SPC_") + if var.startswith("SpeciesConcVV_"): + old_to_new[var] = var.replace("SpeciesConcVV_", "SPC_") + + return dset.rename(old_to_new) + + # ============================================================== + # checkpoint -> classic/diagnostic + # ============================================================== + for var in dset.data_vars.keys(): + if var == "DELP_DRY": + old_to_new[var] = "Met_DELPDRY" + if var == "BXHEIGHT": + old_to_new[var] = "Met_BXHEIGHT" + if var == "StatePSC": + old_to_new[var] = "Chem_StatePSC" + if var == "KPPHvalue": + old_to_new[var] = "Chem_KPPHvalue" + if var == "DryDepNitrogen": + old_to_new[var] = "ChemDryDepNitrogen" + if var == "WetDepNitrogen": + old_to_new[var] = "Chem_WetDepNitrogen" + if var == "SO2AfterChem": + old_to_new[var] = "Chem_SO2AfterChem" + if var == "JNO2": + old_to_new[var] = "Chem_JNO2" + if var == "JOH": + old_to_new[var] = "Chem_JOH" + if var == "H2O2AfterChem": + old_to_new[var] = "Chem_H2O2AfterChem" + if var == "ORVCsesq": + old_to_new[var] = "Chem_ORVCsesq" + if var == "AeroH2OSNA": + old_to_new[var] = "Chem_AeroH2OSNA" + if var.startswith("SPC_"): + old_to_new[var] = var.replace("SPC_", "SpeciesRst_") + + return dset.rename(old_to_new) + + +def adjust_cssg_grid_and_coords( + dset, + dim_format_in, + dim_format_out, +): + """ + Adjusts cubed-sphere/stretched-grid coordinate names and attributes. + + Args: + ----- + dset : xarray.Dataset + The input data + dim_format_in, dim_format_out: str + Either "checkpoint" (for checkpoint/restart files) or + "diagnostic" (for History diagnostic files). + + Returns: + -------- + dset : xarray.Dataset + The input data with updated coordinate names & attributes. + + Remarks: + -------- + "diagnostic" dimension format: (time, lev, nf, Ydim, Xdim) + "checkpoint" dimension format: (time, lev, lat, lon); lat = 6*lon + """ + # Keep all xarray attributes intact + with xr.set_options(keep_attrs=True): + + # ============================================================== + # Rename coordinates returned by the xESMF regridding to + # the "lons" and "lats" coordinates as saved out by MAPL. 
+ # ============================================================== + if "diagnostic" in dim_format_in: + if "Xdim" in dset.variables: + dset = dset.rename_vars({"Xdim": "lons"}) + if "Ydim" in dset.variables: + dset = dset.rename_vars({"Ydim": "lats"}) + + if "checkpoint" in dim_format_in: + if "lon" in dset.variables: + dset = dset.rename_vars({"lon": "lons"}) + if "lat" in dset.variables: + dset = dset.rename_vars({"lat": "lats"}) + + if "lons" in dset.variables: + dset.lons.attrs = { + "standard_name": "longitude", + "long_name": "Longitude", + "units": "degrees_east" + } + + if "lats" in dset.variables: + dset.lats.attrs = { + "standard_name": "latitude", + "long_name": "Latitude", + "units": "degrees_north" + } + + # ================================================================== + # For "diagnostic" dimension format only + # ================================================================== + if "diagnostic" in dim_format_out: + + # Add "fake" Xdim and Ydim coordinates as done by MAPL, + # which is needed for the GMAO GrADS visualization software. + # NOTE: Use .values to convert to numpy.ndarray type in + # order to keep xarray from trying to redefine dim "nf". + if "lons" in dset.coords and "lats" in dset.coords: + dset = dset.assign_coords({ + "Xdim": dset.lons.isel(nf=0, Ydim=0).values, + "Ydim": dset.lats.isel(nf=0, Xdim=0).values + }) + elif "lon" in dset.variables and "lat" in dset.variables: + dset = dset.assign_coords({ + "Xdim": dset.lon.isel(nf=0, Ydim=0).values, + "Ydim": dset.lat.isel(nf=0, Xdim=0).values + }) + dset.Xdim.attrs = { + "long_name": "Fake Longitude for GrADS Compatibility", + "units": "degrees_east" + } + dset.Ydim.attrs = { + "long_name": "Fake Latitude for GrADS Compatibility", + "units": "degrees_north" + } + + # Drop dimensions that may be left over from regridding + if "lon" in dset.variables: + dset = dset.drop_vars("lon") + if "lat" in dset.variables: + dset = dset.drop_vars("lat") + + # ================================================================== + # For "checkpoint" dimension format only + # ================================================================== + if "checkpoint" in dim_format_out: + + # Reshape the grid from (time, lev, nf, Ydim, Xdim) dimensions + # to (time, lev, lat, lon) dimensions (where lat = 6*lon) + # Also drop any unnecessary variables + dset = reshape_cssg_diag_to_chkpt(dset) + if "lons" in dset.variables: + dset = dset.drop_vars("lons") + if "lats" in dset.variables: + dset = dset.drop_vars("lats") + + return dset + + +def drop_classic_vars( + dset, + towards_gchp=True +): + """ + Drops certain non-regriddable variables according to + GEOS-Chem Classic and GCHP conventions. + + Args: + ----- + dset : xarray.Dataset + The input dataset. Keyword Args (optional): - towards_gchp: bool - Whether going to (True) or from (False) GCHP format - Default value: True + ------------------------ + towards_gchp: bool + Whether going to (True) or from (False) GCHP format. + Default value: True Returns: - xarray.Dataset - Input dataset with variables renamed and dropped + -------- + dset : xarray.Dataset + The modified dataset. + """ + with xr.set_options(keep_attrs=True): + if towards_gchp: + dset = dset.drop_vars( + ["P0", + "hyam", + "hybm", + "hyai", + "hybi", + "AREA", + "ilev", + "PS1DRY", + "PS1WET", + "TMPU1", + "SPHU1", + "StatePSC", + "lon_bnds", + "lat_bnds"], + errors="ignore" + ) + + return dset + + +def order_dims_time_lev_lat_lon(dset): """ + Transposes dims of a Dataset to be in (time, lev, lat, lon) order.
+ This corresponds to Fortran column-major ordering. - if towards_gchp: - ds = ds.rename( - {name: name.replace('Met_', '', 1).replace('Chem_', '', 1) - for name in list(ds.data_vars) - if name.startswith('Met_') or name.startswith('Chem_')}) - if 'DELPDRY' in list(ds.data_vars): ds = ds.rename({'DELPDRY': 'DELP_DRY'}) - ds = ds.drop_vars(['P0', - 'hyam', - 'hybm', - 'hyai', - 'hybi', - 'AREA', - 'ilev', - 'PS1DRY', - 'PS1WET', - 'TMPU1', - 'SPHU1', - 'StatePSC'], - errors='ignore') + + Args: + ----- + dset : xarray.Dataset + The input dataset. + + Returns: + -------- + dset : xarray.Dataset + The modified dataset. + """ + verify_variable_type(dset, xr.Dataset) + + if "lev" in dset.dims and "time" in dset.dims: + dset = dset.transpose("time", "lev", "lat", "lon") + elif "lev" in dset.dims: + dset = dset.transpose("lev", "lat", "lon") + elif "time" in dset.dims: + dset = dset.transpose("time", "lat", "lon") else: - renames = {'DELP_DRY': 'Met_DELPDRY', - 'BXHEIGHT': 'Met_BXHEIGHT', - 'TropLev': 'Met_TropLev', - 'DryDepNitrogen': 'Chem_DryDepNitrogen', - 'WetDepNitrogen': 'Chem_WetDepNitrogen', - 'H2O2AfterChem': 'Chem_H2O2AfterChem', - 'SO2AfterChem': 'Chem_SO2AfterChem', - 'KPPHvalue': 'Chem_KPPHvalue'} - data_vars = list(ds.data_vars) - new_renames = renames.copy() - for key in renames.keys(): - if key not in data_vars: - del(new_renames[key]) - ds = ds.rename(new_renames) - - return rename_restart_variables(ds, towards_gchp=towards_gchp) - - -if __name__ == '__main__': + dset = dset.transpose("lat", "lon") + + return dset + + +def reshape_cssg_diag_to_chkpt( + dset, + verbose=False +): + """ + Reshapes a dataset from diagnostic to checkpoint dimension format. + + Args: + ----- + dset : xarray.Dataset + Dataset with dimensions (time, lev, nf, Ydim, Xdim). + + Keyword Args (optional) + ----------------------- + verbose : bool + Toggles verbose output on (True) or off (False). + + Returns: + -------- + dset : xarray.Dataset + Dataset with dimensions (time, lev, lat, lon), where lat = 6*lon. + """ + verify_variable_type(dset, xr.Dataset) + + if verbose: + print("file_regrid.py: reshaping diagnostic to checkpoint") + + # Keep xarray attributes unchanged + with xr.set_options(keep_attrs=True): + + # ============================================================== + # Get the size of the Xdim/Ydim or lon/lat dims + # ============================================================== + if "Xdim" in dset.dims and "Ydim" in dset.dims: + xdim = dset.dims["Xdim"] + ydim = dset.dims["Ydim"] + elif "lon" in dset.dims and "lat" in dset.dims: + xdim = dset.dims["lon"] + ydim = dset.dims["lat"] + else: + msg = "Dimensions (Xdim, Ydim) or (lon, lat) not found!" + raise ValueError(msg) + + # ============================================================== + # Create the "lon" coord as a 1-D vector of values + # ============================================================== + if "Xdim" in dset.dims: + dset = dset.rename_dims({"Xdim": "lon"}) + elif "lon" in dset.coords: + dset = dset.drop_vars("lon") + dset = dset.assign_coords({ + "lon": np.linspace(1, xdim, xdim, dtype=np.float64) + }) + + # ============================================================== + # The dset.stack operation combines the nf and Ydim + # dimensions into a MultiIndex (i.e. a list of tuples, + # where each tuple is (face number, cell number)). + # We then have to unpack that into a linear list that + # ranges from 1..nf*ydim.
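+        # For example, with ydim=2 and 0-based (face, cell) tuples, the
+        # MultiIndex entries (0,0), (0,1), (1,0), ... map to lat values
+        # 1, 2, 3, ...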
+ # ============================================================== + if "nf" in dset.dims and "Ydim" in dset.dims: + dset = dset.stack(lat=("nf", "Ydim")) + multi_index_list = dset.lat.values + lats = np.zeros(6 * ydim) # 6 cubed-sphere faces + for i, tpl in enumerate(multi_index_list): + lats[i] = (tpl[1] + (tpl[0] * ydim)) + 1 + dset = dset.assign_coords({"lat": lats}) + + # ============================================================== + # Transpose dimensions + # ============================================================== + dset = order_dims_time_lev_lat_lon(dset) + + # ============================================================== + # Drop coordinates not needed in checkpoint format files + # ============================================================== + if "lons" in dset.variables: + dset = dset.drop_vars("lons") + if "lats" in dset.variables: + dset = dset.drop_vars("lats") + + return dset + + +def main(): + """ + Main program for file_regrid. Parses command-line arguments and + calls the file_regrid routine. + + Command-line arguments: + ----------------------- + -i, --filein + Input file, contains original data. + + -o, --fileout + Output file, contains regridded data. + + --sg_params_in + Input grid stretching parameters (GCHP only). + + --sg_params_out + Output grid stretching parameters (GCHP only). + + --dim_format_in + Format of the input file's dimensions: + ("checkpoint", "diagnostic", "classic") + + --dim_format_out + Format of the output file's dimensions: + ("checkpoint", "diagnostic", "classic") + + --cs_res_out + Cubed-sphere resolution for the output file (e.g. 24, 48, 360) + + --ll_res_out + Resolution for the output file in "latxlon" format + + --verbose + Toggles verbose printout on. + + -w, --weightsdir + Directory where regridding weights are stored (or will be created) + """ + + # Tell parser which arguments to expect parser = argparse.ArgumentParser( - description='General cubed-sphere to cubed-sphere regridder.') - parser.add_argument('-i', '--filein', - metavar='FIN', - type=str, - required=True, - help='input NetCDF file') - parser.add_argument('-o', '--fileout', - metavar='FOUT', - type=str, - required=True, - help='name of output file') + description="General cubed-sphere to cubed-sphere regridder."
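+        # A typical invocation (a sketch; file names are assumptions):
+        #   python file_regrid.py -i GEOSChem.Restart.4x5.nc4 \
+        #     -o GEOSChem.Restart.c24.nc4 --dim_format_in classic \
+        #     --dim_format_out checkpoint --cs_res_out 24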
+ ) + parser.add_argument( + "-i", "--filein", + metavar="FILEIN", + type=str, + required=True, + help="input NetCDF file" + ) + parser.add_argument( + "-o", "--fileout", + metavar="FILEOUT", + type=str, + required=True, + help="name of output file" + ) parser.add_argument( - '--sg_params_in', metavar='P', type=float, nargs=3, + "--sg_params_in", + metavar="P", + type=float, + nargs=3, default=[1.0, 170.0, -90.0], - help='input grid stretching parameters (stretch-factor, target longitude, target latitude)') + help="input grid stretching parameters (stretch-factor, target longitude, target latitude)" + ) parser.add_argument( - '--sg_params_out', metavar='P', type=float, nargs=3, + "--sg_params_out", + metavar="P", + type=float, + nargs=3, default=[1.0, 170.0, -90.0], - help='output grid stretching parameters (stretch-factor, target longitude, target latitude)') + help="output grid stretching parameters (stretch-factor, target longitude, target latitude)" + ) - parser.add_argument('--cs_res_out', - metavar='RES', - type=int, - required=False, - help='output grid\'s cubed-sphere resolution') + parser.add_argument( + "--cs_res_out", + metavar="RES", + type=int, + required=False, + help="output grid's cubed-sphere resolution" + ) parser.add_argument( - '--ll_res_out', - metavar='RES', + "--ll_res_out", + metavar="RES", type=str, required=False, - help='output grid\'s lat/lon resolution in \'latxlon\' format') + help="output grid's lat/lon resolution in 'latxlon' format" + ) parser.add_argument( - '--dim_format_in', - metavar='WHICH', + "--dim_format_in", + metavar="WHICH", type=str, choices=[ - 'checkpoint', - 'diagnostic', - 'classic'], + "checkpoint", + "diagnostic", + "classic"], required=True, - help='format of the input file\'s dimensions (choose from: checkpoint, diagnostic)') + help="format of the input file's dimensions (choose from: checkpoint, diagnostic, classic)" + ) parser.add_argument( - '--dim_format_out', - metavar='WHICH', + "--dim_format_out", + metavar="WHICH", type=str, choices=[ - 'checkpoint', - 'diagnostic', - 'classic'], + "checkpoint", + "diagnostic", + "classic"], required=True, - help='format of the output file\'s dimensions (choose from: checkpoint, diagnostic)') + help="format of the output file's dimensions (choose from: checkpoint, diagnostic, classic)" + ) parser.add_argument( - '--vert_params_out', - metavar='VERT', - type=list, - required=False, - help='Hybrid grid parameter A in hPa and B (unitless) in [AP, BP] format') - + "--verbose", + action="store_true", + help="Toggles on verbose output" + ) + parser.add_argument( + "-w", "--weightsdir", + metavar="WGT", + type=str, + default=".", + help="Directory where regridding weights are found (or will be created)" + ) args = parser.parse_args() + + # Regrid the file file_regrid( args.filein, args.fileout, @@ -426,4 +1555,11 @@ def drop_and_rename_classic_vars(ds, towards_gchp=True): args.ll_res_out, args.sg_params_in, args.sg_params_out, - args.vert_params_out) + args.verbose, + args.weightsdir + ) + + +# Only call when run as standalone +if __name__ == "__main__": + main() diff --git a/gcpy/grid.py b/gcpy/grid.py index 3a8622d4..d9a51805 100644 --- a/gcpy/grid.py +++ b/gcpy/grid.py @@ -1,11 +1,14 @@ -import numpy as np +""" +Module containing variables and functions that define and +manipulate GEOS-Chem horizontal and vertical grids +""" +from itertools import product import xarray as xr -from numpy import asarray +import numpy as np import scipy.sparse -from itertools import
product -from .util import get_shape_of_data +from gcpy.util import get_shape_of_data, verify_variable_type from .grid_stretching_transforms import scs_transform -from .constants import R_EARTH_m +from gcpy.constants import R_EARTH_m def get_troposphere_mask(ds): @@ -166,7 +169,7 @@ def call_make_grid(res, gridtype, in_extent=[-180, 180, -90, 90], Returns: [grid, grid_list]: list(dict, list(dict)) - Returns the created grid. + Returns the created grid. grid_list is a list of grids if gridtype is 'cs', else it is None """ @@ -289,6 +292,126 @@ def get_vert_grid(dataset, AP=[], BP=[]): return new_grid.p_edge(), new_grid.p_mid(), np.size(AP) +def get_ilev_coord( + n_lev=72, + AP_edge=None, + BP_edge=None, + top_down=False, + gchp_indices=False +): + """ + Returns the eta values (defined as (A/P0) + B) at vertical + level edges. These are used to define the "ilev" netCDF + coordinate variable. + + Keyword Args (optional): + ------------------------ + n_lev : int + Number of levels in the grid. Default = 72 + AP_edge : list-like + Hybrid grid parameter A (hPa), with values placed on level + edges. If not specified, values from the _GEOS_72L_AP array + in this module will be used. + BP_edge : list-like + Hybrid grid parameter B (unitless), with values placed on level + edges. If not specified, values from the _GEOS_72L_BP array in + this module will be used. + top_down : bool + Set this to True if the eta coordinate will be arranged from + top-of-atm downward (True) or from the surface upward (False). + gchp_indices : bool + Set this to True to return an array of indices (as is used + in GCHP files). + + Returns: + -------- + ilev : numpy.ndarray + List of eta values at vertical grid edges + """ + if n_lev is None: + n_lev = 72 + + # Return GCHP-style indices for the level dimension + if gchp_indices: + return np.linspace(1, n_lev+1, n_lev+1, dtype=np.float64) + + # Get eta values at vertical level edges + # Convert inputs to numpy.ndarray for fast computation + if AP_edge is None and n_lev == 72: + AP_edge = _GEOS_72L_AP + if AP_edge is None and n_lev == 47: + AP_edge = _GEOS_47L_AP + if BP_edge is None and n_lev == 72: + BP_edge = _GEOS_72L_BP + if BP_edge is None and n_lev == 47: + BP_edge = _GEOS_47L_BP + AP_edge = np.array(AP_edge) + BP_edge = np.array(BP_edge) + ilev = np.array((AP_edge/1000.0) + BP_edge, dtype=np.float64) + if top_down: + ilev = ilev[::-1] + return ilev + + +def get_lev_coord( + n_lev=72, + AP_edge=None, + BP_edge=None, + top_down=False, + gchp_indices=False +): + """ + Returns the eta values (defined as (A/P0) + B) at vertical + level midpoints. These are used to define the "lev" + netCDF coordinate variable. + + Keyword Args (optional): + ------------------------ + n_lev : int + Number of levels in the grid. Default = 72 + AP_edge : list-like + Hybrid grid parameter A (hPa), with values placed on level + edges. If not specified, values from the _GEOS_72L_AP array + in this module will be used. + BP_edge : list-like + Hybrid grid parameter B (unitless), with values placed on level + edges. If not specified, values from the _GEOS_72L_BP array in + this module will be used. + top_down : bool + Set this to True if the eta coordinate will be arranged from + top-of-atm downward (True) or from the surface upward (False). + gchp_indices : bool + Set this to True to return an array of indices (as is used + in GCHP files).
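+
+    Example:
+    --------
+    A minimal sketch for the standard 72-level GEOS grid:
+
+    >>> lev = get_lev_coord(n_lev=72)
+    >>> lev.size
+    72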
+ + Returns: + -------- + lev : numpy.ndarray + List of eta values at vertical grid midpoints + """ + if n_lev is None: + n_lev = 72 + + # Return GCHP-style indices for the level dimension + if gchp_indices: + return np.linspace(1, n_lev, n_lev, dtype=np.float64) + + # Compute AP, BP at midpoints. + # Convert inputs to numpy.ndarray for fast computation + if AP_edge is None and n_lev == 72: + AP_edge = _GEOS_72L_AP + if AP_edge is None and n_lev == 47: + AP_edge = _GEOS_47L_AP + if BP_edge is None and n_lev == 72: + BP_edge = _GEOS_72L_BP + if BP_edge is None and n_lev == 47: + BP_edge = _GEOS_47L_BP + AP_edge = np.array(AP_edge) + BP_edge = np.array(BP_edge) + AP_mid = (AP_edge[0:n_lev:1] + AP_edge[1:n_lev+1:1]) * 0.5 + BP_mid = (BP_edge[0:n_lev:1] + BP_edge[1:n_lev+1:1]) * 0.5 + lev = np.array((AP_mid / 1000.0) + BP_mid, dtype=np.float64) + if top_down: + lev = lev[::-1] + return lev + + def get_pressure_indices(pedge, pres_range): """ Get indices where edge pressure values are within a given pressure range @@ -922,8 +1045,8 @@ def calc_rectilinear_grid_area(lon_edge, lat_edge): # Convert from km to m _radius_earth_m = R_EARTH_m - lon_edge = asarray(lon_edge, dtype=float) - lat_edge = asarray(lat_edge, dtype=float) + lon_edge = np.asarray(lon_edge, dtype=float) + lat_edge = np.asarray(lat_edge, dtype=float) n_lon = (lon_edge.size) - 1 n_lat = (lat_edge.size) - 1 @@ -967,7 +1090,7 @@ def calc_delta_lon(lon_edge): n_lon = (lon_edge.size) - 1 - lon_edge = asarray(lon_edge) + lon_edge = np.asarray(lon_edge) # Set up output array lon_delta = np.zeros((n_lon)) @@ -1122,7 +1245,7 @@ def _initialize(self): pp[:, i, j] = latlon_to_cartesian( lambda_rad[i, j], theta_rad[i, j]) - # Map the edges on the sphere back to the cube. + # Map the edges on the sphere back to the cube. 
#Note that all intersections are at x = -rsq3 # print("EDGES") for ij in range(1, c + 1): diff --git a/gcpy/plot.py b/gcpy/plot.py deleted file mode 100644 index de89ac4d..00000000 --- a/gcpy/plot.py +++ /dev/null @@ -1,3071 +0,0 @@ -""" -Module containing functions for creating plots -""" -import os -import copy -import warnings -from multiprocessing import current_process -from tempfile import TemporaryDirectory -import matplotlib as mpl -import matplotlib.colors as mcolors -import matplotlib.pyplot as plt -import matplotlib.ticker as mticker -import numpy as np -import xarray as xr -import cartopy.crs as ccrs -from matplotlib.backends.backend_pdf import PdfPages -from joblib import Parallel, delayed -from PyPDF2 import PdfFileMerger -from .grid import get_vert_grid, get_pressure_indices, \ - pad_pressure_edges, convert_lev_to_pres, get_grid_extents, call_make_grid, \ - get_input_res -from .regrid import regrid_comparison_data, create_regridders, gen_xmat, \ - regrid_vertical -from .util import reshape_MAPL_CS, get_diff_of_diffs, get_nan_mask, \ - all_zero_or_nan, slice_by_lev_and_time, compare_varnames, read_config_file -from .units import check_units, data_unit_is_mol_per_mol -from .constants import MW_AIR_g - -# Save warnings format to undo overwriting built into PyPDF2 -_warning_format = warnings.showwarning - -# Suppress numpy divide by zero warnings to prevent output spam -np.seterr(divide="ignore", invalid="ignore") - -_current_dir = os.path.dirname(__file__) - -_rgb_WhGrYlRd = np.genfromtxt(_current_dir + '/colormaps/WhGrYlRd.txt', - delimiter=' ') -WhGrYlRd = mcolors.ListedColormap(_rgb_WhGrYlRd / 255.0) - - -def six_plot( - subplot, - all_zero, - all_nan, - plot_val, - grid, - ax, - rowcol, - title, - comap, - unit, - extent, - masked_data, - other_all_nan, - gridtype, - vmins, - vmaxs, - use_cmap_RdBu, - match_cbar, - verbose, - log_color_scale, - pedge=np.full((1, 1), -1), - pedge_ind=np.full((1, 1), -1), - log_yaxis=False, - xtick_positions=[], - xticklabels=[], - plot_type="single_level", - ratio_log=False, - proj=ccrs.PlateCarree(), - ll_plot_func='imshow', - **extra_plot_args -): - """ - Plotting function to be called from compare_single_level or - compare_zonal_mean. Primarily exists to eliminate code redundancy - in the prior listed functions and has not been tested separately. - - Args: - subplot: str - Type of plot to create (ref, dev, absolute difference or - fractional difference) - all_zero: bool - Set this flag to True if the data to be plotted consist only of zeros - all_nan: bool - Set this flag to True if the data to be plotted consist only of NaNs - plot_val: xarray DataArray - Single variable GEOS-Chem output values to plot - grid: dict - Dictionary mapping plot_val to plottable coordinates - ax: matplotlib axes - Axes object to plot information. Will create a new axes - if none is passed. 
- rowcol: tuple - Subplot position in overall Figure - title: str - Title to print on axes - comap: matplotlib Colormap - Colormap for plotting data values - unit: str - Units of plotted data - extent: tuple (minlon, maxlon, minlat, maxlat) - Describes minimum and maximum latitude and longitude of input data - masked_data: numpy array - Masked area for cubed-sphere plotting - other_all_nan: bool - Set this flag to True if plotting ref/dev and the other of ref/dev - is all nan - gridtype: str - "ll" for lat/lon or "cs" for cubed-sphere - vmins: list of float - list of length 3 of minimum ref value, dev value, and absdiff value - vmaxs: list of float - list of length 3 of maximum ref value, dev value, and absdiff value - use_cmap_RdBu: bool - Set this flag to True to use a blue-white-red colormap - match_cbar: bool - Set this flag to True if you are plotting with the same colorbar - for ref and dev - verbose: bool - Set this flag to True to enable informative printout. - log_color_scale: bool - Set this flag to True to enable log-scale colormapping - - Keyword Args (optional): - pedge: numpy array - Edge pressures of grid cells in data to be plotted - Default value: np.full((1,1), -1) - pedge_ind: numpy array - Indices where edge pressure values are within a given pressure range - Default value: np.full((1,1), -1) - log_yaxis: bool - Set this flag to True to enable log scaling of pressure in zonal - mean plots - Default value: False - xtick_positions: list of float - Locations of lat/lon or lon ticks on plot - Default value: [] - xticklabels: list of str - Labels for lat/lon ticks - Default value: [] - plot_type: str - Type of plot, either "single_level" or "zonal"mean" - Default value: "single_level" - ratio_log: bool - Set this flag to True to enable log scaling for ratio plots - Default value: False - proj: cartopy projection - Projection for plotting data - Default value: ccrs.PlateCarree() - ll_plot_func: str - Function to use for lat/lon single level plotting with possible values - 'imshow' and 'pcolormesh'. imshow is much faster but is slightly - displaced when plotting from dateline to dateline and/or pole to pole. - Default value: 'imshow' - extra_plot_args: various - Any extra keyword arguments are passed through the plotting functions to - be used in calls to pcolormesh() (CS) or imshow() (Lat/Lon). - """ - # Set min and max of the data range - if subplot in ("ref", "dev"): - if all_zero or all_nan: - if subplot == "ref": - [vmin, vmax] = [vmins[0], vmaxs[0]] - else: - [vmin, vmax] = [vmins[1], vmaxs[1]] - elif use_cmap_RdBu: - if subplot == "ref": - if match_cbar and (not other_all_nan): - absmax = max([np.abs(vmins[2]), np.abs(vmaxs[2])]) - else: - absmax = max([np.abs(vmins[0]), np.abs(vmaxs[0])]) - else: - if match_cbar and (not other_all_nan): - absmax = max([np.abs(vmins[2]), np.abs(vmaxs[2])]) - else: - absmax = max([np.abs(vmins[1]), np.abs(vmaxs[1])]) - [vmin, vmax] = [-absmax, absmax] - else: - if subplot == "ref": - if match_cbar and (not other_all_nan): - [vmin, vmax] = [vmins[2], vmaxs[2]] - else: - [vmin, vmax] = [vmins[0], vmaxs[0]] - else: - if match_cbar and (not other_all_nan): - [vmin, vmax] = [vmins[2], vmaxs[2]] - else: - [vmin, vmax] = [vmins[1], vmaxs[1]] - else: - if all_zero: - [vmin, vmax] = [0, 0] - elif all_nan: - [vmin, vmax] = [np.nan, np.nan] - else: - if subplot == "dyn_abs_diff": - # Min and max of abs. 
diff, excluding NaNs - diffabsmax = max( - [np.abs(np.nanmin(plot_val)), np.abs(np.nanmax(plot_val))] - ) - [vmin, vmax] = [-diffabsmax, diffabsmax] - elif subplot == "res_abs_diff": - [pct5, pct95] = [ - np.percentile(plot_val, 5), - np.percentile(plot_val, 95), - ] - abspctmax = np.max([np.abs(pct5), np.abs(pct95)]) - [vmin, vmax] = [-abspctmax, abspctmax] - elif subplot == "dyn_frac_diff": - fracdiffabsmax = np.max( - [np.abs(np.nanmin(plot_val)), np.abs(np.nanmax(plot_val))] - ) - [vmin, vmax] = [1 / fracdiffabsmax, fracdiffabsmax] - # if vmin > 0.5: - # vmin = 0.5 - # if vmax < 2: - # vmax = 2 - else: - [vmin, vmax] = [0.5, 2] - if verbose: - print(f"Subplot ({rowcol}) vmin, vmax: {vmin}, {vmax}") - - # Normalize colors (put into range [0..1] for matplotlib methods) - if subplot in ("ref", "dev"): - norm = normalize_colors( - vmin, vmax, is_difference=use_cmap_RdBu, - log_color_scale=log_color_scale, ratio_log=ratio_log - ) - elif subplot in ("dyn_abs_diff", "res_abs_diff"): - norm = normalize_colors(vmin, vmax, is_difference=True) - else: - # remove NaNs for compatibility with color normalization - plot_val = get_nan_mask(plot_val) - norm = normalize_colors( - vmin, - vmax, - is_difference=True, - log_color_scale=True, - ratio_log=ratio_log) - # Create plot - plot = single_panel( - plot_val, - ax, - plot_type, - grid, - gridtype, - title, - comap, - norm, - unit, - extent, - masked_data, - use_cmap_RdBu, - log_color_scale, - add_cb=False, - pedge=pedge, - pedge_ind=pedge_ind, - log_yaxis=log_yaxis, - xtick_positions=xtick_positions, - xticklabels=xticklabels, - proj=proj, - ll_plot_func=ll_plot_func, - **extra_plot_args) - - # Define the colorbar for the plot - cb = plt.colorbar( - plot, - ax=ax, - orientation="horizontal", - norm=norm, - pad=0.10) - cb.mappable.set_norm(norm) - if all_zero or all_nan: - if subplot in ("ref", "dev"): - if use_cmap_RdBu: - cb.set_ticks([0.0]) - else: - cb.set_ticks([0.5]) - else: - cb.set_ticks([0.0]) - if all_nan: - cb.set_ticklabels(["Undefined throughout domain"]) - else: - cb.set_ticklabels(["Zero throughout domain"]) - else: - if subplot in ("ref", "dev") and log_color_scale: - cb.formatter = mticker.LogFormatter(base=10) - elif subplot in ("dyn_frac_diff", "res_frac_diff") and np.all(np.isin(plot_val, [1])): - cb.set_ticklabels(["Ref and Dev equal throughout domain"]) - elif subplot in ("dyn_frac_diff", "res_frac_diff"): - if subplot == "dyn_frac_diff" and vmin != 0.5 and vmax != 2.0: - if vmin > 0.1 and vmax < 10: - cb.locator = mticker.MaxNLocator(nbins=4) - cb.formatter = mticker.ScalarFormatter() - else: - cb.formatter = mticker.LogFormatter(base=10) - cb.locator = mticker.LogLocator(base=10, subs='all') - cb.update_ticks() - else: - cb.formatter = mticker.ScalarFormatter() - cb.set_ticks([0.5, 0.75, 1, 1.5, 2.0]) - else: - if (vmax - vmin) < 0.1 or (vmax - vmin) > 100: - cb.locator = mticker.MaxNLocator(nbins=4) - - try: - cb.formatter.set_useOffset(False) - except BaseException: - # not all automatically chosen colorbar formatters properly handle the - # above method - pass - - cb.minorticks_off() - cb.update_ticks() - cb.set_label(unit) - - -def compare_single_level( - refdata, - refstr, - devdata, - devstr, - varlist=None, - ilev=0, - itime=0, - refmet=None, - devmet=None, - weightsdir='.', - pdfname="", - cmpres=None, - match_cbar=True, - normalize_by_area=False, - enforce_units=True, - convert_to_ugm3=False, - flip_ref=False, - flip_dev=False, - use_cmap_RdBu=False, - verbose=False, - log_color_scale=False, - extra_title_txt=None, - 
extent=[-1000, -1000, -1000, -1000], - n_job=-1, - sigdiff_list=[], - second_ref=None, - second_dev=None, - spcdb_dir=os.path.dirname(__file__), - sg_ref_path='', - sg_dev_path='', - ll_plot_func='imshow', - **extra_plot_args -): - """ - Create single-level 3x2 comparison map plots for variables common - in two xarray Datasets. Optionally save to PDF. - - Args: - refdata: xarray dataset - Dataset used as reference in comparison - refstr: str - String description for reference data to be used in plots - devdata: xarray dataset - Dataset used as development in comparison - devstr: str - String description for development data to be used in plots - - Keyword Args (optional): - varlist: list of strings - List of xarray dataset variable names to make plots for - Default value: None (will compare all common variables) - ilev: integer - Dataset level dimension index using 0-based system. - Indexing is ambiguous when plotting differing vertical grids - Default value: 0 - itime: integer - Dataset time dimension index using 0-based system - Default value: 0 - refmet: xarray dataset - Dataset containing ref meteorology - Default value: None - devmet: xarray dataset - Dataset containing dev meteorology - Default value: None - weightsdir: str - Directory path for storing regridding weights - Default value: None (will create/store weights in - current directory) - pdfname: str - File path to save plots as PDF - Default value: Empty string (will not create PDF) - cmpres: str - String description of grid resolution at which - to compare datasets - Default value: None (will compare at highest resolution - of ref and dev) - match_cbar: bool - Set this flag to True if you wish to use the same colorbar - bounds for the Ref and Dev plots. - Default value: True - normalize_by_area: bool - Set this flag to True if you wish to normalize the Ref and Dev - raw data by grid area. Input ref and dev datasets must include - AREA variable in m2 if normalizing by area. - Default value: False - enforce_units: bool - Set this flag to True to force an error if Ref and Dev - variables have different units. - Default value: True - convert_to_ugm3: bool - Whether to convert data units to ug/m3 for plotting. - Default value: False - flip_ref: bool - Set this flag to True to flip the vertical dimension of - 3D variables in the Ref dataset. - Default value: False - flip_dev: bool - Set this flag to True to flip the vertical dimension of - 3D variables in the Dev dataset. - Default value: False - use_cmap_RdBu: bool - Set this flag to True to use a blue-white-red colormap - for plotting the raw data in both the Ref and Dev datasets. - Default value: False - verbose: bool - Set this flag to True to enable informative printout. - Default value: False - log_color_scale: bool - Set this flag to True to plot data (not diffs) - on a log color scale. - Default value: False - extra_title_txt: str - Specifies extra text (e.g. a date string such as "Jan2016") - for the top-of-plot title. - Default value: None - extent: list - Defines the extent of the region to be plotted in form - [minlon, maxlon, minlat, maxlat]. - Default value plots extent of input grids. - Default value: [-1000, -1000, -1000, -1000] - n_job: int - Defines the number of simultaneous workers for parallel plotting. - Set to 1 to disable parallel plotting. - Value of -1 allows the application to decide. - Default value: -1 - sigdiff_list: list of str - Returns a list of all quantities having significant - differences (where |max(fractional difference)| > 0.1). 
- Default value: [] - second_ref: xarray Dataset - A dataset of the same model type / grid as refdata, - to be used in diff-of-diffs plotting. - Default value: None - second_dev: xarray Dataset - A dataset of the same model type / grid as devdata, - to be used in diff-of-diffs plotting. - Default value: None - spcdb_dir: str - Directory containing species_database.yml file. - Default value: Path of GCPy code repository - sg_ref_path: str - Path to NetCDF file containing stretched-grid info - (in attributes) for the ref dataset - Default value: '' (will not be read in) - sg_dev_path: str - Path to NetCDF file containing stretched-grid info - (in attributes) for the dev dataset - Default value: '' (will not be read in) - ll_plot_func: str - Function to use for lat/lon single level plotting with possible values - 'imshow' and 'pcolormesh'. imshow is much faster but is slightly displaced - when plotting from dateline to dateline and/or pole to pole. - Default value: 'imshow' - extra_plot_args: various - Any extra keyword arguments are passed through the plotting functions to be used - in calls to pcolormesh() (CS) or imshow() (Lat/Lon). - """ - warnings.showwarning = _warning_format - # Error check arguments - if not isinstance(refdata, xr.Dataset): - raise TypeError("The refdata argument must be an xarray Dataset!") - - if not isinstance(devdata, xr.Dataset): - raise TypeError("The devdata argument must be an xarray Dataset!") - - # Determine if doing diff-of-diffs - if second_ref is not None and second_dev is not None: - diff_of_diffs = True - else: - diff_of_diffs = False - - # Prepare diff-of-diffs datasets if needed - if diff_of_diffs: - refdata, devdata = refdata.load(), devdata.load() - second_ref, second_dev = second_ref.load(), second_dev.load() - -# # If needed, use fake time dim in case dates are different in datasets. -# # This needs more work for case of single versus multiple times. 
-# aligned_time = [np.datetime64('2000-01-01')] * refdata.dims['time'] -# refdata = refdata.assign_coords({'time': aligned_time}) -# devdata = devdata.assign_coords({'time': aligned_time}) -# second_ref = second_ref.assign_coords({'time': aligned_time}) -# second_dev = second_dev.assign_coords({'time': aligned_time}) - - refdata, fracrefdata = get_diff_of_diffs(refdata, second_ref) - devdata, fracdevdata = get_diff_of_diffs(devdata, second_dev) - frac_refstr = 'GCC_dev / GCC_ref' - frac_devstr = 'GCHP_dev / GCHP_ref' - # If no varlist is passed, plot all (surface only for 3D) - if varlist is None: - quiet = not verbose - vardict = compare_varnames(refdata, devdata, quiet=quiet) - varlist = vardict["commonvars3D"] + vardict["commonvars2D"] - print("Plotting all common variables") - n_var = len(varlist) - - # If no PDF name passed, then do not save to PDF - savepdf = True - if pdfname == "": - savepdf = False - # If converting to ug/m3, load the species database - if convert_to_ugm3: - properties = read_config_file( - os.path.join( - spcdb_dir, - "species_database.yml" - ), - quiet=True - ) - - sg_ref_params = [1, 170, -90] - sg_dev_params = [1, 170, -90] - # Get stretched-grid info if passed - if sg_ref_path != '': - sg_ref_attrs = xr.open_dataset(sg_ref_path).attrs - sg_ref_params = [ - sg_ref_attrs['stretch_factor'], - sg_ref_attrs['target_longitude'], - sg_ref_attrs['target_latitude']] - - if sg_dev_path != '': - sg_dev_attrs = xr.open_dataset(sg_dev_path).attrs - sg_dev_params = [ - sg_dev_attrs['stretch_factor'], - sg_dev_attrs['target_longitude'], - sg_dev_attrs['target_latitude']] - - # Get grid info and regrid if necessary - [refres, refgridtype, devres, devgridtype, cmpres, cmpgridtype, regridref, - regriddev, regridany, refgrid, devgrid, cmpgrid, refregridder, - devregridder, refregridder_list, devregridder_list] = create_regridders( - refdata, - devdata, - weightsdir, - cmpres=cmpres, - sg_ref_params=sg_ref_params, - sg_dev_params=sg_dev_params - ) - - # ============================================================== - # Handle grid extents for lat-lon grids - # ============================================================== - - # Get lat/lon extents, if applicable - refminlon, refmaxlon, refminlat, refmaxlat = get_grid_extents(refgrid) - devminlon, devmaxlon, devminlat, devmaxlat = get_grid_extents(devgrid) - - if -1000 not in extent: - cmpminlon, cmpmaxlon, cmpminlat, cmpmaxlat = extent - else: - # Account for 0-360 coordinate scale - uniform_refminlon, uniform_refmaxlon = refminlon, refmaxlon - uniform_devminlon, uniform_devmaxlon = devminlon, devmaxlon - if uniform_refmaxlon > 185: - uniform_refminlon, uniform_refmaxlon = -180, 180 - if uniform_devmaxlon > 185: - uniform_devminlon, uniform_devmaxlon = -180, 180 - - cmpminlon, cmpmaxlon, cmpminlat, cmpmaxlat = \ - [np.max([((uniform_refminlon + 180) % 360) - 180, uniform_devminlon]), - np.min([uniform_refmaxlon, uniform_devmaxlon]), - np.max([refminlat, devminlat]), - np.min([refmaxlat, devmaxlat])] - - # Set plot bounds for non cubed-sphere regridding and plotting - ref_extent = (refminlon, refmaxlon, refminlat, refmaxlat) - dev_extent = (devminlon, devmaxlon, devminlat, devmaxlat) - cmp_extent = (cmpminlon, cmpmaxlon, cmpminlat, cmpmaxlat)
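# (Illustrative note: the ((lon + 180) % 360) - 180 idiom above wraps - # 0-360 longitudes into the -180..180 convention, e.g. 350 -> -10, so - # that extents from grids with different longitude conventions can be - # intersected consistently.)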
# ============================================================== - # Loop over all variables - # ============================================================== - ds_refs = [None] * n_var - frac_ds_refs = [None] * n_var - ds_devs = [None] * n_var - frac_ds_devs = [None] * n_var - for i in range(n_var): - varname = varlist[i] - # ============================================================== - # Slice the data, allowing for no time dimension (bpch) - # ============================================================== - # Ref - ds_refs[i] = slice_by_lev_and_time( - refdata, - varname, - itime, - ilev, - flip_ref - ) - if diff_of_diffs: - frac_ds_refs[i] = slice_by_lev_and_time( - fracrefdata, - varname, - itime, - ilev, - flip_ref - ) - # Dev - ds_devs[i] = slice_by_lev_and_time( - devdata, - varname, - itime, - ilev, - flip_dev - ) - if diff_of_diffs: - frac_ds_devs[i] = slice_by_lev_and_time( - fracdevdata, - varname, - itime, - ilev, - flip_dev - ) - - # ================================================================== - # Handle units as needed - # ================================================================== - - # Convert to ppb if units string is variation of mol/mol - if data_unit_is_mol_per_mol(ds_refs[i]): - ds_refs[i].values = ds_refs[i].values * 1e9 - ds_refs[i].attrs["units"] = "ppb" - if data_unit_is_mol_per_mol(ds_devs[i]): - ds_devs[i].values = ds_devs[i].values * 1e9 - ds_devs[i].attrs["units"] = "ppb" - - # If units string is ppbv (true for bpch data) then rename units - if ds_refs[i].units.strip() == "ppbv": - ds_refs[i].attrs["units"] = "ppb" - if ds_devs[i].units.strip() == "ppbv": - ds_devs[i].attrs["units"] = "ppb" - - # If units string is W/m2 (may be true for bpch data) then rename units - if ds_refs[i].units.strip() == "W/m2": - ds_refs[i].attrs["units"] = "W m-2" - if ds_devs[i].units.strip() == "W/m2": - ds_devs[i].attrs["units"] = "W m-2" - - # If units string is UNITLESS (may be true for bpch data) then rename - # units - if ds_refs[i].units.strip() == "UNITLESS": - ds_refs[i].attrs["units"] = "1" - if ds_devs[i].units.strip() == "UNITLESS": - ds_devs[i].attrs["units"] = "1" - - # Check that units are the same in ref and dev. Will exit with - # an error if they do not match and enforce_units is True (default). - if not check_units(ds_refs[i], ds_devs[i]) and enforce_units: - raise ValueError( - 'Units in ref and dev must match when enforce_units is True') - - # Convert from ppb to ug/m3 if convert_to_ugm3 is passed as true - if convert_to_ugm3: - - # Error checks: must pass met, not normalize by area, and be in ppb - if refmet is None or devmet is None: - msg = "Met data must be passed to convert units to ug/m3." - raise ValueError(msg) - if normalize_by_area: - msg = "Normalizing by area is not allowed if plotting ug/m3" - raise ValueError(msg) - if ds_refs[i].units != "ppb" or ds_devs[i].units != "ppb": - msg = "Units must be mol/mol if converting to ug/m3." - raise ValueError(msg) - - # Slice air density data by lev and time - # (assume same format and dimensions as refdata and devdata) - ref_airden = slice_by_lev_and_time( - refmet, - "Met_AIRDEN", - itime, - ilev, - False - ) - dev_airden = slice_by_lev_and_time( - devmet, - "Met_AIRDEN", - itime, - ilev, - False - ) - - # Get a list of properties for the given species - spc_name = varname.replace(varname.split("_")[0] + "_", "") - species_properties = properties.get(spc_name) - - # If no properties are found, then exit with an error. - # Otherwise, get the molecular weight in g/mol. - if species_properties is None: - # Hack lumped species until we implement a solution - if spc_name in ["Simple_SOA", "Complex_SOA"]: - spc_mw_g = 150.0 - else: - msg = f"No properties found for {spc_name}. Cannot convert" \ - + " to ug/m3." - raise ValueError(msg) - else: - spc_mw_g = species_properties.get("MW_g") - if spc_mw_g is None: - msg = f"Molecular weight not found for species {spc_name}!" \ - + " Cannot convert to ug/m3." - raise ValueError(msg) - - # Convert values from ppb to ug/m3: - # ug/m3 = ppb * 1e-9 * (air density in kg/m3) * 1e3 g/kg - # / (air MW in g/mol) * (spc MW in g/mol) * 1e6 ug/g - # = ppb * air density * (spc MW / air MW) - ds_refs[i].values = ds_refs[i].values * ref_airden.values \ - * (spc_mw_g / MW_AIR_g) - ds_devs[i].values = ds_devs[i].values * dev_airden.values \ - * (spc_mw_g / MW_AIR_g) - - # Update units string - ds_refs[i].attrs["units"] = "\u03BCg/m3" # ug/m3 using mu - ds_devs[i].attrs["units"] = "\u03BCg/m3"
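# (Worked example with illustrative numbers: for an O3-like species - # with spc_mw_g = 48.0 g/mol, an air density of 1.2 kg/m3, and a - # mixing ratio of 40 ppb, the conversion gives - # 40 * 1.2 * (48.0 / 28.9644) ~= 79.5 ug/m3, - # since the 1e-9 (ppb), 1e3 (g/kg), and 1e6 (ug/g) factors cancel.)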
# ================================================================== - # Get the area variables if normalize_by_area=True. They can be - # either in the main datasets as variable AREA or in the optionally - # passed meteorology datasets as Met_AREAM2. - # ================================================================== - if normalize_by_area: - # ref - if "AREA" in refdata.data_vars.keys(): - ref_area = refdata["AREA"] - elif refmet is not None: - if "Met_AREAM2" in refmet.data_vars.keys(): - ref_area = refmet["Met_AREAM2"] - else: - msg = "normalize_by_area = True but AREA not " \ - + "present in the Ref dataset and ref met with Met_AREAM2" \ - + " not passed!" - raise ValueError(msg) - if "time" in ref_area.dims: - ref_area = ref_area.isel(time=0) - if refgridtype == 'cs': - ref_area = reshape_MAPL_CS(ref_area) - - # dev - if "AREA" in devdata.data_vars.keys(): - dev_area = devdata["AREA"] - elif devmet is not None: - if "Met_AREAM2" in devmet.data_vars.keys(): - dev_area = devmet["Met_AREAM2"] - else: - msg = "normalize_by_area = True but AREA not " \ - + "present in the Dev dataset and dev met with Met_AREAM2" \ - + " not passed!" - raise ValueError(msg) - if "time" in dev_area.dims: - dev_area = dev_area.isel(time=0) - if devgridtype == 'cs': - dev_area = reshape_MAPL_CS(dev_area) - - # Make sure the areas do not have a lev dimension - if "lev" in ref_area.dims: - ref_area = ref_area.isel(lev=0) - if "lev" in dev_area.dims: - dev_area = dev_area.isel(lev=0) - - # ============================================================== - # Reshape cubed sphere data if using MAPL v1.0.0+ - # TODO: update function to expect data in this format - # ============================================================== - - for i in range(n_var): - ds_refs[i] = reshape_MAPL_CS(ds_refs[i]) - ds_devs[i] = reshape_MAPL_CS(ds_devs[i]) - #ds_ref_cmps[i] = reshape_MAPL_CS(ds_ref_cmps[i]) - #ds_dev_cmps[i] = reshape_MAPL_CS(ds_dev_cmps[i]) - if diff_of_diffs: - frac_ds_refs[i] = reshape_MAPL_CS(frac_ds_refs[i]) - frac_ds_devs[i] = reshape_MAPL_CS(frac_ds_devs[i]) - #frac_ds_ref_cmps[i] = reshape_MAPL_CS(frac_ds_ref_cmps[i]) - #frac_ds_dev_cmps[i] = reshape_MAPL_CS(frac_ds_dev_cmps[i]) - - - # ================================================================== - # Create arrays for each variable in Ref and Dev datasets - # and do any necessary horizontal regridding. 'cmp' stands for comparison - # and represents ref and dev data regridded as needed to a common - # grid type and resolution for use in difference and ratio plots.
- # ================================================================== - ds_ref_cmps = [None] * n_var - ds_dev_cmps = [None] * n_var - frac_ds_ref_cmps = [None] * n_var - frac_ds_dev_cmps = [None] * n_var - - global_cmp_grid = call_make_grid(cmpres, cmpgridtype)[0] - # Get grid limited to cmp_extent for comparison datasets - # Do not do this for cross-dateline plotting - if cmp_extent[0] < cmp_extent[1]: - regional_cmp_extent = cmp_extent - else: - regional_cmp_extent = [-180, 180, -90, 90] - - regional_cmp_grid = call_make_grid(cmpres, cmpgridtype, - in_extent=[-180,180,-90,90], - out_extent=regional_cmp_extent)[0] - - # Get comparison data extents in same midpoint format as lat-lon grid. - cmp_mid_minlon, cmp_mid_maxlon, cmp_mid_minlat, cmp_mid_maxlat = \ - get_grid_extents(regional_cmp_grid, edges=False) - - cmpminlon_ind = np.where(global_cmp_grid["lon"] >= cmp_mid_minlon)[0][0] - cmpmaxlon_ind = np.where(global_cmp_grid["lon"] <= cmp_mid_maxlon)[0][-1] - cmpminlat_ind = np.where(global_cmp_grid["lat"] >= cmp_mid_minlat)[0][0] - cmpmaxlat_ind = np.where(global_cmp_grid["lat"] <= cmp_mid_maxlat)[0][-1] - - for i in range(n_var): - ds_ref = ds_refs[i] - ds_dev = ds_devs[i] - - # Do area normalization before regridding if normalize_by_area is True. - # Assumes units are the same in ref and dev. If enforce_units is passed - # as false then normalization may not be correct. - if normalize_by_area: - exclude_list = ["WetLossConvFrac", "Prod_", "Loss_"] - if not any(s in varname for s in exclude_list): - ds_ref.values = ds_ref.values / ref_area.values - ds_dev.values = ds_dev.values / dev_area.values - ds_refs[i] = ds_ref - ds_devs[i] = ds_dev - if diff_of_diffs: - frac_ds_refs[i] = frac_ds_refs[i].values / ref_area.values - frac_ds_devs[i] = frac_ds_devs[i].values / dev_area.values - ref_cs_res = refres - dev_cs_res = devres - if cmpgridtype == "cs": - ref_cs_res = cmpres - dev_cs_res = cmpres - # Ref - ds_ref_cmps[i] = regrid_comparison_data( - ds_ref, - ref_cs_res, - regridref, - refregridder, - refregridder_list, - global_cmp_grid, - refgridtype, - cmpgridtype, - cmpminlat_ind, - cmpmaxlat_ind, - cmpminlon_ind, - cmpmaxlon_ind - ) - # Dev - ds_dev_cmps[i] = regrid_comparison_data( - ds_dev, - dev_cs_res, - regriddev, - devregridder, - devregridder_list, - global_cmp_grid, - devgridtype, - cmpgridtype, - cmpminlat_ind, - cmpmaxlat_ind, - cmpminlon_ind, - cmpmaxlon_ind - ) - # Diff of diffs - if diff_of_diffs: - frac_ds_ref_cmps[i] = regrid_comparison_data( - frac_ds_refs[i], - ref_cs_res, - regridref, - refregridder, - refregridder_list, - global_cmp_grid, - refgridtype, - cmpgridtype, - cmpminlat_ind, - cmpmaxlat_ind, - cmpminlon_ind, - cmpmaxlon_ind - ) - frac_ds_dev_cmps[i] = regrid_comparison_data( - frac_ds_devs[i], - dev_cs_res, - regriddev, - devregridder, - devregridder_list, - global_cmp_grid, - devgridtype, - cmpgridtype, - cmpminlat_ind, - cmpmaxlat_ind, - cmpminlon_ind, - cmpmaxlon_ind - ) - # ================================================================= - # Define function to create a single page figure to be called - # in a parallel loop - # ================================================================= - def createfig(ivar, temp_dir=''): - - # Suppress harmless run-time warnings (mostly about underflow) - warnings.filterwarnings('ignore', category=RuntimeWarning) - warnings.filterwarnings('ignore', category=UserWarning) - - if savepdf and verbose: - print(f"{ivar} ", end="") - varname = varlist[ivar] - - ds_ref = ds_refs[ivar] - ds_dev = ds_devs[ivar] - - # 
============================================================== - # Set units and subtitle, including modification if normalizing - # by area. Note if enforce_units is False (non-default) then - # units on difference plots will be wrong. - # ============================================================== - cmn_units = ds_ref.attrs["units"] - subtitle_extra = "" - if normalize_by_area: - exclude_list = ["WetLossConvFrac", "Prod_", "Loss_"] - if not any(s in varname for s in exclude_list): - if "/" in cmn_units: - cmn_units = f"{cmn_units}/m2" - else: - cmn_units = f"{cmn_units} m-2" - ds_ref.attrs["units"] = cmn_units - ds_dev.attrs["units"] = cmn_units - subtitle_extra = ", Normalized by Area" - - # ============================================================== - # Get comparison data sets, regridding input slices if needed - # ============================================================== - - # Reshape ref/dev cubed sphere data, if any - ds_ref_reshaped = None - if refgridtype == "cs": - ds_ref_reshaped = ds_ref.data.reshape(6, refres, refres) - ds_dev_reshaped = None - if devgridtype == "cs": - ds_dev_reshaped = ds_dev.data.reshape(6, devres, devres) - - ds_ref_cmp = ds_ref_cmps[ivar] - ds_dev_cmp = ds_dev_cmps[ivar] - frac_ds_ref_cmp = frac_ds_ref_cmps[ivar] - frac_ds_dev_cmp = frac_ds_dev_cmps[ivar] - - # Reshape comparison cubed sphere data, if any - if cmpgridtype == "cs": - def call_reshape(cmp_data): - new_data = None - if isinstance(cmp_data, xr.DataArray): - new_data = cmp_data.data.reshape(6, cmpres, cmpres) - elif isinstance(cmp_data, np.ndarray): - new_data = cmp_data.reshape(6, cmpres, cmpres) - return new_data - - ds_ref_cmp_reshaped = call_reshape(ds_ref_cmp) - ds_dev_cmp_reshaped = call_reshape(ds_dev_cmp) - frac_ds_ref_cmp_reshaped = call_reshape(frac_ds_ref_cmp) - frac_ds_dev_cmp_reshaped = call_reshape(frac_ds_dev_cmp) - - # ============================================================== - # Get min and max values for use in the colorbars - # ============================================================== - - # Choose from values within plot extent - if -1000 not in extent: - min_max_extent = extent - else: - min_max_extent = cmp_extent - # Find min and max lon - min_max_minlon = np.min([min_max_extent[0], min_max_extent[1]]) - min_max_maxlon = np.max([min_max_extent[0], min_max_extent[1]]) - min_max_minlat = min_max_extent[2] - min_max_maxlat = min_max_extent[3] - - def get_extent_for_colors(ds, minlon, maxlon, minlat, maxlat): - ds_new = ds.copy() - lat_var = 'lat' - lon_var = 'lon' - # Account for cubed-sphere data - if 'lons' in ds_new.coords: - lat_var = 'lats' - lon_var = 'lons' - if ds_new['lon'].max() > 190: - minlon = minlon % 360 - maxlon = maxlon % 360 - # account for global plot - if minlon == maxlon and maxlon == 180: - minlon = 0 - maxlon = 360 - # account for cross dateline - if minlon > maxlon: - minlon, maxlon = maxlon, minlon - return ds_new.where(ds_new[lon_var] >= minlon, drop=True).\ - where(ds_new[lon_var] <= maxlon, drop=True).\ - where(ds_new[lat_var] >= minlat, drop=True).\ - where(ds_new[lat_var] <= maxlat, drop=True) - - ds_ref_reg = get_extent_for_colors(ds_ref, min_max_minlon, min_max_maxlon, min_max_minlat, min_max_maxlat) - ds_dev_reg = get_extent_for_colors(ds_dev, min_max_minlon, min_max_maxlon, min_max_minlat, min_max_maxlat) - - # Ref - vmin_ref = float(np.nanmin(ds_ref_reg.data)) - vmax_ref = float(np.nanmax(ds_ref_reg.data)) - - # Dev - vmin_dev = float(np.nanmin(ds_dev_reg.data)) - vmax_dev = float(np.nanmax(ds_dev_reg.data))
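# (Illustrative note: vmin/vmax are computed from the region-cropped - # arrays above, so the colorbar range reflects only the plotted - # domain; e.g. with a hypothetical extent=[-130, -60, 20, 55], an - # extreme value at 0 degrees longitude would not widen the Ref/Dev - # color range.)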
# Comparison - if cmpgridtype == "cs": - vmin_ref_cmp = float(np.nanmin(ds_ref_cmp)) - vmax_ref_cmp = float(np.nanmax(ds_ref_cmp)) - vmin_dev_cmp = float(np.nanmin(ds_dev_cmp)) - vmax_dev_cmp = float(np.nanmax(ds_dev_cmp)) -# vmin_cmp = np.nanmin([vmin_ref_cmp, vmin_dev_cmp]) -# vmax_cmp = np.nanmax([vmax_ref_cmp, vmax_dev_cmp]) -# else: -# vmin_cmp = np.nanmin([np.nanmin(ds_ref_cmp), np.nanmin(ds_dev_cmp)]) -# vmax_cmp = np.nanmax([np.nanmax(ds_ref_cmp), np.nanmax(ds_dev_cmp)]) - - # Get overall min & max - vmin_abs = np.nanmin([vmin_ref, vmin_dev])#, vmin_cmp]) - vmax_abs = np.nanmax([vmax_ref, vmax_dev])#, vmax_cmp]) - # ============================================================== - # Test if Ref and/or Dev contain all zeroes or all NaNs. - # This will have implications as to how we set min and max - # values for the color ranges below. - # ============================================================== - - ref_is_all_zero, ref_is_all_nan = all_zero_or_nan(ds_ref.values) - dev_is_all_zero, dev_is_all_nan = all_zero_or_nan(ds_dev.values) - - # ============================================================== - # Calculate absolute difference - # ============================================================== - if cmpgridtype == "ll": - absdiff = np.array(ds_dev_cmp) - np.array(ds_ref_cmp) - else: - absdiff = ds_dev_cmp_reshaped - ds_ref_cmp_reshaped - # Test if the abs. diff. is zero everywhere or NaN everywhere - absdiff_is_all_zero, absdiff_is_all_nan = all_zero_or_nan(absdiff) - # For cubed-sphere, take special care to avoid a spurious - # boundary line, as described here: https://stackoverflow.com/ - # questions/46527456/preventing-spurious-horizontal-lines-for- - # ungridded-pcolormesh-data - if cmpgridtype == "cs": - absdiff = np.ma.masked_where(np.abs(cmpgrid["lon"] - 180) < 2, - absdiff) - - # ============================================================== - # Calculate fractional difference, set divides by zero to NaN - # ============================================================== - if cmpgridtype == "ll": - # Replace fractional difference plots with absolute difference - # of fractional datasets if necessary - if frac_ds_dev_cmp is not None and frac_ds_ref_cmp is not None: - fracdiff = np.array(frac_ds_dev_cmp) - \ - np.array(frac_ds_ref_cmp) - else: - fracdiff = np.abs(np.array(ds_dev_cmp)) / \ - np.abs(np.array(ds_ref_cmp)) - else: - if frac_ds_dev_cmp is not None and frac_ds_ref_cmp is not None: - fracdiff = frac_ds_dev_cmp_reshaped - \ - frac_ds_ref_cmp_reshaped - else: - fracdiff = np.abs(ds_dev_cmp_reshaped) / \ - np.abs(ds_ref_cmp_reshaped) - - # Replace Infinity values with NaN - fracdiff = np.where(np.abs(fracdiff) == np.inf, np.nan, fracdiff) - fracdiff[np.abs(fracdiff) > 1e308] = np.nan
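# (Illustrative note: where ref is zero, |dev| / |ref| yields inf, - # e.g. np.abs(np.array([2.0])) / np.abs(np.array([0.0])) -> [inf]; - # the two lines above convert such values to NaN so that they are - # ignored by the nan-aware min/max and significance checks below.)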
# Test if the frac. diff. is zero everywhere or NaN everywhere - fracdiff_is_all_zero = not np.any(fracdiff) or \ - (np.nanmin(fracdiff) == 0 and - np.nanmax(fracdiff) == 0) - fracdiff_is_all_nan = np.isnan(fracdiff).all() or ref_is_all_zero - - # For cubed-sphere, take special care to avoid a spurious - # boundary line, as described here: https://stackoverflow.com/ - # questions/46527456/preventing-spurious-horizontal-lines-for- - # ungridded-pcolormesh-data - if cmpgridtype == "cs": - fracdiff = np.ma.masked_where(np.abs(cmpgrid["lon"] - 180) < 2, - fracdiff) - - # ============================================================== - # Create 3x2 figure - # ============================================================== - - # Create figures and axes objects - # Also define the map projection that will be shown - if extent[0] > extent[1]: - proj = ccrs.PlateCarree(central_longitude=180) - else: - proj = ccrs.PlateCarree() - figs, ((ax0, ax1), (ax2, ax3), (ax4, ax5)) = plt.subplots( - 3, 2, figsize=[12, 14], - subplot_kw={"projection": proj} - ) - # Ensure subplots don't overlap when invoking plt.show() - if not savepdf: - plt.subplots_adjust(hspace=0.4) - # Give the figure a title - offset = 0.96 - fontsize = 25 - if "lev" in ds_ref.dims and "lev" in ds_dev.dims: - if ilev == 0: - levstr = "Surface" - elif ilev == 22: - levstr = "500 hPa" - else: - levstr = "Level " + str(ilev - 1) - if extra_title_txt is not None: - figs.suptitle( - f"{varname}, {levstr} ({extra_title_txt})", - fontsize=fontsize, - y=offset, - ) - else: - figs.suptitle( - f"{varname}, {levstr}", - fontsize=fontsize, y=offset - ) - elif ( - "lat" in ds_ref.dims - and "lat" in ds_dev.dims - and "lon" in ds_ref.dims - and "lon" in ds_dev.dims - ): - if extra_title_txt is not None: - figs.suptitle( - f"{varname} ({extra_title_txt})", - fontsize=fontsize, - y=offset, - ) - else: - figs.suptitle( - f"{varname}", - fontsize=fontsize, - y=offset) - else: - print(f"Incorrect dimensions for {varname}!") - - # ============================================================== - # Set colormaps for data plots - # - # Use shallow copy (copy.copy()) to create color map objects, - # in order to avoid set_bad() from being applied to the base - # color table. See: https://docs.python.org/3/library/copy.html - # ============================================================== - - # Colormaps for 1st row (Ref and Dev) - if use_cmap_RdBu: - cmap_toprow_nongray = copy.copy(mpl.cm.RdBu_r) - cmap_toprow_gray = copy.copy(mpl.cm.RdBu_r) - else: - cmap_toprow_nongray = copy.copy(WhGrYlRd) - cmap_toprow_gray = copy.copy(WhGrYlRd) - cmap_toprow_gray.set_bad(color="gray") - - if refgridtype == "ll": - if ref_is_all_nan: - ref_cmap = cmap_toprow_gray - else: - ref_cmap = cmap_toprow_nongray - - if dev_is_all_nan: - dev_cmap = cmap_toprow_gray - else: - dev_cmap = cmap_toprow_nongray
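# (Illustrative note: copy.copy() matters because set_bad() mutates a - # colormap in place; e.g. - # gray_map = copy.copy(mpl.cm.RdBu_r) - # gray_map.set_bad(color="gray") - # colors NaN cells gray in the local copy while leaving the global - # matplotlib RdBu_r colormap untouched.)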
# Colormaps for 2nd row (Abs. Diff.) and 3rd row (Frac. Diff.) - cmap_nongray = copy.copy(mpl.cm.RdBu_r) - cmap_gray = copy.copy(mpl.cm.RdBu_r) - cmap_gray.set_bad(color="gray") - - # ============================================================== - # Set titles for plots - # ============================================================== - - if refgridtype == "ll": - ref_title = f"{refstr} (Ref){subtitle_extra}\n{refres}" - else: - ref_title = f"{refstr} (Ref){subtitle_extra}\nc{refres}" - - if devgridtype == "ll": - dev_title = f"{devstr} (Dev){subtitle_extra}\n{devres}" - else: - dev_title = f"{devstr} (Dev){subtitle_extra}\nc{devres}" - if regridany: - absdiff_dynam_title = \ - f"Difference ({cmpres})\nDev - Ref, Dynamic Range" - absdiff_fixed_title = \ - f"Difference ({cmpres})\nDev - Ref, Restricted Range [5%,95%]" - if diff_of_diffs: - fracdiff_dynam_title = \ - f"Difference ({cmpres}), Dynamic Range\n{frac_devstr} - {frac_refstr}" - fracdiff_fixed_title = \ - f"Difference ({cmpres}), Restricted Range [5%,95%]\n{frac_devstr} - {frac_refstr}" - else: - fracdiff_dynam_title = \ - f"Ratio ({cmpres})\nDev/Ref, Dynamic Range" - fracdiff_fixed_title = \ - f"Ratio ({cmpres})\nDev/Ref, Fixed Range" - else: - absdiff_dynam_title = "Difference\nDev - Ref, Dynamic Range" - absdiff_fixed_title = \ - "Difference\nDev - Ref, Restricted Range [5%,95%]" - if diff_of_diffs: - fracdiff_dynam_title = \ - f"Difference, Dynamic Range\n{frac_devstr} - {frac_refstr}" - fracdiff_fixed_title = \ - f"Difference, Restricted Range [5%,95%]\n{frac_devstr} - {frac_refstr}" - else: - fracdiff_dynam_title = "Ratio \nDev/Ref, Dynamic Range" - fracdiff_fixed_title = "Ratio \nDev/Ref, Fixed Range" - - # ============================================================== - # Bundle variables for 6 parallel plotting calls - # 0 = Ref 1 = Dev - # 2 = Dynamic abs diff 3 = Restricted abs diff - # 4 = Dynamic frac diff 5 = Restricted frac diff - # ============================================================== - - subplots = [ - "ref", "dev", - "dyn_abs_diff", "res_abs_diff", - "dyn_frac_diff", "res_frac_diff", - ] - if diff_of_diffs: - subplots = ["ref", "dev", - "dyn_abs_diff", "res_abs_diff", - "dyn_abs_diff", "res_abs_diff"] - - all_zeros = [ - ref_is_all_zero, - dev_is_all_zero, - absdiff_is_all_zero, - absdiff_is_all_zero, - fracdiff_is_all_zero, - fracdiff_is_all_zero, - ] - - all_nans = [ - ref_is_all_nan, - dev_is_all_nan, - absdiff_is_all_nan, - absdiff_is_all_nan, - fracdiff_is_all_nan, - fracdiff_is_all_nan, - ] - if -1000 not in extent: - extents = [extent[:], extent[:], - extent[:], extent[:], - extent[:], extent[:]] - else: - plot_extent = [np.max([cmp_extent[0], -180]), - np.min([cmp_extent[1], 180]), - cmp_extent[2], cmp_extent[3]] - extents = [plot_extent[:], plot_extent[:], - plot_extent[:], plot_extent[:], - plot_extent[:], plot_extent[:]] - plot_vals = [ds_ref, ds_dev, absdiff, absdiff, fracdiff, fracdiff] - grids = [refgrid, devgrid, regional_cmp_grid.copy(), regional_cmp_grid.copy(), - regional_cmp_grid.copy(), regional_cmp_grid.copy()] - axs = [ax0, ax1, ax2, ax3, ax4, ax5] - rowcols = [(0, 0), (0, 1), (1, 0), (1, 1), (2, 0), (2, 1)] - titles = [ - ref_title, - dev_title, - absdiff_dynam_title, - absdiff_fixed_title, - fracdiff_dynam_title, - fracdiff_fixed_title, - ] - - if refgridtype == "ll": - cmaps = [ref_cmap, dev_cmap, cmap_gray, - cmap_gray, cmap_gray, cmap_gray] - else: - cmaps = [ - cmap_toprow_nongray, - cmap_toprow_nongray, - cmap_nongray, - cmap_nongray, - cmap_nongray, - cmap_nongray, - ] - - ref_masked = None - dev_masked = None - if refgridtype == 
"cs": - ref_masked = np.ma.masked_where( - np.abs(refgrid["lon"] - 180) < 2, ds_ref_reshaped - ) - if devgridtype == "cs": - dev_masked = np.ma.masked_where( - np.abs(devgrid["lon"] - 180) < 2, ds_dev_reshaped - ) - masked = [ref_masked, dev_masked, absdiff, absdiff, fracdiff, fracdiff] - - gridtypes = [ - refgridtype, - devgridtype, - cmpgridtype, - cmpgridtype, - cmpgridtype, - cmpgridtype, - ] - - unit_list = [ds_ref.units, ds_dev.units, cmn_units, - cmn_units, "unitless", "unitless"] - - other_all_nans = [dev_is_all_nan, ref_is_all_nan, - False, False, False, False] - - mins = [vmin_ref, vmin_dev, vmin_abs] - maxs = [vmax_ref, vmax_dev, vmax_abs] - - ratio_logs = [False, False, False, False, True, True] - # Plot - for i in range(6): - six_plot( - subplots[i], - all_zeros[i], - all_nans[i], - plot_vals[i], - grids[i], - axs[i], - rowcols[i], - titles[i], - cmaps[i], - unit_list[i], - extents[i], - masked[i], - other_all_nans[i], - gridtypes[i], - mins, - maxs, - use_cmap_RdBu, - match_cbar, - verbose, - log_color_scale, - plot_type="single_level", - ratio_log=ratio_logs[i], - proj=proj, - ll_plot_func=ll_plot_func, - **extra_plot_args - ) - - # ============================================================== - # Add this page of 6-panel plots to a PDF file - # ============================================================== - if savepdf: - folders = pdfname.split('/') - pdfname_temp = folders[-1] + "BENCHMARKFIGCREATION.pdf" + str(ivar) - full_path = temp_dir - for folder in folders[:-1]: - full_path = os.path.join(full_path, folder) - if not os.path.isdir(full_path): - try: - os.mkdir(full_path) - except FileExistsError: - pass - pdf = PdfPages(os.path.join(full_path, pdfname_temp)) - pdf.savefig(figs) - pdf.close() - plt.close(figs) - # ============================================================== - # Update the list of variables with significant differences. - # Criterion: abs(1 - max(fracdiff)) > 0.1 - # Do not include NaNs in the criterion, because these indicate - # places where fracdiff could not be computed (div-by-zero). 
- # ============================================================== - if np.abs(1 - np.nanmax(fracdiff)) > 0.1: - sigdiff_list.append(varname) - return varname - return "" - - # ================================================================== - # Call figure generation function in a parallel loop over variables - # ================================================================== - # do not attempt nested thread parallelization due to issues with - # matplotlib - if current_process().name != "MainProcess": - n_job = 1 - - if not savepdf: - # disable parallel plotting to allow interactive figure plotting - for i in range(n_var): - createfig(i) - - else: - with TemporaryDirectory() as temp_dir: - results = Parallel(n_jobs=n_job)(delayed(createfig)(i, temp_dir) - for i in range(n_var)) - # update sig diffs after parallel calls - if current_process().name == "MainProcess": - for varname in results: - if isinstance(varname, str): - sigdiff_list.append(varname) - - # ================================================================== - # Finish - # ================================================================== - if verbose: - print("Closed PDF") - merge = PdfFileMerger() - #print("Creating {} for {} variables".format(pdfname, n_var)) - pdf = PdfPages(pdfname) - pdf.close() - for i in range(n_var): - temp_pdfname = pdfname - if pdfname[0] == '/': - temp_pdfname = temp_pdfname[1:] - merge.append( - os.path.join( - str(temp_dir), - temp_pdfname + - "BENCHMARKFIGCREATION.pdf" + - str(i))) - merge.write(pdfname) - merge.close() - warnings.showwarning = _warning_format
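# Note on diff-of-diffs usage (an illustrative sketch, not part of the - # original code): judging from the frac_refstr/frac_devstr labels used - # above, refdata/second_ref would typically hold GCC Ref/Dev output and - # devdata/second_dev the corresponding GCHP output, e.g.: - # - # compare_single_level(gcc_ref, "GCC", gchp_ref, "GCHP", - # second_ref=gcc_dev, second_dev=gchp_dev, - # pdfname="diff_of_diffs.pdf") - # - # so that the difference panels compare the GCC and GCHP - # version-to-version changes rather than the raw fields.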
- - def compare_zonal_mean( - refdata, - refstr, - devdata, - devstr, - varlist=None, - itime=0, - refmet=None, - devmet=None, - weightsdir='.', - pdfname="", - cmpres=None, - match_cbar=True, - pres_range=[0, 2000], - normalize_by_area=False, - enforce_units=True, - convert_to_ugm3=False, - flip_ref=False, - flip_dev=False, - use_cmap_RdBu=False, - verbose=False, - log_color_scale=False, - log_yaxis=False, - extra_title_txt=None, - n_job=-1, - sigdiff_list=[], - second_ref=None, - second_dev=None, - spcdb_dir=os.path.dirname(__file__), - sg_ref_path='', - sg_dev_path='', - ref_vert_params=[[], []], - dev_vert_params=[[], []], - **extra_plot_args -): - """ - Create 3x2 comparison zonal-mean plots for variables - common in two xarray Datasets. Optionally save to PDF. - - Args: - refdata: xarray dataset - Dataset used as reference in comparison - refstr: str - String description for reference data to be used in plots - devdata: xarray dataset - Dataset used as development in comparison - devstr: str - String description for development data to be used in plots - - Keyword Args (optional): - varlist: list of strings - List of xarray dataset variable names to make plots for - Default value: None (will compare all common 3D variables) - itime: integer - Dataset time dimension index using 0-based system - Default value: 0 - refmet: xarray dataset - Dataset containing ref meteorology - Default value: None - devmet: xarray dataset - Dataset containing dev meteorology - Default value: None - weightsdir: str - Directory path for storing regridding weights - Default value: None (will create/store weights in - current directory) - pdfname: str - File path to save plots as PDF - Default value: Empty string (will not create PDF) - cmpres: str - String description of grid resolution at which - to compare datasets - Default value: None (will compare at highest resolution - of Ref and Dev) - match_cbar: bool - Set this flag to True to use the same colorbar bounds - for both Ref and Dev plots. - Default value: True - pres_range: list of two integers - Pressure range of levels to plot [hPa]. The vertical axis will - span the outer pressure edges of levels that contain pres_range - endpoints. - Default value: [0,2000] - normalize_by_area: bool - Set this flag to True to normalize raw data in both - Ref and Dev datasets by grid area. Input ref and dev datasets - must include AREA variable in m2 if normalizing by area. - Default value: False - enforce_units: bool - Set this flag to True to force an error if the variables in - the Ref and Dev datasets have different units. - Default value: True - convert_to_ugm3: bool - Whether to convert data units to ug/m3 for plotting. - Default value: False - flip_ref: bool - Set this flag to True to flip the vertical dimension of - 3D variables in the Ref dataset. - Default value: False - flip_dev: bool - Set this flag to True to flip the vertical dimension of - 3D variables in the Dev dataset. - Default value: False - use_cmap_RdBu: bool - Set this flag to True to use a blue-white-red colormap for - plotting raw reference and development datasets. - Default value: False - verbose: bool - Set this flag to True to enable informative printout. - Default value: False - log_color_scale: bool - Set this flag to True to enable plotting data (not diffs) - on a log color scale. - Default value: False - log_yaxis: bool - Set this flag to True if you wish to create zonal mean - plots with a log-pressure Y-axis. - Default value: False - extra_title_txt: str - Specifies extra text (e.g. a date string such as "Jan2016") - for the top-of-plot title. - Default value: None - n_job: int - Defines the number of simultaneous workers for parallel plotting. - Set to 1 to disable parallel plotting. - Value of -1 allows the application to decide. - Default value: -1 - sigdiff_list: list of str - Returns a list of all quantities having significant - differences (where |max(fractional difference)| > 0.1). - Default value: [] - second_ref: xarray Dataset - A dataset of the same model type / grid as refdata, - to be used in diff-of-diffs plotting. - Default value: None - second_dev: xarray Dataset - A dataset of the same model type / grid as devdata, - to be used in diff-of-diffs plotting. - Default value: None - spcdb_dir: str - Directory containing species_database.yml file. - Default value: Path of GCPy code repository - sg_ref_path: str - Path to NetCDF file containing stretched-grid info - (in attributes) for the ref dataset - Default value: '' (will not be read in) - sg_dev_path: str - Path to NetCDF file containing stretched-grid info - (in attributes) for the dev dataset - Default value: '' (will not be read in) - ref_vert_params: list(AP, BP) of list-like types - Hybrid grid parameters A (in hPa) and B (unitless). - Needed if ref grid is not 47 or 72 levels. - Default value: [[], []] - dev_vert_params: list(AP, BP) of list-like types - Hybrid grid parameters A (in hPa) and B (unitless). - Needed if dev grid is not 47 or 72 levels. - Default value: [[], []] - extra_plot_args: various - Any extra keyword arguments are passed through the plotting functions - for use in calls to pcolormesh() (CS) or imshow() (Lat/Lon).
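Example: - A minimal usage sketch (the file names below are hypothetical; - both files are assumed to contain 3D SpeciesConc variables): - - >>> import xarray as xr - >>> ref = xr.open_dataset("GEOSChem.SpeciesConc.Ref.nc4") - >>> dev = xr.open_dataset("GEOSChem.SpeciesConc.Dev.nc4") - >>> compare_zonal_mean(ref, "Ref version", dev, "Dev version", - ... varlist=["SpeciesConc_O3"], - ... pres_range=[100, 1000], - ... pdfname="O3_zonal_mean.pdf")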
""" - warnings.showwarning = _warning_format - if not isinstance(refdata, xr.Dataset): - raise TypeError("The refdata argument must be an xarray Dataset!") - - if not isinstance(devdata, xr.Dataset): - raise TypeError("The devdata argument must be an xarray Dataset!") - - # Determine if doing diff-of-diffs - if second_ref is not None and second_dev is not None: - diff_of_diffs = True - else: - diff_of_diffs = False - - # Prepare diff-of-diffs datasets if needed - if diff_of_diffs: - refdata, devdata = refdata.load(), devdata.load() - second_ref, second_dev = second_ref.load(), second_dev.load() - -# # If needed, use fake time dim in case dates are different in datasets. -# # This needs more work for case of single versus multiple times. -# aligned_time = np.datetime64('2000-01-01') -# refdata = refdata.assign_coords({'time' : [aligned_time]}) -# devdata = devdata.assign_coords({'time' : [aligned_time]}) -# second_ref = second_ref.assign_coords({'time' : [aligned_time]}) -# second_dev = second_dev.assign_coords({'time' : [aligned_time]}) - - refdata, fracrefdata = get_diff_of_diffs(refdata, second_ref) - devdata, fracdevdata = get_diff_of_diffs(devdata, second_dev) - - frac_refstr = 'GCC_dev / GCC_ref' - frac_devstr = 'GCHP_dev / GCHP_ref' - - # If no varlist is passed, plot all 3D variables in the dataset - if varlist is None: - quiet = not verbose - vardict = compare_varnames(refdata, devdata, quiet=quiet) - varlist = vardict["commonvars3D"] - print("Plotting all 3D variables") - n_var = len(varlist) - - # Exit out if there are no 3D variables - if not n_var: - print("WARNING: no 3D variables to plot zonal mean for!") - return - - # If no PDF name passed, then do not save to PDF - savepdf = True - if pdfname == "": - savepdf = False - # If converting to ug/m3, load the species database - if convert_to_ugm3: - properties = read_config_file( - os.path.join( - spcdb_dir, - "species_database.yml" - ), - quiet=True - ) - - # Get mid-point pressure and edge pressures for this grid - ref_pedge, ref_pmid, _ = get_vert_grid(refdata, *ref_vert_params) - dev_pedge, dev_pmid, _ = get_vert_grid(devdata, *dev_vert_params) - - # Get indexes of pressure subrange (full range is default) - ref_pedge_ind = get_pressure_indices(ref_pedge, pres_range) - dev_pedge_ind = get_pressure_indices(dev_pedge, pres_range) - - # Pad edges if subset does not include surface or TOA so data spans - # entire subrange - ref_pedge_ind = pad_pressure_edges( - ref_pedge_ind, - refdata.sizes["lev"], - np.size(ref_pmid)) - dev_pedge_ind = pad_pressure_edges( - dev_pedge_ind, 
devdata.sizes["lev"], - np.size(dev_pmid)) - - # pmid indexes do not include last pedge index - ref_pmid_ind = ref_pedge_ind[:-1] - dev_pmid_ind = dev_pedge_ind[:-1] - - # Convert levels to pressures in ref and dev data - refdata = convert_lev_to_pres(refdata, ref_pmid, ref_pedge) - devdata = convert_lev_to_pres(devdata, dev_pmid, dev_pedge) - - if diff_of_diffs: - fracrefdata = convert_lev_to_pres(fracrefdata, ref_pmid, ref_pedge) - fracdevdata = convert_lev_to_pres(fracdevdata, dev_pmid, dev_pedge) - - # ================================================================== - # Reduce pressure range if reduced range passed as input. Indices - # must be flipped if flipping vertical axis. - # ================================================================== - # this may require checking for 48 / 73 levels - ref_pmid_ind_flipped = refdata.sizes["lev"] - ref_pmid_ind[::-1] - 1 - dev_pmid_ind_flipped = devdata.sizes["lev"] - dev_pmid_ind[::-1] - 1 - if flip_ref: - ref_pmid_ind = ref_pmid_ind_flipped - if flip_dev: - dev_pmid_ind = dev_pmid_ind_flipped - - refdata = refdata.isel(lev=ref_pmid_ind) - devdata = devdata.isel(lev=dev_pmid_ind) - if diff_of_diffs: - fracrefdata = fracrefdata.isel(lev=ref_pmid_ind) - fracdevdata = fracdevdata.isel(lev=dev_pmid_ind) - - sg_ref_params = [1, 170, -90] - sg_dev_params = [1, 170, -90] - # Get stretched-grid info if passed - if sg_ref_path != '': - sg_ref_attrs = xr.open_dataset(sg_ref_path).attrs - sg_ref_params = [ - sg_ref_attrs['stretch_factor'], - sg_ref_attrs['target_longitude'], - sg_ref_attrs['target_latitude']] - - if sg_dev_path != '': - sg_dev_attrs = xr.open_dataset(sg_dev_path).attrs - sg_dev_params = [ - sg_dev_attrs['stretch_factor'], - sg_dev_attrs['target_longitude'], - sg_dev_attrs['target_latitude']] - - [refres, refgridtype, devres, devgridtype, cmpres, cmpgridtype, - regridref, regriddev, regridany, refgrid, devgrid, cmpgrid, - refregridder, devregridder, refregridder_list, devregridder_list] = \ - create_regridders( - refdata, - devdata, - weightsdir=weightsdir, - cmpres=cmpres, - zm=True, - sg_ref_params=sg_ref_params, - sg_dev_params=sg_dev_params - ) - - # use smaller vertical grid as target for vertical regridding - target_index = np.array([len(ref_pedge), len(dev_pedge)]).argmin() - pedge = [ref_pedge, dev_pedge][target_index] - pedge_ind = [ref_pedge_ind, dev_pedge_ind][target_index] - - # ================================================================== - # Loop over all variables - # ================================================================== - ds_refs = [None] * n_var - frac_ds_refs = [None] * n_var - ds_devs = [None] * n_var - frac_ds_devs = [None] * n_var - for i in range(n_var): - - varname = varlist[i] - - # ================================================================== - # Slice the data, allowing for no time dimension (bpch) - # ================================================================== - - # Ref - if "time" in refdata[varname].dims: - ds_refs[i] = refdata[varname].isel(time=itime) - if diff_of_diffs: - frac_ds_refs[i] = fracrefdata[varname].isel(time=itime) - else: - ds_refs[i] = refdata[varname] - if diff_of_diffs: - frac_ds_refs[i] = fracrefdata[varname] - - # Dev - if "time" in devdata[varname].dims: - ds_devs[i] = devdata[varname].isel(time=itime) - if diff_of_diffs: - frac_ds_devs[i] = fracdevdata[varname].isel(time=itime) - - else: - ds_devs[i] = devdata[varname] - if diff_of_diffs: - frac_ds_devs[i] = fracdevdata[varname] - - # ================================================================== - 
# Handle units as needed - # ================================================================== - - # Convert to ppb if units string is variation of mol/mol - if data_unit_is_mol_per_mol(ds_refs[i]): - ds_refs[i].values = ds_refs[i].values * 1e9 - ds_refs[i].attrs["units"] = "ppb" - if data_unit_is_mol_per_mol(ds_devs[i]): - ds_devs[i].values = ds_devs[i].values * 1e9 - ds_devs[i].attrs["units"] = "ppb" - - # If units string is ppbv (true for bpch data) then rename units - if ds_refs[i].units.strip() == "ppbv": - ds_refs[i].attrs["units"] = "ppb" - if ds_devs[i].units.strip() == "ppbv": - ds_devs[i].attrs["units"] = "ppb" - - # If units string is W/m2 (may be true for bpch data) then rename units - if ds_refs[i].units.strip() == "W/m2": - ds_refs[i].attrs["units"] = "W m-2" - if ds_devs[i].units.strip() == "W/m2": - ds_devs[i].attrs["units"] = "W m-2" - - # If units string is UNITLESS (may be true for bpch data) then rename - # units - if ds_refs[i].units.strip() == "UNITLESS": - ds_refs[i].attrs["units"] = "1" - if ds_devs[i].units.strip() == "UNITLESS": - ds_devs[i].attrs["units"] = "1" - - # Check that units are the same in ref and dev. Will exit with - # an error if they do not match and enforce_units is true (default). - if not check_units(ds_refs[i], ds_devs[i]) and enforce_units: - raise ValueError( - 'Units in ref and dev must match when enforce_units is True') - - # Convert from ppb to ug/m3 if convert_to_ugm3 is passed as true - if convert_to_ugm3: - - # Error checks: must pass met, not normalize by area, and be in ppb - if refmet is None or devmet is None: - msg = "Met data must be passed to convert units to ug/m3." - raise ValueError(msg) - if normalize_by_area: - msg = "Normalizing by area is not allowed if plotting ug/m3" - raise ValueError(msg) - if ds_refs[i].units != "ppb" or ds_devs[i].units != "ppb": - msg = "Units must be mol/mol if converting to ug/m3." - raise ValueError(msg) - - # Slice air density data by time and lev - # (assume same format and dimensions as refdata and devdata) - if "time" in refmet["Met_AIRDEN"].dims: - ref_airden = refmet["Met_AIRDEN"].isel(time=itime, - lev=ref_pmid_ind) - else: - ref_airden = refmet["Met_AIRDEN"].isel(lev=ref_pmid_ind) - if "time" in devmet["Met_AIRDEN"].dims: - dev_airden = devmet["Met_AIRDEN"].isel(time=itime, - lev=dev_pmid_ind) - else: - dev_airden = devmet["Met_AIRDEN"].isel(lev=dev_pmid_ind) - - # Get a list of properties for the given species - spc_name = varname.replace(varname.split("_")[0] + "_", "") - species_properties = properties.get(spc_name) - - # If no properties are found, then exit with an error. - # Otherwise, get the molecular weight in g/mol. - if species_properties is None: - # Hack lumped species until we implement a solution - if spc_name in ["Simple_SOA", "Complex_SOA"]: - spc_mw_g = 150.0 - else: - msg = f"No properties found for {spc_name}. Cannot convert" \ - + " to ug/m3." - raise ValueError(msg) - else: - # Get the species molecular weight in g/mol - spc_mw_g = species_properties.get("MW_g") - if spc_mw_g is None: - msg = f"Molecular weight not found for species {spc_name}!" \ - + " Cannot convert to ug/m3." 
- raise ValueError(msg) - - # Convert values from ppb to ug/m3: - # ug/m3 = ppb * 1e-9 * (air density in kg/m3) * 1e3 g/kg - # / (air MW in g/mol) * (spc MW in g/mol) * 1e6 ug/g - # = ppb * air density * (spc MW / air MW) - ds_refs[i].values = ds_refs[i].values * ref_airden.values \ - * (spc_mw_g / MW_AIR_g) - ds_devs[i].values = ds_devs[i].values * dev_airden.values \ - * (spc_mw_g / MW_AIR_g) - - # Update units string - ds_refs[i].attrs["units"] = "\u03BCg/m3" # ug/m3 using mu - ds_devs[i].attrs["units"] = "\u03BCg/m3" - - # ============================================================== - # Reshape cubed sphere data if using MAPL v1.0.0+ - # TODO: update function to expect data in this format - # ============================================================== - - ds_refs[i] = reshape_MAPL_CS(ds_refs[i]) - ds_devs[i] = reshape_MAPL_CS(ds_devs[i]) - if diff_of_diffs: - frac_ds_refs[i] = reshape_MAPL_CS(frac_ds_refs[i]) - frac_ds_devs[i] = reshape_MAPL_CS(frac_ds_devs[i]) - - # Flip in the vertical if applicable - if flip_ref: - ds_refs[i].data = ds_refs[i].data[::-1, :, :] - if diff_of_diffs: - frac_ds_refs[i].data = frac_ds_refs[i].data[::-1, :, :] - if flip_dev: - ds_devs[i].data = ds_devs[i].data[::-1, :, :] - if diff_of_diffs: - frac_ds_devs[i].data = frac_ds_devs[i].data[::-1, :, :] - # ================================================================== - # Get the area variables if normalize_by_area=True. They can be - # either in the main datasets as variable AREA or in the optionally - # passed meteorology datasets as Met_AREAM2. - # ================================================================== - if normalize_by_area: - if "AREA" in refdata.data_vars.keys(): - ref_area = refdata["AREA"] - elif refmet is not None: - if "Met_AREAM2" in refmet.data_vars.keys(): - ref_area = refmet["Met_AREAM2"] - else: - msg = "normalize_by_area = True but AREA not " \ - + "present in the Ref dataset and ref met with Met_AREAM2" \ - + " not passed!" - raise ValueError(msg) - if "time" in ref_area.dims: - ref_area = ref_area.isel(time=0) - if refgridtype == 'cs': - ref_area = reshape_MAPL_CS(ref_area) - - if "AREA" in devdata.data_vars.keys(): - dev_area = devdata["AREA"] - elif devmet is not None: - if "Met_AREAM2" in devmet.data_vars.keys(): - dev_area = devmet["Met_AREAM2"] - else: - msg = "normalize_by_area = True but AREA not " \ - + "present in the Dev dataset and dev met with Met_AREAM2" \ - + " not passed!" - raise ValueError(msg) - if "time" in dev_area.dims: - dev_area = dev_area.isel(time=0) - if devgridtype == 'cs': - dev_area = reshape_MAPL_CS(dev_area) - - # Make sure the areas do not have a lev dimension - if "lev" in ref_area.dims: - ref_area = ref_area.isel(lev=0) - if "lev" in dev_area.dims: - dev_area = dev_area.isel(lev=0)
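# (Illustrative note: when normalize_by_area=True the data are later - # divided by these m2 cell areas, so e.g. a field in kg s-1 becomes - # kg s-1 m-2; the (lat, lon) area array broadcasts over any leading - # lev dimension of the 3D data being normalized.)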
- # ================================================================== - # Create arrays for each variable in the Ref and Dev dataset - # and regrid to the comparison grid. - # ================================================================== - ds_ref_cmps = [None] * n_var - ds_dev_cmps = [None] * n_var - frac_ds_ref_cmps = [None] * n_var - frac_ds_dev_cmps = [None] * n_var - # store units in case data changes from DataArray to numpy array - ref_units = [None] * n_var - dev_units = [None] * n_var - - # regrid vertically if necessary - if len(ref_pedge) != len(pedge): - xmat = gen_xmat(ref_pedge[ref_pedge_ind], pedge[pedge_ind]) - elif len(dev_pedge) != len(pedge): - xmat = gen_xmat(dev_pedge[dev_pedge_ind], pedge[pedge_ind]) - - for i in range(n_var): - - ds_ref = ds_refs[i] - ds_dev = ds_devs[i] - frac_ds_ref = frac_ds_refs[i] - frac_ds_dev = frac_ds_devs[i] - # Do area normalization before regridding if normalize_by_area=True - if normalize_by_area: - exclude_list = ["WetLossConvFrac", "Prod_", "Loss_"] - if not any(s in varname for s in exclude_list): - ds_ref.values = ds_ref.values / ref_area.values - ds_dev.values = ds_dev.values / dev_area.values - ds_refs[i] = ds_ref - ds_devs[i] = ds_dev - if diff_of_diffs: - frac_ds_ref.values = frac_ds_ref.values / ref_area.values - frac_ds_refs[i] = frac_ds_ref - frac_ds_dev.values = frac_ds_dev.values / dev_area.values - frac_ds_devs[i] = frac_ds_dev - - # save units for later use - ref_units[i] = ds_ref.attrs["units"] - dev_units[i] = ds_dev.attrs["units"] - - ref_nlev = len(ds_ref['lev']) - dev_nlev = len(ds_dev['lev']) - - # Regrid variables horizontally - # Ref - ds_ref = regrid_comparison_data( - ds_ref, - refres, - regridref, - refregridder, - refregridder_list, - cmpgrid, - refgridtype, - cmpgridtype, - nlev=ref_nlev - ) - if diff_of_diffs: - frac_ds_ref = regrid_comparison_data( - frac_ds_ref, - refres, - regridref, - refregridder, - refregridder_list, - cmpgrid, - refgridtype, - cmpgridtype, - nlev=ref_nlev - ) - # Dev - ds_dev = regrid_comparison_data( - ds_dev, - devres, - regriddev, - devregridder, - devregridder_list, - cmpgrid, - devgridtype, - cmpgridtype, - nlev=dev_nlev - ) - if diff_of_diffs: - frac_ds_dev = regrid_comparison_data( - frac_ds_dev, - devres, - regriddev, - devregridder, - devregridder_list, - cmpgrid, - devgridtype, - cmpgridtype, - nlev=dev_nlev - ) - - # store regridded CS data before dealing with vertical regridding - if refgridtype == "cs": - ds_refs[i] = ds_ref - frac_ds_refs[i] = frac_ds_ref - if devgridtype == "cs": - ds_devs[i] = ds_dev - frac_ds_devs[i] = frac_ds_dev - - # Reduce variables to smaller vert grid if necessary for comparison - if len(ref_pedge) != len(pedge): - ds_ref = regrid_vertical(ds_ref, xmat, dev_pmid[dev_pmid_ind]) - if diff_of_diffs: - frac_ds_ref = regrid_vertical(frac_ds_ref, xmat, dev_pmid[dev_pmid_ind]) - - if len(dev_pedge) != len(pedge): - ds_dev = regrid_vertical(ds_dev, xmat, ref_pmid[ref_pmid_ind]) - if diff_of_diffs: - frac_ds_dev = regrid_vertical(frac_ds_dev, xmat, ref_pmid[ref_pmid_ind]) - ds_ref_cmps[i] = ds_ref - ds_dev_cmps[i] = ds_dev - if diff_of_diffs: - frac_ds_ref_cmps[i] = frac_ds_ref - frac_ds_dev_cmps[i] = frac_ds_dev
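# (Illustrative note: gen_xmat above builds the vertical interpolation - # matrix from the finer pressure-edge grid onto the coarser one, the - # coarser grid having been chosen earlier as the comparison target via - # target_index; e.g. comparing 72-level against 47-level output - # regrids the 72-level data down to 47 levels, roughly as in - # xmat = gen_xmat(pedge_72[ind_72], pedge_47[ind_47]) # hypothetical names - # before the zonal-mean difference is taken.)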
# Universal plot setup - xtick_positions = np.arange(-90, 91, 30) - xticklabels = [r"{}$\degree$".format(x) for x in xtick_positions] - - # ================================================================== - # Define function to create a single page figure to be called - # in a parallel loop - # ================================================================== - def createfig(ivar, temp_dir=''): - - # Suppress harmless run-time warnings (mostly about underflow) - warnings.filterwarnings('ignore', category=RuntimeWarning) - warnings.filterwarnings('ignore', category=UserWarning) - - if savepdf and verbose: - print(f"{ivar} ", end="") - varname = varlist[ivar] - - # ============================================================== - # Assign data variables - # ============================================================== - ds_ref = ds_refs[ivar] - ds_dev = ds_devs[ivar] - ds_ref_cmp = ds_ref_cmps[ivar] - ds_dev_cmp = ds_dev_cmps[ivar] - frac_ds_ref_cmp = frac_ds_ref_cmps[ivar] - frac_ds_dev_cmp = frac_ds_dev_cmps[ivar] - - # ============================================================== - # Set units and subtitle, including modification if normalizing - # by area. Note if enforce_units is False (non-default) then - # units on difference plots will be wrong. - # ============================================================== - cmn_units = ref_units[ivar] - subtitle_extra = "" - if normalize_by_area: - exclude_list = ["WetLossConvFrac", "Prod_", "Loss_"] - if not any(s in varname for s in exclude_list): - if "/" in cmn_units: - cmn_units = f"{cmn_units}/m2" - else: - cmn_units = f"{cmn_units} m-2" - ref_units[ivar] = cmn_units - dev_units[ivar] = cmn_units - subtitle_extra = ", Normalized by Area" - - # ============================================================== - # Calculate zonal mean - # ============================================================== - # Ref - if refgridtype == "ll": - zm_ref = ds_ref.mean(dim="lon") - else: - zm_ref = ds_ref.mean(axis=2) - - # Dev - if devgridtype == "ll": - zm_dev = ds_dev.mean(dim="lon") - else: - zm_dev = ds_dev.mean(axis=2) - # Comparison - zm_dev_cmp = ds_dev_cmp.mean(axis=2) - zm_ref_cmp = ds_ref_cmp.mean(axis=2) - if diff_of_diffs: - frac_zm_dev_cmp = frac_ds_dev_cmp.mean(axis=2) - frac_zm_ref_cmp = frac_ds_ref_cmp.mean(axis=2) - # ============================================================== - # Get min and max values for use in the colorbars - # and also flag if Ref and/or Dev are all zero or all NaN - # ============================================================== - - # Ref - vmin_ref = float(zm_ref.min()) - vmax_ref = float(zm_ref.max()) - - # Dev - vmin_dev = float(zm_dev.min()) - vmax_dev = float(zm_dev.max()) - - # Comparison - vmin_cmp = np.min([zm_ref_cmp.min(), zm_dev_cmp.min()]) - vmax_cmp = np.max([zm_ref_cmp.max(), zm_dev_cmp.max()]) - - # Take min/max across all grids - vmin_abs = np.min([vmin_ref, vmin_dev, vmin_cmp]) - vmax_abs = np.max([vmax_ref, vmax_dev, vmax_cmp]) - - # ============================================================== - # Test if Ref and/or Dev contain all zeroes or all NaNs. - # This will have implications as to how we set min and max - # values for the color ranges below. - # ============================================================== - ref_values = ds_ref.values if isinstance(ds_ref, xr.DataArray) else ds_ref - dev_values = ds_dev.values if isinstance(ds_dev, xr.DataArray) else ds_dev - ref_is_all_zero, ref_is_all_nan = all_zero_or_nan(ref_values) - dev_is_all_zero, dev_is_all_nan = all_zero_or_nan(dev_values) - - # ============================================================== - # Calculate zonal mean difference - # ============================================================== - - zm_diff = np.array(zm_dev_cmp) - np.array(zm_ref_cmp)
# Test if the abs. diff is zero everywhere or NaN everywhere - absdiff_is_all_zero, absdiff_is_all_nan = all_zero_or_nan(zm_diff) - - # ============================================================== - # Calculate fractional difference, set divides by zero to NaN - # ============================================================== - if diff_of_diffs: - zm_fracdiff = np.array(frac_zm_dev_cmp) - \ - np.array(frac_zm_ref_cmp) - else: - zm_fracdiff = np.abs(np.array(zm_dev_cmp)) / \ - np.abs(np.array(zm_ref_cmp)) - zm_fracdiff = np.where(np.abs(zm_fracdiff) == - np.inf, np.nan, zm_fracdiff) - zm_fracdiff[np.abs(zm_fracdiff) > 1e308] = np.nan - # Test if the frac. diff is zero everywhere or NaN everywhere - fracdiff_is_all_zero = not np.any(zm_fracdiff) or \ - (np.nanmin(zm_fracdiff) == 0 and - np.nanmax(zm_fracdiff) == 0) - fracdiff_is_all_nan = np.isnan(zm_fracdiff).all() - - # ============================================================== - # Create 3x2 figure - # ============================================================== - - # Create figs and axes objects - figs, ((ax0, ax1), (ax2, ax3), (ax4, ax5)) = plt.subplots( - 3, 2, figsize=[12, 15.3] - ) - # Ensure subplots don't overlap when invoking plt.show() - if not savepdf: - plt.subplots_adjust(hspace=0.4) - # Give the plot a title - offset = 0.96 - fontsize = 25 - if extra_title_txt is not None: - figs.suptitle( - f"{varname}, Zonal Mean ({extra_title_txt})", - fontsize=fontsize, - y=offset, - ) - else: - figs.suptitle(f"{varname}, Zonal Mean", - fontsize=fontsize, y=offset) - - # ============================================================== - # Set color map objects. Use gray for NaNs (no worries, - # because zonal means are always plotted on lat-alt grids). - # - # Use shallow copy (copy.copy()) to create color map objects, - # in order to avoid set_bad() from being applied to the base - # color table. 
See: https://docs.python.org/3/library/copy.html - # ============================================================== - - if use_cmap_RdBu: - cmap1 = copy.copy(mpl.cm.RdBu_r) - else: - cmap1 = copy.copy(WhGrYlRd) - cmap1.set_bad("gray") - - cmap_plot = copy.copy(mpl.cm.RdBu_r) - cmap_plot.set_bad(color="gray") - - # ============================================================== - # Set titles for plots - # ============================================================== - - if refgridtype == "ll": - ref_title = f"{refstr} (Ref){subtitle_extra}\n{refres}" - else: - ref_title = f"{refstr} (Ref){subtitle_extra}\n{cmpres} regridded from c{refres}" - - if devgridtype == "ll": - dev_title = f"{devstr} (Dev){subtitle_extra}\n{devres}" - else: - dev_title = f"{devstr} (Dev){subtitle_extra}\n{cmpres} regridded from c{devres}" - - if regridany: - absdiff_dynam_title = \ - f"Difference ({cmpres})\nDev - Ref, Dynamic Range" - absdiff_fixed_title = \ - f"Difference ({cmpres})\nDev - Ref, Restricted Range [5%,95%]" - if diff_of_diffs: - fracdiff_dynam_title = \ - f"Difference ({cmpres}), Dynamic Range\n{frac_devstr} - {frac_refstr}" - fracdiff_fixed_title = \ - f"Difference ({cmpres}), Restricted Range [5%,95%]\n{frac_devstr} - {frac_refstr}" - else: - fracdiff_dynam_title = \ - f"Ratio ({cmpres})\nDev/Ref, Dynamic Range" - fracdiff_fixed_title = \ - f"Ratio ({cmpres})\nDev/Ref, Fixed Range" - else: - absdiff_dynam_title = "Difference\nDev - Ref, Dynamic Range" - absdiff_fixed_title = \ - "Difference\nDev - Ref, Restricted Range [5%,95%]" - if diff_of_diffs: - fracdiff_dynam_title = \ - f"Difference, Dynamic Range\n{frac_devstr} - {frac_refstr}" - fracdiff_fixed_title = \ - f"Difference, Restricted Range [5%,95%]\n{frac_devstr} - {frac_refstr}" - else: - fracdiff_dynam_title = "Ratio \nDev/Ref, Dynamic Range" - fracdiff_fixed_title = "Ratio \nDev/Ref, Fixed Range" - - # ============================================================== - # Bundle variables for 6 parallel plotting calls - # 0 = Ref 1 = Dev - # 2 = Dynamic abs diff 3 = Restricted abs diff - # 4 = Dynamic frac diff 5 = Restricted frac diff - # ============================================================== - - subplots = [ - "ref", "dev", - "dyn_abs_diff", "res_abs_diff", - "dyn_frac_diff", "res_frac_diff", - ] - if diff_of_diffs: - subplots = ["ref", "dev", - "dyn_abs_diff", "res_abs_diff", - "dyn_abs_diff", "res_abs_diff"] - - all_zeros = [ - ref_is_all_zero, - dev_is_all_zero, - absdiff_is_all_zero, - absdiff_is_all_zero, - fracdiff_is_all_zero, - fracdiff_is_all_zero, - ] - - all_nans = [ - ref_is_all_nan, - dev_is_all_nan, - absdiff_is_all_nan, - absdiff_is_all_nan, - fracdiff_is_all_nan, - fracdiff_is_all_nan, - ] - plot_vals = [zm_ref, zm_dev, zm_diff, zm_diff, - zm_fracdiff, zm_fracdiff] - - axs = [ax0, ax1, ax2, ax3, ax4, ax5] - - cmaps = [cmap1, cmap1, cmap_plot, cmap_plot, cmap_plot, cmap_plot] - - rowcols = [(0, 0), (0, 1), (1, 0), (1, 1), (2, 0), (2, 1)] - - titles = [ - ref_title, - dev_title, - absdiff_dynam_title, - absdiff_fixed_title, - fracdiff_dynam_title, - fracdiff_fixed_title, - ] - - grids = [refgrid, devgrid, cmpgrid, cmpgrid, cmpgrid, cmpgrid] - - if refgridtype != "ll": - grids[0] = cmpgrid - if devgridtype != "ll": - grids[1] = cmpgrid - extents = [None, None, None, None, None, None] - - masked = ["ZM", "ZM", "ZM", "ZM", "ZM", "ZM"] - - unit_list = [ref_units[ivar], dev_units[ivar], cmn_units, cmn_units, - "unitless", "unitless"] - - other_all_nans = [dev_is_all_nan, ref_is_all_nan, - False, False, False, False]
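# (Illustrative note: the six parallel lists above are consumed - # index-by-index by six_plot, so e.g. index 0 bundles - # subplots[0]="ref", plot_vals[0]=zm_ref, axs[0]=ax0, and - # titles[0]=ref_title into the top-left panel of the 3x2 page.)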
gridtypes = [ - cmpgridtype, - cmpgridtype, - cmpgridtype, - cmpgridtype, - cmpgridtype, - cmpgridtype, - ] - - pedges = [ref_pedge, dev_pedge, pedge, pedge, pedge, pedge] - - pedge_inds = [ref_pedge_ind, dev_pedge_ind, pedge_ind, - pedge_ind, pedge_ind, pedge_ind] - - mins = [vmin_ref, vmin_dev, vmin_abs] - maxs = [vmax_ref, vmax_dev, vmax_abs] - - ratio_logs = [False, False, False, False, True, True] - # Plot - for i in range(6): - six_plot( - subplots[i], - all_zeros[i], - all_nans[i], - plot_vals[i], - grids[i], - axs[i], - rowcols[i], - titles[i], - cmaps[i], - unit_list[i], - extents[i], - masked[i], - other_all_nans[i], - gridtypes[i], - mins, - maxs, - use_cmap_RdBu, - match_cbar, - verbose, - log_color_scale, - pedges[i], - pedge_inds[i], - log_yaxis, - plot_type="zonal_mean", - xtick_positions=xtick_positions, - xticklabels=xticklabels, - ratio_log=ratio_logs[i], - **extra_plot_args - ) - - # ============================================================== - # Add this page of 6-panel plots to the PDF file - # ============================================================== - if savepdf: - folders = pdfname.split('/') - pdfname_temp = folders[-1] + "BENCHMARKFIGCREATION.pdf" + str(ivar) - full_path = temp_dir - for folder in folders[:-1]: - full_path = os.path.join(full_path, folder) - if not os.path.isdir(full_path): - try: - os.mkdir(full_path) - except FileExistsError: - pass - pdf = PdfPages(os.path.join(full_path, pdfname_temp)) - pdf.savefig(figs) - pdf.close() - plt.close(figs) - # ============================================================== - # Update the list of variables with significant differences. - # Criterion: abs(1 - max(fracdiff)) > 0.1 - # Do not include NaNs in the criterion, because these indicate - # places where fracdiff could not be computed (div-by-zero). 
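As a concrete instance of the significance criterion described in the comment above (the actual check follows in the next hunk), with hypothetical ratio values:

```python
import numpy as np

# Hypothetical Dev/Ref zonal-mean ratios for one variable; the NaN
# marks a cell where the ratio could not be computed (div-by-zero)
zm_fracdiff = np.array([0.97, 1.01, 1.25, np.nan])

# np.nanmax ignores the NaN; |1 - 1.25| = 0.25 > 0.1, so this
# variable would be appended to sigdiff_list
print(np.abs(1 - np.nanmax(zm_fracdiff)) > 0.1)  # True
```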
- # ============================================================== - if np.abs(1 - np.nanmax(zm_fracdiff)) > 0.1: - sigdiff_list.append(varname) - return varname - return "" - - # ================================================================== - # Call figure generation function in a parallel loop over variables - # ================================================================== - # do not attempt nested thread parallelization due to issues with matplotlib - if current_process().name != "MainProcess": - n_job = 1 - - if not savepdf: - # disable parallel plotting to allow interactive figure plotting - for i in range(n_var): - createfig(i) - - else: - with TemporaryDirectory() as temp_dir: - results = Parallel(n_jobs=n_job)(delayed(createfig)(i, temp_dir) - for i in range(n_var)) - # update sig diffs after parallel calls - if current_process().name == "MainProcess": - for varname in results: - if type(varname) is str: - sigdiff_list.append(varname) - - # ================================================================== - # Finish - # ================================================================== - if verbose: - print("Closed PDF") - merge = PdfFileMerger() - #print("Creating {} for {} variables".format(pdfname, n_var)) - pdf = PdfPages(pdfname) - pdf.close() - for i in range(n_var): - temp_pdfname = pdfname - if pdfname[0] == '/': - temp_pdfname = temp_pdfname[1:] - merge.append( - os.path.join( - str(temp_dir), - temp_pdfname + - "BENCHMARKFIGCREATION.pdf" + - str(i))) - merge.write(pdfname) - merge.close() - warnings.showwarning = _warning_format - - -def normalize_colors(vmin, vmax, is_difference=False, - log_color_scale=False, ratio_log=False): - """ - Normalizes a data range to the colormap range used by matplotlib - functions. For log-color scales, special handling is done to prevent - taking the log of data that is all zeroes. - - Args: - vmin: float - Minimum value of the data range. - vmax: float - Maximum value of the data range. - - Keyword Args (optional): - is_difference: bool - Set this switch to denote that we are using a difference - color scale (i.e. with zero in the middle of the range). - Default value: False - log_color_scale: bool - Logical flag to denote that we are using a logarithmic - color scale instead of a linear color scale. - Default value: False - - Returns: - norm: matplotlib Norm - The normalized matplotlib color range, stored in - a matplotlib Norm object. - - Remarks: - For log color scales, we will use a range of 3 orders of - magnitude (i.e. from vmax/1e3 to vmax). - """ - - # Define class for logarithmic non-symmetric color scheme - class MidpointLogNorm(mcolors.LogNorm): - """ - Class for logarithmic non-symmetric color scheme - """ - def __init__(self, vmin=None, vmax=None, midpoint=None, clip=False): - mcolors.LogNorm.__init__(self, vmin=vmin, vmax=vmax, clip=clip) - self.midpoint = midpoint - - def __call__(self, value, clip=None): - result, _ = self.process_value(value) - x = [np.log(self.vmin), np.log(self.midpoint), np.log(self.vmax)] - y = [0, 0.5, 1] - return np.ma.array(np.interp(np.log(value), x, y), - mask=result.mask, copy=False) - - if (abs(vmin) == 0 and abs(vmax) == 0) or ( - np.isnan(vmin) and np.isnan(vmax)): - # If the data is zero everywhere (vmin=vmax=0) or undefined - # everywhere (vmin=vmax=NaN), then normalize the data range - # so that the color corresponding to zero (white) will be - # placed in the middle of the colorbar, where we will - # add a single tick. 
- if is_difference: - return mcolors.Normalize(vmin=-1.0, vmax=1.0) - return mcolors.Normalize(vmin=0.0, vmax=1.0) - - # For log color scales, assume a range 3 orders of magnitude - # below the maximum value. Otherwise use a linear scale. - if log_color_scale and not ratio_log: - return mcolors.LogNorm(vmin=vmax / 1e3, vmax=vmax) - if log_color_scale: - return MidpointLogNorm(vmin=vmin, vmax=vmax, midpoint=1) - return mcolors.Normalize(vmin=vmin, vmax=vmax) - - -def single_panel(plot_vals, - ax=None, - plot_type="single_level", - grid={}, - gridtype="", - title="fill", - comap=WhGrYlRd, - norm=[], - unit="", - extent=(None, None, None, None), - masked_data=None, - use_cmap_RdBu=False, - log_color_scale=False, - add_cb=True, - pres_range=[0, 2000], - pedge=np.full((1, 1), -1), - pedge_ind=np.full((1, 1), -1), - log_yaxis=False, - xtick_positions=[], - xticklabels=[], - proj=ccrs.PlateCarree(), - sg_path='', - ll_plot_func="imshow", - vert_params=[[], []], - pdfname="", - weightsdir='.', - vmin=None, - vmax=None, - return_list_of_plots=False, - **extra_plot_args - ): - """ - Core plotting routine -- creates a single plot panel. - - Args: - plot_vals: xarray DataArray or numpy array - Single data variable GEOS-Chem output to plot - - Keyword Args (Optional): - ax: matplotlib axes - Axes object to plot information - Default value: None (Will create a new axes) - plot_type: str - Either "single_level" or "zonal_mean" - Default value: "single_level" - grid: dict - Dictionary mapping plot_vals to plottable coordinates - Default value: {} (will attempt to read grid from plot_vals) - gridtype: str - "ll" for lat/lon or "cs" for cubed-sphere - Default value: "" (will automatically determine from grid) - title: str - Title to put at top of plot - Default value: "fill" (will use name attribute of plot_vals if available) - comap: matplotlib Colormap - Colormap for plotting data values - Default value: WhGrYlRd - norm: list - List with range [0..1] normalizing color range for matplotlib methods - Default value: [] (will determine from plot_vals) - unit: str - Units of plotted data - Default value: "" (will use units attribute of plot_vals if available) - extent: tuple (minlon, maxlon, minlat, maxlat) - Describes minimum and maximum longitude and latitude of input data - Default value: (None, None, None, None) (Will use full extent of plot_vals - if plot is single level). 
- masked_data: numpy array - Masked area for avoiding near-dateline cubed-sphere plotting issues - Default value: None (will attempt to determine from plot_vals) - use_cmap_RdBu: bool - Set this flag to True to use a blue-white-red colormap - Default value: False - log_color_scale: bool - Set this flag to True to use a log-scale colormap - Default value: False - add_cb: bool - Set this flag to True to add a colorbar to the plot - Default value: True - pres_range: list(int) - Range from minimum to maximum pressure for zonal mean plotting - Default value: [0, 2000] (will plot entire atmosphere) - pedge: numpy array - Edge pressures of vertical grid cells in plot_vals - for zonal mean plotting - Default value: np.full((1, 1), -1) (will determine automatically) - pedge_ind: numpy array - Index of edge pressure values within pressure range in plot_vals - for zonal mean plotting - Default value: np.full((1, 1), -1) (will determine automatically) - log_yaxis: bool - Set this flag to True to enable log scaling of pressure in zonal mean plots - Default value: False - xtick_positions: list(float) - Locations of lat/lon or lon ticks on plot - Default value: [] (will place automatically for zonal mean plots) - xticklabels: list(str) - Labels for lat/lon ticks - Default value: [] (will determine automatically from xtick_positions) - proj: cartopy projection - Projection for plotting data - Default value: ccrs.PlateCarree() - sg_path: str - Path to NetCDF file containing stretched-grid info (in attributes) for plot_vals - Default value: '' (will not be read in) - ll_plot_func: str - Function to use for lat/lon single level plotting with possible values - 'imshow' and 'pcolormesh'. imshow is much faster but is slightly displaced - when plotting from dateline to dateline and/or pole to pole. - Default value: 'imshow' - vert_params: list(AP, BP) of list-like types - Hybrid grid parameter A in hPa and B (unitless). Needed if grid is not 47 or 72 levels. - Default value: [[], []] - pdfname: str - File path to save plots as PDF - Default value: "" (will not create PDF) - weightsdir: str - Directory path for storing regridding weights - Default value: "." (will store regridding files in current directory) - vmin: float - minimum for colorbars - Default value: None (will use plot value minimum) - vmax: float - maximum for colorbars - Default value: None (will use plot value maximum) - return_list_of_plots: bool - Return plots as a list. This is helpful if you are using a cubed-sphere grid - and would like access to all 6 plots - Default value: False - extra_plot_args: various - Any extra keyword arguments are passed to calls to pcolormesh() (CS) or imshow() (Lat/Lon). 
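Given the keyword arguments documented above, a minimal `single_panel()` call might look like the sketch below. The file name and species variable are hypothetical, and the import path assumes the new `gcpy.plot.single_panel` module introduced later in this diff:

```python
import xarray as xr
from gcpy.plot.single_panel import single_panel

# Hypothetical GEOS-Chem diagnostics file and species name
dset = xr.open_dataset("GEOSChem.SpeciesConc.20190701_0000z.nc4")
darr = dset["SpeciesConc_O3"].isel(time=0, lev=0)

# One lat/lon panel; pcolormesh avoids imshow's slight displacement
# near the dateline and the poles, at some cost in speed
single_panel(
    darr,
    plot_type="single_level",
    ll_plot_func="pcolormesh",
    log_color_scale=True,
    pdfname="O3_surface.pdf",
)
```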
- - Returns: - plot: matplotlib plot - Plot object created from input - """ - - # Eliminate 1D level or time dimensions - plot_vals = plot_vals.squeeze() - data_is_xr = isinstance(plot_vals, xr.DataArray) - if xtick_positions == []: - # if plot_type == "single_level": - # xtick_positions = np.arange(extent[0], extent[1], (extent[1]-extent[0])/12) - if plot_type == "zonal_mean": - xtick_positions = np.arange(-90, 90, 30) - - if xticklabels == []: - xticklabels = [r"{}$\degree$".format(x) for x in xtick_positions] - - if unit == "" and data_is_xr: - try: - unit = plot_vals.units.strip() - except BaseException: - pass - - if title == "fill" and data_is_xr: - try: - title = plot_vals.name - except BaseException: - pass - # Generate grid if not passed - if grid == {}: - res, gridtype = get_input_res(plot_vals) - sg_params = [1, 170, -90] - if sg_path != '': - sg_attrs = xr.open_dataset(sg_path).attrs - sg_params = [ - sg_attrs['stretch_factor'], - sg_attrs['target_longitude'], - sg_attrs['target_latitude']] - - if plot_type == 'single_level': - grid_extent = get_grid_extents(plot_vals) - [grid, _] = call_make_grid(res, gridtype, in_extent=grid_extent, sg_params=sg_params) - - else: # zonal mean - if np.all(pedge_ind == -1) or np.all(pedge == -1): - - # Get mid-point pressure and edge pressures for this grid - pedge, pmid, _ = get_vert_grid(plot_vals, *vert_params) - - # Get indexes of pressure subrange (full range is default) - pedge_ind = get_pressure_indices(pedge, pres_range) - - # Pad edges if subset does not include surface or TOA so data spans - # entire subrange - pedge_ind = pad_pressure_edges( - pedge_ind, plot_vals.sizes["lev"], len(pmid)) - - # pmid indexes do not include last pedge index - pmid_ind = pedge_ind[:-1] - # Convert levels to pressures in ref and dev data - plot_vals = convert_lev_to_pres(plot_vals, pmid, pedge) - # get proper levels - plot_vals = plot_vals.isel(lev=pmid_ind) - - [input_res, input_gridtype, _, _, - _, new_gridtype, regrid, _, _, _, _, - grid, regridder, _, regridder_list, _] = create_regridders( - plot_vals, - plot_vals, - weightsdir=weightsdir, - cmpres=None, - zm=True, - sg_ref_params=sg_params - ) - if gridtype == 'cs': - plot_vals = reshape_MAPL_CS(plot_vals) - nlev = len(plot_vals['lev']) - # Ref - plot_vals = regrid_comparison_data( - plot_vals, - input_res, - regrid, - regridder, - regridder_list, - grid, - input_gridtype, - new_gridtype, - nlev=nlev - ) - # average across longitude bands - # assume lon dim is index 2 (no time dim) if a numpy array is passed - lon_ind = 2 - if isinstance(plot_vals, xr.DataArray): - lon_ind = plot_vals.dims.index('lon') - # calculate zonal means - plot_vals = plot_vals.mean(axis=lon_ind) - if gridtype == "": - _, gridtype = get_input_res(plot_vals) - if extent == (None, None, None, None) or extent is None: - extent = get_grid_extents(grid) - # convert to -180 to 180 grid if needed (necessary if going - # cross-dateline later) - if extent[0] > 180 or extent[1] > 180: - #extent = [((extent[0]+180)%360)-180, ((extent[1]+180)%360)-180, extent[2], extent[3]] - extent = [extent[0] - 180, extent[1] - 180, extent[2], extent[3]] - ''' - if extent[0] < -180 and 'x' in res: - lon_res = float(res.split('x')[1]) - extent = [180, - if extent[1] > 180 and 'x' in res: - extent[1] = 180 - ''' - # Account for cross-dateline extent - if extent[0] > extent[1]: - if gridtype == "ll": - # rearrange data with dateline in the middle instead of prime meridian - # change extent / grid to where dateline is 0, prime meridian is -180 / 180 - # 
needed for numpy arrays if doing pcolormesh / imshow, and xarray DataArrays - # if using imshow - proj = ccrs.PlateCarree(central_longitude=180) - if ll_plot_func == "imshow" or \ - not isinstance(plot_vals, xr.DataArray): - i = 0 - while grid['lon_b'][i] < 0: - i = i+1 - plot_vals_holder = copy.deepcopy(plot_vals) - if not isinstance(plot_vals, xr.DataArray): - plot_vals_holder[:,:-i] = plot_vals[:,i:] - plot_vals_holder[:,-i:] = plot_vals[:,:i] - else: - plot_vals_holder.values[:,:-i] = plot_vals.values[:,i:] - plot_vals_holder.values[:,-i:] = plot_vals.values[:,:i] - plot_vals = plot_vals_holder - extent[0] = extent[0] % 360 - 180 - extent[1] = extent[1] % 360 - 180 - grid["lon_b"] = grid["lon_b"] % 360 - 180 - grid["lon"] = grid["lon"] % 360 - 180 - if isinstance(plot_vals, xr.DataArray): - plot_vals['lon'] = plot_vals['lon'] % 360 - 180 - # realign grid also if doing imshow or using numpy arrays - if ll_plot_func == "imshow" or \ - not isinstance(plot_vals, xr.DataArray): - temp_grid = copy.deepcopy(grid) - temp_grid['lon_b'][:-i] = grid['lon_b'][i:] - temp_grid['lon_b'][-i:] = grid['lon_b'][:i] - temp_grid['lon'][:-i] = grid['lon'][i:] - temp_grid['lon'][-i:] = grid['lon'][:i] - grid = temp_grid - if isinstance(plot_vals, xr.DataArray): - plot_vals = plot_vals.assign_coords({'lon' : grid['lon']}) - if gridtype == "cs": - proj = ccrs.PlateCarree(central_longitude=180) - extent[0] = extent[0] % 360 - 180 - extent[1] = extent[1] % 360 - 180 - grid["lon_b"] = grid["lon_b"] % 360 - 180 - grid["lon"] = grid["lon"] % 360 - 180 - - if ax is None: - if plot_type == "zonal_mean": - ax = plt.axes() - if plot_type == "single_level": - ax = plt.axes(projection=proj) - - fig = plt.gcf() - data_is_xr = isinstance(plot_vals, xr.DataArray) - # Normalize colors (put into range [0..1] for matplotlib methods) - if norm == []: - if data_is_xr: - vmin = plot_vals.data.min() if vmin is None else vmin - vmax = plot_vals.data.max() if vmax is None else vmax - elif isinstance(plot_vals, np.ndarray): - vmin = np.min(plot_vals) if vmin is None else vmin - vmax = np.max(plot_vals) if vmax is None else vmax - norm = normalize_colors( - vmin, - vmax, - is_difference=use_cmap_RdBu, - log_color_scale=log_color_scale) - - # Create plot - ax.set_title(title) - if plot_type == "zonal_mean": - # Zonal mean plot - plot = ax.pcolormesh( - grid["lat_b"], - pedge[pedge_ind], - plot_vals, - cmap=comap, - norm=norm, - **extra_plot_args) - ax.set_aspect("auto") - ax.set_ylabel("Pressure (hPa)") - if log_yaxis: - ax.set_yscale("log") - ax.yaxis.set_major_formatter( - mticker.FuncFormatter(lambda y, _: "{:g}".format(y)) - ) - ax.invert_yaxis() - ax.set_xticks(xtick_positions) - ax.set_xticklabels(xticklabels) - - elif gridtype == "ll": - if ll_plot_func == 'imshow': - # Lat/Lon single level - [minlon, maxlon, minlat, maxlat] = extent - # expand extent to minimize imshow distortion - #[dlat,dlon] = list(map(float, res.split('x'))) - dlon = grid['lon'][2] - grid['lon'][1] - dlat = grid['lat'][2] - grid['lat'][1] - - def get_nearest_extent(val, array, direction, spacing): - # choose nearest values in grid to desired extent to minimize distortion - grid_vals = np.asarray(array) - diff = grid_vals - val - if direction == 'greater': - diff[diff < 0] = np.inf - i = diff.argmin() - if diff[i] == np.inf: - # expand extent to value beyond grid limits if extent - # is already > max grid value - return grid_vals[(np.abs(grid_vals - val)).argmin()] - return grid_vals[i] - else: - diff[diff > 0] = -np.inf - i = diff.argmax() - if diff[i] == 
-np.inf: - # expand extent to value beyond grid limits if extent is already < min grid value - # plot will be distorted if full global to avoid - # cartopy issues - return grid_vals[( - np.abs(grid_vals - val)).argmin()] - spacing - return max(grid_vals[i], -180) - closest_minlon = get_nearest_extent( - minlon, grid['lon_b'], 'less', dlon) - closest_maxlon = get_nearest_extent( - maxlon, grid['lon_b'], 'greater', dlon) - # don't adjust if extent includes poles where points are not evenly - # spaced anyway - if np.abs( - grid['lat_b'][0] - - grid['lat_b'][1]) != np.abs( - grid['lat_b'][1] - - grid['lat_b'][2]) and minlat < grid['lat_b'][1]: - closest_minlat = grid['lat_b'][0] - else: - closest_minlat = get_nearest_extent( - minlat, grid['lat_b'], 'less', dlat) - - if np.abs(grid['lat_b'][-1] - grid['lat_b'][-2]) != np.abs(grid['lat_b'] - [-2] - grid['lat_b'][-3]) and maxlat > grid['lat_b'][-2]: - closest_maxlat = grid['lat_b'][-1] - else: - closest_maxlat = get_nearest_extent( - maxlat, grid['lat_b'], 'greater', dlat) - - extent = [ - closest_minlon, - closest_maxlon, - closest_minlat, - closest_maxlat] - if isinstance(plot_vals, xr.DataArray): - # filter data by bounds of extent - plot_vals = plot_vals.where( - plot_vals.lon > closest_minlon, - drop=True).where( - plot_vals.lon < closest_maxlon, - drop=True).where( - plot_vals.lat > minlat, - drop=True).where( - plot_vals.lat < maxlat, - drop=True) - else: - # filter data by indices of grid - minlon_i = np.where(grid['lon_b']==closest_minlon)[0] - if len(minlon_i) == 0: - minlon_i = 0 - else: - minlon_i = int(minlon_i) - maxlon_i = np.where(grid['lon_b']==closest_maxlon)[0] - if len(maxlon_i) == 0: - maxlon_i = -1 - else: - maxlon_i = int(maxlon_i) - minlat_i = np.where(grid['lat_b']==closest_minlat)[0] - if len(minlat_i) == 0: - minlat_i = 0 - else: - minlat_i = int(minlat_i) - maxlat_i = np.where(grid['lat_b']==closest_maxlat)[0] - if len(maxlat_i) == 0: - maxlat_i = -1 - else: - maxlat_i = int(maxlat_i) - plot_vals = plot_vals[minlat_i:maxlat_i+1, - minlon_i:maxlon_i+1] - # Create a lon/lat plot - plot = ax.imshow( - plot_vals, - extent=extent, - transform=proj, - cmap=comap, - norm=norm, - origin='lower', - interpolation='nearest', - **extra_plot_args - ) - else: - plot = ax.pcolormesh( - grid["lon_b"], - grid["lat_b"], - plot_vals, - transform=proj, - cmap=comap, - norm=norm, - **extra_plot_args - ) - ax.set_extent(extent, crs=proj) - ax.coastlines() - ax.set_xticks(xtick_positions) - ax.set_xticklabels(xticklabels) - - else: - # Cubed-sphere single level - try: - if masked_data is None: - masked_data = np.ma.masked_where( - np.abs( - grid["lon"] - - 180) < 2, - plot_vals.data.reshape( - 6, - res, - res)) - except ValueError: - # Comparison of numpy arrays throws errors - pass - [minlon, maxlon, minlat, maxlat] = extent - # Catch issue with plots extending into both the western and eastern - # hemisphere - if np.max(grid["lon_b"] > 180): - grid["lon_b"] = (((grid["lon_b"] + 180) % 360) - 180) - - plots = [] - for j in range(6): - plot = ax.pcolormesh( - grid["lon_b"][j, :, :], - grid["lat_b"][j, :, :], - masked_data[j, :, :], - transform=proj, - cmap=comap, - norm=norm, - **extra_plot_args - ) - plots.append(plot) - ax.set_extent(extent, crs=proj) - ax.coastlines() - ax.set_xticks(xtick_positions) - ax.set_xticklabels(xticklabels) - - if add_cb: - cb = plt.colorbar(plot, ax=ax, orientation="horizontal", pad=0.10) - cb.mappable.set_norm(norm) - if data_is_xr: - all_zero, all_nan = all_zero_or_nan(plot_vals.values) - else: - all_zero, 
all_nan = all_zero_or_nan(plot_vals) - if all_zero or all_nan: - if use_cmap_RdBu: - cb.set_ticks([0.0]) - else: - cb.set_ticks([0.5]) - if all_nan: - cb.set_ticklabels(["Undefined throughout domain"]) - else: - cb.set_ticklabels(["Zero throughout domain"]) - else: - if log_color_scale: - cb.formatter = mticker.LogFormatter(base=10) - else: - if (vmax - vmin) < 0.1 or (vmax - vmin) > 100: - cb.locator = mticker.MaxNLocator(nbins=4) - - try: - cb.formatter.set_useOffset(False) - except BaseException: - # not all automatically chosen colorbar formatters properly handle - # the above method - pass - cb.update_ticks() - cb.set_label(unit) - - if pdfname != "": - pdf = PdfPages(pdfname) - pdf.savefig(fig) - pdf.close() - - # in some cases users may wish to get a list of all associated plots - # eg. cubedsphere grids have six plots associated with them - if return_list_of_plots: - return plots if 'plots' in locals() else [plot] - return plot diff --git a/gcpy/plot/__init__.py b/gcpy/plot/__init__.py new file mode 100644 index 00000000..c7cda50a --- /dev/null +++ b/gcpy/plot/__init__.py @@ -0,0 +1,8 @@ +""" +GCPy import script +""" +from .compare_single_level import * +from .compare_zonal_mean import * +from .core import * +from .single_panel import * +from .six_plot import * diff --git a/gcpy/colormaps/WhGrYlRd.txt b/gcpy/plot/colormaps/WhGrYlRd.txt similarity index 100% rename from gcpy/colormaps/WhGrYlRd.txt rename to gcpy/plot/colormaps/WhGrYlRd.txt diff --git a/gcpy/plot/compare_single_level.py b/gcpy/plot/compare_single_level.py new file mode 100644 index 00000000..5a9db5ef --- /dev/null +++ b/gcpy/plot/compare_single_level.py @@ -0,0 +1,1186 @@ +""" +compare_single_level.py: Function to create a six-panel plot comparing +quantities at a single model level for two different model versions. +Called from the GEOS-Chem benchmarking scripts and from the +compare_diags.py example script. 
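The `gcpy/plot/__init__.py` added above re-exports each submodule's public names, so after this reorganization either import form below should resolve to the same objects (assuming the submodules keep exporting their top-level functions):

```python
# Via the package-level re-exports in gcpy/plot/__init__.py
from gcpy.plot import compare_single_level, compare_zonal_mean, single_panel

# Or directly from the new submodules
from gcpy.plot.six_plot import six_plot
from gcpy.plot.core import WhGrYlRd
```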
+""" +import os +import copy +import warnings +from multiprocessing import current_process +from tempfile import TemporaryDirectory +import matplotlib as mpl +import matplotlib.pyplot as plt +from matplotlib.backends.backend_pdf import PdfPages +import numpy as np +import xarray as xr +import cartopy.crs as ccrs +from joblib import Parallel, delayed +from pypdf import PdfMerger +from gcpy.grid import get_grid_extents, call_make_grid +from gcpy.regrid import regrid_comparison_data, create_regridders +from gcpy.util import reshape_MAPL_CS, get_diff_of_diffs, \ + all_zero_or_nan, slice_by_lev_and_time, compare_varnames, \ + read_config_file, verify_variable_type +from gcpy.units import check_units, data_unit_is_mol_per_mol +from gcpy.constants import MW_AIR_g +from gcpy.plot.core import gcpy_style, six_panel_subplot_names, \ + _warning_format, WhGrYlRd +from gcpy.plot.six_plot import six_plot + +# Suppress numpy divide by zero warnings to prevent output spam +np.seterr(divide="ignore", invalid="ignore") + +# Use a style sheet to control plot attributes +plt.style.use(gcpy_style) + + +def compare_single_level( + refdata, + refstr, + devdata, + devstr, + varlist=None, + ilev=0, + itime=0, + refmet=None, + devmet=None, + weightsdir='.', + pdfname="", + cmpres=None, + match_cbar=True, + normalize_by_area=False, + enforce_units=True, + convert_to_ugm3=False, + flip_ref=False, + flip_dev=False, + use_cmap_RdBu=False, + verbose=False, + log_color_scale=False, + extra_title_txt=None, + extent=None, + n_job=-1, + sigdiff_list=None, + second_ref=None, + second_dev=None, + spcdb_dir=os.path.dirname(__file__), + sg_ref_path='', + sg_dev_path='', + ll_plot_func='imshow', + **extra_plot_args +): + """ + Create single-level 3x2 comparison map plots for variables common + in two xarray Datasets. Optionally save to PDF. + + Args: + refdata: xarray dataset + Dataset used as reference in comparison + refstr: str + String description for reference data to be used in plots + devdata: xarray dataset + Dataset used as development in comparison + devstr: str + String description for development data to be used in plots + + Keyword Args (optional): + varlist: list of strings + List of xarray dataset variable names to make plots for + Default value: None (will compare all common variables) + ilev: integer + Dataset level dimension index using 0-based system. + Indexing is ambiguous when plotting differing vertical grids + Default value: 0 + itime: integer + Dataset time dimension index using 0-based system + Default value: 0 + refmet: xarray dataset + Dataset containing ref meteorology + Default value: None + devmet: xarray dataset + Dataset containing dev meteorology + Default value: None + weightsdir: str + Directory path for storing regridding weights + Default value: None (will create/store weights in + current directory) + pdfname: str + File path to save plots as PDF + Default value: Empty string (will not create PDF) + cmpres: str + String description of grid resolution at which + to compare datasets + Default value: None (will compare at highest resolution + of ref and dev) + match_cbar: bool + Set this flag to True if you wish to use the same colorbar + bounds for the Ref and Dev plots. + Default value: True + normalize_by_area: bool + Set this flag to True if you wish to normalize the Ref + and Dev raw data by grid area. Input ref and dev datasets + must include AREA variable in m2 if normalizing by area. 
+ Default value: False + enforce_units: bool + Set this flag to True to force an error if Ref and Dev + variables have different units. + Default value: True + convert_to_ugm3: bool + Whether to convert data units to ug/m3 for plotting. + Default value: False + flip_ref: bool + Set this flag to True to flip the vertical dimension of + 3D variables in the Ref dataset. + Default value: False + flip_dev: bool + Set this flag to True to flip the vertical dimension of + 3D variables in the Dev dataset. + Default value: False + use_cmap_RdBu: bool + Set this flag to True to use a blue-white-red colormap + for plotting the raw data in both the Ref and Dev datasets. + Default value: False + verbose: bool + Set this flag to True to enable informative printout. + Default value: False + log_color_scale: bool + Set this flag to True to plot data (not diffs) + on a log color scale. + Default value: False + extra_title_txt: str + Specifies extra text (e.g. a date string such as "Jan2016") + for the top-of-plot title. + Default value: None + extent: list + Defines the extent of the region to be plotted in form + [minlon, maxlon, minlat, maxlat]. + Default value plots extent of input grids. + Default value: [-1000, -1000, -1000, -1000] + n_job: int + Defines the number of simultaneous workers for parallel + plotting. Set to 1 to disable parallel plotting. + Value of -1 allows the application to decide. + Default value: -1 + sigdiff_list: list of str + Returns a list of all quantities having significant + differences (where |max(fractional difference)| > 0.1). + Default value: None + second_ref: xarray Dataset + A dataset of the same model type / grid as refdata, + to be used in diff-of-diffs plotting. + Default value: None + second_dev: xarray Dataset + A dataset of the same model type / grid as devdata, + to be used in diff-of-diffs plotting. + Default value: None + spcdb_dir: str + Directory containing species_database.yml file. + Default value: Path of GCPy code repository + sg_ref_path: str + Path to NetCDF file containing stretched-grid info + (in attributes) for the ref dataset + Default value: '' (will not be read in) + sg_dev_path: str + Path to NetCDF file containing stretched-grid info + (in attributes) for the dev dataset + Default value: '' (will not be read in) + ll_plot_func: str + Function to use for lat/lon single level plotting with + possible values 'imshow' and 'pcolormesh'. imshow is much + faster but is slightly displaced when plotting from + dateline to dateline and/or pole to pole. + Default value: 'imshow' + extra_plot_args: various + Any extra keyword arguments are passed through the + plotting functions to be used in calls to pcolormesh() (CS) + or imshow() (Lat/Lon). + """ + warnings.showwarning = _warning_format + # Error check arguments + verify_variable_type(refdata, xr.Dataset) + verify_variable_type(devdata, xr.Dataset) + + # Create empty lists for keyword arguments + if extent is None: + extent = [-1000, -1000, -1000, -1000] + if sigdiff_list is None: + sigdiff_list = [] + + # Determine if doing diff-of-diffs + diff_of_diffs = second_ref is not None and second_dev is not None + + # Prepare diff-of-diffs datasets if needed + if diff_of_diffs: + refdata, devdata = refdata.load(), devdata.load() + second_ref, second_dev = second_ref.load(), second_dev.load() + +# # If needed, use fake time dim in case dates are different +# # in datasets. This needs more work for case of single versus +# # multiple times. 
+# aligned_time = [np.datetime64('2000-01-01')] * refdata.dims['time'] +# refdata = refdata.assign_coords({'time': aligned_time}) +# devdata = devdata.assign_coords({'time': aligned_time}) +# second_ref = second_ref.assign_coords({'time': aligned_time}) +# second_dev = second_dev.assign_coords({'time': aligned_time}) + + refdata, fracrefdata = get_diff_of_diffs(refdata, second_ref) + devdata, fracdevdata = get_diff_of_diffs(devdata, second_dev) + frac_refstr = 'GCC_dev / GCC_ref' + frac_devstr = 'GCHP_dev / GCHP_ref' + # If no varlist is passed, plot all (surface only for 3D) + if varlist is None: + quiet = not verbose + vardict = compare_varnames(refdata, devdata, quiet=quiet) + varlist = vardict["commonvars3D"] + vardict["commonvars2D"] + print("Plotting all common variables") + n_var = len(varlist) + + # If no PDF name passed, then do not save to PDF + savepdf = True + if pdfname == "": + savepdf = False + if convert_to_ugm3: + properties = read_config_file( + os.path.join( + spcdb_dir, + "species_database.yml" + ), + quiet=True + ) + + sg_ref_params = [1, 170, -90] + sg_dev_params = [1, 170, -90] + # Get stretched-grid info if passed + if sg_ref_path != '': + sg_ref_attrs = xr.open_dataset(sg_ref_path).attrs + sg_ref_params = [ + sg_ref_attrs['stretch_factor'], + sg_ref_attrs['target_longitude'], + sg_ref_attrs['target_latitude']] + + if sg_dev_path != '': + sg_dev_attrs = xr.open_dataset(sg_dev_path).attrs + sg_dev_params = [ + sg_dev_attrs['stretch_factor'], + sg_dev_attrs['target_longitude'], + sg_dev_attrs['target_latitude']] + + # Get grid info and regrid if necessary + [refres, refgridtype, devres, devgridtype, cmpres, cmpgridtype, regridref, + regriddev, regridany, refgrid, devgrid, cmpgrid, refregridder, + devregridder, refregridder_list, devregridder_list] = create_regridders( + refdata, + devdata, + weightsdir, + cmpres=cmpres, + sg_ref_params=sg_ref_params, + sg_dev_params=sg_dev_params + ) + + # ============================================================== + # Handle grid extents for lat-lon grids + # ============================================================== + + # Get lat/lon extents, if applicable + refminlon, refmaxlon, refminlat, refmaxlat = get_grid_extents(refgrid) + devminlon, devmaxlon, devminlat, devmaxlat = get_grid_extents(devgrid) + + if -1000 not in extent: + cmpminlon, cmpmaxlon, cmpminlat, cmpmaxlat = extent + else: + # Account for 0-360 coordinate scale + uniform_refminlon, uniform_refmaxlon = refminlon, refmaxlon + uniform_devminlon, uniform_devmaxlon = devminlon, devmaxlon + if uniform_refmaxlon > 185: + uniform_refminlon, uniform_refmaxlon = -180, 180 + if uniform_devmaxlon > 185: + uniform_devminlon, uniform_devmaxlon = -180, 180 + + cmpminlon, cmpmaxlon, cmpminlat, cmpmaxlat = \ + [np.max([((uniform_refminlon + 180) % 360) - 180, uniform_devminlon]), + np.min([uniform_refmaxlon, uniform_devmaxlon]), + np.max([refminlat, devminlat]), + np.min([refmaxlat, devmaxlat])] + + # Set plot bounds for non cubed-sphere regridding and plotting + # Pylint says ref_extent and dev_extent are not used + # -- Bob Yantosca (15 Aug 2023) + #ref_extent = (refminlon, refmaxlon, refminlat, refmaxlat) + #dev_extent = (devminlon, devmaxlon, devminlat, devmaxlat) + cmp_extent = (cmpminlon, cmpmaxlon, cmpminlat, cmpmaxlat) + # ============================================================== + # Loop over all variables + # ============================================================== + ds_refs = [None] * n_var + frac_ds_refs = [None] * n_var + ds_devs = [None] * n_var + frac_ds_devs 
= [None] * n_var + for i in range(n_var): + varname = varlist[i] + # ============================================================== + # Slice the data, allowing for no time dimension (bpch) + # ============================================================== + # Ref + ds_refs[i] = slice_by_lev_and_time( + refdata, + varname, + itime, + ilev, + flip_ref + ) + if diff_of_diffs: + frac_ds_refs[i] = slice_by_lev_and_time( + fracrefdata, + varname, + itime, + ilev, + flip_ref + ) + # Dev + ds_devs[i] = slice_by_lev_and_time( + devdata, + varname, + itime, + ilev, + flip_dev + ) + if diff_of_diffs: + frac_ds_devs[i] = slice_by_lev_and_time( + fracdevdata, + varname, + itime, + ilev, + flip_dev + ) + + # ================================================================== + # Handle units as needed + # ================================================================== + + # Convert to ppb if units string is variation of mol/mol + if data_unit_is_mol_per_mol(ds_refs[i]): + ds_refs[i].values = ds_refs[i].values * 1e9 + ds_refs[i].attrs["units"] = "ppb" + if data_unit_is_mol_per_mol(ds_devs[i]): + ds_devs[i].values = ds_devs[i].values * 1e9 + ds_devs[i].attrs["units"] = "ppb" + + # If units string is ppbv (true for bpch data) then rename units + if ds_refs[i].units.strip() == "ppbv": + ds_refs[i].attrs["units"] = "ppb" + if ds_devs[i].units.strip() == "ppbv": + ds_devs[i].attrs["units"] = "ppb" + + # If units string is W/m2 (may be true for bpch data) then rename units + if ds_refs[i].units.strip() == "W/m2": + ds_refs[i].attrs["units"] = "W m-2" + if ds_devs[i].units.strip() == "W/m2": + ds_devs[i].attrs["units"] = "W m-2" + + # If units string is UNITLESS (may be true for bpch data) then rename + # units + if ds_refs[i].units.strip() == "UNITLESS": + ds_refs[i].attrs["units"] = "1" + if ds_devs[i].units.strip() == "UNITLESS": + ds_devs[i].attrs["units"] = "1" + + # Compare units of ref and dev. The check_units function will throw an + # error if units do not match and enforce_units is True. + check_units(ds_refs[i], ds_devs[i], enforce_units) + + # Convert from ppb to ug/m3 if convert_to_ugm3 is passed as true + if convert_to_ugm3: + + # Error checks: must pass met, not normalize by area, and be in ppb + if refmet is None or devmet is None: + msg = "Met data must be passed to convert units to ug/m3." + raise ValueError(msg) + if normalize_by_area: + msg = "Normalizing by area is not allowed if plotting ug/m3" + raise ValueError(msg) + if ds_refs[i].units != "ppb" or ds_devs[i].units != "ppb": + msg = "Units must be mol/mol if converting to ug/m3." + raise ValueError(msg) + + # Slice air density data by lev and time + # (assume same format and dimensions as refdata and devdata) + ref_airden = slice_by_lev_and_time( + refmet, + "Met_AIRDEN", + itime, + ilev, + False + ) + dev_airden = slice_by_lev_and_time( + devmet, + "Met_AIRDEN", + itime, + ilev, + False + ) + + # Get a list of properties for the given species + spc_name = varname.replace(varname.split("_")[0] + "_", "") + species_properties = properties.get(spc_name) + + # If no properties are found, then exit with an error. + # Otherwise, get the molecular weight in g/mol. + if species_properties is None: + # Hack lumped species until we implement a solution + if spc_name in ["Simple_SOA", "Complex_SOA"]: + spc_mw_g = 150.0 + else: + msg = f"No properties found for {spc_name}. Cannot convert" \ + + " to ug/m3." 
+ raise ValueError(msg) + else: + spc_mw_g = species_properties.get("MW_g") + if spc_mw_g is None: + msg = f"Molecular weight not found for species {spc_name}!" \ + + " Cannot convert to ug/m3." + raise ValueError(msg) + + # Convert values from ppb to ug/m3: + # ug/m3 = mol/mol * mol/g air * kg/m3 air * 1e3g/kg + # * g/mol spc * 1e6ug/g + # = ppb * air density * (spc MW / air MW) + ds_refs[i].values = ds_refs[i].values * ref_airden.values \ + * (spc_mw_g / MW_AIR_g) + ds_devs[i].values = ds_devs[i].values * dev_airden.values \ + * (spc_mw_g / MW_AIR_g) + + # Update units string + ds_refs[i].attrs["units"] = "\u03BCg/m3" # ug/m3 using mu + ds_devs[i].attrs["units"] = "\u03BCg/m3" + + # ================================================================== + # Get the area variables if normalize_by_area=True. They can be + # either in the main datasets as variable AREA or in the optionally + # passed meteorology datasets as Met_AREAM2. + # ================================================================== + if normalize_by_area: + # ref + if "AREA" in refdata.data_vars.keys(): + ref_area = refdata["AREA"] + elif refmet is not None: + if "Met_AREAM2" in refmet.data_vars.keys(): + ref_area = refmet["Met_AREAM2"] + else: + msg = "normalize_by_area = True but AREA not " \ + + "present in the Ref dataset and ref met with Met_AREAM2" \ + + " not passed!" + raise ValueError(msg) + if "time" in ref_area.dims: + ref_area = ref_area.isel(time=0) + if refgridtype == 'cs': + ref_area = reshape_MAPL_CS(ref_area) + + # dev + if "AREA" in devdata.data_vars.keys(): + dev_area = devdata["AREA"] + elif devmet is not None: + if "Met_AREAM2" in devmet.data_vars.keys(): + dev_area = devmet["Met_AREAM2"] + else: + msg = "normalize_by_area = True but AREA not " \ + + "present in the Dev dataset and dev met with Met_AREAM2" \ + + " not passed!" + raise ValueError(msg) + if "time" in dev_area.dims: + dev_area = dev_area.isel(time=0) + if devgridtype == 'cs': + dev_area = reshape_MAPL_CS(dev_area) + + # Make sure the areas do not have a lev dimension + if "lev" in ref_area.dims: + ref_area = ref_area.isel(lev=0) + if "lev" in dev_area.dims: + dev_area = dev_area.isel(lev=0) + + # ============================================================== + # Reshape cubed sphere data if using MAPL v1.0.0+ + # TODO: update function to expect data in this format + # ============================================================== + + for i in range(n_var): + ds_refs[i] = reshape_MAPL_CS(ds_refs[i]) + ds_devs[i] = reshape_MAPL_CS(ds_devs[i]) + #ds_ref_cmps[i] = reshape_MAPL_CS(ds_ref_cmps[i]) + #ds_dev_cmps[i] = reshape_MAPL_CS(ds_dev_cmps[i]) + if diff_of_diffs: + frac_ds_refs[i] = reshape_MAPL_CS(frac_ds_refs[i]) + frac_ds_devs[i] = reshape_MAPL_CS(frac_ds_devs[i]) + #frac_ds_ref_cmps[i] = reshape_MAPL_CS(frac_ds_ref_cmps[i]) + #frac_ds_dev_cmps[i] = reshape_MAPL_CS(frac_ds_dev_cmps[i]) + + + # ================================================================== + # Create arrays for each variable in Ref and Dev datasets + # and do any necessary horizontal regridding. 'cmp' stands for comparison + # and represents ref and dev data regridded as needed to a common + # grid type and resolution for use in difference and ratio plots. 
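The ppb to ug/m3 conversion above is compact because the unit factors cancel: the 1e-9 (ppb to mol/mol) cancels the 1e3 * 1e6 (kg to g, then g to ug), leaving ppb * air density [kg/m3] * (spc MW / air MW). A worked check with hypothetical values:

```python
# Hypothetical: 40 ppb of ozone (MW 48 g/mol) in air of density
# 1.2 kg/m3; 28.9644 g/mol assumed for MW_AIR_g (gcpy.constants)
ppb = 40.0
airden = 1.2        # kg/m3, as sliced from Met_AIRDEN
spc_mw_g = 48.0     # g/mol
MW_AIR_g = 28.9644  # g/mol

ugm3 = ppb * airden * (spc_mw_g / MW_AIR_g)
print(f"{ugm3:.1f} ug/m3")  # ~79.5 ug/m3
```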
+ # ================================================================== + ds_ref_cmps = [None] * n_var + ds_dev_cmps = [None] * n_var + frac_ds_ref_cmps = [None] * n_var + frac_ds_dev_cmps = [None] * n_var + + global_cmp_grid = call_make_grid(cmpres, cmpgridtype)[0] + # Get grid limited to cmp_extent for comparison datasets + # Do not do this for cross-dateline plotting + if cmp_extent[0] < cmp_extent[1]: + regional_cmp_extent = cmp_extent + else: + regional_cmp_extent = [-180, 180, -90, 90] + + regional_cmp_grid = call_make_grid(cmpres, cmpgridtype, + in_extent=[-180,180,-90,90], + out_extent=regional_cmp_extent)[0] + + # Get comparison data extents in same midpoint format as lat-lon grid. + cmp_mid_minlon, cmp_mid_maxlon, cmp_mid_minlat, cmp_mid_maxlat = \ + get_grid_extents(regional_cmp_grid, edges=False) + + cmpminlon_ind = np.where(global_cmp_grid["lon"] >= cmp_mid_minlon)[0][0] + cmpmaxlon_ind = np.where(global_cmp_grid["lon"] <= cmp_mid_maxlon)[0][-1] + cmpminlat_ind = np.where(global_cmp_grid["lat"] >= cmp_mid_minlat)[0][0] + cmpmaxlat_ind = np.where(global_cmp_grid["lat"] <= cmp_mid_maxlat)[0][-1] + + for i in range(n_var): + ds_ref = ds_refs[i] + ds_dev = ds_devs[i] + + # Do area normalization before regridding if normalize_by_area is True. + # Assumes units are the same in ref and dev. If enforce_units is passed + # as false then normalization may not be correct. + if normalize_by_area: + exclude_list = ["WetLossConvFrac", "Prod_", "Loss_"] + if not any(s in varname for s in exclude_list): + ds_ref.values = ds_ref.values / ref_area.values + ds_dev.values = ds_dev.values / dev_area.values + ds_refs[i] = ds_ref + ds_devs[i] = ds_dev + if diff_of_diffs: + frac_ds_refs[i] = frac_ds_refs[i].values / ref_area.values + frac_ds_devs[i] = frac_ds_devs[i].values / dev_area.values + ref_cs_res = refres + dev_cs_res = devres + if cmpgridtype == "cs": + ref_cs_res = cmpres + dev_cs_res = cmpres + # Ref + ds_ref_cmps[i] = regrid_comparison_data( + ds_ref, + ref_cs_res, + regridref, + refregridder, + refregridder_list, + global_cmp_grid, + refgridtype, + cmpgridtype, + cmpminlat_ind, + cmpmaxlat_ind, + cmpminlon_ind, + cmpmaxlon_ind + ) + # Dev + ds_dev_cmps[i] = regrid_comparison_data( + ds_dev, + dev_cs_res, + regriddev, + devregridder, + devregridder_list, + global_cmp_grid, + devgridtype, + cmpgridtype, + cmpminlat_ind, + cmpmaxlat_ind, + cmpminlon_ind, + cmpmaxlon_ind + ) + # Diff of diffs + if diff_of_diffs: + frac_ds_ref_cmps[i] = regrid_comparison_data( + frac_ds_refs[i], + ref_cs_res, + regridref, + refregridder, + refregridder_list, + global_cmp_grid, + refgridtype, + cmpgridtype, + cmpminlat_ind, + cmpmaxlat_ind, + cmpminlon_ind, + cmpmaxlon_ind + ) + frac_ds_dev_cmps[i] = regrid_comparison_data( + frac_ds_devs[i], + dev_cs_res, + regriddev, + devregridder, + devregridder_list, + global_cmp_grid, + devgridtype, + cmpgridtype, + cmpminlat_ind, + cmpmaxlat_ind, + cmpminlon_ind, + cmpmaxlon_ind + ) + # ================================================================= + # Define function to create a single page figure to be called + # in a parallel loop + # ================================================================= + def createfig(ivar, temp_dir=''): + + # Suppress harmless run-time warnings (mostly about underflow) + warnings.filterwarnings('ignore', category=RuntimeWarning) + warnings.filterwarnings('ignore', category=UserWarning) + + if savepdf and verbose: + print(f"{ivar} ", end="") + varname = varlist[ivar] + + ds_ref = ds_refs[ivar] + ds_dev = ds_devs[ivar] + + # 
============================================================== + # Set units and subtitle, including modification if normalizing + # area. Note if enforce_units is False (non-default) then + # units on difference plots will be wrong. + # ============================================================== + cmn_units = ds_ref.attrs["units"] + subtitle_extra = "" + if normalize_by_area: + exclude_list = ["WetLossConvFrac", "Prod_", "Loss_"] + if not any(s in varname for s in exclude_list): + if "/" in cmn_units: + cmn_units = f"{cmn_units}/m2" + else: + cmn_units = f"{cmn_units} m-2" + ds_ref.attrs["units"] = cmn_units + ds_dev.attrs["units"] = cmn_units + subtitle_extra = ", Normalized by Area" + + # ============================================================== + # Get comparison data sets, regridding input slices if needed + # ============================================================== + + # Reshape ref/dev cubed sphere data, if any + ds_ref_reshaped = None + if refgridtype == "cs": + ds_ref_reshaped = ds_ref.data.reshape(6, refres, refres) + ds_dev_reshaped = None + if devgridtype == "cs": + ds_dev_reshaped = ds_dev.data.reshape(6, devres, devres) + + ds_ref_cmp = ds_ref_cmps[ivar] + ds_dev_cmp = ds_dev_cmps[ivar] + frac_ds_ref_cmp = frac_ds_ref_cmps[ivar] + frac_ds_dev_cmp = frac_ds_dev_cmps[ivar] + + # Reshape comparison cubed sphere data, if any + if cmpgridtype == "cs": + def call_reshape(cmp_data): + new_data = None + if isinstance(cmp_data, xr.DataArray): + new_data = cmp_data.data.reshape(6, cmpres, cmpres) + elif isinstance(cmp_data, np.ndarray): + new_data = cmp_data.reshape(6, cmpres, cmpres) + return new_data + + ds_ref_cmp_reshaped = call_reshape(ds_ref_cmp) + ds_dev_cmp_reshaped = call_reshape(ds_dev_cmp) + frac_ds_ref_cmp_reshaped = call_reshape(frac_ds_ref_cmp) + frac_ds_dev_cmp_reshaped = call_reshape(frac_ds_dev_cmp) + + # ============================================================== + # Get min and max values for use in the colorbars + # ============================================================== + + # Choose from values within plot extent + if -1000 not in extent: + min_max_extent = extent + else: + min_max_extent = cmp_extent + # Find min and max lon + min_max_minlon = np.min([min_max_extent[0], min_max_extent[1]]) + min_max_maxlon = np.max([min_max_extent[0], min_max_extent[1]]) + min_max_minlat = min_max_extent[2] + min_max_maxlat = min_max_extent[3] + + def get_extent_for_colors(dset, minlon, maxlon, minlat, maxlat): + ds_new = dset.copy() + lat_var='lat' + lon_var='lon' + # Account for cubed-sphere data + if 'lons' in ds_new.coords: + lat_var='lats' + lon_var='lons' + if ds_new['lon'].max() > 190: + minlon=minlon%360 + maxlon=maxlon%360 + # account for global plot + if minlon == maxlon and maxlon == 180: + minlon = 0 + maxlon = 360 + # account for cross dateline + if minlon > maxlon: + minlon, maxlon = maxlon, minlon + + # Add .compute() to force evaluation of ds_new[lon_var] + # See https://github.com/geoschem/gcpy/issues/254 + # Also note: This may return as a dask.array.Array object + return ds_new.where(\ + ds_new[lon_var].compute() >= minlon, drop=True).\ + where(ds_new[lon_var].compute() <= maxlon, drop=True).\ + where(ds_new[lat_var].compute() >= minlat, drop=True).\ + where(ds_new[lat_var].compute() <= maxlat, drop=True) + + ds_ref_reg = get_extent_for_colors( + ds_ref, + min_max_minlon, + min_max_maxlon, + min_max_minlat, + min_max_maxlat + ) + ds_dev_reg = get_extent_for_colors( + ds_dev, + min_max_minlon, + min_max_maxlon, + min_max_minlat, + 
min_max_maxlat + ) + + # Ref + vmin_ref = float(np.nanmin(ds_ref_reg.data)) + vmax_ref = float(np.nanmax(ds_ref_reg.data)) + + # Dev + vmin_dev = float(np.nanmin(ds_dev_reg.data)) + vmax_dev = float(np.nanmax(ds_dev_reg.data)) + +# Pylint says that these are unused variables, so comment out +# -- Bob Yantosca (15 Aug 2023) +# # Comparison +# if cmpgridtype == "cs": +# vmin_ref_cmp = float(np.nanmin(ds_ref_cmp)) +# vmax_ref_cmp = float(np.nanmax(ds_ref_cmp)) +# vmin_dev_cmp = float(np.nanmin(ds_dev_cmp)) +# vmax_dev_cmp = float(np.nanmax(ds_dev_cmp)) +# vmin_cmp = np.nanmin([vmin_ref_cmp, vmin_dev_cmp]) +# vmax_cmp = np.nanmax([vmax_ref_cmp, vmax_dev_cmp]) +# else: +# vmin_cmp = np.nanmin([np.nanmin(ds_ref_cmp), np.nanmin(ds_dev_cmp)]) +# vmax_cmp = np.nanmax([np.nanmax(ds_ref_cmp), np.nanmax(ds_dev_cmp)]) + + # Get overall min & max + vmin_abs = np.nanmin([vmin_ref, vmin_dev])#, vmin_cmp]) + vmax_abs = np.nanmax([vmax_ref, vmax_dev])#, vmax_cmp]) + # ============================================================== + # Test if Ref and/or Dev contain all zeroes or all NaNs. + # This will have implications as to how we set min and max + # values for the color ranges below. + # ============================================================== + + ref_is_all_zero, ref_is_all_nan = all_zero_or_nan(ds_ref.values) + dev_is_all_zero, dev_is_all_nan = all_zero_or_nan(ds_dev.values) + + # ============================================================== + # Calculate absolute difference + # ============================================================== + if cmpgridtype == "ll": + absdiff = np.array(ds_dev_cmp) - np.array(ds_ref_cmp) + else: + absdiff = ds_dev_cmp_reshaped - ds_ref_cmp_reshaped + # Test if the abs. diff. is zero everywhere or NaN everywhere + absdiff_is_all_zero, absdiff_is_all_nan = all_zero_or_nan(absdiff) + # For cubed-sphere, take special care to avoid a spurious + # boundary line, as described here: https://stackoverflow.com/ + # questions/46527456/preventing-spurious-horizontal-lines-for- + # ungridded-pcolormesh-data + if cmpgridtype == "cs": + absdiff = np.ma.masked_where(np.abs(cmpgrid["lon"] - 180) < 2, + absdiff) + + # ============================================================== + # Calculate fractional difference, set divides by zero to NaN + # ============================================================== + if cmpgridtype == "ll": + # Replace fractional difference plots with absolute difference + # of fractional datasets if necessary + if frac_ds_dev_cmp is not None and frac_ds_ref_cmp is not None: + fracdiff = np.array(frac_ds_dev_cmp) - \ + np.array(frac_ds_ref_cmp) + else: + fracdiff = np.abs(np.array(ds_dev_cmp)) / \ + np.abs(np.array(ds_ref_cmp)) + else: + if frac_ds_dev_cmp is not None and frac_ds_ref_cmp is not None: + fracdiff = frac_ds_dev_cmp_reshaped - \ + frac_ds_ref_cmp_reshaped + else: + fracdiff = np.abs(ds_dev_cmp_reshaped) / \ + np.abs(ds_ref_cmp_reshaped) + + # Replace Infinity values with NaN + fracdiff = np.where(np.abs(fracdiff) == np.inf, np.nan, fracdiff) + fracdiff[np.abs(fracdiff) > 1e308] = np.nan + + # Test if the frac. diff. 
is zero everywhere or NaN everywhere + fracdiff_is_all_zero = not np.any(fracdiff) or \ + (np.nanmin(fracdiff) == 0 and + np.nanmax(fracdiff) == 0) + fracdiff_is_all_nan = np.isnan(fracdiff).all() or ref_is_all_zero + + # For cubed-sphere, take special care to avoid a spurious + # boundary line, as described here: https://stackoverflow.com/ + # questions/46527456/preventing-spurious-horizontal-lines-for- + # ungridded-pcolormesh-data + if cmpgridtype == "cs": + fracdiff = np.ma.masked_where(np.abs(cmpgrid["lon"] - 180) < 2, + fracdiff) + + # ============================================================== + # Create 3x2 figure + # ============================================================== + + # Create figures and axes objects + # Also define the map projection that will be shown + if extent[0] > extent[1]: + proj = ccrs.PlateCarree(central_longitude=180) + else: + proj = ccrs.PlateCarree() + figs, ((ax0, ax1), (ax2, ax3), (ax4, ax5)) = plt.subplots( + 3, 2, figsize=[12, 14], + subplot_kw={"projection": proj} + ) + # Ensure subplots don't overlap when invoking plt.show() + if not savepdf: + plt.subplots_adjust(hspace=0.4) + # Give the figure a title + offset = 0.96 + if "lev" in ds_ref.dims and "lev" in ds_dev.dims: + if ilev == 0: + levstr = "Surface" + elif ilev == 22: + levstr = "500 hPa" + else: + levstr = "Level " + str(ilev - 1) + if extra_title_txt is not None: + figs.suptitle( + f"{varname}, {levstr} ({extra_title_txt})", + y=offset, + ) + else: + figs.suptitle( + f"{varname}, {levstr}", + y=offset + ) + elif ( + "lat" in ds_ref.dims + and "lat" in ds_dev.dims + and "lon" in ds_ref.dims + and "lon" in ds_dev.dims + ): + if extra_title_txt is not None: + figs.suptitle( + f"{varname} ({extra_title_txt})", + y=offset, + ) + else: + figs.suptitle( + f"{varname}", + y=offset) + else: + print(f"Incorrect dimensions for {varname}!") + + # ============================================================== + # Set colormaps for data plots + # + # Use shallow copy (copy.copy() to create color map objects, + # in order to avoid set_bad() from being applied to the base + # color table. See: https://docs.python.org/3/library/copy.html + # ============================================================== + + # Colormaps for 1st row (Ref and Dev) + if use_cmap_RdBu: + cmap_toprow_nongray = copy.copy(mpl.colormaps["RdBu_r"]) + cmap_toprow_gray = copy.copy(mpl.colormaps["RdBu_r"]) + else: + cmap_toprow_nongray = copy.copy(WhGrYlRd) + cmap_toprow_gray = copy.copy(WhGrYlRd) + cmap_toprow_gray.set_bad(color="gray") + + if refgridtype == "ll": + if ref_is_all_nan: + ref_cmap = cmap_toprow_gray + else: + ref_cmap = cmap_toprow_nongray + + if dev_is_all_nan: + dev_cmap = cmap_toprow_gray + else: + dev_cmap = cmap_toprow_nongray + + # Colormaps for 2nd row (Abs. Diff.) and 3rd row (Frac. 
Diff.) + cmap_nongray = copy.copy(mpl.colormaps["RdBu_r"]) + cmap_gray = copy.copy(mpl.colormaps["RdBu_r"]) + cmap_gray.set_bad(color="gray") + + # ============================================================== + # Set titles for plots + # ============================================================== + + if refgridtype == "ll": + ref_title = f"{refstr} (Ref){subtitle_extra}\n{refres}" + else: + ref_title = f"{refstr} (Ref){subtitle_extra}\nc{refres}" + + if devgridtype == "ll": + dev_title = f"{devstr} (Dev){subtitle_extra}\n{devres}" + else: + dev_title = f"{devstr} (Dev){subtitle_extra}\nc{devres}" + if regridany: + absdiff_dynam_title = \ + f"Difference ({cmpres})\nDev - Ref, Dynamic Range" + absdiff_fixed_title = \ + f"Difference ({cmpres})\nDev - Ref, Restricted Range [5%,95%]" + if diff_of_diffs: + fracdiff_dynam_title = \ + f"Difference ({cmpres}), " + \ + f"Dynamic Range\n{frac_devstr} - {frac_refstr}" + fracdiff_fixed_title = \ + f"Difference ({cmpres}), " + \ + f"Restricted Range [5%,95%]\n{frac_devstr} - {frac_refstr}" + else: + fracdiff_dynam_title = \ + f"Ratio ({cmpres})\nDev/Ref, Dynamic Range" + fracdiff_fixed_title = \ + f"Ratio ({cmpres})\nDev/Ref, Fixed Range" + else: + absdiff_dynam_title = "Difference\nDev - Ref, Dynamic Range" + absdiff_fixed_title = \ + "Difference\nDev - Ref, Restricted Range [5%,95%]" + if diff_of_diffs: + fracdiff_dynam_title = \ + f"Difference, Dynamic Range\n{frac_devstr} - {frac_refstr}" + fracdiff_fixed_title = \ + "Difference, Restricted Range " + \ + f"[5%,95%]\n{frac_devstr} - {frac_refstr}" + else: + fracdiff_dynam_title = "Ratio \nDev/Ref, Dynamic Range" + fracdiff_fixed_title = "Ratio \nDev/Ref, Fixed Range" + + # ============================================================== + # Bundle variables for 6 parallel plotting calls + # 0 = Ref 1 = Dev + # 2 = Dynamic abs diff 3 = Restricted abs diff + # 4 = Dynamic frac diff 5 = Restricted frac diff + # ============================================================== + + subplots = six_panel_subplot_names(diff_of_diffs) + + all_zeros = [ + ref_is_all_zero, + dev_is_all_zero, + absdiff_is_all_zero, + absdiff_is_all_zero, + fracdiff_is_all_zero, + fracdiff_is_all_zero, + ] + + all_nans = [ + ref_is_all_nan, + dev_is_all_nan, + absdiff_is_all_nan, + absdiff_is_all_nan, + fracdiff_is_all_nan, + fracdiff_is_all_nan, + ] + if -1000 not in extent: + extents = [extent[:], extent[:], + extent[:], extent[:], + extent[:], extent[:]] + else: + plot_extent = [np.max([cmp_extent[0], -180]), + np.min([cmp_extent[1], 180]), + cmp_extent[2], cmp_extent[3]] + extents = [plot_extent[:], plot_extent[:], + plot_extent[:], plot_extent[:], + plot_extent[:], plot_extent[:]] + plot_vals = [ds_ref, ds_dev, absdiff, absdiff, fracdiff, fracdiff] + grids = [refgrid, devgrid, regional_cmp_grid.copy(), regional_cmp_grid.copy(), + regional_cmp_grid.copy(), regional_cmp_grid.copy()] + axs = [ax0, ax1, ax2, ax3, ax4, ax5] + rowcols = [(0, 0), (0, 1), (1, 0), (1, 1), (2, 0), (2, 1)] + titles = [ + ref_title, + dev_title, + absdiff_dynam_title, + absdiff_fixed_title, + fracdiff_dynam_title, + fracdiff_fixed_title, + ] + + if refgridtype == "ll": + cmaps = [ref_cmap, dev_cmap, cmap_gray, + cmap_gray, cmap_gray, cmap_gray] + else: + cmaps = [ + cmap_toprow_nongray, + cmap_toprow_nongray, + cmap_nongray, + cmap_nongray, + cmap_nongray, + cmap_nongray, + ] + + ref_masked = None + dev_masked = None + if refgridtype == "cs": + ref_masked = np.ma.masked_where( + np.abs(refgrid["lon"] - 180) < 2, ds_ref_reshaped + ) + if devgridtype == 
"cs": + dev_masked = np.ma.masked_where( + np.abs(devgrid["lon"] - 180) < 2, ds_dev_reshaped + ) + masked = [ref_masked, dev_masked, absdiff, absdiff, fracdiff, fracdiff] + + gridtypes = [ + refgridtype, + devgridtype, + cmpgridtype, + cmpgridtype, + cmpgridtype, + cmpgridtype, + ] + + unit_list = [ds_ref.units, ds_dev.units, cmn_units, + cmn_units, "unitless", "unitless"] + + other_all_nans = [dev_is_all_nan, ref_is_all_nan, + False, False, False, False] + + mins = [vmin_ref, vmin_dev, vmin_abs] + maxs = [vmax_ref, vmax_dev, vmax_abs] + + ratio_logs = [False, False, False, False, True, True] + + # Plot + for i in range(6): + six_plot( + subplots[i], + all_zeros[i], + all_nans[i], + plot_vals[i], + grids[i], + axs[i], + rowcols[i], + titles[i], + cmaps[i], + unit_list[i], + extents[i], + masked[i], + other_all_nans[i], + gridtypes[i], + mins, + maxs, + use_cmap_RdBu, + match_cbar, + verbose, + log_color_scale, + plot_type="single_level", + ratio_log=ratio_logs[i], + proj=proj, + ll_plot_func=ll_plot_func, + **extra_plot_args + ) + + + # ============================================================== + # Add this page of 6-panel plots to a PDF file + # ============================================================== + if savepdf: + folders = pdfname.split('/') + pdfname_temp = folders[-1] + "BENCHMARKFIGCREATION.pdf" + str(ivar) + full_path = temp_dir + for folder in folders[:-1]: + full_path = os.path.join(full_path, folder) + if not os.path.isdir(full_path): + try: + os.mkdir(full_path) + except FileExistsError: + pass + pdf = PdfPages(os.path.join(full_path, pdfname_temp)) + pdf.savefig(figs) + pdf.close() + plt.close(figs) + # ============================================================== + # Update the list of variables with significant differences. + # Criterion: abs(1 - max(fracdiff)) > 0.1 + # Do not include NaNs in the criterion, because these indicate + # places where fracdiff could not be computed (div-by-zero). 
+ # ============================================================== + if np.abs(1 - np.nanmax(fracdiff)) > 0.1: + sigdiff_list.append(varname) + return varname + return "" + + # ================================================================== + # Call figure generation function in a parallel loop over variables + # ================================================================== + # do not attempt nested thread parallelization due to issues with + # matplotlib + if current_process().name != "MainProcess": + n_job = 1 + + if not savepdf: + # disable parallel plotting to allow interactive figure plotting + for i in range(n_var): + createfig(i) + + else: + with TemporaryDirectory() as temp_dir: + # --------------------------------------- + # Turn off parallelization if n_job=1 + if n_job != 1: + results = Parallel(n_jobs=n_job)( + delayed(createfig)(i, temp_dir) + for i in range(n_var) + ) + else: + for i in range(n_var): + results = createfig(i, temp_dir) + # --------------------------------------- + + # update sig diffs after parallel calls + if current_process().name == "MainProcess": + for varname in results: + if isinstance(varname, str): + sigdiff_list.append(varname) + + # ========================================================== + # Finish + # ========================================================== + if verbose: + print("Closed PDF") + merge = PdfMerger() + #print(f"Creating {pdfname} for {n_var} variables") + pdf = PdfPages(pdfname) + pdf.close() + for i in range(n_var): + temp_pdfname = pdfname + if pdfname[0] == '/': + temp_pdfname = temp_pdfname[1:] + merge.append( + os.path.join( + str(temp_dir), + temp_pdfname + + "BENCHMARKFIGCREATION.pdf" + + str(i))) + merge.write(pdfname) + merge.close() + warnings.showwarning = _warning_format diff --git a/gcpy/plot/compare_zonal_mean.py b/gcpy/plot/compare_zonal_mean.py new file mode 100644 index 00000000..3999bb3e --- /dev/null +++ b/gcpy/plot/compare_zonal_mean.py @@ -0,0 +1,1080 @@ +""" +Creates a six-panel comparison plot of zonal means from two different +GEOS-Chem model versions. Called from the GEOS-Chem benchmarking scripts +and from the compare_diags.py example script. 
+""" +import os +import copy +import warnings +from multiprocessing import current_process +from tempfile import TemporaryDirectory +import matplotlib as mpl +import matplotlib.pyplot as plt +from matplotlib.backends.backend_pdf import PdfPages +import numpy as np +import xarray as xr +from joblib import Parallel, delayed +from pypdf import PdfMerger +from gcpy.grid import get_vert_grid, get_pressure_indices, \ + pad_pressure_edges, convert_lev_to_pres +from gcpy.regrid import regrid_comparison_data, create_regridders, gen_xmat, \ + regrid_vertical +from gcpy.util import reshape_MAPL_CS, get_diff_of_diffs, \ + all_zero_or_nan, compare_varnames, \ + read_config_file, verify_variable_type +from gcpy.units import check_units, data_unit_is_mol_per_mol +from gcpy.constants import MW_AIR_g +from gcpy.plot.core import gcpy_style, six_panel_subplot_names, \ + _warning_format, WhGrYlRd +from gcpy.plot.six_plot import six_plot + +# Suppress numpy divide by zero warnings to prevent output spam +np.seterr(divide="ignore", invalid="ignore") + +# Use a style sheet to control plot attributes +plt.style.use(gcpy_style) + + +def compare_zonal_mean( + refdata, + refstr, + devdata, + devstr, + varlist=None, + itime=0, + refmet=None, + devmet=None, + weightsdir='.', + pdfname="", + cmpres=None, + match_cbar=True, + pres_range=None, + normalize_by_area=False, + enforce_units=True, + convert_to_ugm3=False, + flip_ref=False, + flip_dev=False, + use_cmap_RdBu=False, + verbose=False, + log_color_scale=False, + log_yaxis=False, + extra_title_txt=None, + n_job=-1, + sigdiff_list=None, + second_ref=None, + second_dev=None, + spcdb_dir=os.path.dirname(__file__), + sg_ref_path='', + sg_dev_path='', + ref_vert_params=None, + dev_vert_params=None, + **extra_plot_args +): + """ + Creates 3x2 comparison zonal-mean plots for variables + common in two xarray Datasets. Optionally save to PDF. + + Args: + refdata: xarray dataset + Dataset used as reference in comparison + refstr: str + String description for reference data to be used in plots + devdata: xarray dataset + Dataset used as development in comparison + devstr: str + String description for development data to be used in plots + + Keyword Args (optional): + varlist: list of strings + List of xarray dataset variable names to make plots for + Default value: None (will compare all common 3D variables) + itime: integer + Dataset time dimension index using 0-based system + Default value: 0 + refmet: xarray dataset + Dataset containing ref meteorology + Default value: None + devmet: xarray dataset + Dataset containing dev meteorology + Default value: None + weightsdir: str + Directory path for storing regridding weights + Default value: None (will create/store weights in + current directory) + pdfname: str + File path to save plots as PDF + Default value: Empty string (will not create PDF) + cmpres: str + String description of grid resolution at which + to compare datasets + Default value: None (will compare at highest resolution + of Ref and Dev) + match_cbar: bool + Set this flag to True to use same the colorbar bounds + for both Ref and Dev plots. + Default value: True + pres_range: list of two integers + Pressure range of levels to plot [hPa]. The vertical axis + will span the outer pressure edges of levels that contain + pres_range endpoints. + Default value: [0, 2000] + normalize_by_area: bool + Set this flag to True to to normalize raw data in both + Ref and Dev datasets by grid area. 
Input ref and dev + datasets must include AREA variable in m2 if normalizing + by area. + Default value: False + enforce_units: bool + Set this flag to True to force an error if the variables in + the Ref and Dev datasets have different units. + Default value: True + convert_to_ugm3: bool + Whether to convert data units to ug/m3 for plotting. + Default value: False + flip_ref: bool + Set this flag to True to flip the vertical dimension of + 3D variables in the Ref dataset. + Default value: False + flip_dev: bool + Set this flag to True to flip the vertical dimension of + 3D variables in the Dev dataset. + Default value: False + use_cmap_RdBu: bool + Set this flag to True to use a blue-white-red colormap for + plotting raw reference and development datasets. + Default value: False + verbose: bool + Set this flag to True to enable informative printout. + Default value: False + log_color_scale: bool + Set this flag to True to enable plotting data (not diffs) + on a log color scale. + Default value: False + log_yaxis: bool + Set this flag to True if you wish to create zonal mean + plots with a log-pressure Y-axis. + Default value: False + extra_title_txt: str + Specifies extra text (e.g. a date string such as "Jan2016") + for the top-of-plot title. + Default value: None + n_job: int + Defines the number of simultaneous workers for parallel + plotting. Set to 1 to disable parallel plotting. + Value of -1 allows the application to decide. + Default value: -1 + sigdiff_list: list of str + Returns a list of all quantities having significant + differences (where |max(fractional difference)| > 0.1). + Default value: None + second_ref: xarray Dataset + A dataset of the same model type / grid as refdata, + to be used in diff-of-diffs plotting. + Default value: None + second_dev: xarray Dataset + A dataset of the same model type / grid as devdata, + to be used in diff-of-diffs plotting. + Default value: None + spcdb_dir: str + Directory containing species_database.yml file. + Default value: Path of GCPy code repository + sg_ref_path: str + Path to NetCDF file containing stretched-grid info + (in attributes) for the ref dataset + Default value: '' (will not be read in) + sg_dev_path: str + Path to NetCDF file containing stretched-grid info + (in attributes) for the dev dataset + Default value: '' (will not be read in) + ref_vert_params: list(AP, BP) of list-like types + Hybrid grid parameter A in hPa and B (unitless). + Needed if ref grid is not 47 or 72 levels. + Default value: None + dev_vert_params: list(AP, BP) of list-like types + Hybrid grid parameter A in hPa and B (unitless). + Needed if dev grid is not 47 or 72 levels. + Default value: None + extra_plot_args: various + Any extra keyword arguments are passed through the + plotting functions to be used in calls to pcolormesh() + (CS) or imshow() (Lat/Lon).
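+
+    Example:
+        A minimal usage sketch (the file names, version labels, and
+        variable name below are hypothetical, not part of GCPy):
+
+        >>> import xarray as xr
+        >>> from gcpy.plot.compare_zonal_mean import compare_zonal_mean
+        >>> ref = xr.open_dataset("GEOSChem.SpeciesConc.Ref.nc4")
+        >>> dev = xr.open_dataset("GEOSChem.SpeciesConc.Dev.nc4")
+        >>> compare_zonal_mean(ref, "GCC 14.3.0", dev, "GCC 14.4.0",
+        ...                    varlist=["SpeciesConcVV_O3"],
+        ...                    pdfname="O3_zonal_mean.pdf")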
+ """ + warnings.showwarning = _warning_format + verify_variable_type(refdata, xr.Dataset) + verify_variable_type(devdata, xr.Dataset) + + # Create empty lists for keyword arguments + if sigdiff_list is None: + sigdiff_list = [] + if ref_vert_params is None: + ref_vert_params = [[], []] + if dev_vert_params is None: + dev_vert_params = [[], []] + if pres_range is None: + pres_range = [0, 2000] + + # Determine if doing diff-of-diffs + diff_of_diffs = second_ref is not None and second_dev is not None + + # Prepare diff-of-diffs datasets if needed + if diff_of_diffs: + refdata, devdata = refdata.load(), devdata.load() + second_ref, second_dev = second_ref.load(), second_dev.load() + +# # If needed, use fake time dim in case dates are different in datasets. +# # This needs more work for case of single versus multiple times. +# aligned_time = np.datetime64('2000-01-01') +# refdata = refdata.assign_coords({'time' : [aligned_time]}) +# devdata = devdata.assign_coords({'time' : [aligned_time]}) +# second_ref = second_ref.assign_coords({'time' : [aligned_time]}) +# second_dev = second_dev.assign_coords({'time' : [aligned_time]}) + + refdata, fracrefdata = get_diff_of_diffs(refdata, second_ref) + devdata, fracdevdata = get_diff_of_diffs(devdata, second_dev) + + frac_refstr = 'GCC_dev / GCC_ref' + frac_devstr = 'GCHP_dev / GCHP_ref' + + # If no varlist is passed, plot all 3D variables in the dataset + if varlist is None: + quiet = not verbose + vardict = compare_varnames(refdata, devdata, quiet=quiet) + varlist = vardict["commonvars3D"] + print("Plotting all 3D variables") + n_var = len(varlist) + + # Exit out if there are no 3D variables + if not n_var: + print("WARNING: no 3D variables to plot zonal mean for!") + return + + # If no PDF name passed, then do not save to PDF + savepdf = True + if pdfname == "": + savepdf = False + # If converting to ug/m3, load the species database + if convert_to_ugm3: + properties = read_config_file( + os.path.join( + spcdb_dir, + "species_database.yml" + ), + quiet=True + ) + + # Get mid-point pressure and edge pressures for this grid + ref_pedge, ref_pmid, _ = get_vert_grid(refdata, *ref_vert_params) + dev_pedge, dev_pmid, _ = get_vert_grid(devdata, *dev_vert_params) + + # Get indexes of pressure subrange (full range is default) + ref_pedge_ind = get_pressure_indices(ref_pedge, pres_range) + dev_pedge_ind = get_pressure_indices(dev_pedge, pres_range) + + # Pad edges if subset does not include surface or TOA so data spans + # entire subrange + ref_pedge_ind = pad_pressure_edges( + ref_pedge_ind, + refdata.sizes["lev"], + np.size(ref_pmid)) + dev_pedge_ind = pad_pressure_edges( + dev_pedge_ind, + devdata.sizes["lev"], + np.size(dev_pmid)) + + # pmid indexes do not include last pedge index + ref_pmid_ind = ref_pedge_ind[:-1] + dev_pmid_ind = dev_pedge_ind[:-1] + + # Convert levels to pressures in ref and dev data + refdata = convert_lev_to_pres(refdata, ref_pmid, ref_pedge) + devdata = convert_lev_to_pres(devdata, dev_pmid, dev_pedge) + + if diff_of_diffs: + fracrefdata = convert_lev_to_pres(fracrefdata, ref_pmid, ref_pedge) + fracdevdata = convert_lev_to_pres(fracdevdata, dev_pmid, dev_pedge) + + # ================================================================== + # Reduce pressure range if reduced range passed as input. Indices + # must be flipped if flipping vertical axis. 
+ # ================================================================== + # this may require checking for 48 / 73 levels + ref_pmid_ind_flipped = refdata.sizes["lev"] - ref_pmid_ind[::-1] - 1 + dev_pmid_ind_flipped = devdata.sizes["lev"] - dev_pmid_ind[::-1] - 1 + if flip_ref: + ref_pmid_ind = ref_pmid_ind_flipped + if flip_dev: + dev_pmid_ind = dev_pmid_ind_flipped + + refdata = refdata.isel(lev=ref_pmid_ind) + devdata = devdata.isel(lev=dev_pmid_ind) + if diff_of_diffs: + fracrefdata = fracrefdata.isel(lev=ref_pmid_ind) + fracdevdata = fracdevdata.isel(lev=dev_pmid_ind) + + sg_ref_params = [1, 170, -90] + sg_dev_params = [1, 170, -90] + # Get stretched-grid info if passed + if sg_ref_path != '': + sg_ref_attrs = xr.open_dataset(sg_ref_path).attrs + sg_ref_params = [ + sg_ref_attrs['stretch_factor'], + sg_ref_attrs['target_longitude'], + sg_ref_attrs['target_latitude']] + + if sg_dev_path != '': + sg_dev_attrs = xr.open_dataset(sg_dev_path).attrs + sg_dev_params = [ + sg_dev_attrs['stretch_factor'], + sg_dev_attrs['target_longitude'], + sg_dev_attrs['target_latitude']] + + [refres, refgridtype, devres, devgridtype, cmpres, cmpgridtype, + regridref, regriddev, regridany, refgrid, devgrid, cmpgrid, + refregridder, devregridder, refregridder_list, devregridder_list] = \ + create_regridders( + refdata, + devdata, + weightsdir=weightsdir, + cmpres=cmpres, + zm=True, + sg_ref_params=sg_ref_params, + sg_dev_params=sg_dev_params + ) + + # Use smaller vertical grid as target for vertical regridding + # NOTE: Convert target_index from numpy.int64 to int to conform + # to the Python style guide (as per Pylint). + # -- Bob Yantosca (21 Sep 2023) + target_index = int(np.array([len(ref_pedge), len(dev_pedge)]).argmin()) + pedge = [ref_pedge, dev_pedge][target_index] + pedge_ind = [ref_pedge_ind, dev_pedge_ind][target_index] + + # ================================================================== + # Loop over all variables + # ================================================================== + ds_refs = [None] * n_var + frac_ds_refs = [None] * n_var + ds_devs = [None] * n_var + frac_ds_devs = [None] * n_var + for i in range(n_var): + + varname = varlist[i] + + # ================================================================== + # Slice the data, allowing for no time dimension (bpch) + # ================================================================== + + # Ref + if "time" in refdata[varname].dims: + ds_refs[i] = refdata[varname].isel(time=itime) + if diff_of_diffs: + frac_ds_refs[i] = fracrefdata[varname].isel(time=itime) + else: + ds_refs[i] = refdata[varname] + if diff_of_diffs: + frac_ds_refs[i] = fracrefdata[varname] + + # Dev + if "time" in devdata[varname].dims: + ds_devs[i] = devdata[varname].isel(time=itime) + if diff_of_diffs: + frac_ds_devs[i] = fracdevdata[varname].isel(time=itime) + + else: + ds_devs[i] = devdata[varname] + if diff_of_diffs: + frac_ds_devs[i] = fracdevdata[varname] + + # ================================================================== + # Handle units as needed + # ================================================================== + + # Convert to ppb if units string is variation of mol/mol + if data_unit_is_mol_per_mol(ds_refs[i]): + ds_refs[i].values = ds_refs[i].values * 1e9 + ds_refs[i].attrs["units"] = "ppb" + if data_unit_is_mol_per_mol(ds_devs[i]): + ds_devs[i].values = ds_devs[i].values * 1e9 + ds_devs[i].attrs["units"] = "ppb" + + # If units string is ppbv (true for bpch data) then rename units + if ds_refs[i].units.strip() == "ppbv": + 
ds_refs[i].attrs["units"] = "ppb" + if ds_devs[i].units.strip() == "ppbv": + ds_devs[i].attrs["units"] = "ppb" + + # If units string is W/m2 (may be true for bpch data) then rename units + if ds_refs[i].units.strip() == "W/m2": + ds_refs[i].attrs["units"] = "W m-2" + if ds_devs[i].units.strip() == "W/m2": + ds_devs[i].attrs["units"] = "W m-2" + + # If units string is UNITLESS (may be true for bpch data) then rename + # units + if ds_refs[i].units.strip() == "UNITLESS": + ds_refs[i].attrs["units"] = "1" + if ds_devs[i].units.strip() == "UNITLESS": + ds_devs[i].attrs["units"] = "1" + + # Compare units of ref and dev. The check_units function will throw an error + # if the units do not match and enforce_units is True. + check_units(ds_refs[i], ds_devs[i], enforce_units) + + # Convert from ppb to ug/m3 if convert_to_ugm3 is passed as true + if convert_to_ugm3: + + # Error checks: must pass met, not normalize by area, and be in ppb + if refmet is None or devmet is None: + msg = "Met mata ust be passed to convert units to ug/m3." + raise ValueError(msg) + if normalize_by_area: + msg = "Normalizing by area is now allowed if plotting ug/m3" + raise ValueError(msg) + if ds_refs[i].units != "ppb" or ds_devs[i].units != "ppb": + msg = "Units must be mol/mol if converting to ug/m3." + raise ValueError(msg) + + # Slice air density data by time and lev + # (assume same format and dimensions as refdata and devdata) + if "time" in refmet["Met_AIRDEN"].dims: + ref_airden = refmet["Met_AIRDEN"].isel(time=itime, + lev=ref_pmid_ind) + else: + ref_airden = refmet["Met_AIRDEN"].isel(lev=ref_pmid_ind) + if "time" in devmet["Met_AIRDEN"].dims: + dev_airden = devmet["Met_AIRDEN"].isel(time=itime, + lev=dev_pmid_ind) + else: + dev_airden = devmet["Met_AIRDEN"].isel(lev=dev_pmid_ind) + + # Get a list of properties for the given species + spc_name = varname.replace(varname.split("_")[0] + "_", "") + species_properties = properties.get(spc_name) + + # If no properties are found, then exit with an error. + # Otherwise, get the molecular weight in g/mol. + if species_properties is None: + # Hack lumped species until we implement a solution + if spc_name in ["Simple_SOA", "Complex_SOA"]: + spc_mw_g = 150.0 + else: + msg = f"No properties found for {spc_name}. Cannot convert" \ + + " to ug/m3." + raise ValueError(msg) + else: + # Get the species molecular weight in g/mol + spc_mw_g = species_properties.get("MW_g") + if spc_mw_g is None: + msg = f"Molecular weight not found for for species {spc_name}!" \ + + " Cannot convert to ug/m3." 
+ raise ValueError(msg) + + # Convert values from ppb to ug/m3: + # ug/m3 = 1e-9ppb * mol/g air * kg/m3 air * 1e3g/kg + # * g/mol spc * 1e6ug/g + # = ppb * air density * (spc MW / air MW) + ds_refs[i].values = ds_refs[i].values * ref_airden.values \ + * (spc_mw_g / MW_AIR_g) + ds_devs[i].values = ds_devs[i].values * dev_airden.values \ + * (spc_mw_g / MW_AIR_g) + + # Update units string + ds_refs[i].attrs["units"] = "\u03BCg/m3" # ug/m3 using mu + ds_devs[i].attrs["units"] = "\u03BCg/m3" + + # ============================================================== + # Reshape cubed sphere data if using MAPL v1.0.0+ + # TODO: update function to expect data in this format + # ============================================================== + + ds_refs[i] = reshape_MAPL_CS(ds_refs[i]) + ds_devs[i] = reshape_MAPL_CS(ds_devs[i]) + if diff_of_diffs: + frac_ds_refs[i] = reshape_MAPL_CS(frac_ds_refs[i]) + frac_ds_devs[i] = reshape_MAPL_CS(frac_ds_devs[i]) + + # Flip in the vertical if applicable + if flip_ref: + ds_refs[i].data = ds_refs[i].data[::-1, :, :] + if diff_of_diffs: + frac_ds_refs[i].data = frac_ds_refs[i].data[::-1, :, :] + if flip_dev: + ds_devs[i].data = ds_devs[i].data[::-1, :, :] + if diff_of_diffs: + frac_ds_devs[i].data = frac_ds_devs[i].data[::-1, :, :] + # ================================================================== + # Get the area variables if normalize_by_area=True. They can be + # either in the main datasets as variable AREA or in the optionally + # passed meteorology datasets as Met_AREAM2. + # ================================================================== + if normalize_by_area: + if "AREA" in refdata.data_vars.keys(): + ref_area = refdata["AREA"] + elif refmet is not None: + if "Met_AREAM2" in refmet.data_vars.keys(): + ref_area = refmet["Met_AREAM2"] + else: + msg = "normalize_by_area = True but AREA not " \ + + "present in the Ref dataset and ref met with Met_AREAM2" \ + + " not passed!" + raise ValueError(msg) + if "time" in ref_area.dims: + ref_area = ref_area.isel(time=0) + if refgridtype == 'cs': + ref_area = reshape_MAPL_CS(ref_area) + + if "AREA" in devdata.data_vars.keys(): + dev_area = devdata["AREA"] + elif devmet is not None: + if "Met_AREAM2" in devmet.data_vars.keys(): + dev_area = devmet["Met_AREAM2"] + else: + msg = "normalize_by_area = True but AREA not " \ + + "present in the Dev dataset and dev met with Met_AREAM2" \ + + " not passed!" + raise ValueError(msg) + if "time" in dev_area.dims: + dev_area = dev_area.isel(time=0) + if devgridtype == 'cs': + dev_area = reshape_MAPL_CS(dev_area) + + # Make sure the areas do not have a lev dimension + if "lev" in ref_area.dims: + ref_area = ref_area.isel(lev=0) + if "lev" in dev_area.dims: + dev_area = dev_area.isel(lev=0) + + # ================================================================== + # Create arrays for each variable in the Ref and Dev dataset + # and regrid to the comparison grid.
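+    # Note (added for clarity): horizontal regridding targets the common
+    # comparison grid (cmpgrid); if Ref and Dev use different vertical
+    # grids, data on the finer vertical grid are also mapped onto the
+    # coarser one via the transform matrix that gen_xmat() builds below
+    # from the two sets of pressure edges.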
+ # ================================================================== + ds_ref_cmps = [None] * n_var + ds_dev_cmps = [None] * n_var + frac_ds_ref_cmps = [None] * n_var + frac_ds_dev_cmps = [None] * n_var + # store units in case data changes from DataArray to numpy array + ref_units = [None] * n_var + dev_units = [None] * n_var + + # regrid vertically if necessary + if len(ref_pedge) != len(pedge): + xmat = gen_xmat(ref_pedge[ref_pedge_ind], pedge[pedge_ind]) + elif len(dev_pedge) != len(pedge): + xmat = gen_xmat(dev_pedge[dev_pedge_ind], pedge[pedge_ind]) + + for i in range(n_var): + + ds_ref = ds_refs[i] + ds_dev = ds_devs[i] + frac_ds_ref = frac_ds_refs[i] + frac_ds_dev = frac_ds_devs[i] + # Do area normalization before regridding if normalize_by_area=True + if normalize_by_area: + exclude_list = ["WetLossConvFrac", "Prod_", "Loss_"] + if not any(s in varname for s in exclude_list): + ds_ref.values = ds_ref.values / ref_area.values + ds_dev.values = ds_dev.values / dev_area.values + ds_refs[i] = ds_ref + ds_devs[i] = ds_dev + if diff_of_diffs: + frac_ds_ref.values = frac_ds_ref.values / ref_area.values + frac_ds_refs[i] = frac_ds_ref + frac_ds_dev.values = frac_ds_dev.values / dev_area.values + frac_ds_devs[i] = frac_ds_dev + + # save units for later use + ref_units[i] = ds_ref.attrs["units"] + dev_units[i] = ds_dev.attrs["units"] + + ref_nlev = len(ds_ref['lev']) + dev_nlev = len(ds_dev['lev']) + + # Regrid variables horizontally + # Ref + ds_ref = regrid_comparison_data( + ds_ref, + refres, + regridref, + refregridder, + refregridder_list, + cmpgrid, + refgridtype, + cmpgridtype, + nlev=ref_nlev + ) + if diff_of_diffs: + frac_ds_ref = regrid_comparison_data( + frac_ds_ref, + refres, + regridref, + refregridder, + refregridder_list, + cmpgrid, + refgridtype, + cmpgridtype, + nlev=ref_nlev + ) + # Dev + ds_dev = regrid_comparison_data( + ds_dev, + devres, + regriddev, + devregridder, + devregridder_list, + cmpgrid, + devgridtype, + cmpgridtype, + nlev=dev_nlev + ) + if diff_of_diffs: + frac_ds_dev = regrid_comparison_data( + frac_ds_dev, + devres, + regriddev, + devregridder, + devregridder_list, + cmpgrid, + devgridtype, + cmpgridtype, + nlev=dev_nlev + ) + + # store regridded CS data before dealing with vertical regridding + if refgridtype == "cs": + ds_refs[i] = ds_ref + frac_ds_refs[i] = frac_ds_ref + if devgridtype == "cs": + ds_devs[i] = ds_dev + frac_ds_devs[i] = frac_ds_dev + + # Reduce variables to smaller vert grid if necessary for comparison + if len(ref_pedge) != len(pedge): + ds_ref = regrid_vertical(ds_ref, xmat, dev_pmid[dev_pmid_ind]) + if diff_of_diffs: + frac_ds_ref = regrid_vertical(frac_ds_ref, xmat, dev_pmid[dev_pmid_ind]) + + if len(dev_pedge) != len(pedge): + ds_dev = regrid_vertical(ds_dev, xmat, ref_pmid[ref_pmid_ind]) + if diff_of_diffs: + frac_ds_dev = regrid_vertical(frac_ds_dev, xmat, ref_pmid[ref_pmid_ind]) + ds_ref_cmps[i] = ds_ref + ds_dev_cmps[i] = ds_dev + if diff_of_diffs: + frac_ds_ref_cmps[i] = frac_ds_ref + frac_ds_dev_cmps[i] = frac_ds_dev + # Universal plot setup + xtick_positions = np.arange(-90, 91, 30) + xticklabels = [rf"{x}$\degree$" for x in xtick_positions] + + # ================================================================== + # Define function to create a single page figure to be called + # in a parallel loop + # ================================================================== + def createfig(ivar, temp_dir=''): + + # Suppress harmless run-time warnings (mostly about underflow) + warnings.filterwarnings('ignore',
category=RuntimeWarning) + warnings.filterwarnings('ignore', category=UserWarning) + + if savepdf and verbose: + print(f"{ivar} ", end="") + varname = varlist[ivar] + + # ============================================================== + # Assign data variables + # ============================================================== + ds_ref = ds_refs[ivar] + ds_dev = ds_devs[ivar] + ds_ref_cmp = ds_ref_cmps[ivar] + ds_dev_cmp = ds_dev_cmps[ivar] + frac_ds_ref_cmp = frac_ds_ref_cmps[ivar] + frac_ds_dev_cmp = frac_ds_dev_cmps[ivar] + + # ============================================================== + # Area normalization units and subtitle + # Set units and subtitle, including modification if normalizing + # area. Note if enforce_units is False (non-default) then + # units on difference plots will be wrong. + # ============================================================== + cmn_units = ref_units[ivar] + subtitle_extra = "" + if normalize_by_area: + exclude_list = ["WetLossConvFrac", "Prod_", "Loss_"] + if not any(s in varname for s in exclude_list): + if "/" in cmn_units: + cmn_units = f"{cmn_units}/m2" + else: + cmn_units = f"{cmn_units} m-2" + ref_units[ivar] = cmn_units + dev_units[ivar] = cmn_units + subtitle_extra = ", Normalized by Area" + + # ============================================================== + # Calculate zonal mean + # ============================================================== + # Ref + if refgridtype == "ll": + zm_ref = ds_ref.mean(dim="lon") + else: + zm_ref = ds_ref.mean(axis=2) + + # Dev + if devgridtype == "ll": + zm_dev = ds_dev.mean(dim="lon") + else: + zm_dev = ds_dev.mean(axis=2) + # Comparison + zm_dev_cmp = ds_dev_cmp.mean(axis=2) + zm_ref_cmp = ds_ref_cmp.mean(axis=2) + if diff_of_diffs: + frac_zm_dev_cmp = frac_ds_dev_cmp.mean(axis=2) + frac_zm_ref_cmp = frac_ds_ref_cmp.mean(axis=2) + # ============================================================== + # Get min and max values for use in the colorbars + # and also flag if Ref and/or Dev are all zero or all NaN + # ============================================================== + + # Ref + vmin_ref = float(zm_ref.min()) + vmax_ref = float(zm_ref.max()) + + # Dev + vmin_dev = float(zm_dev.min()) + vmax_dev = float(zm_dev.max()) + + # Comparison + vmin_cmp = np.min([zm_ref_cmp.min(), zm_dev_cmp.min()]) + vmax_cmp = np.max([zm_ref_cmp.max(), zm_dev_cmp.max()]) + + # Take min/max across all grids + vmin_abs = np.min([vmin_ref, vmin_dev, vmin_cmp]) + vmax_abs = np.max([vmax_ref, vmax_dev, vmax_cmp]) + + # ============================================================== + # Test if Ref and/or Dev contain all zeroes or all NaNs. + # This will have implications as to how we set min and max + # values for the color ranges below. + # ============================================================== + ref_values = ds_ref.values if isinstance(ds_ref, xr.DataArray) else ds_ref + dev_values = ds_dev.values if isinstance(ds_dev, xr.DataArray) else ds_dev + ref_is_all_zero, ref_is_all_nan = all_zero_or_nan(ref_values) + dev_is_all_zero, dev_is_all_nan = all_zero_or_nan(dev_values) + + # ============================================================== + # Calculate zonal mean difference + # ============================================================== + + zm_diff = np.array(zm_dev_cmp) - np.array(zm_ref_cmp) + + # Test if abs. 
diff is zero everywhere or NaN everywhere + absdiff_is_all_zero, absdiff_is_all_nan = all_zero_or_nan(zm_diff) + + # ============================================================== + # Calculate fractional difference, set divides by zero to NaN + # ============================================================== + if diff_of_diffs: + zm_fracdiff = np.array(frac_zm_dev_cmp) - \ + np.array(frac_zm_ref_cmp) + else: + zm_fracdiff = np.abs(np.array(zm_dev_cmp)) / \ + np.abs(np.array(zm_ref_cmp)) + zm_fracdiff = np.where(np.abs(zm_fracdiff) == + np.inf, np.nan, zm_fracdiff) + zm_fracdiff[zm_fracdiff > 1e308] = np.nan + # Test if the frac. diff is zero everywhere or NaN everywhere + fracdiff_is_all_zero = not np.any(zm_fracdiff) or \ + (np.nanmin(zm_fracdiff) == 0 and + np.nanmax(zm_fracdiff) == 0) + fracdiff_is_all_nan = np.isnan(zm_fracdiff).all() + + # ============================================================== + # Create 3x2 figure + # ============================================================== + + # Create figs and axes objects + figs, ((ax0, ax1), (ax2, ax3), (ax4, ax5)) = plt.subplots( + 3, 2, figsize=[12, 15.3] + ) + # Add extra padding so that plots don't bump into each other. + # For zonal mean plots, we need to leave extra padding at the + # left (for the Y-axis label) and at the bottom (for the colorbar). + plt.subplots_adjust( + left=0.10, # Fraction of page width, from left edge + right=0.925, # Fraction of page width, from left edge + bottom=0.05, # Fraction of page height, from bottom edge + wspace=0.25, # Horizontal spacing btw subplots (frac of width) + hspace=0.35 # Vertical spacing btw subplots (frac of height) + ) + # Give the plot a title + offset = 0.96 + if extra_title_txt is not None: + figs.suptitle( + f"{varname}, Zonal Mean ({extra_title_txt})", + y=offset, + ) + else: + figs.suptitle( + f"{varname}, Zonal Mean", + y=offset + ) + + # ============================================================== + # Set color map objects. Use gray for NaNs (no worries, + # because zonal means are always plotted on lat-alt grids). + # + # Use shallow copy (copy.copy()) to create color map objects, + # in order to avoid set_bad() from being applied to the base + # color table.
See: https://docs.python.org/3/library/copy.html + # ============================================================== + + if use_cmap_RdBu: + cmap1 = copy.copy(mpl.colormaps["RdBu_r"]) + else: + cmap1 = copy.copy(WhGrYlRd) + cmap1.set_bad("gray") + + cmap_plot = copy.copy(mpl.colormaps["RdBu_r"]) + cmap_plot.set_bad(color="gray") + + # ============================================================== + # Set titles for plots + # ============================================================== + + if refgridtype == "ll": + ref_title = f"{refstr} (Ref){subtitle_extra}\n{refres}" + else: + ref_title = f"{refstr} (Ref){subtitle_extra}\n{cmpres} regridded from c{refres}" + + if devgridtype == "ll": + dev_title = f"{devstr} (Dev){subtitle_extra}\n{devres}" + else: + dev_title = f"{devstr} (Dev){subtitle_extra}\n{cmpres} regridded from c{devres}" + + if regridany: + absdiff_dynam_title = \ + f"Difference ({cmpres})\nDev - Ref, Dynamic Range" + absdiff_fixed_title = \ + f"Difference ({cmpres})\nDev - Ref, Restricted Range [5%,95%]" + if diff_of_diffs: + fracdiff_dynam_title = \ + f"Difference ({cmpres}), " + \ + f"Dynamic Range\n{frac_devstr} - {frac_refstr}" + fracdiff_fixed_title = \ + f"Difference ({cmpres}), " + \ + f"Restricted Range [5%,95%]\n{frac_devstr} - {frac_refstr}" + else: + fracdiff_dynam_title = \ + f"Ratio ({cmpres})\nDev/Ref, Dynamic Range" + fracdiff_fixed_title = \ + f"Ratio ({cmpres})\nDev/Ref, Fixed Range" + else: + absdiff_dynam_title = "Difference\nDev - Ref, Dynamic Range" + absdiff_fixed_title = \ + "Difference\nDev - Ref, Restricted Range [5%,95%]" + if diff_of_diffs: + fracdiff_dynam_title = \ + f"Difference, Dynamic Range\n{frac_devstr} - {frac_refstr}" + fracdiff_fixed_title = \ + "Difference, Restricted Range " + \ + f"[5%,95%]\n{frac_devstr} - {frac_refstr}" + else: + fracdiff_dynam_title = "Ratio \nDev/Ref, Dynamic Range" + fracdiff_fixed_title = "Ratio \nDev/Ref, Fixed Range" + + # ============================================================== + # Bundle variables for 6 parallel plotting calls + # 0 = Ref 1 = Dev + # 2 = Dynamic abs diff 3 = Restricted abs diff + # 4 = Dynamic frac diff 5 = Restricted frac diff + # ============================================================== + + subplots = six_panel_subplot_names(diff_of_diffs) + + all_zeros = [ + ref_is_all_zero, + dev_is_all_zero, + absdiff_is_all_zero, + absdiff_is_all_zero, + fracdiff_is_all_zero, + fracdiff_is_all_zero, + ] + + all_nans = [ + ref_is_all_nan, + dev_is_all_nan, + absdiff_is_all_nan, + absdiff_is_all_nan, + fracdiff_is_all_nan, + fracdiff_is_all_nan, + ] + plot_vals = [zm_ref, zm_dev, zm_diff, zm_diff, + zm_fracdiff, zm_fracdiff] + + axs = [ax0, ax1, ax2, ax3, ax4, ax5] + + cmaps = [cmap1, cmap1, cmap_plot, cmap_plot, cmap_plot, cmap_plot] + + rowcols = [(0, 0), (0, 1), (1, 0), (1, 1), (2, 0), (2, 1)] + + titles = [ + ref_title, + dev_title, + absdiff_dynam_title, + absdiff_fixed_title, + fracdiff_dynam_title, + fracdiff_fixed_title, + ] + + grids = [refgrid, devgrid, cmpgrid, cmpgrid, cmpgrid, cmpgrid] + + if refgridtype != "ll": + grids[0] = cmpgrid + if devgridtype != "ll": + grids[1] = cmpgrid + extents = [None, None, None, None, None, None] + + masked = ["ZM", "ZM", "ZM", "ZM", "ZM", "ZM"] + + unit_list = [ref_units[ivar], dev_units[ivar], cmn_units, cmn_units, + "unitless", "unitless"] + + other_all_nans = [dev_is_all_nan, ref_is_all_nan, + False, False, False, False] + + gridtypes = [ + cmpgridtype, + cmpgridtype, + cmpgridtype, + cmpgridtype, + cmpgridtype, + cmpgridtype, + ] + + pedges = 
[ref_pedge, dev_pedge, pedge, pedge, pedge, pedge] + + pedge_inds = [ref_pedge_ind, dev_pedge_ind, pedge_ind, + pedge_ind, pedge_ind, pedge_ind] + + mins = [vmin_ref, vmin_dev, vmin_abs] + maxs = [vmax_ref, vmax_dev, vmax_abs] + + ratio_logs = [False, False, False, False, True, True] + # Plot + for i in range(6): + six_plot( + subplots[i], + all_zeros[i], + all_nans[i], + plot_vals[i], + grids[i], + axs[i], + rowcols[i], + titles[i], + cmaps[i], + unit_list[i], + extents[i], + masked[i], + other_all_nans[i], + gridtypes[i], + mins, + maxs, + use_cmap_RdBu, + match_cbar, + verbose, + log_color_scale, + pedges[i], + pedge_inds[i], + log_yaxis, + plot_type="zonal_mean", + xtick_positions=xtick_positions, + xticklabels=xticklabels, + ratio_log=ratio_logs[i], + **extra_plot_args + ) + + # ============================================================== + # Add this page of 6-panel plots to the PDF file + # ============================================================== + if savepdf: + folders = pdfname.split('/') + pdfname_temp = folders[-1] + "BENCHMARKFIGCREATION.pdf" + str(ivar) + full_path = temp_dir + for folder in folders[:-1]: + full_path = os.path.join(full_path, folder) + if not os.path.isdir(full_path): + try: + os.mkdir(full_path) + except FileExistsError: + pass + pdf = PdfPages(os.path.join(full_path, pdfname_temp)) + pdf.savefig(figs) + pdf.close() + plt.close(figs) + # ============================================================== + # Update the list of variables with significant differences. + # Criterion: abs(1 - max(fracdiff)) > 0.1 + # Do not include NaNs in the criterion, because these indicate + # places where fracdiff could not be computed (div-by-zero). + # ============================================================== + if np.abs(1 - np.nanmax(zm_fracdiff)) > 0.1: + sigdiff_list.append(varname) + return varname + return "" + + # ================================================================== + # Call figure generation function in a parallel loop over variables + # + # ================================================================== + + # Disable parallelization if this routine is already being + # called in parallel. This is due to issues with matplotlib. 
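+    # For example (hypothetical scenario, added for illustration): if
+    # compare_zonal_mean is itself invoked from a joblib worker process,
+    # current_process().name will not be "MainProcess", so n_job is
+    # forced to 1 and the figures are created serially.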
+ if current_process().name != "MainProcess": + n_job = 1 + + if not savepdf: + # disable parallel plotting to allow interactive figure plotting + for i in range(n_var): + createfig(i) + + else: + with TemporaryDirectory() as temp_dir: + # --------------------------------------- + # Turn off parallelization if n_job=1 + if n_job != 1: + results = Parallel(n_jobs=n_job)( + delayed(createfig)(i, temp_dir) + for i in range(n_var) + ) + else: + for i in range(n_var): + results = createfig(i, temp_dir) + # --------------------------------------- + + # update sig diffs after parallel calls + if current_process().name == "MainProcess": + for varname in results: + if isinstance(varname, str): + sigdiff_list.append(varname) + + # ========================================================== + # Finish + # ========================================================== + if verbose: + print("Closed PDF") + merge = PdfMerger() + #print("Creating {} for {} variables".format(pdfname, n_var)) + pdf = PdfPages(pdfname) + pdf.close() + for i in range(n_var): + temp_pdfname = pdfname + if pdfname[0] == '/': + temp_pdfname = temp_pdfname[1:] + merge.append( + os.path.join( + str(temp_dir), + temp_pdfname + + "BENCHMARKFIGCREATION.pdf" + + str(i))) + merge.write(pdfname) + merge.close() + warnings.showwarning = _warning_format diff --git a/gcpy/plot/core.py b/gcpy/plot/core.py new file mode 100644 index 00000000..aaf8e29f --- /dev/null +++ b/gcpy/plot/core.py @@ -0,0 +1,147 @@ +""" +Common variables and functions used by modules in gcpy.plot. +""" +from os import path +import warnings +from matplotlib import colors +import numpy as np + +# Save warnings format to undo overwriting built into pypdf +_warning_format = warnings.showwarning + +# Current directory +_plot_dir = path.dirname(__file__) + +# Colormap definitions +_rgb_WhGrYlRd = np.genfromtxt( + path.join(_plot_dir, 'colormaps', 'WhGrYlRd.txt'), + delimiter=' ' +) +WhGrYlRd = colors.ListedColormap(_rgb_WhGrYlRd / 255.0) + +# Use a style sheet to control plot attributes +gcpy_style = path.join(_plot_dir, "gcpy_plot_style") + + +def six_panel_subplot_names(diff_of_diffs): + """ + Returns the names of the subplots for the 6-panel plots. + + Args: + ----- + diff_of_diffs : bool + Indicates if this is a diff-of-diffs benchmark (True) + or not (False), Ratio plots are only included if + diff_of_diffs is False. + + Returns: + -------- + subplots : list of str + List of names of each of the subplots in the 6-panel plot. + """ + if diff_of_diffs: + return ["ref", "dev", + "dyn_absdiff", "res_absdiff", + "dyn_absdiff", "res_absdiff"] + + return ["ref", "dev", + "dyn_absdiff", "res_absdiff", + "dyn_ratio", "res_ratio", + ] + + +def normalize_colors( + vmin, + vmax, + is_difference=False, + log_color_scale=False, + ratio_log=False +): + """ + Normalizes a data range to the colormap range used by matplotlib + functions. For log-color scales, special handling is done to prevent + taking the log of data that is all zeroes. + + Args: + vmin: float + Minimum value of the data range. + vmax: float + Maximum value of the data range. + + Keyword Args (optional): + is_difference: bool + Set this switch to denote that we are using a difference + color scale (i.e. with zero in the middle of the range). + Default value: False + log_color_scale: bool + Logical flag to denote that we are using a logarithmic + color scale instead of a linear color scale. + Default value: False + ratio_log : bool + Indicates whether we are using log scaling for ratio plots + (True) or not (False). 
+ Default value: False + + Returns: + norm: matplotlib Norm + The normalized matplotlib color range, stored in + a matplotlib Norm object. + + Remarks: + For log color scales, we will use a range of 3 orders of + magnitude (i.e. from vmax/1e3 to vmax). + """ + + # Define class for logarithmic non-symmetric color scheme + class MidpointLogNorm(colors.LogNorm): + """ + Class for logarithmic non-symmetric color scheme + """ + def __init__( + self, + vmin=None, + vmax=None, + midpoint=None, + clip=False + ): + super().__init__(vmin, vmax, clip) + self.midpoint = midpoint + + def __call__(self, value, clip=None): + result, _ = self.process_value(value) + x_val = [ + np.log(self.vmin), + np.log(self.midpoint), + np.log(self.vmax) + ] + y_val = [0, 0.5, 1] + return np.ma.array( + np.interp(np.log(value), x_val, y_val), + mask=result.mask, + copy=False + ) + + # Absolute value of v + abs_vmin = abs(vmin) + abs_vmax = abs(vmax) + + if (abs_vmin == 0 and abs_vmax == 0) or \ + (np.isnan(vmin) and np.isnan(vmax)): + # If the data is zero everywhere (vmin=vmax=0) or undefined + # everywhere (vmin=vmax=NaN), then normalize the data range + # so that the color corresponding to zero (white) will be + # placed in the middle of the colorbar, where we will + # add a single tick. + if is_difference: + return colors.Normalize(vmin=-1.0, vmax=1.0) + return colors.Normalize(vmin=0.0, vmax=1.0) + + # For log color scales, assume a range 3 orders of magnitude + # below the maximum value. Otherwise use a linear scale. + if log_color_scale and not ratio_log: + return colors.LogNorm(vmin=vmax / 1e3, vmax=vmax) + if log_color_scale: + return MidpointLogNorm(vmin=vmin, vmax=vmax, midpoint=1) + + # For linear color scales: Normalize between min & max + return colors.Normalize(vmin=vmin, vmax=vmax) diff --git a/gcpy/plot/gcpy_plot_style b/gcpy/plot/gcpy_plot_style new file mode 100644 index 00000000..08e01668 --- /dev/null +++ b/gcpy/plot/gcpy_plot_style @@ -0,0 +1,15 @@ +# ====================================================================== +# Customizable style sheet to set plot parameters such as +# font sizes of axes labels, tick marks, etc. +# +# Default values will be applied for settings not explicitly listed +# below. For more information, see the Matplotlib documentation at: +# https://matplotlib.org/stable/users/explain/customizing.html +# ====================================================================== + +figure.titlesize : 25 # Top-of-plot title fontsize +figure.autolayout : false # Don't use tight_layout() +axes.titlesize : medium # Subplot title size +axes.titlelocation : center # Subplot title location +axes.titleweight : medium # Subplot title weight +axes.labelsize : small # Subplot X and Y label size diff --git a/gcpy/plot/single_panel.py b/gcpy/plot/single_panel.py new file mode 100644 index 00000000..0fd01501 --- /dev/null +++ b/gcpy/plot/single_panel.py @@ -0,0 +1,582 @@ +""" +Creates a single panel plot (geographic map or zonal mean). 
+""" +import copy +from matplotlib import ticker +from matplotlib import pyplot as plt +from matplotlib.backends.backend_pdf import PdfPages +import numpy as np +from dask.array import Array as DaskArray +import xarray as xr +import cartopy.crs as ccrs +from gcpy.grid import get_vert_grid, get_pressure_indices, \ + pad_pressure_edges, convert_lev_to_pres, get_grid_extents, \ + call_make_grid, get_input_res +from gcpy.regrid import regrid_comparison_data, create_regridders +from gcpy.util import reshape_MAPL_CS, all_zero_or_nan, verify_variable_type +from gcpy.plot.core import gcpy_style, normalize_colors, WhGrYlRd + +# Suppress numpy divide by zero warnings to prevent output spam +np.seterr(divide="ignore", invalid="ignore") + +# Use a style sheet to control plot attributes +plt.style.use(gcpy_style) + + +def single_panel( + plot_vals, + ax=None, + plot_type="single_level", + grid=None, + gridtype="", + title="fill", + comap=WhGrYlRd, + norm=None, + unit="", + extent=None, + masked_data=None, + use_cmap_RdBu=False, + log_color_scale=False, + add_cb=True, + pres_range=None, + pedge=np.full((1, 1), -1), + pedge_ind=np.full((1, 1), -1), + log_yaxis=False, + xtick_positions=None, + xticklabels=None, + proj=ccrs.PlateCarree(), + sg_path='', + ll_plot_func="imshow", + vert_params=None, + pdfname="", + weightsdir='.', + vmin=None, + vmax=None, + return_list_of_plots=False, + **extra_plot_args +): + """ + Core plotting routine -- creates a single plot panel. + + Args: + plot_vals: xarray.DataArray, numpy.ndarray, or dask.array.Array + Single data variable GEOS-Chem output to plot + + Keyword Args (Optional): + ax: matplotlib axes + Axes object to plot information + Default value: None (Will create a new axes) + plot_type: str + Either "single_level" or "zonal_mean" + Default value: "single_level" + grid: dict + Dictionary mapping plot_vals to plottable coordinates + Default value: {} (will attempt to read grid from plot_vals) + gridtype: str + "ll" for lat/lon or "cs" for cubed-sphere + Default value: "" (will automatically determine from grid) + title: str + Title to put at top of plot + Default value: "fill" (will use name attribute of plot_vals + if available) + comap: matplotlib Colormap + Colormap for plotting data values + Default value: WhGrYlRd + norm: list + List with range [0..1] normalizing color range for matplotlib + methods. Default value: None (will determine from plot_vals) + unit: str + Units of plotted data + Default value: "" (will use units attribute of plot_vals + if available) + extent: tuple (minlon, maxlon, minlat, maxlat) + Describes minimum and maximum latitude and longitude of input + data. Default value: None (Will use full extent of plot_vals + if plot is single level). + masked_data: numpy array + Masked area for avoiding near-dateline cubed-sphere plotting + issues Default value: None (will attempt to determine from + plot_vals) + use_cmap_RdBu: bool + Set this flag to True to use a blue-white-red colormap + Default value: False + log_color_scale: bool + Set this flag to True to use a log-scale colormap + Default value: False + add_cb: bool + Set this flag to True to add a colorbar to the plot + Default value: True + pres_range: list(int) + Range from minimum to maximum pressure for zonal mean + plotting. Default value: [0, 2000] (will plot entire + atmosphere) + pedge: numpy array + Edge pressures of vertical grid cells in plot_vals + for zonal mean plotting. 
Default value: np.full((1, 1), -1) + (will determine automatically) + pedge_ind: numpy array + Index of edge pressure values within pressure range in + plot_vals for zonal mean plotting. + Default value: np.full((1, 1), -1) (will determine + automatically) + log_yaxis: bool + Set this flag to True to enable log scaling of pressure in + zonal mean plots. Default value: False + xtick_positions: list(float) + Locations of lat/lon or lon ticks on plot + Default value: None (will place automatically for + zonal mean plots) + xticklabels: list(str) + Labels for lat/lon ticks + Default value: None (will determine automatically from + xtick_positions) + proj: cartopy projection + Projection for plotting data + Default value: ccrs.PlateCarree() + sg_path: str + Path to NetCDF file containing stretched-grid info + (in attributes) for plot_vals. + Default value: '' (will not be read in) + ll_plot_func: str + Function to use for lat/lon single level plotting with + possible values 'imshow' and 'pcolormesh'. imshow is much + faster but is slightly displaced when plotting from dateline + to dateline and/or pole to pole. Default value: 'imshow' + vert_params: list(AP, BP) of list-like types + Hybrid grid parameter A in hPa and B (unitless). Needed if + grid is not 47 or 72 levels. Default value: None + pdfname: str + File path to save plots as PDF + Default value: "" (will not create PDF) + weightsdir: str + Directory path for storing regridding weights + Default value: "." (will store regridding files in + current directory) + vmin: float + minimum for colorbars + Default value: None (will use plot value minimum) + vmax: float + maximum for colorbars + Default value: None (will use plot value maximum) + return_list_of_plots: bool + Return plots as a list. This is helpful if you are using + a cubedsphere grid and would like access to all 6 plots + Default value: False + extra_plot_args: various + Any extra keyword arguments are passed to calls to + pcolormesh() (CS) or imshow() (Lat/Lon). 
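+
+    Example:
+        A minimal usage sketch (the file and variable names below are
+        hypothetical, not part of GCPy):
+
+        >>> import xarray as xr
+        >>> from gcpy.plot.single_panel import single_panel
+        >>> dset = xr.open_dataset("GEOSChem.SpeciesConc.nc4")
+        >>> single_panel(dset["SpeciesConcVV_O3"].isel(time=0, lev=0),
+        ...              title="Surface O3", pdfname="surface_O3.pdf")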
+ + Returns: + plot: matplotlib plot + Plot object created from input + """ + verify_variable_type(plot_vals, (xr.DataArray, np.ndarray, DaskArray)) + + # Create empty lists for keyword arguments + if pres_range is None: + pres_range = [0, 2000] + if vert_params is None: + vert_params = [[], []] + + # Eliminate 1D level or time dimensions + plot_vals = plot_vals.squeeze() + data_is_xr = isinstance(plot_vals, xr.DataArray) + if xtick_positions is None: + xtick_positions = [] + if plot_type == "zonal_mean": + xtick_positions = np.arange(-90, 90, 30) + + if xticklabels is None: + xticklabels = [rf"{x}$\degree$" for x in xtick_positions] + + if unit == "" and data_is_xr: + try: + unit = plot_vals.units.strip() + except BaseException: + pass + + if title == "fill" and data_is_xr: + try: + title = plot_vals.name + except BaseException: + pass + # Generate grid if not passed + if grid is None: + res, gridtype = get_input_res(plot_vals) + sg_params = [1, 170, -90] + if sg_path != '': + sg_attrs = xr.open_dataset(sg_path).attrs + sg_params = [ + sg_attrs['stretch_factor'], + sg_attrs['target_longitude'], + sg_attrs['target_latitude']] + + if plot_type == 'single_level': + grid_extent = get_grid_extents(plot_vals) + [grid, _] = call_make_grid( + res, + gridtype, + in_extent=grid_extent, + sg_params=sg_params + ) + + else: # zonal mean + if np.all(pedge_ind == -1) or np.all(pedge == -1): + + # Get mid-point pressure and edge pressures for this grid + pedge, pmid, _ = get_vert_grid(plot_vals, *vert_params) + + # Get indexes of pressure subrange (full range is default) + pedge_ind = get_pressure_indices(pedge, pres_range) + + # Pad edges if subset does not include surface or TOA so data spans + # entire subrange + pedge_ind = pad_pressure_edges( + pedge_ind, plot_vals.sizes["lev"], len(pmid)) + + # pmid indexes do not include last pedge index + pmid_ind = pedge_ind[:-1] + # Convert levels to pressures in ref and dev data + plot_vals = convert_lev_to_pres(plot_vals, pmid, pedge) + # get proper levels + plot_vals = plot_vals.isel(lev=pmid_ind) + + [input_res, input_gridtype, _, _, + _, new_gridtype, regrid, _, _, _, _, + grid, regridder, _, regridder_list, _] = create_regridders( + plot_vals, + plot_vals, + weightsdir=weightsdir, + cmpres=None, + zm=True, + sg_ref_params=sg_params + ) + if gridtype == 'cs': + plot_vals = reshape_MAPL_CS(plot_vals) + nlev = len(plot_vals['lev']) + # Ref + plot_vals = regrid_comparison_data( + plot_vals, + input_res, + regrid, + regridder, + regridder_list, + grid, + input_gridtype, + new_gridtype, + nlev=nlev + ) + # average across longitude bands + # assume lon dim is index 2 (no time dim) if a numpy array is passed + lon_ind = 2 + if isinstance(plot_vals, xr.DataArray): + lon_ind = plot_vals.dims.index('lon') + # calculate zonal means + plot_vals = plot_vals.mean(axis=lon_ind) + if gridtype == "": + _, gridtype = get_input_res(plot_vals) + if extent is None or extent == (None, None, None, None): + extent = get_grid_extents(grid) + # convert to -180 to 180 grid if needed (necessary if going + # cross-dateline later) + if extent[0] > 180 or extent[1] > 180: + #extent = [((extent[0]+180)%360)-180, ((extent[1]+180)%360)-180, extent[2], extent[3]] + extent = [extent[0] - 180, extent[1] - 180, extent[2], extent[3]] + #''' + #if extent[0] < -180 and 'x' in res: + # lon_res = float(res.split('x')[1]) + # extent = [180, + #if extent[1] > 180 and 'x' in res: + # extent[1] = 180 + #''' + # Account for cross-dateline extent + if extent[0] > extent[1]: + if gridtype == "ll": + # 
rearrange data with dateline in the middle instead of prime meridian + # change extent / grid to where dateline is 0, prime meridian is -180 / 180 + # needed for numpy arrays if doing pcolormesh / imshow, and xarray DataArrays + # if using imshow + proj = ccrs.PlateCarree(central_longitude=180) + if ll_plot_func == "imshow" or \ + not isinstance(plot_vals, xr.DataArray): + i = 0 + while grid['lon_b'][i] < 0: + i = i+1 + plot_vals_holder = copy.deepcopy(plot_vals) + if not isinstance(plot_vals, xr.DataArray): + plot_vals_holder[:,:-i] = plot_vals[:,i:] + plot_vals_holder[:,-i:] = plot_vals[:,:i] + else: + plot_vals_holder.values[:,:-i] = plot_vals.values[:,i:] + plot_vals_holder.values[:,-i:] = plot_vals.values[:,:i] + plot_vals = plot_vals_holder + extent[0] = extent[0] % 360 - 180 + extent[1] = extent[1] % 360 - 180 + grid["lon_b"] = grid["lon_b"] % 360 - 180 + grid["lon"] = grid["lon"] % 360 - 180 + if isinstance(plot_vals, xr.DataArray): + plot_vals['lon'] = plot_vals['lon'] % 360 - 180 + # realign grid also if doing imshow or using numpy arrays + if ll_plot_func == "imshow" or \ + not isinstance(plot_vals, xr.DataArray): + temp_grid = copy.deepcopy(grid) + temp_grid['lon_b'][:-i] = grid['lon_b'][i:] + temp_grid['lon_b'][-i:] = grid['lon_b'][:i] + temp_grid['lon'][:-i] = grid['lon'][i:] + temp_grid['lon'][-i:] = grid['lon'][:i] + grid = temp_grid + if isinstance(plot_vals, xr.DataArray): + plot_vals = plot_vals.assign_coords({'lon' : grid['lon']}) + if gridtype == "cs": + proj = ccrs.PlateCarree(central_longitude=180) + extent[0] = extent[0] % 360 - 180 + extent[1] = extent[1] % 360 - 180 + grid["lon_b"] = grid["lon_b"] % 360 - 180 + grid["lon"] = grid["lon"] % 360 - 180 + + if ax is None: + if plot_type == "zonal_mean": + ax = plt.axes() + if plot_type == "single_level": + ax = plt.axes(projection=proj) + + fig = plt.gcf() + data_is_xr = isinstance(plot_vals, xr.DataArray) + # Normalize colors (put into range [0..1] for matplotlib methods) + if norm is None: + if data_is_xr: + vmin = plot_vals.data.min() if vmin is None else vmin + vmax = plot_vals.data.max() if vmax is None else vmax + elif isinstance(plot_vals, np.ndarray): + vmin = np.min(plot_vals) if vmin is None else vmin + vmax = np.max(plot_vals) if vmax is None else vmax + norm = normalize_colors( + vmin, + vmax, + is_difference=use_cmap_RdBu, + log_color_scale=log_color_scale) + + # Create plot + ax.set_title(title) + if plot_type == "zonal_mean": + # Zonal mean plot + plot = ax.pcolormesh( + grid["lat_b"], + pedge[pedge_ind], + plot_vals, + cmap=comap, + norm=norm, + **extra_plot_args) + ax.set_aspect("auto") + ax.set_ylabel("Pressure (hPa)") + if log_yaxis: + ax.set_yscale("log") + ax.yaxis.set_major_formatter( + ticker.FuncFormatter(lambda y, _: f"{y:g}") + ) + ax.invert_yaxis() + ax.set_xticks(xtick_positions) + ax.set_xticklabels(xticklabels) + + elif gridtype == "ll": + if ll_plot_func == 'imshow': + # Lat/Lon single level + [minlon, maxlon, minlat, maxlat] = extent + # expand extent to minimize imshow distortion + #[dlat,dlon] = list(map(float, res.split('x'))) + dlon = grid['lon'][2] - grid['lon'][1] + dlat = grid['lat'][2] - grid['lat'][1] + + def get_nearest_extent(val, array, direction, spacing): + # choose nearest values in grid to desired extent to minimize distortion + grid_vals = np.asarray(array) + diff = grid_vals - val + if direction == 'greater': + diff[diff < 0] = np.inf + i = diff.argmin() + if diff[i] == np.inf: + # expand extent to value beyond grid limits if extent + # is already > max grid value + 
return grid_vals[(np.abs(grid_vals - val)).argmin()] + return grid_vals[i] + # if direction is not "greater": + diff[diff > 0] = -np.inf + i = diff.argmax() + if diff[i] == -np.inf: + # expand extent to value beyond grid limits if + # extent is already < min grid value + # plot will be distorted if full global to avoid + # cartopy issues + return grid_vals[( + np.abs(grid_vals - val)).argmin()] - spacing + return max(grid_vals[i], -180) + closest_minlon = get_nearest_extent( + minlon, grid['lon_b'], 'less', dlon) + closest_maxlon = get_nearest_extent( + maxlon, grid['lon_b'], 'greater', dlon) + # don't adjust if extent includes poles where points are not evenly + # spaced anyway + if np.abs( + grid['lat_b'][0] - + grid['lat_b'][1]) != np.abs( + grid['lat_b'][1] - + grid['lat_b'][2]) and minlat < grid['lat_b'][1]: + closest_minlat = grid['lat_b'][0] + else: + closest_minlat = get_nearest_extent( + minlat, grid['lat_b'], 'less', dlat) + + if np.abs(grid['lat_b'][-1] - grid['lat_b'][-2]) != \ + np.abs(grid['lat_b'][-2] - grid['lat_b'][-3]) and \ + maxlat > grid['lat_b'][-2]: + closest_maxlat = grid['lat_b'][-1] + else: + closest_maxlat = get_nearest_extent( + maxlat, grid['lat_b'], 'greater', dlat) + + extent = [ + closest_minlon, + closest_maxlon, + closest_minlat, + closest_maxlat] + if isinstance(plot_vals, xr.DataArray): + # filter data by bounds of extent + plot_vals = plot_vals.where( + plot_vals.lon > closest_minlon, + drop=True).where( + plot_vals.lon < closest_maxlon, + drop=True).where( + plot_vals.lat > minlat, + drop=True).where( + plot_vals.lat < maxlat, + drop=True) + else: + # filter data by indices of grid + minlon_i = np.where(grid['lon_b']==closest_minlon)[0] + if len(minlon_i) == 0: + minlon_i = 0 + else: + minlon_i = int(minlon_i) + maxlon_i = np.where(grid['lon_b']==closest_maxlon)[0] + if len(maxlon_i) == 0: + maxlon_i = -1 + else: + maxlon_i = int(maxlon_i) + minlat_i = np.where(grid['lat_b']==closest_minlat)[0] + if len(minlat_i) == 0: + minlat_i = 0 + else: + minlat_i = int(minlat_i) + maxlat_i = np.where(grid['lat_b']==closest_maxlat)[0] + if len(maxlat_i) == 0: + maxlat_i = -1 + else: + maxlat_i = int(maxlat_i) + plot_vals = plot_vals[minlat_i:maxlat_i+1, + minlon_i:maxlon_i+1] + # Create a lon/lat plot + plot = ax.imshow( + plot_vals, + extent=extent, + transform=proj, + cmap=comap, + norm=norm, + origin='lower', + interpolation='nearest', + **extra_plot_args + ) + else: + plot = ax.pcolormesh( + grid["lon_b"], + grid["lat_b"], + plot_vals, + transform=proj, + cmap=comap, + norm=norm, + **extra_plot_args + ) + ax.set_extent(extent, crs=proj) + ax.coastlines() + ax.set_xticks(xtick_positions) + ax.set_xticklabels(xticklabels) + + else: + # Cubed-sphere single level + try: + if masked_data is None: + masked_data = np.ma.masked_where( + np.abs( + grid["lon"] - + 180) < 2, + plot_vals.data.reshape( + 6, + res, + res)) + except ValueError: + # Comparison of numpy arrays throws errors + pass + [minlon, maxlon, minlat, maxlat] = extent + # Catch issue with plots extending into both the western and eastern + # hemisphere + if np.max(grid["lon_b"] > 180): + grid["lon_b"] = (((grid["lon_b"] + 180) % 360) - 180) + + plots = [] + for j in range(6): + plot = ax.pcolormesh( + grid["lon_b"][j, :, :], + grid["lat_b"][j, :, :], + masked_data[j, :, :], + transform=proj, + cmap=comap, + norm=norm, + **extra_plot_args + ) + plots.append(plot) + ax.set_extent(extent, crs=proj) + ax.coastlines() + ax.set_xticks(xtick_positions) + ax.set_xticklabels(xticklabels) + + if add_cb: + cbar = 
plt.colorbar(plot, ax=ax, orientation="horizontal", pad=0.10)
+ cbar.mappable.set_norm(norm)
+ if data_is_xr:
+ all_zero, all_nan = all_zero_or_nan(plot_vals.values)
+ else:
+ all_zero, all_nan = all_zero_or_nan(plot_vals)
+ if all_zero or all_nan:
+ if use_cmap_RdBu:
+ cbar.set_ticks([0.0])
+ else:
+ cbar.set_ticks([0.5])
+ if all_nan:
+ cbar.set_ticklabels(["Undefined throughout domain"])
+ else:
+ cbar.set_ticklabels(["Zero throughout domain"])
+ else:
+ if log_color_scale:
+ cbar.formatter = ticker.LogFormatter(base=10)
+ else:
+ if (vmax - vmin) < 0.1 or (vmax - vmin) > 100:
+ cbar.locator = ticker.MaxNLocator(nbins=4)
+
+ try:
+ cbar.formatter.set_useOffset(False)
+ except BaseException:
+ # not all automatically chosen colorbar formatters properly handle
+ # the above method
+ pass
+ cbar.update_ticks()
+ cbar.set_label(unit)
+
+ if pdfname != "":
+ pdf = PdfPages(pdfname)
+ pdf.savefig(fig)
+ pdf.close()
+
+ # in some cases users may wish to get a list of all associated plots,
+ # e.g. cubed-sphere grids have six plots associated with them
+ if return_list_of_plots:
+ return plots if 'plots' in locals() else [plot]
+ return plot
diff --git a/gcpy/plot/six_plot.py b/gcpy/plot/six_plot.py
new file mode 100644
index 00000000..b4db49aa
--- /dev/null
+++ b/gcpy/plot/six_plot.py
@@ -0,0 +1,938 @@
+"""
+Creates a six-panel comparison plot.
+
+Row 1: Model output (Ref version, Dev version)
+Row 2: Abs difference (dynamic range and restricted range)
+Row 3: Ratio (dynamic range and restricted range)
+
+NOTE: For diff-of-diffs comparisons, Row 3 (Ratio) is replaced
+by Fractional Difference (dynamic range and restricted range).
+
+Also contains several helper routines that were split off
+from gcpy/plot.py.
+"""
+from matplotlib import ticker
+import matplotlib.pyplot as plt
+import numpy as np
+from dask.array import Array as DaskArray
+import xarray as xr
+import cartopy.crs as ccrs
+from gcpy.util import get_nan_mask, verify_variable_type
+from gcpy.plot.core import gcpy_style, normalize_colors
+from gcpy.plot.single_panel import single_panel
+
+# Suppress numpy divide by zero warnings to prevent output spam
+np.seterr(divide="ignore", invalid="ignore")
+
+# Use a style sheet to control plot attributes
+plt.style.use(gcpy_style)
+
+
+def six_plot(
+ subplot,
+ all_zero,
+ all_nan,
+ plot_val,
+ grid,
+ axes,
+ rowcol,
+ title,
+ comap,
+ unit,
+ extent,
+ masked_data,
+ other_all_nan,
+ gridtype,
+ vmins,
+ vmaxs,
+ use_cmap_RdBu,
+ match_cbar,
+ verbose,
+ log_color_scale,
+ pedge=np.full((1, 1), -1),
+ pedge_ind=np.full((1, 1), -1),
+ log_yaxis=False,
+ xtick_positions=None,
+ xticklabels=None,
+ plot_type="single_level",
+ ratio_log=False,
+ proj=ccrs.PlateCarree(),
+ ll_plot_func='imshow',
+ **extra_plot_args
+):
+ """
+ Plotting function to be called from compare_single_level or
+ compare_zonal_mean. Primarily exists to eliminate code redundancy
+ in those two functions and has not been tested separately.
+
+ Args:
+ -----
+ subplot: str
+ Type of plot to create (ref, dev, absolute difference or
+ fractional difference).
+ all_zero: bool
+ Set this flag to True if the data to be plotted consist
+ only of zeros.
+ all_nan: bool
+ Set this flag to True if the data to be plotted consist
+ only of NaNs.
+ plot_val: xarray.DataArray, numpy.ndarray, or dask.array.Array
+ Single data variable to plot.
+ grid: dict
+ Dictionary mapping plot_val to plottable coordinates.
+ axes: matplotlib.axes
+ Axes object to plot information. Will create a new axes
+ if none is passed.
+ rowcol: tuple
+ Subplot position in overall Figure.
+ title: str
+ Title to print on axes
+ comap: matplotlib Colormap
+ Colormap for plotting data values.
+ unit: str
+ Units of plotted data.
+ extent: tuple (minlon, maxlon, minlat, maxlat)
+ Describes minimum and maximum latitude and longitude of
+ input data.
+ masked_data: numpy array
+ Masked area for cubed-sphere plotting.
+ other_all_nan: bool
+ Set this flag to True if plotting ref/dev and the other
+ of ref/dev is all nan.
+ gridtype: str
+ "ll" for lat/lon or "cs" for cubed-sphere.
+ vmins: list of float
+ list of length 3 of minimum ref value, dev value,
+ and absdiff value.
+ vmaxs: list of float
+ list of length 3 of maximum ref value, dev value,
+ and absdiff value.
+ use_cmap_RdBu: bool
+ Set this flag to True to use a blue-white-red colormap.
+ match_cbar: bool
+ Set this flag to True if you are plotting with the
+ same colorbar for ref and dev.
+ verbose: bool
+ Set this flag to True to enable informative printout.
+ log_color_scale: bool
+ Set this flag to True to enable log-scale colormapping.
+
+ Keyword Args (optional):
+ ------------------------
+ pedge: numpy array
+ Edge pressures of grid cells in data to be plotted.
+ Default value: np.full((1,1), -1)
+ pedge_ind: numpy array
+ Indices where edge pressure values are within a given
+ pressure range.
+ Default value: np.full((1,1), -1)
+ log_yaxis: bool
+ Set this flag to True to enable log scaling of pressure
+ in zonal mean plots.
+ Default value: False
+ xtick_positions: list of float
+ Locations of lat/lon or lon ticks on plot.
+ Default value: None
+ xticklabels: list of str
+ Labels for lat/lon ticks.
+ Default value: None
+ plot_type: str
+ Type of plot, either "single_level" or "zonal_mean".
+ Default value: "single_level"
+ ratio_log: bool
+ Set this flag to True to enable log scaling for ratio plots.
+ Default value: False
+ proj: cartopy projection
+ Projection for plotting data.
+ Default value: ccrs.PlateCarree()
+ ll_plot_func: str
+ Function to use for lat/lon single level plotting with
+ possible values 'imshow' and 'pcolormesh'. imshow is much
+ faster but is slightly displaced when plotting from dateline
+ to dateline and/or pole to pole.
+ Default value: 'imshow'
+ extra_plot_args: various
+ Any extra keyword arguments are passed through the
+ plotting functions to be used in calls to pcolormesh() (CS)
+ or imshow() (Lat/Lon).
+ """
+ verify_variable_type(plot_val, (np.ndarray, xr.DataArray, DaskArray))
+
+ # Compute the min & max values
+ vmin, vmax = compute_vmin_vmax_for_plot(
+ plot_val,
+ vmins,
+ vmaxs,
+ subplot,
+ rowcol,
+ all_zero=all_zero,
+ all_nan=all_nan,
+ other_all_nan=other_all_nan,
+ match_cbar=match_cbar,
+ use_cmap_RdBu=use_cmap_RdBu,
+ verbose=verbose,
+ )
+
+ # Compute the norm object (i.e.
put the colorscale on a
+ # range of 0..1, which are matplotlib color coordinates)
+ # (also remove NaNs in data for ratio plots)
+ plot_val, norm = compute_norm_for_plot(
+ plot_val,
+ vmin,
+ vmax,
+ subplot,
+ use_cmap_RdBu=use_cmap_RdBu,
+ log_color_scale=log_color_scale,
+ ratio_log=ratio_log
+ )
+
+ # Create one of the 6 subplots
+ plot = single_panel(
+ plot_val,
+ axes,
+ plot_type,
+ grid,
+ gridtype,
+ title,
+ comap,
+ norm,
+ unit,
+ extent,
+ masked_data,
+ use_cmap_RdBu,
+ log_color_scale,
+ add_cb=False,
+ pedge=pedge,
+ pedge_ind=pedge_ind,
+ log_yaxis=log_yaxis,
+ xtick_positions=xtick_positions,
+ xticklabels=xticklabels,
+ proj=proj,
+ ll_plot_func=ll_plot_func,
+ **extra_plot_args)
+
+ # Control how close to the plot the colorbar will go
+ pad = 0.15
+ if "single_level" in plot_type:
+ pad = 0.025
+
+ # Define the colorbar for the plot
+ cbar = plt.colorbar(
+ plot,
+ ax=axes,
+ orientation="horizontal",
+ norm=norm,
+ pad=pad
+ )
+ cbar.mappable.set_norm(norm)
+ cbar = colorbar_ticks_and_format(
+ plot_val,
+ cbar,
+ vmin,
+ vmax,
+ subplot,
+ all_zero=all_zero,
+ all_nan=all_nan,
+ use_cmap_RdBu=use_cmap_RdBu,
+ log_color_scale=log_color_scale,
+ )
+ cbar.set_label(unit)
+
+
+def verbose_print(verbose, rowcol, vmin, vmax):
+ """
+ Routine to print the vmin & vmax values for each subplot.
+
+ Args:
+ -----
+ verbose : bool
+ Toggles informative printout on (True) or off (False).
+ rowcol : int
+ Subplot index.
+ vmin, vmax : float
+ Minimum and maximum of data range.
+ """
+ if verbose:
+ print(f"Subplot ({rowcol}) vmin, vmax: {vmin}, {vmax}")
+
+
+def compute_vmin_vmax_for_plot(
+ plot_val,
+ vmins,
+ vmaxs,
+ subplot,
+ rowcol,
+ all_zero=False,
+ all_nan=False,
+ other_all_nan=False,
+ match_cbar=False,
+ use_cmap_RdBu=False,
+ verbose=False
+):
+ """
+ Computes the min & max values for a subplot of a six-panel plot.
+
+ Args:
+ -----
+ plot_val: xarray.DataArray, numpy.ndarray, or dask.array.Array
+ Single data variable to plot.
+ vmins: list of float
+ [minimum ref value, minimum dev value, absdiff value]
+ vmaxs: list of float
+ [maximum ref value, maximum dev value, absdiff value]
+ subplot: str
+ Subplot name (see routine six_panel_subplot_names)
+ rowcol: int
+ Subplot index.
+
+ Keyword Arguments (optional):
+ -----------------------------
+ all_zero: bool
+ Indicates if the data consists of all zeros (True)
+ or not (False)
+ all_nan: bool
+ Indicates if the data consists of all NaN values (True)
+ or not (False)
+ other_all_nan: bool
+ Indicates if plotting ref/dev and the other of ref/dev contains
+ all NaN values (True) or not (False).
+ match_cbar: bool
+ Toggles using the same colorbar for ref and dev on (True)
+ or off (False).
+ use_cmap_RdBu: bool
+ Toggles a blue-white-red colormap on (True) or off (False).
+ verbose: bool
+ Toggles informative printout on (True) or off (False).
+ + Returns: + -------- + vmin, vmax : float + Min and max values for this subplot of a 6-panel plot + """ + # ================================================================== + # Get min and max values for Ref or Dev subplots + # ================================================================== + if subplot in ("ref", "dev"): + return vmin_vmax_for_ref_dev_plots( + subplot, + rowcol, + vmins, + vmaxs, + all_zero=all_zero, + all_nan=all_nan, + other_all_nan=other_all_nan, + match_cbar=match_cbar, + use_cmap_RdBu=use_cmap_RdBu, + verbose=verbose + ) + + # ================================================================== + # Get min and max values for Absdiff and Ratio subplots + # ================================================================== + + # First check if all data is zero or NaN + if all_zero: + verbose_print(verbose, rowcol, 0, 0) + return 0, 0 + if all_nan: + verbose_print(verbose, rowcol, np.nan, np.nan) + return np.nan, np.nan + + # Absdiff + if subplot in ("dyn_absdiff", "res_absdiff"): + return vmin_vmax_for_absdiff_plots( + plot_val, + subplot, + rowcol, + verbose=verbose + ) + + # Ratio + if subplot in ("dyn_ratio", "res_ratio"): + return vmin_vmax_for_ratio_plots( + plot_val, + subplot, + rowcol, + verbose=verbose + ) + + # Make sure the function returns a value. This will avoid + # an "inconsistent-return-statements" warning from Pylint. + return None + + +def vmin_vmax_for_ref_dev_plots( + subplot, + rowcol, + vmins, + vmaxs, + all_zero=False, + all_nan=False, + other_all_nan=False, + match_cbar=False, + use_cmap_RdBu=False, + verbose=False, +): + """ + Returns the vmin and vmax values for the "Ref" or "Dev" + subplots of a six-panel plot. + + Args: + ----- + subplot: str + Subplot name (see routine six_panel_subplot_names). + rowcol : int + Subplot index. + vmins: list of float + [minimum ref value, minimum dev value, absdiff value] + vmaxs: list of float + [maximum ref value, maximum dev value, absdiff value] + + Keyword Arguments (optional): + ----------------------------- + all_zero: bool + Indicates if the data consists of all zeros (True) + or not (False). + all_nan: bool + Indicates if the data consists of all NaN values (True) + or not (False). + other_all_nan: bool + Indicates if plotting ref/dev and the other of ref/dev contains + all NaN values (True) or not (False). + match_cbar: bool + Toggles using the same colorbar for ref and dev on (True) + or off (False). + use_cmap_RdBu: bool + Toggles a blue-white-red colormap on (True) or off (False). + verbose: bool + Toggles informative printout on (True) or off (False). + + Returns: + -------- + vmin, vmax : float + Min and max values to plot. 
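+
+ Example:
+ --------
+ An illustration with hypothetical values (gradient colormap,
+ i.e. use_cmap_RdBu=False, and no all-zero/all-NaN data):
+ vmins=[0., 2., -5.] and vmaxs=[10., 20., 5.] yield
+ (0., 10.) for subplot="ref" and (2., 20.) for subplot="dev"
+ when match_cbar=False, or (-5., 5.) for both subplots when
+ match_cbar=True (the shared comparison range).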
+ """ + #--------------------------------------------------------------- + # Data is all zero or Nan + #--------------------------------------------------------------- + if all_zero or all_nan: + [vmin, vmax] = [vmins[1], vmaxs[1]] + if subplot == "ref": + [vmin, vmax] = [vmins[0], vmaxs[0]] + verbose_print(verbose, rowcol, vmin, vmax) + return vmin, vmax + + #--------------------------------------------------------------- + # We are using a difference colormap (diff of diffs) + #--------------------------------------------------------------- + if use_cmap_RdBu: + + # Ref supblot, diff-of-diffs + if subplot in "ref": + vmax = max([np.abs(vmins[0]), np.abs(vmaxs[0])]) + if match_cbar and not other_all_nan: + vmax = max([np.abs(vmins[2]), np.abs(vmaxs[2])]) + verbose_print(verbose, rowcol, -vmax, vmax) + return -vmax, vmax + + # Dev subplot, diff-of-diffs + vmax = max([np.abs(vmins[1]), np.abs(vmaxs[1])]) + if match_cbar and not other_all_nan: + vmax = max([np.abs(vmins[2]), np.abs(vmaxs[2])]) + verbose_print(verbose, rowcol, -vmax, vmax) + return -vmax, vmax + + #--------------------------------------------------------------- + # We are using a gradient colormap + #--------------------------------------------------------------- + + # Ref subplot + if subplot in "ref": + [vmin, vmax] = [vmins[0], vmaxs[0]] + if match_cbar and not other_all_nan: + [vmin, vmax] = [vmins[2], vmaxs[2]] + verbose_print(verbose, rowcol, vmin, vmax) + return vmin, vmax + + # Dev subplot + [vmin, vmax] = [vmins[1], vmaxs[1]] + if match_cbar and not other_all_nan: + [vmin, vmax] = [vmins[2], vmaxs[2]] + verbose_print(verbose, rowcol, vmin, vmax) + return vmin, vmax + + +def vmin_vmax_for_absdiff_plots( + plot_val, + subplot, + rowcol, + verbose=False, +): + """ + Returns the vmin and vmax values for the "Absolute Difference + (dynamic range)" or "Absolute Difference (restricted range)" + subplots of a of a six-panel plot. + + Args: + ----- + plot_val: xarray.DataArray, numpy.ndarray, or dask.array.Array + Single data variable of GEOS-Chem output to plot. + subplot: str + Subplot name (see routine six_panel_subplot_names). + rowcol : int + Subplot index. + + Keyword Arguments (optional): + ----------------------------- + verbose: bool + Toggles informative printout on (True) or off (False). + + Returns: + -------- + vmin, vmax : float + Min and max values to plot. + """ + # Absdiff (dynamic range) subplot: min & max (excluding NaNs) + if subplot in "dyn_absdiff": + vmax = max( + [np.abs(np.nanmin(plot_val)), np.abs(np.nanmax(plot_val))] + ) + verbose_print(verbose, rowcol, -vmax, vmax) + return -vmax, vmax + + # Absdiff (restricted range) subplot + if subplot in "res_absdiff": + [pct5, pct95] = [ + np.percentile(plot_val, 5), + np.percentile(plot_val, 95), + ] + vmax = np.max([np.abs(pct5), np.abs(pct95)]) + verbose_print(verbose, rowcol, -vmax, vmax) + return -vmax, vmax + + # Make sure the function returns a value. This will avoid + # an "inconsistent-return-statements" warning from Pylint. + return None + + +def vmin_vmax_for_ratio_plots( + plot_val, + subplot, + rowcol, + verbose=False, +): + """ + Returns the vmin and vmax values for the "Ratio (dynamic range)" + or "Ratio (restricted range) subplot of a six-panel plot. + + Args: + ----- + plot_val: xarray.DataArray, numpy.ndarray, or dask.array.Array + Single data variable to plot. + subplot: str + Subplot name (see routine six_panel_subplot_names). + rowcol : int + Subplot index. 
+
+ Keyword Arguments (optional):
+ -----------------------------
+ verbose: bool
+ Toggles informative printout on (True) or off (False).
+
+ Returns:
+ --------
+ vmin, vmax : float
+ Min and max values to plot.
+ """
+ # Ratio (dynamic range) subplot
+ if subplot == "dyn_ratio":
+ vmax = np.max(
+ [np.abs(np.nanmin(plot_val)), np.abs(np.nanmax(plot_val))]
+ )
+ vmin = 1.0 / vmax
+ if vmin > vmax:
+ vmin, vmax = vmax, vmin
+ verbose_print(verbose, rowcol, vmin, vmax)
+ return vmin, vmax
+
+ # Ratio (restricted range) subplot
+ verbose_print(verbose, rowcol, 0.5, 2.0)
+ return 0.5, 2.0
+
+
+def compute_norm_for_plot(
+ plot_val,
+ vmin,
+ vmax,
+ subplot,
+ use_cmap_RdBu=False,
+ log_color_scale=False,
+ ratio_log=False,
+):
+ """
+ Normalize colors (put into range [0..1] for matplotlib methods).
+
+ Args:
+ -----
+ plot_val: xarray.DataArray, numpy.ndarray, or dask.array.Array
+ Single data variable of GEOS-Chem output to plot
+ vmin, vmax : float
+ Min and max value for this subplot of a 6-panel plot.
+ subplot: str
+ Subplot name (see routine six_panel_subplot_names)
+
+ Keyword Arguments (optional):
+ -----------------------------
+ use_cmap_RdBu: bool
+ Toggles a blue-white-red colormap on (True) or off (False).
+ log_color_scale : bool
+ Toggles a logarithmic color scale on (True) or off (False).
+ ratio_log : bool
+ Toggles log scaling for ratio plots on (True) or not (False).
+
+ Returns:
+ --------
+ plot_val : xarray.DataArray, numpy.ndarray, or dask.array.Array
+ Data to plot (NaN-masked for the ratio subplots).
+ norm : matplotlib color normalization object
+ Color normalization for this subplot of a 6-panel plot.
+ """
+ # ==================================================================
+ # Ref and Dev subplots
+ # ==================================================================
+ if subplot in ("ref", "dev"):
+ return plot_val, normalize_colors(
+ vmin,
+ vmax,
+ is_difference=use_cmap_RdBu,
+ log_color_scale=log_color_scale,
+ ratio_log=ratio_log
+ )
+
+ # ==================================================================
+ # Absdiff (dynamic & restricted range) subplots
+ # ==================================================================
+ if subplot in ("dyn_absdiff", "res_absdiff"):
+ return plot_val, normalize_colors(
+ vmin,
+ vmax,
+ is_difference=True
+ )
+
+ # ==================================================================
+ # Ratio (dynamic & restricted range) subplots
+ # Remove NaNs for compatibility with color normalization
+ # ==================================================================
+ plot_val = get_nan_mask(plot_val)
+ return plot_val, normalize_colors(
+ vmin,
+ vmax,
+ is_difference=True,
+ log_color_scale=True,
+ ratio_log=ratio_log
+ )
+
+
+def colorbar_ticks_and_format(
+ plot_val,
+ cbar,
+ vmin,
+ vmax,
+ subplot,
+ all_zero=False,
+ all_nan=False,
+ use_cmap_RdBu=False,
+ log_color_scale=False,
+):
+ """
+ Adjusts colorbar tick placement and label formatting style
+ for a subplot of a 6-panel plot. Called from routine six_plot.
+
+ Args:
+ -----
+ plot_val: xarray.DataArray, numpy.ndarray, or dask.array.Array
+ Single data variable to plot.
+ cbar : matplotlib.colorbar.Colorbar
+ The input colorbar.
+ vmin, vmax : float
+ Min and max of the data range to plot.
+ subplot: str
+ Subplot name (see routine six_panel_subplot_names).
+
+ Keyword Arguments (optional):
+ -----------------------------
+ all_zero: bool
+ Indicates if the data consists of all zeros (True)
+ or not (False).
+ all_nan: bool
+ Indicates if the data consists of all NaN values (True)
+ or not (False).
+ use_cmap_RdBu: bool
+ Toggles a blue-white-red colormap on (True) or off (False).
+ log_color_scale : bool
+ Toggles a logarithmic color scale on (True) or off (False).
+
+ Returns:
+ --------
+ cbar : matplotlib.colorbar.Colorbar
+ The modified colorbar.
+ """
+ # ==================================================================
+ # Data is all zero or NaN:
+ # Place a single tick with an appropriate label in the middle.
+ # For RdBu colortables this goes at 0.0; otherwise at 0.5.
+ # ==================================================================
+ if all_zero or all_nan:
+ return colorbar_for_all_zero_or_nan(
+ cbar,
+ subplot,
+ all_nan=all_nan,
+ use_cmap_RdBu=use_cmap_RdBu,
+ )
+
+ # ==================================================================
+ # Data is plottable: Pick the locations and format of tick
+ # labels depending on the subplot and the colormap that is used.
+ # ==================================================================
+
+ #-------------------------------------------------------------------
+ # Ref and Dev subplots, log scale
+ #-------------------------------------------------------------------
+ if subplot in ("ref", "dev") and log_color_scale:
+ cbar.formatter = ticker.LogFormatter(base=10)
+ cbar.minorticks_off()
+ return cbar
+
+ #-------------------------------------------------------------------
+ # Ratio (dynamic and restricted range) subplots:
+ #-------------------------------------------------------------------
+ if subplot in ("dyn_ratio", "res_ratio"):
+
+ def ref_equals_dev(array):
+ """
+ Internal routine that returns True if all elements
+ of Ref/Dev are equal to 1 or NaN (i.e. missing values).
+ This is needed to be able to add a ticklabel stating
+ that Ref & Dev are equal throughout the domain.
+ """
+ uniq = np.unique(array)
+ if len(uniq) == 2:
+ return np.any(np.isin(uniq, [1.0])) and np.any(np.isnan(uniq))
+ return np.all(np.isin(uniq, [1.0]))
+
+ # When Ref == Dev
+ if ref_equals_dev(plot_val):
+ return colorbar_for_ref_equals_dev(cbar)
+
+ # Dynamic range ratio subplot
+ if subplot == "dyn_ratio":
+ return colorbar_for_dyn_ratio_plots(cbar, vmin, vmax)
+
+ # Restricted range ratio subplot
+ return colorbar_for_res_ratio_plots(cbar)
+
+ #-------------------------------------------------------------------
+ # For the following subplots:
+ # (1) Ref & Dev, with non-log color scales
+ # (2) Absdiff (dynamic range)
+ # (3) Absdiff (restricted range)
+ #-------------------------------------------------------------------
+
+ # For data ranges between 0.1 and 100:
+ if 0.1 < (vmax - vmin) < 100.0:
+ return colorbar_for_small_data_range(
+ cbar,
+ vmin,
+ vmax,
+ diff_cmap=(use_cmap_RdBu or "absdiff" in subplot)
+ )
+
+ # For larger data ranges, automatically find good tick locations
+ # (but not so many that the labels smush together)
+ cbar.locator = ticker.MaxNLocator(nbins=4)
+ cbar.minorticks_off()
+ return cbar
+
+
+def colorbar_for_all_zero_or_nan(
+ cbar,
+ subplot,
+ all_nan=False,
+ use_cmap_RdBu=False,
+):
+ """
+ Formats a colorbar object for the case when Ref or Dev
+ contains either all zeroes or all NaNs.
+
+ Args:
+ -----
+ cbar : matplotlib.colorbar.Colorbar
+ The input colorbar.
+ subplot : str
+ Name of this subplot of a 6-panel plot.
+
+ Keyword Args (optional):
+ ------------------------
+ all_nan : bool
+ Indicates that the data array contains all NaN values (True)
+ or not (False).
+ use_cmap_RdBu : bool
+ Indicates that we are using a difference colortable (True)
+ or not (False).
+ + Returns: + -------- + cbar : matplotlib.colorbar.Colorbar + The modified colorbar + """ + pos = [0.0] + if subplot in ("ref", "dev"): + if not use_cmap_RdBu: + pos = [0.5] + labels = ["Zero throughout domain"] + if all_nan: + labels = ["Undefined throughout domain"] + cbar.set_ticks(pos, labels=labels) + cbar.minorticks_off() + return cbar + + +def colorbar_for_ref_equals_dev(cbar): + """ + Formats a colorbar object for the case when Ref and Dev + are equal throughout the domain. + + Args: + ----- + cbar : matplotlib.colorbar.Colorbar + The input colorbar. + + Returns: + -------- + cbar : matplotlib.colorbar.Colorbar + The modified colorbar. + """ + pos = [1.0] + cbar.set_ticks( + pos, + labels=["Ref and Dev equal throughout domain"] + ) + cbar.minorticks_off() + return cbar + + +def colorbar_for_dyn_ratio_plots( + cbar, + vmin, + vmax +): + """ + Formats a colorbar object for the "dynamic range ratio" + subplot of a six-panel plot. + + Args: + ----- + cbar : matplotlib.colorbar.Colorbar + The input colorbar. + vmin, vmax : float + Min and max of the data range. + + Returns: + -------- + cbar : matplotlib.colorbar.Colorbar + The modified colorbar. + """ + # If the ratio is in the range 0.999 and 1.001, then + # place tickmarks at [vmin, 1, vmax]. This should help + # to avoid the tick labels from running together. + if vmin > 0.999 and vmax < 1.001: + pos = [vmin, 1.0, vmax] + cbar.set_ticks(pos) + cbar.formatter = ticker.ScalarFormatter() + cbar.formatter.set_useOffset(False) + cbar.minorticks_off() + return cbar + + # If the ratio is in the range 0.1 .. 10.0, then place + # tickmarks [vmin, avg(vmin,1), 1, avg(vmax,1), vmax]. + # This should be good enough for most cases. Perhaps + # think about implementing a better method later on. + if vmin > 0.1 and vmax < 10.0: + pos = [vmin, (vmin+1.0)/2.0, 1.0, (vmax+1.0)/2.0, vmax] + cbar.set_ticks(pos) + cbar.formatter = ticker.ScalarFormatter() + cbar.formatter.set_useOffset(False) + cbar.minorticks_off() + return cbar + + # Use LogLocator and LogFormatter for larger data ranges + cbar.locator = ticker.LogLocator(base=10, subs='all') + cbar.formatter = ticker.LogFormatter(base=10) + cbar.minorticks_off() + return cbar + + +def colorbar_for_res_ratio_plots(cbar): + """ + Formats a colorbar object for the "restricted range ratio" + subplot of a six-panel plot. + + Args: + ----- + cbar : matplotlib.colorbar.Colorbar + The input colorbar. + + Returns: + -------- + cbar : matplotlib.colorbar.Colorbar + The modified colorbar. + """ + # Use fixed ticks and ScalarFormatter + pos = [0.5, 0.75, 1.0, 1.5, 2.0] + cbar.set_ticks(pos) + cbar.formatter = ticker.ScalarFormatter() + cbar.minorticks_off() + return cbar + + +def colorbar_for_small_data_range( + cbar, + vmin, + vmax, + diff_cmap=False, +): + """ + Formats a colorbar object for data that falls within the range + of 0.1 .. 100. + + Args: + ----- + cbar : matplotlib.colorbar.Colorbar + The input colorbar. + vmin, vmax : float + Min and max of the data range. + diff_cmap : bool + Indicates that we are using a diverging colortable (True) + or not (False). + + Returns: + -------- + cbar : matplotlib.colorbar.Colorbar + The modified colorbar. + """ + # If using a difference colormap (e.g. for absdiff), + # then place ticks symmetrically around zero. 
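+ # (Worked example with hypothetical values: vmin=-4, vmax=4
+ # gives pos = [-4.0, -2.0, 0.0, 2.0, 4.0], i.e. five evenly
+ # spaced ticks centered on zero.)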
+ if diff_cmap: + pos = [vmin, vmin/2.0, 0.0, vmax/2.0, vmax] + cbar.set_ticks(pos) + cbar.formatter = ticker.ScalarFormatter() + cbar.formatter.set_useOffset(False) + cbar.minorticks_off() + return cbar + + # Otherwise place ticks symmetrically along the data range + vrange = vmax - vmin + pos = [vmin, vmin+vrange*0.25, vmin+vrange*0.5, vmin+vrange*0.75, vmax] + cbar.set_ticks(pos) + cbar.formatter = ticker.ScalarFormatter() + cbar.formatter.set_useOffset(False) + cbar.minorticks_off() + return cbar diff --git a/gcpy/regrid.py b/gcpy/regrid.py index 09fbe4db..3e9e4fc3 100644 --- a/gcpy/regrid.py +++ b/gcpy/regrid.py @@ -1,20 +1,25 @@ -''' Functions for creating xesmf regridder objects ''' - +""" +Module containing functions for creating xESMF regridder objects. +""" import os -import xesmf as xe -from .grid import make_grid_LL, make_grid_CS, make_grid_SG, get_input_res, call_make_grid, \ - get_grid_extents, get_vert_grid +import warnings import hashlib +import xesmf as xe import numpy as np import xarray as xr import pandas as pd import scipy.sparse -import warnings +from gcpy.grid import make_grid_LL, make_grid_CS, make_grid_SG, \ + get_input_res, call_make_grid, get_grid_extents, get_vert_grid def make_regridder_L2L( - llres_in, llres_out, weightsdir='.', reuse_weights=False, + llres_in, + llres_out, + weightsdir='.', + reuse_weights=False, in_extent=[-180, 180, -90, 90], - out_extent=[-180, 180, -90, 90]): + out_extent=[-180, 180, -90, 90] +): """ Create an xESMF regridder between two lat/lon grids @@ -64,11 +69,11 @@ def make_regridder_L2L( weightsfile = os.path.join( weightsdir, 'conservative_{}_{}_{}_{}.nc'.format( llres_in, llres_out, in_extent_str, out_extent_str)) - + if not os.path.isfile(weightsfile) and reuse_weights: #prevent error with more recent versions of xesmf reuse_weights=False - + try: regridder = xe.Regridder( llgrid_in, @@ -86,8 +91,13 @@ def make_regridder_L2L( return regridder -def make_regridder_C2L(csres_in, llres_out, weightsdir='.', - reuse_weights=True, sg_params=[1, 170, -90]): +def make_regridder_C2L( + csres_in, + llres_out, + weightsdir='.', + reuse_weights=True, + sg_params=[1, 170, -90] +): """ Create an xESMF regridder from a cubed-sphere to lat/lon grid @@ -132,7 +142,7 @@ def make_regridder_C2L(csres_in, llres_out, weightsdir='.', else: weights_fname = f'conservative_sg{sg_hash(csres_in, sf_in, tlat_in, tlon_in)}_ll{llres_out}_F{i}.nc' weightsfile = os.path.join(weightsdir, weights_fname) - + if not os.path.isfile(weightsfile) and reuse_weights: #prevent error with more recent versions of xesmf reuse_weights=False @@ -239,8 +249,13 @@ def make_regridder_S2S( return regridder_list -def make_regridder_L2S(llres_in, csres_out, weightsdir='.', - reuse_weights=True, sg_params=[1, 170, -90]): +def make_regridder_L2S( + llres_in, + csres_out, + weightsdir='.', + reuse_weights=True, + sg_params=[1, 170, -90] +): """ Create an xESMF regridder from a lat/lon to a cubed-sphere grid @@ -309,9 +324,15 @@ def make_regridder_L2S(llres_in, csres_out, weightsdir='.', def create_regridders( - refds, devds, weightsdir='.', reuse_weights=True, cmpres=None, - zm=False, sg_ref_params=[1, 170, -90], - sg_dev_params=[1, 170, -90]): + refds, + devds, + weightsdir='.', + reuse_weights=True, + cmpres=None, + zm=False, + sg_ref_params=[1, 170, -90], + sg_dev_params=[1, 170, -90] +): """ Internal function used for creating regridders between two datasets. Follows decision logic needed for plotting functions. 
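
 Example:
 --------
 An illustrative sketch only (the file names and the "4x5"
 comparison resolution are hypothetical; keyword names follow
 the signature above):

 >>> import xarray as xr
 >>> from gcpy.regrid import create_regridders
 >>> refds = xr.open_dataset("ref_data.nc4")
 >>> devds = xr.open_dataset("dev_data.nc4")
 >>> regrid_info = create_regridders(
 ...     refds, devds, weightsdir=".", reuse_weights=True, cmpres="4x5")

 The returned grids, regridder objects, and per-face regridder
 lists are described under Returns below.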
@@ -328,19 +349,22 @@ def create_regridders( Directory in which to create xESMF regridder NetCDF files Default value: '.' reuse_weights: bool - Set this flag to True to reuse existing xESMF regridder NetCDF files - Default value: False + Set this flag to True to reuse existing xESMF regridder + NetCDF files. Default value: False cmpres: int or str - Specific target resolution for comparison grid used in difference and ratio plots - Default value: None (will follow logic chain below) + Specific target resolution for comparison grid used in + difference and ratio plots. Default value: None (will + follow logic chain below) zm: bool - Set this flag to True if regridders will be used in zonal mean plotting - Default value: False - sg_ref_params: list[float, float, float] (stretch_factor, target_longitude, target_latitude) + Set this flag to True if regridders will be used in zonal mean + plotting. Default value: False + sg_ref_params: list[float, float, float] + (stretch_factor, target_longitude, target_latitude) Ref grid stretched-grid parameters in the format [stretch_factor, target_longitude, target_latitude]. Default value: [1, 170, -90] (no stretching) - sg_dev_params: list[float, float, float] (stretch_factor, target_longitude, target_latitude) + sg_dev_params: list[float, float, float] + (stretch_factor, target_longitude, target_latitude) Dev grid stretched-grid parameters in the format [stretch_factor, target_longitude, target_latitude]. Default value: [1, 170, -90] (no stretching) @@ -359,8 +383,9 @@ def create_regridders( Regridder object between refgrid or devgrid and cmpgrid (will be None if input grid is not lat/lon) refregridder_list, devregridder_list: list[6 xESMF regridders] - List of regridder objects for each face between refgrid or devgrid and cmpgrid - (will be None if input grid is not cubed-sphere) + List of regridder objects for each face between refgrid + or devgrid and cmpgrid (will be None if input grid is + not cubed-sphere) """ # Take two lat/lon or cubed-sphere xarray datasets and regrid them if @@ -682,13 +707,18 @@ def regrid_comparison_data( new_data = reformat_dims( new_data, format=data_format, towards_common=False) return new_data - else: - return data + return data -def reformat_dims(ds, format, towards_common): + +def reformat_dims( + ds, + format, + towards_common +): """ - Reformat dimensions of a cubed-sphere / stretched-grid grid between different GCHP formats + Reformat dimensions of a cubed-sphere / stretched-grid grid + between different GCHP formats Args: ds: xarray Dataset @@ -713,7 +743,7 @@ def unravel_checkpoint_lat(ds_in): np.linspace(1, 6, 6), np.linspace(1, cs_res, cs_res) ]) - ds_in = ds_in.assign_coords({'lat': mi}) + ds_in = ds_in.assign_coords({"lat": mi}) ds_in = ds_in.unstack('lat') return ds_in @@ -724,7 +754,7 @@ def ravel_checkpoint_lat(ds_out): cs_res = ds_out['lon'].size ds_out = ds_out.stack(lat=['lat_level_0', 'lat_level_1']) ds_out = ds_out.assign_coords({ - 'lat': np.linspace(1, 6 * cs_res, 6 * cs_res) + 'lat': np.linspace(1, 6 * cs_res, 6 * cs_res), }) return ds_out @@ -749,9 +779,11 @@ def ravel_checkpoint_lat(ds_out): 'Ydim': 'Y', 'time': 'T', }, - 'transpose': ('time', 'lev', 'nf', 'Ydim', 'Xdim') + 'transpose': ('time', 'lev', 'nf', 'Xdim', 'Ydim') } } + + # %%%% Renaming toward the common format %%%% if towards_common: # Unravel dimensions for unravel_callback in dim_formats[format].get('unravel', []): @@ -759,31 +791,32 @@ def ravel_checkpoint_lat(ds_out): # Rename dimensions ds = 
ds.rename(dim_formats[format].get('rename', {})) - - return ds - else: - # Reverse rename - ds = ds.rename( - {v: k for k, v in dim_formats[format].get('rename', {}).items()}) - - # Ravel dimensions - for ravel_callback in dim_formats[format].get('ravel', []): - ds = ravel_callback(ds) - - # Transpose - if len(ds.dims) == 5 or (len(ds.dims) == 4 and 'lev' in list( - ds.dims) and 'time' in list(ds.dims)): - # full dim dataset - ds = ds.transpose(*dim_formats[format].get('transpose', [])) - elif len(ds.dims) == 4: - # single time - ds = ds.transpose(*dim_formats[format].get('transpose', [])[1:]) - elif len(ds.dims) == 3: - # single level / time - ds = ds.transpose(*dim_formats[format].get('transpose', [])[2:]) return ds + # %%%% Renaming from the common format %%%% + # Reverse rename + ds = ds.rename( + {v: k for k, v in dim_formats[format].get('rename', {}).items()}) + + # Ravel dimensions + for ravel_callback in dim_formats[format].get('ravel', []): + ds = ravel_callback(ds) + + # Transpose + if len(ds.dims) == 5 or (len(ds.dims) == 4 and 'lev' in list( + ds.dims) and 'time' in list(ds.dims)): + # full dim dataset + ds = ds.transpose(*dim_formats[format].get('transpose', [])) + elif len(ds.dims) == 4: + # single time + ds = ds.transpose(*dim_formats[format].get('transpose', [])[1:]) + elif len(ds.dims) == 3: + # single level / time + ds = ds.transpose(*dim_formats[format].get('transpose', [])[2:]) + return ds + + def sg_hash( cs_res, stretch_factor: float, @@ -797,13 +830,19 @@ def sg_hash( cs_res=cs_res).encode()).hexdigest()[ :7] -def regrid_vertical_datasets(ref, dev, target_grid_choice='ref', ref_vert_params=[[],[]], - dev_vert_params=[[],[]], target_vert_params=[[],[]]): +def regrid_vertical_datasets( + ref, + dev, + target_grid_choice='ref', + ref_vert_params=[[],[]], + dev_vert_params=[[],[]], + target_vert_params=[[],[]] +): """ - Perform complete vertical regridding of GEOS-Chem datasets to - the vertical grid of one of the datasets or an entirely different + Perform complete vertical regridding of GEOS-Chem datasets to + the vertical grid of one of the datasets or an entirely different vertical grid. - + Args: ref: xarray.Dataset First dataset @@ -814,15 +853,15 @@ def regrid_vertical_datasets(ref, dev, target_grid_choice='ref', ref_vert_params unless target_vert_params is provided Default value: 'ref' ref_vert_params (optional): list(list, list) of list-like types - Hybrid grid parameter A in hPa and B (unitless) in [AP, BP] format. + Hybrid grid parameter A in hPa and B (unitless) in [AP, BP] format. Needed if ref grid is not 47 or 72 levels Default value: [[], []] dev_vert_params (optional): list(list, list) of list-like types - Hybrid grid parameter A in hPa and B (unitless) in [AP, BP] format. + Hybrid grid parameter A in hPa and B (unitless) in [AP, BP] format. Needed if dev grid is not 47 or 72 levels Default value: [[], []] target_vert_params (optional): list(list, list) of list-like types - Hybrid grid parameter A in hPa and B (unitless) in [AP, BP] format. + Hybrid grid parameter A in hPa and B (unitless) in [AP, BP] format. 
Will override target_grid_choice as target grid
Default value: [[], []]
Returns:
@@ -835,30 +874,40 @@
# Get mid-point pressure and edge pressures for this grid
ref_pedge, ref_pmid, _ = get_vert_grid(ref, *ref_vert_params)
dev_pedge, dev_pmid, _ = get_vert_grid(dev, *dev_vert_params)
-
+
new_ref, new_dev = ref, dev
-
+
if len(ref_pedge) != len(dev_pedge) or target_vert_params != [[],[]]:
if target_vert_params != [[],[]]:
#use a specific target grid for regridding if passed
target_grid = vert_grid(*target_vert_params)
- target_pedge, target_pmid = target_grid.p_edge(), target_grid.p_mid()
+ target_pedge = target_grid.p_edge()
+ target_pmid = target_grid.p_mid()
elif target_grid_choice == 'ref':
- target_pedge, target_pmid = ref_pedge, ref_pmid
+ target_pedge = ref_pedge
+ target_pmid = ref_pmid
else:
- target_pedge, target_pmid = dev_pedge, dev_pmid
+ target_pedge = dev_pedge
+ target_pmid = dev_pmid
+
+ def regrid_one_vertical_dataset(
+ ds,
+ ds_pedge,
+ target_pedge,
+ target_pmid
+ ):
new_ds = ds
if len(ds_pedge) != len(target_pedge):
#regrid all 3D (plus possible time dimension) variables
xmat_ds = gen_xmat(ds_pedge, target_pedge)
- regrid_variables = [v for v in ds.data_vars if (("lat" in ds[v].dims or "Xdim" in ds[v].dims)
- and ("lon" in ds[v].dims or "Ydim" in ds[v].dims)
- and ("lev" in ds[v].dims))]
+ regrid_variables = [v for v in ds.data_vars if (
+ ("lat" in ds[v].dims or "Xdim" in ds[v].dims) and \
+ ("lon" in ds[v].dims or "Ydim" in ds[v].dims) and \
+ ("lev" in ds[v].dims)
+ )]
+ new_ds = xr.Dataset()
#currently drop data vars that have lev but don't also have x and y coordinates
- for v in (set(ds.data_vars)-set(regrid_variables)):
+ for v in (set(ds.data_vars)-set(regrid_variables)):
if 'lev' not in ds[v].dims:
+ new_ds[v] = ds[v]
new_ds.attrs = ds.attrs
@@ -866,15 +915,29 @@ def regrid_one_vertical_dataset(ds, ds_pedge, target_pedge, target_pmid):
if "time" in ds[v].dims:
new_ds_temp = []
for time in range(len(ds[v].time)):
- new_ds_v = regrid_vertical(ds[v].isel(time=time), xmat_ds, target_pmid)
+ new_ds_v = regrid_vertical(
+ ds[v].isel(time=time),
+ xmat_ds,
+ target_pmid
+ )
+ new_ds_temp.append(new_ds_v.expand_dims("time"))
+ new_ds[v] = xr.concat(new_ds_temp, "time")
+ else:
+ new_ds[v] = regrid_vertical(ds[v], xmat_ds, target_pmid)
return new_ds
-
- new_ref = regrid_one_vertical_dataset(ref, ref_pedge, target_pedge, target_pmid)
- new_dev = regrid_one_vertical_dataset(dev, dev_pedge, target_pedge, target_pmid)
+
+ new_ref = regrid_one_vertical_dataset(
+ ref,
+ ref_pedge,
+ target_pedge,
+ target_pmid
+ )
+ new_dev = regrid_one_vertical_dataset(
+ dev,
+ dev_pedge,
+ target_pedge,
+ target_pmid
+ )
return new_ref, new_dev
@@ -949,7 +1012,7 @@
def regrid_vertical(src_data_3D, xmat_regrid, target_levs=[]):
new_coords['lons'] = (
('lat', 'lon'), src_data_3D.coords['lons'].data)
out_data = xr.DataArray(out_data,
- dims=tuple([dim for dim in src_data_3D.dims]),
+ dims=tuple(list(src_data_3D.dims)),
coords=new_coords,
attrs=src_data_3D.attrs)
@@ -997,7 +1060,6 @@
def gen_xmat(p_edge_from, p_edge_to):
while p_edge_to[i_to + 1] > p_edge_from[0]:
i_to += 1
- frac_to_total = 0.0
i_weight = 0
for i_from in range(first_from, n_from):
# Climb the "to" pressures until you intersect with this box
while i_to < n_to and p_base_from <= p_edge_to[i_to + 1]:
i_to += 1
- frac_to_total
= 0.0
# Now, loop over output layers as long as there is any overlap,
# i.e. as long as the base of the "to" layer is below the
diff --git a/gcpy/regrid_restart_file.py b/gcpy/regrid_restart_file.py
index 2683ef08..e811f1ad 100644
--- a/gcpy/regrid_restart_file.py
+++ b/gcpy/regrid_restart_file.py
@@ -190,17 +190,23 @@ def is_gchp_restart_file(dataset):
"""
Checks whether or not an xarray dataset represents a GCHP restart file.

+ Args:
+ dataset: xarray Dataset
+
Returns:
bool: True if `dataset` represents a GCHP restart file.
"""
- is_gchp_restart = "SPC_O3" in dataset.data_vars
- is_gcclassic = "SpeciesRst_O3" in dataset.data_vars
- if not any((is_gchp_restart, is_gcclassic)):
- raise ValueError(
- "Couldn't determine if the provided file is a GC-Classic or GCHP "
- "restart file."
- )
- return is_gchp_restart
+ if not isinstance(dataset, xr.Dataset):
+ msg = "Input argument dataset is not an xarray Dataset object!"
+ raise ValueError(msg)
+
+ for v in dataset.data_vars.keys():
+ if "SPC_" in v:
+ return True
+ if "SpeciesRst_" in v:
+ return False
+ msg = "Input file is not a GCHP or GCClassic restart file!"
+ raise ValueError(msg)


def open_dataset(file_or_url, chunk_size=8192):
@@ -271,12 +277,14 @@
def rename_variables(dataset, to_gchp=True):
return dataset.rename(rename_dict)


-def reverse_lev(dataset):
+def reverse_lev(dataset, to_gchp):
"""
- Reverse the level index of the passed dataset.
+ Reverses the level index of the passed dataset and adjusts the
+ "lev:positive" attribute accordingly.

Args:
dataset (xarray.Dataset): The dataset to have its level index reversed.
+ to_gchp (bool): True if we are saving out a GCHP restart file.

Returns:
xarray.Dataset: The input dataset with a reversed level index.
@@ -284,6 +292,17 @@
logging.info("Reversing coordinate 'lev'")
dataset = dataset.reindex(lev=dataset.lev[::-1])
dataset = dataset.assign_coords(lev=dataset.lev.values[::-1])
+
+ # GCHP restart files are indexed from top-of-atm downward.
+ # GCClassic restart files are indexed from surface upward.
+ #
+ # TODO: Make this more robust, to prevent a situation where
+ # the already down data is flipped to up, but labeled as down.
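+ # One possible hardening for the TODO above (a sketch only, not
+ # implemented in this diff): compare the existing attribute against
+ # the orientation we are about to assign, e.g.
+ #
+ #     desired = "down" if to_gchp else "up"
+ #     if dataset["lev"].attrs.get("positive") == desired:
+ #         logging.warning("'lev' was already marked '%s'", desired)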
+ if to_gchp: + dataset["lev"].attrs["positive"] = "down" + else: + dataset["lev"].attrs["positive"] = "up" + return dataset @@ -497,7 +516,7 @@ def regrid_restart_file( if is_conversion: to_gchp = output_is_gchp dataset = rename_variables(dataset, to_gchp) - dataset = reverse_lev(dataset) + dataset = reverse_lev(dataset, to_gchp) dataset, output_template = drop_variables(dataset, output_template) dataset = regrid(dataset, output_template, weights_file=regrid_weights) diff --git a/gcpy/species_database.yml b/gcpy/species_database.yml index f53fedc1..106c9ce5 100644 --- a/gcpy/species_database.yml +++ b/gcpy/species_database.yml @@ -136,6 +136,36 @@ ALK4: Is_Advected: true Is_Gas: true MW_g: 58.12 +aoa_PROP: &aoaproperties + Is_Advected: true + Is_Gas: true + Is_Tracer: true + MW_g: 1.0 + Snk_Mode: constant + Snk_Value: 0 + Src_Add: true + Src_Horiz: all + Src_Mode: constant + Src_Units: timestep + Src_Value: 1 + Src_Vert: all + Units: days +aoa: + << : *aoaproperties + FullName: Age of air uniform source tracer + Snk_Horiz: all + Snk_Vert: surface +aoa_bl: + << : *aoaproperties + FullName: Age of air uniform source tracer with sink restricted to the boundary layer + Snk_Horiz: all + Snk_Vert: boundary_layer +aoa_nh: + << : *aoaproperties + FullName: Age of air uniform source tracer with surface sink restricted to a zone in the northern hemisphere + Snk_Horiz: lat_zone + Snk_Lats: [30.0, 50.0] + Snk_Vert: surface AONITA: DD_F0: 1.0 DD_Hstar: 2.9e+3 @@ -314,7 +344,14 @@ Be_PROP: &Beproperties Is_Aerosol: true Is_DryDep: true Is_RadioNuclide: true - Is_WetDep: true + Is_Tracer: true + Is_WetDep: true +# Comment out tracer-specific code for now and use RnPbBe_mod.F90 +# Snk_Horiz: all +# Snk_Mode: halflife +# Snk_Vert: all +# Src_Add: true +# Src_Mode: HEMCO WD_AerScavEff: 1.0 WD_KcScaleFac: [1.0, 0.5, 1.0] WD_RainoutEff: [1.0, 0.0, 1.0] @@ -324,21 +361,29 @@ Be10: Formula: Be10 FullName: Beryllium-10 isotope MW_g: 10.0 -Be10Strat: +# Snk_Period: 5.84e8 +# Src_Vert: all +Be10s: << : *Beproperties Formula: Be10 - FullName: Beryllium-10 isotope in stratosphere + FullName: Beryllium-10 isotope stratospheric-source tracer MW_g: 10.0 +# Snk_Period: 5.84e8 +# Src_Vert: stratosphere Be7: << : *Beproperties Formula: Be7 FullName: Beryllium-7 isotope MW_g: 7.0 -Be7Strat: +# Snk_Period: 53.3 +# Src_Vert: all +Be7s: << : *Beproperties Formula: Be7 - FullName: Beryllium-7 isotope in stratosphere + FullName: Beryllium-7 isotope stratospheric-source tracer MW_g: 7.0 +# Snk_Period: 53.3 +# Src_Vert: stratosphere BENZ: Formula: C6H6 FullName: Benzene @@ -453,6 +498,12 @@ BrSALA: FullName: Fine sea salt bromine Is_HygroGrowth: false MW_g: 79.90 +BUTDI: + Formula: C4H4O2 + FullName: Butenedial + Is_Advected: true + Is_Gas: true + MW_g: 84.07 BZCO3: Formula: C7H5O3 FullName: Acyl peroxy radical from benzaldehyde @@ -708,6 +759,7 @@ CH3Cl: Is_Photolysis: true MW_g: 50.45 CH3I: + Background_VV: 1.0e-20 Formula: CH3I FullName: Methyl iodide Henry_CR: 3.6e+3 @@ -715,17 +767,19 @@ CH3I: Is_Advected: true Is_Gas: true Is_Photolysis: true - MW_g: 141.94 -CH3ITracer: - FullName: Methyl_iodide - Is_Advected: true - Is_Gas: true + Is_Tracer: true + Snk_Horiz: all + Snk_Mode: efolding + Snk_Period: 5 + Snk_Vert: all + Src_Add: true + Src_Mode: HEMCO MW_g: 141.94 CH4_PROP: &CH4properties Formula: CH4 Is_Advected: true Is_Gas: true - MW_g: 16.05 + MW_g: 16.04 CH4: << : *CH4properties Background_VV: 1.8e-6 @@ -762,6 +816,10 @@ CH4_OTA: << : *CH4properties Background_VV: 1.0e-20 FullName: Methane from other anthropogenic 
emissions +CH4_RES: + << : *CH4properties + Background_VV: 1.0e-20 + FullName: Methane from hydroelectric reservoir emissions CH4_RIC: << : *CH4properties Background_VV: 1.0e-20 @@ -926,6 +984,10 @@ CO2se: << : *CO2properties Background_VV: 1.0e-20 FullName: Carbon dioxide from ship emissions +CO2fromOH: + << : *CO2properties + Background_VV: 1.0e-20 + FullName: Carbon dioxide loss by OH (carbon mechanism) CO_PROP: &COproperties Formula: CO Is_Advected: true @@ -939,14 +1001,28 @@ COacet: << : *COproperties Background_VV: 1.0e-20 FullName: CO produced from acetone oxidation -COAnthroEmis25dayTracer: +CO_25: << : *COproperties Background_VV: 1.0e-20 - FullName: Anthropogenic_CO_with_25day_lifetime -COAnthroEmis50dayTracer: + FullName: Anthropogenic CO 25 day tracer + Is_Tracer: true + Snk_Horiz: all + Snk_Mode: efolding + Snk_Period: 25 + Snk_Vert: all + Src_Add: true + Src_Mode: HEMCO +CO_50: << : *COproperties Background_VV: 1.0e-20 - FullName: Anthropogenic_CO_with_50day_lifetime + FullName: Anthropogenic CO 50 day tracer + Is_Tracer: true + Snk_Horiz: all + Snk_Mode: efolding + Snk_Period: 50 + Snk_Vert: all + Src_Add: true + Src_Mode: HEMCO COasia: << : *COproperties Background_VV: 1.0e-20 @@ -1015,6 +1091,14 @@ COus: << : *COproperties Background_VV: 1.0e-20 FullName: Anthropogenic + biofuel CO emitted over the USA +COfromCH4: + << : *COproperties + Background_VV: 1.0e-20 + FullName: CO produced from methane oxidation (carbon mechanism) +COfromNMVOC: + << : *COproperties + Background_VV: 1.0e-20 + FullName: CO produced from non-methane VOCs oxidation (carbon mechanism) CSL: DD_F0: 1.0 DD_Hstar: 4.2e+2 @@ -1068,6 +1152,47 @@ DSTAL4: << : *DST4properties FullName: Dust alkalinity, Reff = 4.5 microns MW_g: 29.0 +Dummy: + FullName: Dummy species (carbon mechanism) + Is_Gas: true + MW_g: 1.0 +DummyCH4: + << : *CH4properties + Background_VV: 1.8e-6 + FullName: Methane (external input for carbon mechanism) +DummyNMVOC: + << : *COproperties + Background_VV: 1.0e-20 + FullName: CO produced from NMVOC oxidation (external input for carbon mechanism) +e90_PROP: &e90properties + Background_VV: 1.0e-20 + Is_Advected: true + Is_Gas: true + Is_Tracer: true + MW_g: 1.0 + Snk_Horiz: all + Snk_Mode: efolding + Snk_Period: 90 + Snk_Vert: all + Src_Add: true + Src_Mode: maintain_mixing_ratio + Src_Units: ppbv + Src_Value: 100 + Src_Vert: surface +e90: + << : *e90properties + FullName: Constant burden 90 day tracer + Src_Horiz: all +e90_n: + << : *e90properties + FullName: Constant burden Northern Hemisphere 90 day tracer + Src_Horiz: lat_zone + Src_Lats: [ 40.0, 91.0] +e90_s: + << : *e90properties + FullName: Constant burden Southern Hemisphere 90 day tracer + Src_Horiz: lat_zone + Src_Lats: [ -91.0, -40.0 ] EOH: DD_F0: 0.0 DD_Hstar: 1.9e+2 @@ -1174,11 +1299,32 @@ FeF2: Formula: Fe Fullname: Iron on dust, Reff = 1.4 microns MW_g: 55.84 -GlobEmis90dayTracer: - FullName: Globally_emitted_tracer_with_90day_lifetime_and_100ppbv_maintained_mixing_ratio +FixedCl: + Formula: Cl + FullName: Atomic chlorine (external input for carbon mechanism) Is_Advected: true Is_Gas: true - MW_g: 1.0 + MW_g: 35.45 +FixedOH: + Background_VV: 4.0e-15 + Formula: OH + FullName: Hydroxyl radical (external input for carbon mechanism) + Is_Gas: true + MW_g: 17.01 +FURA: + DD_F0: 1.0 + DD_Hstar: 1.80e-1 + Formula: C4H4O + FullName: Furan + Henry_CR: 6100.0 + Henry_K0: 1.80e-1 + Is_Advected: true + Is_DryDep: true + Is_Gas: true + Is_Photolysis: false + Is_WetDep: true + MW_g: 68.07 + WD_RetFactor: 2.0e-2 GLYC: DD_F0: 1.0 DD_Hstar: 
4.1e+4 @@ -1359,7 +1505,7 @@ HCOOH: MW_g: 46.03 WD_RetFactor: 2.0e-2 Hg0_PROP: &Hg0properties - DD_F0: 1.0e-5 + DD_F0: 3.0e-5 DD_Hstar: 0.11 Formula: 'Hg' Is_Advected: true @@ -1656,6 +1802,161 @@ HgP_usa: HgP_waf: << : *HgPproperties FullName: Particulate mercury from West Africa +Hg_OTHER_PROP: &HgChemProperties + Henry_CR: 8.40e+03 + Henry_K0: 1.40e+06 + Is_Advected: true + Is_DryDep: true + Is_Gas: true + Is_Photolysis: true + Is_WetDep: true + WD_RetFactor: 1.0 +HgBr: + Fullname: HgBr + Formula: HgBr + Is_Advected: true + Is_Gas: true + Is_Photolysis: true + MW_g: 280.49 +HgBrNO2: + Fullname: syn-HgBrONO + Formula: BrHgONO + Is_Advected: true + Is_DryDep: true + Is_Gas: true + Is_Photolysis: true + MW_g: 326.50 +HgBrHO2: + << : *HgChemProperties + Fullname: HgBrHO2 + Formula: BrHgOOH + MW_g: 313.50 +HgBrBrO: + << : *HgChemProperties + Fullname: HgBrBrO + Formula: BrHgOBr + MW_g: 376.40 +HgBrClO: + << : *HgChemProperties + Fullname: HgBrClO + Formula: BrHgOCl + MW_g: 332.00 +HgBrOH: + << : *HgChemProperties + Fullname: HgBrOH + Formula: BrHgOH + MW_g: 297.50 +HgBr2: + << : *HgChemProperties + Fullname: HgBr2 + Formula: HgBr2 + MW_g: 360.40 +HgCl: + Fullname: HgCl + Formula: HgCl + Is_Advected: true + Is_Gas: true + Is_Photolysis: true + Is_WetDep: false + MW_g: 236.04 +HgClNO2: + << : *HgChemProperties + Fullname: syn-HgClONO + Formula: ClHgONO + MW_g: 282.00 +HgClHO2: + WD_RetFactor: 1.0 + << : *HgChemProperties + Fullname: HgClHO2 + Formula: ClHgOOH + MW_g: 269.00 + WD_RetFactor: 1.0 +HgClClO: + << : *HgChemProperties + Fullname: HgClClO + Formula: ClHgOCl + MW_g: 287.50 +HgClBrO: + << : *HgChemProperties + Fullname: HgClBrO + Formula: ClHgOBr + MW_g: 332.00 +HgClBr: + << : *HgChemProperties + Fullname: HgClBr + Formula: HgBrCl + MW_g: 316.00 +HgClOH: + << : *HgChemProperties + Fullname: HgClOH + Formula: ClHgOH + MW_g: 253.00 +HgOH: + Fullname: HgOH + Formula: HgOH + Is_Advected: true + Is_Gas: true + Is_Photolysis: true + MW_g: 201.00 +HgOHNO2: + << : *HgChemProperties + Fullname: syn-HgOHONO + Formula: HOHgONO + MW_g: 263.60 +HgOHHO2: + << : *HgChemProperties + Fullname: HgOHHO2 + Formula: HOHgOOH + MW_g: 250.60 +HgOHClO: + << : *HgChemProperties + Fullname: HgBrClO + Formula: HOHgOCl + MW_g: 269.0000 +HgOHBrO: + << : *HgChemProperties + Fullname: HgOHBrO + Formula: HOHgOBr + MW_g: 313.5000 +HgOHOH: + << : *HgChemProperties + Fullname: HgOH2 + Formula: HOHgOH + MW_g: 234.60 +HgCl2: + << : *HgChemProperties + Fullname: HgCl2 + Formula: HgCl2 + MW_g: 271.5000 +Hg2ClP: + Fullname: Hg(II) chloride salts on sea-salt aerosols + Formula: HgCln + Is_Aerosol: true + Is_DryDep: true + Is_HygroGrowth: false + Is_WetDep: true + MW_g: 201.00 + WD_AerScavEff: 1.0 + WD_KcScaleFac: [1.0, 0.5, 1.0] + WD_RainoutEff: [1.0, 1.0, 1.0] +Hg2ORGP: + Fullname: Hg(II) organic complex in aerosols + Formula: R-Hg + Is_Advected: true + Is_Aerosol: true + Is_DryDep: true + Is_Photolysis: true + Is_WetDep: true + MW_g: 201.00 + WD_AerScavEff: 1.0 + WD_KcScaleFac: [1.0, 0.5, 1.0] + WD_RainoutEff: [1.0, 1.0, 1.0] +Hg2STRP: + Fullname: Hg(II) in stratospheric aerosols + Formula: Hg2+ + Is_Advected: true + Is_Aerosol: true + MW_g: 201.00 HI: DD_F0: 0.0 DD_Hstar: 2.35e+16 @@ -2466,7 +2767,7 @@ LBRO2N: LCH4: FullName: Dummy species to track loss rate of CH4 Is_Gas: true - MW_g: 16.05 + MW_g: 16.04 LCO: FullName: Dummy species to track loss rate of CO Is_Gas: true @@ -3072,6 +3373,29 @@ NAP: Is_Advected: true Is_Gas: true MW_g: 128.18 +nh_PROP: &nhproperties + Is_Advected: true + Is_Gas: true + Is_Tracer: true + MW_g: 
1.0 + Snk_Horiz: all + Snk_Mode: efolding + Snk_Vert: all + Src_Add: false + Src_Mode: constant + Src_Horiz: lat_zone + Src_Lats: [30.0, 50.0] + Src_Units: ppbv + Src_Value: 100 + Src_Vert: all +nh_5: + << : *nhproperties + FullName: Northern Hemisphere 5 day tracer + Snk_Period: 5 +nh_50: + << : *nhproperties + FullName: Northern Hemisphere 50 day tracer + Snk_Period: 50 NH3: DD_DvzAerSnow: 0.03 DD_DvzMinVal: [0.2, 0.3] @@ -3108,11 +3432,6 @@ NH4: WD_KcScaleFac: [1.0, 0.5, 1.0] WD_RainoutEff: [1.0, 0.0, 1.0] WD_RainoutEff_Luo: [0.4, 0.0, 1.0] -NHEmis90dayTracer: - FullName: Northern_hemisphere_emitted_tracer_with_90day_lifetime_and_100ppbv_maintained_mi - Is_Advected: true - Is_Gas: true - MW_g: 1.0 NiF1: << : *DST1properties Formula: Ni @@ -3161,6 +3480,7 @@ NITs: FullName: Inorganic nitrates on surface of seasalt aerosol Is_Photolysis: true MW_g: 31.4 + WD_CoarseAer: true 'NO': Background_VV: 4.0e-13 Formula: 'NO' @@ -3442,10 +3762,14 @@ PAN: MW_g: 121.06 WD_RetFactor: 2.0e-2 PassiveTracer: + Background_VV: 1.0e-7 FullName: Passive tracer for mass conservation evaluation Is_Advected: true Is_Gas: true + Is_Tracer: true MW_g: 1.0 + Snk_Mode: none + Src_Mode: none Pb210_PROP: &Pbproperties DD_DvzAerSnow: 0.03 DD_F0: 0.0 @@ -3455,8 +3779,18 @@ Pb210_PROP: &Pbproperties Is_Aerosol: true Is_DryDep: true Is_RadioNuclide: true + Is_Tracer: true Is_WetDep: true MW_g: 210.0 +# Comment out tracer-specific code for now and use RnPbBe_mod.F90 +# Snk_Horiz: all +# Snk_Mode: efolding +# Snk_Period: 11742.8 +# Snk_Vert: all +# Src_Add: true +# Src_Mode: HEMCO +# Src_Mode: decay_of_another_species +# Src_Species: Rn222 WD_AerScavEff: 1.0 WD_KcScaleFac: [1.0, 0.5, 1.0] WD_RainoutEff: [1.0, 0.0, 1.0] @@ -3464,9 +3798,11 @@ Pb210_PROP: &Pbproperties Pb210: << : *Pbproperties FullName: Lead-210 isotope -Pb210Strat: +# Src_Vert: all +Pb210s: << : *Pbproperties - FullName: Lead-210 isotope in stratosphere + FullName: Lead-210 isotope stratospheric-source tracer +# Src_Vert: stratosphere PbF1: << : *DST1properties Formula: Pb @@ -4032,7 +4368,17 @@ Rn222: Is_Advected: true Is_Aerosol: true Is_RadioNuclide: true + Is_Tracer: true MW_g: 222.0 +# Comment out tracer-specific code for now and use RnPbBe_mod.F90 +# Snk_Horiz: all +# Snk_Mode: efolding +# Snk_Period: 5.5 +# Snk_Vert: all +# Src_Add: true +# Src_Mode: HEMCO +# Src_Mode: decay_of_another_species +# Src_Species: Rn222 ROH: Formula: C3H7OH FullName: '> C2 alcohols' @@ -4089,17 +4435,17 @@ SALCCL: Is_HygroGrowth: false MW_g: 35.45 WD_CoarseAer: true -SF6Tracer: +SF6: + Background_VV: 1.0e-20 Formula: SF6 - FullName: Sulfur_hexafluoride + FullName: Sulfur hexafluoride Is_Advected: true Is_Gas: true + Is_Tracer: true MW_g: 146.06 -SHEmis90dayTracer: - FullName: Southern_hemisphere_emitted_tracer_with_90day_lifetime_and_100ppbv_maintained_mi - Is_Advected: true - Is_Gas: true - MW_g: 1.0 + Snk_Mode: none + Src_Add: true + Src_Mode: HEMCO SiF1: << : *DST1properties Fullname: Silicon on dust, Reff = 0.7 microns @@ -4196,6 +4542,7 @@ SO4s: << : *SALCproperties FullName: Sulfate on surface of seasalt aerosol MW_g: 31.4 + WD_CoarseAer: true SOAGX: DD_DvzAerSnow: 0.03 DD_F0: 0.0 @@ -4245,6 +4592,23 @@ SOAS: WD_KcScaleFac: [1.0, 0.5, 1.0] WD_RainoutEff: [0.8, 0.0, 0.8] WD_RainoutEff_Luo: [0.4, 0.0, 0.8] +st80_25: + FullName: Stratosphere source 25 day tracer + Is_Advected: true + Is_Gas: true + Is_Tracer: true + MW_g: 1.0 + Snk_Horiz: all + Snk_Mode: efolding + Snk_Period: 25 + Snk_Vert: troposphere + Src_Add: false + Src_Horiz: all + Src_Mode: constant + 
Src_Pressures: [0, 80]
+ Src_Units: ppbv
+ Src_Value: 200
+ Src_Vert: pressures
TiF1:
<< : *DST1properties
Formula: Ti
diff --git a/gcpy/units.py b/gcpy/units.py
index 121e399a..55f2ada1 100644
--- a/gcpy/units.py
+++ b/gcpy/units.py
@@ -234,7 +234,8 @@ def convert_units(
# Mass of dry air in kg (required when converting from v/v)
if 'molmol-1' in units:
- air_mass = delta_p * 100.0 / g0 * area_m2
+
+ air_mass = delta_p.values * 100.0 / g0 * area_m2.values
# Conversion factor for v/v to kg
# v/v * kg dry air / g/mol dry air * g/mol species = kg species
diff --git a/gcpy/util.py b/gcpy/util.py
index 13e44e64..beb08474 100644
--- a/gcpy/util.py
+++ b/gcpy/util.py
@@ -2,20 +2,25 @@
Internal utilities for helping to manage xarray and numpy objects
used throughout GCPy
"""
-
import os
import warnings
import shutil
from textwrap import wrap
-from yaml import safe_load as yaml_safe_load
+from yaml import safe_load
import numpy as np
import xarray as xr
-from PyPDF2 import PdfFileWriter, PdfFileReader
+from pypdf import PdfWriter, PdfReader
+from gcpy.constants import ENCODING, TABLE_WIDTH
+from gcpy.cstools import is_cubed_sphere_rst_grid
+
+# ======================================================================
+# %%%%% METHODS %%%%%
+# ======================================================================

def convert_lon(
data,
dim='lon',
- format='atlantic',
+ fmt='atlantic',
neg_dateline=True
):
"""
@@ -42,6 +47,7 @@
Returns:
data, with dimension 'dim' altered according to conversion rule
"""
+ verify_variable_type(data, (xr.DataArray, xr.Dataset))

data_copy = data.copy()

@@ -51,12 +57,13 @@
# Tweak offset for rolling the longitudes later
offset = 0 if neg_dateline else 1

- if format not in ['atlantic', 'pacific']:
- msg = f"Cannot convert longitudes for format '{format}'; please choose one of 'atlantic' or 'pacific'"
+ if fmt not in ['atlantic', 'pacific']:
+ msg = f"Cannot convert longitudes for format '{fmt}'; "
+ msg += "please choose one of 'atlantic' or 'pacific'"
raise ValueError(msg)

# Create a mask to decide how to mutate the longitude values
- if format == 'atlantic':
+ if fmt == 'atlantic':
mask = lon >= 180 if neg_dateline else lon > 180
new_lon[mask] = -(360. - lon[mask])

@@ -64,7 +71,7 @@
roll_len = len(data[dim]) // 2 - offset

- elif format == 'pacific':
+ elif fmt == 'pacific':
mask = lon < 0.
new_lon[mask] = lon[mask] + 360.

@@ -141,27 +148,75 @@ def create_display_name(
# Initialize
display_name = diagnostic_name

+ # For restart files, just split at the first underscore and return
+ # the text following the underscore. This will preserve certain
+ # species names, such as the TransportTracers species CO_25, etc.
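+ # (Worked example: "SpeciesRst_CO_25" becomes "CO_25", whereas the
+ # old split on every underscore would have returned just "CO".)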
if "SpeciesRst" in display_name: - display_name = display_name.split("_")[1] + return display_name.split("_", 1)[1] # Special handling for Inventory totals if "INV" in display_name.upper(): display_name = display_name.replace("_", " ") # Replace text - for v in ["Emis", "EMIS", "emis", "Inv", "INV", "inv"]: - display_name = display_name.replace(v, "") + for var in ["Emis", "EMIS", "emis", "Inv", "INV", "inv"]: + display_name = display_name.replace(var, "") - # Replace underscores - display_name = display_name.replace("_", " ") + # Replace only the first underscore with a space + display_name = display_name.replace("_", " ", 1) return display_name +def format_number_for_table( + number, + max_thresh=1.0e8, + min_thresh=1.0e-6, + f_fmt="18.6f", + e_fmt="18.8e" +): + """ + Returns a format string for use in the "print_totals" routine. + If the number is greater than a maximum threshold or smaller + than a minimum threshold, then use scientific notation format. + Otherwise use floating-piont format. + + Special case: do not convert 0.0 to exponential notation. + + Args: + ----- + number : float + Number to be printed + + max_thresh, min_thresh: float + If |number| > max_thresh, use scientific notation. + If |number| < min_thresh, use scientific notation + + f_fmt, e_fmt : str + The default floating point string and default scientific + notation string. + Default values: 18.6f, 18.6e + + Returns: + -------- + fmt_str : str + Formatted string that can be inserted into the print + statement in print_totals. + """ + abs_number = np.abs(number) + + if not abs_number > 1e-60: + return f"{number:{f_fmt}}" + + if abs_number > max_thresh or abs_number < min_thresh: + return f"{number:{e_fmt}}" + return f"{number:{f_fmt}}" + + def print_totals( ref, dev, - f, + ofile, diff_list, masks=None, ): @@ -174,7 +229,7 @@ def print_totals( The first DataArray to be compared (aka "Reference") dev: xarray DataArray The second DataArray to be compared (aka "Development") - f: file + ofile: file File object denoting a text file where output will be directed. Keyword Args (optional): @@ -193,14 +248,9 @@ def print_totals( # ================================================================== # Initialization and error checks # ================================================================== - - # Make sure that both Ref and Dev are xarray DataArray objects - if not isinstance(ref, xr.DataArray): - raise TypeError("The 'ref' argument must be an xarray DataArray!") - if not isinstance(dev, xr.DataArray): - raise TypeError("The 'dev' argument must be an xarray DataArray!") - if not isinstance(diff_list, list): - raise TypeError("The 'diff_list' argument must be a list!") + verify_variable_type(ref, xr.DataArray) + verify_variable_type(dev, xr.DataArray) + verify_variable_type(diff_list, list) # Determine if either Ref or Dev have all NaN values: ref_is_all_nan = np.isnan(ref.values).all() @@ -208,7 +258,7 @@ def print_totals( # If Ref and Dev do not contain all NaNs, then make sure # that Ref and Dev have the same units before proceeding. - if (not ref_is_all_nan) and (not dev_is_all_nan): + if not ref_is_all_nan and not dev_is_all_nan: if ref.units != dev.units: msg = f"Ref has units {ref.units}, but Dev has units {dev.units}!" 
raise ValueError(msg) @@ -216,23 +266,22 @@ def print_totals( # ================================================================== # Get the diagnostic name and units # ================================================================== + diagnostic_name = dev.name if dev_is_all_nan: diagnostic_name = ref.name - else: - diagnostic_name = dev.name - # Create the display name by editing the diagnostic name + # Create the display name for the table display_name = create_display_name(diagnostic_name) # Get the species name from the display name species_name = display_name - c = species_name.find(" ") - if c > 0: - species_name = display_name[0:c] + cidx = species_name.find(" ") + if cidx > 0: + species_name = display_name[0:cidx] # Special handling for totals if "_TOTAL" in diagnostic_name.upper(): - print("-"*90, file=f) + print("-" * TABLE_WIDTH, file=ofile) # ================================================================== # Sum the Ref array (or set to NaN if missing) @@ -282,13 +331,18 @@ def print_totals( pctdiff = np.nan else: pctdiff = ((total_dev - total_ref) / total_ref) * 100.0 - if total_ref < 1.0e-15: + if np.abs(total_ref) < 1.0e-15: pctdiff = np.nan # ================================================================== # Write output to file and return # ================================================================== - print(f"{display_name.ljust(19)}: {total_ref:18.6f} {total_dev:18.6f} {diff:12.6f} {pctdiff:8.3f} {diff_str}", file=f) + ref_fmt = format_number_for_table(total_ref) + dev_fmt = format_number_for_table(total_dev) + diff_fmt = format_number_for_table(diff) + pctdiff_fmt = format_number_for_table(pctdiff) + + print(f"{display_name[0:19].ljust(19)}: {ref_fmt} {dev_fmt} {diff_fmt} {pctdiff_fmt} {diff_str}", file=ofile) return diff_list @@ -374,27 +428,28 @@ def add_bookmarks_to_pdf( """ # Setup - pdfobj = open(pdfname, "rb") - input_pdf = PdfFileReader(pdfobj, overwriteWarnings=False) - output_pdf = PdfFileWriter() + with open(pdfname, "rb") as pdfobj: + input_pdf = PdfReader(pdfobj) #, overwriteWarnings=False) + output_pdf = PdfWriter() - for i, varname in enumerate(varlist): - bookmarkname = varname.replace(remove_prefix, "") - if verbose: - print(f"Adding bookmark for {varname} with name {bookmarkname}") - output_pdf.addPage(input_pdf.getPage(i)) - output_pdf.addBookmark(bookmarkname, i) - output_pdf.setPageMode("/UseOutlines") + for i, varname in enumerate(varlist): + bookmarkname = varname.replace(remove_prefix, "") + if verbose: + print(f"Adding bookmark for {varname} with name {bookmarkname}") + output_pdf.add_page(input_pdf.pages[i]) + output_pdf.add_outline_item(bookmarkname, i) + output_pdf.page_mode = "/UseOutlines" + + # Write to temp file + pdfname_tmp = pdfname + "_with_bookmarks.pdf" - # Write to temp file - pdfname_tmp = pdfname + "_with_bookmarks.pdf" - outputstream = open(pdfname_tmp, "wb") - output_pdf.write(outputstream) - outputstream.close() + with open(pdfname_tmp, "wb") as output_stream: + output_pdf.write(output_stream) + output_stream.close() - # Rename temp file with the target name - os.rename(pdfname_tmp, pdfname) - pdfobj.close() + # Rename temp file with the target name + os.rename(pdfname_tmp, pdfname) + pdfobj.close() def add_nested_bookmarks_to_pdf( @@ -433,62 +488,62 @@ def add_nested_bookmarks_to_pdf( # ================================================================== # Setup # ================================================================== - pdfobj = open(pdfname, "rb") - input_pdf = PdfFileReader(pdfobj, 
overwriteWarnings=False) - output_pdf = PdfFileWriter() - warninglist = [k.replace(remove_prefix, "") for k in warninglist] + with open(pdfname, "rb") as pdfobj: + input_pdf = PdfReader(pdfobj) + output_pdf = PdfWriter() + warninglist = [k.replace(remove_prefix, "") for k in warninglist] + + # =============================================================== + # Loop over the subcats in this category; make parent bookmark + # =============================================================== + i = -1 + for subcat in catdict[category]: + + # First check that there are actual variables for + # this subcategory; otherwise skip + numvars = 0 + if catdict[category][subcat]: + for varname in catdict[category][subcat]: + if varname in warninglist: + continue + numvars += 1 + else: + continue + if numvars == 0: + continue - # ================================================================== - # Loop over the subcategories in this category; make parent bookmark - # ================================================================== - i = -1 - for subcat in catdict[category]: + # There are non-zero variables to plot in this subcategory + i = i + 1 + output_pdf.add_page(input_pdf.pages[i]) + parent = output_pdf.add_outline_item(subcat, i) + output_pdf.page_mode = "/UseOutlines" + first = True - # First check that there are actual variables for - # this subcategory; otherwise skip - numvars = 0 - if catdict[category][subcat]: + # Loop over variables in this subcategory; make children bookmarks for varname in catdict[category][subcat]: if varname in warninglist: + print(f"Warning: skipping {varname}") continue - numvars += 1 - else: - continue - if numvars == 0: - continue - - # There are non-zero variables to plot in this subcategory - i = i + 1 - output_pdf.addPage(input_pdf.getPage(i)) - parent = output_pdf.addBookmark(subcat, i) - output_pdf.setPageMode("/UseOutlines") - first = True - - # Loop over variables in this subcategory; make children bookmarks - for varname in catdict[category][subcat]: - if varname in warninglist: - print(f"Warning: skipping {varname}") - continue - if first: - output_pdf.addBookmark(varname, i, parent) - first = False - else: - i = i + 1 - output_pdf.addPage(input_pdf.getPage(i)) - output_pdf.addBookmark(varname, i, parent) - output_pdf.setPageMode("/UseOutlines") + if first: + output_pdf.add_outline_item(varname, i, parent) + first = False + else: + i = i + 1 + output_pdf.add_page(input_pdf.pages[i]) + output_pdf.add_outline_item(varname, i, parent) + output_pdf.page_mode = "/UseOutlines" - # ================================================================== - # Write to temp file - # ================================================================== - pdfname_tmp = pdfname + "_with_bookmarks.pdf" - outputstream = open(pdfname_tmp, "wb") - output_pdf.write(outputstream) - outputstream.close() + # ============================================================== + # Write to temp file + # ============================================================== + pdfname_tmp = pdfname + "_with_bookmarks.pdf" + with open(pdfname_tmp, "wb") as output_stream: + output_pdf.write(output_stream) + output_stream.close() - # Rename temp file with the target name - os.rename(pdfname_tmp, pdfname) - pdfobj.close() + # Rename temp file with the target name + os.rename(pdfname_tmp, pdfname) + pdfobj.close() def add_missing_variables( @@ -529,12 +584,8 @@ def add_missing_variables( # ================================================================== # Initialize # 
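Both bookmark routines above migrate from PyPDF2 (PdfFileReader/PdfFileWriter, addPage, addBookmark, setPageMode) to its successor pypdf. A condensed sketch of the renamed calls; the file names and bookmark titles here are invented:

```python
from pypdf import PdfReader, PdfWriter

with open("plots.pdf", "rb") as pdfobj:          # hypothetical input file
    reader = PdfReader(pdfobj)                   # was: PdfFileReader(...)
    writer = PdfWriter()                         # was: PdfFileWriter()

    for i, title in enumerate(["O3", "CO", "NO2"]):
        writer.add_page(reader.pages[i])         # was: addPage(getPage(i))
        writer.add_outline_item(title, i)        # was: addBookmark(title, i)

    writer.page_mode = "/UseOutlines"            # was: setPageMode(...)

    with open("plots_with_bookmarks.pdf", "wb") as ofile:
        writer.write(ofile)
```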
================================================================== - - # Make sure that refdata and devdata are both xarray Dataset objects - if not isinstance(refdata, xr.Dataset): - raise TypeError("The refdata object must be an xarray Dataset!") - if not isinstance(devdata, xr.Dataset): - raise TypeError("The refdata object must be an xarray Dataset!") + verify_variable_type(refdata, xr.Dataset) + verify_variable_type(devdata, xr.Dataset) # Find common variables as well as variables only in one or the other vardict = compare_varnames(refdata, devdata, quiet=True) @@ -550,17 +601,17 @@ # variables as missing values # when we plot against refdata. # ============================================================== devlist = [devdata] - for v in refonly: + for var in refonly: if verbose: - print(f"Creating array of NaN in devdata for: {v}") - dr = create_blank_dataarray( - name=refdata[v].name, + print(f"Creating array of NaN in devdata for: {var}") + darr = create_blank_dataarray( + name=refdata[var].name, sizes=devdata.sizes, coords=devdata.coords, - attrs=refdata[v].attrs, + attrs=refdata[var].attrs, **kwargs ) - devlist.append(dr) + devlist.append(darr) devdata = xr.merge(devlist) # ============================================================== @@ -570,55 +621,61 @@ # variables as missing values # when we plot against devdata. # ================================================================== reflist = [refdata] - for v in devonly: + for var in devonly: if verbose: - print(f"Creating array of NaN in refdata for: {v}") - dr = create_blank_dataarray( - name=devdata[v].name, + print(f"Creating array of NaN in refdata for: {var}") + darr = create_blank_dataarray( + name=devdata[var].name, sizes=refdata.sizes, coords=refdata.coords, - attrs=devdata[v].attrs, + attrs=devdata[var].attrs, **kwargs ) - reflist.append(dr) + reflist.append(darr) refdata = xr.merge(reflist) return refdata, devdata -def reshape_MAPL_CS( - da -): +def reshape_MAPL_CS(darr): """ Reshapes data if its dimensions indicate MAPL v1.0.0+ output + (i.e. reshapes from "diagnostic" to "checkpoint" dimension format.) + Args: - da: xarray DataArray - Data array variable + ----- + darr: xarray DataArray + The input data array. Returns: - data: xarray DataArray - Data with dimensions renamed and transposed to match old MAPL format - """ + -------- + darr: xarray DataArray + The modified data array (w/ dimensions renamed & transposed). + Remarks: + -------- + Currently only used for GCPy plotting code.
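add_missing_variables() pads each dataset with all-NaN stand-ins for variables that exist only in the other dataset, so comparison plots never fail on a missing variable. The same idea in plain xarray, with toy data and without GCPy's create_blank_dataarray() helper:

```python
import numpy as np
import xarray as xr

refdata = xr.Dataset({"SpeciesConcVV_O3": ("lon", np.array([1.0, 2.0]))})
devdata = xr.Dataset({"SpeciesConcVV_CO": ("lon", np.array([3.0, 4.0]))})

# O3 exists only in refdata; add an all-NaN stand-in to devdata
refonly = set(refdata.data_vars) - set(devdata.data_vars)
for var in refonly:
    devdata = xr.merge([devdata, xr.full_like(refdata[var], np.nan)])

print(devdata["SpeciesConcVV_O3"].values)  # [nan nan]
```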
+ """ # Suppress annoying future warnings for now warnings.filterwarnings("ignore", category=FutureWarning) - #if type(da) != np.ndarray: - if not isinstance(da, np.ndarray): - vdims = da.dims - if "nf" in vdims and "Xdim" in vdims and "Ydim" in vdims: - da = da.stack(lat=("nf", "Ydim")) - da = da.rename({"Xdim": "lon"}) - - if "lev" in da.dims and "time" in da.dims: - da = da.transpose("time", "lev", "lat", "lon") - elif "lev" in da.dims: - da = da.transpose("lev", "lat", "lon") - elif "time" in da.dims: - da = da.transpose("time", "lat", "lon") - else: - da = da.transpose("lat", "lon") - return da + # Only do the following for DataArray objects + # (otherwise just fall through and return the original argument as-is) + if isinstance(darr, xr.DataArray): + with xr.set_options(keep_attrs=True): + if "nf" in darr.dims and \ + "Xdim" in darr.dims and "Ydim" in darr.dims: + darr = darr.stack(lat=("nf", "Ydim")) + darr = darr.rename({"Xdim": "lon"}) + if "lev" in darr.dims and "time" in darr.dims: + darr = darr.transpose("time", "lev", "lat", "lon") + elif "lev" in darr.dims: + darr = darr.transpose("lev", "lat", "lon") + elif "time" in darr.dims: + darr = darr.transpose("time", "lat", "lon") + else: + darr = darr.transpose("lat", "lon") + return darr def get_diff_of_diffs( @@ -653,39 +710,39 @@ def get_diff_of_diffs( # if the coords do not align then set time dimensions equal try: xr.align(dev, ref, join='exact') - except: + except BaseException: ref.coords["time"] = dev.coords["time"] with xr.set_options(keep_attrs=True): absdiffs = dev - ref fracdiffs = dev / ref - for v in dev.data_vars.keys(): + for var in dev.data_vars.keys(): # Ensure the diffs Dataset includes attributes - absdiffs[v].attrs = dev[v].attrs - fracdiffs[v].attrs = dev[v].attrs + absdiffs[var].attrs = dev[var].attrs + fracdiffs[var].attrs = dev[var].attrs elif 'nf' in ref.dims and 'nf' in dev.dims: # Include special handling if cubed sphere grid dimension names are different # since they changed in MAPL v1.0.0. 
if "lat" in ref.dims and "Xdim" in dev.dims: ref_newdimnames = dev.copy() - for v in dev.data_vars.keys(): - if "Xdim" in dev[v].dims: - ref_newdimnames[v].values = ref[v].values.reshape( - dev[v].values.shape) + for var in dev.data_vars.keys(): + if "Xdim" in dev[var].dims: + ref_newdimnames[var].values = ref[var].values.reshape( + dev[var].values.shape) # NOTE: the reverse conversion is gchp_dev[v].stack(lat=("nf","Ydim")).transpose( # "time","lev","lat","Xdim").values with xr.set_options(keep_attrs=True): absdiffs = dev.copy() fracdiffs = dev.copy() - for v in dev.data_vars.keys(): - if "Xdim" in dev[v].dims or "lat" in dev[v].dims: - absdiffs[v].values = dev[v].values - ref[v].values - fracdiffs[v].values = dev[v].values / ref[v].values + for var in dev.data_vars.keys(): + if "Xdim" in dev[var].dims or "lat" in dev[var].dims: + absdiffs[var].values = dev[var].values - ref[var].values + fracdiffs[var].values = dev[var].values / ref[var].values # NOTE: The diffs Datasets are created without variable # attributes; we have to reattach them - absdiffs[v].attrs = dev[v].attrs - fracdiffs[v].attrs = dev[v].attrs + absdiffs[var].attrs = dev[var].attrs + fracdiffs[var].attrs = dev[var].attrs else: print('Diff-of-diffs plot supports only identical grid types (lat/lon or cubed-sphere)' + \ ' within each dataset pair') @@ -695,7 +752,7 @@ def get_diff_of_diffs( def slice_by_lev_and_time( - ds, + dset, varname, itime, ilev, @@ -705,7 +762,7 @@ def slice_by_lev_and_time( Given a Dataset, returns a DataArray sliced by desired time and level. Args: - ds: xarray Dataset + dset: xarray Dataset Dataset containing GEOS-Chem data. varname: str Variable name for data variable to be sliced @@ -717,69 +774,84 @@ def slice_by_lev_and_time( Whether to flip ilev to be indexed from ground or top of atmosphere Returns: - dr: xarray DataArray + darr: xarray DataArray DataArray of data variable sliced according to ilev and itime """ # used in compare_single_level and compare_zonal_mean to get dataset slices - if not isinstance(ds, xr.Dataset): - msg="ds is not of type xarray.Dataset!" - raise TypeError(msg) - if not varname in ds.data_vars.keys(): + verify_variable_type(dset, xr.Dataset) + if not varname in dset.data_vars.keys(): msg="Could not find 'varname' in ds!" raise ValueError(msg) # NOTE: isel no longer seems to work on a Dataset, so # first createthe DataArray object, then use isel on it. 
# -- Bob Yantosca (19 Jan 2023) - dr = ds[varname] - vdims = dr.dims - if ("time" in vdims and dr.time.size > 0) and "lev" in vdims: + darr = dset[varname] + vdims = darr.dims + if ("time" in vdims and darr.time.size > 0) and "lev" in vdims: if flip: - fliplev=len(dr['lev']) - 1 - ilev - return dr.isel(time=itime, lev=fliplev) - return dr.isel(time=itime, lev=ilev) + fliplev=len(darr['lev']) - 1 - ilev + return darr.isel(time=itime, lev=fliplev) + return darr.isel(time=itime, lev=ilev) if ("time" not in vdims or itime == -1) and "lev" in vdims: if flip: - fliplev= len(dr['lev']) - 1 - ilev - return dr.isel(lev=fliplev) - return dr.isel(lev=ilev) - if ("time" in vdims and dr.time.size > 0 and itime != -1) and \ + fliplev= len(darr['lev']) - 1 - ilev + return darr.isel(lev=fliplev) + return darr.isel(lev=ilev) + if ("time" in vdims and darr.time.size > 0 and itime != -1) and \ "lev" not in vdims: - return dr.isel(time=itime) - return dr + return darr.isel(time=itime) + return darr def rename_and_flip_gchp_rst_vars( - ds + dset ): ''' - Transforms a GCHP restart dataset to match GCC names and level convention + Transforms a GCHP restart dataset to match GCClassic names + and level conventions. Args: - ds: xarray Dataset - Dataset containing GCHP restart file data, such as variables - SPC_{species}, BXHEIGHT, DELP_DRY, and TropLev, with level - convention down (level 0 is top-of-atmosphere). + dset: xarray Dataset + The input dataset. Returns: - ds: xarray Dataset - Dataset containing GCHP restart file data with names and level - convention matching GCC restart. Variables include - SpeciesRst_{species}, Met_BXHEIGHT, Met_DELPDRY, and Met_TropLev, - with level convention up (level 0 is surface). + dset: xarray Dataset + If the input dataset is from a GCHP restart file, then + dset will contain the original data with variables renamed + to match the GEOS-Chem Classic naming conventions, and + with levels indexed as lev:positive="up". Otherwise, the + original data will be returned. ''' - for v in ds.data_vars.keys(): - if v.startswith('SPC_'): - spc = v.replace('SPC_', '') - ds = ds.rename({v: 'SpeciesRst_' + spc}) - elif v == 'DELP_DRY': - ds = ds.rename({"DELP_DRY": "Met_DELPDRY"}) - elif v == 'BXHEIGHT': - ds = ds.rename({"BXHEIGHT": "Met_BXHEIGHT"}) - elif v == 'TropLev': - ds = ds.rename({"TropLev": "Met_TropLev"}) - ds = ds.sortby('lev', ascending=False) - return ds + verify_variable_type(dset, xr.Dataset) + + # Return if this dataset is not from a GCHP checkpoint/restart file + if not is_cubed_sphere_rst_grid(dset): + return dset + + # Create dictionary of variable name replacements + old_to_new = {} + for var in dset.data_vars.keys(): + # TODO: Think of better algorithm in case we ever change + # the internal state to start with something other than "SPC_".
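The flip keyword handled above is simple index arithmetic: it converts a level index counted from the surface into one counted from the top of the atmosphere. For example:

```python
import numpy as np
import xarray as xr

darr = xr.DataArray(np.arange(4.0), dims=["lev"],
                    coords={"lev": np.arange(4)})  # toy 4-level column
ilev = 0                                   # surface, counting upward
fliplev = len(darr["lev"]) - 1 - ilev      # same layer counted from TOA
print(darr.isel(lev=fliplev).item())       # 3.0
```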
+ if var.startswith("SPC_"): + spc = var.replace('SPC_', '') + old_to_new[var] = 'SpeciesRst_' + spc + if var == "DELP_DRY": + old_to_new["DELP_DRY"] = "Met_DELPDRY" + if var == "BXHEIGHT": + old_to_new["BXHEIGHT"] = "Met_BXHEIGHT" + if var == "TropLev": + old_to_new["TropLev"] = "Met_TropLev" + + # Replace variable names in one operation + dset = dset.rename(old_to_new) + + # Flip levels + dset = dset.sortby('lev', ascending=False) + dset.lev.attrs["positive"] = "up" + + return dset def dict_diff( @@ -798,6 +870,9 @@ def dict_diff( result: dict Key-by-key difference of dict1 - dict0 """ + verify_variable_type(dict0, dict) + verify_variable_type(dict1, dict) + result = {} for key, _ in dict0.items(): result[key] = dict1[key] - dict0[key] @@ -853,35 +928,38 @@ def compare_varnames( devonly List of 2D or 3D variables that are only present in devdata """ - refvars = [k for k in refdata.data_vars.keys()] - devvars = [k for k in devdata.data_vars.keys()] + verify_variable_type(refdata, xr.Dataset) + verify_variable_type(devdata, xr.Dataset) + + refvars = list(refdata.data_vars.keys()) + devvars = list(devdata.data_vars.keys()) commonvars = sorted(list(set(refvars).intersection(set(devvars)))) - refonly = [v for v in refvars if v not in devvars] - devonly = [v for v in devvars if v not in refvars] + refonly = [var for var in refvars if var not in devvars] + devonly = [var for var in devvars if var not in refvars] dimmismatch = [v for v in commonvars if refdata[v].ndim != devdata[v].ndim] # Assume plottable data has lon and lat # This is OK for purposes of benchmarking # -- Bob Yantosca (09 Feb 2023) - commonvarsData = [ - v for v in commonvars if ( - ("lat" in refdata[v].dims or "Ydim" in refdata[v].dims) + commonvars_data = [ + var for var in commonvars if ( + ("lat" in refdata[var].dims or "Ydim" in refdata[var].dims) and - ("lon" in refdata[v].dims or "Xdim" in refdata[v].dims) + ("lon" in refdata[var].dims or "Xdim" in refdata[var].dims) ) - ] - commonvarsOther = [ - v for v in commonvars if ( - v not in commonvarsData - ) ] - commonvars2D = [ - v for v in commonvars if ( - (v in commonvarsData) and ("lev" not in refdata[v].dims) + commonvars_other = [ + var for var in commonvars if ( + var not in commonvars_data ) ] - commonvars3D = [ - v for v in commonvars if ( - (v in commonvarsData) and ("lev" in refdata[v].dims) + commonvars_2d = [ + var for var in commonvars if ( + (var in commonvars_data) and ("lev" not in refdata[var].dims) + ) + ] + commonvars_3d = [ + var for var in commonvars if ( + (var in commonvars_data) and ("lev" in refdata[var].dims) ) ] @@ -909,16 +987,16 @@ def compare_varnames( # For safety's sake, remove the 0-D and 1-D variables from # commonvarsData, refonly, and devonly. This will ensure that # these lists will only contain variables that can be plotted. 
- commonvarsData = [v for v in commonvars if v not in commonvarsOther] - refonly = [v for v in refonly if v not in commonvarsOther] - devonly = [v for v in devonly if v not in commonvarsOther] + commonvars_data = [var for var in commonvars if var not in commonvars_other] + refonly = [var for var in refonly if var not in commonvars_other] + devonly = [var for var in devonly if var not in commonvars_other] return { "commonvars": commonvars, - "commonvars2D": commonvars2D, - "commonvars3D": commonvars3D, - "commonvarsData": commonvarsData, - "commonvarsOther": commonvarsOther, + "commonvars2D": commonvars_2d, + "commonvars3D": commonvars_3d, + "commonvarsData": commonvars_data, + "commonvarsOther": commonvars_other, "refonly": refonly, "devonly": devonly } @@ -969,7 +1047,7 @@ def compare_stats(refdata, refstr, devdata, devstr, varname): def convert_bpch_names_to_netcdf_names( - ds, + dset, verbose=False ): """ @@ -1044,7 +1122,7 @@ def convert_bpch_names_to_netcdf_names( old_to_new = {} # Loop over all variable names in the data set - for variable_name in ds.data_vars.keys(): + for variable_name in dset.data_vars.keys(): # Save the original variable name, since this is the name # that we actually need to replace in the dataset. @@ -1151,7 +1229,7 @@ def convert_bpch_names_to_netcdf_names( # Overwrite certain variable names if newvar in special_vars: - newvar = special_vars[newvar] + newvar = special_vars.get(newvar) # Update the dictionary of names with this pair old_to_new.update({original_variable_name: newvar}) @@ -1166,10 +1244,10 @@ def convert_bpch_names_to_netcdf_names( if verbose: print("\nRenaming variables in the data...") with xr.set_options(keep_attrs=True): - ds = ds.rename(name_dict=old_to_new) + dset = dset.rename(name_dict=old_to_new) # Return the dataset - return ds + return dset def get_lumped_species_definitions(): @@ -1210,7 +1288,7 @@ def archive_lumped_species_definitions( def add_lumped_species_to_dataset( - ds, + dset, lspc_dict=None, lspc_yaml="", verbose=False, @@ -1226,7 +1304,7 @@ def add_lumped_species_to_dataset( collection output. Args: - ds: xarray Dataset + dset: xarray Dataset An xarray Dataset object prior to adding lumped species. Keyword Args (optional): @@ -1254,7 +1332,7 @@ def add_lumped_species_to_dataset( Default value: "SpeciesConcVV_" Returns: - ds: xarray Dataset + dset: xarray Dataset A new xarray Dataset object containing all of the original species plus new lumped species. 
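The variable bookkeeping in compare_varnames() is plain set arithmetic over the two datasets' variable names, for example:

```python
refvars = ["O3", "CO", "NO"]   # invented variable names
devvars = ["O3", "CO", "NO2"]

commonvars = sorted(set(refvars) & set(devvars))
refonly = [var for var in refvars if var not in devvars]
devonly = [var for var in devvars if var not in refvars]

print(commonvars)  # ['CO', 'O3']
print(refonly)     # ['NO']
print(devonly)     # ['NO2']
```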
""" @@ -1275,9 +1353,9 @@ def add_lumped_species_to_dataset( # Get a dummy DataArray to use for initialization dummy_darr = None - for var in ds.data_vars: + for var in dset.data_vars: if prefix in var or prefix.replace("VV", "") in var: - dummy_darr = ds[var] + dummy_darr = dset[var] dummy_type = dummy_darr.dtype dummy_shape = dummy_darr.shape break @@ -1288,23 +1366,23 @@ def add_lumped_species_to_dataset( # Create a list with a copy of the dummy DataArray object n_lumped_spc = len(lspc_dict) lumped_spc = [None] * n_lumped_spc - for v, spcname in enumerate(lspc_dict): - lumped_spc[v] = dummy_darr.copy(deep=False) - lumped_spc[v].name = prefix + spcname - lumped_spc[v].values = np.full(dummy_shape, 0.0, dtype=dummy_type) + for var, spcname in enumerate(lspc_dict): + lumped_spc[var] = dummy_darr.copy(deep=False) + lumped_spc[var].name = prefix + spcname + lumped_spc[var].values = np.full(dummy_shape, 0.0, dtype=dummy_type) # Loop over lumped species list - for v, lspc in enumerate(lumped_spc): + for var, lspc in enumerate(lumped_spc): # Search key for lspc_dict is lspc.name minus the prefix - c = lspc.name.find("_") - key = lspc.name[c+1:] + cidx = lspc.name.find("_") + key = lspc.name[cidx+1:] # Check if overlap with existing species - if lspc.name in ds.data_vars and overwrite: - ds.drop(lspc.name) + if lspc.name in dset.data_vars and overwrite: + dset.drop(lspc.name) else: - assert(lspc.name not in ds.data_vars), \ + assert(lspc.name not in dset.data_vars), \ f"{lspc.name} already in dataset. To overwrite pass overwrite=True." # Verbose prints @@ -1315,13 +1393,13 @@ def add_lumped_species_to_dataset( num_spc = 0 for _, spcname in enumerate(lspc_dict[key]): varname = prefix + spcname - if varname not in ds.data_vars: + if varname not in dset.data_vars: if verbose: print(f"Warning: {varname} needed for {lspc_dict[key][spcname]} not in dataset") continue if verbose: print(f" -> adding {varname} with scale {lspc_dict[key][spcname]}") - lspc.values += ds[varname].values * lspc_dict[key][spcname] + lspc.values += dset[varname].values * lspc_dict[key][spcname] num_spc += 1 # Replace values with NaN if no species found in dataset @@ -1332,10 +1410,10 @@ def add_lumped_species_to_dataset( # Insert the DataSet into the list of DataArrays # so that we can only do the merge operation once - lumped_spc.insert(0, ds) - ds = xr.merge(lumped_spc) + lumped_spc.insert(0, dset) + dset = xr.merge(lumped_spc) - return ds + return dset def filter_names( @@ -1360,16 +1438,13 @@ def filter_names( """ if text != "": - filtered_names = [k for k in names if text in k] - else: - filtered_names = [k for k in names if k] - - return filtered_names + return [var for var in names if text in var] + return [var for var in names if var] def divide_dataset_by_dataarray( - ds, - dr, + dset, + darr, varlist=None ): """ @@ -1382,9 +1457,9 @@ def divide_dataset_by_dataarray( fraction of time it was local noon in each grid box, etc. Args: - ds: xarray Dataset + dset: xarray Dataset The Dataset object containing variables to be divided. - dr: xarray DataArray + darr: xarray DataArray The DataArray object that will be used to divide the variables of ds. @@ -1395,21 +1470,18 @@ def divide_dataset_by_dataarray( of ds will be divided by dr. Default value: None Returns: - ds_new: xarray Dataset - A new xarray Dataset object with its variables divided by dr. + dset_new: xarray Dataset + A new xarray Dataset object with its variables divided + by darr. 
""" # ----------------------------- # Check arguments # ----------------------------- - if not isinstance(ds, xr.Dataset): - raise TypeError("The ds argument must be of type xarray.Dataset!") - - if not isinstance(dr, xr.DataArray): - raise TypeError("The dr argument must be of type xarray.DataArray!") - + verify_variable_type(dset, xr.Dataset) + verify_variable_type(darr, xr.DataArray) if varlist is None: - varlist = ds.data_vars.keys() + varlist = dset.data_vars.keys() # ----------------------------- # Do the division @@ -1419,12 +1491,12 @@ def divide_dataset_by_dataarray( with xr.set_options(keep_attrs=True): # Loop over variables - for v in varlist: + for var in varlist: # Divide each variable of ds by dr - ds[v] = ds[v] / dr + dset[var] = dset[var] / darr - return ds + return dset def get_shape_of_data( @@ -1462,7 +1534,6 @@ def get_shape_of_data( (['time', 'lev', 'lat', 'lon'] for GEOS-Chem "Classic", or ['time', 'lev', 'nf', 'Ydim', 'Xdim'] for GCHP. """ - # Validate the data argument if isinstance(data, (xr.Dataset, xr.DataArray)): sizelist = data.sizes @@ -1482,10 +1553,10 @@ def get_shape_of_data( # Return a tuple with the shape of each dimension (and also a # list of each dimension if return_dims is True). - for d in dimlist: - if d in sizelist: - shape += (sizelist[d],) - dims.append(d) + for dim in dimlist: + if dim in sizelist: + shape += (sizelist[dim],) + dims.append(dim) if return_dims: return shape, dims @@ -1493,7 +1564,7 @@ def get_shape_of_data( def get_area_from_dataset( - ds + dset ): """ Convenience routine to return the area variable (which is @@ -1501,17 +1572,18 @@ def get_area_from_dataset( for GCHP) from an xarray Dataset object. Args: - ds: xarray Dataset + dset: xarray Dataset The input dataset. Returns: area_m2: xarray DataArray The surface area in m2, as found in ds. """ + verify_variable_type(dset, xr.Dataset) - if "Met_AREAM2" in ds.data_vars.keys(): - return ds["Met_AREAM2"] - if "AREA" in ds.data_vars.keys(): - return ds["AREA"] + if "Met_AREAM2" in dset.data_vars.keys(): + return dset["Met_AREAM2"] + if "AREA" in dset.data_vars.keys(): + return dset["AREA"] msg = ( 'An area variable ("AREA" or "Met_AREAM2" is missing' + " from this dataset!" @@ -1520,7 +1592,7 @@ def get_area_from_dataset( def get_variables_from_dataset( - ds, + dset, varlist ): """ @@ -1529,13 +1601,13 @@ def get_variables_from_dataset( found in the Dataset, or else an error will be raised. Args: - ds: xarray Dataset + dset: xarray Dataset The input dataset. varlist: list of str List of DataArray variables to extract from ds. Returns: - ds_subset: xarray Dataset + dset_subset: xarray Dataset A new data set containing only the variables that were requested. @@ -1543,16 +1615,17 @@ def get_variables_from_dataset( Use this routine if you absolutely need all of the requested variables to be returned. Otherwise """ + verify_variable_type(dset, xr.Dataset) - ds_subset = xr.Dataset() - for v in varlist: - if v in ds.data_vars.keys(): - ds_subset = xr.merge([ds_subset, ds[v]]) + dset_subset = xr.Dataset() + for var in varlist: + if var in dset.data_vars.keys(): + dset_subset = xr.merge([dset_subset, dset[var]]) else: - msg = f"{v} was not found in this dataset!" + msg = f"{var} was not found in this dataset!" 
raise ValueError(msg) - return ds_subset + return dset_subset def create_blank_dataarray( @@ -1640,7 +1713,7 @@ def create_blank_dataarray( def check_for_area( - ds, + dset, gcc_area_name="AREA", gchp_area_name="Met_AREAM2" ): @@ -1654,7 +1727,7 @@ def check_for_area( GEOS-Chem "Classic" area name if it is present. Args: - ds: xarray Dataset + dset: xarray Dataset The Dataset object that will be checked. Keyword Args (optional): @@ -1670,18 +1743,20 @@ def check_for_area( ds: xarray Dataset The modified Dataset object """ + verify_variable_type(dset, xr.Dataset) - found_gcc = gcc_area_name in ds.data_vars.keys() - found_gchp = gchp_area_name in ds.data_vars.keys() + found_gcc = gcc_area_name in dset.data_vars.keys() + found_gchp = gchp_area_name in dset.data_vars.keys() - if (not found_gcc) and (not found_gchp): - msg = f"Could not find {gcc_area_name} or {gchp_area_name} in the dataset!" + if not found_gcc and not found_gchp: + msg = f"Could not find {gcc_area_name} or {gchp_area_name} " + msg += "in the dataset!" raise ValueError(msg) if found_gchp: - ds[gcc_area_name] = ds[gchp_area_name] + dset[gcc_area_name] = dset[gchp_area_name] - return ds + return dset def get_filepath( @@ -1690,7 +1765,6 @@ def get_filepath( date, is_gchp=False, gchp_res="c00", - gchp_is_pre_13_1=False, gchp_is_pre_14_0=False ): """ @@ -1716,10 +1790,6 @@ def get_filepath( Only needed for restart files. Default value: "c00". - gchp_is_pre_13_1: bool - Set this switch to True to obtain GCHP file pathnames used in - versions before 13.1. Only needed for diagnostic files. - gchp_is_pre_14_0: bool Set this switch to True to obtain GCHP file pathnames used in versions before 14.0. Only needed for restart files. @@ -1749,10 +1819,7 @@ def get_filepath( "GEOSChem.Restart." ) else: - if gchp_is_pre_13_1: - file_tmpl = os.path.join(datadir, f"GCHP.{col}.") - else: - file_tmpl = os.path.join(datadir, f"GEOSChem.{col}.") + file_tmpl = os.path.join(datadir, f"GEOSChem.{col}.") else: if "Emissions" in col: file_tmpl = os.path.join(datadir, "HEMCO_diagnostics.") @@ -1771,7 +1838,8 @@ def get_filepath( # Set file path. Include grid resolution if GCHP restart file. path = file_tmpl + date_str + extension if is_gchp and "Restart" in col and not gchp_is_pre_14_0: - path = file_tmpl + date_str[:len(date_str)-2] + "z." + gchp_res + extension + path = file_tmpl + date_str[:len(date_str)-2] + \ + "z." + gchp_res + extension return path @@ -1782,7 +1850,6 @@ def get_filepaths( dates, is_gchp=False, gchp_res="c00", - gchp_is_pre_13_1=False, gchp_is_pre_14_0=False ): """ @@ -1808,10 +1875,6 @@ def get_filepaths( Only needed for restart files. Default value: "c00". - gchp_is_pre_13_1: bool - Set this switch to True to obtain GCHP file pathnames used in - versions before 13.1. Only needed for diagnostic files. - gchp_is_pre_14_0: bool Set this switch to True to obtain GCHP file pathnames used in versions before 14.0. Only needed for diagnostic files. @@ -1838,7 +1901,7 @@ def get_filepaths( # ================================================================== # Create the file list # ================================================================== - for c, collection in enumerate(collections): + for c_idx, collection in enumerate(collections): separator = "_" extension = "z.nc4" @@ -1859,16 +1922,10 @@ def get_filepaths( "GEOSChem.Restart." ) else: - if gchp_is_pre_13_1: - file_tmpl = os.path.join( - datadir, - f"GCHP.{collection}." - ) - else: - file_tmpl = os.path.join( - datadir, - f"GEOSChem.{collection}." 
- ) + file_tmpl = os.path.join( + datadir, + f"GEOSChem.{collection}." + ) else: # --------------------------------------- # Get the file path template for GCC @@ -1895,7 +1952,7 @@ def get_filepaths( # -------------------------------------------- # Create a list of files for each date/time # -------------------------------------------- - for d, date in enumerate(dates): + for d_idx, date in enumerate(dates): if is_gchp and "Restart" in collection: date_time = str(np.datetime_as_string(date, unit="s")) else: @@ -1905,9 +1962,11 @@ def get_filepaths( date_time = date_time.replace(":", "") # Set file path. Include grid resolution if GCHP restart file. - paths[c][d] = file_tmpl + date_time + extension + paths[c_idx][d_idx] = file_tmpl + date_time + extension if is_gchp and "Restart" in collection and not gchp_is_pre_14_0: - paths[c][d] = file_tmpl + date_time[:len(date_time)-2] + "z." + gchp_res + extension + paths[c_idx][d_idx] = file_tmpl + \ + date_time[:len(date_time)-2] + \ + "z." + gchp_res + extension return paths @@ -1941,11 +2000,11 @@ def extract_pathnames_from_log( data_list = set() # only keep unique files # Open file - with open(filename, "r") as f: + with open(filename, "r", encoding=ENCODING) as ifile: # Read data from the file line by line. # Add file paths to the data_list set. - line = f.readline() + line = ifile.readline() while line: upcaseline = line.upper() if (": OPENING" in upcaseline) or (": READING" in upcaseline): @@ -1956,10 +2015,10 @@ def extract_pathnames_from_log( data_list.add(trimmed_path) # Read next line - line = f.readline() + line = ifile.readline() # Close file and return - f.close() + ifile.close() data_list = sorted(list(data_list)) return data_list @@ -2055,13 +2114,13 @@ def get_nan_mask( def all_zero_or_nan( - ds + dset ): """ Return whether ds is all zeros, or all nans Args: - ds: numpy array + dset: numpy array Input GEOS-Chem data Returns: all_zero, all_nan: bool, bool @@ -2069,11 +2128,11 @@ def all_zero_or_nan( all_nan is whether ds is all NaNs """ - return not np.any(ds), np.isnan(ds).all() + return not np.any(dset), np.isnan(dset).all() def dataset_mean( - ds, + dset, dim="time", skipna=True ): @@ -2081,7 +2140,7 @@ def dataset_mean( Convenience wrapper for taking the mean of an xarray Dataset. 
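extract_pathnames_from_log() scrapes unique file paths from log lines such as "HEMCO: Opening ...". A simplified sketch of the pattern; the sample lines are invented, and the real routine derives a trimmed path rather than just taking the last token:

```python
lines = [
    "HEMCO: Opening /data/HEMCO/CEDS/NO.nc",
    "GEOS-Chem: READING /data/GEOSChem/GEOSChem.Restart.nc4",
    "HEMCO: Opening /data/HEMCO/CEDS/NO.nc",   # duplicate is dropped
]

data_list = set()                  # a set keeps pathnames unique
for line in lines:
    upcaseline = line.upper()
    if ": OPENING" in upcaseline or ": READING" in upcaseline:
        data_list.add(line.split()[-1])

print(sorted(data_list))
```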
Args: - ds : xarray Dataset + dset : xarray Dataset Input data Keyword Args: @@ -2097,14 +2156,13 @@ def dataset_mean( Dataset containing mean values Will return None if ds is not defined """ - if ds is None: - return ds + verify_variable_type(dset, (xr.Dataset, type(None))) - if not isinstance(ds, xr.Dataset): - raise ValueError("Argument ds must be None or xarray.Dataset!") + if dset is None: + return dset with xr.set_options(keep_attrs=True): - return ds.mean(dim=dim, skipna=skipna) + return dset.mean(dim=dim, skipna=skipna) def dataset_reader( @@ -2143,14 +2201,12 @@ def read_config_file(config_file, quiet=False): try: if not quiet: print(f"Using configuration file {config_file}") - config = yaml_safe_load(open(config_file)) + with open(config_file, encoding=ENCODING) as stream: + return safe_load(stream) except Exception as err: msg = f"Error reading configuration in {config_file}: {err}" raise Exception(msg) from err - return config - - def unique_values( this_list, drop=None, @@ -2171,17 +2227,15 @@ def unique_values( unique: list List of unique values from this_list """ - if not isinstance(this_list, list): - raise ValueError("Argument 'this_list' must be a list object!") - if not isinstance(drop, list): - raise ValueError("Argument 'drop' must be a list object!") + verify_variable_type(this_list, list) + verify_variable_type(drop, list) unique = list(set(this_list)) if drop is not None: - for d in drop: - if d in unique: - unique.remove(d) + for var in drop: + if var in unique: + unique.remove(var) unique.sort() @@ -2242,13 +2296,9 @@ def insert_text_into_file( width: int Will "word-wrap" the text in 'replace_text' to this width """ - if not isinstance(search_text, str): - raise ValueError("Argument 'search_text' needs to be a string!") - if not isinstance(replace_text, str) and \ - not isinstance(replace_text, list): - raise ValueError( - "Argument 'replace_text' needs to be a list or a string" - ) + verify_variable_type(filename, str) + verify_variable_type(search_text, str) + verify_variable_type(replace_text, (str, list)) # Word-wrap the replacement text # (does list -> str conversion if necessary) @@ -2257,18 +2307,18 @@ def insert_text_into_file( width=width ) - with open(filename, "r") as f: - filedata = f.read() - f.close() + with open(filename, "r", encoding=ENCODING) as ifile: + filedata = ifile.read() + ifile.close() filedata = filedata.replace( search_text, replace_text ) - with open(filename, "w") as f: - f.write(filedata) - f.close() + with open(filename, "w", encoding=ENCODING) as ofile: + ofile.write(filedata) + ofile.close() def array_equals( @@ -2331,6 +2381,8 @@ def make_directory( Set to True if you wish to overwrite prior contents in the directory 'dir_name' """ + verify_variable_type(dir_name, str) + verify_variable_type(overwrite, bool) if os.path.isdir(dir_name) and not overwrite: msg = f"Directory {dir_name} exists!\n" @@ -2348,16 +2400,37 @@ def trim_cloud_benchmark_label( Removes the first part of the cloud benchmark label string (e.g. "gchp-c24-1Hr", "gcc-4x5-1Mon", etc) to avoid clutter. 
""" - if not isinstance(label, str): - raise ValueError("Argument 'label' must be a string!") + verify_variable_type(label, str) - for v in [ + for var in [ "gcc-4x5-1Hr", "gchp-c24-1Hr", "gcc-4x5-1Mon", "gchp-c24-1Mon", ]: - if v in label: - label.replace(v, "") - + if var in label: + label.replace(var, "") + return label + + +def verify_variable_type( + var, + var_type +): + """ + Convenience routine that will raise a TypeError if a variable's + type does not match a list of expected types. + + Args: + ----- + var : variable of any type + The variable to check. + + var_type : type or tuple of types + A single type definition (list, str, pandas.Series, etc.) + or a tuple of type definitions. + """ + if isinstance(var, var_type): + return + raise TypeError( f"{var} is not of type: {var_type}!") diff --git a/setup.py b/setup.py index 3c448591..d21be7e2 100644 --- a/setup.py +++ b/setup.py @@ -29,8 +29,8 @@ ] MAJOR = 1 -MINOR = 3 -MICRO = 2 +MINOR = 4 +MICRO = 0 EXTRA = '' # for alpha (aN), beta (bN), rc (rcN) versions VERSION = "{}.{}.{}{}".format(MAJOR, MINOR, MICRO, EXTRA) @@ -105,8 +105,9 @@ def _write_version_file(): "netcdf-fortran==4.5.4", "numpy==1.21.1", "pypdf2==1.26.0", + "pyproj==3.6.0", "recommonmark==0.7.1", - "requests==2.26.0", + "requests==2.31.0", "scipy==1.7.0", "sparselt>=0.1.3", "sphinx==3.5.4",